Team Fortress 2 Source Code as on 22/4/2020
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1285 lines
54 KiB

  1. //========= Copyright Valve Corporation, All rights reserved. ============//
  2. //
  3. // Purpose:
  4. //
  5. // $NoKeywords: $
  6. //
  7. //===========================================================================//
  8. #ifndef TIER1_STRTOOLS_H
  9. #define TIER1_STRTOOLS_H
  10. #include "tier0/platform.h"
  11. #include <ctype.h>
  12. #include <stdarg.h>
  13. #ifdef _WIN32
  14. #pragma once
  15. #elif POSIX
  16. #include <wchar.h>
  17. #include <math.h>
  18. #include <wctype.h>
  19. #endif
  20. #include <string.h>
  21. #include <stdlib.h>
  22. class CUtlBuffer;
  23. class CUtlString;
  24. #ifdef _WIN64
  25. #define str_size unsigned int
  26. #else
  27. #define str_size size_t
  28. #endif
  29. template< class T, class I > class CUtlMemory;
  30. template< class T, class A > class CUtlVector;
  31. //-----------------------------------------------------------------------------
  32. // Portable versions of standard string functions
  33. //-----------------------------------------------------------------------------
  34. void _V_memset ( const char* file, int line, void *dest, int fill, int count );
  35. void _V_memcpy ( const char* file, int line, void *dest, const void *src, int count );
  36. void _V_memmove ( const char* file, int line, void *dest, const void *src, int count );
  37. int _V_memcmp ( const char* file, int line, const void *m1, const void *m2, int count );
  38. int _V_strlen ( const char* file, int line, const char *str );
  39. void _V_strcpy ( const char* file, int line, char *dest, const char *src );
  40. char* _V_strrchr ( const char* file, int line, const char *s, char c );
  41. int _V_strcmp ( const char* file, int line, const char *s1, const char *s2 );
  42. int _V_wcscmp ( const char* file, int line, const wchar_t *s1, const wchar_t *s2 );
  43. char* _V_strstr ( const char* file, int line, const char *s1, const char *search );
  44. int _V_wcslen ( const char* file, int line, const wchar_t *pwch );
  45. wchar_t* _V_wcslower (const char* file, int line, wchar_t *start);
  46. wchar_t* _V_wcsupr (const char* file, int line, wchar_t *start);
  47. // ASCII-optimized functions which fall back to CRT only when necessary
  48. char *V_strupr( char *start );
  49. char *V_strlower( char *start );
  50. int V_stricmp( const char *s1, const char *s2 );
  51. int V_strncmp( const char *s1, const char *s2, int count );
  52. int V_strnicmp( const char *s1, const char *s2, int n );
  53. //-----------------------------------------------------------------------------
  54. // Purpose: Slightly modified strtok. Does not modify the input string. Does
  55. // not skip over more than one separator at a time. This allows parsing
  56. // strings where tokens between separators may or may not be present:
  57. //
  58. // Door01,,,0 would be parsed as "Door01" "" "" "0"
  59. // Door01,Open,,0 would be parsed as "Door01" "Open" "" "0"
  60. //
  61. // Input : token - Returns with a token, or zero length if the token was missing.
  62. // str - String to parse.
  63. // sep - Character to use as separator. UNDONE: allow multiple separator chars
  64. // Output : Returns a pointer to the next token to be parsed.
  65. //-----------------------------------------------------------------------------
  66. const char *nexttoken(char *token, size_t nMaxTokenLen, const char *str, char sep);
  67. template <size_t maxLenInChars> inline const char *nexttoken( OUT_Z_ARRAY char (&pToken)[maxLenInChars], const char *str, char sep)
  68. {
  69. return nexttoken( pToken, maxLenInChars, str, sep );
  70. }
  71. #ifdef POSIX
  // Compiled only when POSIX is defined (see the enclosing #ifdef): supplies
  // strupr() by forwarding to V_strupr() and returning its result.
  inline char *strupr( char *start )
  {
  return V_strupr( start );
  }
  // Same arrangement for strlwr(), which forwards to V_strlower().
  inline char *strlwr( char *start )
  {
  return V_strlower( start );
  }
  // Lower-cases a NUL-terminated wide string in place, one element at a time
  // via towlower(), and returns the pointer it was given.
  // A NULL pointer is returned untouched.
  inline wchar_t *_wcslwr( wchar_t *start )
  {
      // The NULL test only needs to happen once, not on every iteration.
      if ( !start )
          return start;

      for ( wchar_t *pCur = start; *pCur; ++pCur )
      {
          *pCur = static_cast<wchar_t>( towlower( static_cast<wint_t>( *pCur ) ) );
      }
      return start;
  }
  // Upper-cases a NUL-terminated wide string in place, one element at a time
  // via towupper(), and returns the pointer it was given.
  // A NULL pointer is returned untouched.
  inline wchar_t *_wcsupr( wchar_t *start )
  {
      // The NULL test only needs to happen once, not on every iteration.
      if ( !start )
          return start;

      for ( wchar_t *pCur = start; *pCur; ++pCur )
      {
          *pCur = static_cast<wchar_t>( towupper( static_cast<wint_t>( *pCur ) ) );
      }
      return start;
  }
  100. #endif // POSIX
  101. #ifdef _DEBUG
  // _DEBUG builds: every V_ name below is a macro that expands to the matching
  // _V_ function declared earlier in this header, with the call site's __FILE__
  // and __LINE__ inserted as the first two arguments. Each macro argument is
  // parenthesized before being forwarded.
  #define V_memset(dest, fill, count) _V_memset (__FILE__, __LINE__, (dest), (fill), (count))
  #define V_memcpy(dest, src, count) _V_memcpy (__FILE__, __LINE__, (dest), (src), (count))
  #define V_memmove(dest, src, count) _V_memmove (__FILE__, __LINE__, (dest), (src), (count))
  #define V_memcmp(m1, m2, count) _V_memcmp (__FILE__, __LINE__, (m1), (m2), (count))
  #define V_strlen(str) _V_strlen (__FILE__, __LINE__, (str))
  #define V_strcpy(dest, src) _V_strcpy (__FILE__, __LINE__, (dest), (src))
  #define V_strrchr(s, c) _V_strrchr (__FILE__, __LINE__, (s), (c))
  #define V_strcmp(s1, s2) _V_strcmp (__FILE__, __LINE__, (s1), (s2))
  #define V_wcscmp(s1, s2) _V_wcscmp (__FILE__, __LINE__, (s1), (s2))
  #define V_strstr(s1, search ) _V_strstr (__FILE__, __LINE__, (s1), (search) )
  #define V_wcslen(pwch) _V_wcslen (__FILE__, __LINE__, (pwch))
  #define V_wcslower(start) _V_wcslower (__FILE__, __LINE__, (start))
  #define V_wcsupr(start) _V_wcsupr (__FILE__, __LINE__, (start))
  115. #else
  // Non-_DEBUG implementations of the V_ memory/string helpers. Each one forwards
  // directly to the CRT routine of the same name. Counts are plain ints in this
  // API and are converted to size_t at the CRT boundary; lengths coming back from
  // strlen()/wcslen() are narrowed to int.
  inline void V_memset( void *dest, int fill, int count )
  {
      memset( dest, fill, static_cast<size_t>( count ) );
  }

  inline void V_memcpy( void *dest, const void *src, int count )
  {
      memcpy( dest, src, static_cast<size_t>( count ) );
  }

  inline void V_memmove( void *dest, const void *src, int count )
  {
      memmove( dest, src, static_cast<size_t>( count ) );
  }

  inline int V_memcmp( const void *m1, const void *m2, int count )
  {
      return memcmp( m1, m2, static_cast<size_t>( count ) );
  }

  inline int V_strlen( const char *str )
  {
      return static_cast<int>( strlen( str ) );
  }

  inline void V_strcpy( char *dest, const char *src )
  {
      strcpy( dest, src );
  }

  inline int V_wcslen( const wchar_t *pwch )
  {
      return static_cast<int>( wcslen( pwch ) );
  }

  // Returns a non-const pointer into the const input, as the C function does.
  inline char *V_strrchr( const char *s, char c )
  {
      return const_cast<char *>( strrchr( s, c ) );
  }

  inline int V_strcmp( const char *s1, const char *s2 )
  {
      return strcmp( s1, s2 );
  }

  inline int V_wcscmp( const wchar_t *s1, const wchar_t *s2 )
  {
      return wcscmp( s1, s2 );
  }

  // Returns a non-const pointer into the const input, as the C function does.
  inline char *V_strstr( const char *s1, const char *search )
  {
      return const_cast<char *>( strstr( s1, search ) );
  }
  127. inline wchar_t* V_wcslower (wchar_t *start) { return _wcslwr( start ); }
  128. inline wchar_t* V_wcsupr (wchar_t *start) { return _wcsupr( start ); }
  129. #endif
  130. int V_atoi (const char *str);
  131. int64 V_atoi64(const char *str);
  132. uint64 V_atoui64(const char *str);
  133. int64 V_strtoi64( const char *nptr, char **endptr, int base );
  134. uint64 V_strtoui64( const char *nptr, char **endptr, int base );
  135. float V_atof(const char *str);
  136. char* V_stristr( char* pStr, const char* pSearch );
  137. const char* V_stristr( const char* pStr, const char* pSearch );
  138. const char* V_strnistr( const char* pStr, const char* pSearch, int n );
  139. const char* V_strnchr( const char* pStr, char c, int n );
  140. inline int V_strcasecmp (const char *s1, const char *s2) { return V_stricmp(s1, s2); }
  141. inline int V_strncasecmp (const char *s1, const char *s2, int n) { return V_strnicmp(s1, s2, n); }
  142. void V_qsort_s( void *base, size_t num, size_t width, int ( __cdecl *compare )(void *, const void *,
  143. const void *), void *context );
  144. // returns string immediately following prefix, (ie str+strlen(prefix)) or NULL if prefix not found
  145. const char *StringAfterPrefix ( const char *str, const char *prefix );
  146. const char *StringAfterPrefixCaseSensitive( const char *str, const char *prefix );
  147. inline bool StringHasPrefix ( const char *str, const char *prefix ) { return StringAfterPrefix ( str, prefix ) != NULL; }
  148. inline bool StringHasPrefixCaseSensitive( const char *str, const char *prefix ) { return StringAfterPrefixCaseSensitive( str, prefix ) != NULL; }
  149. template< bool CASE_SENSITIVE > inline bool _V_strEndsWithInner( const char *pStr, const char *pSuffix )
  150. {
  151. int nSuffixLen = V_strlen( pSuffix );
  152. int nStringLen = V_strlen( pStr );
  153. if ( nSuffixLen == 0 )
  154. return true; // All strings end with the empty string (matches Java & .NET behaviour)
  155. if ( nStringLen < nSuffixLen )
  156. return false;
  157. pStr += nStringLen - nSuffixLen;
  158. if ( CASE_SENSITIVE )
  159. return !V_strcmp( pStr, pSuffix );
  160. else
  161. return !V_stricmp( pStr, pSuffix );
  162. }
  163. // Does 'pStr' end with 'pSuffix'? (case sensitive/insensitive variants)
  164. inline bool V_strEndsWith( const char *pStr, const char *pSuffix ) { return _V_strEndsWithInner<TRUE>( pStr, pSuffix ); }
  165. inline bool V_striEndsWith( const char *pStr, const char *pSuffix ) { return _V_strEndsWithInner<FALSE>( pStr, pSuffix ); }
  166. // Normalizes a float string in place.
  167. // (removes leading zeros, trailing zeros after the decimal point, and the decimal point itself where possible)
  168. void V_normalizeFloatString( char* pFloat );
  // Locale-unaware, and therefore faster, replacement for the standard isdigit().
  // Only the ASCII characters '0'..'9' match. Because no CRT classifier is called,
  // the sign-extension problem of passing a negative char cannot occur.
  inline bool V_isdigit( char c )
  {
      // Subtracting '0' maps the digits onto 0..9; any other character lands
      // outside that window once the difference is viewed as unsigned.
      return static_cast<unsigned int>( c - '0' ) < 10u;
  }
  // Returns true when c is one of the ASCII digit code points '0'..'9'.
  // Takes an int so wide-character values can be passed directly.
  inline bool V_iswdigit( int c )
  {
      // A plain range test is used instead of the subtract-and-cast trick:
      // "c - '0'" on an int overflows (undefined behaviour) for values of c close
      // to INT_MIN, whereas these two comparisons are defined for every int.
      return c >= '0' && c <= '9';
  }
  // True when pszString is NULL or points at an immediate terminator.
  inline bool V_isempty( const char* pszString )
  {
      if ( pszString == NULL )
          return true;
      return *pszString == '\0';
  }
  180. // The islower/isdigit/etc. functions all expect a parameter that is either
  181. // 0-0xFF or EOF. It is easy to violate this constraint simply by passing
  182. // 'char' to these functions instead of unsigned char.
  183. // The V_ functions handle the char/unsigned char mismatch by taking a
  184. // char parameter and casting it to unsigned char so that chars with the
  185. // sign bit set will be zero extended instead of sign extended.
  186. // Note that EOF cannot be passed to these functions.
  187. //
  188. // These functions could also be used for optimizations if locale
  189. // considerations make some of the CRT functions slow.
  190. //#undef isdigit // In case this is implemented as a macro
  191. //#define isdigit use_V_isdigit_instead_of_isdigit
  // char-taking wrappers around the <ctype.h> classifiers. Each one converts its
  // argument to unsigned char before calling the CRT function, so characters with
  // the sign bit set are zero-extended rather than sign-extended, and collapses
  // the CRT's int result to a bool.
  inline bool V_isalpha( char c ) { return 0 != isalpha( static_cast<unsigned char>( c ) ); }
  //#undef isalpha
  //#define isalpha use_V_isalpha_instead_of_isalpha
  inline bool V_isalnum( char c ) { return 0 != isalnum( static_cast<unsigned char>( c ) ); }
  //#undef isalnum
  //#define isalnum use_V_isalnum_instead_of_isalnum
  inline bool V_isprint( char c ) { return 0 != isprint( static_cast<unsigned char>( c ) ); }
  //#undef isprint
  //#define isprint use_V_isprint_instead_of_isprint
  inline bool V_isxdigit( char c ) { return 0 != isxdigit( static_cast<unsigned char>( c ) ); }
  //#undef isxdigit
  //#define isxdigit use_V_isxdigit_instead_of_isxdigit
  inline bool V_ispunct( char c ) { return 0 != ispunct( static_cast<unsigned char>( c ) ); }
  //#undef ispunct
  //#define ispunct use_V_ispunct_instead_of_ispunct
  inline bool V_isgraph( char c ) { return 0 != isgraph( static_cast<unsigned char>( c ) ); }
  //#undef isgraph
  //#define isgraph use_V_isgraph_instead_of_isgraph
  inline bool V_isupper( char c ) { return 0 != isupper( static_cast<unsigned char>( c ) ); }
  //#undef isupper
  //#define isupper use_V_isupper_instead_of_isupper
  inline bool V_islower( char c ) { return 0 != islower( static_cast<unsigned char>( c ) ); }
  //#undef islower
  //#define islower use_V_islower_instead_of_islower
  inline bool V_iscntrl( char c ) { return 0 != iscntrl( static_cast<unsigned char>( c ) ); }
  //#undef iscntrl
  //#define iscntrl use_V_iscntrl_instead_of_iscntrl
  inline bool V_isspace( char c ) { return 0 != isspace( static_cast<unsigned char>( c ) ); }
  //#undef isspace
  //#define isspace use_V_isspace_instead_of_isspace
  //-----------------------------------------------------------------------------
  // Purpose: returns true if the first 'inputchars' characters of 'in' form a
  //          valid hex string: an even count of at least two characters, every
  //          one of them in 0-9, a-f or A-F.
  //-----------------------------------------------------------------------------
  inline bool V_isvalidhex( char const *in, int inputchars )
  {
      // Hex digits come in pairs: reject anything shorter than one pair or of odd length.
      if ( inputchars < 2 || ( inputchars & 1 ) != 0 )
          return false;

      int iChar = 0;
      while ( iChar != inputchars )
      {
          const char ch = in[iChar];
          const bool bDecimal = ( ch >= '0' && ch <= '9' );
          const bool bLowerHex = ( ch >= 'a' && ch <= 'f' );
          const bool bUpperHex = ( ch >= 'A' && ch <= 'F' );
          if ( !bDecimal && !bLowerHex && !bUpperHex )
              return false;
          ++iChar;
      }
      return true;
  }
  //-----------------------------------------------------------------------------
  // Purpose: Checks if the string is lower case, i.e. contains no 'A'..'Z'.
  //          Characters that are not letters do not affect the result.
  // NOTE: Only works with ASCII strings
  //-----------------------------------------------------------------------------
  inline bool V_isstrlower( const char *pch )
  {
      for ( const char *pScan = pch; *pScan != '\0'; ++pScan )
      {
          const char ch = *pScan;
          if ( ch >= 'A' && ch <= 'Z' )
              return false;
      }
      return true;
  }
  260. // These are versions of functions that guarantee NULL termination.
  261. //
  262. // maxLen is the maximum number of bytes in the destination string.
  263. // pDest[maxLen-1] is always NULL terminated if pSrc's length is >= maxLen.
  264. //
  265. // This means the last parameter can usually be a sizeof() of a string.
  266. void V_strncpy( OUT_Z_CAP(maxLenInChars) char *pDest, const char *pSrc, int maxLenInChars );
  267. // Ultimate safe strcpy function, for arrays only -- buffer size is inferred by the compiler
  268. template <size_t maxLenInChars> void V_strcpy_safe( OUT_Z_ARRAY char (&pDest)[maxLenInChars], const char *pSrc )
  269. {
  270. V_strncpy( pDest, pSrc, (int)maxLenInChars );
  271. }
  272. // A function which duplicates a string using new[] to allocate the new string.
  273. inline char *V_strdup( const char *pSrc )
  274. {
  275. int nLen = V_strlen( pSrc );
  276. char *pResult = new char [ nLen+1 ];
  277. V_memcpy( pResult, pSrc, nLen+1 );
  278. return pResult;
  279. }
  280. void V_wcsncpy( OUT_Z_BYTECAP(maxLenInBytes) wchar_t *pDest, wchar_t const *pSrc, int maxLenInBytes );
  281. template <size_t maxLenInChars> void V_wcscpy_safe( OUT_Z_ARRAY wchar_t (&pDest)[maxLenInChars], wchar_t const *pSrc )
  282. {
  283. V_wcsncpy( pDest, pSrc, maxLenInChars * sizeof(*pDest) );
  284. }
  285. #define COPY_ALL_CHARACTERS -1
  286. char *V_strncat( INOUT_Z_CAP(cchDest) char *pDest, const char *pSrc, size_t cchDest, int max_chars_to_copy=COPY_ALL_CHARACTERS );
  287. template <size_t cchDest> char *V_strcat_safe( INOUT_Z_ARRAY char (&pDest)[cchDest], const char *pSrc, int nMaxCharsToCopy=COPY_ALL_CHARACTERS )
  288. {
  289. return V_strncat( pDest, pSrc, (int)cchDest, nMaxCharsToCopy );
  290. }
  291. wchar_t *V_wcsncat( INOUT_Z_CAP(cchDest) wchar_t *pDest, const wchar_t *pSrc, size_t cchDest, int nMaxCharsToCopy=COPY_ALL_CHARACTERS );
  292. template <size_t cchDest> wchar_t *V_wcscat_safe( INOUT_Z_ARRAY wchar_t (&pDest)[cchDest], const wchar_t *pSrc, int nMaxCharsToCopy=COPY_ALL_CHARACTERS )
  293. {
  294. return V_wcsncat( pDest, pSrc, (int)cchDest, nMaxCharsToCopy );
  295. }
  296. char *V_strnlwr( INOUT_Z_CAP(cchBuf) char *pBuf, size_t cchBuf);
  297. template <size_t cchDest> char *V_strlwr_safe( INOUT_Z_ARRAY char (&pBuf)[cchDest] )
  298. {
  299. return _V_strnlwr( pBuf, (int)cchDest );
  300. }
  // Unicode string conversion policies - what to do if an illegal sequence is encountered.
  // The public STRINGCONVERT_* values are built from three flag bits; REPLACE is
  // the value with none of those bits set.
  enum EStringConvertErrorPolicy
  {
      // Building-block flag bits.
      _STRINGCONVERTFLAG_SKIP = 1 << 0,
      _STRINGCONVERTFLAG_FAIL = 1 << 1,
      _STRINGCONVERTFLAG_ASSERT = 1 << 2,

      // Base policies.
      STRINGCONVERT_REPLACE = 0,
      STRINGCONVERT_SKIP = _STRINGCONVERTFLAG_SKIP,
      STRINGCONVERT_FAIL = _STRINGCONVERTFLAG_FAIL,

      // The same base policies with the ASSERT flag bit added.
      STRINGCONVERT_ASSERT_REPLACE = _STRINGCONVERTFLAG_ASSERT | STRINGCONVERT_REPLACE,
      STRINGCONVERT_ASSERT_SKIP = _STRINGCONVERTFLAG_ASSERT | STRINGCONVERT_SKIP,
      STRINGCONVERT_ASSERT_FAIL = _STRINGCONVERTFLAG_ASSERT | STRINGCONVERT_FAIL,
  };
  314. // Unicode (UTF-8, UTF-16, UTF-32) fundamental conversion functions.
  315. bool Q_IsValidUChar32( uchar32 uValue );
  316. int Q_UChar32ToUTF8Len( uchar32 uValue );
  317. int Q_UChar32ToUTF8( uchar32 uValue, char *pOut );
  318. int Q_UChar32ToUTF16Len( uchar32 uValue );
  319. int Q_UChar32ToUTF16( uchar32 uValue, uchar16 *pOut );
  320. // Validate that a Unicode string is well-formed and contains only valid code points
  321. bool Q_UnicodeValidate( const char *pUTF8 );
  322. bool Q_UnicodeValidate( const uchar16 *pUTF16 );
  323. bool Q_UnicodeValidate( const uchar32 *pUTF32 );
  324. // Returns length of string in Unicode code points (printed glyphs or non-printing characters)
  325. int Q_UnicodeLength( const char *pUTF8 );
  326. int Q_UnicodeLength( const uchar16 *pUTF16 );
  327. int Q_UnicodeLength( const uchar32 *pUTF32 );
  328. // Returns length of string in elements, not characters! These are analogous to Q_strlen and Q_wcslen
  329. inline int Q_strlen16( const uchar16 *puc16 ) { int nElems = 0; while ( puc16[nElems] ) ++nElems; return nElems; }
  330. inline int Q_strlen32( const uchar32 *puc32 ) { int nElems = 0; while ( puc32[nElems] ) ++nElems; return nElems; }
  331. // Repair invalid Unicode strings by dropping truncated characters and fixing improperly-double-encoded UTF-16 sequences.
  332. // Unlike conversion functions which replace with '?' by default, a repair operation assumes that you know that something
  333. // is wrong with the string (eg, mid-sequence truncation) and you just want to do the best possible job of cleaning it up.
  334. // You can pass a REPLACE or FAIL policy if you would prefer to replace characters with '?' or clear the entire string.
  335. // Returns nonzero on success, or 0 if the policy is FAIL and an invalid sequence was found.
  336. int Q_UnicodeRepair( char *pUTF8, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_SKIP );
  337. int Q_UnicodeRepair( uchar16 *pUTF16, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_SKIP );
  338. int Q_UnicodeRepair( uchar32 *pUTF32, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_SKIP );
  339. // Advance pointer forward by N Unicode code points (printed glyphs or non-printing characters), stopping at terminating null if encountered.
  340. char *Q_UnicodeAdvance( char *pUTF8, int nCharacters );
  341. uchar16 *Q_UnicodeAdvance( uchar16 *pUTF16, int nCharactersnCharacters );
  342. uchar32 *Q_UnicodeAdvance( uchar32 *pUTF32, int nChars );
  343. inline const char *Q_UnicodeAdvance( const char *pUTF8, int nCharacters ) { return Q_UnicodeAdvance( (char*) pUTF8, nCharacters ); }
  344. inline const uchar16 *Q_UnicodeAdvance( const uchar16 *pUTF16, int nCharacters ) { return Q_UnicodeAdvance( (uchar16*) pUTF16, nCharacters ); }
  345. inline const uchar32 *Q_UnicodeAdvance( const uchar32 *pUTF32, int nCharacters ) { return Q_UnicodeAdvance( (uchar32*) pUTF32, nCharacters ); }
  346. // Truncate to maximum of N Unicode code points (printed glyphs or non-printing characters)
  347. inline void Q_UnicodeTruncate( char *pUTF8, int nCharacters ) { *Q_UnicodeAdvance( pUTF8, nCharacters ) = 0; }
  348. inline void Q_UnicodeTruncate( uchar16 *pUTF16, int nCharacters ) { *Q_UnicodeAdvance( pUTF16, nCharacters ) = 0; }
  349. inline void Q_UnicodeTruncate( uchar32 *pUTF32, int nCharacters ) { *Q_UnicodeAdvance( pUTF32, nCharacters ) = 0; }
  350. // Conversion between Unicode string types (UTF-8, UTF-16, UTF-32). Deals with bytes, not element counts,
  351. // to minimize harm from the programmer mistakes which continue to plague our wide-character string code.
  352. // Returns the number of bytes written to the output, or if output is NULL, the number of bytes required.
  353. int Q_UTF8ToUTF16( const char *pUTF8, OUT_Z_BYTECAP(cubDestSizeInBytes) uchar16 *pUTF16, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
  354. int Q_UTF8ToUTF32( const char *pUTF8, OUT_Z_BYTECAP(cubDestSizeInBytes) uchar32 *pUTF32, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
  355. int Q_UTF16ToUTF8( const uchar16 *pUTF16, OUT_Z_BYTECAP(cubDestSizeInBytes) char *pUTF8, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
  356. int Q_UTF16ToUTF32( const uchar16 *pUTF16, OUT_Z_BYTECAP(cubDestSizeInBytes) uchar32 *pUTF32, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
  357. int Q_UTF32ToUTF8( const uchar32 *pUTF32, OUT_Z_BYTECAP(cubDestSizeInBytes) char *pUTF8, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
  358. int Q_UTF32ToUTF16( const uchar32 *pUTF32, OUT_Z_BYTECAP(cubDestSizeInBytes) uchar16 *pUTF16, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
  359. // This is disgusting and exists only to make it easy to support both 16-bit and 32-bit wchar_t's on different platforms
  360. int Q_UTF32ToUTF32( const uchar32 *pUTF32Source, OUT_Z_BYTECAP(cubDestSizeInBytes) uchar32 *pUTF32Dest, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
  361. // Conversion between count-limited UTF-n character arrays, including any potential NULL characters.
  362. // Output has a terminating NULL for safety; strip the last character if you want an unterminated string.
  363. // Returns the number of bytes written to the output, or if output is NULL, the number of bytes required.
  364. int Q_UTF8CharsToUTF16( const char *pUTF8, int nElements, OUT_Z_BYTECAP(cubDestSizeInBytes) uchar16 *pUTF16, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
  365. int Q_UTF8CharsToUTF32( const char *pUTF8, int nElements, OUT_Z_BYTECAP(cubDestSizeInBytes) uchar32 *pUTF32, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
  366. int Q_UTF16CharsToUTF8( const uchar16 *pUTF16, int nElements, OUT_Z_BYTECAP(cubDestSizeInBytes) char *pUTF8, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
  367. int Q_UTF16CharsToUTF32( const uchar16 *pUTF16, int nElements, OUT_Z_BYTECAP(cubDestSizeInBytes) uchar32 *pUTF32, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
  368. int Q_UTF32CharsToUTF8( const uchar32 *pUTF32, int nElements, OUT_Z_BYTECAP(cubDestSizeInBytes) char *pUTF8, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
  369. int Q_UTF32CharsToUTF16( const uchar32 *pUTF32, int nElements, OUT_Z_BYTECAP(cubDestSizeInBytes) uchar16 *pUTF16, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
  370. // Decode a single UTF-8 character to a uchar32, returns number of UTF-8 bytes parsed
  371. int Q_UTF8ToUChar32( const char *pUTF8_, uchar32 &uValueOut, bool &bErrorOut );
  372. // Decode a single UTF-16 character to a uchar32, returns number of UTF-16 characters (NOT BYTES) consumed
  373. int Q_UTF16ToUChar32( const uchar16 *pUTF16, uchar32 &uValueOut, bool &bErrorOut );
  374. // NOTE: WString means either UTF32 or UTF16 depending on the platform and compiler settings.
  375. #if defined( _MSC_VER ) || defined( _WIN32 )
  376. #define Q_UTF8ToWString Q_UTF8ToUTF16
  377. #define Q_UTF8CharsToWString Q_UTF8CharsToUTF16
  378. #define Q_UTF32ToWString Q_UTF32ToUTF16
  379. #define Q_WStringToUTF8 Q_UTF16ToUTF8
  380. #define Q_WStringCharsToUTF8 Q_UTF16CharsToUTF8
  381. #define Q_WStringToUTF32 Q_UTF16ToUTF32
  382. #else
  383. #define Q_UTF8ToWString Q_UTF8ToUTF32
  384. #define Q_UTF8CharsToWString Q_UTF8CharsToUTF32
  385. #define Q_UTF32ToWString Q_UTF32ToUTF32
  386. #define Q_WStringToUTF8 Q_UTF32ToUTF8
  387. #define Q_WStringCharsToUTF8 Q_UTF32CharsToUTF8
  388. #define Q_WStringToUTF32 Q_UTF32ToUTF32
  389. #endif
  390. // These are legacy names which don't make a lot of sense but are used everywhere. Prefer the WString convention wherever possible
  391. #define V_UTF8ToUnicode Q_UTF8ToWString
  392. #define V_UnicodeToUTF8 Q_WStringToUTF8
  393. #ifdef WIN32
  394. // This function is ill-defined as it relies on the current ANSI code page. Currently Win32 only for tools.
  395. int Q_LocaleSpecificANSIToUTF8( const char *pANSI, int cubSrcInBytes, OUT_Z_BYTECAP(cubDestSizeInBytes) char *pUTF8, int cubDestSizeInBytes );
  396. #endif
  397. // Windows-1252 is mostly the same as ISO Latin-1, and probably what you want if you are
  398. // saddled with an 8-bit ANSI string that originated on a Windows system.
  399. int Q_Windows1252CharsToUTF8( const char *pchSrc, int cchSrc, OUT_Z_BYTECAP(cchDestUTF8) char *pchDestUTF8, int cchDestUTF8 );
  400. // CP 437 is used for VGA console text and some old-school file formats such as ZIP. It
  401. // is also known as the "IBM PC OEM code page" and various related names. You probably
  402. // don't want to use this function unless you know for a fact that you're dealing with
  403. // old-school OEM code pages. Otherwise try the Windows-1252 function above.
  404. int Q_CP437CharsToUTF8( const char *pchSrc, int cchSrc, OUT_Z_BYTECAP(cchDestUTF8) char *pchDestUTF8, int cchDestUTF8 );
  405. // replaces characters in a UTF8 string with their identical-looking equivalent (non-roundtrippable)
  406. //
  407. // older version of API uses a small homoglyph table; newer version uses a larger one
  408. //
  409. // strings using old version are baked into the database, so we won't toss it quite yet,
  410. // but don't use it for new features.
  411. int Q_NormalizeUTF8Old( const char *pchSrc, OUT_Z_CAP(cchDest) char *pchDest, int cchDest );
  412. int Q_NormalizeUTF8( const char *pchSrc, OUT_Z_CAP(cchDest) char *pchDest, int cchDest );
  413. //-----------------------------------------------------------------------------
  414. // Purpose: replaces characters in a UTF8 string with similar-looking equivalents.
  415. // Only replaces with ASCII characters.. non-recognized characters will be replaced with ?
  416. // This operation is destructive (i.e. you can't roundtrip through the normalized
  417. // form).
  418. //-----------------------------------------------------------------------------
  419. template <size_t maxLenInChars> int Q_NormalizeUTF8ToASCII( OUT_Z_ARRAY char (&pchDest)[maxLenInChars], const char *pchSrc )
  420. {
  421. int nResult = Q_NormalizeUTF8( pchSrc, pchDest, maxLenInChars );
  422. // replace non ASCII characters with ?
  423. for ( int i = 0; i < nResult; i++ )
  424. {
  425. if ( pchDest[i] > 127 || pchDest[i] < 0 )
  426. {
  427. pchDest[i] = '?';
  428. }
  429. }
  430. return nResult;
  431. }
  432. // UNDONE: Find a non-compiler-specific way to do this
  433. #ifdef _WIN32
  434. #ifndef _VA_LIST_DEFINED
  435. #ifdef _M_ALPHA
  436. struct va_list
  437. {
  438. char *a0; /* pointer to first homed integer argument */
  439. int offset; /* byte offset of next parameter */
  440. };
  441. #else // !_M_ALPHA
  442. typedef char * va_list;
  443. #endif // !_M_ALPHA
  444. #define _VA_LIST_DEFINED
  445. #endif // _VA_LIST_DEFINED
  446. #elif POSIX
  447. #include <stdarg.h>
  448. #endif
  449. #ifdef _WIN32
  450. #define CORRECT_PATH_SEPARATOR '\\'
  451. #define CORRECT_PATH_SEPARATOR_S "\\"
  452. #define INCORRECT_PATH_SEPARATOR '/'
  453. #define INCORRECT_PATH_SEPARATOR_S "/"
  454. #elif POSIX
  455. #define CORRECT_PATH_SEPARATOR '/'
  456. #define CORRECT_PATH_SEPARATOR_S "/"
  457. #define INCORRECT_PATH_SEPARATOR '\\'
  458. #define INCORRECT_PATH_SEPARATOR_S "\\"
  459. #endif
  460. int V_vsnprintf( OUT_Z_CAP(maxLenInCharacters) char *pDest, int maxLenInCharacters, PRINTF_FORMAT_STRING const char *pFormat, va_list params );
  461. template <size_t maxLenInCharacters> int V_vsprintf_safe( OUT_Z_ARRAY char (&pDest)[maxLenInCharacters], PRINTF_FORMAT_STRING const char *pFormat, va_list params ) { return V_vsnprintf( pDest, maxLenInCharacters, pFormat, params ); }
  462. int V_snprintf( OUT_Z_CAP(maxLenInChars) char *pDest, int maxLenInChars, PRINTF_FORMAT_STRING const char *pFormat, ... ) FMTFUNCTION( 3, 4 );
  463. // gcc insists on only having format annotations on declarations, not definitions, which is why I have both.
  464. template <size_t maxLenInChars> int V_sprintf_safe( OUT_Z_ARRAY char (&pDest)[maxLenInChars], PRINTF_FORMAT_STRING const char *pFormat, ... ) FMTFUNCTION( 2, 3 );
  465. template <size_t maxLenInChars> int V_sprintf_safe( OUT_Z_ARRAY char (&pDest)[maxLenInChars], PRINTF_FORMAT_STRING const char *pFormat, ... )
  466. {
  467. va_list params;
  468. va_start( params, pFormat );
  469. int result = V_vsnprintf( pDest, maxLenInChars, pFormat, params );
  470. va_end( params );
  471. return result;
  472. }
  473. // gcc insists on only having format annotations on declarations, not definitions, which is why I have both.
  474. // Append formatted text to an array in a safe manner -- always null-terminated, truncation rather than buffer overrun.
  475. template <size_t maxLenInChars> int V_sprintfcat_safe( INOUT_Z_ARRAY char (&pDest)[maxLenInChars], PRINTF_FORMAT_STRING const char *pFormat, ... ) FMTFUNCTION( 2, 3 );
  476. template <size_t maxLenInChars> int V_sprintfcat_safe( INOUT_Z_ARRAY char (&pDest)[maxLenInChars], PRINTF_FORMAT_STRING const char *pFormat, ... )
  477. {
  478. va_list params;
  479. va_start( params, pFormat );
  480. size_t usedLength = V_strlen(pDest);
  481. // This code is here to check against buffer overruns when uninitialized arrays are passed in.
  482. // It should never be executed. Unfortunately we can't assert in this header file.
  483. if ( usedLength >= maxLenInChars )
  484. usedLength = 0;
  485. int result = V_vsnprintf( pDest + usedLength, maxLenInChars - usedLength, pFormat, params );
  486. va_end( params );
  487. return result;
  488. }
  489. int V_vsnwprintf( OUT_Z_CAP(maxLenInCharacters) wchar_t *pDest, int maxLenInCharacters, PRINTF_FORMAT_STRING const wchar_t *pFormat, va_list params );
  490. template <size_t maxLenInCharacters> int V_vswprintf_safe( OUT_Z_ARRAY wchar_t (&pDest)[maxLenInCharacters], PRINTF_FORMAT_STRING const wchar_t *pFormat, va_list params ) { return V_vsnwprintf( pDest, maxLenInCharacters, pFormat, params ); }
  491. int V_vsnprintfRet( OUT_Z_CAP(maxLenInCharacters) char *pDest, int maxLenInCharacters, PRINTF_FORMAT_STRING const char *pFormat, va_list params, bool *pbTruncated );
  492. template <size_t maxLenInCharacters> int V_vsprintfRet_safe( OUT_Z_ARRAY char (&pDest)[maxLenInCharacters], PRINTF_FORMAT_STRING const char *pFormat, va_list params, bool *pbTruncated ) { return V_vsnprintfRet( pDest, maxLenInCharacters, pFormat, params, pbTruncated ); }
  493. // FMTFUNCTION can only be used on ASCII functions, not wide-char functions.
  494. int V_snwprintf( OUT_Z_CAP(maxLenInCharacters) wchar_t *pDest, int maxLenInCharacters, PRINTF_FORMAT_STRING const wchar_t *pFormat, ... );
  495. template <size_t maxLenInChars> int V_swprintf_safe( OUT_Z_ARRAY wchar_t (&pDest)[maxLenInChars], PRINTF_FORMAT_STRING const wchar_t *pFormat, ... )
  496. {
  497. va_list params;
  498. va_start( params, pFormat );
  499. int result = V_vsnwprintf( pDest, maxLenInChars, pFormat, params );
  500. va_end( params );
  501. return result;
  502. }
  503. // Prints out a prettified memory counter string value ( e.g., 7,233.27 Mb, 1,298.003 Kb, 127 bytes )
  504. char *V_pretifymem( float value, int digitsafterdecimal = 2, bool usebinaryonek = false );
  505. // Prints out a pretified integer with comma separators (eg, 7,233,270,000)
  506. char *V_pretifynum( int64 value );
  507. int _V_UCS2ToUnicode( const ucs2 *pUCS2, OUT_Z_BYTECAP(cubDestSizeInBytes) wchar_t *pUnicode, int cubDestSizeInBytes );
  508. template< typename T > inline int V_UCS2ToUnicode( const ucs2 *pUCS2, OUT_Z_BYTECAP(cubDestSizeInBytes) wchar_t *pUnicode, T cubDestSizeInBytes )
  509. {
  510. return _V_UCS2ToUnicode( pUCS2, pUnicode, static_cast<int>(cubDestSizeInBytes) );
  511. }
  512. int _V_UCS2ToUTF8( const ucs2 *pUCS2, OUT_Z_BYTECAP(cubDestSizeInBytes) char *pUTF8, int cubDestSizeInBytes );
  513. template< typename T > inline int V_UCS2ToUTF8( const ucs2 *pUCS2, OUT_Z_BYTECAP(cubDestSizeInBytes) char *pUTF8, T cubDestSizeInBytes )
  514. {
  515. return _V_UCS2ToUTF8( pUCS2, pUTF8, static_cast<int>(cubDestSizeInBytes) );
  516. }
  517. int _V_UnicodeToUCS2( const wchar_t *pUnicode, int cubSrcInBytes, OUT_Z_BYTECAP(cubDestSizeInBytes) char *pUCS2, int cubDestSizeInBytes );
  518. template< typename T, typename U > inline int V_UnicodeToUCS2( const wchar_t *pUnicode, T cubSrcInBytes, OUT_Z_BYTECAP(cubDestSizeInBytes) char *pUCS2, U cubDestSizeInBytes )
  519. {
  520. return _V_UnicodeToUCS2( pUnicode, static_cast<int>(cubSrcInBytes), pUCS2, static_cast<int>(cubDestSizeInBytes) );
  521. }
  522. int _V_UTF8ToUCS2( const char *pUTF8, int cubSrcInBytes, OUT_Z_BYTECAP(cubDestSizeInBytes) ucs2 *pUCS2, int cubDestSizeInBytes );
  523. template< typename T, typename U > inline int V_UTF8ToUCS2( const char *pUTF8, T cubSrcInBytes, OUT_Z_BYTECAP(cubDestSizeInBytes) ucs2 *pUCS2, U cubDestSizeInBytes )
  524. {
  525. return _V_UTF8ToUCS2( pUTF8, static_cast<int>(cubSrcInBytes), pUCS2, static_cast<int>(cubDestSizeInBytes) );
  526. }
  527. // strips leading and trailing whitespace; returns true if any characters were removed. UTF-8 and UTF-16 versions.
  // Both variants take a single non-const pointer and no output buffer, so the string is edited in place.
  528. bool Q_StripPrecedingAndTrailingWhitespace( char *pch );
  529. bool Q_StripPrecedingAndTrailingWhitespaceW( wchar_t *pwch );
  530. // strips leading and trailing whitespace, also taking "aggressive" characters
  531. // like punctuation spaces, non-breaking spaces, composing characters, and so on
  532. bool Q_AggressiveStripPrecedingAndTrailingWhitespace( char *pch );
  533. bool Q_AggressiveStripPrecedingAndTrailingWhitespaceW( wchar_t *pwch );
  // NOTE(review): undocumented in this header; which characters count as "evil" and what the
  // bool return means are not visible here - confirm in the implementation.
  534. bool Q_RemoveAllEvilCharacters( char *pch );
  535. // Functions for converting hexadecimal character strings back into binary data etc.
  536. //
  537. // e.g.,
  538. // int output;
  539. // V_hextobinary( "ffffffff", 8, &output, sizeof( output ) );
  540. // would make output == 0xffffffff or -1
  541. // Similarly,
  542. // char buffer[ 9 ];
  543. // V_binarytohex( &output, sizeof( output ), buffer, sizeof( buffer ) );
  544. // would put "ffffffff" into buffer (note null terminator!!!)
  // NOTE(review): the examples pass &output (an int*) where the declarations below take byte*;
  // real callers need a cast.
  // NOTE(review): V_nibble is undocumented here; presumably it maps one hex digit to its 4-bit value - confirm.
  545. unsigned char V_nibble( char c );
  546. void V_hextobinary( char const *in, int numchars, byte *out, int maxoutputbytes );
  547. void V_binarytohex( const byte *in, int inputbytes, char *out, int outsize );
  548. // Tools for working with filenames
  549. // Extracts the base name of a file (no path, no extension, assumes '/' or '\' as path separator)
  550. void V_FileBase( const char *in, char *out,int maxlen );
  551. // Remove the final characters of ppath if it's '\' or '/'.
  552. void V_StripTrailingSlash( char *ppath );
  553. // Remove the final characters of ppline if they are whitespace (uses V_isspace)
  554. void V_StripTrailingWhitespace( char *ppline );
  555. // Remove the initial characters of ppline if they are whitespace (uses V_isspace)
  556. void V_StripLeadingWhitespace( char *ppline );
  557. // Remove the initial/final characters of ppline if they are " quotes
  558. void V_StripSurroundingQuotes( char *ppline );
  559. // Remove any extension from in and return resulting string in out
  560. void V_StripExtension( const char *in, char *out, int outLen );
  561. // Make path end with extension if it doesn't already have an extension
  562. void V_DefaultExtension( char *path, const char *extension, int pathStringLength );
  563. // Strips any current extension from path and ensures that extension is the new extension
  564. void V_SetExtension( char *path, const char *extension, int pathStringLength );
  565. // Removes any filename from path ( strips back to previous / or \ character )
  566. void V_StripFilename( char *path );
  567. // Remove the final directory from the path
  // NOTE(review): the meaning of the bool return is not documented here - confirm in the implementation.
  568. bool V_StripLastDir( char *dirName, int maxlen );
  569. // Returns a pointer to the unqualified file name (no path) of a file name
  570. const char * V_UnqualifiedFileName( const char * in );
  571. // Given a path and a filename, composes "path\filename", inserting the (OS correct) separator if necessary
  572. void V_ComposeFileName( const char *path, const char *filename, char *dest, int destSize );
  573. // Copy out the path except for the stuff after the final pathseparator
  574. bool V_ExtractFilePath( const char *path, char *dest, int destSize );
  575. // Copy out the file extension into dest
  576. void V_ExtractFileExtension( const char *path, char *dest, int destSize );
  // NOTE(review): undocumented here; presumably returns a pointer to the extension within path
  // rather than copying it (contrast V_ExtractFileExtension) - confirm.
  577. const char *V_GetFileExtension( const char * path );
  578. // returns a pointer to just the filename part of the path
  579. // (everything after the last path separator)
  580. const char *V_GetFileName( const char * path );
  581. // This removes "./" and "../" from the pathname. pFilename should be a full pathname.
  582. // Also incorporates the behavior of V_FixSlashes and optionally V_FixDoubleSlashes.
  583. // Returns false if it tries to ".." past the root directory in the drive (in which case
  584. // it is an invalid path).
  585. bool V_RemoveDotSlashes( char *pFilename, char separator = CORRECT_PATH_SEPARATOR, bool bRemoveDoubleSlashes = true );
  586. // If pPath is a relative path, this function makes it into an absolute path
  587. // using the current working directory as the base, or pStartingDir if it's non-NULL.
  588. // NOTE: this function is declared void, so running out of room in pOut, or pPath trying to ".." past the root directory, is not reported through a return value.
  589. void V_MakeAbsolutePath( char *pOut, int outLen, const char *pPath, const char *pStartingDir = NULL );
  590. inline void V_MakeAbsolutePath( char *pOut, int outLen, const char *pPath, const char *pStartingDir, bool bLowercaseName )
  591. {
  592. V_MakeAbsolutePath( pOut, outLen, pPath, pStartingDir );
  593. if ( bLowercaseName )
  594. {
  595. V_strlower( pOut );
  596. }
  597. }
  598. // Creates a relative path given two full paths
  599. // The first is the full path of the file to make a relative path for.
  600. // The second is the full path of the directory to make the first file relative to
  601. // Returns false if they can't be made relative (on separate drives, for example)
  602. bool V_MakeRelativePath( const char *pFullPath, const char *pDirectory, char *pRelativePath, int nBufLen );
  603. // Fixes up a file name, removing dot slashes, fixing slashes, converting to lowercase, etc.
  604. void V_FixupPathName( OUT_Z_CAP(nOutLen) char *pOut, size_t nOutLen, const char *pPath );
  605. // Adds a path separator to the end of the string if there isn't one already.
  // NOTE: declared void, so running out of space in pStr is not reported through a return value.
  606. void V_AppendSlash( INOUT_Z_CAP(strSize) char *pStr, int strSize );
  607. // Returns true if the path is an absolute path.
  608. bool V_IsAbsolutePath( IN_Z const char *pPath );
  609. // Scans pIn and replaces all occurrences of pMatch with pReplaceWith.
  610. // Writes the result to pOut.
  611. // Returns true if it completed successfully.
  612. // If it would overflow pOut, it fills as much as it can and returns false.
  // Matching is case-insensitive unless bCaseSensitive is passed as true (it defaults to false).
  613. bool V_StrSubst( IN_Z const char *pIn, IN_Z const char *pMatch, const char *pReplaceWith,
  614. OUT_Z_CAP(outLen) char *pOut, int outLen, bool bCaseSensitive=false );
  615. // Split the specified string on the specified separator.
  616. // Returns a list of strings separated by pSeparator.
  617. // You are responsible for freeing the contents of outStrings (call outStrings.PurgeAndDeleteElements).
  618. void V_SplitString( IN_Z const char *pString, IN_Z const char *pSeparator, CUtlVector<char*, CUtlMemory<char*, int> > &outStrings );
  // CUtlString overload; bIncludeEmptyStrings defaults to false.
  619. void V_SplitString( const char *pString, const char *pSeparator, CUtlVector< CUtlString, CUtlMemory<CUtlString, int> > &outStrings, bool bIncludeEmptyStrings = false );
  620. // Just like V_SplitString, but it can use multiple possible separators.
  621. void V_SplitString2( IN_Z const char *pString, const char **pSeparators, int nSeparators, CUtlVector<char*, CUtlMemory<char*, int> > &outStrings );
  622. // Returns false if the buffer is not large enough to hold the working directory name.
  623. bool V_GetCurrentDirectory( OUT_Z_CAP(maxLen) char *pOut, int maxLen );
  624. // Set the working directory thus.
  625. bool V_SetCurrentDirectory( const char *pDirName );
  626. // This function takes a slice out of pStr and stores it in pOut.
  627. // It follows the Python slice convention:
  628. // Negative numbers wrap around the string (-1 references the last character).
  629. // Large numbers are clamped to the end of the string.
  630. void V_StrSlice( const char *pStr, int firstChar, int lastCharNonInclusive, OUT_Z_CAP(outSize) char *pOut, int outSize );
  631. // Chop off the left nChars of a string.
  632. void V_StrLeft( const char *pStr, int nChars, OUT_Z_CAP(outSize) char *pOut, int outSize );
  633. // Chop off the right nChars of a string.
  634. void V_StrRight( const char *pStr, int nChars, OUT_Z_CAP(outSize) char *pOut, int outSize );
  635. // change "special" characters to have their c-style backslash sequence. like \n, \r, \t, ", etc.
  636. // returns a pointer to a newly allocated string, which you must delete[] when finished with.
  637. char *V_AddBackSlashesToSpecialChars( char const *pSrc );
  638. // Force slashes of either type to be = separator character
  639. void V_FixSlashes( char *pname, char separator = CORRECT_PATH_SEPARATOR );
  640. // This function fixes cases of filenames like materials\\blah.vmt or somepath\otherpath\\ and removes the extra double slash.
  641. void V_FixDoubleSlashes( char *pStr );
  642. // Convert multibyte to wchar + back
  643. // Specify -1 for nInSize for null-terminated string
  644. void V_strtowcs( const char *pString, int nInSize, OUT_Z_BYTECAP(nOutSizeInBytes) wchar_t *pWString, int nOutSizeInBytes );
  645. void V_wcstostr( const wchar_t *pWString, int nInSize, OUT_Z_CAP(nOutSizeInBytes) char *pString, int nOutSizeInBytes );
  646. // buffer-safe strcat
  647. inline void V_strcat( INOUT_Z_CAP(cchDest) char *dest, const char *src, int cchDest )
  648. {
  649. V_strncat( dest, src, cchDest, COPY_ALL_CHARACTERS );
  650. }
  651. // Buffer safe wcscat
  652. inline void V_wcscat( INOUT_Z_CAP(cchDest) wchar_t *dest, const wchar_t *src, int cchDest )
  653. {
  654. V_wcsncat( dest, src, cchDest, COPY_ALL_CHARACTERS );
  655. }
  656. // Encode a string for display as HTML -- this only encodes ' " & < >, which are the important ones to encode for
  657. // security and ensuring HTML display doesn't break. Other special chars like the ? sign and so forth will not
  658. // be encoded
  659. //
  660. // Returns false if there was not enough room in pDest to encode the entire source string, otherwise true
  661. bool V_BasicHtmlEntityEncode( OUT_Z_CAP( nDestSize ) char *pDest, const int nDestSize, char const *pIn, const int nInSize, bool bPreserveWhitespace = false );
  662. // Decode a string with htmlentities HTML -- this should handle all special chars, not just the ones V_BasicHtmlEntityEncode uses.
  663. //
  664. // Returns false if there was not enough room in pDest to decode the entire source string, otherwise true
  665. bool V_HtmlEntityDecodeToUTF8( OUT_Z_CAP( nDestSize ) char *pDest, const int nDestSize, char const *pIn, const int nInSize );
  666. // strips HTML from a string. Should call V_HtmlEntityDecodeToUTF8 afterward.
  667. void V_StripAndPreserveHTML( CUtlBuffer *pbuffer, const char *pchHTML, const char **rgszPreserveTags, uint cPreserveTags, uint cMaxResultSize );
  // Variant of V_StripAndPreserveHTML that additionally takes a list of tags in rgszNoCloseTags.
  // NOTE(review): presumably these are tags that need no closing tag - confirm in the implementation.
  668. void V_StripAndPreserveHTMLCore( CUtlBuffer *pbuffer, const char *pchHTML, const char **rgszPreserveTags, uint cPreserveTags, const char **rgszNoCloseTags, uint cNoCloseTags, uint cMaxResultSize );
  669. // Extracts the domain from a URL
  670. bool V_ExtractDomainFromURL( const char *pchURL, OUT_Z_CAP( cchDomain ) char *pchDomain, int cchDomain );
  671. // returns true if the url passed in is on the specified domain
  672. bool V_URLContainsDomain( const char *pchURL, const char *pchDomain );
  673. //-----------------------------------------------------------------------------
  674. // returns true if the character is allowed in a URL, false otherwise
  675. //-----------------------------------------------------------------------------
  676. bool V_IsValidURLCharacter( const char *pch, int *pAdvanceBytes );
  677. //-----------------------------------------------------------------------------
  678. // returns true if the character is allowed in a DNS domain name, false otherwise
  679. //-----------------------------------------------------------------------------
  680. bool V_IsValidDomainNameCharacter( const char *pch, int *pAdvanceBytes );
  681. // Converts BBCode tags to HTML tags
  682. bool V_BBCodeToHTML( OUT_Z_CAP( nDestSize ) char *pDest, const int nDestSize, char const *pIn, const int nInSize );
  683. // helper to identify "mean" spaces, which we don't like in visible identifiers
  684. // such as player Name
  685. bool V_IsMeanSpaceW( wchar_t wch );
  686. // helper to identify characters which are deprecated in Unicode,
  687. // and we simply don't accept
  688. bool V_IsDeprecatedW( wchar_t wch );
  689. //-----------------------------------------------------------------------------
  690. // generic unique name helper functions
  691. //-----------------------------------------------------------------------------
  692. // returns startindex if none found, 2 if "prefix" found, and n+1 if "prefixn" found
  693. template < class NameArray >
  694. int V_GenerateUniqueNameIndex( const char *prefix, const NameArray &nameArray, int startindex = 0 )
  695. {
  696. if ( prefix == NULL )
  697. return 0;
  698. int freeindex = startindex;
  699. int nNames = nameArray.Count();
  700. for ( int i = 0; i < nNames; ++i )
  701. {
  702. const char *pName = nameArray[ i ];
  703. if ( !pName )
  704. continue;
  705. const char *pIndexStr = StringAfterPrefix( pName, prefix );
  706. if ( pIndexStr )
  707. {
  708. int index = *pIndexStr ? atoi( pIndexStr ) : 1;
  709. if ( index >= freeindex )
  710. {
  711. // TODO - check that there isn't more junk after the index in pElementName
  712. freeindex = index + 1;
  713. }
  714. }
  715. }
  716. return freeindex;
  717. }
  718. template < class NameArray >
  719. bool V_GenerateUniqueName( OUT_Z_CAP(memsize) char *name, int memsize, const char *prefix, const NameArray &nameArray )
  720. {
  721. if ( name == NULL || memsize == 0 )
  722. return false;
  723. if ( prefix == NULL )
  724. {
  725. name[ 0 ] = '\0';
  726. return false;
  727. }
  728. int prefixLength = V_strlen( prefix );
  729. if ( prefixLength + 1 > memsize )
  730. {
  731. name[ 0 ] = '\0';
  732. return false;
  733. }
  734. int i = V_GenerateUniqueNameIndex( prefix, nameArray );
  735. if ( i <= 0 )
  736. {
  737. V_strncpy( name, prefix, memsize );
  738. return true;
  739. }
  740. int newlen = prefixLength + ( int )log10( ( float )i ) + 1;
  741. if ( newlen + 1 > memsize )
  742. {
  743. V_strncpy( name, prefix, memsize );
  744. return false;
  745. }
  746. V_snprintf( name, memsize, "%s%d", prefix, i );
  747. return true;
  748. }
  749. //
  750. // This utility class is for performing UTF-8 <-> UTF-16 conversion.
  751. // It is intended for use with function/method parameters.
  752. //
  753. // For example, you can call
  754. // FunctionTakingUTF16( CStrAutoEncode( utf8_string ).ToWString() )
  755. // or
  756. // FunctionTakingUTF8( CStrAutoEncode( utf16_string ).ToString() )
  757. //
  758. // The converted string is allocated off the heap, and destroyed when
  759. // the object goes out of scope.
  760. //
  761. // if the string cannot be converted, NULL is returned.
  762. //
  763. // This class doesn't have any conversion operators; the intention is
  764. // to encourage the developer to get used to having to think about which
  765. // encoding is desired.
  766. //
  767. class CStrAutoEncode
  768. {
  769. public:
  770. // ctor
  771. explicit CStrAutoEncode( const char *pch )
  772. {
  773. m_pch = pch;
  774. m_pwch = NULL;
  775. #if !defined( WIN32 ) && !defined(_WIN32)
  776. m_pucs2 = NULL;
  777. m_bCreatedUCS2 = false;
  778. #endif
  779. m_bCreatedUTF16 = false;
  780. }
  781. // ctor
  782. explicit CStrAutoEncode( const wchar_t *pwch )
  783. {
  784. m_pch = NULL;
  785. m_pwch = pwch;
  786. #if !defined( WIN32 ) && !defined(_WIN32)
  787. m_pucs2 = NULL;
  788. m_bCreatedUCS2 = false;
  789. #endif
  790. m_bCreatedUTF16 = true;
  791. }
  792. #if !defined(WIN32) && !defined(_WINDOWS) && !defined(_WIN32)
  793. explicit CStrAutoEncode( const ucs2 *pwch )
  794. {
  795. m_pch = NULL;
  796. m_pwch = NULL;
  797. m_pucs2 = pwch;
  798. m_bCreatedUCS2 = true;
  799. m_bCreatedUTF16 = false;
  800. }
  801. #endif
  802. // returns the UTF-8 string, converting on the fly.
  803. const char* ToString()
  804. {
  805. PopulateUTF8();
  806. return m_pch;
  807. }
  808. // returns the UTF-8 string - a writable pointer.
  809. // only use this if you don't want to call const_cast
  810. // yourself. We need this for cases like CreateProcess.
  811. char* ToStringWritable()
  812. {
  813. PopulateUTF8();
  814. return const_cast< char* >( m_pch );
  815. }
  816. // returns the UTF-16 string, converting on the fly.
  817. const wchar_t* ToWString()
  818. {
  819. PopulateUTF16();
  820. return m_pwch;
  821. }
  822. #if !defined( WIN32 ) && !defined(_WIN32)
  823. // returns the UTF-16 string, converting on the fly.
  824. const ucs2* ToUCS2String()
  825. {
  826. PopulateUCS2();
  827. return m_pucs2;
  828. }
  829. #endif
  830. // returns the UTF-16 string - a writable pointer.
  831. // only use this if you don't want to call const_cast
  832. // yourself. We need this for cases like CreateProcess.
  833. wchar_t* ToWStringWritable()
  834. {
  835. PopulateUTF16();
  836. return const_cast< wchar_t* >( m_pwch );
  837. }
  838. // dtor
  839. ~CStrAutoEncode()
  840. {
  841. // if we're "native unicode" then the UTF-8 string is something we allocated,
  842. // and vice versa.
  843. if ( m_bCreatedUTF16 )
  844. {
  845. delete [] m_pch;
  846. }
  847. else
  848. {
  849. delete [] m_pwch;
  850. }
  851. #if !defined( WIN32 ) && !defined(_WIN32)
  852. if ( !m_bCreatedUCS2 && m_pucs2 )
  853. delete [] m_pucs2;
  854. #endif
  855. }
  856. private:
  857. // ensure we have done any conversion work required to farm out a
  858. // UTF-8 encoded string.
  859. //
  860. // We perform two heap allocs here; the first one is the worst-case
  861. // (four bytes per Unicode code point). This is usually quite pessimistic,
  862. // so we perform a second allocation that's just the size we need.
  863. void PopulateUTF8()
  864. {
  865. if ( !m_bCreatedUTF16 )
  866. return; // no work to do
  867. if ( m_pwch == NULL )
  868. return; // don't have a UTF-16 string to convert
  869. if ( m_pch != NULL )
  870. return; // already been converted to UTF-8; no work to do
  871. // each Unicode code point can expand to as many as four bytes in UTF-8; we
  872. // also need to leave room for the terminating NUL.
  873. uint32 cbMax = 4 * static_cast<uint32>( V_wcslen( m_pwch ) ) + 1;
  874. char *pchTemp = new char[ cbMax ];
  875. if ( V_UnicodeToUTF8( m_pwch, pchTemp, cbMax ) )
  876. {
  877. uint32 cchAlloc = static_cast<uint32>( V_strlen( pchTemp ) ) + 1;
  878. char *pchHeap = new char[ cchAlloc ];
  879. V_strncpy( pchHeap, pchTemp, cchAlloc );
  880. delete [] pchTemp;
  881. m_pch = pchHeap;
  882. }
  883. else
  884. {
  885. // do nothing, and leave the UTF-8 string NULL
  886. delete [] pchTemp;
  887. }
  888. }
  889. // ensure we have done any conversion work required to farm out a
  890. // UTF-16 encoded string.
  891. //
  892. // We perform two heap allocs here; the first one is the worst-case
  893. // (one code point per UTF-8 byte). This is sometimes pessimistic,
  894. // so we perform a second allocation that's just the size we need.
  895. void PopulateUTF16()
  896. {
  897. if ( m_bCreatedUTF16 )
  898. return; // no work to do
  899. if ( m_pch == NULL )
  900. return; // no UTF-8 string to convert
  901. if ( m_pwch != NULL )
  902. return; // already been converted to UTF-16; no work to do
  903. uint32 cchMax = static_cast<uint32>( V_strlen( m_pch ) ) + 1;
  904. wchar_t *pwchTemp = new wchar_t[ cchMax ];
  905. if ( V_UTF8ToUnicode( m_pch, pwchTemp, cchMax * sizeof( wchar_t ) ) )
  906. {
  907. uint32 cchAlloc = static_cast<uint32>( V_wcslen( pwchTemp ) ) + 1;
  908. wchar_t *pwchHeap = new wchar_t[ cchAlloc ];
  909. V_wcsncpy( pwchHeap, pwchTemp, cchAlloc * sizeof( wchar_t ) );
  910. delete [] pwchTemp;
  911. m_pwch = pwchHeap;
  912. }
  913. else
  914. {
  915. // do nothing, and leave the UTF-16 string NULL
  916. delete [] pwchTemp;
  917. }
  918. }
  919. #if !defined( WIN32 ) && !defined(_WIN32)
  920. // ensure we have done any conversion work required to farm out a
  921. // UTF-16 encoded string.
  922. //
  923. // We perform two heap allocs here; the first one is the worst-case
  924. // (one code point per UTF-8 byte). This is sometimes pessimistic,
  925. // so we perform a second allocation that's just the size we need.
  926. void PopulateUCS2()
  927. {
  928. if ( m_bCreatedUCS2 )
  929. return;
  930. if ( m_pch == NULL )
  931. return; // no UTF-8 string to convert
  932. if ( m_pucs2 != NULL )
  933. return; // already been converted to UTF-16; no work to do
  934. uint32 cchMax = static_cast<uint32>( V_strlen( m_pch ) ) + 1;
  935. ucs2 *pwchTemp = new ucs2[ cchMax ];
  936. if ( V_UTF8ToUCS2( m_pch, cchMax, pwchTemp, cchMax * sizeof( ucs2 ) ) )
  937. {
  938. uint32 cchAlloc = cchMax;
  939. ucs2 *pwchHeap = new ucs2[ cchAlloc ];
  940. memcpy( pwchHeap, pwchTemp, cchAlloc * sizeof( ucs2 ) );
  941. delete [] pwchTemp;
  942. m_pucs2 = pwchHeap;
  943. }
  944. else
  945. {
  946. // do nothing, and leave the UTF-16 string NULL
  947. delete [] pwchTemp;
  948. }
  949. }
  950. #endif
  951. // one of these pointers is an owned pointer; whichever
  952. // one is the encoding OTHER than the one we were initialized
  953. // with is the pointer we've allocated and must free.
  954. const char *m_pch;
  955. const wchar_t *m_pwch;
  956. #if !defined( WIN32 ) && !defined(_WIN32)
  957. const ucs2 *m_pucs2;
  958. bool m_bCreatedUCS2;
  959. #endif
  960. // "created as UTF-16", means our owned string is the UTF-8 string not the UTF-16 one.
  961. bool m_bCreatedUTF16;
  962. };
  963. // Encodes a string (or binary data) in URL encoding format, see rfc1738 section 2.2.
  964. // Dest buffer should be 3 times the size of source buffer to guarantee it has room to encode.
  965. void Q_URLEncodeRaw( OUT_Z_CAP(nDestLen) char *pchDest, int nDestLen, const char *pchSource, int nSourceLen );
  966. // Decodes a string (or binary data) from URL encoding format, see rfc1738 section 2.2.
  967. // Dest buffer should be at least as large as source buffer to guarantee room for decode.
  968. // Dest buffer being the same as the source buffer (decode in-place) is explicitly allowed.
  969. //
  970. // Returns the amount of space actually used in the output buffer.
  971. size_t Q_URLDecodeRaw( OUT_CAP(nDecodeDestLen) char *pchDecodeDest, int nDecodeDestLen, const char *pchEncodedSource, int nEncodedSourceLen );
  972. // trim right whitespace
  973. inline char* TrimRight( char *pString )
  974. {
  975. char *pEnd = pString + V_strlen( pString );
  976. // trim
  977. while ( pString < ( pEnd-- ) )
  978. {
  979. if ( uint( *pEnd ) <= uint( ' ' ) )
  980. {
  981. *pEnd = '\0';
  982. }
  983. else
  984. break;
  985. }
  986. return pString;
  987. }
  // Returns a pointer to the first character of pString whose value (converted to uint)
  // is greater than ' ', or to the terminating NUL if there is none.
  inline const char * SkipBlanks( const char *pString )
  {
      const char *pCursor = pString;
      for ( ; *pCursor != '\0'; ++pCursor )
      {
          if ( uint( *pCursor ) > uint( ' ' ) )
              break;
      }
      return pCursor;
  }
  // strcspn with the result narrowed to int: the length of the initial run of s1 that
  // contains no character from search.
  inline int V_strcspn( const char *s1, const char *search )
  {
      const size_t nSpan = strcspn( s1, search );
      return static_cast<int>( nSpan );
  }
  998. // Encodes a string (or binary data) in URL encoding format, this isn't the strict rfc1738 format, but instead uses + for spaces.
  999. // This is for historical reasons and HTML spec foolishness that led to + becoming a de facto standard for spaces when encoding form data.
  1000. // Dest buffer should be 3 times the size of source buffer to guarantee it has room to encode.
  1001. void Q_URLEncode( OUT_Z_CAP(nDestLen) char *pchDest, int nDestLen, const char *pchSource, int nSourceLen );
  1002. // Decodes a string (or binary data) in URL encoding format, this isn't the strict rfc1738 format, but instead uses + for spaces.
  1003. // This is for historical reasons and HTML spec foolishness that led to + becoming a de facto standard for spaces when encoding form data.
  1004. // Dest buffer should be at least as large as source buffer to guarantee room for decode.
  1005. // Dest buffer being the same as the source buffer (decode in-place) is explicitly allowed.
  1006. //
  1007. // Returns the amount of space actually used in the output buffer.
  1008. size_t Q_URLDecode( OUT_CAP(nDecodeDestLen) char *pchDecodeDest, int nDecodeDestLen, const char *pchEncodedSource, int nEncodedSourceLen );
  1009. // NOTE: This is for backward compatibility!
  1010. // We need to DLL-export the Q methods in vstdlib but not link to them in other projects
  // Unless VSTDLIB_BACKWARD_COMPAT is defined, each legacy Q_ name below is a macro alias for its V_ equivalent.
  1011. #if !defined( VSTDLIB_BACKWARD_COMPAT )
  1012. #define Q_memset V_memset
  1013. #define Q_memcpy V_memcpy
  1014. #define Q_memmove V_memmove
  1015. #define Q_memcmp V_memcmp
  1016. #define Q_strlen V_strlen
  1017. #define Q_strcpy V_strcpy
  1018. #define Q_strrchr V_strrchr
  1019. #define Q_strcmp V_strcmp
  1020. #define Q_wcscmp V_wcscmp
  1021. #define Q_stricmp V_stricmp
  1022. #define Q_strstr V_strstr
  1023. #define Q_strupr V_strupr
  1024. #define Q_strlower V_strlower
  1025. #define Q_wcslen V_wcslen
  1026. #define Q_strncmp V_strncmp
  1027. #define Q_strcasecmp V_strcasecmp
  1028. #define Q_strncasecmp V_strncasecmp
  1029. #define Q_strnicmp V_strnicmp
  1030. #define Q_atoi V_atoi
  1031. #define Q_atoi64 V_atoi64
  1032. #define Q_atoui64 V_atoui64
  1033. #define Q_atof V_atof
  1034. #define Q_stristr V_stristr
  1035. #define Q_strnistr V_strnistr
  1036. #define Q_strnchr V_strnchr
  1037. #define Q_normalizeFloatString V_normalizeFloatString
  1038. #define Q_strncpy V_strncpy
  1039. #define Q_snprintf V_snprintf
  1040. #define Q_wcsncpy V_wcsncpy
  1041. #define Q_strncat V_strncat
  1042. #define Q_strnlwr V_strnlwr
  1043. #define Q_vsnprintf V_vsnprintf
  1044. #define Q_vsnprintfRet V_vsnprintfRet
  1045. #define Q_pretifymem V_pretifymem
  1046. #define Q_pretifynum V_pretifynum
  1047. #define Q_UTF8ToUnicode V_UTF8ToUnicode
  1048. #define Q_UnicodeToUTF8 V_UnicodeToUTF8
  1049. #define Q_hextobinary V_hextobinary
  1050. #define Q_binarytohex V_binarytohex
  1051. #define Q_FileBase V_FileBase
  1052. #define Q_StripTrailingSlash V_StripTrailingSlash
  1053. #define Q_StripExtension V_StripExtension
  1054. #define Q_DefaultExtension V_DefaultExtension
  1055. #define Q_SetExtension V_SetExtension
  1056. #define Q_StripFilename V_StripFilename
  1057. #define Q_StripLastDir V_StripLastDir
  1058. #define Q_UnqualifiedFileName V_UnqualifiedFileName
  1059. #define Q_ComposeFileName V_ComposeFileName
  1060. #define Q_ExtractFilePath V_ExtractFilePath
  1061. #define Q_ExtractFileExtension V_ExtractFileExtension
  1062. #define Q_GetFileExtension V_GetFileExtension
  1063. #define Q_RemoveDotSlashes V_RemoveDotSlashes
  1064. #define Q_MakeAbsolutePath V_MakeAbsolutePath
  1065. #define Q_AppendSlash V_AppendSlash
  1066. #define Q_IsAbsolutePath V_IsAbsolutePath
  1067. #define Q_StrSubst V_StrSubst
  1068. #define Q_SplitString V_SplitString
  1069. #define Q_SplitString2 V_SplitString2
  1070. #define Q_StrSlice V_StrSlice
  1071. #define Q_StrLeft V_StrLeft
  1072. #define Q_StrRight V_StrRight
  1073. #define Q_FixSlashes V_FixSlashes
  1074. #define Q_strtowcs V_strtowcs
  1075. #define Q_wcstostr V_wcstostr
  1076. #define Q_strcat V_strcat
  1077. #define Q_GenerateUniqueNameIndex V_GenerateUniqueNameIndex
  1078. #define Q_GenerateUniqueName V_GenerateUniqueName
  1079. #define Q_MakeRelativePath V_MakeRelativePath
  1080. #define Q_qsort_s V_qsort_s
  1081. #endif // !defined( VSTDLIB_BACKWARD_COMPAT )
  // FMT_WS: wide-literal printf conversion used for a wide-string argument.
  // It expands to L"%ls" when POSIX is defined and to L"%s" otherwise.
  1082. #ifdef POSIX
  1083. #define FMT_WS L"%ls"
  1084. #else
  1085. #define FMT_WS L"%s"
  1086. #endif
  1087. // Strip white space at the beginning and end of a string
  // NOTE(review): takes a single non-const pointer, so presumably trims in place; the meaning
  // of the int return value is not visible from this declaration - confirm in the implementation.
  1088. int V_StrTrim( char *pStr );
  1089. #endif // TIER1_STRTOOLS_H