csgo/cstrike15_src/tier1/keyvaluesjson.cpp


								//========= Copyright Valve Corporation, All rights reserved. =================//

								//

								// Read JSON-formatted data into KeyValues

								//

								//=============================================================================//


								#include "tier1/keyvaluesjson.h"

								#include "tier1/utlbuffer.h"

								#include "tier1/strtools.h"


								// Construct a parser over the contents of a CUtlBuffer.
								// The text handed to Init() starts at buf.Base() and is buf.TellPut() bytes long.
								KeyValuesJSONParser::KeyValuesJSONParser( const CUtlBuffer &buf )
								{
									const char *pszBegin = (const char *)buf.Base();
									const int cbLength = buf.TellPut();
									Init( pszBegin, cbLength );
								}


								// Construct a parser over a raw character range.
								// A negative cbSize means the length is measured with V_strlen( pszText ).
								KeyValuesJSONParser::KeyValuesJSONParser( const char *pszText, int cbSize )
								{
									int cbText = cbSize;
									if ( cbText < 0 )
										cbText = V_strlen( pszText );
									Init( pszText, cbText );
								}


								// The destructor body has nothing to do explicitly.
								KeyValuesJSONParser::~KeyValuesJSONParser()
								{
								}


								// Reset the parser state to the start of the range [pszText, pszText+cbSize)
								// and lex the first token so that m_eToken is ready for ParseFile().
								void KeyValuesJSONParser::Init( const char *pszText, int cbSize )
								{
									// Input window
									m_cur = pszText;
									m_end = m_cur + cbSize;

									// Diagnostics start out empty, on line 1
									m_nLine = 1;
									m_szErrMsg[0] = '\0';

									// Seed the token before lexing: NextToken() returns immediately
									// whenever m_eToken is negative.
									m_eToken = kToken_Null;
									NextToken();
								}


								// Parse the whole input as a single JSON object.
								//
								// Returns a newly allocated KeyValues on success (release it with deleteThis()).
								// On failure returns NULL and leaves a description of the problem in m_szErrMsg.
								KeyValues *KeyValuesJSONParser::ParseFile()
								{
									// A valid JSON document should contain a single object, surrounded by curly braces.
									if ( m_eToken == kToken_EOF )
									{
										V_sprintf_safe( m_szErrMsg, "Input contains no data" );
										return NULL;
									}

									// The lexer has already filled in m_szErrMsg
									if ( m_eToken == kToken_Err )
										return NULL;

									if ( m_eToken == '{' )
									{
										// Parse the entire file as one big object
										KeyValues *pResult = new KeyValues("");
										if ( !ParseObject( pResult ) )
										{
											pResult->deleteThis();
											return NULL;
										}
										if ( m_eToken == kToken_EOF )
											return pResult;

										// Something follows the closing '}', so the document is rejected.
										pResult->deleteThis();

										// If that something failed to lex, keep the lexer's message.
										// GetTokenDebugText() has no text for the error token and would
										// assert, and the generic message below would hide the real cause.
										if ( m_eToken == kToken_Err )
											return NULL;
									}

									V_sprintf_safe( m_szErrMsg, "%s not expected here.  A valid JSON document should be a single object, which begins with '{' and ends with '}'", GetTokenDebugText() );
									return NULL;
								}


								// Parse a JSON object.  On entry the current token must be '{'.
								//
								// Each "key": value pair becomes a subkey of pObject, in input order.
								// A trailing comma after the last pair is tolerated.
								// Returns true with the closing '}' consumed, or false with m_szErrMsg set.
								bool KeyValuesJSONParser::ParseObject( KeyValues *pObject )
								{
									Assert( m_eToken == '{' );
									int nOpenDelimLine = m_nLine;
									NextToken();
									KeyValues *pLastChild = NULL;
									while ( m_eToken != '}' )
									{
										// Parse error?
										if ( m_eToken == kToken_Err )
											return false;
										if ( m_eToken == kToken_EOF )
										{
											// Actually report the error at the line of the unmatched delimiter.
											// There's no need to report the line number of the end of file, that is always
											// useless.
											m_nLine = nOpenDelimLine;
											V_strcpy_safe( m_szErrMsg, "End of input was reached and '{' was not matched by '}'" );
											return false;
										}

										// It must be a string, for the key name
										if ( m_eToken != kToken_String )
										{
											V_sprintf_safe( m_szErrMsg, "%s not expected here; expected string for key name or '}'", GetTokenDebugText() );
											return false;
										}

										KeyValues *pChildValue = new KeyValues( m_vecTokenChars.Base() );
										NextToken();

										// Expect and eat colon
										if ( m_eToken != ':' )
										{
											// If the lexer failed it has already written the real error message.
											// Don't overwrite it, and don't ask for debug text of the error token.
											if ( m_eToken != kToken_Err )
												V_sprintf_safe( m_szErrMsg, "%s not expected here.  Missing ':'?", GetTokenDebugText() );
											pChildValue->deleteThis();
											return false;
										}
										NextToken();

										// Recursively parse the value
										if ( !ParseValue( pChildValue ) )
										{
											pChildValue->deleteThis();
											return false;
										}

										// Add to parent.  From here on the child is reachable through pObject,
										// so the error paths below do not delete it themselves.
										pObject->AddSubkeyUsingKnownLastChild( pChildValue, pLastChild );
										pLastChild = pChildValue;

										// Eat the comma, if there is one.  If no comma,
										// then the other thing that could come next
										// is the closing brace to close the object
										// NOTE: We are allowing the extra comma after the last item
										if ( m_eToken == ',' )
										{
											NextToken();
										}
										else if ( m_eToken == kToken_Err )
										{
											// Lexer error right after the value: keep the lexer's message
											return false;
										}
										else if ( m_eToken != '}' )
										{
											V_sprintf_safe( m_szErrMsg, "%s not expected here.  Missing ',' or '}'?", GetTokenDebugText() );
											return false;
										}
									}

									// Eat closing '}'
									NextToken();

									// Success
									return true;
								}


								// Parse a JSON array.  On entry the current token must be '['.
								//
								// Each element becomes a subkey of pArray whose name is its zero-based
								// index ("0", "1", ...).  A trailing comma after the last element is tolerated.
								// Returns true with the closing ']' consumed, or false with m_szErrMsg set.
								bool KeyValuesJSONParser::ParseArray( KeyValues *pArray )
								{
									Assert( m_eToken == '[' );
									int nOpenDelimLine = m_nLine;
									NextToken();
									KeyValues *pLastChild = NULL;
									int idx = 0;
									while ( m_eToken != ']' )
									{
										// Parse error?
										if ( m_eToken == kToken_Err )
											return false;
										if ( m_eToken == kToken_EOF )
										{
											// Actually report the error at the line of the unmatched delimiter.
											// There's no need to report the line number of the end of file, that is always
											// useless.
											m_nLine = nOpenDelimLine;
											V_strcpy_safe( m_szErrMsg, "End of input was reached and '[' was not matched by ']'" );
											return false;
										}

										// Set a dummy key name based on the index
										char szKeyName[ 32 ];
										V_sprintf_safe( szKeyName, "%d", idx );
										++idx;
										KeyValues *pChildValue = new KeyValues( szKeyName );

										// Recursively parse the value
										if ( !ParseValue( pChildValue ) )
										{
											pChildValue->deleteThis();
											return false;
										}

										// Add to parent.  From here on the child is reachable through pArray,
										// so the error paths below do not delete it themselves.
										pArray->AddSubkeyUsingKnownLastChild( pChildValue, pLastChild );
										pLastChild = pChildValue;

										// Handle a colon here specially.  If one appears, the odds are they
										// are trying to put object-like data inside of an array
										if ( m_eToken == ':' )
										{
											V_sprintf_safe( m_szErrMsg, "':' not expected inside an array.  ('[]' used when '{}' was intended?)" );
											return false;
										}

										// Eat the comma, if there is one.  If no comma,
										// then the other thing that could come next
										// is the closing bracket to close the array
										// NOTE: We are allowing the extra comma after the last item
										if ( m_eToken == ',' )
										{
											NextToken();
										}
										else if ( m_eToken == kToken_Err )
										{
											// Lexer error right after the element: keep the lexer's message
											// rather than overwriting it (GetTokenDebugText() has no text
											// for the error token).
											return false;
										}
										else if ( m_eToken != ']' )
										{
											V_sprintf_safe( m_szErrMsg, "%s not expected here.  Missing ',' or ']'?", GetTokenDebugText() );
											return false;
										}
									}

									// Eat closing ']'
									NextToken();

									// Success
									return true;
								}


								// Parse one JSON value starting at the current token and store it in pValue.
								// Objects and arrays recurse; scalars are stored and the token is consumed.
								// Returns false with m_szErrMsg set if no valid value starts here.
								bool KeyValuesJSONParser::ParseValue( KeyValues *pValue )
								{
									switch ( m_eToken )
									{
										// Containers advance the token stream themselves
										case '{':
											return ParseObject( pValue );

										case '[':
											return ParseArray( pValue );

										// Nothing to add, just propagate the failure
										case kToken_Err:
											return false;

										case kToken_String:
											pValue->SetString( NULL, m_vecTokenChars.Base() );
											break;

										case kToken_NumberInt:
										{
											const char *pszDigits = m_vecTokenChars.Base();
											const bool bNegative = ( pszDigits[0] == '-' );
											if ( bNegative )
											{
												const int64 nSigned = V_atoi64( pszDigits );
												if ( nSigned < INT32_MIN )
												{
													// !KLUDGE! KeyValues cannot support this!
													V_sprintf_safe( m_szErrMsg, "%s is out of range for KeyValues, which doesn't support signed 64-bit numbers", pszDigits );
													return false;
												}
												pValue->SetInt( NULL, (int)nSigned );
											}
											else
											{
												// Non-negative values that fit in 31 bits are stored as int,
												// anything bigger as uint64
												const uint64 nUnsigned = V_atoui64( pszDigits );
												if ( nUnsigned <= 0x7fffffffU )
													pValue->SetInt( NULL, (int)nUnsigned );
												else
													pValue->SetUint64( NULL, nUnsigned );
											}
											break;
										}

										case kToken_NumberFloat:
										{
											const float flValue = V_atof( m_vecTokenChars.Base() );
											pValue->SetFloat( NULL, flValue );
											break;
										}

										case kToken_True:
											pValue->SetBool( NULL, true );
											break;

										case kToken_False:
											pValue->SetBool( NULL, false );
											break;

										case kToken_Null:
											pValue->SetPtr( NULL, NULL );
											break;

										// Any other token cannot begin a value
										default:
											V_sprintf_safe( m_szErrMsg, "%s not expected here; missing value?", GetTokenDebugText() );
											return false;
									}

									// A scalar was stored; move past it
									NextToken();
									return true;
								}


								// Lex the next token from the input into m_eToken.
								//
								// Spaces, tabs, newlines (which bump m_nLine) and C++-style "//" comments are
								// skipped.  String and number tokens are lexed by ParseStringToken() and
								// ParseNumberToken().  An unexpected character sets m_szErrMsg and makes
								// m_eToken kToken_Err; running out of input makes it kToken_EOF.  Once
								// m_eToken is negative this function does nothing.
								void KeyValuesJSONParser::NextToken()
								{
									// Already in terminal state?
									if ( m_eToken < 0 )
										return;

									// Clear token
									m_vecTokenChars.SetCount(0);

									// Scan until we hit the end of input
									while ( m_cur < m_end )
									{
										// Next character?
										char c = *m_cur;
										switch (c)
										{
											// Whitespace?  Eat it and keep parsing
											case ' ':
											case '\t':
												++m_cur;
												break;

											// Newline?  Eat it and advance line number
											case '\n':
											case '\r':
												++m_nLine;
												++m_cur;

												// Eat \r\n or \n\r pair as a single character.
												// ('\n' + '\r' - c) is "the other" newline character.
												if ( m_cur < m_end && *m_cur == ( '\n' + '\r' - c ) )
													++m_cur;
												break;

											// Single-character JSON token?  The character is its own token value.
											case ':':
											case '{':
											case '}':
											case '[':
											case ']':
											case ',':
												m_eToken = c;
												++m_cur;
												return;

											// String?
											case '\"':
											case '\'': // NOTE: We allow strings to be delimited by single quotes, which is not JSON compliant
												ParseStringToken();
												return;

											// Number?  (A leading '.' is accepted here, see ParseNumberToken)
											case '-':
											case '.':
											case '0':
											case '1':
											case '2':
											case '3':
											case '4':
											case '5':
											case '6':
											case '7':
											case '8':
											case '9':
												ParseNumberToken();
												return;

											// Literal "true"
											case 't':
												if ( m_cur + 4 <= m_end && m_cur[1] == 'r' && m_cur[2] == 'u' && m_cur[3] == 'e' )
												{
													m_cur += 4;
													m_eToken = kToken_True;
													return;
												}
												goto unexpected_char;

											// Literal "false"
											case 'f':
												if ( m_cur + 5 <= m_end && m_cur[1] == 'a' && m_cur[2] == 'l' && m_cur[3] == 's' && m_cur[4] == 'e' )
												{
													m_cur += 5;
													m_eToken = kToken_False;
													return;
												}
												goto unexpected_char;

											// Literal "null"
											case 'n':
												if ( m_cur + 4 <= m_end && m_cur[1] == 'u' && m_cur[2] == 'l' && m_cur[3] == 'l' )
												{
													m_cur += 4;
													m_eToken = kToken_Null;
													return;
												}
												goto unexpected_char;

											case '/':
												// C++-style comment?  The second character must lie inside the
												// input before we may look at it; a '/' as the very last byte
												// must not read past m_end.
												if ( m_cur + 1 < m_end && m_cur[1] == '/' )
												{
													m_cur += 2;
													while ( m_cur < m_end && *m_cur != '\n' && *m_cur != '\r' )
														++m_cur;
													// Leave newline as the next character, we'll handle it above
													break;
												}
												// | fall
												// | through
												// V

											default:
											unexpected_char:
												if ( V_isprint(c) )
													V_sprintf_safe( m_szErrMsg, "Unexpected character 0x%02x ('%c')", (uint8)c, c );
												else
													V_sprintf_safe( m_szErrMsg, "Unexpected character 0x%02x", (uint8)c );
												m_eToken = kToken_Err;
												return;
										}
									}

									m_eToken = kToken_EOF;
								}


								// Lex a numeric token starting at m_cur.
								//
								// Accepted form: optional '-', then a whole part ("0" or a run of digits not
								// starting with 0) and/or a fraction ('.' followed by optional digits), then
								// an optional exponent.  At least one digit is required before the exponent.
								// A leading '.' is accepted even though strict JSON forbids it.
								//
								// On success m_vecTokenChars holds the NUL-terminated text and m_eToken is
								// kToken_NumberInt, or kToken_NumberFloat if a '.' or exponent was seen.
								// On failure m_szErrMsg is set and m_eToken is kToken_Err.
								void KeyValuesJSONParser::ParseNumberToken()
								{
									// Clear token
									m_vecTokenChars.SetCount(0);

									// Eat leading minus sign
									if ( *m_cur == '-' )
									{
										m_vecTokenChars.AddToTail( '-' );
										++m_cur;
									}

									if ( m_cur >= m_end )
									{
										V_strcpy_safe( m_szErrMsg, "Unexpected EOF while parsing number" );
										m_eToken = kToken_Err;
										return;
									}

									// Whole part.  Characters are appended only as they are consumed, so a
									// leading '.' is stored exactly once (by the fraction code below).
									char c = *m_cur;
									bool bHasWholePart = false;
									if ( c == '0' )
									{
										// Leading 0 cannot be followed by any more digits, as per JSON spec (and to make sure nobody tries to parse octal).
										// A digit right after it is rejected by the trailing-character check at the bottom.
										m_vecTokenChars.AddToTail( *(m_cur++) );
										bHasWholePart = true;
									}
									else if ( c >= '1' && c <= '9' )
									{
										// Accumulate digits until we hit a non-digit
										while ( m_cur < m_end && *m_cur >= '0' && *m_cur <= '9' )
											m_vecTokenChars.AddToTail( *(m_cur++) );
										bHasWholePart = true;
									}
									else if ( c != '.' )
									{
										// A sign that is followed by neither a digit nor a decimal point
										// (e.g. "-x", "-," or "-e5") is not a number.
										if ( V_isprint(c) )
											V_sprintf_safe( m_szErrMsg, "Number contains invalid character 0x%02x ('%c')", (uint8)c, c );
										else
											V_sprintf_safe( m_szErrMsg, "Number contains invalid character 0x%02x", (uint8)c );
										m_eToken = kToken_Err;
										return;
									}
									// else: strict JSON doesn't allow a number that starts with a decimal point, but we do

									// Assume this is integral, unless we hit a decimal point and/or exponent
									m_eToken = kToken_NumberInt;

									// Fractional portion?
									if ( m_cur < m_end && *m_cur == '.' )
									{
										m_eToken = kToken_NumberFloat;

										// Eat decimal point
										m_vecTokenChars.AddToTail( *(m_cur++) );

										// Accumulate digits until we hit a non-digit
										bool bHasFractionPart = false;
										while ( m_cur < m_end && *m_cur >= '0' && *m_cur <= '9' )
										{
											m_vecTokenChars.AddToTail( *(m_cur++) );
											bHasFractionPart = true;
										}

										// Make sure we aren't just a single '.'
										if ( !bHasWholePart && !bHasFractionPart )
										{
											m_vecTokenChars.AddToTail(0);
											V_sprintf_safe( m_szErrMsg, "Invalid number starting with '%s'", m_vecTokenChars.Base() );
											m_eToken = kToken_Err;
											return;
										}
									}

									// Exponent?
									if ( m_cur < m_end && ( *m_cur == 'e' || *m_cur == 'E' ) )
									{
										m_eToken = kToken_NumberFloat;

										// Eat 'e'
										m_vecTokenChars.AddToTail( *(m_cur++) );

										// Optional sign
										if ( m_cur < m_end && ( *m_cur == '-' || *m_cur == '+' ) )
											m_vecTokenChars.AddToTail( *(m_cur++) );

										// Accumulate digits until we hit a non-digit
										bool bHasExponentDigit = false;
										while ( m_cur < m_end && *m_cur >= '0' && *m_cur <= '9' )
										{
											m_vecTokenChars.AddToTail( *(m_cur++) );
											bHasExponentDigit = true;
										}
										if ( !bHasExponentDigit )
										{
											V_strcpy_safe( m_szErrMsg, "Bad exponent in floating point number" );
											m_eToken = kToken_Err;
											return;
										}
									}

									// OK, We have parsed a valid number.
									// Terminate token
									m_vecTokenChars.AddToTail( '\0' );

									// EOF?  That's OK for now, at this lexical parsing level.  We'll handle the error
									// at the higher parse level, when expecting a comma or closing delimiter
									if ( m_cur >= m_end )
										return;

									// Is the next thing a valid character?  This is the most common case.
									c = *m_cur;
									if ( V_isspace( c ) || c == ',' || c == '}' || c == ']' || c == '/' )
										return;

									// Handle these guys as "tokens", to provide a slightly more meaningful error message
									if ( c == '[' || c == '{' )
										return;

									// Anything else, treat the whole thing as an invalid numerical constant
									if ( V_isprint(c) )
										V_sprintf_safe( m_szErrMsg, "Number contains invalid character 0x%02x ('%c')", (uint8)c, c );
									else
										V_sprintf_safe( m_szErrMsg, "Number contains invalid character 0x%02x", (uint8)c );
									m_eToken = kToken_Err;
								}


								// Lex a quoted string token.  On entry m_cur points at the opening quote
								// (either '"' or '\''); the string ends at the matching quote character.
								//
								// Backslash escapes \\ \/ \' \" \b \f \n \r \t and \uXXXX are decoded, the
								// latter being appended as UTF-8.  On success m_vecTokenChars holds the
								// NUL-terminated decoded text and m_eToken is kToken_String.  A raw newline,
								// a bad escape or end of input inside the string sets m_szErrMsg and makes
								// m_eToken kToken_Err.
								void KeyValuesJSONParser::ParseStringToken()
								{
									char cDelim = *(m_cur++);

									while ( m_cur < m_end )
									{
										char c = *(m_cur++);
										if ( c == '\r' || c == '\n' )
										{
											// Report the quote we were waiting for, not the newline character itself
											V_sprintf_safe( m_szErrMsg, "Hit end of line before closing quote (%c)", cDelim );
											m_eToken = kToken_Err;
											return;
										}
										if ( c == cDelim )
										{
											m_eToken = kToken_String;
											m_vecTokenChars.AddToTail( '\0' );
											return;
										}

										// Ordinary character?  Just append it
										if ( c != '\\' )
										{
											m_vecTokenChars.AddToTail( c );
											continue;
										}

										// Escaped character.
										// End of input right after the backslash?  The loop condition fails
										// and the "end of input" error below is reported.
										if ( m_cur >= m_end )
											continue;

										// Fetch the character that follows the backslash; that is the one
										// which selects the escape.
										c = *(m_cur++);

										// Check table of allowed escape characters
										switch (c)
										{
											case '\\':
											case '/':
											case '\'':
											case '\"': m_vecTokenChars.AddToTail( c ); break;
											case 'b': m_vecTokenChars.AddToTail( '\b' ); break;
											case 'f': m_vecTokenChars.AddToTail( '\f' ); break;
											case 'n': m_vecTokenChars.AddToTail( '\n' ); break;
											case 'r': m_vecTokenChars.AddToTail( '\r' ); break;
											case 't': m_vecTokenChars.AddToTail( '\t' ); break;

											case 'u':
											{
												// Make sure are followed by exactly 4 hex digits
												if ( m_cur + 4 > m_end || !V_isxdigit( m_cur[0] ) || !V_isxdigit( m_cur[1] ) || !V_isxdigit( m_cur[2] ) || !V_isxdigit( m_cur[3] ) )
												{
													V_sprintf_safe( m_szErrMsg, "\\u must be followed by exactly 4 hex digits" );
													m_eToken = kToken_Err;
													return;
												}

												// Parse the codepoint
												uchar32 nCodePoint = 0;
												for ( int n = 0 ; n < 4 ; ++n )
												{
													nCodePoint <<= 4;
													char chHex = *(m_cur++);
													if ( chHex >= '0' && chHex <= '9' )
														nCodePoint += chHex - '0';
													else if ( chHex >= 'a' && chHex <= 'f' )
														nCodePoint += chHex + 0x0a - 'a';
													else if ( chHex >= 'A' && chHex <= 'F' )
														nCodePoint += chHex + 0x0a - 'A';
													else
														Assert( false ); // inconceivable, due to above
												}

												// Encode it in UTF-8
												// NOTE(review): each \uXXXX is converted on its own; a UTF-16
												// surrogate pair written as two escapes is not combined here.
												char utf8Encode[8];
												int r = Q_UChar32ToUTF8( nCodePoint, utf8Encode );
												if ( r < 0 || r > 4 )
												{
													V_sprintf_safe( m_szErrMsg, "Invalid code point \\u%04x", nCodePoint );
													m_eToken = kToken_Err;
													return;
												}
												for ( int i = 0 ; i < r ; ++i )
													m_vecTokenChars.AddToTail( utf8Encode[i] );
											} break;

											default:
												if ( V_isprint(c) )
													V_sprintf_safe( m_szErrMsg, "Invalid escape character 0x%02x ('\\%c')", (uint8)c, c );
												else
													V_sprintf_safe( m_szErrMsg, "Invalid escape character 0x%02x", (uint8)c );
												m_eToken = kToken_Err;
												return;
										}
									}

									V_sprintf_safe( m_szErrMsg, "Hit end of input before closing quote (%c)", cDelim );
									m_eToken = kToken_Err;
								}


								// Return a short, human-readable description of the current token for use
								// in error messages.  Tokens without an entry in the table (including the
								// error token) assert and yield "<parse error>".
								const char *KeyValuesJSONParser::GetTokenDebugText()
								{
									struct TokenText_t
									{
										int m_nToken;
										const char *m_pszText;
									};
									static const TokenText_t s_tokenText[] =
									{
										{ kToken_EOF, "<EOF>" },
										{ kToken_String, "<string>" },
										{ kToken_NumberInt, "<number>" },
										{ kToken_NumberFloat, "<number>" },
										{ kToken_True, "'true'" },
										{ kToken_False, "'false'" },
										{ kToken_Null, "'null'" },
										{ '{', "'{'" },
										{ '}', "'}'" },
										{ '[', "'['" },
										{ ']', "']'" },
										{ ':', "':'" },
										{ ',', "','" },
									};

									const int nEntries = (int)( sizeof( s_tokenText ) / sizeof( s_tokenText[0] ) );
									for ( int iEntry = 0 ; iEntry < nEntries ; ++iEntry )
									{
										if ( m_eToken == s_tokenText[iEntry].m_nToken )
											return s_tokenText[iEntry].m_pszText;
									}

									// We shouldn't ever need to ask for a debug string for the error token,
									// and anything else is an error
									Assert( false );
									return "<parse error>";
								}


								#ifdef _DEBUG


								// Test helper: parse pszData and assert that parsing succeeds.
								static void JSONTest_ParseValid( const char *pszData )
								{
									KeyValuesJSONParser parser( pszData );
									KeyValues *pFile = parser.ParseFile();
									Assert( pFile );

									// Guard the cleanup in case Assert does not halt execution, so a
									// failed parse doesn't turn into a NULL dereference.
									if ( pFile )
										pFile->deleteThis();
								}


								static void JSONTest_ParseInvalid( const char *pszData, const char *pszExpectedErrMsgSnippet, int nExpectedFailureLine )

								{

									KeyValuesJSONParser parser( pszData );

									KeyValues *pFile = parser.ParseFile();

									Assert( pFile == NULL );

									Assert( V_stristr( parser.m_szErrMsg, pszExpectedErrMsgSnippet ) != NULL );

									Assert( parser.m_nLine == nExpectedFailureLine );

								}


								// Debug-only self test for KeyValuesJSONParser.  Parses an empty object and
								// one larger document that must succeed, then two malformed documents that
								// must fail with a specific message snippet and line number.
								void TestKeyValuesJSONParser()

								{

									// Smallest valid document
									JSONTest_ParseValid( "{}" );

									// Larger document.  Besides ordinary JSON it contains "//" comments,
									// trailing commas and a number with no digit before the decimal point.
									JSONTest_ParseValid( R"JSON({

										"key": "string_value",

										"pos_int32": 123,

										"pos_int64": 123456789012,

										"neg_int32": -456,

										"float": -45.23,

										"pos_exponent": 1e30,

										"neg_exponent": 1e-16,

										"decimal_and_exponent": 1.e+30,

										"no_leading_zero": .7, // we support this, even though strict JSON says it's no good

										"zero": 0,

										"true_value": true,

										"false_value": false,

										"null_value": null,

										"with_escaped": "\r \t \n",

										"unicode": "\u1234 \\u12f3",

										"array_of_ints": [ 1, 2, 3, -45 ],

										"empty_array": [],

										"array_with_stuff_inside": [

											{}, // this is a comment.

											[ 0.45, {}, "hello!" ],

											{ "id": 0 },

											// Trailing comma above.  Comment here

										],

									})JSON" );

									// Object never closed: message must contain "missing", reported on line 1
									JSONTest_ParseInvalid( "{ \"key\": 123", "missing", 1 );

									// Number followed by a stray 'f': message must contain "number", reported on line 1
									JSONTest_ParseInvalid( "{ \"key\": 123.4f }", "number", 1 );

								}


								#endif