Counter Strike : Global Offensive Source Code
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

713 lines
17 KiB

  1. //========= Copyright Valve Corporation, All rights reserved. =================//
  2. //
  3. // Read JSON-formatted data into KeyValues
  4. //
  5. //=============================================================================//
  6. #include "tier1/keyvaluesjson.h"
  7. #include "tier1/utlbuffer.h"
  8. #include "tier1/strtools.h"
  9. KeyValuesJSONParser::KeyValuesJSONParser( const CUtlBuffer &buf )
  10. {
  11. Init( (const char *)buf.Base(), buf.TellPut() );
  12. }
  13. KeyValuesJSONParser::KeyValuesJSONParser( const char *pszText, int cbSize )
  14. {
  15. Init( pszText, cbSize >= 0 ? cbSize : V_strlen(pszText) );
  16. }
  17. KeyValuesJSONParser::~KeyValuesJSONParser() {}
  18. void KeyValuesJSONParser::Init( const char *pszText, int cbSize )
  19. {
  20. m_szErrMsg[0] = '\0';
  21. m_nLine = 1;
  22. m_cur = pszText;
  23. m_end = pszText+cbSize;
  24. m_eToken = kToken_Null;
  25. NextToken();
  26. }
  27. KeyValues *KeyValuesJSONParser::ParseFile()
  28. {
  29. // A valid JSON object should contain a single object, surrounded by curly braces.
  30. if ( m_eToken == kToken_EOF )
  31. {
  32. V_sprintf_safe( m_szErrMsg, "Input contains no data" );
  33. return NULL;
  34. }
  35. if ( m_eToken == kToken_Err )
  36. return NULL;
  37. if ( m_eToken == '{' )
  38. {
  39. // Parse the the entire file as one big object
  40. KeyValues *pResult = new KeyValues("");
  41. if ( !ParseObject( pResult ) )
  42. {
  43. pResult->deleteThis();
  44. return NULL;
  45. }
  46. if ( m_eToken == kToken_EOF )
  47. return pResult;
  48. pResult->deleteThis();
  49. }
  50. V_sprintf_safe( m_szErrMsg, "%s not expected here. A valid JSON document should be a single object, which begins with '{' and ends with '}'", GetTokenDebugText() );
  51. return NULL;
  52. }
  53. bool KeyValuesJSONParser::ParseObject( KeyValues *pObject )
  54. {
  55. Assert( m_eToken == '{' );
  56. int nOpenDelimLine = m_nLine;
  57. NextToken();
  58. KeyValues *pLastChild = NULL;
  59. while ( m_eToken != '}' )
  60. {
  61. // Parse error?
  62. if ( m_eToken == kToken_Err )
  63. return false;
  64. if ( m_eToken == kToken_EOF )
  65. {
  66. // Actually report the error at the line of the unmatched delimiter.
  67. // There's no need to report the line number of the end of file, that is always
  68. // useless.
  69. m_nLine = nOpenDelimLine;
  70. V_strcpy_safe( m_szErrMsg, "End of input was reached and '{' was not matched by '}'" );
  71. return false;
  72. }
  73. // It must be a string, for the key name
  74. if ( m_eToken != kToken_String )
  75. {
  76. V_sprintf_safe( m_szErrMsg, "%s not expected here; expected string for key name or '}'", GetTokenDebugText() );
  77. return false;
  78. }
  79. KeyValues *pChildValue = new KeyValues( m_vecTokenChars.Base() );
  80. NextToken();
  81. // Expect and eat colon
  82. if ( m_eToken != ':' )
  83. {
  84. V_sprintf_safe( m_szErrMsg, "%s not expected here. Missing ':'?", GetTokenDebugText() );
  85. pChildValue->deleteThis();
  86. return false;
  87. }
  88. NextToken();
  89. // Recursively parse the value
  90. if ( !ParseValue( pChildValue ) )
  91. {
  92. pChildValue->deleteThis();
  93. return false;
  94. }
  95. // Add to parent.
  96. pObject->AddSubkeyUsingKnownLastChild( pChildValue, pLastChild );
  97. pLastChild = pChildValue;
  98. // Eat the comma, if there is one. If no comma,
  99. // then the other thing that could come next
  100. // is the closing brace to close the object
  101. // NOTE: We are allowing the extra comma after the last item
  102. if ( m_eToken == ',' )
  103. {
  104. NextToken();
  105. }
  106. else if ( m_eToken != '}' )
  107. {
  108. V_sprintf_safe( m_szErrMsg, "%s not expected here. Missing ',' or '}'?", GetTokenDebugText() );
  109. return false;
  110. }
  111. }
  112. // Eat closing '}'
  113. NextToken();
  114. // Success
  115. return true;
  116. }
  117. bool KeyValuesJSONParser::ParseArray( KeyValues *pArray )
  118. {
  119. Assert( m_eToken == '[' );
  120. int nOpenDelimLine = m_nLine;
  121. NextToken();
  122. KeyValues *pLastChild = NULL;
  123. int idx = 0;
  124. while ( m_eToken != ']' )
  125. {
  126. // Parse error?
  127. if ( m_eToken == kToken_Err )
  128. return false;
  129. if ( m_eToken == kToken_EOF )
  130. {
  131. // Actually report the error at the line of the unmatched delimiter.
  132. // There's no need to report the line number of the end of file, that is always
  133. // useless.
  134. m_nLine = nOpenDelimLine;
  135. V_strcpy_safe( m_szErrMsg, "End of input was reached and '[' was not matched by ']'" );
  136. return false;
  137. }
  138. // Set a dummy key name based on the index
  139. char szKeyName[ 32 ];
  140. V_sprintf_safe( szKeyName, "%d", idx );
  141. ++idx;
  142. KeyValues *pChildValue = new KeyValues( szKeyName );
  143. // Recursively parse the value
  144. if ( !ParseValue( pChildValue ) )
  145. {
  146. pChildValue->deleteThis();
  147. return false;
  148. }
  149. // Add to parent.
  150. pArray->AddSubkeyUsingKnownLastChild( pChildValue, pLastChild );
  151. pLastChild = pChildValue;
  152. // Handle a colon here specially. If one appears, the odds are they
  153. // are trying to put object-like data inside of an array
  154. if ( m_eToken == ':' )
  155. {
  156. V_sprintf_safe( m_szErrMsg, "':' not expected inside an array. ('[]' used when '{}' was intended?)" );
  157. return false;
  158. }
  159. // Eat the comma, if there is one. If no comma,
  160. // then the other thing that could come next
  161. // is the closing brace to close the object
  162. // NOTE: We are allowing the extra comma after the last item
  163. if ( m_eToken == ',' )
  164. {
  165. NextToken();
  166. }
  167. else if ( m_eToken != ']' )
  168. {
  169. V_sprintf_safe( m_szErrMsg, "%s not expected here. Missing ',' or ']'?", GetTokenDebugText() );
  170. return false;
  171. }
  172. }
  173. // Eat closing ']'
  174. NextToken();
  175. // Success
  176. return true;
  177. }
  178. bool KeyValuesJSONParser::ParseValue( KeyValues *pValue )
  179. {
  180. switch ( m_eToken )
  181. {
  182. case '{': return ParseObject( pValue );
  183. case '[': return ParseArray( pValue );
  184. case kToken_String:
  185. pValue->SetString( NULL, m_vecTokenChars.Base() );
  186. NextToken();
  187. return true;
  188. case kToken_NumberInt:
  189. {
  190. const char *pszNum = m_vecTokenChars.Base();
  191. // Negative?
  192. if ( *pszNum == '-' )
  193. {
  194. int64 val64 = V_atoi64( pszNum );
  195. if ( val64 < INT32_MIN )
  196. {
  197. // !KLUDGE! KeyValues cannot support this!
  198. V_sprintf_safe( m_szErrMsg, "%s is out of range for KeyValues, which doesn't support signed 64-bit numbers", pszNum );
  199. return false;
  200. }
  201. pValue->SetInt( NULL, (int)val64 );
  202. }
  203. else
  204. {
  205. uint64 val64 = V_atoui64( pszNum );
  206. if ( val64 > 0x7fffffffU )
  207. {
  208. pValue->SetUint64( NULL, val64 );
  209. }
  210. else
  211. {
  212. pValue->SetInt( NULL, (int)val64 );
  213. }
  214. }
  215. NextToken();
  216. return true;
  217. }
  218. case kToken_NumberFloat:
  219. {
  220. float f = V_atof( m_vecTokenChars.Base() );
  221. pValue->SetFloat( NULL, f );
  222. NextToken();
  223. return true;
  224. }
  225. case kToken_True:
  226. pValue->SetBool( NULL, true );
  227. NextToken();
  228. return true;
  229. case kToken_False:
  230. pValue->SetBool( NULL, false );
  231. NextToken();
  232. return true;
  233. case kToken_Null:
  234. pValue->SetPtr( NULL, NULL );
  235. NextToken();
  236. return true;
  237. case kToken_Err:
  238. return false;
  239. }
  240. V_sprintf_safe( m_szErrMsg, "%s not expected here; missing value?", GetTokenDebugText() );
  241. return false;
  242. }
  243. void KeyValuesJSONParser::NextToken()
  244. {
  245. // Already in terminal state?
  246. if ( m_eToken < 0 )
  247. return;
  248. // Clear token
  249. m_vecTokenChars.SetCount(0);
  250. // Scan until we hit the end of input
  251. while ( m_cur < m_end )
  252. {
  253. // Next character?
  254. char c = *m_cur;
  255. switch (c)
  256. {
  257. // Whitespace? Eat it and keep parsing
  258. case ' ':
  259. case '\t':
  260. ++m_cur;
  261. break;
  262. // Newline? Eat it and advance line number
  263. case '\n':
  264. case '\r':
  265. ++m_nLine;
  266. ++m_cur;
  267. // Eat \r\n or \n\r pair as a single character
  268. if ( m_cur < m_end && *m_cur == ( '\n' + '\r' - c ) )
  269. ++m_cur;
  270. break;
  271. // Single-character JSON token?
  272. case ':':
  273. case '{':
  274. case '}':
  275. case '[':
  276. case ']':
  277. case ',':
  278. m_eToken = c;
  279. ++m_cur;
  280. return;
  281. // String?
  282. case '\"':
  283. case '\'': // NOTE: We allow strings to be delimited by single quotes, which is not JSON compliant
  284. ParseStringToken();
  285. return;
  286. case '-':
  287. case '.':
  288. case '0':
  289. case '1':
  290. case '2':
  291. case '3':
  292. case '4':
  293. case '5':
  294. case '6':
  295. case '7':
  296. case '8':
  297. case '9':
  298. ParseNumberToken();
  299. return;
  300. // Literal "true"
  301. case 't':
  302. if ( m_cur + 4 <= m_end && m_cur[1] == 'r' && m_cur[2] == 'u' && m_cur[3] == 'e' )
  303. {
  304. m_cur += 4;
  305. m_eToken = kToken_True;
  306. return;
  307. }
  308. goto unexpected_char;
  309. // Literal "false"
  310. case 'f':
  311. if ( m_cur + 5 <= m_end && m_cur[1] == 'a' && m_cur[2] == 'l' && m_cur[3] == 's' && m_cur[4] == 'e' )
  312. {
  313. m_cur += 5;
  314. m_eToken = kToken_False;
  315. return;
  316. }
  317. goto unexpected_char;
  318. // Literal "null"
  319. case 'n':
  320. if ( m_cur + 4 <= m_end && m_cur[1] == 'u' && m_cur[2] == 'l' && m_cur[3] == 'l' )
  321. {
  322. m_cur += 4;
  323. m_eToken = kToken_Null;
  324. return;
  325. }
  326. goto unexpected_char;
  327. case '/':
  328. // C++-style comment?
  329. if ( m_cur < m_end && m_cur[1] == '/' )
  330. {
  331. m_cur += 2;
  332. while ( m_cur < m_end && *m_cur != '\n' && *m_cur != '\r' )
  333. ++m_cur;
  334. // Leave newline as the next character, we'll handle it above
  335. break;
  336. }
  337. // | fall
  338. // | through
  339. // V
  340. default:
  341. unexpected_char:
  342. if ( V_isprint(c) )
  343. V_sprintf_safe( m_szErrMsg, "Unexpected character 0x%02x ('%c')", (uint8)c, c );
  344. else
  345. V_sprintf_safe( m_szErrMsg, "Unexpected character 0x%02x", (uint8)c );
  346. m_eToken = kToken_Err;
  347. return;
  348. }
  349. }
  350. m_eToken = kToken_EOF;
  351. }
  352. void KeyValuesJSONParser::ParseNumberToken()
  353. {
  354. // Clear token
  355. m_vecTokenChars.SetCount(0);
  356. // Eat leading minus sign
  357. if ( *m_cur == '-' )
  358. {
  359. m_vecTokenChars.AddToTail( '-' );
  360. ++m_cur;
  361. }
  362. if ( m_cur >= m_end )
  363. {
  364. V_strcpy_safe( m_szErrMsg, "Unexpected EOF while parsing number" );
  365. m_eToken = kToken_Err;
  366. return;
  367. }
  368. char c = *m_cur;
  369. m_vecTokenChars.AddToTail( c );
  370. bool bHasWholePart = false;
  371. switch ( c )
  372. {
  373. case '0':
  374. // Leading 0 cannot be followed by any more digits, as per JSON spec (and to make sure nobody tries to parse octal).
  375. ++m_cur;
  376. bHasWholePart = true;
  377. break;
  378. case '1':
  379. case '2':
  380. case '3':
  381. case '4':
  382. case '5':
  383. case '6':
  384. case '7':
  385. case '8':
  386. case '9':
  387. bHasWholePart = true;
  388. ++m_cur;
  389. // Accumulate digits until we hit a non-digit
  390. while ( m_cur < m_end && *m_cur >= '0' && *m_cur <= '9' )
  391. m_vecTokenChars.AddToTail( *(m_cur++) );
  392. break;
  393. case '.':
  394. // strict JSON doesn't allow a number that starts with a decimal point, but we do
  395. break;
  396. }
  397. // Assume this is integral, unless we hit a decimal point and/or exponent
  398. m_eToken = kToken_NumberInt;
  399. // Fractional portion?
  400. if ( m_cur < m_end && *m_cur == '.' )
  401. {
  402. m_eToken = kToken_NumberFloat;
  403. // Eat decimal point
  404. m_vecTokenChars.AddToTail( *(m_cur++) );
  405. // Accumulate digits until we hit a non-digit
  406. bool bHasFractionPart = false;
  407. while ( m_cur < m_end && *m_cur >= '0' && *m_cur <= '9' )
  408. {
  409. m_vecTokenChars.AddToTail( *(m_cur++) );
  410. bHasFractionPart = true;
  411. }
  412. // Make sure we aren't just a single '.'
  413. if ( !bHasWholePart && !bHasFractionPart )
  414. {
  415. m_vecTokenChars.AddToTail(0);
  416. V_sprintf_safe( m_szErrMsg, "Invalid number starting with '%s'", m_vecTokenChars.Base() );
  417. m_eToken = kToken_Err;
  418. return;
  419. }
  420. }
  421. // Exponent?
  422. if ( m_cur < m_end && ( *m_cur == 'e' || *m_cur == 'E' ) )
  423. {
  424. m_eToken = kToken_NumberFloat;
  425. // Eat 'e'
  426. m_vecTokenChars.AddToTail( *(m_cur++) );
  427. // Optional sign
  428. if ( m_cur < m_end && ( *m_cur == '-' || *m_cur == '+' ) )
  429. m_vecTokenChars.AddToTail( *(m_cur++) );
  430. // Accumulate digits until we hit a non-digit
  431. bool bHasExponentDigit = false;
  432. while ( m_cur < m_end && *m_cur >= '0' && *m_cur <= '9' )
  433. {
  434. m_vecTokenChars.AddToTail( *(m_cur++) );
  435. bHasExponentDigit = true;
  436. }
  437. if ( !bHasExponentDigit )
  438. {
  439. V_strcpy_safe( m_szErrMsg, "Bad exponent in floating point number" );
  440. m_eToken = kToken_Err;
  441. return;
  442. }
  443. }
  444. // OK, We have parsed a valid number.
  445. // Terminate token
  446. m_vecTokenChars.AddToTail( '\0' );
  447. // EOF? That's OK for now, at this lexical parsing level. We'll handle the error
  448. // at the higher parse level, when expecting a comma or closing delimiter
  449. if ( m_cur >= m_end )
  450. return;
  451. // Is the next thing a valid character? This is the most common case.
  452. c = *m_cur;
  453. if ( V_isspace( c ) || c == ',' || c == '}' || c == ']' || c == '/' )
  454. return;
  455. // Handle these guys as "tokens", to provide a slightly more meaningful error message
  456. if ( c == '[' || c == '{' )
  457. return;
  458. // Anything else, treat the whole thing as an invalid numerical constant
  459. if ( V_isprint(c) )
  460. V_sprintf_safe( m_szErrMsg, "Number contains invalid character 0x%02x ('%c')", (uint8)c, c );
  461. else
  462. V_sprintf_safe( m_szErrMsg, "Number contains invalid character 0x%02x", (uint8)c );
  463. m_eToken = kToken_Err;
  464. }
  465. void KeyValuesJSONParser::ParseStringToken()
  466. {
  467. char cDelim = *(m_cur++);
  468. while ( m_cur < m_end )
  469. {
  470. char c = *(m_cur++);
  471. if ( c == '\r' || c == '\n' )
  472. {
  473. V_sprintf_safe( m_szErrMsg, "Hit end of line before closing quote (%c)", c );
  474. m_eToken = kToken_Err;
  475. return;
  476. }
  477. if ( c == cDelim )
  478. {
  479. m_eToken = kToken_String;
  480. m_vecTokenChars.AddToTail( '\0' );
  481. return;
  482. }
  483. // Ordinary character? Just append it
  484. if ( c != '\\' )
  485. {
  486. m_vecTokenChars.AddToTail( c );
  487. continue;
  488. }
  489. // Escaped character.
  490. // End of string? We'll handle it above
  491. if ( m_cur >= m_end )
  492. continue;
  493. // Check table of allowed escape characters
  494. switch (c)
  495. {
  496. case '\\':
  497. case '/':
  498. case '\'':
  499. case '\"': m_vecTokenChars.AddToTail( c ); break;
  500. case 'b': m_vecTokenChars.AddToTail( '\b' ); break;
  501. case 'f': m_vecTokenChars.AddToTail( '\f' ); break;
  502. case 'n': m_vecTokenChars.AddToTail( '\n' ); break;
  503. case 'r': m_vecTokenChars.AddToTail( '\r' ); break;
  504. case 't': m_vecTokenChars.AddToTail( '\t' ); break;
  505. case 'u':
  506. {
  507. // Make sure are followed by exactly 4 hex digits
  508. if ( m_cur + 4 > m_end || !V_isxdigit( m_cur[0] ) || !V_isxdigit( m_cur[1] ) || !V_isxdigit( m_cur[2] ) || !V_isxdigit( m_cur[3] ) )
  509. {
  510. V_sprintf_safe( m_szErrMsg, "\\u must be followed by exactly 4 hex digits" );
  511. m_eToken = kToken_Err;
  512. return;
  513. }
  514. // Parse the codepoint
  515. uchar32 nCodePoint = 0;
  516. for ( int n = 0 ; n < 4 ; ++n )
  517. {
  518. nCodePoint <<= 4;
  519. char chHex = *(m_cur++);
  520. if ( chHex >= '0' && chHex <= '9' )
  521. nCodePoint += chHex - '0';
  522. else if ( chHex >= 'a' && chHex <= 'a' )
  523. nCodePoint += chHex + 0x0a - 'a';
  524. else if ( chHex >= 'A' && chHex <= 'A' )
  525. nCodePoint += chHex + 0x0a - 'A';
  526. else
  527. Assert( false ); // inconceivable, due to above
  528. }
  529. // Encode it in UTF-8
  530. char utf8Encode[8];
  531. int r = Q_UChar32ToUTF8( nCodePoint, utf8Encode );
  532. if ( r < 0 || r > 4 )
  533. {
  534. V_sprintf_safe( m_szErrMsg, "Invalid code point \\u%04x", nCodePoint );
  535. m_eToken = kToken_Err;
  536. return;
  537. }
  538. for ( int i = 0 ; i < r ; ++i )
  539. m_vecTokenChars.AddToTail( utf8Encode[i] );
  540. } break;
  541. default:
  542. if ( V_isprint(c) )
  543. V_sprintf_safe( m_szErrMsg, "Invalid escape character 0x%02x ('\\%c')", (uint8)c, c );
  544. else
  545. V_sprintf_safe( m_szErrMsg, "Invalid escape character 0x%02x", (uint8)c );
  546. m_eToken = kToken_Err;
  547. return;
  548. }
  549. }
  550. V_sprintf_safe( m_szErrMsg, "Hit end of input before closing quote (%c)", cDelim );
  551. m_eToken = kToken_Err;
  552. }
  553. const char *KeyValuesJSONParser::GetTokenDebugText()
  554. {
  555. switch ( m_eToken )
  556. {
  557. case kToken_EOF: return "<EOF>";
  558. case kToken_String: return "<string>";
  559. case kToken_NumberInt:
  560. case kToken_NumberFloat: return "<number>";
  561. case kToken_True: return "'true'";
  562. case kToken_False: return "'false'";
  563. case kToken_Null: return "'null'";
  564. case '{': return "'{'";
  565. case '}': return "'}'";
  566. case '[': return "'['";
  567. case ']': return "']'";
  568. case ':': return "':'";
  569. case ',': return "','";
  570. }
  571. // We shouldn't ever need to ask for a debug string for the error token,
  572. // and anything else is an error
  573. Assert( false );
  574. return "<parse error>";
  575. }
  576. #ifdef _DEBUG
  577. static void JSONTest_ParseValid( const char *pszData )
  578. {
  579. KeyValuesJSONParser parser( pszData );
  580. KeyValues *pFile = parser.ParseFile();
  581. Assert( pFile );
  582. pFile->deleteThis();
  583. }
  584. static void JSONTest_ParseInvalid( const char *pszData, const char *pszExpectedErrMsgSnippet, int nExpectedFailureLine )
  585. {
  586. KeyValuesJSONParser parser( pszData );
  587. KeyValues *pFile = parser.ParseFile();
  588. Assert( pFile == NULL );
  589. Assert( V_stristr( parser.m_szErrMsg, pszExpectedErrMsgSnippet ) != NULL );
  590. Assert( parser.m_nLine == nExpectedFailureLine );
  591. }
  592. void TestKeyValuesJSONParser()
  593. {
  594. JSONTest_ParseValid( "{}" );
  595. JSONTest_ParseValid( R"JSON({
  596. "key": "string_value",
  597. "pos_int32": 123,
  598. "pos_int64": 123456789012,
  599. "neg_int32": -456,
  600. "float": -45.23,
  601. "pos_exponent": 1e30,
  602. "neg_exponent": 1e-16,
  603. "decimal_and_exponent": 1.e+30,
  604. "no_leading_zero": .7, // we support this, even though strict JSON says it's no good
  605. "zero": 0,
  606. "true_value": true,
  607. "false_value": false,
  608. "null_value": null,
  609. "with_escaped": "\r \t \n",
  610. "unicode": "\u1234 \\u12f3",
  611. "array_of_ints": [ 1, 2, 3, -45 ],
  612. "empty_array": [],
  613. "array_with_stuff_inside": [
  614. {}, // this is a comment.
  615. [ 0.45, {}, "hello!" ],
  616. { "id": 0 },
  617. // Trailing comma above. Comment here
  618. ],
  619. })JSON" );
  620. JSONTest_ParseInvalid( "{ \"key\": 123", "missing", 1 );
  621. JSONTest_ParseInvalid( "{ \"key\": 123.4f }", "number", 1 );
  622. }
  623. #endif