Team Fortress 2 Source Code as on 22/4/2020
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

714 lines
17 KiB

  1. //========= Copyright Valve Corporation, All rights reserved. =================//
  2. //
  3. // Read JSON-formatted data into KeyValues
  4. //
  5. //=============================================================================//
  6. #include "tier1/keyvaluesjson.h"
  7. #include "tier1/utlbuffer.h"
  8. #include "tier1/strtools.h"
  9. #include <stdint.h> // INT32_MIN defn
  10. KeyValuesJSONParser::KeyValuesJSONParser( const CUtlBuffer &buf )
  11. {
  12. Init( (const char *)buf.Base(), buf.TellPut() );
  13. }
  14. KeyValuesJSONParser::KeyValuesJSONParser( const char *pszText, int cbSize )
  15. {
  16. Init( pszText, cbSize >= 0 ? cbSize : V_strlen(pszText) );
  17. }
  18. KeyValuesJSONParser::~KeyValuesJSONParser() {}
  19. void KeyValuesJSONParser::Init( const char *pszText, int cbSize )
  20. {
  21. m_szErrMsg[0] = '\0';
  22. m_nLine = 1;
  23. m_cur = pszText;
  24. m_end = pszText+cbSize;
  25. m_eToken = kToken_Null;
  26. NextToken();
  27. }
  28. KeyValues *KeyValuesJSONParser::ParseFile()
  29. {
  30. // A valid JSON object should contain a single object, surrounded by curly braces.
  31. if ( m_eToken == kToken_EOF )
  32. {
  33. V_sprintf_safe( m_szErrMsg, "Input contains no data" );
  34. return NULL;
  35. }
  36. if ( m_eToken == kToken_Err )
  37. return NULL;
  38. if ( m_eToken == '{' )
  39. {
  40. // Parse the the entire file as one big object
  41. KeyValues *pResult = new KeyValues("");
  42. if ( !ParseObject( pResult ) )
  43. {
  44. pResult->deleteThis();
  45. return NULL;
  46. }
  47. if ( m_eToken == kToken_EOF )
  48. return pResult;
  49. pResult->deleteThis();
  50. }
  51. V_sprintf_safe( m_szErrMsg, "%s not expected here. A valid JSON document should be a single object, which begins with '{' and ends with '}'", GetTokenDebugText() );
  52. return NULL;
  53. }
  54. bool KeyValuesJSONParser::ParseObject( KeyValues *pObject )
  55. {
  56. Assert( m_eToken == '{' );
  57. int nOpenDelimLine = m_nLine;
  58. NextToken();
  59. KeyValues *pLastChild = NULL;
  60. while ( m_eToken != '}' )
  61. {
  62. // Parse error?
  63. if ( m_eToken == kToken_Err )
  64. return false;
  65. if ( m_eToken == kToken_EOF )
  66. {
  67. // Actually report the error at the line of the unmatched delimiter.
  68. // There's no need to report the line number of the end of file, that is always
  69. // useless.
  70. m_nLine = nOpenDelimLine;
  71. V_strcpy_safe( m_szErrMsg, "End of input was reached and '{' was not matched by '}'" );
  72. return false;
  73. }
  74. // It must be a string, for the key name
  75. if ( m_eToken != kToken_String )
  76. {
  77. V_sprintf_safe( m_szErrMsg, "%s not expected here; expected string for key name or '}'", GetTokenDebugText() );
  78. return false;
  79. }
  80. KeyValues *pChildValue = new KeyValues( m_vecTokenChars.Base() );
  81. NextToken();
  82. // Expect and eat colon
  83. if ( m_eToken != ':' )
  84. {
  85. V_sprintf_safe( m_szErrMsg, "%s not expected here. Missing ':'?", GetTokenDebugText() );
  86. pChildValue->deleteThis();
  87. return false;
  88. }
  89. NextToken();
  90. // Recursively parse the value
  91. if ( !ParseValue( pChildValue ) )
  92. {
  93. pChildValue->deleteThis();
  94. return false;
  95. }
  96. // Add to parent.
  97. pObject->AddSubkeyUsingKnownLastChild( pChildValue, pLastChild );
  98. pLastChild = pChildValue;
  99. // Eat the comma, if there is one. If no comma,
  100. // then the other thing that could come next
  101. // is the closing brace to close the object
  102. // NOTE: We are allowing the extra comma after the last item
  103. if ( m_eToken == ',' )
  104. {
  105. NextToken();
  106. }
  107. else if ( m_eToken != '}' )
  108. {
  109. V_sprintf_safe( m_szErrMsg, "%s not expected here. Missing ',' or '}'?", GetTokenDebugText() );
  110. return false;
  111. }
  112. }
  113. // Eat closing '}'
  114. NextToken();
  115. // Success
  116. return true;
  117. }
  118. bool KeyValuesJSONParser::ParseArray( KeyValues *pArray )
  119. {
  120. Assert( m_eToken == '[' );
  121. int nOpenDelimLine = m_nLine;
  122. NextToken();
  123. KeyValues *pLastChild = NULL;
  124. int idx = 0;
  125. while ( m_eToken != ']' )
  126. {
  127. // Parse error?
  128. if ( m_eToken == kToken_Err )
  129. return false;
  130. if ( m_eToken == kToken_EOF )
  131. {
  132. // Actually report the error at the line of the unmatched delimiter.
  133. // There's no need to report the line number of the end of file, that is always
  134. // useless.
  135. m_nLine = nOpenDelimLine;
  136. V_strcpy_safe( m_szErrMsg, "End of input was reached and '[' was not matched by ']'" );
  137. return false;
  138. }
  139. // Set a dummy key name based on the index
  140. char szKeyName[ 32 ];
  141. V_sprintf_safe( szKeyName, "%d", idx );
  142. ++idx;
  143. KeyValues *pChildValue = new KeyValues( szKeyName );
  144. // Recursively parse the value
  145. if ( !ParseValue( pChildValue ) )
  146. {
  147. pChildValue->deleteThis();
  148. return false;
  149. }
  150. // Add to parent.
  151. pArray->AddSubkeyUsingKnownLastChild( pChildValue, pLastChild );
  152. pLastChild = pChildValue;
  153. // Handle a colon here specially. If one appears, the odds are they
  154. // are trying to put object-like data inside of an array
  155. if ( m_eToken == ':' )
  156. {
  157. V_sprintf_safe( m_szErrMsg, "':' not expected inside an array. ('[]' used when '{}' was intended?)" );
  158. return false;
  159. }
  160. // Eat the comma, if there is one. If no comma,
  161. // then the other thing that could come next
  162. // is the closing brace to close the object
  163. // NOTE: We are allowing the extra comma after the last item
  164. if ( m_eToken == ',' )
  165. {
  166. NextToken();
  167. }
  168. else if ( m_eToken != ']' )
  169. {
  170. V_sprintf_safe( m_szErrMsg, "%s not expected here. Missing ',' or ']'?", GetTokenDebugText() );
  171. return false;
  172. }
  173. }
  174. // Eat closing ']'
  175. NextToken();
  176. // Success
  177. return true;
  178. }
  179. bool KeyValuesJSONParser::ParseValue( KeyValues *pValue )
  180. {
  181. switch ( m_eToken )
  182. {
  183. case '{': return ParseObject( pValue );
  184. case '[': return ParseArray( pValue );
  185. case kToken_String:
  186. pValue->SetString( NULL, m_vecTokenChars.Base() );
  187. NextToken();
  188. return true;
  189. case kToken_NumberInt:
  190. {
  191. const char *pszNum = m_vecTokenChars.Base();
  192. // Negative?
  193. if ( *pszNum == '-' )
  194. {
  195. int64 val64 = V_atoi64( pszNum );
  196. if ( val64 < INT32_MIN )
  197. {
  198. // !KLUDGE! KeyValues cannot support this!
  199. V_sprintf_safe( m_szErrMsg, "%s is out of range for KeyValues, which doesn't support signed 64-bit numbers", pszNum );
  200. return false;
  201. }
  202. pValue->SetInt( NULL, (int)val64 );
  203. }
  204. else
  205. {
  206. uint64 val64 = V_atoui64( pszNum );
  207. if ( val64 > 0x7fffffffU )
  208. {
  209. pValue->SetUint64( NULL, val64 );
  210. }
  211. else
  212. {
  213. pValue->SetInt( NULL, (int)val64 );
  214. }
  215. }
  216. NextToken();
  217. return true;
  218. }
  219. case kToken_NumberFloat:
  220. {
  221. float f = V_atof( m_vecTokenChars.Base() );
  222. pValue->SetFloat( NULL, f );
  223. NextToken();
  224. return true;
  225. }
  226. case kToken_True:
  227. pValue->SetBool( NULL, true );
  228. NextToken();
  229. return true;
  230. case kToken_False:
  231. pValue->SetBool( NULL, false );
  232. NextToken();
  233. return true;
  234. case kToken_Null:
  235. pValue->SetPtr( NULL, NULL );
  236. NextToken();
  237. return true;
  238. case kToken_Err:
  239. return false;
  240. }
  241. V_sprintf_safe( m_szErrMsg, "%s not expected here; missing value?", GetTokenDebugText() );
  242. return false;
  243. }
  244. void KeyValuesJSONParser::NextToken()
  245. {
  246. // Already in terminal state?
  247. if ( m_eToken < 0 )
  248. return;
  249. // Clear token
  250. m_vecTokenChars.SetCount(0);
  251. // Scan until we hit the end of input
  252. while ( m_cur < m_end )
  253. {
  254. // Next character?
  255. char c = *m_cur;
  256. switch (c)
  257. {
  258. // Whitespace? Eat it and keep parsing
  259. case ' ':
  260. case '\t':
  261. ++m_cur;
  262. break;
  263. // Newline? Eat it and advance line number
  264. case '\n':
  265. case '\r':
  266. ++m_nLine;
  267. ++m_cur;
  268. // Eat \r\n or \n\r pair as a single character
  269. if ( m_cur < m_end && *m_cur == ( '\n' + '\r' - c ) )
  270. ++m_cur;
  271. break;
  272. // Single-character JSON token?
  273. case ':':
  274. case '{':
  275. case '}':
  276. case '[':
  277. case ']':
  278. case ',':
  279. m_eToken = c;
  280. ++m_cur;
  281. return;
  282. // String?
  283. case '\"':
  284. case '\'': // NOTE: We allow strings to be delimited by single quotes, which is not JSON compliant
  285. ParseStringToken();
  286. return;
  287. case '-':
  288. case '.':
  289. case '0':
  290. case '1':
  291. case '2':
  292. case '3':
  293. case '4':
  294. case '5':
  295. case '6':
  296. case '7':
  297. case '8':
  298. case '9':
  299. ParseNumberToken();
  300. return;
  301. // Literal "true"
  302. case 't':
  303. if ( m_cur + 4 <= m_end && m_cur[1] == 'r' && m_cur[2] == 'u' && m_cur[3] == 'e' )
  304. {
  305. m_cur += 4;
  306. m_eToken = kToken_True;
  307. return;
  308. }
  309. goto unexpected_char;
  310. // Literal "false"
  311. case 'f':
  312. if ( m_cur + 5 <= m_end && m_cur[1] == 'a' && m_cur[2] == 'l' && m_cur[3] == 's' && m_cur[4] == 'e' )
  313. {
  314. m_cur += 5;
  315. m_eToken = kToken_False;
  316. return;
  317. }
  318. goto unexpected_char;
  319. // Literal "null"
  320. case 'n':
  321. if ( m_cur + 4 <= m_end && m_cur[1] == 'u' && m_cur[2] == 'l' && m_cur[3] == 'l' )
  322. {
  323. m_cur += 4;
  324. m_eToken = kToken_Null;
  325. return;
  326. }
  327. goto unexpected_char;
  328. case '/':
  329. // C++-style comment?
  330. if ( m_cur < m_end && m_cur[1] == '/' )
  331. {
  332. m_cur += 2;
  333. while ( m_cur < m_end && *m_cur != '\n' && *m_cur != '\r' )
  334. ++m_cur;
  335. // Leave newline as the next character, we'll handle it above
  336. break;
  337. }
  338. // | fall
  339. // | through
  340. // V
  341. default:
  342. unexpected_char:
  343. if ( V_isprint(c) )
  344. V_sprintf_safe( m_szErrMsg, "Unexpected character 0x%02x ('%c')", (uint8)c, c );
  345. else
  346. V_sprintf_safe( m_szErrMsg, "Unexpected character 0x%02x", (uint8)c );
  347. m_eToken = kToken_Err;
  348. return;
  349. }
  350. }
  351. m_eToken = kToken_EOF;
  352. }
  353. void KeyValuesJSONParser::ParseNumberToken()
  354. {
  355. // Clear token
  356. m_vecTokenChars.SetCount(0);
  357. // Eat leading minus sign
  358. if ( *m_cur == '-' )
  359. {
  360. m_vecTokenChars.AddToTail( '-' );
  361. ++m_cur;
  362. }
  363. if ( m_cur >= m_end )
  364. {
  365. V_strcpy_safe( m_szErrMsg, "Unexpected EOF while parsing number" );
  366. m_eToken = kToken_Err;
  367. return;
  368. }
  369. char c = *m_cur;
  370. m_vecTokenChars.AddToTail( c );
  371. bool bHasWholePart = false;
  372. switch ( c )
  373. {
  374. case '0':
  375. // Leading 0 cannot be followed by any more digits, as per JSON spec (and to make sure nobody tries to parse octal).
  376. ++m_cur;
  377. bHasWholePart = true;
  378. break;
  379. case '1':
  380. case '2':
  381. case '3':
  382. case '4':
  383. case '5':
  384. case '6':
  385. case '7':
  386. case '8':
  387. case '9':
  388. bHasWholePart = true;
  389. ++m_cur;
  390. // Accumulate digits until we hit a non-digit
  391. while ( m_cur < m_end && *m_cur >= '0' && *m_cur <= '9' )
  392. m_vecTokenChars.AddToTail( *(m_cur++) );
  393. break;
  394. case '.':
  395. // strict JSON doesn't allow a number that starts with a decimal point, but we do
  396. break;
  397. }
  398. // Assume this is integral, unless we hit a decimal point and/or exponent
  399. m_eToken = kToken_NumberInt;
  400. // Fractional portion?
  401. if ( m_cur < m_end && *m_cur == '.' )
  402. {
  403. m_eToken = kToken_NumberFloat;
  404. // Eat decimal point
  405. m_vecTokenChars.AddToTail( *(m_cur++) );
  406. // Accumulate digits until we hit a non-digit
  407. bool bHasFractionPart = false;
  408. while ( m_cur < m_end && *m_cur >= '0' && *m_cur <= '9' )
  409. {
  410. m_vecTokenChars.AddToTail( *(m_cur++) );
  411. bHasFractionPart = true;
  412. }
  413. // Make sure we aren't just a single '.'
  414. if ( !bHasWholePart && !bHasFractionPart )
  415. {
  416. m_vecTokenChars.AddToTail(0);
  417. V_sprintf_safe( m_szErrMsg, "Invalid number starting with '%s'", m_vecTokenChars.Base() );
  418. m_eToken = kToken_Err;
  419. return;
  420. }
  421. }
  422. // Exponent?
  423. if ( m_cur < m_end && ( *m_cur == 'e' || *m_cur == 'E' ) )
  424. {
  425. m_eToken = kToken_NumberFloat;
  426. // Eat 'e'
  427. m_vecTokenChars.AddToTail( *(m_cur++) );
  428. // Optional sign
  429. if ( m_cur < m_end && ( *m_cur == '-' || *m_cur == '+' ) )
  430. m_vecTokenChars.AddToTail( *(m_cur++) );
  431. // Accumulate digits until we hit a non-digit
  432. bool bHasExponentDigit = false;
  433. while ( m_cur < m_end && *m_cur >= '0' && *m_cur <= '9' )
  434. {
  435. m_vecTokenChars.AddToTail( *(m_cur++) );
  436. bHasExponentDigit = true;
  437. }
  438. if ( !bHasExponentDigit )
  439. {
  440. V_strcpy_safe( m_szErrMsg, "Bad exponent in floating point number" );
  441. m_eToken = kToken_Err;
  442. return;
  443. }
  444. }
  445. // OK, We have parsed a valid number.
  446. // Terminate token
  447. m_vecTokenChars.AddToTail( '\0' );
  448. // EOF? That's OK for now, at this lexical parsing level. We'll handle the error
  449. // at the higher parse level, when expecting a comma or closing delimiter
  450. if ( m_cur >= m_end )
  451. return;
  452. // Is the next thing a valid character? This is the most common case.
  453. c = *m_cur;
  454. if ( V_isspace( c ) || c == ',' || c == '}' || c == ']' || c == '/' )
  455. return;
  456. // Handle these guys as "tokens", to provide a slightly more meaningful error message
  457. if ( c == '[' || c == '{' )
  458. return;
  459. // Anything else, treat the whole thing as an invalid numerical constant
  460. if ( V_isprint(c) )
  461. V_sprintf_safe( m_szErrMsg, "Number contains invalid character 0x%02x ('%c')", (uint8)c, c );
  462. else
  463. V_sprintf_safe( m_szErrMsg, "Number contains invalid character 0x%02x", (uint8)c );
  464. m_eToken = kToken_Err;
  465. }
  466. void KeyValuesJSONParser::ParseStringToken()
  467. {
  468. char cDelim = *(m_cur++);
  469. while ( m_cur < m_end )
  470. {
  471. char c = *(m_cur++);
  472. if ( c == '\r' || c == '\n' )
  473. {
  474. V_sprintf_safe( m_szErrMsg, "Hit end of line before closing quote (%c)", c );
  475. m_eToken = kToken_Err;
  476. return;
  477. }
  478. if ( c == cDelim )
  479. {
  480. m_eToken = kToken_String;
  481. m_vecTokenChars.AddToTail( '\0' );
  482. return;
  483. }
  484. // Ordinary character? Just append it
  485. if ( c != '\\' )
  486. {
  487. m_vecTokenChars.AddToTail( c );
  488. continue;
  489. }
  490. // Escaped character.
  491. // End of string? We'll handle it above
  492. if ( m_cur >= m_end )
  493. continue;
  494. // Check table of allowed escape characters
  495. switch (c)
  496. {
  497. case '\\':
  498. case '/':
  499. case '\'':
  500. case '\"': m_vecTokenChars.AddToTail( c ); break;
  501. case 'b': m_vecTokenChars.AddToTail( '\b' ); break;
  502. case 'f': m_vecTokenChars.AddToTail( '\f' ); break;
  503. case 'n': m_vecTokenChars.AddToTail( '\n' ); break;
  504. case 'r': m_vecTokenChars.AddToTail( '\r' ); break;
  505. case 't': m_vecTokenChars.AddToTail( '\t' ); break;
  506. case 'u':
  507. {
  508. // Make sure are followed by exactly 4 hex digits
  509. if ( m_cur + 4 > m_end || !V_isxdigit( m_cur[0] ) || !V_isxdigit( m_cur[1] ) || !V_isxdigit( m_cur[2] ) || !V_isxdigit( m_cur[3] ) )
  510. {
  511. V_sprintf_safe( m_szErrMsg, "\\u must be followed by exactly 4 hex digits" );
  512. m_eToken = kToken_Err;
  513. return;
  514. }
  515. // Parse the codepoint
  516. uchar32 nCodePoint = 0;
  517. for ( int n = 0 ; n < 4 ; ++n )
  518. {
  519. nCodePoint <<= 4;
  520. char chHex = *(m_cur++);
  521. if ( chHex >= '0' && chHex <= '9' )
  522. nCodePoint += chHex - '0';
  523. else if ( chHex >= 'a' && chHex <= 'a' )
  524. nCodePoint += chHex + 0x0a - 'a';
  525. else if ( chHex >= 'A' && chHex <= 'A' )
  526. nCodePoint += chHex + 0x0a - 'A';
  527. else
  528. Assert( false ); // inconceivable, due to above
  529. }
  530. // Encode it in UTF-8
  531. char utf8Encode[8];
  532. int r = Q_UChar32ToUTF8( nCodePoint, utf8Encode );
  533. if ( r < 0 || r > 4 )
  534. {
  535. V_sprintf_safe( m_szErrMsg, "Invalid code point \\u%04x", nCodePoint );
  536. m_eToken = kToken_Err;
  537. return;
  538. }
  539. for ( int i = 0 ; i < r ; ++i )
  540. m_vecTokenChars.AddToTail( utf8Encode[i] );
  541. } break;
  542. default:
  543. if ( V_isprint(c) )
  544. V_sprintf_safe( m_szErrMsg, "Invalid escape character 0x%02x ('\\%c')", (uint8)c, c );
  545. else
  546. V_sprintf_safe( m_szErrMsg, "Invalid escape character 0x%02x", (uint8)c );
  547. m_eToken = kToken_Err;
  548. return;
  549. }
  550. }
  551. V_sprintf_safe( m_szErrMsg, "Hit end of input before closing quote (%c)", cDelim );
  552. m_eToken = kToken_Err;
  553. }
  554. const char *KeyValuesJSONParser::GetTokenDebugText()
  555. {
  556. switch ( m_eToken )
  557. {
  558. case kToken_EOF: return "<EOF>";
  559. case kToken_String: return "<string>";
  560. case kToken_NumberInt:
  561. case kToken_NumberFloat: return "<number>";
  562. case kToken_True: return "'true'";
  563. case kToken_False: return "'false'";
  564. case kToken_Null: return "'null'";
  565. case '{': return "'{'";
  566. case '}': return "'}'";
  567. case '[': return "'['";
  568. case ']': return "']'";
  569. case ':': return "':'";
  570. case ',': return "','";
  571. }
  572. // We shouldn't ever need to ask for a debug string for the error token,
  573. // and anything else is an error
  574. Assert( false );
  575. return "<parse error>";
  576. }
  577. #ifdef _DEBUG
  578. static void JSONTest_ParseValid( const char *pszData )
  579. {
  580. KeyValuesJSONParser parser( pszData );
  581. KeyValues *pFile = parser.ParseFile();
  582. Assert( pFile );
  583. pFile->deleteThis();
  584. }
  585. static void JSONTest_ParseInvalid( const char *pszData, const char *pszExpectedErrMsgSnippet, int nExpectedFailureLine )
  586. {
  587. KeyValuesJSONParser parser( pszData );
  588. KeyValues *pFile = parser.ParseFile();
  589. Assert( pFile == NULL );
  590. Assert( V_stristr( parser.m_szErrMsg, pszExpectedErrMsgSnippet ) != NULL );
  591. Assert( parser.m_nLine == nExpectedFailureLine );
  592. }
  593. void TestKeyValuesJSONParser()
  594. {
  595. JSONTest_ParseValid( "{}" );
  596. JSONTest_ParseValid( R"JSON({
  597. "key": "string_value",
  598. "pos_int32": 123,
  599. "pos_int64": 123456789012,
  600. "neg_int32": -456,
  601. "float": -45.23,
  602. "pos_exponent": 1e30,
  603. "neg_exponent": 1e-16,
  604. "decimal_and_exponent": 1.e+30,
  605. "no_leading_zero": .7, // we support this, even though strict JSON says it's no good
  606. "zero": 0,
  607. "true_value": true,
  608. "false_value": false,
  609. "null_value": null,
  610. "with_escaped": "\r \t \n",
  611. "unicode": "\u1234 \\u12f3",
  612. "array_of_ints": [ 1, 2, 3, -45 ],
  613. "empty_array": [],
  614. "array_with_stuff_inside": [
  615. {}, // this is a comment.
  616. [ 0.45, {}, "hello!" ],
  617. { "id": 0 },
  618. // Trailing comma above. Comment here
  619. ],
  620. })JSON" );
  621. JSONTest_ParseInvalid( "{ \"key\": 123", "missing", 1 );
  622. JSONTest_ParseInvalid( "{ \"key\": 123.4f }", "number", 1 );
  623. }
  624. #endif