Counter Strike : Global Offensive Source Code
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

555 lines
14 KiB

  1. //===--- YAMLParser.h - Simple YAML parser --------------------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This is a YAML 1.2 parser.
  11. //
  12. // See http://www.yaml.org/spec/1.2/spec.html for the full standard.
  13. //
  14. // This currently does not implement the following:
  15. // * Multi-line literal folding.
  16. // * Tag resolution.
  17. // * UTF-16.
  18. // * BOMs anywhere other than the first Unicode scalar value in the file.
  19. //
  20. // The most important class here is Stream. This represents a YAML stream with
  21. // 0, 1, or many documents.
  22. //
  23. // SourceMgr sm;
  24. // StringRef input = getInput();
  25. // yaml::Stream stream(input, sm);
  26. //
  27. // for (yaml::document_iterator di = stream.begin(), de = stream.end();
  28. // di != de; ++di) {
  29. // yaml::Node *n = di->getRoot();
  30. // if (n) {
  31. // // Do something with n...
  32. // } else
  33. // break;
  34. // }
  35. //
  36. //===----------------------------------------------------------------------===//
  37. #ifndef LLVM_SUPPORT_YAMLPARSER_H
  38. #define LLVM_SUPPORT_YAMLPARSER_H
  39. #include "llvm/ADT/OwningPtr.h"
  40. #include "llvm/ADT/SmallString.h"
  41. #include "llvm/ADT/StringRef.h"
  42. #include "llvm/Support/Allocator.h"
  43. #include "llvm/Support/SMLoc.h"
  44. #include <limits>
  45. #include <utility>
  46. namespace llvm {
  47. class MemoryBuffer;
  48. class SourceMgr;
  49. class raw_ostream;
  50. class Twine;
  51. namespace yaml {
  52. class document_iterator;
  53. class Document;
  54. class Node;
  55. class Scanner;
  56. struct Token;
  57. /// @brief Dump all the tokens in this stream to OS.
  58. /// @returns true if there was an error, false otherwise.
  59. bool dumpTokens(StringRef Input, raw_ostream &);
  60. /// @brief Scans all tokens in input without outputting anything. This is used
  61. /// for benchmarking the tokenizer.
  62. /// @returns true if there was an error, false otherwise.
  63. bool scanTokens(StringRef Input);
  64. /// @brief Escape \a Input for a double quoted scalar.
  65. std::string escape(StringRef Input);
  66. /// @brief This class represents a YAML stream potentially containing multiple
  67. /// documents.
  68. class Stream {
  69. public:
  70. /// @brief This keeps a reference to the string referenced by \p Input.
  71. Stream(StringRef Input, SourceMgr &);
  72. /// @brief This takes ownership of \p InputBuffer.
  73. Stream(MemoryBuffer *InputBuffer, SourceMgr &);
  74. ~Stream();
  75. document_iterator begin();
  76. document_iterator end();
  77. void skip();
  78. bool failed();
  79. bool validate() {
  80. skip();
  81. return !failed();
  82. }
  83. void printError(Node *N, const Twine &Msg);
  84. private:
  85. OwningPtr<Scanner> scanner;
  86. OwningPtr<Document> CurrentDoc;
  87. friend class Document;
  88. /// @brief Validate a %YAML x.x directive.
  89. void handleYAMLDirective(const Token &);
  90. };
  91. /// @brief Abstract base class for all Nodes.
  92. class Node {
  93. public:
  94. enum NodeKind {
  95. NK_Null,
  96. NK_Scalar,
  97. NK_KeyValue,
  98. NK_Mapping,
  99. NK_Sequence,
  100. NK_Alias
  101. };
  102. Node(unsigned int Type, OwningPtr<Document>&, StringRef Anchor);
  103. /// @brief Get the value of the anchor attached to this node. If it does not
  104. /// have one, getAnchor().size() will be 0.
  105. StringRef getAnchor() const { return Anchor; }
  106. SMRange getSourceRange() const { return SourceRange; }
  107. void setSourceRange(SMRange SR) { SourceRange = SR; }
  108. // These functions forward to Document and Scanner.
  109. Token &peekNext();
  110. Token getNext();
  111. Node *parseBlockNode();
  112. BumpPtrAllocator &getAllocator();
  113. void setError(const Twine &Message, Token &Location) const;
  114. bool failed() const;
  115. virtual void skip() {}
  116. unsigned int getType() const { return TypeID; }
  117. void *operator new ( size_t Size
  118. , BumpPtrAllocator &Alloc
  119. , size_t Alignment = 16) throw() {
  120. return Alloc.Allocate(Size, Alignment);
  121. }
  122. void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t) throw() {
  123. Alloc.Deallocate(Ptr);
  124. }
  125. protected:
  126. OwningPtr<Document> &Doc;
  127. SMRange SourceRange;
  128. void operator delete(void *) throw() {}
  129. virtual ~Node() {}
  130. private:
  131. unsigned int TypeID;
  132. StringRef Anchor;
  133. };
  134. /// @brief A null value.
  135. ///
  136. /// Example:
  137. /// !!null null
  138. class NullNode : public Node {
  139. public:
  140. NullNode(OwningPtr<Document> &D) : Node(NK_Null, D, StringRef()) {}
  141. static inline bool classof(const Node *N) {
  142. return N->getType() == NK_Null;
  143. }
  144. };
  145. /// @brief A scalar node is an opaque datum that can be presented as a
  146. /// series of zero or more Unicode scalar values.
  147. ///
  148. /// Example:
  149. /// Adena
  150. class ScalarNode : public Node {
  151. public:
  152. ScalarNode(OwningPtr<Document> &D, StringRef Anchor, StringRef Val)
  153. : Node(NK_Scalar, D, Anchor)
  154. , Value(Val) {
  155. SMLoc Start = SMLoc::getFromPointer(Val.begin());
  156. SMLoc End = SMLoc::getFromPointer(Val.end());
  157. SourceRange = SMRange(Start, End);
  158. }
  159. // Return Value without any escaping or folding or other fun YAML stuff. This
  160. // is the exact bytes that are contained in the file (after conversion to
  161. // utf8).
  162. StringRef getRawValue() const { return Value; }
  163. /// @brief Gets the value of this node as a StringRef.
  164. ///
  165. /// @param Storage is used to store the content of the returned StringRef iff
  166. /// it requires any modification from how it appeared in the source.
  167. /// This happens with escaped characters and multi-line literals.
  168. StringRef getValue(SmallVectorImpl<char> &Storage) const;
  169. static inline bool classof(const Node *N) {
  170. return N->getType() == NK_Scalar;
  171. }
  172. private:
  173. StringRef Value;
  174. StringRef unescapeDoubleQuoted( StringRef UnquotedValue
  175. , StringRef::size_type Start
  176. , SmallVectorImpl<char> &Storage) const;
  177. };
  178. /// @brief A key and value pair. While not technically a Node under the YAML
  179. /// representation graph, it is easier to treat them this way.
  180. ///
  181. /// TODO: Consider making this not a child of Node.
  182. ///
  183. /// Example:
  184. /// Section: .text
  185. class KeyValueNode : public Node {
  186. public:
  187. KeyValueNode(OwningPtr<Document> &D)
  188. : Node(NK_KeyValue, D, StringRef())
  189. , Key(0)
  190. , Value(0)
  191. {}
  192. /// @brief Parse and return the key.
  193. ///
  194. /// This may be called multiple times.
  195. ///
  196. /// @returns The key, or nullptr if failed() == true.
  197. Node *getKey();
  198. /// @brief Parse and return the value.
  199. ///
  200. /// This may be called multiple times.
  201. ///
  202. /// @returns The value, or nullptr if failed() == true.
  203. Node *getValue();
  204. virtual void skip() LLVM_OVERRIDE {
  205. getKey()->skip();
  206. getValue()->skip();
  207. }
  208. static inline bool classof(const Node *N) {
  209. return N->getType() == NK_KeyValue;
  210. }
  211. private:
  212. Node *Key;
  213. Node *Value;
  214. };
  /// @brief This is an iterator abstraction over YAML collections shared by both
  /// sequences and maps.
  ///
  /// BaseT must have a ValueT* member named CurrentEntry and a member function
  /// increment() which must set CurrentEntry to 0 to create an end iterator.
  ///
  /// The iterator keeps no position of its own: the current element is always
  /// Base->CurrentEntry, so every iterator over the same collection refers to
  /// the same element. An iterator whose Base is null is the end iterator.
  template <class BaseT, class ValueT>
  class basic_collection_iterator
    : public std::iterator<std::forward_iterator_tag, ValueT> {
  public:
    /// Construct an end iterator.
    basic_collection_iterator() : Base(0) {}

    /// Construct an iterator over the collection \p B.
    basic_collection_iterator(BaseT *B) : Base(B) {}

    ValueT *operator ->() const {
      assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
      return Base->CurrentEntry;
    }

    ValueT &operator *() const {
      assert(Base && Base->CurrentEntry &&
             "Attempted to dereference end iterator!");
      return *Base->CurrentEntry;
    }

    operator ValueT*() const {
      assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
      return Base->CurrentEntry;
    }

    /// @brief Equality comparison.
    ///
    /// Without this operator, `a == b` would still compile by converting both
    /// sides through operator ValueT*(), which asserts (or dereferences a null
    /// Base) as soon as either side is an end iterator.
    ///
    /// Iterators with the same Base share that collection's CurrentEntry, so
    /// comparing Base is sufficient.
    bool operator ==(const basic_collection_iterator &Other) const {
      return Base == Other.Base;
    }

    bool operator !=(const basic_collection_iterator &Other) const {
      return !(*this == Other);
    }

    /// Advance the underlying collection. Turns into an end iterator once the
    /// collection reports no current entry.
    basic_collection_iterator &operator++() {
      assert(Base && "Attempted to advance iterator past end!");
      Base->increment();
      // Create an end iterator.
      if (Base->CurrentEntry == 0)
        Base = 0;
      return *this;
    }

  private:
    BaseT *Base;
  };
  // The following two templates are used for both MappingNode and SequenceNode.

  /// Start the single permitted pass over \p C: clears C.IsAtBeginning, builds
  /// an iterator on \p C, advances it once and returns it.
  template <class CollectionType>
  typename CollectionType::iterator begin(CollectionType &C) {
    assert(C.IsAtBeginning && "You may only iterate over a collection once!");
    C.IsAtBeginning = false;

    typedef typename CollectionType::iterator IterT;
    IterT First(&C);
    // A freshly built iterator has to be advanced once before it is returned.
    ++First;
    return First;
  }
  /// Skip every entry of \p C. Only does work when \p C has not been iterated
  /// yet; in that case each entry is visited and its skip() is called.
  template <class CollectionType>
  void skip(CollectionType &C) {
    // TODO: support skipping from the middle of a parsed collection ;/
    assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!");
    if (!C.IsAtBeginning)
      return;

    typedef typename CollectionType::iterator IterT;
    IterT Cur = begin(C);
    IterT Last = C.end();
    while (Cur != Last) {
      Cur->skip();
      ++Cur;
    }
  }
  274. /// @brief Represents a YAML map created from either a block map for a flow map.
  275. ///
  276. /// This parses the YAML stream as increment() is called.
  277. ///
  278. /// Example:
  279. /// Name: _main
  280. /// Scope: Global
  281. class MappingNode : public Node {
  282. public:
  283. enum MappingType {
  284. MT_Block,
  285. MT_Flow,
  286. MT_Inline ///< An inline mapping node is used for "[key: value]".
  287. };
  288. MappingNode(OwningPtr<Document> &D, StringRef Anchor, MappingType MT)
  289. : Node(NK_Mapping, D, Anchor)
  290. , Type(MT)
  291. , IsAtBeginning(true)
  292. , IsAtEnd(false)
  293. , CurrentEntry(0)
  294. {}
  295. friend class basic_collection_iterator<MappingNode, KeyValueNode>;
  296. typedef basic_collection_iterator<MappingNode, KeyValueNode> iterator;
  297. template <class T> friend typename T::iterator yaml::begin(T &);
  298. template <class T> friend void yaml::skip(T &);
  299. iterator begin() {
  300. return yaml::begin(*this);
  301. }
  302. iterator end() { return iterator(); }
  303. virtual void skip() LLVM_OVERRIDE {
  304. yaml::skip(*this);
  305. }
  306. static inline bool classof(const Node *N) {
  307. return N->getType() == NK_Mapping;
  308. }
  309. private:
  310. MappingType Type;
  311. bool IsAtBeginning;
  312. bool IsAtEnd;
  313. KeyValueNode *CurrentEntry;
  314. void increment();
  315. };
  316. /// @brief Represents a YAML sequence created from either a block sequence for a
  317. /// flow sequence.
  318. ///
  319. /// This parses the YAML stream as increment() is called.
  320. ///
  321. /// Example:
  322. /// - Hello
  323. /// - World
  324. class SequenceNode : public Node {
  325. public:
  326. enum SequenceType {
  327. ST_Block,
  328. ST_Flow,
  329. // Use for:
  330. //
  331. // key:
  332. // - val1
  333. // - val2
  334. //
  335. // As a BlockMappingEntry and BlockEnd are not created in this case.
  336. ST_Indentless
  337. };
  338. SequenceNode(OwningPtr<Document> &D, StringRef Anchor, SequenceType ST)
  339. : Node(NK_Sequence, D, Anchor)
  340. , SeqType(ST)
  341. , IsAtBeginning(true)
  342. , IsAtEnd(false)
  343. , WasPreviousTokenFlowEntry(true) // Start with an imaginary ','.
  344. , CurrentEntry(0)
  345. {}
  346. friend class basic_collection_iterator<SequenceNode, Node>;
  347. typedef basic_collection_iterator<SequenceNode, Node> iterator;
  348. template <class T> friend typename T::iterator yaml::begin(T &);
  349. template <class T> friend void yaml::skip(T &);
  350. void increment();
  351. iterator begin() {
  352. return yaml::begin(*this);
  353. }
  354. iterator end() { return iterator(); }
  355. virtual void skip() LLVM_OVERRIDE {
  356. yaml::skip(*this);
  357. }
  358. static inline bool classof(const Node *N) {
  359. return N->getType() == NK_Sequence;
  360. }
  361. private:
  362. SequenceType SeqType;
  363. bool IsAtBeginning;
  364. bool IsAtEnd;
  365. bool WasPreviousTokenFlowEntry;
  366. Node *CurrentEntry;
  367. };
  368. /// @brief Represents an alias to a Node with an anchor.
  369. ///
  370. /// Example:
  371. /// *AnchorName
  372. class AliasNode : public Node {
  373. public:
  374. AliasNode(OwningPtr<Document> &D, StringRef Val)
  375. : Node(NK_Alias, D, StringRef()), Name(Val) {}
  376. StringRef getName() const { return Name; }
  377. Node *getTarget();
  378. static inline bool classof(const Node *N) {
  379. return N->getType() == NK_Alias;
  380. }
  381. private:
  382. StringRef Name;
  383. };
  384. /// @brief A YAML Stream is a sequence of Documents. A document contains a root
  385. /// node.
  386. class Document {
  387. public:
  388. /// @brief Root for parsing a node. Returns a single node.
  389. Node *parseBlockNode();
  390. Document(Stream &ParentStream);
  391. /// @brief Finish parsing the current document and return true if there are
  392. /// more. Return false otherwise.
  393. bool skip();
  394. /// @brief Parse and return the root level node.
  395. Node *getRoot() {
  396. if (Root)
  397. return Root;
  398. return Root = parseBlockNode();
  399. }
  400. private:
  401. friend class Node;
  402. friend class document_iterator;
  403. /// @brief Stream to read tokens from.
  404. Stream &stream;
  405. /// @brief Used to allocate nodes to. All are destroyed without calling their
  406. /// destructor when the document is destroyed.
  407. BumpPtrAllocator NodeAllocator;
  408. /// @brief The root node. Used to support skipping a partially parsed
  409. /// document.
  410. Node *Root;
  411. Token &peekNext();
  412. Token getNext();
  413. void setError(const Twine &Message, Token &Location) const;
  414. bool failed() const;
  415. void handleTagDirective(const Token &Tag) {
  416. // TODO: Track tags.
  417. }
  418. /// @brief Parse %BLAH directives and return true if any were encountered.
  419. bool parseDirectives();
  420. /// @brief Consume the next token and error if it is not \a TK.
  421. bool expectToken(int TK);
  422. };
  423. /// @brief Iterator abstraction for Documents over a Stream.
  424. class document_iterator {
  425. public:
  426. document_iterator() : Doc(0) {}
  427. document_iterator(OwningPtr<Document> &D) : Doc(&D) {}
  428. bool operator ==(const document_iterator &Other) {
  429. if (isAtEnd() || Other.isAtEnd())
  430. return isAtEnd() && Other.isAtEnd();
  431. return *Doc == *Other.Doc;
  432. }
  433. bool operator !=(const document_iterator &Other) {
  434. return !(*this == Other);
  435. }
  436. document_iterator operator ++() {
  437. assert(Doc != 0 && "incrementing iterator past the end.");
  438. if (!(*Doc)->skip()) {
  439. Doc->reset(0);
  440. } else {
  441. Stream &S = (*Doc)->stream;
  442. Doc->reset(new Document(S));
  443. }
  444. return *this;
  445. }
  446. Document &operator *() {
  447. return *Doc->get();
  448. }
  449. OwningPtr<Document> &operator ->() {
  450. return *Doc;
  451. }
  452. private:
  453. bool isAtEnd() const {
  454. return Doc == 0 || *Doc == 0;
  455. }
  456. OwningPtr<Document> *Doc;
  457. };
  458. }
  459. }
  460. #endif