Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

356 lines
8.1 KiB

  1. ///////////////////////////////////////////////////////////////////////////////
  2. //
  3. // Copyright (c) 1998, Microsoft Corp. All rights reserved.
  4. //
  5. // FILE
  6. //
  7. // parser.h
  8. //
  9. // SYNOPSIS
  10. //
  11. // This file defines the class Parser.
  12. //
  13. // MODIFICATION HISTORY
  14. //
  15. // 02/06/1998 Original version.
  16. // 03/23/2000 Added erase. Removed the const_cast's.
  17. //
  18. ///////////////////////////////////////////////////////////////////////////////
  19. #ifndef _PARSER_H_
  20. #define _PARSER_H_
  21. #include <climits>
  22. #include <cmath>
  23. #include <cstdlib>
  24. #include <tchar.h>
  25. ///////////////////////////////////////////////////////////////////////////////
  26. //
  27. // CLASS
  28. //
  29. // Parser
  30. //
  31. // DESCRIPTION
  32. //
  33. // This class facilitates parsing a null-terminated string. Note that many
  34. // methods have two forms: findXXX and seekXXX. The difference is that the
  35. // find methods throw an exception if unsuccessful while the seek methods
  36. // set the cursor to the end of the string.
  37. //
  38. // NOTE
  39. //
  40. // The constructor takes a non-const string because the string is
  41. // temporarily modified while tokenizing. However, the string is returned
  42. // to its original form when parsing is complete. Therefore, if you know
  43. // the string isn't in read-only memory and isn't visible to another
  44. // thread, then you can safely use const_cast to parse a const string.
  45. //
  46. ///////////////////////////////////////////////////////////////////////////////
  47. class Parser
  48. {
  49. public:
  50. class ParseError {};
  51. Parser(_TCHAR* tcsString)
  52. : start(tcsString),
  53. current(tcsString),
  54. save(__T('\0')),
  55. tokenLocked(false)
  56. { }
  57. ~Parser()
  58. {
  59. releaseToken();
  60. }
  61. // Marks the current position as the beginning of a token.
  62. const _TCHAR* beginToken() throw (ParseError)
  63. {
  64. if (tokenLocked) { throw ParseError(); }
  65. return start = current;
  66. }
  67. // Erase nchar characters starting at the current position.
  68. void erase(size_t nchar) throw (ParseError)
  69. {
  70. size_t left = remaining();
  71. if (nchar > left) { throw ParseError(); }
  72. memmove(current, current + nchar, (left + 1 - nchar) * sizeof(TCHAR));
  73. }
  74. // Extracts a double from the string.
  75. double extractDouble() throw (ParseError)
  76. {
  77. _TCHAR* endptr;
  78. double d = _tcstod(current, &endptr);
  79. if (endptr == current || d == HUGE_VAL || d == -HUGE_VAL)
  80. {
  81. throw ParseError();
  82. }
  83. current = endptr;
  84. return d;
  85. }
  86. // Extracts a long from the string.
  87. long extractLong(int base = 10) throw (ParseError)
  88. {
  89. _TCHAR* endptr;
  90. long l = _tcstol(current, &endptr, base);
  91. if (endptr == current || l == LONG_MAX || l == LONG_MIN)
  92. {
  93. throw ParseError();
  94. }
  95. current = endptr;
  96. return l;
  97. }
  98. // Extracts an unsigned long from the string.
  99. unsigned long extractUnsignedLong(int base = 10) throw (ParseError)
  100. {
  101. _TCHAR* endptr;
  102. unsigned long ul = _tcstoul(current, &endptr, base);
  103. if (endptr == current || ul == ULONG_MAX)
  104. {
  105. throw ParseError();
  106. }
  107. current = endptr;
  108. return ul;
  109. }
  110. // Find any character in tcsCharSet.
  111. const _TCHAR* findAny(const _TCHAR* tcsCharSet) throw (ParseError)
  112. {
  113. return notEmpty(seekAny(tcsCharSet));
  114. }
  115. // Find the end of the string.
  116. const _TCHAR* findEnd() throw ()
  117. {
  118. return current += _tcslen(current);
  119. }
  120. // Find the next occurrence of 'c'.
  121. const _TCHAR* findNext(_TINT c) throw (ParseError)
  122. {
  123. return notEmpty(seekNext(c));
  124. }
  125. // Find the last occurrence of 'c' in the string.
  126. const _TCHAR* findLast(_TINT c) throw (ParseError)
  127. {
  128. return notEmpty(seekLast(c));
  129. }
  130. // Find the next occurrence of tcsString.
  131. const _TCHAR* findString(const _TCHAR* tcsString) throw (ParseError)
  132. {
  133. return notEmpty(seekString(tcsString));
  134. }
  135. // Find the next token delimited by any of the characters in tcsDelimit.
  136. // This method must be followed by a call to releaseToken before further
  137. // parsing.
  138. const _TCHAR* findToken(const _TCHAR* tcsDelimit) throw (ParseError)
  139. {
  140. return notEmpty(seekToken(tcsDelimit));
  141. }
  142. // Marks the current position as the end of a token. The token does not
  143. // include the current character. This method must be followed by a call
  144. // to releaseToken before further parsing.
  145. const _TCHAR* endToken() throw (ParseError)
  146. {
  147. if (tokenLocked) { throw ParseError(); }
  148. tokenLocked = true;
  149. save = *current;
  150. *current = __T('\0');
  151. return start;
  152. }
  153. // Skips the specified character.
  154. const _TCHAR* ignore(_TINT c) throw (ParseError)
  155. {
  156. if (*current++ != c) { throw ParseError(); }
  157. return current;
  158. }
  159. // Skips the specified character string.
  160. const _TCHAR* ignore(const _TCHAR* tcsString) throw (ParseError)
  161. {
  162. size_t len = _tcslen(tcsString);
  163. if (len > remaining() || _tcsncmp(current, tcsString, len) != 0)
  164. {
  165. throw ParseError();
  166. }
  167. return current += len;
  168. }
  169. // Returns true if the string has not been fully parsed.
  170. bool more() const throw ()
  171. {
  172. return *current != __T('\0');
  173. }
  174. // Releases a token returned by findToken, endToken, or seekToken.
  175. const _TCHAR* releaseToken() throw ()
  176. {
  177. if (tokenLocked)
  178. {
  179. tokenLocked = false;
  180. *current = save;
  181. }
  182. return start;
  183. }
  184. // Returns the number of unparsed characters.
  185. size_t remaining() const throw ()
  186. {
  187. return _tcslen(current);
  188. }
  189. //////////
  190. // The seek family of methods perform like their find counterparts except
  191. // they do not throw an exception on failure. Instead they set the cursor
  192. // to the end of the string.
  193. //////////
  194. const _TCHAR* seekAny(const _TCHAR* tcsCharSet) throw ()
  195. {
  196. return setCurrent(_tcspbrk(current, tcsCharSet));
  197. }
  198. const _TCHAR* seekNext(_TINT c) throw ()
  199. {
  200. return setCurrent(_tcschr(current, c));
  201. }
  202. const _TCHAR* seekLast(_TINT c) throw ()
  203. {
  204. return setCurrent(_tcsrchr(current, c));
  205. }
  206. const _TCHAR* seekString(const _TCHAR* tcsString) throw ()
  207. {
  208. return setCurrent(_tcsstr(current, tcsString));
  209. }
  210. const _TCHAR* seekToken(const _TCHAR* tcsDelimit) throw (ParseError)
  211. {
  212. skip(tcsDelimit);
  213. if (!more()) { return NULL; }
  214. beginToken();
  215. seekAny(tcsDelimit);
  216. return endToken();
  217. }
  218. // Skip occurrences of any characters in tcsCharSet.
  219. const _TCHAR* skip(const _TCHAR* tcsCharSet) throw ()
  220. {
  221. return current += _tcsspn(current, tcsCharSet);
  222. }
  223. // Skip a fixed number of characters.
  224. const _TCHAR* skip(size_t numChar) throw (ParseError)
  225. {
  226. if (numChar > _tcslen(current)) { throw ParseError(); }
  227. return current += numChar;
  228. }
  229. const _TCHAR* operator--(int) throw (ParseError)
  230. {
  231. if (current == start) { throw ParseError(); }
  232. return current--;
  233. }
  234. const _TCHAR* operator--() throw (ParseError)
  235. {
  236. if (current == start) { throw ParseError(); }
  237. return --current;
  238. }
  239. const _TCHAR* operator++(int) throw (ParseError)
  240. {
  241. if (!more()) { throw ParseError(); }
  242. return current++;
  243. }
  244. const _TCHAR* operator++() throw (ParseError)
  245. {
  246. if (!more()) { throw ParseError(); }
  247. return ++current;
  248. }
  249. _TCHAR operator*() const throw ()
  250. {
  251. return *current;
  252. }
  253. operator const _TCHAR*() const throw ()
  254. {
  255. return current;
  256. }
  257. protected:
  258. // Verifies that the given string is not empty.
  259. static const _TCHAR* notEmpty(const _TCHAR* tcs) throw (ParseError)
  260. {
  261. if (*tcs == __T('\0')) { throw ParseError(); }
  262. return tcs;
  263. }
  264. // Sets the current position to pos or end of string if pos is null.
  265. const _TCHAR* setCurrent(_TCHAR* pos) throw ()
  266. {
  267. return (pos ? (current = pos) : findEnd());
  268. }
  269. //////////
  270. // Not implemented.
  271. //////////
  272. Parser(const Parser&);
  273. Parser& operator=(const Parser&);
  274. const _TCHAR* start; // The start of the token.
  275. _TCHAR* current; // The current position of the cursor.
  276. _TCHAR save; // The actual terminating character of the token.
  277. bool tokenLocked; // true if the current token has not been released.
  278. };
  279. #endif