Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

450 lines
13 KiB

  1. /*****************************************************************************
  2. *
  3. * parse.cpp
  4. *
  5. * Lame string parser.
  6. *
  7. *****************************************************************************/
  8. #include "sdview.h"
  9. /*****************************************************************************
  10. *
  11. * Ctype stuff
  12. *
  13. * The vast majority of characters we encounter are below 128, so use fast
  14. * table lookup for those.
  15. *
  16. *****************************************************************************/
  17. const BYTE c_rgbCtype[128] = {
  18. C_NONE , C_NONE , C_NONE , C_NONE , // 00-03
  19. C_NONE , C_NONE , C_NONE , C_NONE , // 04-07
  20. C_NONE , C_NONE , C_NONE , C_NONE , // 08-0B
  21. C_NONE , C_NONE , C_NONE , C_NONE , // 0C-0F
  22. C_NONE , C_NONE , C_NONE , C_NONE , // 10-13
  23. C_NONE , C_NONE , C_NONE , C_NONE , // 14-17
  24. C_NONE , C_NONE , C_NONE , C_NONE , // 18-1B
  25. C_NONE , C_NONE , C_NONE , C_NONE , // 1C-1F
  26. C_SPACE, C_NONE , C_NONE , C_NONE , // 20-23
  27. C_NONE , C_NONE , C_NONE , C_NONE , // 24-27
  28. C_NONE , C_NONE , C_NONE , C_BRNCH, // 28-2B
  29. C_NONE , C_DASH , C_NONE , C_BRNCH, // 2C-2F
  30. C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, // 30-33
  31. C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, // 34-37
  32. C_DIGIT, C_DIGIT, C_NONE , C_NONE , // 38-3B
  33. C_NONE , C_NONE , C_NONE , C_NONE , // 3C-3F
  34. C_NONE , C_ALPHA, C_ALPHA, C_ALPHA, // 40-43
  35. C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, // 44-47
  36. C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, // 48-4B
  37. C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, // 4C-4F
  38. C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, // 50-53
  39. C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, // 54-57
  40. C_ALPHA, C_ALPHA, C_ALPHA, C_NONE , // 58-5B
  41. C_NONE , C_NONE , C_NONE , C_BRNCH, // 5C-5F
  42. C_NONE , C_ALPHA, C_ALPHA, C_ALPHA, // 60-63
  43. C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, // 64-67
  44. C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, // 68-6B
  45. C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, // 6C-6F
  46. C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, // 70-73
  47. C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, // 74-77
  48. C_ALPHA, C_ALPHA, C_ALPHA, C_NONE , // 78-7B
  49. C_NONE , C_NONE , C_NONE , C_NONE , // 7C-7F
  50. };
  51. /*****************************************************************************
  52. *
  53. * _ParseP
  54. *
  55. * Parse a partial depot path.
  56. *
  57. * A partial depot path extends up to the next "#" or "@".
  58. *
  59. * If we find a "//", ":", or "\\" (double backslash) then we have
  60. * gone too far and started parsing something else, so backtrack to
  61. * the end of the previous word.
  62. *
  63. * A full depot path is a partial depot path that begins with
  64. * two slashes.
  65. *
  66. *****************************************************************************/
  67. LPCTSTR _ParseP(LPCTSTR pszParse, Substring *rgss)
  68. {
  69. rgss->SetStart(pszParse);
  70. LPCTSTR pszLastSpace = NULL;
  71. while (*pszParse && *pszParse != TEXT('#') && *pszParse != TEXT('@')) {
  72. if (pszLastSpace) {
  73. if ((pszParse[0] == TEXT('/') && pszParse[1] == TEXT('/')) ||
  74. (pszParse[0] == TEXT('\\') && pszParse[1] == TEXT('\\')) ||
  75. (pszParse[0] == TEXT(':'))) {
  76. // Back up over the word we ate by mistake
  77. pszParse = pszLastSpace;
  78. // Back up over the whitespace we ate by mistake
  79. while (pszParse >= rgss->Start() && IsSpace(pszParse[-1])) {
  80. pszParse--;
  81. }
  82. break;
  83. }
  84. }
  85. if (*pszParse == TEXT(' ')) {
  86. pszLastSpace = pszParse;
  87. }
  88. pszParse++;
  89. }
  90. rgss->SetEnd(pszParse); // Null string is possible
  91. return pszParse;
  92. }
  93. /*****************************************************************************
  94. *
  95. * Parse strings
  96. *
  97. * $D date
  98. * $P full depot path
  99. * $W optional whitespace (does not consume a Substring slot)
  100. * $a email alias
  101. * $b branch name
  102. * $d digits
  103. * $e end of string (does not consume a Substring slot)
  104. * $p partial depot path, may not be null
  105. * $u user (with optional domain removed)
  106. * $w arbitrary word (whitespace-delimited)
  107. *
  108. * NEED:
  109. *
  110. * $R maximal file revision specifier
  111. * $q quoted string
  112. *
  113. * NOTE: Some pains were taken to make this a non-backtracking parser.
  114. * If you want to add a backtracking rule, try to find a way so you don't.
  115. *
  116. *****************************************************************************/
  117. LPTSTR Parse(LPCTSTR pszFormat, LPCTSTR pszParse, Substring *rgss)
  118. {
  119. SIZE_T siz;
  120. while (*pszFormat) {
  121. if (*pszFormat == TEXT('$')) {
  122. pszFormat++;
  123. switch (*pszFormat++) {
  124. //
  125. // Keep the switch cases in alphabetical order, please.
  126. // Just helps maintain my sanity.
  127. //
  128. case TEXT('D'): // Date
  129. rgss->SetStart(pszParse);
  130. if (lstrlen(pszParse) < 19) {
  131. return NULL; // Not long enough to be a date
  132. }
  133. pszParse += 19;
  134. rgss->SetEnd(pszParse);
  135. rgss++;
  136. break;
  137. case TEXT('P'): // Full depot path
  138. if (pszParse[0] != TEXT('/') || pszParse[1] != TEXT('/')) {
  139. return NULL; // Must begin with //
  140. }
  141. goto L_p; // Now treat as if it were partial
  142. case TEXT('W'): // Optional whitespace
  143. while (*pszParse && (UINT)*pszParse <= (UINT)TEXT(' ')) {
  144. pszParse++;
  145. }
  146. break;
  147. case TEXT('a'): // Email alias
  148. rgss->SetStart(pszParse);
  149. if (IsAlpha(*pszParse)) { // First char must be alpha
  150. while (IsAlias(*pszParse)) {
  151. pszParse++;
  152. }
  153. }
  154. siz = rgss->SetEnd(pszParse);
  155. if (siz == 0 || siz > 8) {
  156. return NULL; // Must be 1 to 8 chars
  157. }
  158. rgss++;
  159. break;
  160. case TEXT('b'): // Branch name
  161. rgss->SetStart(pszParse);
  162. while (IsBranch(*pszParse)) {
  163. pszParse++;
  164. }
  165. siz = rgss->SetEnd(pszParse);
  166. if (siz == 0) {
  167. return NULL; // Must be at least one char
  168. }
  169. rgss++;
  170. break;
  171. case TEXT('d'): // Digits
  172. rgss->SetStart(pszParse);
  173. while (IsDigit(*pszParse)) {
  174. pszParse++;
  175. }
  176. if (rgss->SetEnd(pszParse) == 0) {
  177. return NULL; // Must have at least one digit
  178. }
  179. rgss++;
  180. break;
  181. case TEXT('e'): // End of string
  182. if (*pszParse) {
  183. return NULL;
  184. }
  185. break;
  186. L_p: case TEXT('p'): // Partial depot path
  187. pszParse = _ParseP(pszParse, rgss);
  188. if (!pszParse) {
  189. return NULL; // Parse failure
  190. }
  191. rgss++;
  192. break;
  193. case TEXT('u'): // Userid
  194. rgss->SetStart(pszParse);
  195. while (_IsWord(*pszParse) && *pszParse != TEXT('@')) {
  196. if (*pszParse == TEXT('\\')) {
  197. rgss->SetStart(pszParse+1);
  198. }
  199. pszParse++;
  200. }
  201. if (rgss->SetEnd(pszParse) == 0) {
  202. return NULL; // Must have at least one character
  203. }
  204. rgss++;
  205. break;
  206. #if 0
  207. case TEXT('s'): // String
  208. rgss->SetStart(pszParse);
  209. while ((_IsPrint(*pszParse) || *pszParse == TEXT('\t')) &&
  210. *pszParse != *pszFormat) {
  211. pszParse++;
  212. }
  213. rgss->SetEnd(pszParse); // Null string is okay
  214. rgss++;
  215. break;
  216. #endif
  217. case TEXT('w'):
  218. rgss->SetStart(pszParse);
  219. while (_IsWord(*pszParse)) {
  220. pszParse++;
  221. }
  222. if (rgss->SetEnd(pszParse) == 0) {
  223. return NULL; // Must have at least one character
  224. }
  225. rgss++;
  226. break;
  227. default: // ?
  228. ASSERT(0);
  229. return NULL;
  230. }
  231. } else if (*pszParse == *pszFormat) {
  232. pszParse++;
  233. pszFormat++;
  234. } else {
  235. return NULL;
  236. }
  237. }
  238. return CCAST(LPTSTR, pszParse);
  239. }
  240. /*****************************************************************************
  241. *
  242. * Tokenizer
  243. *
  244. *****************************************************************************/
  245. void Tokenizer::Restart(LPCTSTR psz)
  246. {
  247. /* Skip spaces */
  248. while (IsSpace(*psz)) {
  249. psz++;
  250. }
  251. _psz = psz;
  252. }
  253. BOOL Tokenizer::Token(String& str)
  254. {
  255. str.Reset();
  256. if (!*_psz) return FALSE;
  257. //
  258. // Quote state:
  259. //
  260. // Bit 0: In quote?
  261. // Bit 1: Was previous character part of a run of quotation marks?
  262. //
  263. int iQuote = 0;
  264. //
  265. // Wacko boundary case. The opening quotation mark should not
  266. // be counted as part of a run of quotation marks.
  267. //
  268. if (*_psz == TEXT('"')) {
  269. iQuote = 1;
  270. _psz++;
  271. }
  272. while (*_psz && ((iQuote & 1) || !IsSpace(*_psz))) {
  273. if (*_psz == TEXT('"')) {
  274. iQuote ^= 1 ^ 2;
  275. if (!(iQuote & 2)) {
  276. str << TEXT('"');
  277. }
  278. } else {
  279. iQuote &= ~2;
  280. str << *_psz;
  281. }
  282. _psz++;
  283. }
  284. Restart(_psz); /* Eat any trailing spaces */
  285. return TRUE;
  286. }
  287. /*****************************************************************************
  288. *
  289. * GetOpt
  290. *
  291. *****************************************************************************/
  292. //
  293. // Returns the switch character, or '\0' if no more switches.
  294. //
  295. // The option that terminated switch parsing is left in the tokenizer.
  296. //
  297. TCHAR GetOpt::NextSwitch()
  298. {
  299. if (!_pszUnparsed) {
  300. LPCTSTR pszTokUndo = _tok.Unparsed();
  301. if (!_tok.Token(_str)) {
  302. return TEXT('\0'); // end of command line
  303. }
  304. if (_str[0] != TEXT('-')) {
  305. _tok.Restart(pszTokUndo); // so caller can re-read it
  306. _pszValue = _str; // all future values will go nere
  307. return TEXT('\0'); // end of command line
  308. }
  309. if (_str[1] == TEXT('\0')) { // base - end switches
  310. _pszValue = _str; // all future values will go nere
  311. return TEXT('\0'); // but do not re-read it
  312. }
  313. _pszUnparsed = &_str[1];
  314. }
  315. TCHAR tchSwitch = *_pszUnparsed;
  316. LPCTSTR pszParam;
  317. for (pszParam = _pszParams; *pszParam; pszParam++) {
  318. if (tchSwitch == *pszParam) {
  319. /*
  320. * Value can come immediately afterwards or as a separate token.
  321. */
  322. _pszValue = _pszUnparsed + 1;
  323. if (_pszValue[0] == TEXT('\0')) {
  324. _tok.Token(_str);
  325. _pszValue = _str;
  326. }
  327. _pszUnparsed = NULL;
  328. return tchSwitch;
  329. }
  330. }
  331. _pszUnparsed++;
  332. if (!*_pszUnparsed) _pszUnparsed = NULL;
  333. return tchSwitch;
  334. }
  335. /*****************************************************************************
  336. *
  337. * CommentParser - Parses checkin comments
  338. *
  339. *****************************************************************************/
  340. void CommentParser::AddComment(LPTSTR psz)
  341. {
  342. if (_fHaveComment) return;
  343. //
  344. // Ignore leading spaces.
  345. //
  346. while (*psz == TEXT('\t') || *psz == TEXT(' ')) psz++;
  347. //
  348. // Skip blank description lines.
  349. //
  350. if (*psz == TEXT('\0')) return;
  351. //
  352. // Okay, here comes the money. Is this a Gauntlet checkin?
  353. //
  354. LPTSTR pszRest = Parse(TEXT("Checkin by - "), psz, NULL);
  355. if (pszRest) {
  356. //
  357. // You betcha. This overrides the dev column.
  358. //
  359. SetDev(pszRest);
  360. } else {
  361. //
  362. // No, it's a regular comment. Use the first nonblank comment
  363. // line as the text and toss the rest.
  364. //
  365. // Change all tabs to spaces because listview doesn't like tabs.
  366. //
  367. ChangeTabsToSpaces(psz);
  368. //
  369. // If the comment begins with [alias] or (alias), then move
  370. // that alias to the developer column. Digits can optionally
  371. // be inserted before the alias.
  372. //
  373. Substring rgss[2];
  374. if ((pszRest = Parse("[$a]$W", psz, rgss)) ||
  375. (pszRest = Parse("($a)$W", psz, rgss))) {
  376. SetDev(rgss[0].Finalize());
  377. psz = pszRest;
  378. } else if ((pszRest = Parse("$d$W[$a]$W", psz, rgss)) ||
  379. (pszRest = Parse("$d$W($a)$W", psz, rgss))) {
  380. SetDev(rgss[1].Finalize());
  381. //
  382. // Now collapse out the alias.
  383. //
  384. lstrcpy(rgss[1].Start()-1, pszRest);
  385. }
  386. SetComment(psz);
  387. _fHaveComment = TRUE;
  388. }
  389. }