/*****************************************************************************
 *
 *  parse.cpp
 *
 *      Lame string parser.
 *
 *****************************************************************************/

#include "sdview.h"

/*****************************************************************************
 *
 *  Ctype stuff
 *
 *  Character-class table for the 7-bit range.  Nearly every character we
 *  meet is below 128, so classifying those is a single array lookup.
 *
 *  Each row below covers eight consecutive character codes.  Apart from
 *  the letters and digits, the only classified characters are:
 *
 *      0x20 ' '    C_SPACE     (note that tab, 0x09, is C_NONE)
 *      0x2B '+'    C_BRNCH
 *      0x2D '-'    C_DASH
 *      0x2F '/'    C_BRNCH
 *      0x5F '_'    C_BRNCH
 *
 *****************************************************************************/

const BYTE c_rgbCtype[128] = {

    // 00-07: control characters
    C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE ,
    // 08-0F: control characters
    C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE ,
    // 10-17: control characters
    C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE ,
    // 18-1F: control characters
    C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE ,

    // 20-27: space ! " # $ % & '
    C_SPACE, C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE ,
    // 28-2F: ( ) * + , - . /
    C_NONE , C_NONE , C_NONE , C_BRNCH, C_NONE , C_DASH , C_NONE , C_BRNCH,

    // 30-37: 0 1 2 3 4 5 6 7
    C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
    // 38-3F: 8 9 : ; < = > ?
    C_DIGIT, C_DIGIT, C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE ,

    // 40-47: @ A B C D E F G
    C_NONE , C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA,
    // 48-4F: H I J K L M N O
    C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA,
    // 50-57: P Q R S T U V W
    C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA,
    // 58-5F: X Y Z [ \ ] ^ _
    C_ALPHA, C_ALPHA, C_ALPHA, C_NONE , C_NONE , C_NONE , C_NONE , C_BRNCH,

    // 60-67: ` a b c d e f g
    C_NONE , C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA,
    // 68-6F: h i j k l m n o
    C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA,
    // 70-77: p q r s t u v w
    C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA,
    // 78-7F: x y z { | } ~ DEL
    C_ALPHA, C_ALPHA, C_ALPHA, C_NONE , C_NONE , C_NONE , C_NONE , C_NONE ,

};
/***************************************************************************** * * _ParseP * * Parse a partial depot path. * * A partial depot path extends up to the next "#" or "@". * * If we find a "//", ":", or "\\" (double backslash) then we have * gone too far and started parsing something else, so backtrack to * the end of the previous word. * * A full depot path is a partial depot path that begins with * two slashes. * *****************************************************************************/ LPCTSTR _ParseP(LPCTSTR pszParse, Substring *rgss) { rgss->SetStart(pszParse); LPCTSTR pszLastSpace = NULL; while (*pszParse && *pszParse != TEXT('#') && *pszParse != TEXT('@')) { if (pszLastSpace) { if ((pszParse[0] == TEXT('/') && pszParse[1] == TEXT('/')) || (pszParse[0] == TEXT('\\') && pszParse[1] == TEXT('\\')) || (pszParse[0] == TEXT(':'))) { // Back up over the word we ate by mistake pszParse = pszLastSpace; // Back up over the whitespace we ate by mistake while (pszParse >= rgss->Start() && IsSpace(pszParse[-1])) { pszParse--; } break; } } if (*pszParse == TEXT(' ')) { pszLastSpace = pszParse; } pszParse++; } rgss->SetEnd(pszParse); // Null string is possible return pszParse; } /***************************************************************************** * * Parse strings * * $D date * $P full depot path * $W optional whitespace (does not consume a Substring slot) * $a email alias * $b branch name * $d digits * $e end of string (does not consume a Substring slot) * $p partial depot path, may not be null * $u user (with optional domain removed) * $w arbitrary word (whitespace-delimited) * * NEED: * * $R maximal file revision specifier * $q quoted string * * NOTE: Some pains were taken to make this a non-backtracking parser. * If you want to add a backtracking rule, try to find a way so you don't. 
* *****************************************************************************/ LPTSTR Parse(LPCTSTR pszFormat, LPCTSTR pszParse, Substring *rgss) { SIZE_T siz; while (*pszFormat) { if (*pszFormat == TEXT('$')) { pszFormat++; switch (*pszFormat++) { // // Keep the switch cases in alphabetical order, please. // Just helps maintain my sanity. // case TEXT('D'): // Date rgss->SetStart(pszParse); if (lstrlen(pszParse) < 19) { return NULL; // Not long enough to be a date } pszParse += 19; rgss->SetEnd(pszParse); rgss++; break; case TEXT('P'): // Full depot path if (pszParse[0] != TEXT('/') || pszParse[1] != TEXT('/')) { return NULL; // Must begin with // } goto L_p; // Now treat as if it were partial case TEXT('W'): // Optional whitespace while (*pszParse && (UINT)*pszParse <= (UINT)TEXT(' ')) { pszParse++; } break; case TEXT('a'): // Email alias rgss->SetStart(pszParse); if (IsAlpha(*pszParse)) { // First char must be alpha while (IsAlias(*pszParse)) { pszParse++; } } siz = rgss->SetEnd(pszParse); if (siz == 0 || siz > 8) { return NULL; // Must be 1 to 8 chars } rgss++; break; case TEXT('b'): // Branch name rgss->SetStart(pszParse); while (IsBranch(*pszParse)) { pszParse++; } siz = rgss->SetEnd(pszParse); if (siz == 0) { return NULL; // Must be at least one char } rgss++; break; case TEXT('d'): // Digits rgss->SetStart(pszParse); while (IsDigit(*pszParse)) { pszParse++; } if (rgss->SetEnd(pszParse) == 0) { return NULL; // Must have at least one digit } rgss++; break; case TEXT('e'): // End of string if (*pszParse) { return NULL; } break; L_p: case TEXT('p'): // Partial depot path pszParse = _ParseP(pszParse, rgss); if (!pszParse) { return NULL; // Parse failure } rgss++; break; case TEXT('u'): // Userid rgss->SetStart(pszParse); while (_IsWord(*pszParse) && *pszParse != TEXT('@')) { if (*pszParse == TEXT('\\')) { rgss->SetStart(pszParse+1); } pszParse++; } if (rgss->SetEnd(pszParse) == 0) { return NULL; // Must have at least one character } rgss++; break; #if 0 case 
TEXT('s'): // String rgss->SetStart(pszParse); while ((_IsPrint(*pszParse) || *pszParse == TEXT('\t')) && *pszParse != *pszFormat) { pszParse++; } rgss->SetEnd(pszParse); // Null string is okay rgss++; break; #endif case TEXT('w'): rgss->SetStart(pszParse); while (_IsWord(*pszParse)) { pszParse++; } if (rgss->SetEnd(pszParse) == 0) { return NULL; // Must have at least one character } rgss++; break; default: // ? ASSERT(0); return NULL; } } else if (*pszParse == *pszFormat) { pszParse++; pszFormat++; } else { return NULL; } } return CCAST(LPTSTR, pszParse); } /***************************************************************************** * * Tokenizer * *****************************************************************************/ void Tokenizer::Restart(LPCTSTR psz) { /* Skip spaces */ while (IsSpace(*psz)) { psz++; } _psz = psz; } BOOL Tokenizer::Token(String& str) { str.Reset(); if (!*_psz) return FALSE; // // Quote state: // // Bit 0: In quote? // Bit 1: Was previous character part of a run of quotation marks? // int iQuote = 0; // // Wacko boundary case. The opening quotation mark should not // be counted as part of a run of quotation marks. // if (*_psz == TEXT('"')) { iQuote = 1; _psz++; } while (*_psz && ((iQuote & 1) || !IsSpace(*_psz))) { if (*_psz == TEXT('"')) { iQuote ^= 1 ^ 2; if (!(iQuote & 2)) { str << TEXT('"'); } } else { iQuote &= ~2; str << *_psz; } _psz++; } Restart(_psz); /* Eat any trailing spaces */ return TRUE; } /***************************************************************************** * * GetOpt * *****************************************************************************/ // // Returns the switch character, or '\0' if no more switches. // // The option that terminated switch parsing is left in the tokenizer. 
// TCHAR GetOpt::NextSwitch() { if (!_pszUnparsed) { LPCTSTR pszTokUndo = _tok.Unparsed(); if (!_tok.Token(_str)) { return TEXT('\0'); // end of command line } if (_str[0] != TEXT('-')) { _tok.Restart(pszTokUndo); // so caller can re-read it _pszValue = _str; // all future values will go nere return TEXT('\0'); // end of command line } if (_str[1] == TEXT('\0')) { // base - end switches _pszValue = _str; // all future values will go nere return TEXT('\0'); // but do not re-read it } _pszUnparsed = &_str[1]; } TCHAR tchSwitch = *_pszUnparsed; LPCTSTR pszParam; for (pszParam = _pszParams; *pszParam; pszParam++) { if (tchSwitch == *pszParam) { /* * Value can come immediately afterwards or as a separate token. */ _pszValue = _pszUnparsed + 1; if (_pszValue[0] == TEXT('\0')) { _tok.Token(_str); _pszValue = _str; } _pszUnparsed = NULL; return tchSwitch; } } _pszUnparsed++; if (!*_pszUnparsed) _pszUnparsed = NULL; return tchSwitch; } /***************************************************************************** * * CommentParser - Parses checkin comments * *****************************************************************************/ void CommentParser::AddComment(LPTSTR psz) { if (_fHaveComment) return; // // Ignore leading spaces. // while (*psz == TEXT('\t') || *psz == TEXT(' ')) psz++; // // Skip blank description lines. // if (*psz == TEXT('\0')) return; // // Okay, here comes the money. Is this a Gauntlet checkin? // LPTSTR pszRest = Parse(TEXT("Checkin by - "), psz, NULL); if (pszRest) { // // You betcha. This overrides the dev column. // SetDev(pszRest); } else { // // No, it's a regular comment. Use the first nonblank comment // line as the text and toss the rest. // // Change all tabs to spaces because listview doesn't like tabs. // ChangeTabsToSpaces(psz); // // If the comment begins with [alias] or (alias), then move // that alias to the developer column. Digits can optionally // be inserted before the alias. 
// Substring rgss[2]; if ((pszRest = Parse("[$a]$W", psz, rgss)) || (pszRest = Parse("($a)$W", psz, rgss))) { SetDev(rgss[0].Finalize()); psz = pszRest; } else if ((pszRest = Parse("$d$W[$a]$W", psz, rgss)) || (pszRest = Parse("$d$W($a)$W", psz, rgss))) { SetDev(rgss[1].Finalize()); // // Now collapse out the alias. // lstrcpy(rgss[1].Start()-1, pszRest); } SetComment(psz); _fHaveComment = TRUE; } }