mirror of https://github.com/tongzx/nt5src
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
450 lines
13 KiB
450 lines
13 KiB
/*****************************************************************************
 *
 *  parse.cpp
 *
 *      Lame string parser.
 *
 *****************************************************************************/
|
|
|
|
#include "sdview.h"
|
|
|
|
/*****************************************************************************
 *
 *  Ctype stuff
 *
 *  The vast majority of characters we encounter are below 128, so use fast
 *  table lookup for those.
 *
 *****************************************************************************/
|
|
|
|
//
//  Character-class table for character codes 0x00 through 0x7F, indexed
//  by character code.  Laid out eight entries per row; the comment on each
//  row gives the code range it covers.
//
const BYTE c_rgbCtype[128] = {

    // 0x00-0x1F: every entry is C_NONE
    C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE ,  // 00-07
    C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE ,  // 08-0F
    C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE ,  // 10-17
    C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE ,  // 18-1F

    // 0x20-0x3F: ' ' is C_SPACE; '+' and '/' are C_BRNCH; '-' is C_DASH;
    // '0'-'9' are C_DIGIT
    C_SPACE, C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE ,  // 20-27
    C_NONE , C_NONE , C_NONE , C_BRNCH, C_NONE , C_DASH , C_NONE , C_BRNCH,  // 28-2F
    C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,  // 30-37
    C_DIGIT, C_DIGIT, C_NONE , C_NONE , C_NONE , C_NONE , C_NONE , C_NONE ,  // 38-3F

    // 0x40-0x5F: 'A'-'Z' are C_ALPHA; '_' is C_BRNCH
    C_NONE , C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA,  // 40-47
    C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA,  // 48-4F
    C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA,  // 50-57
    C_ALPHA, C_ALPHA, C_ALPHA, C_NONE , C_NONE , C_NONE , C_NONE , C_BRNCH,  // 58-5F

    // 0x60-0x7F: 'a'-'z' are C_ALPHA
    C_NONE , C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA,  // 60-67
    C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA,  // 68-6F
    C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA, C_ALPHA,  // 70-77
    C_ALPHA, C_ALPHA, C_ALPHA, C_NONE , C_NONE , C_NONE , C_NONE , C_NONE ,  // 78-7F

};
|
|
|
|
/*****************************************************************************
 *
 *  _ParseP
 *
 *      Parse a partial depot path.
 *
 *      A partial depot path extends up to the next "#" or "@".
 *
 *      If we find a "//", ":", or a double backslash then we have
 *      gone too far and started parsing something else, so backtrack to
 *      the end of the previous word.
 *
 *      A full depot path is a partial depot path that begins with
 *      two slashes.
 *
 *****************************************************************************/
|
|
|
|
//
//  Scans a partial depot path beginning at pszParse and records its extent
//  in *rgss (SetStart at the initial position, SetEnd where the scan stops).
//
//  The scan stops at the terminating null, at '#', or at '@'.  Once at
//  least one space has been passed, encountering "//", a double backslash,
//  or ':' means the scan has run into something that is not part of the
//  path; the position is rewound to the most recent space and any spaces
//  immediately preceding it are dropped as well.
//
//  Returns the position at which the scan stopped, which is the same
//  pointer passed to SetEnd.  The recorded substring may be empty.
//
LPCTSTR _ParseP(LPCTSTR pszParse, Substring *rgss)
{
    rgss->SetStart(pszParse);

    LPCTSTR pszLastSpace = NULL;

    while (*pszParse && *pszParse != TEXT('#') && *pszParse != TEXT('@')) {
        if (pszLastSpace) {
            if ((pszParse[0] == TEXT('/') && pszParse[1] == TEXT('/')) ||
                (pszParse[0] == TEXT('\\') && pszParse[1] == TEXT('\\')) ||
                (pszParse[0] == TEXT(':'))) {
                // Back up over the word we ate by mistake
                pszParse = pszLastSpace;
                // Back up over the whitespace we ate by mistake.
                // The bound is strict: when pszParse already sits at the
                // start of the substring there is no preceding character
                // that belongs to us, so pszParse[-1] must not be read
                // and the end must never move in front of the start.
                while (pszParse > rgss->Start() && IsSpace(pszParse[-1])) {
                    pszParse--;
                }
                break;
            }
        }
        if (*pszParse == TEXT(' ')) {
            pszLastSpace = pszParse;
        }
        pszParse++;
    }

    rgss->SetEnd(pszParse);             // Null string is possible

    return pszParse;
}
|
|
|
|
/*****************************************************************************
 *
 *  Parse strings
 *
 *  $D      date
 *  $P      full depot path
 *  $W      optional whitespace (does not consume a Substring slot)
 *  $a      email alias
 *  $b      branch name
 *  $d      digits
 *  $e      end of string (does not consume a Substring slot)
 *  $p      partial depot path, may not be null
 *  $u      user (with optional domain removed)
 *  $w      arbitrary word (whitespace-delimited)
 *
 *  NEED:
 *
 *  $R      maximal file revision specifier
 *  $q      quoted string
 *
 *  NOTE: Some pains were taken to make this a non-backtracking parser.
 *  If you want to add a backtracking rule, try to find a way so you don't.
 *
 *****************************************************************************/
|
|
|
|
//
//  Matches the text at pszParse against the pattern pszFormat.
//
//  Characters in pszFormat other than '$' must match the input exactly.
//  A '$' followed by a letter is a field directive; every directive except
//  $W and $e records the matched range in *rgss and advances rgss to the
//  next slot.  The caller must supply one Substring per slot-consuming
//  directive (rgss is not touched if the format has no such directives).
//
//  Returns a pointer to the first unconsumed input character on success,
//  or NULL if the input does not match.  Slots filled before a mismatch
//  was detected are left as written.
//
LPTSTR Parse(LPCTSTR pszFormat, LPCTSTR pszParse, Substring *rgss)
{
    SIZE_T cch;

    while (*pszFormat) {

        //
        //  Literal character: must match the input exactly.
        //
        if (*pszFormat != TEXT('$')) {
            if (*pszParse != *pszFormat) {
                return NULL;
            }
            pszParse++;
            pszFormat++;
            continue;
        }

        //
        //  Field directive: step over the '$' and the directive letter.
        //
        const TCHAR tchField = pszFormat[1];
        pszFormat += 2;

        switch (tchField) {

        //
        //  Keep the switch cases in alphabetical order, please.
        //  Just helps maintain my sanity.
        //

        case TEXT('D'):                 // Date: exactly 19 characters
            rgss->SetStart(pszParse);
            if (lstrlen(pszParse) < 19) {
                return NULL;            // Too short to hold a date
            }
            pszParse += 19;
            rgss->SetEnd(pszParse);
            rgss++;
            break;

        case TEXT('P'):                 // Full depot path
            if (pszParse[0] != TEXT('/') || pszParse[1] != TEXT('/')) {
                return NULL;            // Full paths start with //
            }
            goto L_p;                   // The rest is a partial path

        case TEXT('W'):                 // Optional whitespace
            // Everything nonzero at or below the space character counts
            for (; *pszParse &&
                   static_cast<UINT>(*pszParse) <= static_cast<UINT>(TEXT(' '));
                 pszParse++) {
            }
            break;

        case TEXT('a'):                 // Email alias
            rgss->SetStart(pszParse);
            if (IsAlpha(*pszParse)) {   // Leading character must be alpha
                for (; IsAlias(*pszParse); pszParse++) {
                }
            }
            cch = rgss->SetEnd(pszParse);
            if (cch < 1 || cch > 8) {
                return NULL;            // Length must be 1 through 8
            }
            rgss++;
            break;

        case TEXT('b'):                 // Branch name
            rgss->SetStart(pszParse);
            for (; IsBranch(*pszParse); pszParse++) {
            }
            cch = rgss->SetEnd(pszParse);
            if (!cch) {
                return NULL;            // Empty branch name not allowed
            }
            rgss++;
            break;

        case TEXT('d'):                 // Digits
            rgss->SetStart(pszParse);
            for (; IsDigit(*pszParse); pszParse++) {
            }
            if (!rgss->SetEnd(pszParse)) {
                return NULL;            // Need at least one digit
            }
            rgss++;
            break;

        case TEXT('e'):                 // End of string
            if (*pszParse) {
                return NULL;
            }
            break;

        L_p: case TEXT('p'):            // Partial depot path
            pszParse = _ParseP(pszParse, rgss);
            if (!pszParse) {
                return NULL;            // Parse failure
            }
            rgss++;
            break;

        case TEXT('u'):                 // Userid
            rgss->SetStart(pszParse);
            for (; _IsWord(*pszParse) && *pszParse != TEXT('@'); pszParse++) {
                // Each backslash restarts the field just past it, which
                // discards whatever preceded it (the domain prefix).
                if (*pszParse == TEXT('\\')) {
                    rgss->SetStart(pszParse + 1);
                }
            }
            if (!rgss->SetEnd(pszParse)) {
                return NULL;            // Need at least one character
            }
            rgss++;
            break;

#if 0
        case TEXT('s'):                 // String
            rgss->SetStart(pszParse);
            while ((_IsPrint(*pszParse) || *pszParse == TEXT('\t')) &&
                   *pszParse != *pszFormat) {
                pszParse++;
            }
            rgss->SetEnd(pszParse);     // Null string is okay
            rgss++;
            break;
#endif

        case TEXT('w'):                 // Arbitrary word
            rgss->SetStart(pszParse);
            for (; _IsWord(*pszParse); pszParse++) {
            }
            if (!rgss->SetEnd(pszParse)) {
                return NULL;            // Need at least one character
            }
            rgss++;
            break;

        default:                        // Unknown directive
            ASSERT(0);
            return NULL;
        }
    }

    return CCAST(LPTSTR, pszParse);
}
|
|
|
|
/*****************************************************************************
 *
 *  Tokenizer
 *
 *****************************************************************************/
|
|
|
|
//
//  Repositions the tokenizer at the first character of psz for which
//  IsSpace is false.
//
void Tokenizer::Restart(LPCTSTR psz)
{
    LPCTSTR pszFirst = psz;

    for (; IsSpace(*pszFirst); ++pszFirst) {
        /* step over leading spaces */
    }

    _psz = pszFirst;
}
|
|
|
|
//
//  Extracts the next token into str and advances past any spaces that
//  follow it.
//
//  str is reset first.  Returns FALSE (leaving str reset) if the tokenizer
//  is already at the end of the string, TRUE otherwise.
//
//  Quotation marks toggle quoted mode, in which spaces do not end the
//  token.  Quotation marks themselves are not copied, except that the
//  second mark of an adjacent pair emits one literal quotation mark.
//
BOOL Tokenizer::Token(String& str)
{
    str.Reset();

    if (!*_psz) return FALSE;

    bool fInQuote = false;      // Currently between quotation marks?
    bool fQuoteRun = false;     // Was the previous character an
                                // unpaired quotation mark?

    //
    //  Boundary case: a quotation mark that opens the token enters
    //  quoted mode but is not treated as the start of a run, so a
    //  second mark right after it does not produce a literal quote.
    //
    if (*_psz == TEXT('"')) {
        fInQuote = true;
        _psz++;
    }

    for (; *_psz && (fInQuote || !IsSpace(*_psz)); _psz++) {
        if (*_psz != TEXT('"')) {
            fQuoteRun = false;
            str << *_psz;
            continue;
        }

        //
        //  A quotation mark flips both states.  If that ends a run
        //  (this mark directly follows another), emit a literal mark.
        //
        fInQuote = !fInQuote;
        fQuoteRun = !fQuoteRun;
        if (!fQuoteRun) {
            str << TEXT('"');
        }
    }

    Restart(_psz);              /* Eat any trailing spaces */

    return TRUE;
}
|
|
|
|
/*****************************************************************************
 *
 *  GetOpt
 *
 *****************************************************************************/
|
|
|
|
//
|
|
// Returns the switch character, or '\0' if no more switches.
|
|
//
|
|
// The option that terminated switch parsing is left in the tokenizer.
|
|
//
|
|
TCHAR GetOpt::NextSwitch()
|
|
{
|
|
if (!_pszUnparsed) {
|
|
LPCTSTR pszTokUndo = _tok.Unparsed();
|
|
if (!_tok.Token(_str)) {
|
|
return TEXT('\0'); // end of command line
|
|
}
|
|
|
|
if (_str[0] != TEXT('-')) {
|
|
_tok.Restart(pszTokUndo); // so caller can re-read it
|
|
_pszValue = _str; // all future values will go nere
|
|
return TEXT('\0'); // end of command line
|
|
|
|
}
|
|
|
|
if (_str[1] == TEXT('\0')) { // base - end switches
|
|
_pszValue = _str; // all future values will go nere
|
|
return TEXT('\0'); // but do not re-read it
|
|
}
|
|
|
|
_pszUnparsed = &_str[1];
|
|
}
|
|
|
|
TCHAR tchSwitch = *_pszUnparsed;
|
|
LPCTSTR pszParam;
|
|
for (pszParam = _pszParams; *pszParam; pszParam++) {
|
|
if (tchSwitch == *pszParam) {
|
|
|
|
/*
|
|
* Value can come immediately afterwards or as a separate token.
|
|
*/
|
|
_pszValue = _pszUnparsed + 1;
|
|
|
|
if (_pszValue[0] == TEXT('\0')) {
|
|
_tok.Token(_str);
|
|
_pszValue = _str;
|
|
}
|
|
|
|
_pszUnparsed = NULL;
|
|
return tchSwitch;
|
|
}
|
|
}
|
|
|
|
_pszUnparsed++;
|
|
if (!*_pszUnparsed) _pszUnparsed = NULL;
|
|
return tchSwitch;
|
|
}
|
|
|
|
/*****************************************************************************
 *
 *  CommentParser - Parses checkin comments
 *
 *****************************************************************************/
|
|
|
|
//
//  Processes one line of a checkin description.
//
//  psz is a writable buffer and is modified in place.  Lines are ignored
//  once a comment has been recorded, and blank lines are skipped.  A line
//  of the form "Checkin by - <rest>" passes <rest> to SetDev and does not
//  count as the comment.  Otherwise the line becomes the comment; a
//  leading "[alias]" or "(alias)", optionally preceded by digits, is
//  passed to SetDev and removed from the comment text.
//
void CommentParser::AddComment(LPTSTR psz)
{
    if (_fHaveComment) return;

    //
    //  Ignore leading spaces.
    //
    while (*psz == TEXT('\t') || *psz == TEXT(' ')) psz++;

    //
    //  Skip blank description lines.
    //
    if (*psz == TEXT('\0')) return;

    //
    //  Okay, here comes the money.  Is this a Gauntlet checkin?
    //  (The format has no $ directives, so no Substring slots are needed.)
    //
    LPTSTR pszRest = Parse(TEXT("Checkin by - "), psz, NULL);
    if (pszRest) {
        //
        //  You betcha.  This overrides the dev column.
        //
        SetDev(pszRest);
    } else {
        //
        //  No, it's a regular comment.  Use the first nonblank comment
        //  line as the text and toss the rest.
        //
        //  Change all tabs to spaces because listview doesn't like tabs.
        //
        ChangeTabsToSpaces(psz);

        //
        //  If the comment begins with [alias] or (alias), then move
        //  that alias to the developer column.  Digits can optionally
        //  be inserted before the alias.
        //
        //  The format strings are wrapped in TEXT() to match Parse's
        //  LPCTSTR parameter regardless of the character set in use.
        //
        Substring rgss[2];

        if ((pszRest = Parse(TEXT("[$a]$W"), psz, rgss)) ||
            (pszRest = Parse(TEXT("($a)$W"), psz, rgss))) {
            SetDev(rgss[0].Finalize());
            psz = pszRest;
        } else if ((pszRest = Parse(TEXT("$d$W[$a]$W"), psz, rgss)) ||
                   (pszRest = Parse(TEXT("$d$W($a)$W"), psz, rgss))) {
            SetDev(rgss[1].Finalize());
            //
            //  Now collapse out the alias by sliding the remaining text
            //  down over it, starting at the bracket that precedes the
            //  alias.  Source and destination overlap within the same
            //  buffer, so copy forward one character at a time (through
            //  the terminator) rather than using a string-copy routine
            //  whose behavior on overlapping buffers is not defined.
            //
            LPTSTR pszDst = rgss[1].Start() - 1;
            LPCTSTR pszSrc = pszRest;
            while ((*pszDst++ = *pszSrc++) != TEXT('\0')) {
            }
        }

        SetComment(psz);
        _fHaveComment = TRUE;
    }
}
|