You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1393 lines
39 KiB
1393 lines
39 KiB
// Copyright (c)1997-1999 Microsoft Corporation, All Rights Reserved
|
|
/* copied from ..\htmed\lexer.cpp */
|
|
|
|
/*++
|
|
|
|
Copyright (c) 1995 Microsoft Corporation
|
|
|
|
File: lexer.cpp
|
|
|
|
Abstract:
|
|
Nitty Gritty Lexer stuff
|
|
|
|
Contents:
|
|
SetValueSeen()
|
|
IsSingleOp()
|
|
IsWhiteSpace()
|
|
MapToken()
|
|
FindEndTag()
|
|
MakeSublang()
|
|
SetLanguage()
|
|
FindTable()
|
|
FindTable()
|
|
RemoveTable()
|
|
MakeTableSet()
|
|
GetToken()
|
|
IfHackComment()
|
|
FindServerScript()
|
|
FindEndComment()
|
|
FindEndEntity()
|
|
FindEntityRef()
|
|
FindValue()
|
|
FindEndString()
|
|
FindTagOpen()
|
|
FindText()
|
|
FindNextToken()
|
|
GetTextHint()
|
|
GetHint()
|
|
GetTokenLength()
|
|
GetValueTokenLength()
|
|
IsElementName()
|
|
IsAttributeName()
|
|
IsIdentifier()
|
|
IsUnknownID()
|
|
IsNumber()
|
|
CColorHtml::SetTable()
|
|
CColorHtml::InitSublanguages()
|
|
|
|
History:
|
|
2/14/97 cgomes: Created
|
|
|
|
|
|
--*/
|
|
|
|
#include "stdafx.h"
|
|
|
|
#include "resource.h"
|
|
#include "guids.h"
|
|
#include "token.h"
|
|
#include "table.h"
|
|
#include "lexer.h"
|
|
|
|
UINT FindClientScriptEnd(LPCTSTR pchLine, UINT cbLen, UINT cbCur, DWORD * plxs, TXTB & token);
|
|
|
|
#undef ASSERT
|
|
#define ASSERT(b) _ASSERTE(b)
|
|
// HACK: we keep a copy of a ptr to the ASP table and sublang
|
|
// so we can do special behavior for ASP files
|
|
CTableSet* g_ptabASP = 0;
|
|
PSUBLANG g_psublangASP = 0;
|
|
|
|
PTABLESET g_arpTables[CV_MAX+1];
|
|
|
|
// NOTE: added to handle value tokens properly.
|
|
UINT GetValueTokenLength(LPCTSTR pchLine, UINT cbLen, UINT cbCur);
|
|
|
|
// mark state transition from value -> next attribute
|
|
inline int SetValueSeen(DWORD *plxs)
|
|
{
|
|
if (*plxs & inValue)
|
|
{
|
|
*plxs &= ~inValue;
|
|
*plxs |= inAttribute;
|
|
return TRUE;
|
|
}
|
|
else
|
|
return FALSE;
|
|
}
|
|
|
|
// REVIEW (walts) - need better way
|
|
inline void SetScriptLanguage(LPCTSTR pchLine, DWORD *plxs)
|
|
{
|
|
LPCTSTR strJavaScript = _T("javascript");
|
|
LPCTSTR strVBScript = _T("vbscript");
|
|
// triedit's special language. Its set when we convert server-side scripts into
|
|
// client-side scripts. Its a dummy language. if we find that as language, we
|
|
// set in ServerASP. It is reset(removed) in FindNextToken().
|
|
LPCTSTR strServerAsp = _T("serverasp");
|
|
|
|
// language attribute may have quotes around it.
|
|
// if it does then advance past the first quote.
|
|
// ex. <SCRIPT LANGUAGE="VBScript">
|
|
if(*pchLine == L'\"')
|
|
pchLine++;
|
|
|
|
if (_tcsnicmp(pchLine, strJavaScript, lstrlen(strJavaScript)) == 0)
|
|
{
|
|
*plxs &= ~inVBScript;
|
|
*plxs &= ~inServerASP;
|
|
*plxs |= inJavaScript;
|
|
}
|
|
else if (_tcsnicmp(pchLine, strVBScript, lstrlen(strVBScript)) == 0)
|
|
{
|
|
*plxs &= ~inJavaScript;
|
|
*plxs &= ~inServerASP;
|
|
*plxs |= inVBScript;
|
|
}
|
|
else if (_tcsnicmp(pchLine, strServerAsp, lstrlen(strServerAsp)) == 0)
|
|
{
|
|
*plxs &= ~inJavaScript;
|
|
*plxs &= ~inVBScript;
|
|
*plxs |= inServerASP;
|
|
}
|
|
}
|
|
|
|
// TRUE when hint lies in the half-open range [tokOP_SINGLE, tokOP_MAX).
inline BOOL IsSingleOp(HINT hint)
{
    if (hint < tokOP_SINGLE)
        return FALSE;
    return (hint < tokOP_MAX);
}
|
|
|
|
// Thin wrapper over _istspace(); returns its result unchanged
// (nonzero for whitespace, zero otherwise).
inline BOOL IsWhiteSpace(TCHAR c)
{
    const BOOL fSpace = _istspace(c);
    return fSpace;
}
|
|
|
|
|
|
// NOTE: Added to handle value tokens properly
|
|
inline IsValueChar(TCHAR ch)
|
|
{
|
|
// REVIEW(cgomes): specify all the invalid value characters
|
|
return ch != _T('<') && ch != _T('>');
|
|
};
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// map parsed token to returned token
|
|
|
|
// left column must be in ascending order
|
|
static TOKEN _rgTokenMap[] =
|
|
{
|
|
tokName, tokSpace,
|
|
tokNum, tokSpace,
|
|
tokParEnt, tokSpace,
|
|
tokResName, tokSpace,
|
|
0, 0
|
|
};
|
|
|
|
static TOKEN MapToken(TOKEN tokClass, DWORD lxs)
|
|
{
|
|
if (IsSingleOp((HINT)tokClass))
|
|
return tokOp;
|
|
else if ((tokClass == tokTag) && (lxs & inHTXTag))
|
|
return tokSSS;
|
|
for (int i = 0; (_rgTokenMap[i] != 0) && (_rgTokenMap[i] >= tokClass); i += 2)
|
|
{
|
|
if (_rgTokenMap[i] == tokClass)
|
|
return _rgTokenMap[i + 1];
|
|
}
|
|
return tokClass;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Scan special text content (a state in TEXTMASK) for the end tag that
// closes it.
//
// The end tag is built as "</" + pellex->sz + ">" for the ELLEX returned by
// pellexFromTextState(*plxs) and compared case-insensitively.
//
// Returns the index where scanning stopped and stores it in token.ibTokMac:
//   - at the matching end tag: the TEXTMASK bits are cleared from *plxs;
//   - at a "<%" sequence: inHTXTag is set in *plxs;
//   - otherwise cbLen (end of line reached).
// If no ELLEX is available, or the end tag would not fit the local buffer,
// nothing is consumed and cbCur is returned unchanged.
UINT FindEndTag(LPCTSTR pchLine, UINT cbLen, UINT cbCur, DWORD *plxs, TXTB & token)
{
    ASSERT(pchLine);

    TCHAR szEnd[16];
    // capacity in characters, not bytes (they differ when TCHAR is wide)
    const UINT cchEndMax = sizeof(szEnd) / sizeof(szEnd[0]);

    ELLEX * pellex = pellexFromTextState(*plxs);
    ASSERT(0 != pellex); // shouldn't be called with something other than special text state
    if (0 == pellex)
    {
        token.ibTokMac = cbCur;
        return cbCur;
    }

    // length of end tag: "</" + name + ">"
    // NOTE(review): assumes pellex->cb is the character length of pellex->sz -- confirm
    UINT cbCmp = 3 + pellex->cb;
    ASSERT(cbCmp < cchEndMax);
    if (cbCmp >= cchEndMax)
    {
        // would overflow szEnd below; report a zero-length scan instead
        token.ibTokMac = cbCur;
        return cbCur;
    }

    _tcscpy(szEnd, _T("</"));
    _tcscat(szEnd, pellex->sz);
    _tcscat(szEnd, _T(">"));

    while (cbCur < cbLen)
    {
        if (_T('<') != pchLine[cbCur])
        {
            cbCur += _tclen(&pchLine[cbCur]);
            continue;
        }

        if ((cbLen - cbCur >= cbCmp) && (0 == _tcsnicmp(szEnd, &pchLine[cbCur], cbCmp)))
        {
            *plxs &= ~TEXTMASK; // special text modes are exclusive
            break;
        }

        if ((cbCur + 1 < cbLen) && (_T('%') == pchLine[cbCur+1]))
        {
            *plxs |= inHTXTag;
            break;
        }

        cbCur++;    // some other '<': keep scanning
    }

    token.ibTokMac = cbCur;
    return cbCur;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Fill in the sublanguage record *ps.
//
// lxsInitial is derived from id via LxsFromSubLangIndex(); nIdTemplate and
// clsid are stored as given. szSubLang receives a newly allocated copy of
// strName.
//
// Returns TRUE when the name was copied. Returns FALSE (with szSubLang left
// NULL) when strName has zero length or the allocation yields NULL.
BOOL MakeSublang(PSUBLANG ps, UINT id, const TCHAR *strName, UINT nIdTemplate, CLSID clsid)
{
    ASSERT( NULL != ps );

    ps->szSubLang = NULL;
    ps->lxsInitial = LxsFromSubLangIndex(id);
    ps->nIdTemplate = nIdTemplate;
    ps->clsidTemplate = clsid;

    const int cchName = lstrlen(strName);
    if (0 == cchName)
        return FALSE;

    LPTSTR szCopy = new TCHAR [cchName + 1];
    if (NULL == szCopy)
        return FALSE;

    _tcscpy(szCopy, strName);
    ps->szSubLang = szCopy;
    return TRUE;
}
|
|
|
|
// Set sublang and tableset array members,
|
|
// putting the default one in 0th position.
|
|
//
|
|
void SetLanguage(TCHAR * strDefault, PSUBLANG rgSublang,
|
|
PTABLESET pTab, UINT & index, UINT nIdTemplate, CLSID clsid)
|
|
{
|
|
if (pTab != NULL)
|
|
{
|
|
int i;
|
|
if (lstrcmp(strDefault, pTab->Name()) == 0)
|
|
i = 0;
|
|
else
|
|
i = index;
|
|
if (MakeSublang(rgSublang+i, i, pTab->Name(), nIdTemplate, clsid))
|
|
{
|
|
g_arpTables[i] = pTab;
|
|
if (i)
|
|
index++;
|
|
else
|
|
g_pTable = pTab;
|
|
}
|
|
else
|
|
delete pTab;
|
|
}
|
|
}
|
|
|
|
// Look up a table set by name in the NULL-terminated array rgpts.
//
// Names are compared by content with lstrcmp(), the same comparison
// SetLanguage() uses on Name(), so a match does not depend on both strings
// sharing one address.
//
// Returns the first matching table set, or NULL when none matches.
CTableSet * FindTable(CTableSet ** rgpts, TCHAR *strName)
{
    for (int n = 0; rgpts[n]; n++)
    {
        if (lstrcmp(rgpts[n]->Name(), strName) == 0)
            return rgpts[n];
    }
    return NULL;
}
|
|
|
|
// Look up a table set by identity in the NULL-terminated array rgpts.
// Returns pts when it is present in the array, NULL otherwise.
CTableSet * FindTable(CTableSet ** rgpts, CTableSet * pts)
{
    for (CTableSet ** ppts = rgpts; *ppts != NULL; ++ppts)
    {
        if (*ppts == pts)
            return *ppts;
    }
    return NULL;
}
|
|
|
|
// Remove the first occurrence of pts from the NULL-terminated array rgpts,
// shifting the following entries (terminator included) down by one slot.
// Does nothing when pts is not in the array.
void RemoveTable(CTableSet ** rgpts, CTableSet *pts)
{
    // locate the entry, stopping at the terminator
    int iFound = 0;
    while (rgpts[iFound] != NULL && rgpts[iFound] != pts)
        ++iFound;

    if (rgpts[iFound] == NULL)
        return;     // not present

    // close the gap; the loop ends once the terminator has been copied down
    for (int i = iFound; rgpts[i] != NULL; ++i)
        rgpts[i] = rgpts[i + 1];
}
|
|
|
|
// Create a new CStaticTableSet from att and nIdName and return it.
// The first parameter is unused.
CTableSet * MakeTableSet(CTableSet ** /*rgpts*/, RWATT_T att, UINT nIdName)
{
    CTableSet * ptsNew = new CStaticTableSet(att, nIdName);
    return ptsNew;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
// GetToken()
|
|
//
|
|
UINT GetToken(LPCTSTR pchLine, UINT cbLen, UINT cbCur, DWORD * plxs, TXTB & token)
|
|
{
|
|
ASSERT (cbCur < cbLen);
|
|
if(cbCur > cbLen)
|
|
return cbCur;
|
|
|
|
UINT cbCount = 0;
|
|
|
|
// init token
|
|
token.tok = 0;
|
|
|
|
// initialize location where token starts
|
|
token.ibTokMin = cbCur;
|
|
|
|
if (*plxs & inHTXTag)
|
|
cbCount = FindServerScript(pchLine, cbLen, cbCur, plxs, token);
|
|
else if (*plxs & inSCRIPT && !(*plxs & inTag) && !(*plxs & inServerASP))
|
|
{
|
|
// NOTE that we want to skip tokenizing scripts that are special to triedit
|
|
// when we wrap server-side scripts in client-side scripts, we set a dummy
|
|
// language as 'serverasp'. inServerASP is set in that case.
|
|
cbCount = FindClientScriptEnd(pchLine, cbLen, cbCur, plxs, token);
|
|
}
|
|
else if (*plxs & inComment) // in a comment
|
|
{
|
|
if (*plxs & inSCRIPT)
|
|
*plxs |= inScriptText;
|
|
COMMENTTYPE ct = IfHackComment(pchLine, cbLen, cbCur, plxs, token);
|
|
if (ct == CT_METADATA)
|
|
{
|
|
// Treat as an element
|
|
cbCount = FindNextToken(pchLine, cbLen, cbCur, plxs, token);
|
|
// Remove inBangTag
|
|
*plxs &= ~inBangTag;
|
|
}
|
|
else if (ct == CT_IECOMMENT)
|
|
cbCount = token.ibTokMac;
|
|
else
|
|
cbCount = FindEndComment(pchLine, cbLen, cbCur, plxs, token);
|
|
}
|
|
else if (*plxs & INSTRING) // in a string
|
|
cbCount = FindEndString(pchLine, cbLen, cbCur, plxs, token);
|
|
else
|
|
cbCount = FindNextToken(pchLine, cbLen, cbCur, plxs, token);
|
|
|
|
token.tokClass = MapToken(token.tokClass, *plxs);
|
|
return cbCount;
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////
|
|
// IfHackComment
|
|
//
|
|
// Probe ahead in the current line to see if we have what IE recognizes
|
|
// as the end of a comment ("->"). This does not conform to RFC 1866 or SGML,
|
|
// but suppports browser behavior. This lets us tolerate comments of the
|
|
// form: "<!--- whatever ->"
|
|
// (note how it ends)
|
|
//
|
|
// Returns a COMMENTTYPE enum.
|
|
// 0 if norma comment
|
|
// 1 if IE comment
|
|
// -1 if METADATA comment
|
|
//
|
|
// Proper comments are scanned using FindEndComment().
|
|
//
|
|
COMMENTTYPE IfHackComment(LPCTSTR pchLine, UINT cbLen, UINT cbCur, DWORD * plxs, TXTB & token)
|
|
{
|
|
token.tokClass = tokComment;
|
|
while (cbCur+1 < cbLen)
|
|
{
|
|
if(_tcsnicmp(&pchLine[cbCur], _T("METADATA"), lstrlen(_T("METADATA"))) == 0)
|
|
{
|
|
token.ibTokMac = cbCur + 1; // include second dash??
|
|
*plxs &= ~inComment;
|
|
// Remove inBangTag
|
|
*plxs &= ~inBangTag;
|
|
*plxs |= inTag;
|
|
return CT_METADATA; // METADATA
|
|
}
|
|
else if (pchLine[cbCur] == '-' && pchLine[cbCur + 1] == '>')
|
|
{
|
|
token.ibTokMac = cbCur + 1;
|
|
*plxs &= ~inComment;
|
|
*plxs &= ~inScriptText;
|
|
return CT_IECOMMENT;
|
|
}
|
|
else
|
|
{
|
|
cbCur += _tclen(&pchLine[cbCur]);
|
|
}
|
|
}
|
|
return CT_NORMAL;
|
|
}
|
|
|
|
|
|
// Scan one token of a server-side script section (<% ... %>).
//
//   - At "<%": reports the open delimiter (tokTag / TokTag_SSSOPEN) and sets
//     inHTXTag.
//   - At "%>": reports the close delimiter (tokSSS / TokTag_SSSCLOSE) and
//     clears inHTXTag and inNestedQuoteinSSS.
//   - Otherwise: reports the script text up to the next "%>" or end of line
//     as tokSSS. While scanning, a double quote seen with inTag, inHTXTag,
//     inAttribute and inString all set turns on inNestedQuoteinSSS.
//
// Returns the index just past the token (also stored in token.ibTokMac).
UINT FindServerScript(LPCTSTR pchLine, UINT cbLen, UINT cbCur, DWORD * plxs, TXTB & token)
{
    LPCTSTR pch = &pchLine[cbCur];
    const BOOL fHasNext = (cbCur + 1 < cbLen);

    // parse HTX start tag
    if (fHasNext && pch[0] == _T('<') && pch[1] == '%')
    {
        token.tokClass = tokTag;
        token.tok = TokTag_SSSOPEN;
        token.ibTokMac = cbCur + 2;
        *plxs |= inHTXTag;
        return token.ibTokMac;
    }

    ASSERT(*plxs & inHTXTag); // should be in HTXTag state here

    // parse HTX end tag
    if (fHasNext && pch[0] == _T('%') && pch[1] == '>')
    {
        token.tok = TokTag_SSSCLOSE;
        token.tokClass = tokSSS; //tokTag;
        token.ibTokMac = cbCur + 2;
        *plxs &= ~inHTXTag;
        *plxs &= ~inNestedQuoteinSSS;
        return token.ibTokMac;
    }

    // script body: everything up to the closing delimiter
    token.tokClass = tokSSS;
    for (;;)
    {
        if (cbCur >= cbLen)
            break;

        if (*pch == _T('%') && (cbCur + 1 < cbLen) && (pch[1] == _T('>')))
            break;

        if ((*pch == _T('"'))
            && (*plxs & inTag)
            && (*plxs & inHTXTag)
            && (*plxs & inAttribute)
            && (*plxs & inString))
        {
            *plxs |= inNestedQuoteinSSS;
        }

        const int cchStep = _tclen(pch);
        cbCur += cchStep;
        pch += cchStep;
    }

    token.ibTokMac = cbCur;
    return cbCur;
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////
|
|
// FindClientScriptEnd()
|
|
//
|
|
// HTMED CHANGE: Find the end of client script block
|
|
//
|
|
UINT FindClientScriptEnd(LPCTSTR pchLine, UINT cbLen, UINT cbCur, DWORD * plxs, TXTB & token)
|
|
{
|
|
LPCTSTR pCurrent = &pchLine[cbCur];
|
|
int cb;
|
|
|
|
TCHAR rgEndScript[] = _T("</SCRIPT");
|
|
int cchEndScript = (wcslen(rgEndScript) - 1);
|
|
|
|
if( cbCur + cchEndScript < cbLen &&
|
|
0 == _tcsnicmp(pCurrent, rgEndScript, cchEndScript))
|
|
{
|
|
token.tokClass = tokTag;
|
|
token.tok = TokTag_END;
|
|
*plxs &= ~inSCRIPT;
|
|
*plxs |= inEndTag;
|
|
token.ibTokMac = cbCur + 2;
|
|
return token.ibTokMac;
|
|
}
|
|
|
|
token.tokClass = tokSpace;
|
|
|
|
while (cbCur < cbLen)
|
|
{
|
|
if (*pCurrent == _T('<') && (cbCur+1 < cbLen) && (*(pCurrent+1) == _T('/')))
|
|
{
|
|
// Check if found end </SCRIPT
|
|
if( cbCur + cchEndScript < cbLen &&
|
|
0 == _tcsnicmp(pCurrent, rgEndScript, cchEndScript))
|
|
{
|
|
// Check if found end </SCRIPT
|
|
break;
|
|
}
|
|
}
|
|
cb = _tclen(pCurrent);
|
|
cbCur += cb;
|
|
pCurrent += cb;
|
|
}
|
|
|
|
token.ibTokMac = cbCur;
|
|
return cbCur;
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////
|
|
// FindEndComment()
|
|
//
|
|
// Find the end of comment ("--").
|
|
//
|
|
UINT FindEndComment(LPCTSTR pchLine, UINT cbLen, UINT cbCur, DWORD * plxs, TXTB & token)
|
|
{
|
|
LPCTSTR pCurrent = &pchLine[cbCur];
|
|
BOOL bEndComment = FALSE;
|
|
int cb;
|
|
|
|
ASSERT(*plxs & inComment); // must be in a comment now
|
|
|
|
token.tokClass = tokComment;
|
|
|
|
while (!bEndComment && cbCur < cbLen)
|
|
{
|
|
if (*pCurrent == _T('-')) // check the character to see if it's the first "-" in "--"
|
|
{
|
|
pCurrent++;
|
|
cbCur++;
|
|
if ((cbCur < cbLen) &&
|
|
(*pCurrent == _T('-'))) // we're possibly at the end, so search for the final "--" pair
|
|
{
|
|
bEndComment = TRUE;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
cb = _tclen(pCurrent);
|
|
cbCur += cb;
|
|
pCurrent += cb;
|
|
}
|
|
}
|
|
if (cbCur < cbLen)
|
|
{
|
|
cb = _tclen(pCurrent);
|
|
cbCur += cb;
|
|
pCurrent += cb;
|
|
}
|
|
|
|
token.ibTokMac = cbCur;
|
|
|
|
// reset state if we reach end of comment
|
|
if (bEndComment)
|
|
*plxs &= ~inComment;
|
|
|
|
return cbCur;
|
|
}
|
|
|
|
/////////////////////////////////////////////////////////////
|
|
// FindEndEntity()
|
|
//
|
|
// Find the end of the special character sequence (ends with ; or whitespace).
|
|
//
|
|
UINT FindEndEntity(LPCTSTR pchLine, UINT cbLen, UINT cbCur, DWORD * /*plxs*/, TXTB & token)
|
|
{
|
|
token.tokClass = tokEntity;
|
|
int cb = GetTokenLength(pchLine, cbLen, cbCur);
|
|
if (pchLine[cbCur + cb] == ';')
|
|
cb++;
|
|
token.ibTokMac = cbCur + cb;
|
|
return token.ibTokMac;
|
|
}
|
|
|
|
/////////////////////////////////////////////////////////////
|
|
// Find an entity reference or non-entity ref, literal "&..."
|
|
//
|
|
UINT FindEntityRef(LPCTSTR pchLine, UINT cbLen, UINT cbCur, DWORD * /*plxs*/, TXTB & token)
|
|
{
|
|
ASSERT(cbCur < cbLen);
|
|
ASSERT(pchLine[cbCur] == '&'); // must be on ERO
|
|
|
|
cbCur++;
|
|
if (cbCur == cbLen)
|
|
{
|
|
NotEntity:
|
|
token.tokClass = tokIDENTIFIER; // plain text
|
|
token.ibTokMac = cbCur;
|
|
return cbCur;
|
|
}
|
|
|
|
if (pchLine[cbCur] == '#')
|
|
{
|
|
// parse and check valid number
|
|
if (!IsNumber(pchLine, cbLen, cbCur + 1, token))
|
|
goto NotEntity;
|
|
|
|
// must be <= 3 digits
|
|
if (token.ibTokMac - (cbCur + 1) > 3)
|
|
goto NotEntity;
|
|
|
|
// validate range
|
|
TCHAR szNum[4];
|
|
_tcsncpy(szNum, &pchLine[cbCur + 1], 3);
|
|
if (_tcstoul(szNum, 0, 10) > 255)
|
|
goto NotEntity;
|
|
|
|
// we now have a valid numeric entity ref
|
|
|
|
token.tokClass = tokEntity;
|
|
cbCur = token.ibTokMac;
|
|
|
|
// scan for end of entity ref
|
|
|
|
// scan rest of alphanumeric token
|
|
// REVIEW: Is this correct? IE 4.40.308 behaves this way
|
|
while ((cbCur < cbLen) && IsCharAlphaNumeric(pchLine[cbCur]))
|
|
cbCur++;
|
|
|
|
// scan delimiter
|
|
if (cbCur < cbLen)
|
|
cbCur++;
|
|
token.ibTokMac = cbCur;
|
|
return cbCur;
|
|
}
|
|
else if (!IsCharAlpha(pchLine[cbCur]))
|
|
{
|
|
goto NotEntity;
|
|
}
|
|
else
|
|
{
|
|
// parse and check entity name
|
|
UINT nLen = GetTokenLength(pchLine, cbLen, cbCur);
|
|
if (!g_pTable->FindEntity(&pchLine[cbCur], nLen))
|
|
goto NotEntity;
|
|
|
|
cbCur += nLen;
|
|
// eat delimiter if necessary
|
|
if ((cbCur < cbLen) &&
|
|
(pchLine[cbCur] == ';' || IsWhiteSpace(pchLine[cbCur])))
|
|
cbCur++;
|
|
token.tokClass = tokEntity;
|
|
token.ibTokMac = cbCur;
|
|
return cbCur;
|
|
}
|
|
}
|
|
|
|
|
|
/////////////////////////////////////////////////////////////
|
|
// FindEndValue
|
|
// Find the end of an unquoted value.
|
|
//
|
|
// Scan for whitespace or end if tag
|
|
//
|
|
UINT FindValue(LPCTSTR pchLine, UINT cbLen, UINT cbCur, DWORD * plxs, TXTB & token)
|
|
{
|
|
ASSERT(cbCur < cbLen);
|
|
|
|
do
|
|
{
|
|
cbCur++;
|
|
} while ( cbCur < cbLen &&
|
|
!IsWhiteSpace(pchLine[cbCur]) &&
|
|
pchLine[cbCur] != '>' );
|
|
|
|
token.tokClass = tokValue;
|
|
token.ibTokMac = cbCur;
|
|
|
|
// switch from value to attribute
|
|
*plxs &= ~inValue;
|
|
*plxs |= inAttribute;
|
|
|
|
return cbCur;
|
|
}
|
|
|
|
/////////////////////////////////////////////////////////////
|
|
// FindEndString()
|
|
// Find the end of the string.
|
|
// Should only be called when we are in the string mode already.
|
|
//
|
|
UINT FindEndString (LPCTSTR pchLine, UINT cbLen, UINT cbCur, DWORD * plxs, TXTB & token)
|
|
{
|
|
LPCTSTR pCurrent = &pchLine[cbCur];
|
|
int cb;
|
|
BOOL bInString = TRUE;
|
|
TCHAR chDelim;
|
|
|
|
ASSERT (*plxs & INSTRING); // must be in a string now
|
|
|
|
token.tokClass = tokString;
|
|
chDelim = (*plxs & inStringA) ? _T('\'') : _T('"');
|
|
|
|
while (bInString && cbCur < cbLen)
|
|
{
|
|
if (*pCurrent == chDelim)
|
|
{
|
|
*plxs &= ~INSTRING;
|
|
bInString = FALSE;
|
|
SetValueSeen(plxs);
|
|
}
|
|
else if (*pCurrent == _T('<') &&
|
|
cbCur+1 < cbLen &&
|
|
*(pCurrent+1) == _T('%'))
|
|
{
|
|
*plxs |= inHTXTag;
|
|
break;
|
|
}
|
|
cb = _tclen(pCurrent);
|
|
cbCur += cb;
|
|
pCurrent += cb;
|
|
}
|
|
token.ibTokMac = cbCur;
|
|
return cbCur;
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////
|
|
//
|
|
// Scan a tag-open delimiter. pchLine[cbCur] must be '<'.
//
// The character after '<' selects the kind of tag:
//      '!'     markup declaration   -> inBangTag, TokTag_BANG   ("<!")
//      '/'     end tag              -> inEndTag,  TokTag_END    ("</")
//      '?'     processing instr.    -> inPITag,   TokTag_PI     ("<?")
//      '%'     server script        -> inHTXTag,  TokTag_SSSOPEN ("<%")
//      letter  start tag            -> inTag,     TokTag_START  ("<" only)
//      other   not a tag: the '<' is reported as tokIDENTIFIER
// A '<' that is the last character of the line sets inTag.
// inScriptText is always cleared. Returns the index just past the delimiter
// (also stored in token.ibTokMac).
//
UINT FindTagOpen(LPCTSTR pchLine, UINT cbLen, UINT cbCur, DWORD * plxs, TXTB & token)
{
    ASSERT(pchLine[cbCur] == '<');

    token.tokClass = tokTag;
    *plxs &= ~inScriptText; // turn off script coloring when inside tags
    cbCur++;

    if (cbCur == cbLen)
    {
        *plxs |= inTag;
        token.ibTokMac = cbCur;
        return cbCur;
    }

    // HTMED CHANGE (copied from htmed\lexer.cpp):
    // REVIEW(cgomes): Figure out if I should turn off inSCRIPT in any of the
    // following cases. Right now I only do it for the </ case.
    const TCHAR chNext = pchLine[cbCur];

    if (chNext == '!')          // MDO - Markup Declaration Open
    {
        cbCur++;
        *plxs |= inBangTag;
        token.tok = TokTag_BANG;
    }
    else if (chNext == '/')     // End tag
    {
        cbCur++;
        *plxs |= inEndTag;
        token.tok = TokTag_END;
#ifdef NEEDED // copied from htmed\lexer.cpp
        // HTMED CHANGE:
        // REVIEW(cgomes): Colorizer bug: it never removes the inSCRIPT state
        // This removes the inSCRIPT in the case <SCRIPT <BODY>
        // in this case <BODY is in error.
        //
        *plxs &= ~inSCRIPT;
#endif //NEEDED
    }
    else if (chNext == '?')     // PI - Processing Instruction
    {
        // REVIEW: PI is SGML -- not in HTML, but might be added
        cbCur++;
        *plxs |= inPITag;
        token.tok = TokTag_PI;
    }
    else if (chNext == '%')     // HTX -- ODBC server HTML extension
    {
        cbCur++;
        *plxs |= inHTXTag;
        token.tok = TokTag_SSSOPEN;
    }
    else if (IsCharAlpha(chNext))   // Tag
    {
        *plxs |= inTag;
        token.tok = TokTag_START;
    }
    else
    {
        token.tokClass = tokIDENTIFIER; // NOT a TAG
    }

    token.ibTokMac = cbCur;
    return cbCur;
}
|
|
|
|
//////////////////////////////////////////////////////////////////
|
|
// FindText
|
|
// Scan a token of text
|
|
// NOTE DO NOT MODIFY this function, mainly b/c the side effects
|
|
// will be hard to find, and will break the way
|
|
// that everything works.
|
|
//
|
|
UINT FindText(LPCTSTR pchLine, UINT cbLen, UINT cbCur, TXTB & token)
|
|
{
|
|
//BOOL fExtraSpace = FALSE;
|
|
//int cSpace = 0;
|
|
|
|
ASSERT (cbCur < cbLen);
|
|
|
|
token.tokClass = tokIDENTIFIER;
|
|
|
|
//if (pchLine[cbCur] == ' ' && !fExtraSpace)
|
|
// fExtraSpace = TRUE;
|
|
cbCur += _tclen(&pchLine[cbCur]);
|
|
while (cbCur < cbLen)
|
|
{
|
|
switch (pchLine[cbCur])
|
|
{
|
|
case _T('\0'):
|
|
case _T('\n'):
|
|
case _T('<'):
|
|
case _T('&'):
|
|
//if (cSpace > 0) // found extra spaces so remember them somewhere
|
|
goto ret;
|
|
break;
|
|
//case _T(' '):
|
|
// if (!fExtraSpace)
|
|
// fExtraSpace = TRUE;
|
|
// else
|
|
// cSpace++;
|
|
// break;
|
|
default:
|
|
//if (cSpace > 0) // found extra spaces so remember them somewhere
|
|
//cSpace = 0;
|
|
//fExtraSpace = FALSE;
|
|
break;
|
|
}
|
|
cbCur += _tclen(&pchLine[cbCur]);
|
|
}
|
|
|
|
ret:
|
|
token.ibTokMac = cbCur;
|
|
return cbCur;
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////
|
|
// FindNextToken()
|
|
// Find the next token in the line
|
|
//
|
|
UINT FindNextToken(LPCTSTR pchLine, UINT cbLen, UINT cbCur, DWORD * plxs, TXTB & token)
|
|
{
|
|
ASSERT (cbCur < cbLen);
|
|
HINT hint;
|
|
|
|
if (!(*plxs & INTAG)) // scanning text
|
|
{
|
|
if (*plxs & TEXTMASK)
|
|
{
|
|
if (*plxs & inCOMMENT)
|
|
token.tokClass = tokComment;
|
|
else
|
|
token.tokClass = tokIDENTIFIER;
|
|
// probe for end tag </comment>
|
|
UINT cbEnd = FindEndTag(pchLine, cbLen, cbCur, plxs, token);
|
|
if (cbEnd > cbCur) // parsed a nonzero-length token
|
|
{
|
|
return cbEnd;
|
|
}
|
|
//else fall through to normal processing
|
|
}
|
|
hint = GetTextHint(pchLine, cbLen, cbCur, plxs, token);
|
|
switch (hint)
|
|
{
|
|
case HTA:
|
|
// begin a tag
|
|
return FindTagOpen(pchLine, cbLen, cbCur, plxs, token);
|
|
|
|
case HEN:
|
|
// scan an entity reference
|
|
token.ibTokMac = FindEntityRef(pchLine, cbLen, cbCur, plxs, token);
|
|
return token.ibTokMac;
|
|
|
|
case EOS:
|
|
case ONL:
|
|
return token.ibTokMac;
|
|
|
|
case ERR:
|
|
default:
|
|
// scan text as a single token
|
|
// If the editor uses token info for more than coloring
|
|
// (e.g. extended selections), then this will need to
|
|
// return smaller chunks.
|
|
if (*plxs & inSCRIPT)
|
|
*plxs |= inScriptText;
|
|
return FindText(pchLine, cbLen, cbCur, token);
|
|
break;
|
|
}
|
|
|
|
return cbCur;
|
|
}
|
|
|
|
ASSERT(*plxs & INTAG); // must be in a tag here
|
|
|
|
BOOL bError = FALSE;
|
|
hint = GetHint(pchLine, cbLen, cbCur, plxs, token);
|
|
switch (hint)
|
|
{
|
|
case HTE:
|
|
// Tag end: remove all tag state bits
|
|
*plxs &= ~TAGMASK;
|
|
cbCur++;
|
|
token.tokClass = tokTag;
|
|
token.tok = TokTag_CLOSE;
|
|
token.ibTokMac = cbCur;
|
|
break;
|
|
|
|
case HNU:
|
|
#if 0 // lexing HTML instance, not a DTD!
|
|
if (!IsNumber(pchLine, cbLen, cbCur, token))
|
|
bError = TRUE;
|
|
if (SetValueSeen(plxs))
|
|
token.tokClass = tokValue;
|
|
break;
|
|
#else
|
|
// fall through
|
|
#endif
|
|
|
|
case HRN: // reserved name start: #
|
|
#if 1 // lexing HTML instance, not a DTD!
|
|
// simple nonwhitespace stream
|
|
if (!(*plxs & inValue))
|
|
bError = TRUE;
|
|
FindValue(pchLine, cbLen, cbCur, plxs, token);
|
|
if (bError)
|
|
{
|
|
token.tokClass = tokSpace;
|
|
bError = FALSE; //"corrected" the error
|
|
}
|
|
#else
|
|
cbCur++;
|
|
if (cbCur == cbLen)
|
|
token.tokClass = tokOp;
|
|
else
|
|
{
|
|
if (IsIdChar(pchLine[cbCur]))
|
|
{
|
|
cbCur++;
|
|
while (cbCur < cbLen && IsIdChar(pchLine[cbCur]))
|
|
cbCur++;
|
|
token.tokClass = tokResName;
|
|
}
|
|
else
|
|
token.tokClass = tokOp;
|
|
}
|
|
token.ibTokMac = cbCur;
|
|
if (SetValueSeen(plxs))
|
|
token.tokClass = tokValue;
|
|
#endif
|
|
break;
|
|
|
|
case HEP: // parameter entity: %
|
|
#if 1 // lexing HTML instance, not a DTD!
|
|
goto BadChar;
|
|
#else
|
|
cbCur++;
|
|
if (cbCur == cbLen)
|
|
{
|
|
token.tokClass = tokOp;
|
|
token.ibTokMac = cbCur;
|
|
}
|
|
else
|
|
{
|
|
if (IsIdChar(pchLine[cbCur]))
|
|
{
|
|
token.ibTokMac = FindEndEntity(pchLine, cbLen, cbCur, plxs, token);
|
|
token.tokClass = tokParEnt;
|
|
}
|
|
else
|
|
{
|
|
token.ibTokMac = cbCur;
|
|
token.tokClass = tokOp;
|
|
}
|
|
}
|
|
if (SetValueSeen(plxs))
|
|
token.tokClass = tokValue;
|
|
#endif
|
|
break;
|
|
|
|
// ported HTMED change (walts) -- handle some chars as valid start char for attribute values.
|
|
case HAV:
|
|
{
|
|
if (!(*plxs & inTag) || !SetValueSeen(plxs))
|
|
goto BadChar; // not in tag or attribute value.
|
|
|
|
int iTokenLength = GetValueTokenLength(pchLine, cbLen, cbCur);
|
|
token.ibTokMac = token.ibTokMin + iTokenLength;
|
|
token.tokClass = tokValue;
|
|
break;
|
|
}
|
|
// ported HTMED change (walts) -- handle some chars as valid start char for attribute values.
|
|
|
|
case HKW: // identifier
|
|
{
|
|
int iTokenLength = GetTokenLength(pchLine, cbLen, cbCur);
|
|
token.ibTokMac = token.ibTokMin + iTokenLength;
|
|
token.tokClass = tokName;
|
|
//FUTURE: Don't scan attributes in an end tag
|
|
if (*plxs & (inTag|inEndTag))
|
|
{
|
|
if (*plxs & inAttribute)
|
|
{
|
|
IsAttributeName(pchLine, cbCur, iTokenLength, token);
|
|
// don't change attribute/value state here
|
|
// we only look for values after we've seen "=" in case OEQ below
|
|
|
|
// REVIEW(cgomes): what if more attributes follow
|
|
// the SPAN??
|
|
// if found STARTSPAN then pretend I am not in a tag
|
|
if(token.tok == TokAttrib_STARTSPAN)
|
|
*plxs &= ~(inTag | inAttribute);
|
|
// if found ENDSPAN then goback to comment state
|
|
else if(token.tok == TokAttrib_ENDSPAN)
|
|
{
|
|
*plxs &= ~(inTag | inAttribute);
|
|
*plxs |= inBangTag | inComment;
|
|
}
|
|
}
|
|
else if (SetValueSeen(plxs))
|
|
{
|
|
// REVIEW (walts)
|
|
// Handle the client side script language detection here for the
|
|
// following case (language attribute value is NOT wrapped by quotes.)
|
|
// <SCRIPT LANGUAGE=VBScript>
|
|
if (*plxs & inSCRIPT)
|
|
{
|
|
SetScriptLanguage(&pchLine[cbCur], plxs);
|
|
}
|
|
|
|
//
|
|
// REVIEW(cgomes): It seems that any non-white space character
|
|
// is valid for non-quoted attribute values.
|
|
// Problem is that GetTokenLength is used to determine
|
|
// the token length, which works great non-values,
|
|
// but pulls egss for values.
|
|
// I use GetValueTokenLength here to get the length
|
|
// of value token. GetValueTokenLength will not
|
|
// stop till it hits a white space character.
|
|
//
|
|
|
|
iTokenLength = GetValueTokenLength(pchLine, cbLen, cbCur);
|
|
token.ibTokMac = token.ibTokMin + iTokenLength;
|
|
token.tokClass = tokName;
|
|
|
|
token.tokClass = tokValue;
|
|
}
|
|
else
|
|
{
|
|
IsElementName(pchLine, cbCur, iTokenLength, token);
|
|
// look for attributes
|
|
*plxs |= inAttribute;
|
|
// set content state
|
|
if (*plxs & inTag)
|
|
*plxs |= TextStateFromElement(&pchLine[token.ibTokMin], iTokenLength);
|
|
else if ((*plxs & inEndTag) && (*plxs & TEXTMASK))
|
|
*plxs &= ~TextStateFromElement(&pchLine[token.ibTokMin], iTokenLength);
|
|
else if ((*plxs & inEndTag) && (*plxs & inSCRIPT))
|
|
*plxs &= ~(inSCRIPT | inScriptText | inServerASP/* | inVBScript | inJavaScript*/);
|
|
}
|
|
}
|
|
else if (*plxs & inBangTag)
|
|
{
|
|
// FUTURE: other <!...> items like "HTML", "PUBLIC"? -- nice for DTDs
|
|
// Use a RW table for it if we do
|
|
|
|
// recognize <!DOCTYPE ...> as 'element'
|
|
if ((iTokenLength == 7) &&
|
|
(0 == _tcsnicmp(&pchLine[cbCur], _T("doctype"), 7)))
|
|
token.tokClass = tokElem;
|
|
}
|
|
break;
|
|
}
|
|
|
|
case HST: // string "..."
|
|
*plxs |= inString;
|
|
goto String;
|
|
|
|
case HSL: // string alternate '...'
|
|
*plxs |= inStringA;
|
|
String:
|
|
cbCur++;
|
|
token.ibTokMac = FindEndString(pchLine, cbLen, cbCur, plxs, token);
|
|
SetValueSeen(plxs);
|
|
// Handle the client side script language detection here for the
|
|
// following case (language attribute value is wrapped by quotes.)
|
|
// <SCRIPT LANGUAGE="VBScript">
|
|
if((*plxs & inSCRIPT) && (*plxs & inAttribute))
|
|
{
|
|
SetScriptLanguage(&pchLine[cbCur], plxs);
|
|
}
|
|
break;
|
|
|
|
case HWS: // tag whitespace
|
|
do
|
|
{
|
|
cbCur++;
|
|
} while (cbCur < cbLen && IsWhiteSpace(pchLine[cbCur]));
|
|
token.tokClass = tokSpace;
|
|
token.ibTokMac = cbCur;
|
|
break;
|
|
|
|
case OEQ:
|
|
// GetHint has set token info
|
|
if (*plxs & inAttribute)
|
|
{
|
|
// start looking for values
|
|
*plxs &= ~inAttribute;
|
|
*plxs |= inValue;
|
|
}
|
|
else
|
|
goto BadChar;
|
|
break;
|
|
|
|
case HTA:
|
|
if (cbCur+1 < cbLen && '%' == pchLine[cbCur+1])
|
|
{
|
|
SetValueSeen(plxs);
|
|
return FindTagOpen(pchLine, cbLen, cbCur, plxs, token);
|
|
}
|
|
// else fall through
|
|
case ERR:
|
|
case HEN:
|
|
BadChar:
|
|
token.tokClass = tokSpace;
|
|
|
|
// DS96# 10116 [CFlaat]: we can be in DBCS here, and so we need
|
|
// to make sure that our increment is double-byte aware
|
|
cbCur += _tcsnbcnt(pchLine + cbCur, 1); // byte count for current char
|
|
ASSERT(cbCur <= cbLen);
|
|
token.ibTokMac = cbCur;
|
|
break;
|
|
|
|
// ported HTMED CHANGE (walts) - added this case to handle dbcs attribute values.
|
|
case HDB:
|
|
{
|
|
// DBCS char. Handle for attribute values within tag.
|
|
if (!SetValueSeen(plxs))
|
|
goto BadChar;
|
|
|
|
int iTokenLength = GetValueTokenLength(pchLine, cbLen, cbCur);
|
|
token.ibTokMac = token.ibTokMin + iTokenLength;
|
|
token.tokClass = tokValue;
|
|
}
|
|
break;
|
|
// ported HTMED CHANGE END
|
|
|
|
default:
|
|
// GetHint has set token info
|
|
if (token.tokClass != tokComment && (*plxs & inValue))
|
|
FindValue(pchLine, cbLen, cbCur, plxs, token);
|
|
break;
|
|
}
|
|
if (bError)
|
|
IsUnknownID(pchLine, cbLen, cbCur, token);
|
|
return token.ibTokMac;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////
|
|
// GetTextHint()
|
|
// Like GetHint when scanning text -- look only for tags and entities
|
|
//
|
|
HINT GetTextHint(LPCTSTR pchLine, UINT /*cbLen*/, UINT cbCur, DWORD * /*plxs*/, TXTB & token)
|
|
{
|
|
// if the character is bigger than 128 (dbcs) then return error
|
|
if (pchLine[cbCur] & ~0x7F)
|
|
return HDB;
|
|
|
|
HINT hint = g_hintTable[pchLine[cbCur]];
|
|
|
|
if (IsSingleOp(hint))
|
|
{
|
|
hint = ERR;
|
|
}
|
|
else if (hint == ONL || hint == EOS)
|
|
{
|
|
token.tokClass = tokOp;
|
|
token.ibTokMac = cbCur + 1;
|
|
}
|
|
return hint;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////
|
|
// GetHint()
|
|
// Use hint table to guess what the next token going to be
|
|
// If it is a single operator, it will fill in the token info
|
|
// as well
|
|
//
|
|
HINT GetHint(LPCTSTR pchLine, UINT cbLen, UINT cbCur, DWORD * plxs, TXTB & token)
|
|
{
|
|
// if the character is bigger than 128 (dbcs) then return error
|
|
if (pchLine[cbCur] & ~0x7F)
|
|
return HDB;
|
|
|
|
HINT hint = g_hintTable[pchLine[cbCur]];
|
|
|
|
// check if it is a single op, new line or end of stream
|
|
if (IsSingleOp(hint) || hint == ONL || hint == EOS)
|
|
{
|
|
token.tokClass = hint;
|
|
token.ibTokMac = cbCur + 1;
|
|
}
|
|
else if (hint == ODA)
|
|
{
|
|
if ((cbCur + 1 < cbLen) &&
|
|
(g_hintTable[pchLine[cbCur + 1]] == ODA) &&
|
|
(*plxs & inBangTag))
|
|
{
|
|
cbCur += 2;
|
|
*plxs |= inComment;
|
|
COMMENTTYPE ct = IfHackComment(pchLine, cbLen, cbCur, plxs, token);
|
|
if (ct == 0)
|
|
{
|
|
token.tokClass = tokComment;
|
|
token.ibTokMac = cbCur;
|
|
}
|
|
else if(ct == CT_METADATA)
|
|
hint = HTA; // tag open
|
|
}
|
|
else
|
|
{
|
|
// single -
|
|
token.tokClass = tokOp;
|
|
token.ibTokMac = cbCur + 1;
|
|
}
|
|
}
|
|
return hint;
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////
|
|
// GetTokenLength ()
|
|
// return the length of a token identifier/keyword
|
|
//
|
|
UINT GetTokenLength(LPCTSTR pchLine, UINT cbLen, UINT cbCur)
|
|
{
|
|
LPCTSTR pCurrent = &pchLine[cbCur];
|
|
UINT cb;
|
|
UINT cbOld = cbCur;
|
|
|
|
if (IsCharAlphaNumeric(*pCurrent))
|
|
{
|
|
while (cbCur < cbLen && IsIdChar(*pCurrent))
|
|
{
|
|
cb = _tclen(pCurrent);
|
|
cbCur += cb;
|
|
pCurrent += cb;
|
|
}
|
|
}
|
|
return (int) max((cbCur - cbOld), 1);
|
|
}
|
|
|
|
/*
|
|
|
|
UINT GetValueTokenLength
|
|
|
|
Description:
|
|
Gets the length of the token.
|
|
This version will accept any non whitespace character
|
|
in the token.
|
|
|
|
*/
|
|
UINT GetValueTokenLength(LPCTSTR pchLine, UINT cbLen, UINT cbCur)
|
|
{
|
|
LPCTSTR pCurrent = &pchLine[cbCur];
|
|
UINT cb;
|
|
UINT cbOld = cbCur;
|
|
|
|
while (cbCur < cbLen && !_istspace(*pCurrent) && IsValueChar(*pCurrent))
|
|
{
|
|
cb = _tclen(pCurrent);
|
|
cbCur += cb;
|
|
pCurrent += cb;
|
|
}
|
|
return (int) max((cbCur - cbOld), 1);
|
|
}
|
|
|
|
|
|
////////////////////////////////////////////////////////////////
|
|
// IsElementName ()
|
|
// lookup the keyword table to determine if it is a keyword or not
|
|
//
|
|
BOOL IsElementName(LPCTSTR pchLine, UINT cbCur, int iTokenLength, TXTB & token)
|
|
{
|
|
LPCTSTR pCurrent = &pchLine[cbCur];
|
|
int iFound = NOT_FOUND;
|
|
|
|
if (NOT_FOUND != (iFound = g_pTable->FindElement(pCurrent, iTokenLength)))
|
|
{
|
|
token.tokClass = tokElem;
|
|
token.ibTokMac = cbCur + iTokenLength;
|
|
token.tok = iFound; // set token
|
|
}
|
|
return (iFound != NOT_FOUND);
|
|
}
|
|
|
|
// Returns what g_pTable->FindElement() reports for the whole string pszName.
int IndexFromElementName(LPCTSTR pszName)
{
    const int cchName = lstrlen(pszName);
    return g_pTable->FindElement(pszName, cchName);
}
|
|
|
|
////////////////////////////////////////////////////////////////
|
|
// IsAttributeName ()
|
|
// lookup the keyword table to determine if it is a keyword or not
|
|
//
|
|
BOOL IsAttributeName(LPCTSTR pchLine, UINT cbCur, int iTokenLength, TXTB & token)
|
|
{
|
|
LPCTSTR pCurrent = &pchLine[cbCur];
|
|
int iFound = NOT_FOUND;
|
|
|
|
if (NOT_FOUND != (iFound = g_pTable->FindAttribute(pCurrent, iTokenLength)))
|
|
{
|
|
token.tokClass = tokAttr;
|
|
// ENDSPAN__ is needed b/c the lexer does not recognize the
|
|
// endspan-- as 2 seperate tokens.
|
|
if(iFound == TokAttrib_ENDSPAN__)
|
|
{
|
|
// endspan-- found. return TokAttrib_ENDSPAN
|
|
// set ibTokMac to not include --.
|
|
token.tok = TokAttrib_ENDSPAN;
|
|
token.ibTokMac = cbCur + iTokenLength - 2;
|
|
}
|
|
else
|
|
{
|
|
token.ibTokMac = cbCur + iTokenLength;
|
|
token.tok = iFound; // set token
|
|
}
|
|
}
|
|
return (iFound != NOT_FOUND);
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// IsIdentifier()
|
|
// check if it is an identifier
|
|
//
|
|
BOOL IsIdentifier (int iTokenLength, TXTB & token)
|
|
{
|
|
if (iTokenLength > 0)
|
|
{
|
|
token.tokClass = tokName;
|
|
token.ibTokMac = token.ibTokMin + iTokenLength;
|
|
return TRUE;
|
|
}
|
|
else
|
|
return FALSE;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////
|
|
// IsUnknownID ()
|
|
// Mark the next token as an ID
|
|
//
|
|
BOOL IsUnknownID (LPCTSTR pchLine, UINT cbLen, UINT cbCur, TXTB & token)
|
|
{
|
|
ASSERT(cbCur < cbLen);
|
|
UINT cb;
|
|
LPCTSTR pCurrent = &pchLine[cbCur];
|
|
|
|
cb = _tclen(pCurrent);
|
|
cbCur += cb;
|
|
pCurrent += cb;
|
|
|
|
while ((cbCur < cbLen) && IsIdChar(*pCurrent))
|
|
{
|
|
cb = _tclen(pCurrent);
|
|
cbCur += cb;
|
|
pCurrent += cb;
|
|
}
|
|
|
|
token.tokClass = tokSpace;
|
|
token.ibTokMac = cbCur;
|
|
return TRUE;
|
|
}
|
|
|
|
/////////////////////////////////////////////////////////////////////////
|
|
// IsNumber()
|
|
// Check whether the next token is an SGML NUMTOKEN
|
|
//
|
|
BOOL IsNumber(LPCTSTR pchLine, UINT cbLen, UINT cbCur, TXTB & token)
|
|
{
|
|
if (cbCur >= cbLen)
|
|
return FALSE;
|
|
|
|
if (!_istdigit(pchLine[cbCur]))
|
|
return FALSE;
|
|
|
|
token.tokClass = tokNum;
|
|
|
|
// assume all digits are one byte
|
|
ASSERT(1 == _tclen(&pchLine[cbCur]));
|
|
cbCur++;
|
|
|
|
while (cbCur < cbLen && _istdigit(pchLine[cbCur]))
|
|
{
|
|
// assume all digits are one byte
|
|
ASSERT(1 == _tclen(&pchLine[cbCur]));
|
|
cbCur++;
|
|
}
|
|
|
|
token.ibTokMac = cbCur;
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
|
|
/* end of file */
|