// Copyright (c)1997-1999 Microsoft Corporation, All Rights Reserved
/* copied from ..\htmed\lexer.cpp */
/*++
Copyright (c) 1995 Microsoft Corporation
File: lexer.cpp
Abstract:
Nitty Gritty Lexer stuff
Contents:
SetValueSeen()
IsSingleOp()
IsWhiteSpace()
MapToken()
FindEndTag()
MakeSublang()
SetLanguage()
FindTable()
FindTable()
RemoveTable()
MakeTableSet()
GetToken()
IfHackComment()
FindServerScript()
FindEndComment()
FindEndEntity()
FindEntityRef()
FindValue()
FindEndString()
FindTagOpen()
FindText()
FindNextToken()
GetTextHint()
GetHint()
GetTokenLength()
GetValueTokenLength()
IsElementName()
IsAttributeName()
IsIdentifier()
IsUnknownID()
IsNumber()
CColorHtml::SetTable()
CColorHtml::InitSublanguages()
History:
2/14/97 cgomes: Created
--*/
#include "stdafx.h"
#include "resource.h"
#include "guids.h"
#include "token.h"
#include "table.h"
#include "lexer.h"
// Forward declaration: the definition is not in this part of the file.
// It takes the same parameter list as the other Find* scanners below.
UINT FindClientScriptEnd(LPCTSTR pchLine, UINT cbLen, UINT cbCur, DWORD * plxs, TXTB & token);
// Drop any ASSERT the headers above may have defined and route ASSERT
// to _ASSERTE for the rest of this file.
#undef ASSERT
#define ASSERT(b) _ASSERTE(b)
// HACK: we keep a copy of a ptr to the ASP table and sublang
// so we can do special behavior for ASP files.
// Both pointers start out null (0).
CTableSet* g_ptabASP = 0;
PSUBLANG g_psublangASP = 0;
// Array of table-set pointers with CV_MAX+1 slots.
// NOTE(review): presumably indexed by a CV_* value -- confirm where it is filled in.
PTABLESET g_arpTables[CV_MAX+1];
// NOTE: added to handle value tokens properly.
// Forward declaration only; the definition is not in this part of the file.
UINT GetValueTokenLength(LPCTSTR pchLine, UINT cbLen, UINT cbCur);
// mark state transition from value -> next attribute
inline int SetValueSeen(DWORD *plxs)
{
    // If the lexer state in *plxs has inValue set, swap it for inAttribute
    // and report TRUE; otherwise leave the state untouched and report FALSE.
    if (!(*plxs & inValue))
        return FALSE;

    // Clear inValue and raise inAttribute in a single update.
    *plxs = (*plxs & ~inValue) | inAttribute;
    return TRUE;
}
// REVIEW (walts) - need better way
inline void SetScriptLanguage(LPCTSTR pchLine, DWORD *plxs)
{
LPCTSTR strJavaScript = _T("javascript");
LPCTSTR strVBScript = _T("vbscript");
// triedit's special language. It is set when we convert server-side scripts into
// client-side scripts. It is a dummy language: if we find it as the language, we
// set in ServerASP. It is reset (removed) in FindNextToken().
LPCTSTR strServerAsp = _T("serverasp");
// language attribute may have quotes around it.
// if it does then advance past the first quote.
// ex. 3)
goto NotEntity;
// validate range
TCHAR szNum[4];
_tcsncpy(szNum, &pchLine[cbCur + 1], 3);
if (_tcstoul(szNum, 0, 10) > 255)
goto NotEntity;
// we now have a valid numeric entity ref
token.tokClass = tokEntity;
cbCur = token.ibTokMac;
// scan for end of entity ref
// scan rest of alphanumeric token
// REVIEW: Is this correct? IE 4.40.308 behaves this way
while ((cbCur < cbLen) && IsCharAlphaNumeric(pchLine[cbCur]))
cbCur++;
// scan delimiter
if (cbCur < cbLen)
cbCur++;
token.ibTokMac = cbCur;
return cbCur;
}
else if (!IsCharAlpha(pchLine[cbCur]))
{
goto NotEntity;
}
else
{
// parse and check entity name
UINT nLen = GetTokenLength(pchLine, cbLen, cbCur);
if (!g_pTable->FindEntity(&pchLine[cbCur], nLen))
goto NotEntity;
cbCur += nLen;
// eat delimiter if necessary
if ((cbCur < cbLen) &&
(pchLine[cbCur] == ';' || IsWhiteSpace(pchLine[cbCur])))
cbCur++;
token.tokClass = tokEntity;
token.ibTokMac = cbCur;
return cbCur;
}
}
/////////////////////////////////////////////////////////////
// FindValue()
// Find the end of an unquoted value.
//
// Scan for whitespace or end of tag
//
UINT FindValue(LPCTSTR pchLine, UINT cbLen, UINT cbCur, DWORD * plxs, TXTB & token)
{
    // pchLine/cbLen : line text and its length
    // cbCur         : index of the first character of the value (must be < cbLen)
    // plxs          : lexer state; leaves with inValue cleared and inAttribute set
    // token         : receives tokValue and the end index in ibTokMac
    // Returns the index just past the value.
    ASSERT(cbCur < cbLen);

    // The character at the entry position is always consumed; after that,
    // stop at the first whitespace character, at '>', or at end of line.
    for (++cbCur; cbCur < cbLen; ++cbCur)
    {
        if (IsWhiteSpace(pchLine[cbCur]) || pchLine[cbCur] == '>')
            break;
    }

    token.ibTokMac = cbCur;
    token.tokClass = tokValue;

    // switch from value to attribute (done unconditionally here)
    *plxs = (*plxs & ~inValue) | inAttribute;
    return cbCur;
}
/////////////////////////////////////////////////////////////
// FindEndString()
// Find the end of the string.
// Should only be called when we are in the string mode already.
//
UINT FindEndString (LPCTSTR pchLine, UINT cbLen, UINT cbCur, DWORD * plxs, TXTB & token)
{
    // pchLine/cbLen : line text and its length
    // cbCur         : index to resume scanning from, inside the string
    // plxs          : lexer state; must have an INSTRING bit set on entry
    // token         : receives tokString and the end index in ibTokMac
    // Returns the index just past the closing delimiter, the index of a
    // "<%" found inside the string, or cbLen if the line ends first.
    LPCTSTR pCurrent = &pchLine[cbCur];
    UINT cbChar;            // size of the current character in TCHAR units
    BOOL bInString = TRUE;
    TCHAR chDelim;

    ASSERT (*plxs & INSTRING); // must be in a string now
    token.tokClass = tokString;

    // inStringA selects the single-quote delimiter; otherwise double quote.
    chDelim = (*plxs & inStringA) ? _T('\'') : _T('"');

    while (bInString && cbCur < cbLen)
    {
        if (*pCurrent == chDelim)
        {
            // Closing delimiter: leave string mode and, if we were in a
            // value, move on to the next attribute. The delimiter itself
            // is consumed by the advance below.
            *plxs &= ~INSTRING;
            bInString = FALSE;
            SetValueSeen(plxs);
        }
        else if (*pCurrent == _T('<') &&
                 cbCur+1 < cbLen &&
                 *(pCurrent+1) == _T('%'))
        {
            // "<%" inside the string: flag it and stop in front of it.
            // The string state stays set so scanning can resume later.
            *plxs |= inHTXTag;
            break;
        }

        cbChar = (UINT)_tclen(pCurrent);
        cbCur += cbChar;
        pCurrent += cbChar;
    }

    // In an MBCS build _tclen can report 2 for a lead byte. If that lead
    // byte is the last byte of the line, the advance above steps past
    // cbLen, so never report a token end beyond the line.
    if (cbCur > cbLen)
        cbCur = cbLen;

    token.ibTokMac = cbCur;
    return cbCur;
}
//////////////////////////////////////////////////////////////////
//
UINT FindTagOpen(LPCTSTR pchLine, UINT cbLen, UINT cbCur, DWORD * plxs, TXTB & token)
{
ASSERT(pchLine[cbCur] == '<');
token.tokClass = tokTag;
*plxs &= ~inScriptText; // turn off script coloring when inside tags
cbCur++;
if (cbCur == cbLen)
{
*plxs |= inTag;
}
else
{
#ifdef NEEDED // copied from htmed\lexer.cpp
//
// HTMED CHANGE:
// REVIEW(cgomes): Figure out if I should turn off inSCRIPT in any of the
// following cases. Right now I only do it for the case.
//
#endif //NEEDED
switch (pchLine[cbCur])
{
case '!': // MDO - Markup Declaration Open
cbCur++;
*plxs |= inBangTag;
token.tok = TokTag_BANG;
break;
case '/': // End tag
cbCur++;
*plxs |= inEndTag;
token.tok = TokTag_END;
#ifdef NEEDED // copied from htmed\lexer.cpp
// HTMED CHANGE:
// REVIEW(cgomes): Colorizer bug: it never removes the inSCRIPT state
// This removes the inSCRIPT in the case