Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1600 lines
44 KiB

/*++
Copyright (C) 1995-2001 Microsoft Corporation
Module Name:
MOFLEX.CPP
Abstract:
Implementation for class CMofLexer, which tokenizes MOF files.
ANSI, DBCS and UNICODE are supported.
History:
a-raymcc 11-Oct-95 Created.
a-raymcc 27-Jan-96 Update for aliasing.
a-davj 6-June-96 Added support for octal, hex and binary constants
and line stitching, comment concatenation, escape
characters and old style comments.
--*/
#include "precomp.h"
#include <bmof.h>
#include <stdio.h>
#include <errno.h>
#include <arrtempl.h>
#include "mrciclass.h"
#include "datasrc.h"
#include "moflex.h"
#include "preproc.h"
#include "trace.h"
#include "strings.h"
#include "wbemcli.h"
#define INIT_ALLOC 512
#define ADDITIONAL_ALLOC 10000
#define MAX_ALLOC 1000000
#define UUIDLEN 36
// The following is the table of tokens consisting of a
// single character.
// =====================================================
typedef struct
{
wchar_t cToken;
int nSymbol;
} SingleTok;
SingleTok SingleTokenMap[] =
{
L'{', TOK_OPEN_BRACE,
L'}', TOK_CLOSE_BRACE,
L'[', TOK_OPEN_BRACKET,
L']', TOK_CLOSE_BRACKET,
L'(', TOK_OPEN_PAREN,
L')', TOK_CLOSE_PAREN,
L',', TOK_COMMA,
L'=', TOK_EQUALS,
L';', TOK_SEMI,
L'&', TOK_AMPERSAND,
L':', TOK_COLON,
L'#', TOK_POUND,
L'$', TOK_DOLLAR_SIGN,
L'+', TOK_PLUS
};
#define NUM_SINGLE_TOKENS (sizeof(SingleTokenMap)/sizeof(SingleTok))
// The following is the table of keywords which look like normal
// identifiers.
// =============================================================
typedef struct
{
OLECHAR *pKeyword;
int nToken;
} Keyword;
static Keyword MofKeywords[] =
{
L"class", TOK_CLASS,
L"instance", TOK_INSTANCE,
L"null", TOK_KEYWORD_NULL,
L"external", TOK_EXTERNAL,
L"as", TOK_AS,
L"ref", TOK_REF,
L"of", TOK_OF,
// L"object", TOK_OBJECT,
L"typedef", TOK_TYPEDEF,
L"subrange", TOK_SUBRANGE,
L"pragma", TOK_PRAGMA,
L"define", TOK_DEFINE,
L"ifdef", TOK_IFDEF,
L"include", TOK_INCLUDE,
L"endif", TOK_ENDIF,
L"ifndef", TOK_IFNDEF,
L"enum", TOK_ENUM,
L"AUTORECOVER", TOK_AUTORECOVER,
L"true", TOK_TRUE,
L"false", TOK_FALSE,
L"interface", TOK_INTERFACE,
L"ToInstance", TOK_TOINSTANCE,
L"ToSubClass", TOK_TOSUBCLASS,
L"EnableOverride", TOK_ENABLEOVERRIDE,
L"DisableOverride", TOK_DISABLEOVERRIDE,
L"NotToInstance", TOK_NOTTOINSTANCE,
L"Amended", TOK_AMENDED,
L"NotToSubClass", TOK_NOTTOSUBCLASS,
L"Restricted", TOK_RESTRICTED,
L"qualifier", TOK_QUALIFIER,
L"ClassFlags", TOK_CLASSFLAGS,
L"InstanceFlags", TOK_INSTANCEFLAGS,
L"Amendment", TOK_AMENDMENT,
L"void", TOK_VOID,
L"deleteclass", TOK_DELETECLASS,
L"FAIL", TOK_FAIL,
L"NOFAIL", TOK_NOFAIL
};
#define NUM_KEYWORDS (sizeof(MofKeywords)/sizeof(Keyword))
BOOL iswodigit(wchar_t wcTest);
//***************************************************************************
//
// SingleCharToken()
//
// This examines a single character of input and scans the table to
// determine if it is one of the single-character tokens.
//
// Parameters:
// c = The character being tested.
// Return value:
// Zero if no match, otherwise the TOK_ constant which identifies
// the token.
//
//***************************************************************************
static int SingleCharToken(wchar_t c)
{
for (int i = 0; i < NUM_SINGLE_TOKENS; i++)
if (SingleTokenMap[i].cToken == c)
return SingleTokenMap[i].nSymbol;
return 0;
}
//***************************************************************************
//
// BOOL iswwbemalpha
//
// Used to test if a wide character is suitable for identifiers.
//
// Parameters:
// c = The character being tested.
// Return value:
// TRUE if OK.
//
//***************************************************************************
BOOL iswwbemalpha(wchar_t c)
{
if(c == 0x5f || (0x41 <= c && c <= 0x5a) ||
(0x61 <= c && c <= 0x7a) || (0x80 <= c && c <= 0xfffd))
return TRUE;
else
return FALSE;
}
//***************************************************************************
//
// KeywordFilter()
//
// This function examines an identifier string to determine if it is
// in fact a keyword.
//
// Parameters:
// pTokStr = a pointer to the string to be examined.
//
// Return value:
// TOK_SIMPLE_IDENT if no match and no '_', or else the correct TOK_ value
// for the keyword.
//
//***************************************************************************
static int KeywordFilter(wchar_t *pTokStr)
{
for (int i = 0; i < NUM_KEYWORDS; i++)
if (wbem_wcsicmp(MofKeywords[i].pKeyword, pTokStr) == 0)
return MofKeywords[i].nToken;
wchar_t * pEnd;
pEnd = pTokStr + wcslen(pTokStr) -1;
if(*pTokStr != L'_' && *pEnd != L'_')
return TOK_SIMPLE_IDENT;
else
return TOK_SYSTEM_IDENT;
}
//***************************************************************************
//
// ValidGuid()
//
// Examines a character string to determine if it constitutes a valid
// GUID.
//
// Return value:
// TRUE if the string is a GUID, FALSE if not.
//
//***************************************************************************
BOOL CMofLexer::ValidGuid()
{
int i;
int iSoFar = 0;
#define HEXCHECK(n) \
for (i = 0; i < n; i++) \
if (!iswxdigit(GetChar(iSoFar++))) \
return FALSE;
#define HYPHENCHECK() \
if (GetChar(iSoFar++) != L'-') \
return FALSE;
HEXCHECK(8);
HYPHENCHECK();
HEXCHECK(4);
HYPHENCHECK();
HEXCHECK(4);
HYPHENCHECK();
HEXCHECK(4);
HYPHENCHECK();
HEXCHECK(12);
return TRUE;
}
//***************************************************************************
//
// CMofLexer::Init()
//
// Helper for first state of construction; inializing variables.
//
//***************************************************************************
void CMofLexer::Init()
{
m_nLine = 1;
m_nStartOfLinePos = 0;
m_bBMOF = false;
m_wFile[0] = 0;
m_nWorkBufSize = INIT_ALLOC;
m_pWorkBuf = new wchar_t[m_nWorkBufSize];
m_pDataSrc = NULL;
m_pBuff = NULL; // set in the constructors
m_pToFar = NULL;
m_nErrorCode = (m_pWorkBuf) ? no_error : memory_failure ;
}
//***************************************************************************
//
// CMofLexer::BuildBuffer()
//
// Helper for last stage of construction; build the unicode buffer. Note
// that this can be used by either the file or memory based constructor.
//
//***************************************************************************
void CMofLexer::BuildBuffer(long lSize, TCHAR * pFileName, char * pMemSrc, char * pMemToFar)
{
if(m_nErrorCode != no_error)
return; // already failed!
if(pFileName)
#ifdef USE_MMF_APPROACH
m_pDataSrc = new FileDataSrc1(pFileName);
#else
m_pDataSrc = new FileDataSrc(pFileName);
#endif
else
m_pDataSrc = new BufferDataSrc(lSize, pMemSrc);
if(m_pDataSrc == NULL)
m_nErrorCode = memory_failure;
else if(m_pDataSrc->GetStatus() != 0)
m_nErrorCode = file_io_error;
return;
}
//***************************************************************************
//
// Constructor for in-memory parsing.
//
//***************************************************************************
CMofLexer::CMofLexer(PDBG pDbg)
{
m_bUnicode = false;
m_pDbg = pDbg;
Init();
}
HRESULT CMofLexer::SetBuffer(char *pMemory, DWORD dwMemSize)
{
DWORD dwCompressedSize, dwExpandedSize;
if(IsBMOFBuffer((BYTE *)pMemory, dwCompressedSize, dwExpandedSize))
{
bool bRet = CreateBufferFromBMOF((BYTE *)pMemory + 16, dwCompressedSize, dwExpandedSize);
if(bRet == false)
m_nErrorCode = invalid_source_buffer;
}
else
{
m_bUnicode = false;
BuildBuffer(dwMemSize+4, NULL, pMemory, pMemory+dwMemSize);
}
if(m_nErrorCode == no_error)
return S_OK;
else
return WBEM_E_FAILED;
}
//***************************************************************************
//
// Checks if the file contains a binarary mof and if it does, decompresses
// the binary data.
//
//***************************************************************************
bool CMofLexer::ProcessBMOFFile(FILE *fp,TCHAR * szFilename)
{
// read the first 20 bytes
BYTE Test[TEST_SIZE];
int iRet = fread(Test, 1, TEST_SIZE, fp);
if(iRet != TEST_SIZE)
{
// if we cant read even the header, it must not be a BMOF
return false;
}
DWORD dwCompressedSize, dwExpandedSize;
// Test if the mof is binary
if(!IsBMOFBuffer(Test, dwCompressedSize, dwExpandedSize))
{
// not a binary mof. This is the typical case
return false;
}
// get the compression type, and the sizes
if( 0 != fseek(fp, 0, SEEK_SET)) return false;
DWORD dwSig, dwCompType;
iRet = fread(&dwSig, sizeof(DWORD), 1, fp);
iRet = fread(&dwCompType, sizeof(DWORD), 1, fp);
iRet = fread(&dwCompressedSize, sizeof(DWORD), 1, fp);
iRet = fread(&dwExpandedSize, sizeof(DWORD), 1, fp);
// Make sure the compression type is one we understand!
if(dwCompType != 0 && dwCompType != 1)
{
return FALSE;
}
m_pDataSrc = new BMOFDataSrc(szFilename);
if (NULL == m_pDataSrc) return false;
// If there was no compression, just read the data
if(dwCompType == 0)
{
m_pBuff = (WCHAR *)new BYTE[dwExpandedSize];
if(m_pBuff == NULL)
{
return false;
}
iRet = fread(m_pBuff, dwExpandedSize, 1, fp);
m_bBMOF = true;
m_pToFar = (BYTE *)m_pBuff + dwExpandedSize;
return true;
}
// Allocate storage for the compressed data
BYTE * pCompressed = new BYTE[dwCompressedSize];
if(pCompressed == NULL)
{
return false;
}
// Read the compressed data.
iRet = fread(pCompressed, 1, dwCompressedSize,fp);
if((DWORD)iRet != dwCompressedSize)
{
delete pCompressed;
return false;
}
// Convert from compress into something we can use later
bool bRet = CreateBufferFromBMOF(pCompressed, dwCompressedSize, dwExpandedSize);
delete pCompressed;
return bRet;
}
//***************************************************************************
//
// Creates the working buffer from a compressed binary mof buffer.
//
//***************************************************************************
bool CMofLexer::CreateBufferFromBMOF(byte * pCompressed, DWORD dwCompressedSize, DWORD dwExpandedSize)
{
if(m_pBuff)
delete m_pBuff;
m_pBuff = (WCHAR *)new BYTE[dwExpandedSize];
if(m_pBuff == NULL)
{
return false;
}
m_pToFar = (BYTE *)m_pBuff + dwExpandedSize;
// Decompress the data
CMRCICompression * pCompress = new CMRCICompression;
if(pCompress == NULL)
return FALSE;
CDeleteMe<CMRCICompression> dm(pCompress);
DWORD dwResSize = pCompress->Mrci1Decompress(pCompressed, dwCompressedSize,
(BYTE *)m_pBuff, dwExpandedSize);
bool bRet = dwResSize == dwExpandedSize;
if(bRet)
m_bBMOF = true;
return bRet;
}
//***************************************************************************
//
// Constructor for file-based parsing.
//
//***************************************************************************
CMofLexer::CMofLexer(const TCHAR *pFilePath, PDBG pDbg)
{
m_bUnicode = FALSE;
m_pDbg = pDbg;
Init();
FILE *fp;
BOOL bBigEndian = FALSE;
if(pFilePath == NULL)
{
m_nErrorCode = file_not_found;
return;
}
TCHAR szExpandedFilename[MAX_PATH+1];
DWORD nRes = ExpandEnvironmentStrings(pFilePath,
szExpandedFilename,
FILENAME_MAX);
if(nRes == 0)
StringCchCopyW(szExpandedFilename, MAX_PATH+1, pFilePath);
// Make sure the file exists and can be opened
if(pFilePath && lstrlen(szExpandedFilename))
{
Trace(true, pDbg, PARSING_MSG, szExpandedFilename);
}
#ifdef UNICODE
fp = _wfopen(szExpandedFilename, L"rb");
#else
fp = fopen(szExpandedFilename, "rb");
#endif
if (!fp)
{
if (errno == ENOENT)
m_nErrorCode = file_not_found;
if (errno == EACCES)
m_nErrorCode = access_denied;
else
m_nErrorCode = file_io_error;
return;
}
else
{
CfcloseMe cm(fp);
// If the file contains a binary mof, handle it here
if(ProcessBMOFFile(fp,szExpandedFilename))
{
return;
}
}
// Create a temp file name
TCHAR cTempFileName[MAX_PATH+1];
TCHAR cTempPath[MAX_PATH+1];
if( 0 == GetTempPath(MAX_PATH+1, cTempPath))
{
m_nErrorCode = problem_creating_temp_file;
return ;
}
if( 0 == GetTempFileName(cTempPath, TEXT("tmp"), 0, cTempFileName))
{
m_nErrorCode = problem_creating_temp_file;
return ;
}
// Create the temp file
FILE *fpTemp;
#ifdef UNICODE
fpTemp = _wfopen(cTempFileName, L"wb+");
#else
fpTemp = fopen(cTempFileName, "wb+");
#endif
if(fpTemp == 0)
{
m_nErrorCode = problem_creating_temp_file;
return;
}
else
{
CFlexArray sofar; // used to make sure we dont get into an infinite loop
SCODE sc = WriteFileToTemp(szExpandedFilename, fpTemp, sofar, pDbg, this);
fclose(fpTemp);
for(int iCnt = 0; iCnt < sofar.Size(); iCnt++)
{
char * pTemp = (char * )sofar.GetAt(iCnt);
delete pTemp;
}
if(sc != S_OK)
{
if(m_nErrorCode == no_error)
m_nErrorCode = preprocessor_error;
DeleteFile(cTempFileName);
return;
}
// Determine the size of the file
// ==============================
fseek(fpTemp, 0, SEEK_END);
long lSize = ftell(fpTemp) + 6; // add a bit extra for ending space and null NULL
fseek(fpTemp, 0, SEEK_SET);
// The temp file will be little endian unicode
lSize /= 2;
m_bUnicode = TRUE;
bBigEndian = FALSE;
// This will create a DataSrc object which will clean up the temp file
BuildBuffer(lSize,cTempFileName ,NULL,NULL);
}
}
//***************************************************************************
//
// Destructor.
//
//***************************************************************************
CMofLexer::~CMofLexer()
{
if (m_pBuff)
delete m_pBuff;
if (m_pWorkBuf)
delete m_pWorkBuf;
delete m_pDataSrc;
}
//***************************************************************************
//
// iswodigit
//
// Returns TRUE if it is a valid octal character. '0' to '7'.
//
//***************************************************************************
BOOL iswodigit(wchar_t wcTest)
{
if(wbem_iswdigit(wcTest) && wcTest != L'8' && wcTest != L'9')
return TRUE;
else
return FALSE;
}
//***************************************************************************
//
// CMofLexer::OctalConvert
//
// Converts an octal escape sequence into a character and returns the number
// of digits converted. Only a max of 3 digits is converted and if it isnt
// a wchar, the digits cant add up to more that 0377
//
//***************************************************************************
int CMofLexer::OctalConvert(wchar_t *pResult, LexState lsCurr)
{
int iNum = 0;
wchar_t wcTest;
*pResult = 0;
for(wcTest = GetChar(iNum+1); iswodigit(wcTest) && iNum < 3;
iNum++, wcTest = GetChar(iNum+1))
{
*pResult *= 8;
*pResult += wcTest - L'0';
}
if((lsCurr == wstring || lsCurr == wcharacter) && *pResult >0xff)
m_bBadString = TRUE;
return iNum;
}
//***************************************************************************
//
// CMofLexer::HexConvert
//
// Converts a hex escape sequence into a character and returns the number
// of digits converted.
//
//***************************************************************************
int CMofLexer::HexConvert(wchar_t *pResult, LexState lsCurr)
{
int iNum = 0;
wchar_t wcTest;
*pResult = 0;
int iMax = (lsCurr == wstring||lsCurr == wcharacter) ? 4 : 2;
for(wcTest = GetChar(iNum+2); iswxdigit(wcTest) && iNum < iMax;
iNum++, wcTest = GetChar(iNum+2))
{
*pResult *= 16;
if(wbem_iswdigit(wcTest)) // sscanf(xx,"%1x",int) also works!
*pResult += wcTest - L'0';
else
*pResult += towupper(wcTest) - L'A' + 10;
}
if(iNum == 0)
return -1; // error, nothing was converted!
return iNum+1; // num converted plus the 'x' char!
}
//***************************************************************************
//
// CMofLexer::ConvertEsc
//
// Processes escape characters. Returns size of sequence, a -1 indicates an
// error. Also, the *pResult is set upon success.
//
//***************************************************************************
int CMofLexer::ConvertEsc(wchar_t * pResult, LexState lsCurr)
{
// like C, case sensitive
switch(GetChar(1)) {
case L'n':
*pResult = 0xa;
break;
case L't':
*pResult = 0x9;
break;
case L'v':
*pResult = 0xb;
break;
case L'b':
*pResult = 0x8;
break;
case L'r':
*pResult = 0xd;
break;
case L'f':
*pResult = 0xc;
break;
case L'a':
*pResult = 0x7;
break;
case L'\\':
*pResult = L'\\';
break;
case L'?':
*pResult = L'?';
break;
case L'\'':
*pResult = L'\'';
break;
case L'\"':
*pResult = L'\"';
break;
case L'x':
return HexConvert(pResult,lsCurr);
break;
default:
if(iswodigit(GetChar(1)))
return OctalConvert(pResult,lsCurr);
return -1; // error!
break;
}
return 1;
}
//***************************************************************************
//
// ProcessStr
//
// Processes new characters once we are in the string state.
//
// Return "stop" if end of string.
//
//***************************************************************************
LexState CMofLexer::ProcessStr(wchar_t * pNewChar, LexState lsCurr, int * piRet)
{
// Check for end of string if we are a wstring state
if (GetChar() == L'"' && lsCurr == wstring)
{
// search for the next non white space character. If it is another
// string then these strings need to be combined.
int iCnt = 1;
int iMinMove = 0;
wchar_t wcTest;
for(wcTest = GetChar(iCnt); wcTest != NULL; iCnt++, wcTest=GetChar(iCnt))
{
if(m_pDataSrc->WouldBePastEnd(iCnt))
{
// dont go past eof!!
*piRet = (m_bBadString) ? TOK_ERROR : TOK_LPWSTR;
return stop; // last string in the file
}
if(wcTest == L'"' && GetChar(iCnt+1) == L'"')
{
iCnt++;
iMinMove = iCnt;
continue;
}
if(!iswspace(wcTest))
break;
}
// a-levn: no ascii strings are supported. "abc" means unicode.
if(lsCurr == wstring)
{
if(wcTest == L'/')
{
// might be an intervening comment
// ===============================
if (GetChar(iCnt+1) == L'/')
{
m_bInString = TRUE;
MovePtr(iCnt+1);
return new_style_comment;
}
else if (GetChar(iCnt+1) == L'*')
{
m_bInString = TRUE;
MovePtr(iCnt+1); // skip an extra so not to be fooled by
return old_style_comment;
}
}
if(wcTest != L'"')
{
*piRet = (m_bBadString) ? TOK_ERROR : TOK_LPWSTR;
MovePtr(iMinMove); // skip over '"'
return stop; // normal way for string to end
}
else
MovePtr(iCnt + 1); // skip over '"'
}
}
// If we are in character state, check for end
if (GetChar(0) == L'\'' && lsCurr == wcharacter)
{
if(m_bBadString || m_pDataSrc->PastEnd() ||
(m_pDataSrc->GetAt(-1) == L'\'') && m_pDataSrc->GetAt(-2) != L'\\')
*piRet = TOK_ERROR;
else
*piRet = TOK_WCHAR;
return stop;
}
// Not at end, get the character, possibly converting escape sequences
if(GetChar(0) == L'\\')
{
int iSize = ConvertEsc(pNewChar,lsCurr);
m_i8 = *pNewChar;
if(iSize < 1)
m_bBadString = TRUE;
else
{
MovePtr(iSize);
if(lsCurr == wcharacter && GetChar(1) != L'\'')
{
*piRet = TOK_ERROR;
return stop;
}
}
}
else if(GetChar(0) == '\n')
{
m_bBadString = TRUE;
MovePtr(-1);
return stop;
}
else
{
*pNewChar = GetChar(0);
m_i8 = *pNewChar;
if(*pNewChar == 0 || *pNewChar > 0xfffeu || (GetChar(1) != L'\'' && lsCurr == wcharacter))
{
*piRet = TOK_ERROR;
return stop;
}
}
return lsCurr;
}
//***************************************************************************
//
// BinaryToInt
//
// Converts a character representation of a binary, such as "101b" into
// an integer.
//
//***************************************************************************
BOOL BinaryToInt(wchar_t * pConvert, __int64& i64Res)
{
BOOL bNeg = FALSE;
__int64 iRet = 0;
WCHAR * pStart;
if(*pConvert == L'-' || *pConvert == L'+')
{
if(*pConvert == L'-')
bNeg = TRUE;
pConvert++;
}
for(pStart = pConvert;*pConvert && (*pConvert == L'0' || *pConvert == L'1'); pConvert++)
{
if(pConvert - pStart > 63)
return FALSE; // Its too long
iRet *= 2;
if(*pConvert == L'1')
iRet += 1;
}
if(towupper(*pConvert) != L'B')
return FALSE;
if(bNeg)
iRet = -iRet;
i64Res = iRet;
return TRUE;
}
BOOL GetInt(WCHAR *pData, WCHAR * pFormat, __int64 * p64)
{
static WCHAR wTemp[100];
if(swscanf(pData, pFormat, p64) != 1)
return FALSE;
// Make sure the data is ok. When comparing, make sure that leading 0's are skipped
StringCchPrintfW(wTemp, 100, pFormat, *p64);
WCHAR * pTemp;
for(pTemp = pData; *pTemp == L'0' && pTemp[1]; pTemp++);
if(wbem_wcsicmp(wTemp, pTemp))
return FALSE;
return TRUE;
}
//***************************************************************************
//
// CMofLexer::iGetNumericType()
//
// Return value:
// What type of numeric constant the current pointer is pointing to.
//
//***************************************************************************
int CMofLexer::iGetNumericType(void)
{
#define isuorl(x) (towupper(x) == L'U' || towupper(x) == L'L')
wchar_t * pTemp;
BOOL bBinary = FALSE;
wchar_t * pStart; // first charcter not including leading - or +
int iNumBinaryDigit = 0;
int iNumDigit = 0;
int iNumOctalDigit = 0;
int iNumDot = 0;
int iNumE = 0;
wchar_t * pEnd = m_pWorkBuf + wcslen(m_pWorkBuf) - 1;
if(*m_pWorkBuf == L'-' || *m_pWorkBuf == L'+')
pStart = m_pWorkBuf+1;
else
pStart = m_pWorkBuf;
int iLen = wcslen(pStart); // length not including leading '-' or '+'
BOOL bHex = (pStart[0] == L'0' && towupper(pStart[1]) == L'X');
// loop through and count the number of various digit types, decimal points, etc.
// ==============================================================================
for(pTemp = pStart; *pTemp; pTemp++)
{
// Check for 'U' or 'l' characters at the end. They are an error
// if the number is a float, or not in the last two characters, or
// if a 'u' is present along with a '-' in the first character
// ===============================================================
if (isuorl(*pTemp))
{
if(pTemp < pEnd -1 || !isuorl(*pEnd) || iNumDot || iNumE)
return TOK_ERROR;
if(towupper(*pTemp) == L'U' && *m_pWorkBuf == L'-')
return TOK_ERROR;
iLen--;
continue;
}
// If we previously hit the binary indicator, the only thing that
// should be after the b is U or L characters.
// ==============================================================
if(bBinary)
return TOK_ERROR;
// If in hex mode, only allow for x in second digit and hex numbers.
// anything else is an error.
// =================================================================
if(bHex)
{
if(pTemp < pStart+2) // ignore the 0X
continue;
if(!iswxdigit(*pTemp))
return TOK_ERROR;
iNumDigit++;
continue;
}
// Number is either a non hex integer or a float.
// Do a count of various special digit types, decimal points, etc.
// ===============================================================
if(*pTemp == L'0' || *pTemp == L'1')
iNumBinaryDigit++;
if(iswodigit(*pTemp))
iNumOctalDigit++;
// each character should fall into one of the following catagories
if(wbem_iswdigit(*pTemp))
iNumDigit++;
else if(*pTemp == L'.')
{
iNumDot++;
if(iNumDot > 1 || iNumE > 0)
return TOK_ERROR;
}
else if(towupper(*pTemp) == L'E')
{
if(iNumDigit == 0 || iNumE > 0)
return TOK_ERROR;
iNumDigit=0; // to ensure at least one digit after the 'e'
iNumE++;
}
else if(*pTemp == L'-' || *pTemp == L'+') // ok if after 'E'
{
if(pTemp > pStart && towupper(pTemp[-1]) == L'E')
continue;
else
return TOK_ERROR;
}
else if (towupper(*pTemp) == L'B')
bBinary = TRUE;
else
return TOK_ERROR;
}
// Make sure there are enough digits
// =================================
if(iNumDigit < 1)
return TOK_ERROR;
// take care of integer case.
// ==========================
if(bHex || bBinary || iNumDigit == iLen)
{
__int64 i8 = 0;
if(bHex)
{
if(!GetInt(m_pWorkBuf+2, L"%I64x", &i8))
return TOK_ERROR;
}
else if(bBinary)
{
if(!BinaryToInt(m_pWorkBuf, i8))
return TOK_ERROR;
}
else if(pStart[0] != L'0' || wcslen(pStart) == 1)
{
if(*m_pWorkBuf == '-')
{
if(!GetInt(m_pWorkBuf, L"%I64i", &i8))
return TOK_ERROR;
}
else
{
if(!GetInt(m_pWorkBuf, L"%I64u", &i8))
return TOK_ERROR;
}
}
else if(iNumDigit == iNumOctalDigit)
{
if(!GetInt(m_pWorkBuf+1, L"%I64o", &i8))
return TOK_ERROR;
}
else return TOK_ERROR;
// Make sure the number isnt too large
// ===================================
m_i8 = i8;
if(*m_pWorkBuf == L'-')
return TOK_SIGNED64_NUMERIC_CONST;
else
return TOK_UNSIGNED64_NUMERIC_CONST;
}
// must be a floating point, no conversion needed.
return TOK_FLOAT_VALUE;
}
//***************************************************************************
//
// CMofLexer::MovePtr
//
// Moves pointer farther into the buffer. Note that the farthest it will go
// is one past the last valid WCHAR which is the location of an extra NULL
//
//***************************************************************************
void CMofLexer::MovePtr(int iNum)
{
int iSoFar = 0;
int iChange = (iNum > 0) ? 1 : -1;
int iNumToDo = (iNum > 0) ? iNum : -iNum;
while(iSoFar < iNumToDo)
{
if(iChange == 1)
{
// going forward, update the pointer and make sure it
// is still in an acceptable range.
// ==================================================
m_pDataSrc->Move(iChange);
if(m_pDataSrc->PastEnd()) // points to the NULL
return;
// If going forward and a slash cr is hit, do an extra skip.
WCHAR wCurr = m_pDataSrc->GetAt(0);
if(wCurr == L'\\' && m_pDataSrc->GetAt(1) == L'\n')
{
m_nLine++;
m_pDataSrc->Move(1); // extra increment
m_nStartOfLinePos = m_pDataSrc->GetPos();
continue;
}
else if(wCurr == L'\\' && m_pDataSrc->GetAt(1) == L'\r'
&& m_pDataSrc->GetAt(2) == L'\n')
{
m_nLine++;
m_pDataSrc->Move(2); // extra increment
m_nStartOfLinePos = m_pDataSrc->GetPos();
continue;
}
else if (wCurr == L'\n')
{
m_nLine++;
m_nStartOfLinePos = m_pDataSrc->GetPos();
}
}
else
{
// If going backward and a cr is left, then decrement the line
if (m_pDataSrc->GetAt(0) == L'\n' &&
m_pDataSrc->GetPos() > 0 )
{
m_nLine--;
m_nStartOfLinePos = m_pDataSrc->GetPos();
}
// Update the pointer and make sure it is still in an
// acceptable range.
// ==================================================
m_pDataSrc->Move(iChange);
if(m_pDataSrc->GetPos() < 0)
{
m_pDataSrc->MoveToStart();
return;
}
// If going backward and a slash cr is hit, do an extra skip.
WCHAR wCurr = m_pDataSrc->GetAt(0);
if( wCurr == L'\n' && m_pDataSrc->GetAt(-1) == L'\\')
{
m_nLine--;
m_nStartOfLinePos = m_pDataSrc->GetPos();
m_pDataSrc->Move(-1); // extra decrement
continue;
}
else if( wCurr == L'\n' && m_pDataSrc->GetAt(-1) == L'\r' &&
m_pDataSrc->GetAt(-2) == L'\\')
{
m_nLine--;
m_nStartOfLinePos = m_pDataSrc->GetPos();
m_pDataSrc->Move(-2); // extra decrement
continue;
}
}
iSoFar++;
}
}
//***************************************************************************
//
// CMofLexer::GetChar()
//
// Returns a character at an offset from the current character pointer.
//
//***************************************************************************
wchar_t CMofLexer::GetChar(int iNum)
{
if(iNum == 0)
return m_pDataSrc->GetAt(0);
else if(iNum == 1)
{
wchar_t tRet = m_pDataSrc->GetAt(1);
if(tRet != L'\\' && tRet != '\n')
return tRet;
}
MovePtr(iNum);
wchar_t wcRet = m_pDataSrc->GetAt(0);
MovePtr(-iNum);
return wcRet;
}
//***************************************************************************
//
// CMofLexer::iGetColumn()
//
// Gets the current column value. Counts back to the previous Cr or the
// start of buffer.
//
//***************************************************************************
int CMofLexer::iGetColumn()
{
return m_pDataSrc->GetPos() - m_nStartOfLinePos;
}
//***************************************************************************
//
// CMofLexer::bOKNumericAddition()
//
// Returns true if the test character could be added to numeric buffer.
// Note that it returns true if an alphanumeric, or a + or - and the last
// character in the working buffer is an 'E'
//
//***************************************************************************
BOOL CMofLexer::bOKNumericAddition(wchar_t cTest)
{
if(wbem_iswalnum(cTest) || cTest == L'.')
return TRUE;
int iLen = wcslen(m_pWorkBuf);
if(iLen > 0)
if(towupper(m_pWorkBuf[iLen-1]) == L'E' &&
(cTest == L'+' || cTest == L'-') &&
towupper(m_pWorkBuf[1]) != L'X')
return TRUE;
return FALSE;
}
//***************************************************************************
//
// CMofLexer::SpaceAvailable()
//
// Returns TRUE if there is enuough space in the working buffer to add.
// another character. It will expand the buffer if need be.
//
//***************************************************************************
BOOL CMofLexer::SpaceAvailable()
{
// most common case is that there is already space available
int iNumWChar = m_pEndOfText-m_pWorkBuf+1;
if(iNumWChar < m_nWorkBufSize)
return TRUE;
if(m_nWorkBufSize > MAX_ALLOC) // programs need limits!
return FALSE;
// Allocate a bigger buffer and copy the old stuff into it
// =======================================================
long nNewSize = m_nWorkBufSize + ADDITIONAL_ALLOC;
wchar_t * pNew = new wchar_t[nNewSize];
if(pNew == FALSE)
return FALSE;
memcpy(pNew, m_pWorkBuf, m_nWorkBufSize*2);
delete m_pWorkBuf;
m_nWorkBufSize = nNewSize;
m_pWorkBuf = pNew;
m_pEndOfText = m_pWorkBuf + iNumWChar - 1;
return TRUE;
}
//***************************************************************************
//
// NextToken()
//
// This function contains the DFA recognizer for MOF tokens. It works
// entirely in UNICODE characters, so the NextChar() function is expected
// to pretranslate ANSI or DBCS source streams into wide characters.
//
// Return value:
// One of the TOK_ constants, or TOK_EOF when the end of the input
// stream has been reached. If the user calls PushBack(), then
// this will return the symbol which was pushed back onto the input
// stream. Only one level of push back is supported.
//
//***************************************************************************
int CMofLexer::NextToken(bool bDontAllowWhitespace)
{
int nToken = TOK_ERROR;
LexState eState = start;
m_bBadString = FALSE;
*m_pWorkBuf = 0;
m_pEndOfText = m_pWorkBuf;
m_bInString = FALSE;
#define CONSUME(c) \
if (!SpaceAvailable()) return TOK_ERROR;\
*m_pEndOfText++ = (c), *m_pEndOfText = 0;
wchar_t c;
for (MovePtr(1); m_nErrorCode == no_error && !m_pDataSrc->PastEnd(); MovePtr(1))
{
c = GetChar();
// *************************************************************************
// General 'start' state entry.
// ============================
if (eState == start)
{
m_nTokCol = iGetColumn();
m_nTokLine = m_nLine;
// If a non-newline whitespace and we are in 'start', then just strip it.
// =======================================================================
if (iswspace(c) || c == L'\n')
if(bDontAllowWhitespace)
return TOK_ERROR;
else
continue;
// Check for string continuation
// =============================
if(m_bInString)
{
if(c == '"')
{
eState = wstring;
continue;
}
else
{
// string ended after all
MovePtr(-1);
return TOK_LPWSTR;
}
}
// Handle all single character tokens.
// ===================================
if (nToken = SingleCharToken(c))
return nToken;
// Start of comment, we have to get either another / or a *.
// The style of comment depends on what you get. To get
// neither is an error
// ======================================================
if (c == L'/')
{
if (GetChar(1) == L'/')
{
eState = new_style_comment;
continue;
}
else if (GetChar(1) == L'*')
{
eState = old_style_comment;
MovePtr(1); // skip an extra so not to be fooled by /*/
continue;
}
else
return TOK_ERROR;
}
// Check for strings or characters. Like C, 'L' is case sensitive
// ================================
if (c == L'"' || c == L'\'')
{
eState = (c == L'"') ? wstring : wcharacter;
continue;
}
// Tokens beginning with these letters might be a uuid.
// ====================================================
if (iswxdigit(c) && ValidGuid())
{
eState = uuid;
CONSUME(c);
continue;
}
// Check for identifiers which start with either a letter or _
// ===========================================================
if (iswwbemalpha(c) || c == L'_')
{
eState = ident;
CONSUME(c);
continue;
}
// Check for a leading minus sign or digits. Either indicates
// a numeric constant
// ===========================================================
if (wbem_iswdigit(c) || c == L'-')
{
eState = numeric;
CONSUME(c);
continue;
}
// If the first character was a '.', then it might be a
// float or a single byte token.
// ====================================================
if (c == L'.')
{
if (wbem_iswdigit(GetChar(1)))
{
eState = numeric;
CONSUME(c);
continue;
}
return TOK_DOT;
}
// If here, an unknown token.
// ==========================
break;
} // end of if (eState == start)
// ************************************************************
// Some state other than start
// If we are in a quoted string or character.
// ==========================================
if (eState == wstring || eState == wcharacter)
{
wchar_t wTemp; // might be converted esc sequence
int iRet;
LexState lsNew = ProcessStr(&wTemp,eState,&iRet);
if(stop == lsNew)
{
return iRet;
}
else
{
eState = lsNew;
}
if(eState == wstring || eState == wcharacter)
{
CONSUME(wTemp);
}
// else we stepped out of the string and into a comment.
continue;
}
// numeric state, undetermined numeric constant.
// =============================================
if (eState == numeric)
{
if(bOKNumericAddition(c))
{
CONSUME(c);
continue;
}
MovePtr(-1);
return iGetNumericType();
}
// If we are getting an identifer, we continue
// until a nonident char is hit.
// ============================================
if (eState == ident)
{
if (wbem_iswdigit(c) || iswwbemalpha(c) || c == L'_')
{
CONSUME(c);
continue;
}
MovePtr(-1);
return KeywordFilter(m_pWorkBuf);
}
// GUIDs are already verified, just load up the proper length
// ==========================================================
if (eState == uuid)
{
CONSUME(c);
if(wcslen(m_pWorkBuf) >= UUIDLEN)
return TOK_UUID;
else
continue;
}
// Take care of comment states. New style comments "//" are
// terminated by a new line while old style end with "*/"
// =========================================================
if (eState == new_style_comment)
{
if (c == L'\n')
{
eState = start;
}
continue;
}
if (eState == old_style_comment)
{
if (c == L'*')
if(GetChar(1) == L'/')
{
MovePtr(1);
eState = start;
}
continue;
}
break; // this is bad, got into strange state
}
// If we ended and the last thing was a string, the we are ok. This takes care
// of the case where the last token in a file is a string.
if ((eState == start || eState == new_style_comment) && m_bInString)
{
return TOK_LPWSTR;
}
// return eof if we never got started, ex, bad file name
if(m_nErrorCode != no_error)
return 0;
if(m_pDataSrc->PastEnd() &&
(eState == start || eState == new_style_comment))
return 0;
else
{
if(eState == old_style_comment)
Trace(true, m_pDbg, UNEXPECTED_EOF, m_nTokLine);
if(c == L'*' && GetChar(1) == L'/')
Trace(true, m_pDbg, COMMENT_ERROR, m_nTokLine);
return TOK_ERROR;
}
}
//***************************************************************************
//
// GetText
//
//***************************************************************************
const OLECHAR *CMofLexer::GetText(int *pLineDeclared)
{
if (pLineDeclared)
*pLineDeclared = m_nTokLine;
return m_pWorkBuf;
}
void CMofLexer::SetLexPosition(ParseState * pPos)
{
m_pDataSrc->MoveToPos(pPos->m_iPos);
}
void CMofLexer::GetLexPosition(ParseState * pPos)
{
pPos->m_iPos = m_pDataSrc->GetPos();
}