// Implementation of class CMofLexer, which tokenizes MOF files. ANSI, DBCS and UNICODE are supported.
// History: a-raymcc 11-Oct-95 Created. a-raymcc 27-Jan-96 Updated for aliasing. a-davj 6-June-96 Added support for octal, hex and binary constants, line stitching, comment concatenation, escape characters and old style comments.
#include "precomp.h"
#include <bmof.h>
#include <stdio.h>
#include <errno.h>
#include <arrtempl.h>
#include "mrciclass.h"
#include "datasrc.h"
#include "moflex.h"
#include "preproc.h"
#include "trace.h"
#include "strings.h"
#include "wbemcli.h"
// Initial size, in wchar_t elements, of the token work buffer allocated by CMofLexer::Init().
#define INIT_ALLOC 512
// Number of wchar_t elements added each time CMofLexer::SpaceAvailable() grows the work buffer.
#define ADDITIONAL_ALLOC 10000
// Once the work buffer size exceeds this many elements, SpaceAvailable() refuses to grow it further.
#define MAX_ALLOC 1000000
// Number of characters NextToken() collects before it reports a TOK_UUID token.
#define UUIDLEN 36
// The following is the table of tokens consisting of a
// single character.
// =====================================================
// One entry of the single-character token table: the character and the
// symbol value reported for it.
struct SingleTok
{
    wchar_t cToken;     // character as it appears in the source text
    int     nSymbol;    // symbol value returned for that character
};

// Number of entries in SingleTokenMap.
#define NUM_SINGLE_TOKENS (sizeof(SingleTokenMap)/sizeof(SingleTok))
// The following is the table of keywords which look like normal
// identifiers.
// =============================================================
typedef struct { OLECHAR *pKeyword; int nToken; } Keyword;
static Keyword MofKeywords[] = { L"class", TOK_CLASS, L"instance", TOK_INSTANCE, L"null", TOK_KEYWORD_NULL, L"external", TOK_EXTERNAL, L"as", TOK_AS, L"ref", TOK_REF, L"of", TOK_OF, // L"object", TOK_OBJECT,
L"typedef", TOK_TYPEDEF, L"subrange", TOK_SUBRANGE, L"pragma", TOK_PRAGMA, L"define", TOK_DEFINE, L"ifdef", TOK_IFDEF, L"include", TOK_INCLUDE, L"endif", TOK_ENDIF, L"ifndef", TOK_IFNDEF, L"enum", TOK_ENUM, L"AUTORECOVER", TOK_AUTORECOVER, L"true", TOK_TRUE, L"false", TOK_FALSE, L"interface", TOK_INTERFACE, L"ToInstance", TOK_TOINSTANCE, L"ToSubClass", TOK_TOSUBCLASS, L"EnableOverride", TOK_ENABLEOVERRIDE, L"DisableOverride", TOK_DISABLEOVERRIDE, L"NotToInstance", TOK_NOTTOINSTANCE, L"Amended", TOK_AMENDED, L"NotToSubClass", TOK_NOTTOSUBCLASS, L"Restricted", TOK_RESTRICTED, L"qualifier", TOK_QUALIFIER, L"ClassFlags", TOK_CLASSFLAGS, L"InstanceFlags", TOK_INSTANCEFLAGS, L"Amendment", TOK_AMENDMENT, L"void", TOK_VOID, L"deleteclass", TOK_DELETECLASS, L"FAIL", TOK_FAIL, L"NOFAIL", TOK_NOFAIL };
#define NUM_KEYWORDS (sizeof(MofKeywords)/sizeof(Keyword))
// Forward declaration; iswodigit() is defined further down in this file.
BOOL iswodigit(wchar_t wcTest);
// SingleCharToken()
// This examines a single character of input and scans the table to
// determine if it is one of the single-character tokens.
// Parameters:
// c = The character being tested.
// Return value:
// Zero if no match, otherwise the TOK_ constant which identifies
// the token.
// Looks the character up in SingleTokenMap.
// Returns the matching entry's symbol, or zero when the character is not
// one of the single-character tokens.
static int SingleCharToken(wchar_t c)
{
    int idx = 0;
    while (idx < NUM_SINGLE_TOKENS)
    {
        if (c == SingleTokenMap[idx].cToken)
            return SingleTokenMap[idx].nSymbol;
        idx++;
    }

    return 0;   // no table entry matched
}
// BOOL iswwbemalpha
// Used to test if a wide character is suitable for identifiers.
// Parameters:
// c = The character being tested.
// Return value:
// TRUE if OK.
// Tests whether a wide character may appear in an identifier:
// underscore, 'A'-'Z', 'a'-'z', or any code unit in 0x80..0xfffd.
BOOL iswwbemalpha(wchar_t c)
{
    if (c == 0x5f)                      // '_'
        return TRUE;
    if (c >= 0x41 && c <= 0x5a)         // 'A'..'Z'
        return TRUE;
    if (c >= 0x61 && c <= 0x7a)         // 'a'..'z'
        return TRUE;
    if (c >= 0x80 && c <= 0xfffd)       // everything above 7-bit ASCII up to 0xfffd
        return TRUE;

    return FALSE;
}
// KeywordFilter()
// This function examines an identifier string to determine if it is
// in fact a keyword.
// Parameters:
// pTokStr = a pointer to the string to be examined.
// Return value:
// The keyword's TOK_ value on a case-insensitive match. Otherwise
// TOK_SYSTEM_IDENT if the string begins or ends with '_', else TOK_SIMPLE_IDENT.
// Classifies an identifier string.
// Parameters:
//   pTokStr = NUL-terminated identifier text to examine.
// Return value:
//   The table's TOK_ value if the text matches an entry of MofKeywords
//   (compared with wbem_wcsicmp). Otherwise TOK_SYSTEM_IDENT when the
//   text begins or ends with '_', and TOK_SIMPLE_IDENT in all other cases.
static int KeywordFilter(wchar_t *pTokStr)
{
    for (int i = 0; i < (int)NUM_KEYWORDS; i++)
        if (wbem_wcsicmp(MofKeywords[i].pKeyword, pTokStr) == 0)
            return MofKeywords[i].nToken;

    // An empty string has no last character; the previous code computed
    // pTokStr + 0 - 1 and read one element before the buffer.
    const size_t nLen = wcslen(pTokStr);
    if (nLen == 0)
        return TOK_SIMPLE_IDENT;

    if (pTokStr[0] != L'_' && pTokStr[nLen - 1] != L'_')
        return TOK_SIMPLE_IDENT;

    return TOK_SYSTEM_IDENT;
}
// ValidGuid()
// Examines a character string to determine if it constitutes a valid
// GUID.
// Return value:
// TRUE if the string is a GUID, FALSE if not.
// Looks ahead from the current position (without consuming input) for the
// textual GUID layout: 8 hex digits, then three groups of 4 and a final
// group of 12, each separated by a hyphen -- UUIDLEN (36) characters in all.
// Return value:
//   TRUE if the upcoming characters have that layout, FALSE otherwise.
// The helper macros were previously defined but never invoked, so every
// input was accepted.
BOOL CMofLexer::ValidGuid()
{
    int i;
    int iSoFar = 0;     // offset from the current position of the next character to test

// Return FALSE unless the next n characters are all hexadecimal digits.
#define HEXCHECK(n) \
    for (i = 0; i < n; i++) \
        if (!iswxdigit(GetChar(iSoFar++))) \
            return FALSE;

// Return FALSE unless the next character is a hyphen.
#define HYPHENCHECK() \
    if (GetChar(iSoFar++) != L'-') \
        return FALSE;

    HEXCHECK(8);
    HYPHENCHECK();
    HEXCHECK(4);
    HYPHENCHECK();
    HEXCHECK(4);
    HYPHENCHECK();
    HEXCHECK(4);
    HYPHENCHECK();
    HEXCHECK(12);

    return TRUE;
}
// CMofLexer::Init()
// Helper for the first stage of construction; initializes member variables.
// Puts every member into its initial state and allocates the token work
// buffer (INIT_ALLOC wide characters). m_nErrorCode ends up as no_error,
// or memory_failure if the work buffer pointer is null.
void CMofLexer::Init()
{
    // Position bookkeeping.
    m_nLine = 1;
    m_nStartOfLinePos = 0;

    // Nothing is known about the input yet.
    m_bBMOF = false;
    m_wFile[0] = 0;

    // Token work buffer.
    m_nWorkBufSize = INIT_ALLOC;
    m_pWorkBuf = new wchar_t[m_nWorkBufSize];

    // Input objects; these are filled in later.
    m_pDataSrc = NULL;
    m_pBuff = NULL;
    m_pToFar = NULL;

    if (m_pWorkBuf)
        m_nErrorCode = no_error;
    else
        m_nErrorCode = memory_failure;
}
// CMofLexer::BuildBuffer()
// Helper for last stage of construction; build the unicode buffer. Note
// that this can be used by either the file or memory based constructor.
void CMofLexer::BuildBuffer(long lSize, TCHAR * pFileName, char * pMemSrc, char * pMemToFar) { if(m_nErrorCode != no_error) return; // already failed!
if(pFileName) #ifdef USE_MMF_APPROACH
m_pDataSrc = new FileDataSrc1(pFileName); #else
m_pDataSrc = new FileDataSrc(pFileName); #endif
else m_pDataSrc = new BufferDataSrc(lSize, pMemSrc);
if(m_pDataSrc == NULL) m_nErrorCode = memory_failure; else if(m_pDataSrc->GetStatus() != 0) m_nErrorCode = file_io_error; return; }
// Constructor for in-memory parsing.
// Records the debug/trace object and initializes the members. The text to
// parse is supplied afterwards through SetBuffer().
CMofLexer::CMofLexer(PDBG pDbg)
{
    m_pDbg = pDbg;
    m_bUnicode = false;
    Init();
}
// Supplies the in-memory source to parse.
// Parameters:
//   pMemory   = start of the buffer; either MOF text or a binary MOF.
//   dwMemSize = number of bytes available at pMemory.
// Return value:
//   S_OK if m_nErrorCode is no_error afterwards, WBEM_E_FAILED otherwise.
// A binary MOF is expanded with CreateBufferFromBMOF(); its payload starts
// 16 bytes into the buffer. Anything else is handed to BuildBuffer().
HRESULT CMofLexer::SetBuffer(char *pMemory, DWORD dwMemSize)
{
    const DWORD dwHeaderSize = 16;      // bytes skipped before the compressed payload

    if (pMemory == NULL)
    {
        m_nErrorCode = invalid_source_buffer;
        return WBEM_E_FAILED;
    }

    DWORD dwCompressedSize = 0, dwExpandedSize = 0;

    // Only inspect the header if the buffer is large enough to contain one.
    if (dwMemSize >= dwHeaderSize &&
        IsBMOFBuffer((BYTE *)pMemory, dwCompressedSize, dwExpandedSize))
    {
        bool bRet = false;

        // The compressed size comes from the buffer itself; refuse a header
        // that claims more payload than the caller actually supplied, since
        // the decompressor would otherwise read past the end of pMemory.
        if (dwCompressedSize <= dwMemSize - dwHeaderSize)
            bRet = CreateBufferFromBMOF((BYTE *)pMemory + dwHeaderSize, dwCompressedSize, dwExpandedSize);

        if (bRet == false)
            m_nErrorCode = invalid_source_buffer;
    }
    else
    {
        m_bUnicode = false;
        BuildBuffer(dwMemSize+4, NULL, pMemory, pMemory+dwMemSize);
    }

    return (m_nErrorCode == no_error) ? S_OK : WBEM_E_FAILED;
}
// Checks whether the file contains a binary MOF and, if it does, loads
// (and if necessary decompresses) the binary data.
bool CMofLexer::ProcessBMOFFile(FILE *fp,TCHAR * szFilename) {
// read the first 20 bytes
BYTE Test[TEST_SIZE]; int iRet = fread(Test, 1, TEST_SIZE, fp); if(iRet != TEST_SIZE) { // if we cant read even the header, it must not be a BMOF
return false; }
DWORD dwCompressedSize, dwExpandedSize;
// Test if the mof is binary
if(!IsBMOFBuffer(Test, dwCompressedSize, dwExpandedSize)) { // not a binary mof. This is the typical case
return false; }
// get the compression type, and the sizes
if( 0 != fseek(fp, 0, SEEK_SET)) return false;
DWORD dwSig, dwCompType; iRet = fread(&dwSig, sizeof(DWORD), 1, fp); iRet = fread(&dwCompType, sizeof(DWORD), 1, fp); iRet = fread(&dwCompressedSize, sizeof(DWORD), 1, fp); iRet = fread(&dwExpandedSize, sizeof(DWORD), 1, fp);
// Make sure the compression type is one we understand!
if(dwCompType != 0 && dwCompType != 1) { return FALSE; }
m_pDataSrc = new BMOFDataSrc(szFilename); if (NULL == m_pDataSrc) return false;
// If there was no compression, just read the data
if(dwCompType == 0) { m_pBuff = (WCHAR *)new BYTE[dwExpandedSize]; if(m_pBuff == NULL) { return false; } iRet = fread(m_pBuff, dwExpandedSize, 1, fp); m_bBMOF = true; m_pToFar = (BYTE *)m_pBuff + dwExpandedSize; return true; }
// Allocate storage for the compressed data
BYTE * pCompressed = new BYTE[dwCompressedSize]; if(pCompressed == NULL) { return false; }
// Read the compressed data.
iRet = fread(pCompressed, 1, dwCompressedSize,fp); if((DWORD)iRet != dwCompressedSize) { delete pCompressed; return false; }
// Convert from compress into something we can use later
bool bRet = CreateBufferFromBMOF(pCompressed, dwCompressedSize, dwExpandedSize); delete pCompressed; return bRet; }
// Creates the working buffer from a compressed binary mof buffer.
// Expands compressed binary MOF data into a freshly allocated m_pBuff.
// Parameters:
//   pCompressed      = the compressed bytes.
//   dwCompressedSize = number of bytes at pCompressed.
//   dwExpandedSize   = size of the output buffer to allocate, and the size
//                      the decompressor must report for success.
// Return value:
//   true if Mrci1Decompress() produced exactly dwExpandedSize bytes, in
//   which case m_bBMOF is set; false otherwise.
bool CMofLexer::CreateBufferFromBMOF(byte * pCompressed, DWORD dwCompressedSize, DWORD dwExpandedSize)
{
    // Release any earlier buffer. It was allocated as a BYTE array, so it
    // has to be released with the array form of delete.
    if (m_pBuff)
    {
        delete [] (BYTE *)m_pBuff;
        m_pBuff = NULL;
    }

    m_pBuff = (WCHAR *)new BYTE[dwExpandedSize];
    if (m_pBuff == NULL)
        return false;
    m_pToFar = (BYTE *)m_pBuff + dwExpandedSize;

    // Decompress the data
    CMRCICompression * pCompress = new CMRCICompression;
    if (pCompress == NULL)
        return false;
    CDeleteMe<CMRCICompression> dm(pCompress);

    DWORD dwResSize = pCompress->Mrci1Decompress(pCompressed, dwCompressedSize,
                                                 (BYTE *)m_pBuff, dwExpandedSize);
    bool bRet = (dwResSize == dwExpandedSize);
    if (bRet)
        m_bBMOF = true;

    return bRet;
}
// Constructor for file-based parsing.
// Constructor for file-based parsing.
// Parameters:
//   pFilePath = path of the MOF file; environment variable references in
//               it are expanded before the file is opened.
//   pDbg      = debug/trace object, stored in m_pDbg and passed to Trace().
// A binary MOF is loaded directly by ProcessBMOFFile(). Any other file is
// run through WriteFileToTemp() into a temporary file, and a data source is
// then built on that temporary file. Failures are reported in m_nErrorCode.
CMofLexer::CMofLexer(const TCHAR *pFilePath, PDBG pDbg)
{
    m_bUnicode = FALSE;
    m_pDbg = pDbg;
    Init();

    if (pFilePath == NULL)
    {
        m_nErrorCode = file_not_found;
        return;
    }

    // Expand environment variables. A zero result is a failure and a result
    // larger than the buffer means the expansion did not fit; in both cases
    // fall back to the path exactly as given.
    TCHAR szExpandedFilename[MAX_PATH+1];
    const DWORD cchExpanded = MAX_PATH+1;
    DWORD nRes = ExpandEnvironmentStrings(pFilePath, szExpandedFilename, cchExpanded);
    if (nRes == 0 || nRes > cchExpanded)
        StringCchCopy(szExpandedFilename, cchExpanded, pFilePath);

    if (lstrlen(szExpandedFilename))
    {
        Trace(true, pDbg, PARSING_MSG, szExpandedFilename);
    }

    // Make sure the file exists and can be opened
    FILE *fp;
#ifdef UNICODE
    fp = _wfopen(szExpandedFilename, L"rb");
#else
    fp = fopen(szExpandedFilename, "rb");
#endif
    if (!fp)
    {
        // Exactly one error code applies; without the else chain a missing
        // file was always reported as file_io_error.
        if (errno == ENOENT)
            m_nErrorCode = file_not_found;
        else if (errno == EACCES)
            m_nErrorCode = access_denied;
        else
            m_nErrorCode = file_io_error;
        return;
    }
    else
    {
        CfcloseMe cm(fp);

        // If the file contains a binary mof, handle it here
        if (ProcessBMOFFile(fp, szExpandedFilename))
        {
            return;
        }
    }

    // Create a temp file name
    TCHAR cTempFileName[MAX_PATH+1];
    TCHAR cTempPath[MAX_PATH+1];
    if (0 == GetTempPath(MAX_PATH+1, cTempPath))
    {
        m_nErrorCode = problem_creating_temp_file;
        return;
    }
    if (0 == GetTempFileName(cTempPath, TEXT("tmp"), 0, cTempFileName))
    {
        m_nErrorCode = problem_creating_temp_file;
        return;
    }

    // Create the temp file
    FILE *fpTemp;
#ifdef UNICODE
    fpTemp = _wfopen(cTempFileName, L"wb+");
#else
    fpTemp = fopen(cTempFileName, "wb+");
#endif
    if (fpTemp == 0)
    {
        m_nErrorCode = problem_creating_temp_file;
        return;
    }

    CFlexArray sofar;   // used to make sure we dont get into an infinite loop

    SCODE sc = WriteFileToTemp(szExpandedFilename, fpTemp, sofar, pDbg, this);

    for (int iCnt = 0; iCnt < sofar.Size(); iCnt++)
    {
        char * pTemp = (char *)sofar.GetAt(iCnt);
        delete pTemp;
    }

    if (sc != S_OK)
    {
        if (m_nErrorCode == no_error)
            m_nErrorCode = preprocessor_error;
        fclose(fpTemp);
        DeleteFile(cTempFileName);
        return;
    }

    // Determine the size of the file. This has to happen while the stream
    // is still open; it used to be queried after fclose().
    long lEnd = -1;
    if (0 == fseek(fpTemp, 0, SEEK_END))
        lEnd = ftell(fpTemp);
    fclose(fpTemp);

    if (lEnd < 0)
    {
        m_nErrorCode = file_io_error;
        DeleteFile(cTempFileName);
        return;
    }

    long lSize = lEnd + 6;  // add a bit extra for ending space and null

    // The temp file will be little endian unicode
    lSize /= 2;
    m_bUnicode = TRUE;

    // This will create a DataSrc object which will clean up the temp file
    BuildBuffer(lSize, cTempFileName, NULL, NULL);
}
// Destructor.
// Releases the expanded BMOF buffer, the token work buffer and the data
// source object.
CMofLexer::~CMofLexer()
{
    // Both buffers come from array new (m_pBuff as a BYTE array, m_pWorkBuf
    // as a wchar_t array), so the array form of delete is required.
    // Deleting a null pointer is a no-op.
    delete [] (BYTE *)m_pBuff;
    delete [] m_pWorkBuf;
    delete m_pDataSrc;
}
// iswodigit
// Returns TRUE if it is a valid octal character. '0' to '7'.
// Octal digit test: TRUE for any character wbem_iswdigit() accepts other
// than '8' and '9'.
BOOL iswodigit(wchar_t wcTest)
{
    if (!wbem_iswdigit(wcTest))
        return FALSE;

    return (wcTest == L'8' || wcTest == L'9') ? FALSE : TRUE;
}
// CMofLexer::OctalConvert
// Converts an octal escape sequence into a character and returns the number
// of digits converted. At most 3 digits are converted, and if it isn't
// a wchar, the digits can't add up to more than 0377
int CMofLexer::OctalConvert(wchar_t *pResult, LexState lsCurr) { int iNum = 0; wchar_t wcTest; *pResult = 0; for(wcTest = GetChar(iNum+1); iswodigit(wcTest) && iNum < 3; iNum++, wcTest = GetChar(iNum+1)) { *pResult *= 8; *pResult += wcTest - L'0'; } if((lsCurr == wstring || lsCurr == wcharacter) && *pResult >0xff) m_bBadString = TRUE; return iNum; }
// CMofLexer::HexConvert
// Converts a hex escape sequence into a character and returns the number
// of digits converted.
// Converts the hex digits of an escape sequence into a character value.
// The digits are read starting two characters past the current position.
// Parameters:
//   pResult = receives the converted value.
//   lsCurr  = current lexer state; up to 4 digits are accepted in the
//             wstring and wcharacter states, otherwise up to 2.
// Return value:
//   -1 if no hex digit was found, otherwise the number of digits converted
//   plus one for the 'x' character.
// (The function body previously had no closing brace.)
int CMofLexer::HexConvert(wchar_t *pResult, LexState lsCurr)
{
    int iNum = 0;
    wchar_t wcTest;
    *pResult = 0;
    int iMax = (lsCurr == wstring || lsCurr == wcharacter) ? 4 : 2;

    for (wcTest = GetChar(iNum+2); iswxdigit(wcTest) && iNum < iMax;
         iNum++, wcTest = GetChar(iNum+2))
    {
        *pResult *= 16;
        if (wbem_iswdigit(wcTest))
            *pResult += wcTest - L'0';
        else
            *pResult += towupper(wcTest) - L'A' + 10;
    }

    if (iNum == 0)
        return -1;      // error, nothing was converted!

    return iNum+1;      // num converted plus the 'x' char!
}
// CMofLexer::ConvertEsc
// Processes escape characters. Returns size of sequence, a -1 indicates an
// error. Also, the *pResult is set upon success.
// Translates the escape sequence whose introducing backslash is the current
// character. Like C, the escape letters are case sensitive.
// Parameters:
//   pResult = receives the translated character on success.
//   lsCurr  = current lexer state, forwarded to the hex/octal converters.
// Return value:
//   Length of the sequence after the backslash (1 for the simple escapes,
//   or whatever HexConvert()/OctalConvert() report), or -1 on error.
int CMofLexer::ConvertEsc(wchar_t * pResult, LexState lsCurr)
{
    wchar_t wcMapped;

    switch (GetChar(1))
    {
    case L'x':
        return HexConvert(pResult, lsCurr);

    case L'n':  wcMapped = 0xa;   break;
    case L't':  wcMapped = 0x9;   break;
    case L'v':  wcMapped = 0xb;   break;
    case L'b':  wcMapped = 0x8;   break;
    case L'r':  wcMapped = 0xd;   break;
    case L'f':  wcMapped = 0xc;   break;
    case L'a':  wcMapped = 0x7;   break;
    case L'\\': wcMapped = L'\\'; break;
    case L'?':  wcMapped = L'?';  break;
    case L'\'': wcMapped = L'\''; break;
    case L'\"': wcMapped = L'\"'; break;

    default:
        // Anything else must start an octal sequence.
        if (iswodigit(GetChar(1)))
            return OctalConvert(pResult, lsCurr);
        return -1;
    }

    *pResult = wcMapped;
    return 1;
}
// ProcessStr
// Processes new characters once we are in the string state.
// Return "stop" if end of string.
// Handles the current character while the lexer is inside a string or
// character literal.
// Parameters:
//   pNewChar = receives the character to append to the token, with escape
//              sequences already converted.
//   lsCurr   = wstring or wcharacter.
//   piRet    = receives the token value whenever "stop" is returned.
// Return value:
//   stop if the literal ended (or is in error), new_style_comment or
//   old_style_comment if a comment follows a closing quote (m_bInString is
//   set so the string can be continued afterwards), otherwise lsCurr.
LexState CMofLexer::ProcessStr(wchar_t * pNewChar, LexState lsCurr, int * piRet)
{
    // Check for end of string if we are in the wstring state
    if (GetChar() == L'"' && lsCurr == wstring)
    {
        // Search for the next non white space character. If it is another
        // string then these strings need to be combined.
        int iCnt = 1;
        int iMinMove = 0;
        wchar_t wcTest;
        for (wcTest = GetChar(iCnt); wcTest != L'\0'; iCnt++, wcTest = GetChar(iCnt))
        {
            if (m_pDataSrc->WouldBePastEnd(iCnt))
            {
                // dont go past eof!! This was the last string in the file.
                *piRet = (m_bBadString) ? TOK_ERROR : TOK_LPWSTR;
                return stop;
            }
            if (wcTest == L'"' && GetChar(iCnt+1) == L'"')
            {
                iCnt++;
                iMinMove = iCnt;
                continue;
            }
            if (!iswspace(wcTest))
                break;
        }

        // a-levn: no ascii strings are supported. "abc" means unicode.
        if (lsCurr == wstring)
        {
            if (wcTest == L'/')
            {
                // might be an intervening comment
                if (GetChar(iCnt+1) == L'/')
                {
                    m_bInString = TRUE;
                    MovePtr(iCnt+1);
                    return new_style_comment;
                }
                else if (GetChar(iCnt+1) == L'*')
                {
                    m_bInString = TRUE;
                    MovePtr(iCnt+1);
                    return old_style_comment;
                }
            }

            if (wcTest != L'"')
            {
                // normal way for string to end
                *piRet = (m_bBadString) ? TOK_ERROR : TOK_LPWSTR;
                MovePtr(iMinMove);
                return stop;
            }
            else
                MovePtr(iCnt + 1);  // skip over '"' of the adjoining string
        }
    }

    // If we are in character state, check for end
    if (GetChar(0) == L'\'' && lsCurr == wcharacter)
    {
        if (m_bBadString || m_pDataSrc->PastEnd() ||
            ((m_pDataSrc->GetAt(-1) == L'\'') && m_pDataSrc->GetAt(-2) != L'\\'))
            *piRet = TOK_ERROR;
        else
            *piRet = TOK_WCHAR;
        return stop;
    }

    // Not at end, get the character, possibly converting escape sequences
    if (GetChar(0) == L'\\')
    {
        int iSize = ConvertEsc(pNewChar, lsCurr);
        m_i8 = *pNewChar;
        if (iSize < 1)
            m_bBadString = TRUE;
        else
        {
            MovePtr(iSize);
            if (lsCurr == wcharacter && GetChar(1) != L'\'')
            {
                *piRet = TOK_ERROR;
                return stop;
            }
        }
    }
    else if (GetChar(0) == '\n')
    {
        // A raw newline inside a literal is an error. The token value must
        // be set here too: the caller returns *piRet whenever it sees stop,
        // and this path used to leave it uninitialized.
        m_bBadString = TRUE;
        *piRet = TOK_ERROR;
        MovePtr(-1);
        return stop;
    }
    else
    {
        *pNewChar = GetChar(0);
        m_i8 = *pNewChar;
        if (*pNewChar == 0 || *pNewChar > 0xfffeu ||
            (GetChar(1) != L'\'' && lsCurr == wcharacter))
        {
            *piRet = TOK_ERROR;
            return stop;
        }
    }

    return lsCurr;
}
// BinaryToInt
// Converts a character representation of a binary, such as "101b" into
// an integer.
// Converts the text of a binary constant, such as "101b", into an integer.
// Parameters:
//   pConvert = text to convert: an optional '+' or '-', then '0'/'1'
//              digits, then a 'b' or 'B'.
//   i64Res   = receives the value on success.
// Return value:
//   FALSE if there are more than 64 digits or the digits are not followed
//   by 'b'/'B'; TRUE otherwise.
BOOL BinaryToInt(wchar_t * pConvert, __int64& i64Res)
{
    BOOL bNeg = FALSE;
    if (*pConvert == L'-' || *pConvert == L'+')
    {
        if (*pConvert == L'-')
            bNeg = TRUE;
        pConvert++;
    }

    // Accumulate in an unsigned value: a 64 digit constant uses the top
    // bit, which would overflow a signed accumulator.
    unsigned __int64 uValue = 0;
    const wchar_t * pStart = pConvert;
    for (; *pConvert == L'0' || *pConvert == L'1'; pConvert++)
    {
        if (pConvert - pStart > 63)
            return FALSE;   // Its too long

        uValue <<= 1;
        if (*pConvert == L'1')
            uValue |= 1;
    }

    if (towupper(*pConvert) != L'B')
        return FALSE;

    if (bNeg)
        uValue = 0 - uValue;    // well defined wrap-around negation

    i64Res = (__int64)uValue;
    return TRUE;
}
// Parses a 64 bit integer from text and verifies the conversion.
// Parameters:
//   pData   = text to parse.
//   pFormat = swscanf/printf style format used both to read the value and
//             to print it back.
//   p64     = receives the value.
// Return value:
//   TRUE only if swscanf converted one item and printing the value with the
//   same format reproduces the input text (ignoring case and leading zeros).
BOOL GetInt(WCHAR *pData, WCHAR * pFormat, __int64 * p64)
{
    static WCHAR wszRoundTrip[100];

    if (1 != swscanf(pData, pFormat, p64))
        return FALSE;

    // Print the value back out so it can be compared with the input.
    StringCchPrintfW(wszRoundTrip, 100, pFormat, *p64);

    // Skip leading zeros of the input, but always keep its last character.
    WCHAR * pSignificant = pData;
    while (*pSignificant == L'0' && pSignificant[1])
        pSignificant++;

    return (wbem_wcsicmp(wszRoundTrip, pSignificant) == 0) ? TRUE : FALSE;
}
// CMofLexer::iGetNumericType()
// Return value:
// What type of numeric constant the current pointer is pointing to.
// Classifies the numeric text currently held in m_pWorkBuf.
// The text is scanned once to count digits, dots, exponent markers and
// suffix letters. Integer forms (hex "0x...", binary "...b", octal with a
// leading zero, or plain decimal) are then converted and stored in m_i8.
// Returns TOK_ERROR for malformed text, TOK_SIGNED64_NUMERIC_CONST when an
// integer starts with '-', and TOK_UNSIGNED64_NUMERIC_CONST for any other
// integer.
// NOTE(review): the visible body stops after the integer case -- there is
// no return for the floating point case and no closing brace. The tail of
// this function appears to be missing and needs to be restored.
int CMofLexer::iGetNumericType(void) {
// TRUE if x is a 'U' or 'L' suffix letter, in either case.
#define isuorl(x) (towupper(x) == L'U' || towupper(x) == L'L')
wchar_t * pTemp; BOOL bBinary = FALSE; wchar_t * pStart; // first character not including leading - or +
int iNumBinaryDigit = 0; int iNumDigit = 0; int iNumOctalDigit = 0; int iNumDot = 0; int iNumE = 0;
// pEnd addresses the last character of the text.
wchar_t * pEnd = m_pWorkBuf + wcslen(m_pWorkBuf) - 1;
if(*m_pWorkBuf == L'-' || *m_pWorkBuf == L'+') pStart = m_pWorkBuf+1; else pStart = m_pWorkBuf; int iLen = wcslen(pStart); // length not including leading '-' or '+'
BOOL bHex = (pStart[0] == L'0' && towupper(pStart[1]) == L'X');
// loop through and count the number of various digit types, decimal points, etc.
// ==============================================================================
for(pTemp = pStart; *pTemp; pTemp++) { // Check for 'U' or 'l' characters at the end. They are an error
// if the number is a float, or not in the last two characters, or
// if a 'u' is present along with a '-' in the first character
// ===============================================================
if (isuorl(*pTemp)) { if(pTemp < pEnd -1 || !isuorl(*pEnd) || iNumDot || iNumE) return TOK_ERROR; if(towupper(*pTemp) == L'U' && *m_pWorkBuf == L'-') return TOK_ERROR; iLen--; continue; } // If we previously hit the binary indicator, the only thing that
// should be after the b is U or L characters.
// ==============================================================
if(bBinary) return TOK_ERROR;
// If in hex mode, only allow for x in second digit and hex numbers.
// anything else is an error.
// =================================================================
if(bHex) { if(pTemp < pStart+2) // ignore the 0X
continue; if(!iswxdigit(*pTemp)) return TOK_ERROR; iNumDigit++; continue; }
// Number is either a non hex integer or a float.
// Do a count of various special digit types, decimal points, etc.
// ===============================================================
if(*pTemp == L'0' || *pTemp == L'1') iNumBinaryDigit++; if(iswodigit(*pTemp)) iNumOctalDigit++;
// each character should fall into one of the following categories
if(wbem_iswdigit(*pTemp)) iNumDigit++; else if(*pTemp == L'.') { iNumDot++; if(iNumDot > 1 || iNumE > 0) return TOK_ERROR; } else if(towupper(*pTemp) == L'E') { if(iNumDigit == 0 || iNumE > 0) return TOK_ERROR; iNumDigit=0; // to ensure at least one digit after the 'e'
iNumE++; } else if(*pTemp == L'-' || *pTemp == L'+') // ok if after 'E'
{ if(pTemp > pStart && towupper(pTemp[-1]) == L'E') continue; else return TOK_ERROR; } else if (towupper(*pTemp) == L'B') bBinary = TRUE; else return TOK_ERROR; }
// Make sure there are enough digits
// =================================
if(iNumDigit < 1) return TOK_ERROR;
// take care of integer case.
// ==========================
// NOTE(review): the hex and octal conversions below start at fixed offsets
// from m_pWorkBuf (+2 and +1) rather than from pStart, so they do not skip a
// leading sign -- confirm whether signed hex/octal text is expected here.
if(bHex || bBinary || iNumDigit == iLen) { __int64 i8 = 0; if(bHex) { if(!GetInt(m_pWorkBuf+2, L"%I64x", &i8)) return TOK_ERROR; } else if(bBinary) { if(!BinaryToInt(m_pWorkBuf, i8)) return TOK_ERROR; } else if(pStart[0] != L'0' || wcslen(pStart) == 1) { if(*m_pWorkBuf == '-') { if(!GetInt(m_pWorkBuf, L"%I64i", &i8)) return TOK_ERROR; } else { if(!GetInt(m_pWorkBuf, L"%I64u", &i8)) return TOK_ERROR; } } else if(iNumDigit == iNumOctalDigit) { if(!GetInt(m_pWorkBuf+1, L"%I64o", &i8)) return TOK_ERROR; } else return TOK_ERROR;
// Make sure the number isnt too large
// ===================================
m_i8 = i8; if(*m_pWorkBuf == L'-') return TOK_SIGNED64_NUMERIC_CONST; else return TOK_UNSIGNED64_NUMERIC_CONST; }
// must be a floating point, no conversion needed.
// CMofLexer::MovePtr
// Moves pointer farther into the buffer. Note that the farthest it will go
// is one past the last valid WCHAR which is the location of an extra NULL
// Moves the current position by iNum logical characters, forward when iNum
// is positive and backward when it is negative, keeping m_nLine and
// m_nStartOfLinePos up to date. A backslash immediately followed by a line
// break ("\\\n" or "\\\r\n") is skipped without counting as a character.
// Moving forward stops as soon as the data source reports PastEnd(); moving
// backward stops at the start of the data.
void CMofLexer::MovePtr(int iNum)
{
    int iSoFar = 0;
    int iChange = (iNum > 0) ? 1 : -1;
    int iNumToDo = (iNum > 0) ? iNum : -iNum;

    while (iSoFar < iNumToDo)
    {
        if (iChange == 1)
        {
            // Going forward: update the pointer and make sure it is still
            // in an acceptable range.
            m_pDataSrc->Move(iChange);
            if (m_pDataSrc->PastEnd())      // points to the NULL
                return;

            // If going forward and a slash cr is hit, do an extra skip.
            WCHAR wCurr = m_pDataSrc->GetAt(0);
            if (wCurr == L'\\' && m_pDataSrc->GetAt(1) == L'\n')
            {
                m_nLine++;
                m_pDataSrc->Move(1);        // extra increment
                m_nStartOfLinePos = m_pDataSrc->GetPos();
                continue;
            }
            else if (wCurr == L'\\' && m_pDataSrc->GetAt(1) == L'\r'
                                    && m_pDataSrc->GetAt(2) == L'\n')
            {
                m_nLine++;
                m_pDataSrc->Move(2);        // extra increment
                m_nStartOfLinePos = m_pDataSrc->GetPos();
                continue;
            }
            else if (wCurr == L'\n')
            {
                m_nLine++;
                m_nStartOfLinePos = m_pDataSrc->GetPos();
            }
        }
        else
        {
            // If going backward and a cr is left, then decrement the line
            if (m_pDataSrc->GetAt(0) == L'\n' && m_pDataSrc->GetPos() > 0)
            {
                m_nLine--;
                m_nStartOfLinePos = m_pDataSrc->GetPos();
            }

            // Update the pointer and make sure it is still in an
            // acceptable range.
            m_pDataSrc->Move(iChange);
            if (m_pDataSrc->GetPos() < 0)
            {
                m_pDataSrc->MoveToStart();
                return;
            }

            // If going backward and a slash cr is hit, do an extra skip.
            WCHAR wCurr = m_pDataSrc->GetAt(0);
            if (wCurr == L'\n' && m_pDataSrc->GetAt(-1) == L'\\')
            {
                m_nLine--;
                m_nStartOfLinePos = m_pDataSrc->GetPos();
                m_pDataSrc->Move(-1);       // extra decrement
                continue;
            }
            else if (wCurr == L'\n' && m_pDataSrc->GetAt(-1) == L'\r'
                                    && m_pDataSrc->GetAt(-2) == L'\\')
            {
                m_nLine--;
                m_nStartOfLinePos = m_pDataSrc->GetPos();
                m_pDataSrc->Move(-2);       // extra decrement
                continue;
            }
        }

        // One logical character has been passed, in either direction.
        iSoFar++;
    }
}
// CMofLexer::GetChar()
// Returns a character at an offset from the current character pointer.
// Returns the character iNum logical characters away from the current
// position, leaving the position where it was. Offsets 0 and (usually) 1
// are read directly; everything else is done by moving there and back with
// MovePtr().
wchar_t CMofLexer::GetChar(int iNum)
{
    if (iNum == 0)
        return m_pDataSrc->GetAt(0);

    if (iNum == 1)
    {
        // Shortcut, unless the next character is a backslash or a newline,
        // in which case MovePtr() has to be used.
        const wchar_t wcNext = m_pDataSrc->GetAt(1);
        if (wcNext != L'\\' && wcNext != '\n')
            return wcNext;
    }

    MovePtr(iNum);
    const wchar_t wcThere = m_pDataSrc->GetAt(0);
    MovePtr(-iNum);
    return wcThere;
}
// CMofLexer::iGetColumn()
// Gets the current column value. Counts back to the previous Cr or the
// start of buffer.
// Column of the current position: its distance from the recorded start of
// the current line.
int CMofLexer::iGetColumn()
{
    return m_pDataSrc->GetPos() - m_nStartOfLinePos;
}
// CMofLexer::bOKNumericAddition()
// Returns true if the test character could be added to numeric buffer.
// Note that it returns true if an alphanumeric, or a + or - and the last
// character in the working buffer is an 'E'
// Decides whether cTest may be appended to the numeric text being collected
// in m_pWorkBuf. Alphanumerics and '.' always may. A '+' or '-' may only
// directly follow an 'E'/'e', and only if the second character of the
// buffer is not 'X'/'x'.
BOOL CMofLexer::bOKNumericAddition(wchar_t cTest)
{
    if (wbem_iswalnum(cTest) || cTest == L'.')
        return TRUE;

    // Everything else has to be an exponent sign.
    if (cTest != L'+' && cTest != L'-')
        return FALSE;

    int iLen = wcslen(m_pWorkBuf);
    if (iLen <= 0)
        return FALSE;

    if (towupper(m_pWorkBuf[iLen-1]) != L'E')
        return FALSE;

    return (towupper(m_pWorkBuf[1]) != L'X') ? TRUE : FALSE;
}
// CMofLexer::SpaceAvailable()
// Returns TRUE if there is enough space in the working buffer to add
// another character. It will expand the buffer if need be.
// Makes sure one more character plus a terminator fits in the work buffer,
// growing the buffer by ADDITIONAL_ALLOC elements if necessary.
// Return value:
//   TRUE if there is room; FALSE if the buffer already exceeds MAX_ALLOC
//   elements or the larger buffer could not be allocated.
BOOL CMofLexer::SpaceAvailable()
{
    // most common case is that there is already space available
    int iNumWChar = (int)(m_pEndOfText - m_pWorkBuf) + 1;
    if (iNumWChar < m_nWorkBufSize)
        return TRUE;

    if (m_nWorkBufSize > MAX_ALLOC)     // programs need limits!
        return FALSE;

    // Allocate a bigger buffer and copy the old stuff into it
    long nNewSize = m_nWorkBufSize + ADDITIONAL_ALLOC;
    wchar_t * pNew = new wchar_t[nNewSize];
    if (pNew == NULL)
        return FALSE;

    // Copy in bytes; do not assume a particular wchar_t width.
    memcpy(pNew, m_pWorkBuf, m_nWorkBufSize * sizeof(wchar_t));

    delete [] m_pWorkBuf;               // allocated with new[]
    m_nWorkBufSize = nNewSize;
    m_pWorkBuf = pNew;
    m_pEndOfText = m_pWorkBuf + iNumWChar - 1;
    return TRUE;
}
// NextToken()
// This function contains the DFA recognizer for MOF tokens. It works
// entirely in UNICODE characters, so the NextChar() function is expected
// to pretranslate ANSI or DBCS source streams into wide characters.
// Return value:
// One of the TOK_ constants, or TOK_EOF when the end of the input
// stream has been reached. If the user calls PushBack(), then
// this will return the symbol which was pushed back onto the input
// stream. Only one level of push back is supported.
int CMofLexer::NextToken(bool bDontAllowWhitespace) { int nToken = TOK_ERROR; LexState eState = start; m_bBadString = FALSE; *m_pWorkBuf = 0; m_pEndOfText = m_pWorkBuf; m_bInString = FALSE;
#define CONSUME(c) \
if (!SpaceAvailable()) return TOK_ERROR;\ *m_pEndOfText++ = (c), *m_pEndOfText = 0;
wchar_t c; for (MovePtr(1); m_nErrorCode == no_error && !m_pDataSrc->PastEnd(); MovePtr(1)) { c = GetChar();
// *************************************************************************
// General 'start' state entry.
// ============================
if (eState == start) { m_nTokCol = iGetColumn(); m_nTokLine = m_nLine; // If a non-newline whitespace and we are in 'start', then just strip it.
// =======================================================================
if (iswspace(c) || c == L'\n') if(bDontAllowWhitespace) return TOK_ERROR; else continue;
// Check for string continuation
// =============================
if(m_bInString) { if(c == '"') { eState = wstring; continue; } else { // string ended after all
MovePtr(-1); return TOK_LPWSTR; } }
// Handle all single character tokens.
// ===================================
if (nToken = SingleCharToken(c)) return nToken;
// Start of comment, we have to get either another / or a *.
// The style of comment depends on what you get. To get
// neither is an error
// ======================================================
if (c == L'/') { if (GetChar(1) == L'/') { eState = new_style_comment; continue; } else if (GetChar(1) == L'*') { eState = old_style_comment; MovePtr(1); // skip an extra so not to be fooled by /*/
continue; } else return TOK_ERROR; }
// Check for strings or characters. Like C, 'L' is case sensitive
// ================================
if (c == L'"' || c == L'\'') { eState = (c == L'"') ? wstring : wcharacter; continue; }
// Tokens beginning with these letters might be a uuid.
// ====================================================
if (iswxdigit(c) && ValidGuid()) { eState = uuid; CONSUME(c); continue; }
// Check for identifiers which start with either a letter or _
// ===========================================================
if (iswwbemalpha(c) || c == L'_') { eState = ident; CONSUME(c); continue; }
// Check for a leading minus sign or digits. Either indicates
// a numeric constant
// ===========================================================
if (wbem_iswdigit(c) || c == L'-') { eState = numeric; CONSUME(c); continue; }
// If the first character was a '.', then it might be a
// float or a single byte token.
// ====================================================
if (c == L'.') { if (wbem_iswdigit(GetChar(1))) { eState = numeric; CONSUME(c); continue; } return TOK_DOT; }
// If here, an unknown token.
// ==========================
break; } // end of if (eState == start)
// ************************************************************
// Some state other than start
// If we are in a quoted string or character.
// ==========================================
if (eState == wstring || eState == wcharacter) { wchar_t wTemp; // might be converted esc sequence
int iRet; LexState lsNew = ProcessStr(&wTemp,eState,&iRet); if(stop == lsNew) { return iRet; } else { eState = lsNew; }
if(eState == wstring || eState == wcharacter) { CONSUME(wTemp); } // else we stepped out of the string and into a comment.
continue; }
// numeric state, undetermined numeric constant.
// =============================================
if (eState == numeric) { if(bOKNumericAddition(c)) { CONSUME(c); continue; }
MovePtr(-1); return iGetNumericType(); }
// If we are getting an identifer, we continue
// until a nonident char is hit.
// ============================================
if (eState == ident) { if (wbem_iswdigit(c) || iswwbemalpha(c) || c == L'_') { CONSUME(c); continue; }
MovePtr(-1); return KeywordFilter(m_pWorkBuf); }
// GUIDs are already verified, just load up the proper length
// ==========================================================
if (eState == uuid) { CONSUME(c); if(wcslen(m_pWorkBuf) >= UUIDLEN) return TOK_UUID; else continue; }
// Take care of comment states. New style comments "//" are
// terminated by a new line while old style end with "*/"
// =========================================================
if (eState == new_style_comment) { if (c == L'\n') { eState = start; } continue; }
if (eState == old_style_comment) { if (c == L'*') if(GetChar(1) == L'/') { MovePtr(1); eState = start; } continue; } break; // this is bad, got into strange state
// If we ended and the last thing was a string, the we are ok. This takes care
// of the case where the last token in a file is a string.
if ((eState == start || eState == new_style_comment) && m_bInString) { return TOK_LPWSTR; }
// return eof if we never got started, ex, bad file name
if(m_nErrorCode != no_error) return 0; if(m_pDataSrc->PastEnd() && (eState == start || eState == new_style_comment)) return 0; else { if(eState == old_style_comment) Trace(true, m_pDbg, UNEXPECTED_EOF, m_nTokLine); if(c == L'*' && GetChar(1) == L'/') Trace(true, m_pDbg, COMMENT_ERROR, m_nTokLine);
return TOK_ERROR; } }
// GetText
// Returns the text of the current token (the work buffer). If pLineDeclared
// is supplied it receives the line on which the token started.
const OLECHAR *CMofLexer::GetText(int *pLineDeclared)
{
    if (NULL != pLineDeclared)
    {
        *pLineDeclared = m_nTokLine;
    }

    return m_pWorkBuf;
}
// Moves the data source to the position recorded in pPos->m_iPos.
void CMofLexer::SetLexPosition(ParseState * pPos)
{
    m_pDataSrc->MoveToPos(pPos->m_iPos);
}
// Stores the data source's current position in pPos->m_iPos.
void CMofLexer::GetLexPosition(ParseState * pPos)
{
    pPos->m_iPos = m_pDataSrc->GetPos();
}