/*++ Copyright (C) 1995-2001 Microsoft Corporation Module Name: MOFLEX.CPP Abstract: Implementation for class CMofLexer, which tokenizes MOF files. ANSI, DBCS and UNICODE are supported. History: a-raymcc 11-Oct-95 Created. a-raymcc 27-Jan-96 Update for aliasing. a-davj 6-June-96 Added support for octal, hex and binary constants and line stitching, comment concatenation, escape characters and old style comments. --*/ #include "precomp.h" #include #include #include #include #include "mrciclass.h" #include "datasrc.h" #include "moflex.h" #include "preproc.h" #include "trace.h" #include "strings.h" #include "wbemcli.h" #define INIT_ALLOC 512 #define ADDITIONAL_ALLOC 10000 #define MAX_ALLOC 1000000 #define UUIDLEN 36 // The following is the table of tokens consisting of a // single character. // ===================================================== typedef struct { wchar_t cToken; int nSymbol; } SingleTok; SingleTok SingleTokenMap[] = { L'{', TOK_OPEN_BRACE, L'}', TOK_CLOSE_BRACE, L'[', TOK_OPEN_BRACKET, L']', TOK_CLOSE_BRACKET, L'(', TOK_OPEN_PAREN, L')', TOK_CLOSE_PAREN, L',', TOK_COMMA, L'=', TOK_EQUALS, L';', TOK_SEMI, L'&', TOK_AMPERSAND, L':', TOK_COLON, L'#', TOK_POUND, L'$', TOK_DOLLAR_SIGN, L'+', TOK_PLUS }; #define NUM_SINGLE_TOKENS (sizeof(SingleTokenMap)/sizeof(SingleTok)) // The following is the table of keywords which look like normal // identifiers. // ============================================================= typedef struct { OLECHAR *pKeyword; int nToken; } Keyword; static Keyword MofKeywords[] = { L"class", TOK_CLASS, L"instance", TOK_INSTANCE, L"null", TOK_KEYWORD_NULL, L"external", TOK_EXTERNAL, L"as", TOK_AS, L"ref", TOK_REF, L"of", TOK_OF, // L"object", TOK_OBJECT, L"typedef", TOK_TYPEDEF, L"subrange", TOK_SUBRANGE, L"pragma", TOK_PRAGMA, L"define", TOK_DEFINE, L"ifdef", TOK_IFDEF, L"include", TOK_INCLUDE, L"endif", TOK_ENDIF, L"ifndef", TOK_IFNDEF, L"enum", TOK_ENUM, L"AUTORECOVER", TOK_AUTORECOVER, L"true", TOK_TRUE, L"false", TOK_FALSE, L"interface", TOK_INTERFACE, L"ToInstance", TOK_TOINSTANCE, L"ToSubClass", TOK_TOSUBCLASS, L"EnableOverride", TOK_ENABLEOVERRIDE, L"DisableOverride", TOK_DISABLEOVERRIDE, L"NotToInstance", TOK_NOTTOINSTANCE, L"Amended", TOK_AMENDED, L"NotToSubClass", TOK_NOTTOSUBCLASS, L"Restricted", TOK_RESTRICTED, L"qualifier", TOK_QUALIFIER, L"ClassFlags", TOK_CLASSFLAGS, L"InstanceFlags", TOK_INSTANCEFLAGS, L"Amendment", TOK_AMENDMENT, L"void", TOK_VOID, L"deleteclass", TOK_DELETECLASS, L"FAIL", TOK_FAIL, L"NOFAIL", TOK_NOFAIL }; #define NUM_KEYWORDS (sizeof(MofKeywords)/sizeof(Keyword)) BOOL iswodigit(wchar_t wcTest); //*************************************************************************** // // SingleCharToken() // // This examines a single character of input and scans the table to // determine if it is one of the single-character tokens. // // Parameters: // c = The character being tested. // Return value: // Zero if no match, otherwise the TOK_ constant which identifies // the token. // //*************************************************************************** static int SingleCharToken(wchar_t c) { for (int i = 0; i < NUM_SINGLE_TOKENS; i++) if (SingleTokenMap[i].cToken == c) return SingleTokenMap[i].nSymbol; return 0; } //*************************************************************************** // // BOOL iswwbemalpha // // Used to test if a wide character is suitable for identifiers. // // Parameters: // c = The character being tested. // Return value: // TRUE if OK. // //*************************************************************************** BOOL iswwbemalpha(wchar_t c) { if(c == 0x5f || (0x41 <= c && c <= 0x5a) || (0x61 <= c && c <= 0x7a) || (0x80 <= c && c <= 0xfffd)) return TRUE; else return FALSE; } //*************************************************************************** // // KeywordFilter() // // This function examines an identifier string to determine if it is // in fact a keyword. // // Parameters: // pTokStr = a pointer to the string to be examined. // // Return value: // TOK_SIMPLE_IDENT if no match and no '_', or else the correct TOK_ value // for the keyword. // //*************************************************************************** static int KeywordFilter(wchar_t *pTokStr) { for (int i = 0; i < NUM_KEYWORDS; i++) if (wbem_wcsicmp(MofKeywords[i].pKeyword, pTokStr) == 0) return MofKeywords[i].nToken; wchar_t * pEnd; pEnd = pTokStr + wcslen(pTokStr) -1; if(*pTokStr != L'_' && *pEnd != L'_') return TOK_SIMPLE_IDENT; else return TOK_SYSTEM_IDENT; } //*************************************************************************** // // ValidGuid() // // Examines a character string to determine if it constitutes a valid // GUID. // // Return value: // TRUE if the string is a GUID, FALSE if not. // //*************************************************************************** BOOL CMofLexer::ValidGuid() { int i; int iSoFar = 0; #define HEXCHECK(n) \ for (i = 0; i < n; i++) \ if (!iswxdigit(GetChar(iSoFar++))) \ return FALSE; #define HYPHENCHECK() \ if (GetChar(iSoFar++) != L'-') \ return FALSE; HEXCHECK(8); HYPHENCHECK(); HEXCHECK(4); HYPHENCHECK(); HEXCHECK(4); HYPHENCHECK(); HEXCHECK(4); HYPHENCHECK(); HEXCHECK(12); return TRUE; } //*************************************************************************** // // CMofLexer::Init() // // Helper for first state of construction; inializing variables. // //*************************************************************************** void CMofLexer::Init() { m_nLine = 1; m_nStartOfLinePos = 0; m_bBMOF = false; m_wFile[0] = 0; m_nWorkBufSize = INIT_ALLOC; m_pWorkBuf = new wchar_t[m_nWorkBufSize]; m_pDataSrc = NULL; m_pBuff = NULL; // set in the constructors m_pToFar = NULL; m_nErrorCode = (m_pWorkBuf) ? no_error : memory_failure ; } //*************************************************************************** // // CMofLexer::BuildBuffer() // // Helper for last stage of construction; build the unicode buffer. Note // that this can be used by either the file or memory based constructor. // //*************************************************************************** void CMofLexer::BuildBuffer(long lSize, TCHAR * pFileName, char * pMemSrc, char * pMemToFar) { if(m_nErrorCode != no_error) return; // already failed! if(pFileName) #ifdef USE_MMF_APPROACH m_pDataSrc = new FileDataSrc1(pFileName); #else m_pDataSrc = new FileDataSrc(pFileName); #endif else m_pDataSrc = new BufferDataSrc(lSize, pMemSrc); if(m_pDataSrc == NULL) m_nErrorCode = memory_failure; else if(m_pDataSrc->GetStatus() != 0) m_nErrorCode = file_io_error; return; } //*************************************************************************** // // Constructor for in-memory parsing. // //*************************************************************************** CMofLexer::CMofLexer(PDBG pDbg) { m_bUnicode = false; m_pDbg = pDbg; Init(); } HRESULT CMofLexer::SetBuffer(char *pMemory, DWORD dwMemSize) { DWORD dwCompressedSize, dwExpandedSize; if(IsBMOFBuffer((BYTE *)pMemory, dwCompressedSize, dwExpandedSize)) { bool bRet = CreateBufferFromBMOF((BYTE *)pMemory + 16, dwCompressedSize, dwExpandedSize); if(bRet == false) m_nErrorCode = invalid_source_buffer; } else { m_bUnicode = false; BuildBuffer(dwMemSize+4, NULL, pMemory, pMemory+dwMemSize); } if(m_nErrorCode == no_error) return S_OK; else return WBEM_E_FAILED; } //*************************************************************************** // // Checks if the file contains a binarary mof and if it does, decompresses // the binary data. // //*************************************************************************** bool CMofLexer::ProcessBMOFFile(FILE *fp,TCHAR * szFilename) { // read the first 20 bytes BYTE Test[TEST_SIZE]; int iRet = fread(Test, 1, TEST_SIZE, fp); if(iRet != TEST_SIZE) { // if we cant read even the header, it must not be a BMOF return false; } DWORD dwCompressedSize, dwExpandedSize; // Test if the mof is binary if(!IsBMOFBuffer(Test, dwCompressedSize, dwExpandedSize)) { // not a binary mof. This is the typical case return false; } // get the compression type, and the sizes if( 0 != fseek(fp, 0, SEEK_SET)) return false; DWORD dwSig, dwCompType; iRet = fread(&dwSig, sizeof(DWORD), 1, fp); iRet = fread(&dwCompType, sizeof(DWORD), 1, fp); iRet = fread(&dwCompressedSize, sizeof(DWORD), 1, fp); iRet = fread(&dwExpandedSize, sizeof(DWORD), 1, fp); // Make sure the compression type is one we understand! if(dwCompType != 0 && dwCompType != 1) { return FALSE; } m_pDataSrc = new BMOFDataSrc(szFilename); if (NULL == m_pDataSrc) return false; // If there was no compression, just read the data if(dwCompType == 0) { m_pBuff = (WCHAR *)new BYTE[dwExpandedSize]; if(m_pBuff == NULL) { return false; } iRet = fread(m_pBuff, dwExpandedSize, 1, fp); m_bBMOF = true; m_pToFar = (BYTE *)m_pBuff + dwExpandedSize; return true; } // Allocate storage for the compressed data BYTE * pCompressed = new BYTE[dwCompressedSize]; if(pCompressed == NULL) { return false; } // Read the compressed data. iRet = fread(pCompressed, 1, dwCompressedSize,fp); if((DWORD)iRet != dwCompressedSize) { delete pCompressed; return false; } // Convert from compress into something we can use later bool bRet = CreateBufferFromBMOF(pCompressed, dwCompressedSize, dwExpandedSize); delete pCompressed; return bRet; } //*************************************************************************** // // Creates the working buffer from a compressed binary mof buffer. // //*************************************************************************** bool CMofLexer::CreateBufferFromBMOF(byte * pCompressed, DWORD dwCompressedSize, DWORD dwExpandedSize) { if(m_pBuff) delete m_pBuff; m_pBuff = (WCHAR *)new BYTE[dwExpandedSize]; if(m_pBuff == NULL) { return false; } m_pToFar = (BYTE *)m_pBuff + dwExpandedSize; // Decompress the data CMRCICompression * pCompress = new CMRCICompression; if(pCompress == NULL) return FALSE; CDeleteMe dm(pCompress); DWORD dwResSize = pCompress->Mrci1Decompress(pCompressed, dwCompressedSize, (BYTE *)m_pBuff, dwExpandedSize); bool bRet = dwResSize == dwExpandedSize; if(bRet) m_bBMOF = true; return bRet; } //*************************************************************************** // // Constructor for file-based parsing. // //*************************************************************************** CMofLexer::CMofLexer(const TCHAR *pFilePath, PDBG pDbg) { m_bUnicode = FALSE; m_pDbg = pDbg; Init(); FILE *fp; BOOL bBigEndian = FALSE; if(pFilePath == NULL) { m_nErrorCode = file_not_found; return; } TCHAR szExpandedFilename[MAX_PATH+1]; DWORD nRes = ExpandEnvironmentStrings(pFilePath, szExpandedFilename, FILENAME_MAX); if(nRes == 0) StringCchCopyW(szExpandedFilename, MAX_PATH+1, pFilePath); // Make sure the file exists and can be opened if(pFilePath && lstrlen(szExpandedFilename)) { Trace(true, pDbg, PARSING_MSG, szExpandedFilename); } #ifdef UNICODE fp = _wfopen(szExpandedFilename, L"rb"); #else fp = fopen(szExpandedFilename, "rb"); #endif if (!fp) { if (errno == ENOENT) m_nErrorCode = file_not_found; if (errno == EACCES) m_nErrorCode = access_denied; else m_nErrorCode = file_io_error; return; } else { CfcloseMe cm(fp); // If the file contains a binary mof, handle it here if(ProcessBMOFFile(fp,szExpandedFilename)) { return; } } // Create a temp file name TCHAR cTempFileName[MAX_PATH+1]; TCHAR cTempPath[MAX_PATH+1]; if( 0 == GetTempPath(MAX_PATH+1, cTempPath)) { m_nErrorCode = problem_creating_temp_file; return ; } if( 0 == GetTempFileName(cTempPath, TEXT("tmp"), 0, cTempFileName)) { m_nErrorCode = problem_creating_temp_file; return ; } // Create the temp file FILE *fpTemp; #ifdef UNICODE fpTemp = _wfopen(cTempFileName, L"wb+"); #else fpTemp = fopen(cTempFileName, "wb+"); #endif if(fpTemp == 0) { m_nErrorCode = problem_creating_temp_file; return; } else { CFlexArray sofar; // used to make sure we dont get into an infinite loop SCODE sc = WriteFileToTemp(szExpandedFilename, fpTemp, sofar, pDbg, this); fclose(fpTemp); for(int iCnt = 0; iCnt < sofar.Size(); iCnt++) { char * pTemp = (char * )sofar.GetAt(iCnt); delete pTemp; } if(sc != S_OK) { if(m_nErrorCode == no_error) m_nErrorCode = preprocessor_error; DeleteFile(cTempFileName); return; } // Determine the size of the file // ============================== fseek(fpTemp, 0, SEEK_END); long lSize = ftell(fpTemp) + 6; // add a bit extra for ending space and null NULL fseek(fpTemp, 0, SEEK_SET); // The temp file will be little endian unicode lSize /= 2; m_bUnicode = TRUE; bBigEndian = FALSE; // This will create a DataSrc object which will clean up the temp file BuildBuffer(lSize,cTempFileName ,NULL,NULL); } } //*************************************************************************** // // Destructor. // //*************************************************************************** CMofLexer::~CMofLexer() { if (m_pBuff) delete m_pBuff; if (m_pWorkBuf) delete m_pWorkBuf; delete m_pDataSrc; } //*************************************************************************** // // iswodigit // // Returns TRUE if it is a valid octal character. '0' to '7'. // //*************************************************************************** BOOL iswodigit(wchar_t wcTest) { if(wbem_iswdigit(wcTest) && wcTest != L'8' && wcTest != L'9') return TRUE; else return FALSE; } //*************************************************************************** // // CMofLexer::OctalConvert // // Converts an octal escape sequence into a character and returns the number // of digits converted. Only a max of 3 digits is converted and if it isnt // a wchar, the digits cant add up to more that 0377 // //*************************************************************************** int CMofLexer::OctalConvert(wchar_t *pResult, LexState lsCurr) { int iNum = 0; wchar_t wcTest; *pResult = 0; for(wcTest = GetChar(iNum+1); iswodigit(wcTest) && iNum < 3; iNum++, wcTest = GetChar(iNum+1)) { *pResult *= 8; *pResult += wcTest - L'0'; } if((lsCurr == wstring || lsCurr == wcharacter) && *pResult >0xff) m_bBadString = TRUE; return iNum; } //*************************************************************************** // // CMofLexer::HexConvert // // Converts a hex escape sequence into a character and returns the number // of digits converted. // //*************************************************************************** int CMofLexer::HexConvert(wchar_t *pResult, LexState lsCurr) { int iNum = 0; wchar_t wcTest; *pResult = 0; int iMax = (lsCurr == wstring||lsCurr == wcharacter) ? 4 : 2; for(wcTest = GetChar(iNum+2); iswxdigit(wcTest) && iNum < iMax; iNum++, wcTest = GetChar(iNum+2)) { *pResult *= 16; if(wbem_iswdigit(wcTest)) // sscanf(xx,"%1x",int) also works! *pResult += wcTest - L'0'; else *pResult += towupper(wcTest) - L'A' + 10; } if(iNum == 0) return -1; // error, nothing was converted! return iNum+1; // num converted plus the 'x' char! } //*************************************************************************** // // CMofLexer::ConvertEsc // // Processes escape characters. Returns size of sequence, a -1 indicates an // error. Also, the *pResult is set upon success. // //*************************************************************************** int CMofLexer::ConvertEsc(wchar_t * pResult, LexState lsCurr) { // like C, case sensitive switch(GetChar(1)) { case L'n': *pResult = 0xa; break; case L't': *pResult = 0x9; break; case L'v': *pResult = 0xb; break; case L'b': *pResult = 0x8; break; case L'r': *pResult = 0xd; break; case L'f': *pResult = 0xc; break; case L'a': *pResult = 0x7; break; case L'\\': *pResult = L'\\'; break; case L'?': *pResult = L'?'; break; case L'\'': *pResult = L'\''; break; case L'\"': *pResult = L'\"'; break; case L'x': return HexConvert(pResult,lsCurr); break; default: if(iswodigit(GetChar(1))) return OctalConvert(pResult,lsCurr); return -1; // error! break; } return 1; } //*************************************************************************** // // ProcessStr // // Processes new characters once we are in the string state. // // Return "stop" if end of string. // //*************************************************************************** LexState CMofLexer::ProcessStr(wchar_t * pNewChar, LexState lsCurr, int * piRet) { // Check for end of string if we are a wstring state if (GetChar() == L'"' && lsCurr == wstring) { // search for the next non white space character. If it is another // string then these strings need to be combined. int iCnt = 1; int iMinMove = 0; wchar_t wcTest; for(wcTest = GetChar(iCnt); wcTest != NULL; iCnt++, wcTest=GetChar(iCnt)) { if(m_pDataSrc->WouldBePastEnd(iCnt)) { // dont go past eof!! *piRet = (m_bBadString) ? TOK_ERROR : TOK_LPWSTR; return stop; // last string in the file } if(wcTest == L'"' && GetChar(iCnt+1) == L'"') { iCnt++; iMinMove = iCnt; continue; } if(!iswspace(wcTest)) break; } // a-levn: no ascii strings are supported. "abc" means unicode. if(lsCurr == wstring) { if(wcTest == L'/') { // might be an intervening comment // =============================== if (GetChar(iCnt+1) == L'/') { m_bInString = TRUE; MovePtr(iCnt+1); return new_style_comment; } else if (GetChar(iCnt+1) == L'*') { m_bInString = TRUE; MovePtr(iCnt+1); // skip an extra so not to be fooled by return old_style_comment; } } if(wcTest != L'"') { *piRet = (m_bBadString) ? TOK_ERROR : TOK_LPWSTR; MovePtr(iMinMove); // skip over '"' return stop; // normal way for string to end } else MovePtr(iCnt + 1); // skip over '"' } } // If we are in character state, check for end if (GetChar(0) == L'\'' && lsCurr == wcharacter) { if(m_bBadString || m_pDataSrc->PastEnd() || (m_pDataSrc->GetAt(-1) == L'\'') && m_pDataSrc->GetAt(-2) != L'\\') *piRet = TOK_ERROR; else *piRet = TOK_WCHAR; return stop; } // Not at end, get the character, possibly converting escape sequences if(GetChar(0) == L'\\') { int iSize = ConvertEsc(pNewChar,lsCurr); m_i8 = *pNewChar; if(iSize < 1) m_bBadString = TRUE; else { MovePtr(iSize); if(lsCurr == wcharacter && GetChar(1) != L'\'') { *piRet = TOK_ERROR; return stop; } } } else if(GetChar(0) == '\n') { m_bBadString = TRUE; MovePtr(-1); return stop; } else { *pNewChar = GetChar(0); m_i8 = *pNewChar; if(*pNewChar == 0 || *pNewChar > 0xfffeu || (GetChar(1) != L'\'' && lsCurr == wcharacter)) { *piRet = TOK_ERROR; return stop; } } return lsCurr; } //*************************************************************************** // // BinaryToInt // // Converts a character representation of a binary, such as "101b" into // an integer. // //*************************************************************************** BOOL BinaryToInt(wchar_t * pConvert, __int64& i64Res) { BOOL bNeg = FALSE; __int64 iRet = 0; WCHAR * pStart; if(*pConvert == L'-' || *pConvert == L'+') { if(*pConvert == L'-') bNeg = TRUE; pConvert++; } for(pStart = pConvert;*pConvert && (*pConvert == L'0' || *pConvert == L'1'); pConvert++) { if(pConvert - pStart > 63) return FALSE; // Its too long iRet *= 2; if(*pConvert == L'1') iRet += 1; } if(towupper(*pConvert) != L'B') return FALSE; if(bNeg) iRet = -iRet; i64Res = iRet; return TRUE; } BOOL GetInt(WCHAR *pData, WCHAR * pFormat, __int64 * p64) { static WCHAR wTemp[100]; if(swscanf(pData, pFormat, p64) != 1) return FALSE; // Make sure the data is ok. When comparing, make sure that leading 0's are skipped StringCchPrintfW(wTemp, 100, pFormat, *p64); WCHAR * pTemp; for(pTemp = pData; *pTemp == L'0' && pTemp[1]; pTemp++); if(wbem_wcsicmp(wTemp, pTemp)) return FALSE; return TRUE; } //*************************************************************************** // // CMofLexer::iGetNumericType() // // Return value: // What type of numeric constant the current pointer is pointing to. // //*************************************************************************** int CMofLexer::iGetNumericType(void) { #define isuorl(x) (towupper(x) == L'U' || towupper(x) == L'L') wchar_t * pTemp; BOOL bBinary = FALSE; wchar_t * pStart; // first charcter not including leading - or + int iNumBinaryDigit = 0; int iNumDigit = 0; int iNumOctalDigit = 0; int iNumDot = 0; int iNumE = 0; wchar_t * pEnd = m_pWorkBuf + wcslen(m_pWorkBuf) - 1; if(*m_pWorkBuf == L'-' || *m_pWorkBuf == L'+') pStart = m_pWorkBuf+1; else pStart = m_pWorkBuf; int iLen = wcslen(pStart); // length not including leading '-' or '+' BOOL bHex = (pStart[0] == L'0' && towupper(pStart[1]) == L'X'); // loop through and count the number of various digit types, decimal points, etc. // ============================================================================== for(pTemp = pStart; *pTemp; pTemp++) { // Check for 'U' or 'l' characters at the end. They are an error // if the number is a float, or not in the last two characters, or // if a 'u' is present along with a '-' in the first character // =============================================================== if (isuorl(*pTemp)) { if(pTemp < pEnd -1 || !isuorl(*pEnd) || iNumDot || iNumE) return TOK_ERROR; if(towupper(*pTemp) == L'U' && *m_pWorkBuf == L'-') return TOK_ERROR; iLen--; continue; } // If we previously hit the binary indicator, the only thing that // should be after the b is U or L characters. // ============================================================== if(bBinary) return TOK_ERROR; // If in hex mode, only allow for x in second digit and hex numbers. // anything else is an error. // ================================================================= if(bHex) { if(pTemp < pStart+2) // ignore the 0X continue; if(!iswxdigit(*pTemp)) return TOK_ERROR; iNumDigit++; continue; } // Number is either a non hex integer or a float. // Do a count of various special digit types, decimal points, etc. // =============================================================== if(*pTemp == L'0' || *pTemp == L'1') iNumBinaryDigit++; if(iswodigit(*pTemp)) iNumOctalDigit++; // each character should fall into one of the following catagories if(wbem_iswdigit(*pTemp)) iNumDigit++; else if(*pTemp == L'.') { iNumDot++; if(iNumDot > 1 || iNumE > 0) return TOK_ERROR; } else if(towupper(*pTemp) == L'E') { if(iNumDigit == 0 || iNumE > 0) return TOK_ERROR; iNumDigit=0; // to ensure at least one digit after the 'e' iNumE++; } else if(*pTemp == L'-' || *pTemp == L'+') // ok if after 'E' { if(pTemp > pStart && towupper(pTemp[-1]) == L'E') continue; else return TOK_ERROR; } else if (towupper(*pTemp) == L'B') bBinary = TRUE; else return TOK_ERROR; } // Make sure there are enough digits // ================================= if(iNumDigit < 1) return TOK_ERROR; // take care of integer case. // ========================== if(bHex || bBinary || iNumDigit == iLen) { __int64 i8 = 0; if(bHex) { if(!GetInt(m_pWorkBuf+2, L"%I64x", &i8)) return TOK_ERROR; } else if(bBinary) { if(!BinaryToInt(m_pWorkBuf, i8)) return TOK_ERROR; } else if(pStart[0] != L'0' || wcslen(pStart) == 1) { if(*m_pWorkBuf == '-') { if(!GetInt(m_pWorkBuf, L"%I64i", &i8)) return TOK_ERROR; } else { if(!GetInt(m_pWorkBuf, L"%I64u", &i8)) return TOK_ERROR; } } else if(iNumDigit == iNumOctalDigit) { if(!GetInt(m_pWorkBuf+1, L"%I64o", &i8)) return TOK_ERROR; } else return TOK_ERROR; // Make sure the number isnt too large // =================================== m_i8 = i8; if(*m_pWorkBuf == L'-') return TOK_SIGNED64_NUMERIC_CONST; else return TOK_UNSIGNED64_NUMERIC_CONST; } // must be a floating point, no conversion needed. return TOK_FLOAT_VALUE; } //*************************************************************************** // // CMofLexer::MovePtr // // Moves pointer farther into the buffer. Note that the farthest it will go // is one past the last valid WCHAR which is the location of an extra NULL // //*************************************************************************** void CMofLexer::MovePtr(int iNum) { int iSoFar = 0; int iChange = (iNum > 0) ? 1 : -1; int iNumToDo = (iNum > 0) ? iNum : -iNum; while(iSoFar < iNumToDo) { if(iChange == 1) { // going forward, update the pointer and make sure it // is still in an acceptable range. // ================================================== m_pDataSrc->Move(iChange); if(m_pDataSrc->PastEnd()) // points to the NULL return; // If going forward and a slash cr is hit, do an extra skip. WCHAR wCurr = m_pDataSrc->GetAt(0); if(wCurr == L'\\' && m_pDataSrc->GetAt(1) == L'\n') { m_nLine++; m_pDataSrc->Move(1); // extra increment m_nStartOfLinePos = m_pDataSrc->GetPos(); continue; } else if(wCurr == L'\\' && m_pDataSrc->GetAt(1) == L'\r' && m_pDataSrc->GetAt(2) == L'\n') { m_nLine++; m_pDataSrc->Move(2); // extra increment m_nStartOfLinePos = m_pDataSrc->GetPos(); continue; } else if (wCurr == L'\n') { m_nLine++; m_nStartOfLinePos = m_pDataSrc->GetPos(); } } else { // If going backward and a cr is left, then decrement the line if (m_pDataSrc->GetAt(0) == L'\n' && m_pDataSrc->GetPos() > 0 ) { m_nLine--; m_nStartOfLinePos = m_pDataSrc->GetPos(); } // Update the pointer and make sure it is still in an // acceptable range. // ================================================== m_pDataSrc->Move(iChange); if(m_pDataSrc->GetPos() < 0) { m_pDataSrc->MoveToStart(); return; } // If going backward and a slash cr is hit, do an extra skip. WCHAR wCurr = m_pDataSrc->GetAt(0); if( wCurr == L'\n' && m_pDataSrc->GetAt(-1) == L'\\') { m_nLine--; m_nStartOfLinePos = m_pDataSrc->GetPos(); m_pDataSrc->Move(-1); // extra decrement continue; } else if( wCurr == L'\n' && m_pDataSrc->GetAt(-1) == L'\r' && m_pDataSrc->GetAt(-2) == L'\\') { m_nLine--; m_nStartOfLinePos = m_pDataSrc->GetPos(); m_pDataSrc->Move(-2); // extra decrement continue; } } iSoFar++; } } //*************************************************************************** // // CMofLexer::GetChar() // // Returns a character at an offset from the current character pointer. // //*************************************************************************** wchar_t CMofLexer::GetChar(int iNum) { if(iNum == 0) return m_pDataSrc->GetAt(0); else if(iNum == 1) { wchar_t tRet = m_pDataSrc->GetAt(1); if(tRet != L'\\' && tRet != '\n') return tRet; } MovePtr(iNum); wchar_t wcRet = m_pDataSrc->GetAt(0); MovePtr(-iNum); return wcRet; } //*************************************************************************** // // CMofLexer::iGetColumn() // // Gets the current column value. Counts back to the previous Cr or the // start of buffer. // //*************************************************************************** int CMofLexer::iGetColumn() { return m_pDataSrc->GetPos() - m_nStartOfLinePos; } //*************************************************************************** // // CMofLexer::bOKNumericAddition() // // Returns true if the test character could be added to numeric buffer. // Note that it returns true if an alphanumeric, or a + or - and the last // character in the working buffer is an 'E' // //*************************************************************************** BOOL CMofLexer::bOKNumericAddition(wchar_t cTest) { if(wbem_iswalnum(cTest) || cTest == L'.') return TRUE; int iLen = wcslen(m_pWorkBuf); if(iLen > 0) if(towupper(m_pWorkBuf[iLen-1]) == L'E' && (cTest == L'+' || cTest == L'-') && towupper(m_pWorkBuf[1]) != L'X') return TRUE; return FALSE; } //*************************************************************************** // // CMofLexer::SpaceAvailable() // // Returns TRUE if there is enuough space in the working buffer to add. // another character. It will expand the buffer if need be. // //*************************************************************************** BOOL CMofLexer::SpaceAvailable() { // most common case is that there is already space available int iNumWChar = m_pEndOfText-m_pWorkBuf+1; if(iNumWChar < m_nWorkBufSize) return TRUE; if(m_nWorkBufSize > MAX_ALLOC) // programs need limits! return FALSE; // Allocate a bigger buffer and copy the old stuff into it // ======================================================= long nNewSize = m_nWorkBufSize + ADDITIONAL_ALLOC; wchar_t * pNew = new wchar_t[nNewSize]; if(pNew == FALSE) return FALSE; memcpy(pNew, m_pWorkBuf, m_nWorkBufSize*2); delete m_pWorkBuf; m_nWorkBufSize = nNewSize; m_pWorkBuf = pNew; m_pEndOfText = m_pWorkBuf + iNumWChar - 1; return TRUE; } //*************************************************************************** // // NextToken() // // This function contains the DFA recognizer for MOF tokens. It works // entirely in UNICODE characters, so the NextChar() function is expected // to pretranslate ANSI or DBCS source streams into wide characters. // // Return value: // One of the TOK_ constants, or TOK_EOF when the end of the input // stream has been reached. If the user calls PushBack(), then // this will return the symbol which was pushed back onto the input // stream. Only one level of push back is supported. // //*************************************************************************** int CMofLexer::NextToken(bool bDontAllowWhitespace) { int nToken = TOK_ERROR; LexState eState = start; m_bBadString = FALSE; *m_pWorkBuf = 0; m_pEndOfText = m_pWorkBuf; m_bInString = FALSE; #define CONSUME(c) \ if (!SpaceAvailable()) return TOK_ERROR;\ *m_pEndOfText++ = (c), *m_pEndOfText = 0; wchar_t c; for (MovePtr(1); m_nErrorCode == no_error && !m_pDataSrc->PastEnd(); MovePtr(1)) { c = GetChar(); // ************************************************************************* // General 'start' state entry. // ============================ if (eState == start) { m_nTokCol = iGetColumn(); m_nTokLine = m_nLine; // If a non-newline whitespace and we are in 'start', then just strip it. // ======================================================================= if (iswspace(c) || c == L'\n') if(bDontAllowWhitespace) return TOK_ERROR; else continue; // Check for string continuation // ============================= if(m_bInString) { if(c == '"') { eState = wstring; continue; } else { // string ended after all MovePtr(-1); return TOK_LPWSTR; } } // Handle all single character tokens. // =================================== if (nToken = SingleCharToken(c)) return nToken; // Start of comment, we have to get either another / or a *. // The style of comment depends on what you get. To get // neither is an error // ====================================================== if (c == L'/') { if (GetChar(1) == L'/') { eState = new_style_comment; continue; } else if (GetChar(1) == L'*') { eState = old_style_comment; MovePtr(1); // skip an extra so not to be fooled by /*/ continue; } else return TOK_ERROR; } // Check for strings or characters. Like C, 'L' is case sensitive // ================================ if (c == L'"' || c == L'\'') { eState = (c == L'"') ? wstring : wcharacter; continue; } // Tokens beginning with these letters might be a uuid. // ==================================================== if (iswxdigit(c) && ValidGuid()) { eState = uuid; CONSUME(c); continue; } // Check for identifiers which start with either a letter or _ // =========================================================== if (iswwbemalpha(c) || c == L'_') { eState = ident; CONSUME(c); continue; } // Check for a leading minus sign or digits. Either indicates // a numeric constant // =========================================================== if (wbem_iswdigit(c) || c == L'-') { eState = numeric; CONSUME(c); continue; } // If the first character was a '.', then it might be a // float or a single byte token. // ==================================================== if (c == L'.') { if (wbem_iswdigit(GetChar(1))) { eState = numeric; CONSUME(c); continue; } return TOK_DOT; } // If here, an unknown token. // ========================== break; } // end of if (eState == start) // ************************************************************ // Some state other than start // If we are in a quoted string or character. // ========================================== if (eState == wstring || eState == wcharacter) { wchar_t wTemp; // might be converted esc sequence int iRet; LexState lsNew = ProcessStr(&wTemp,eState,&iRet); if(stop == lsNew) { return iRet; } else { eState = lsNew; } if(eState == wstring || eState == wcharacter) { CONSUME(wTemp); } // else we stepped out of the string and into a comment. continue; } // numeric state, undetermined numeric constant. // ============================================= if (eState == numeric) { if(bOKNumericAddition(c)) { CONSUME(c); continue; } MovePtr(-1); return iGetNumericType(); } // If we are getting an identifer, we continue // until a nonident char is hit. // ============================================ if (eState == ident) { if (wbem_iswdigit(c) || iswwbemalpha(c) || c == L'_') { CONSUME(c); continue; } MovePtr(-1); return KeywordFilter(m_pWorkBuf); } // GUIDs are already verified, just load up the proper length // ========================================================== if (eState == uuid) { CONSUME(c); if(wcslen(m_pWorkBuf) >= UUIDLEN) return TOK_UUID; else continue; } // Take care of comment states. New style comments "//" are // terminated by a new line while old style end with "*/" // ========================================================= if (eState == new_style_comment) { if (c == L'\n') { eState = start; } continue; } if (eState == old_style_comment) { if (c == L'*') if(GetChar(1) == L'/') { MovePtr(1); eState = start; } continue; } break; // this is bad, got into strange state } // If we ended and the last thing was a string, the we are ok. This takes care // of the case where the last token in a file is a string. if ((eState == start || eState == new_style_comment) && m_bInString) { return TOK_LPWSTR; } // return eof if we never got started, ex, bad file name if(m_nErrorCode != no_error) return 0; if(m_pDataSrc->PastEnd() && (eState == start || eState == new_style_comment)) return 0; else { if(eState == old_style_comment) Trace(true, m_pDbg, UNEXPECTED_EOF, m_nTokLine); if(c == L'*' && GetChar(1) == L'/') Trace(true, m_pDbg, COMMENT_ERROR, m_nTokLine); return TOK_ERROR; } } //*************************************************************************** // // GetText // //*************************************************************************** const OLECHAR *CMofLexer::GetText(int *pLineDeclared) { if (pLineDeclared) *pLineDeclared = m_nTokLine; return m_pWorkBuf; } void CMofLexer::SetLexPosition(ParseState * pPos) { m_pDataSrc->MoveToPos(pPos->m_iPos); } void CMofLexer::GetLexPosition(ParseState * pPos) { pPos->m_iPos = m_pDataSrc->GetPos(); }