/********************************************************************** // QMT.C // // Copyright (c) 1992 - Microsoft Corp. // All rights reserved. // Microsoft Confidential // // // Functions for handling reqular expressions. // **********************************************************************/ #include "shellprv.h" #pragma hdrstop #pragma warning( disable:4001 ) // Disable new type remark warning //********************************************************************* // void bitset( LPSTR BitVec, int First, int Last, int BitVal) // // ARGUMENTS: // BitVec - Bit vector // First - First character // Last - Last character // BitVal - Bit value (0 or 1) //********************************************************************* void bitset( LPBYTE BitVec, int First, int Last, int BitVal) { int BitNo; // Bit number BitVec += ((unsigned)First) >> 3; // Point at first byte BitNo = First & 7; // Calculate first bit number while ( First <= Last ) // Loop to set bits { // If we have a whole byte's worth if ( BitNo == 0 && (First + 8) <= Last ) { // Set the bits *BitVec++ = (unsigned char)(BitVal ? 0xff : '\0'); First += 8; // Increment the counter continue; // Next iteration } // Set the appropriate bit *BitVec = (unsigned char)( ((unsigned)*BitVec & (~((unsigned)1 << BitNo)) | ((unsigned)BitVal << BitNo)) ); if ( ++BitNo == 8 ) // If we wrap into next byte { BitVec++; // Increment pointer BitNo = 0; // Reset bit index } First++; // Increment bit index } } //********************************************************************* // Parses a search expression and creates a qualified expression string // for which a pointer is returned to the caller. // // LPSTR exprparse( register LPSTR szPattern, int *NewBufLen ) // // ARGUMENTS: // szPattern - Ptr to raw pattern // NewBufLen - Ptr to len of new buffer returned by this function // RETURNS: // LPSTR - Ptr to a newly allocated buffer containg the // qualified expression and the caller's NewBufLen // is updated to reflect the returned buffer length. // Returns NULL ptr if an invalid expression. //********************************************************************* LPSTR exprparse( LPGREPINFO lpgi, LPSTR szPattern, int *NewBufLen ) { LPSTR pChar; // Char pointer LPSTR pChar2; // Char pointer LPSTR lpChar; // Far Char pointer int i; // Counter/index int j; // Counter/index int m; // Counter/index int n; // Counter/index int BitVal; // Bit value char Buffer[ PATMAX ]; // Temporary buffer BitVal = 0; if ( !lpgi->CaseSen ) // If case insensitive force to uprcase CharUpperBuffA( szPattern, (int)lstrlenA(szPattern) ); pChar = Buffer; // Initialize pointer if ( *szPattern == '^' ) *(pChar++) = *szPattern++; // Copy leading caret if any while ( *szPattern != '\0' ) // While not end of pattern { i = -2; // Initialize for (n = 0;;) // Loop to delimit ordinary string { // Look for a special character n += (int)StrCSpnA( szPattern + n, ".\\[*" ); if ( szPattern[ n ] != '\\' ) break; // Break if not backslash i = n; // Remember where backslash is if ( szPattern[ ++n ] == '\0' ) return( NULL ); // Cannot be at very end n++; // Skip escaped character } if ( szPattern[ n ] == '*' ) // If we found a *-expr. { if ( n-- == 0 ) return( NULL ); // Illegal first character if ( i == (n - 1) ) n = i; // Escaped single-char. *-expr. } if ( n > 0 ) // If we have string or single { // If single character if ( n == 1 || (n == 2 && *szPattern == '\\') ) { *pChar++ = T_SINGLE; // Set type if ( *szPattern == '\\' ) szPattern++; // Skip escape if any *pChar++ = *szPattern++; // Copy single character } else // Else we have a string { *pChar++ = T_STRING; // Set type pChar2 = pChar++; // Save pointer to length byte while ( n-- > 0 ) // While bytes to copy remain { if (*szPattern == '\\') // If escape found { szPattern++; // Skip escape n--; // Adjust length } *pChar++ = *szPattern++; // Copy character } // Set string length *pChar2 = (char) ((pChar - pChar2) - 1); } } if ( *szPattern == '\0' ) break; // Break if end of pattern if ( *szPattern == '.' ) // If matching any { if ( *++szPattern == '*' ) // If star follows any { szPattern++; // Skip star, too *pChar++ = T_STAR; // Insert prefix ahead of token } *pChar++ = T_ANY; // Match any character continue; // Next iteration } if ( *szPattern == '[' ) // If character class { if (*++szPattern == '\0') // Skip '[' return(NULL); *pChar++ = T_CLASS; // Set type // Clear the vector _fmemset( pChar, '\0', ASCII_LEN / 8 ); BitVal = 1; // Assume we're setting bits if ( *szPattern == '^' ) // If inverted class { szPattern++; // Skip '^' // Set all bits _fmemset( pChar, (char) -1, ASCII_LEN / 8 ); bitset( (LPBYTE )pChar, EOS, EOS, 0 ); // All except end-of-string bitset( (LPBYTE )pChar, '\n', '\n', 0 ); // And linefeed! BitVal = 0; // Now we're clearing bits } while ( *szPattern != ']' ) // Loop to find ']' { if (*szPattern == '\0') return( NULL ); // Check for malformed string */ if ( *szPattern == '\\' ) // If escape found { if ( *++szPattern == '\0' )// Skip escape return(NULL); } i = (unsigned char)*szPattern++; // Get first character in range // If range found if ( *szPattern == '-' && szPattern[ 1 ] != '\0' && szPattern[ 1 ] != ']') { szPattern++; // Skip hyphen if ( *szPattern == '\\' && szPattern[ 1 ] != '\0' ) szPattern++; // Skip escape character j = (unsigned char)*szPattern++; // Get end of range } else j = i; // Else just one character // Set bits in vector bitset( (LPBYTE )pChar, i, j, BitVal ); if (!lpgi->CaseSen) // If ignoring case { m = (i < 'A') ? 'A' : i; // m = max(i,'A') n = (j > 'Z') ? 'Z' : j; // n = min(j,'Z') if (m <= n) // Whack corresponding lower case bitset( (LPBYTE )pChar, LOWORD((DWORD)AnsiLower( (LPSTR)(DWORD)m )), LOWORD((DWORD)AnsiLower( (LPSTR)(DWORD) n )), BitVal ); m = (i < 'a') ? 'a' : i; // m = max(i,'a') n = (j > 'z') ? 'z' : j; // n = min(j,'z') if (m <= n) // Whack corresponding upper case bitset( (LPBYTE )pChar, LOWORD((DWORD)AnsiUpper((LPSTR)(DWORD)m )), LOWORD((DWORD)AnsiUpper((LPSTR)(DWORD)n )), BitVal ); } } if (*(++szPattern) == '*') // If repeated class { MoveMemory( pChar, pChar - 1, ASCII_LEN / 8 + 1 ); // Move vector forward 1 byte pChar[ -1 ] = T_STAR; // Insert prefix pChar++; // Skip to start of vector szPattern++; // Skip star } pChar += ASCII_LEN / 8; // Skip over vector continue; // Next iteration } *pChar++ = T_STAR; // Repeated single character *pChar++ = T_SINGLE; if ( *szPattern == '\\' ) szPattern++; // Skip escape if any *pChar++ = *szPattern++; // Copy the character #ifndef NDEBUG Assert( *szPattern == '*' ); // Validate assumption #endif szPattern++; // Skip the star } *pChar++ = T_END; // Mark end of parsed expression n = (pChar - (LPSTR)Buffer); // Determine new expression length *NewBufLen = n; // Update caller's ptr with buf len lpChar = AllocThrow( lpgi, (unsigned)n ); // Allocate buffer MoveMemory( lpChar, Buffer, (unsigned)n ); // Copy expression to buffer return( lpChar ); // Return buffer pointer } //********************************************************************* //int istoken( LPSTR pStr, int StrLen ) // // ARGUMENTS: // pStr - String // StrLen - Length //********************************************************************* int istoken( LPSTR pStr, int StrLen ) { if ( StrLen >= 2 && pStr[ 0 ] == '\\' && pStr[ 1 ] == '<' ) return( 1 ); // Token if starts with '\<' while ( StrLen-- > 0 ) // Loop to find end of string { if ( *pStr++ == '\\' ) // If escape found { if ( --StrLen == 0 && *pStr == '>' ) return( 1 ); // Token if ends with '\>' pStr++; // Skip escaped character } } return( 0 ); // Not a token } //********************************************************************* // int isexpr( LPSTR pStr, int ExprLen ) // // ARGUMENTS: // pStr - String // ExprLen - Length //********************************************************************* int isexpr( LPGREPINFO lpgi, LPSTR pStr, int ExprLen ) { LPSTR pChar; // Char pointer int status; // Return status char Buffer[ BUFLEN ]; // Temporary buffer if ( istoken( pStr, ExprLen ) ) return( 1 ); // Tokens are exprs // Copy string to buffer MoveMemory( Buffer, pStr, (unsigned)ExprLen ); Buffer[ ExprLen ] = '\0'; // Null-terminate string if ( (pStr = exprparse( lpgi, Buffer, &ExprLen )) == NULL ) return( 0 ); // Not an expression if parse fails status = 1; // Assume we have an expression if ( *pStr != '^' && *pStr != T_END ) // If no caret and not empty { status = 0; // Assume not an expression pChar = pStr; // Initialize do // Loop to find special tokens { switch ( *pChar++ ) // Switch on token type { case T_STAR: // Repeat prefix case T_CLASS: // Character class case T_ANY : // Any character status++; // This is an expression break; case T_SINGLE: // Single character pChar++; // Skip character break; case T_STRING: // String pChar += *pChar + 1; // Skip string break; default: break; } } while ( !status && *pChar != T_END ); // Do while not at end of expr. } if ( !Free( pStr ) ) // Free expression RaiseException(ERROR_INVALID_BLOCK, 0, 0, 0); return( status ); // Return status } //********************************************************************* // LPSTR get1stcharset( LPSTR pExpr, LPBYTE BitVec ) // // ARGUMENTS: // pExpr - Pointer to expression string // BitVec - Pointer to bit vector //********************************************************************* LPSTR get1stcharset( LPGREPINFO lpgi, LPSTR pExpr, LPBYTE BitVec ) { LPSTR pChar; // Char pointer int i; // Index/counter int star; // Repeat prefix flag if ( *pExpr == '^' ) pExpr++; // Skip leading caret if any _fmemset( BitVec, '\0', ASCII_LEN / 8 ); // Clear bit vector pChar = pExpr; // Initialize while ( *pExpr != T_END ) // Loop to process leading *-expr.s { star = 0; // Assume no repeat prefix if ( *pExpr == T_STAR ) // If repeat prefix found { star++; // Set flag pExpr++; // Skip repeat prefix } switch ( *pExpr++ ) // Switch on token type { case T_END : // End of expression case T_STAR : // Repeat prefix RaiseException(ERROR_INVALID_PARAMETER, 0, 0, 0); case T_STRING : // String if ( star || *pExpr++ == '\0' ) // If repeat prefix or zero count RaiseException(ERROR_INVALID_PARAMETER, 0, 0, 0); // Else drop through case T_SINGLE : // Single character // Set the bit bitset( (LPBYTE )BitVec, (unsigned char)*pExpr, (unsigned char)*pExpr, 1 ); pExpr++; // Skip the character break; case T_ANY : // Match any // Set all the bits _fmemset( BitVec, (char)-1, ASCII_LEN / 8 ); bitset( (LPBYTE )BitVec, EOS, EOS, 0 ); // Except end-of-string bitset( (LPBYTE )BitVec, '\n', '\n', 0 ); // And linefeed! break; case T_CLASS : for ( i = 0; i < ASCII_LEN / 8; i++ ) BitVec[ i ] |= (unsigned char)*(pExpr++); // Or in all the bits break; default: break; } if ( !star ) break; // Break if not repeated pChar = pExpr; // Update pointer } return( pChar ); // Point to 1st non-repeated expr. } //********************************************************************* // LPSTR findall( LPSTR Buffer, LPSTR bufend ) // // ARGUMENTS: // Buffer - Buffer in which to search // bufend - End of buffer //********************************************************************* LPSTR findall( LPGREPINFO lpgi, LPSTR Buffer, LPSTR bufend ) { // Fail only on empty buffer return( ((LPSTR )Buffer < bufend) ? Buffer : NULL ); } //********************************************************************* // void addtoken( LPSTR pExprStr, int ExprLen ) // // ARGUMENTS: // pExprStr - Raw token expression // ExprLen - Length of expression //********************************************************************* void addtoken( LPGREPINFO lpgi, LPSTR pExprStr, int ExprLen ) { static const char achpref[] = "^"; // Prefix static const char achprefsuf[] = "[^A-Za-z0-9_]"; // Prefix/suffix static const char achsuf[] = "$"; // Suffix char Buffer[ BUFLEN ]; // Temporary buffer #ifndef NDEBUG Assert( ExprLen >= 2 ); // Must have at least two characters #endif // If begin token if ( pExprStr[ 0 ] == '\\' && pExprStr[ 1 ] == '<' ) { if ( !(lpgi->Flags & BEGLINE) ) // If not matching at beginning only { // Copy first prefix _fmemcpy( Buffer, achprefsuf, SIZEOF(achprefsuf) - 1 ); // Attach expression _fmemcpy( Buffer + SIZEOF(achprefsuf) - 1, pExprStr + 2, (unsigned)ExprLen - 2 ); // Add expression addexpr( lpgi, Buffer, ExprLen + (int)SIZEOF(achprefsuf) - 3 ); } // Copy second prefix _fmemcpy( Buffer, achpref, SIZEOF(achpref) - 1 ); // Attach expression _fmemcpy( Buffer + SIZEOF(achpref) - 1, pExprStr + 2, (unsigned)ExprLen - 2 ); // Add expression addexpr( lpgi, Buffer, ExprLen + (int)SIZEOF( achpref ) - 3 ); return; // Done } // Must be end token #ifndef NDEBUG Assert( pExprStr[ ExprLen - 2 ] == '\\' && pExprStr[ ExprLen - 1 ] == '>' ); #endif if ( !(lpgi->Flags & ENDLINE) ) // If not matching at end only { // Copy expression _fmemcpy( Buffer, pExprStr, (unsigned)ExprLen - 2 ); // Attach first suffix _fmemcpy( Buffer + ExprLen - 2, achprefsuf, SIZEOF(achprefsuf) - 1 ); // Add expression addexpr( lpgi, Buffer, ExprLen + (int)SIZEOF( achprefsuf ) - 3 ); } // Copy expression _fmemcpy( Buffer, pExprStr, (unsigned)ExprLen - 2 ); // Attach second suffix _fmemcpy( Buffer + ExprLen - 2, achsuf, SIZEOF(achsuf) - 1); // Add expression addexpr( lpgi, Buffer, ExprLen + (int)SIZEOF( achsuf ) - 3); } //********************************************************************* // void addexpr( LPGREPINFO lpgi, LPSTR pExprStr, int ExprLen ) // // ARGUMENTS: // pExprStr - Expression to add // ExprLen - Length of expression //********************************************************************* void addexpr( LPGREPINFO lpgi, LPSTR pExprStr, int ExprLen ) { LPSTR pBufStart; EXPR *expr; // Expression node pointer int i; // Index int j; // Index int LocalFlgs; // Local copy of flags unsigned char BitVec[ ASCII_LEN / 8 ];// First char. bit vector char Buffer[ BUFLEN ]; // Temporary buffer if ( lpgi->find == findall ) return; // Return if matching everything if ( istoken( pExprStr, ExprLen ) ) // If expr is token { addtoken( lpgi, pExprStr, ExprLen );// Convert and add tokens return; // Done } LocalFlgs = lpgi->Flags; // Initialize local copy if ( *pExprStr == '^' ) LocalFlgs |= BEGLINE; // Set flag if match must begin line j = -2; // Assume no escapes in string for ( i = 0; i < ExprLen - 1; i++ ) // Loop to find last escape { if ( pExprStr[ i ] == '\\' ) j = i++; // Save index of last escape } if ( ExprLen > 0 && pExprStr[ ExprLen - 1 ] == '$' && j != ExprLen - 2 ) { // If expr. ends in unescaped '$' ExprLen--; // Skip dollar sign LocalFlgs |= ENDLINE; // Match must be at end } // Copy pattern to buffer _fmemcpy( Buffer, pExprStr, (unsigned)ExprLen ); if ( LocalFlgs & ENDLINE ) Buffer[ ExprLen++ ] = EOS; // Add end character if needed Buffer[ ExprLen ] = '\0'; // Null-terminate string Assert(lstrlenA(Buffer)==ExprLen); if ( (pExprStr = exprparse( lpgi, Buffer, &ExprLen )) == NULL ) return; // Return if invalid expression pBufStart = pExprStr; // Save base of original alloc() lpgi->ge.StrCount++; // Increment string count if ( !(LocalFlgs & BEGLINE) ) // If match needn't be at beginning pExprStr = get1stcharset( lpgi, pExprStr, BitVec ); // Remove leading *-expr.s // pExpStr now points to a Buffer containing a preprocessed expression. // We need to find the set of allowable first characters and make // the appropriate entries in the string node table. if ( *get1stcharset( lpgi, pExprStr, BitVec ) == T_END ) { // If expression will match anything lpgi->find = findall; // Match everything // Make sure memory is freed. if ( !Free( pBufStart ) ) RaiseException(ERROR_INVALID_BLOCK, 0, 0, 0); return; // All done } if ( lpgi->ge.ExprEntriesUsed >= ASCII_LEN ) RaiseException(ERROR_INVALID_PARAMETER, 0, 0, 0); // Copy the expression to the start of it's buffer and add it to // the expression string list. The call to get1stcharset() may // have returned a ptr to an offset in the original buffer so // that is why we may have to move the expression string. lpgi->ge.ExprStrList[ lpgi->ge.ExprEntriesUsed++ ] = pBufStart; if ( pExprStr != pBufStart ) MoveMemory( pBufStart, pExprStr, (unsigned)ExprLen ); for ( j = 0; j < ASCII_LEN; j++ ) // Loop to examine bit vector { if ( BitVec[ (unsigned)j >> 3 ] & (1 << (j &7)) ) { // If the bit is set // Allocate a new node and point it to the expression string expr = (EXPR *) AllocThrow( lpgi, SIZEOF( EXPR ) ); expr->ex_pattern = pBufStart; expr->ex_dummy = NULL; if ( (i = lpgi->ge.TransTable[ j ]) == 0 ) // If no existing list { if ( (i = lpgi->ge.TblEntriesUsed++) >= ASCII_LEN ) RaiseException(ERROR_INVALID_PARAMETER, 0, 0, 0); #ifndef NDEBUG Assert( lpgi->ge.StringList[ i ] == NULL ); #endif lpgi->ge.TransTable[ j ] = (unsigned char) i; // Set pointer to new list if ( !lpgi->CaseSen && IsCharAlpha( (unsigned char)j ) ) lpgi->ge.TransTable[ j ^ 0x20 ] = (unsigned char)i; // Set pointer for other case } // Link new record into table expr->ex_next = (struct exprnode *) ((LPSTR)lpgi->ge.StringList[ i ]); lpgi->ge.StringList[ i ] = (STRINGNODE *)((LPSTR)expr); } } }