|
|
// static char *SCCSID = "@(#)qmatch.c 13.7 90/08/13";
#include <stdio.h>
#include <ctype.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <windows.h>
#include <stdarg.h>
#include "fsmsg.h"
#define ASCLEN 256 // Number of ascii characters
#define BUFLEN 256 // Temporary buffer length
#define EOS ('\r') // End of string character
#define EOS2 ('\n') // Alternate End of string character
#define PATMAX 512 // Maximum parsed pattern length
#define BEGLINE 0x08 // Match at beginning of line
#define DEBUG 0x20 // Print debugging output
#define ENDLINE 0x10 // Match at end of line
#define T_END 0 // End of expression
#define T_STRING 1 // String to match
#define T_SINGLE 2 // Single character to match
#define T_CLASS 3 // Class to match
#define T_ANY 4 // Match any character
#define T_STAR 5 // *-expr
typedef struct exprnode { struct exprnode *ex_next; // Next node in list
unsigned char *ex_pattern; // Pointer to pattern to match
} EXPR; // Expression node
static int clists = 1; // One is first available index
static int toklen[] = { // Table of token lengths
32767, // T_END: invalid
32767, // T_STRING: invalid
2, // T_SINGLE
ASCLEN/8+1, // T_CLASS
1, // T_ANY
32767 // T_STAR: invalid
};
static int (__cdecl *ncmp)(const char *,const char *,size_t); // String comparison pointer
extern int casesen; // Case-sensitivity flag
extern char *(*find)(unsigned char *, char *); // Pointer to search function
extern int flags; // Flags
extern int strcnt; // String count
extern char transtab[]; // Translation table
EXPR *stringlist[ASCLEN]; // String table
void addexpr( char *, int ); // Add expression
extern char *alloc(unsigned); // User-defined heap allocator
unsigned char *simpleprefix(); // Match simple prefix
char *strnupr( char *pch, int cch ); void printmessage(FILE *fp, DWORD messagegid, ...); // Message display function for internationalization(findstr.c)
unsigned char * simpleprefix( unsigned char *s, // String pointer
unsigned char **pp // Pointer to pattern pointer
) { register unsigned char *p; // Simple pattern pointer
register int c; // Single character
char tmp[2];
tmp[1] = 0; p = *pp; // Initialize
while(*p != T_END && *p != T_STAR) { // While not at end of pattern
switch(*p++) { // Switch on token type
case T_STRING: // String to compare
if((*ncmp)((char *)s, (char *)p + 1, *p) != 0) return(NULL); // Fail if mismatch found
s += *p; // Skip matched portion
p += *p + 1; // Skip to next token
break;
case T_SINGLE: // Single character
c = *s++; // Get character
if(!casesen) { tmp[0] = (char)c; c = (unsigned char)(_strupr(tmp))[0]; } // Map to upper case if necessary
if(c != (int)*p++) return(NULL); // Fail if mismatch found
break;
case T_CLASS: // Class of characters
if(!(p[*s >> 3] & (1 << (*s & 7)))) return(NULL); // Failure if bit not set
p += ASCLEN/8; // Skip bit vector
++s; // Skip character
break;
case T_ANY: // Any character
if(*s == EOS || *s == EOS2) return(NULL); // Match all but end of string
++s; break; } } *pp = p; // Update pointer
return(s); // Pattern is prefix of s
}
int match( unsigned char *s, // String to match
unsigned char *p // Pattern to match against
) { register unsigned char *q; // Temporary pointer
unsigned char *r; // Temporary pointer
register int c; // Character
char tmp[2];
if(*p != T_END && *p != T_STAR && (s = simpleprefix(s,&p)) == NULL) return(0); // Failure if prefix mismatch
if(*p++ == T_END) return(1); // Match if end of pattern
tmp[1] = 0; q = r = p; // Point to repeated token
r += toklen[*q]; // Skip repeated token
switch(*q++) { // Switch on token type
case T_ANY: // Any character
while(match(s,r) == 0) { // While match not found
if(*s == EOS || *s == EOS2) return(0); // Match all but end of string
++s; } return(1); // Success
case T_SINGLE: // Single character
while(match(s,r) == 0) { // While match not found
c = *s++; // Get character
if(!casesen) { tmp[0] = (char)c; c = (unsigned char)(_strupr(tmp))[0]; // Map to upper case if necessary
} if((unsigned char) c != *q) return(0); // Fail if mismatch found
} return(1); // Success
case T_CLASS: // Class of characters
while(match(s,r) == 0) { // While match not found
if(!(q[*s >> 3] & (1 << (*s & 7)))) return(0); // Fail if bit not set
++s; // Else skip character
} return(1); // Success
} return(0); // Return failure
}
int exprmatch( char *s, // String
char *p // Pattern
) { ncmp = _strncoll; // Assume case-sensitive
if(!casesen) { ncmp = _strnicoll; } // Be case-insensitive if flag set
// See if pattern matches string
return(match((unsigned char *)s, (unsigned char *)p)); }
void bitset( char *bitvec, // Bit vector
unsigned char first, // First character
unsigned char last, // Last character
int bitval // Bit value (0 or 1)
) { int bitno; // Bit number
bitvec += first >> 3; // Point at first byte
bitno = first & 7; // Calculate first bit number
while(first <= last) { // Loop to set bits
if(bitno == 0 && first + 8 <= last) { // If we have a whole byte's worth
*bitvec++ = (char)(bitval? '\xFF': '\0'); // Set the bits
first += 8; // Increment the counter
continue; // Next iteration
} *bitvec=(char)(*bitvec & (unsigned char)(~(1 << bitno))) | (unsigned char)(bitval << bitno); // Set the appropriate bit
if(++bitno == 8) { // If we wrap into next byte
++bitvec; // Increment pointer
bitno = 0; // Reset bit index
} ++first; // Increment bit index
} }
unsigned char * exprparse( unsigned char *p // Raw pattern
) { register char *cp; // Char pointer
unsigned char *cp2; // Char pointer
int i; // Counter/index
int j; // Counter/index
int n; int bitval; // Bit value
char buffer[PATMAX]; // Temporary buffer
char tmp1[2]; char tmp2[2]; char tmp3[2]; unsigned x;
tmp1[1] = tmp2[1] = tmp3[1] = 0; if(!casesen) strnupr((char *)p, strlen((char *)p)); // Force pattern to upper case
cp = buffer; // Initialize pointer
if(*p == '^') *cp++ = *p++; // Copy leading caret if any
while(*p != '\0') { // While not end of pattern
i = -2; // Initialize
for(n = 0;;) { // Loop to delimit ordinary string
n += strcspn((char *)(p + n),".\\[*");// Look for a special character
if(p[n] != '\\') break; // Break if not backslash
i = n; // Remember where backslash is
if(p[++n] == '\0') return(NULL); // Cannot be at very end
++n; // Skip escaped character
} if(p[n] == '*') { // If we found a *-expr.
if(n-- == 0) return(NULL); // Illegal first character
if(i == n - 1) n = i; // Escaped single-char. *-expr.
} if(n > 0) { // If we have string or single
if(n == 1 || (n == 2 && *p == '\\')) { // If single character
*cp++ = T_SINGLE; // Set type
if(*p == '\\') ++p; // Skip escape if any
*cp++ = *p++; // Copy single character
} else { // Else we have a string
*cp++ = T_STRING; // Set type
cp2 = (unsigned char *)cp++; // Save pointer to length byte
while(n-- > 0) { // While bytes to copy remain
if(*p == '\\') { // If escape found
++p; // Skip escape
--n; // Adjust length
} *cp++ = *p++; // Copy character
} *cp2 = (unsigned char)((cp - (char *)cp2) - 1); // Set string length
} } if(*p == '\0') break; // Break if end of pattern
if(*p == '.') { // If matching any
if(*++p == '*') { // If star follows any
++p; // Skip star, too
*cp++ = T_STAR; // Insert prefix ahead of token
} *cp++ = T_ANY; // Match any character
continue; // Next iteration
} if(*p == '[') { // If character class
if(*++p == '\0') return(NULL); // Skip '['
*cp++ = T_CLASS; // Set type
memset(cp,'\0',ASCLEN/8); // Clear the vector
bitval = 1; // Assume we're setting bits
if(*p == '^') { // If inverted class
++p; // Skip '^'
memset(cp,'\xFF',ASCLEN/8); // Set all bits
bitset(cp,EOS,EOS,0); // All except end-of-string
bitset(cp,'\n','\n',0); // And linefeed!
bitval = 0; // Now we're clearing bits
}
while(*p != ']') { // Loop to find ']'
if(*p == '\0') return(NULL); // Check for malformed string
if(*p == '\\') { // If escape found
if(*++p == '\0') return(NULL); // Skip escape
} i = *p++; // Get first character in range
if(*p == '-' && p[1] != '\0' && p[1] != ']') { // If range found
++p; // Skip hyphen
if(*p == '\\' && p[1] != '\0') ++p; // Skip escape character
j = *p++; // Get end of range
} else j = i; // Else just one character
tmp1[0] = (char)i; tmp2[0] = (char)j; if (strcoll(tmp1, tmp2) <= 0) { for (x=0; x<ASCLEN; x++) { tmp3[0] = (char)x; if (strcoll(tmp1, tmp3) <= 0 && strcoll(tmp3, tmp2) <= 0) { bitset(cp, (unsigned char)tmp3[0], (unsigned char)tmp3[0], bitval); if (!casesen) { if (isupper(x)) { _strlwr(tmp3); } else if (islower(x)) _strupr(tmp3); else continue; bitset(cp, (unsigned char)tmp3[0], (unsigned char)tmp3[0], bitval); } } } } }
if(*++p == '*') { // If repeated class
memmove(cp,cp - 1,ASCLEN/8 + 1); // Move vector forward 1 byte
cp[-1] = T_STAR; // Insert prefix
++cp; // Skip to start of vector
++p; // Skip star
} cp += ASCLEN/8; // Skip over vector
continue; // Next iteration
} *cp++ = T_STAR; // Repeated single character
*cp++ = T_SINGLE; if(*p == '\\') ++p; // Skip escape if any
*cp++ = *p++; // Copy the character
assert(*p == '*'); // Validate assumption
++p; // Skip the star
} *cp++ = T_END; // Mark end of parsed expression
cp2 = (unsigned char *)alloc((int)(cp - buffer)); // Allocate buffer
memmove(cp2, buffer,(int)(cp - buffer)); // Copy expression to buffer
return(cp2); // Return buffer pointer
}
int istoken( unsigned char *s, // String
int n // Length
) { if(n >= 2 && s[0] == '\\' && s[1] == '<') return(1); // Token if starts with '\<'
while(n-- > 0) { // Loop to find end of string
if(*s++ == '\\') { // If escape found
if(--n == 0 && *s == '>') return(1); // Token if ends with '\>'
++s; // Skip escaped character
} } return(0); // Not a token
}
int isexpr( unsigned char *s, // String
int n // Length
) { unsigned char *cp; // Char pointer
int status; // Return status
char buffer[BUFLEN]; // Temporary buffer
if(istoken(s, n)) return(1); // Tokens are exprs
memmove(buffer,s,n); // Copy string to buffer
buffer[n] = '\0'; // Null-terminate string
if (*buffer && buffer[n - 1] == '$') return(1); if((s = exprparse((unsigned char *)buffer)) == NULL) return(0); // Not an expression if parse fails
status = 1; // Assume we have an expression
if(*s != '^' && *s != T_END) { // If no caret and not empty
status = 0; // Assume not an expression
cp = s; // Initialize
do { // Loop to find special tokens
switch(*cp++) { // Switch on token type
case T_STAR: // Repeat prefix
case T_CLASS: // Character class
case T_ANY: // Any character
++status; // This is an expression
break;
case T_SINGLE: // Single character
++cp; // Skip character
break;
case T_STRING: // String
cp += *cp + 1; // Skip string
break; } } while(!status && *cp != T_END) ; // Do while not at end of expression
} free(s); // Free expression
return(status); // Return status
}
#ifdef gone // for DEBUG
void exprprint( unsigned char *p, // Pointer to expression
FILE *fo // File pointer
) { int bit; // Bit value
int count; // Count of characters in string
int first; // First character in range
int last; // Last character in range
int star; // Repeat prefix flag
if(*p == '^') fputc(*p++,fo); // Print leading caret
while(*p != T_END) { // While not at end of expression
star = 0; // Assume no prefix
if(*p == T_STAR) { // If repeat prefix found
++star; // Set flag
++p; // Skip prefix
} switch(*p++) { // Switch on token type
case T_END: // End of expression
case T_STAR: // Repeat prefix
fprintf(stderr,"Internal error: exprprint\n"); // Not valid
exit(2); // Die abnormal death
case T_STRING: // String
count = *p++; // Get string length
goto common; // Forgive me, Djikstra!
case T_SINGLE: // Single character
count = 1; // Only one character
common: while(count-- > 0) { // While bytes remain
if(*p == EOS || *p == EOS2) { // If end-of-string found
++p; // Skip character
fputc('$',fo); // Emit special marker
continue; // Next iteration
} if(strchr("*.[\\$",*p) != NULL) fputc('\\',fo); // Emit escape if needed
fputc(*p++,fo); // Emit the character
} break;
case T_ANY: // Match any
fputc('.',fo); // Emit dot
break;
case T_CLASS: first = -1; // Initialize
fputc('[',fo); // Open braces
for(count = ' '; count <= '~'; ++count) { // Loop through printable characters
if((bit = p[count >> 3] & (1 << (count & 7))) != 0) { // If bit is set
if(first == -1) first = count; // Set first bit
last = count; // Set last bit
} if((!bit || count == '~') && first != -1) { // If range to print
if(strchr("\\]-",first) != NULL) fputc('\\',fo); // Emit escape if needed
fputc(first,fo); // Print first character in range
if(last != first) { // If we have a range
if(last > first + 1) fputc('-',fo); // Emit hyphen if needed
if(strchr("\\]-",last) != NULL) fputc('\\',fo); // Emit escape if needed
fputc(last,fo); // Print last character in range
} first = -1; // Range printed
} } fputc(']',fo); // Close braces
p += ASCLEN/8; // Skip bit vector
break; } if(star) fputc('*',fo); // Print star if needed
} fputc('\n',fo); // Print newline
}
#endif
char * get1stcharset( unsigned char *e, // Pointer to expression
char *bitvec // Pointer to bit vector
) { unsigned char *cp; // Char pointer
int i; // Index/counter
int star; // Repeat prefix flag
if(*e == '^') ++e; // Skip leading caret if any
memset(bitvec,'\0',ASCLEN/8); // Clear bit vector
cp = e; // Initialize
while(*e != T_END) { // Loop to process leading *-expr.s
star = 0; // Assume no repeat prefix
if(*e == T_STAR) { // If repeat prefix found
++star; // Set flag
++e; // Skip repeat prefix
} switch(*e++) { // Switch on token type
case T_END: // End of expression
case T_STAR: // Repeat prefix
assert(0); // Not valid
exit(2); // Die abnormal death
case T_STRING: // String
if(star || *e++ == '\0') { // If repeat prefix or zero count
assert(0); // Not valid
exit(2); // Die abnormal death
} // Drop through
case T_SINGLE: // Single character
bitset(bitvec,*e,*e,1); // Set the bit
++e; // Skip the character
break;
case T_ANY: // Match any
memset(bitvec,'\xFF',ASCLEN/8); // Set all the bits
bitset(bitvec,EOS,EOS,0); // Except end-of-string
bitset(bitvec,'\n','\n',0); // And linefeed!
break;
case T_CLASS: for(i = 0; i < ASCLEN/8; ++i) bitvec[i] |= *e++; // Or in all the bits
break; } if(!star) break; // Break if not repeated
cp = e; // Update pointer
} return((char *)cp); // Point to 1st non-repeated expr.
}
char * findall( unsigned char *buffer, // Buffer in which to search
char *bufend // End of buffer
) { return(buffer < (unsigned char *) bufend ? (char *) buffer : NULL); // Fail only on empty buffer
}
void addtoken( char *e, // Raw token expression
int n // Length of expression
) { static char achpref[] = "^";// Prefix
static char achprefsuf[] = "[^A-Za-z0-9_]"; // Prefix/suffix
static char achsuf[] = "$"; // Suffix
char buffer[BUFLEN]; // Temporary buffer
assert(n >= 2); // Must have at least two characters
if(e[0] == '\\' && e[1] == '<') { // If begin token
if(!(flags & BEGLINE)) { // If not matching at beginning only
memcpy(buffer,achprefsuf,sizeof achprefsuf - 1); // Copy first prefix
memcpy(buffer + sizeof achprefsuf - 1,e + 2,n - 2); // Attach expression
addexpr(buffer,n + sizeof achprefsuf - 3); // Add expression
} memcpy(buffer,achpref,sizeof achpref - 1); // Copy second prefix
memcpy(buffer + sizeof achpref - 1,e + 2,n - 2); // Attach expression
addexpr(buffer,n + sizeof achpref - 3); // Add expression
return; // Done
} assert(e[n-2] == '\\' && e[n - 1] == '>'); // Must be end token
if(!(flags & ENDLINE)) { // If not matching at end only
memcpy(buffer,e,n - 2); // Copy expression
memcpy(buffer + n - 2,achprefsuf,sizeof achprefsuf - 1); // Attach first suffix
addexpr(buffer,n + sizeof achprefsuf - 3); // Add expression
} memcpy(buffer,e,n - 2); // Copy expression
memcpy(buffer + n - 2,achsuf,sizeof achsuf - 1); // Attach second suffix
addexpr(buffer,n + sizeof achsuf - 3); // Add expression
}
void addexpr( char *e, // Expression to add
int n // Length of expression
) { EXPR *expr; // Expression node pointer
int i; // Index
int j; // Index
int locflags; // Local copy of flags
char bitvec[ASCLEN/8]; // First char. bit vector
char buffer[BUFLEN]; // Temporary buffer
char tmp[2];
if(find == findall) return; // Return if matching everything
if(istoken((unsigned char *)e, n)) { // If expr is token
addtoken(e,n); // Convert and add tokens
return; // Done
} tmp[1] = 0; locflags = flags; // Initialize local copy
if(*e == '^') locflags |= BEGLINE; // Set flag if match must begin line
j = -2; // Assume no escapes in string
for(i = 0; i < n - 1; ++i) { // Loop to find last escape
if(e[i] == '\\') j = i++; // Save index of last escape
} if(n > 0 && e[n-1] == '$' && j != n-2) { // If expr. ends in unescaped '$'
--n; // Skip dollar sign
locflags |= ENDLINE; // Match must be at end
} strncpy(buffer,e,n); // Copy pattern to buffer
if(locflags & ENDLINE) buffer[n++] = EOS; // Add end character if needed
buffer[n] = '\0'; // Null-terminate string
if((e = (char *)exprparse((unsigned char *)buffer)) == NULL) return; // Return if invalid expression
++strcnt; // Increment string count
if(!(locflags & BEGLINE)) { // If match needn't be at beginning
e = get1stcharset((unsigned char *)e, bitvec); // Remove leading *-expr.s
}
// E now points to a buffer containing a preprocessed expression.
// We need to find the set of allowable first characters and make
// the appropriate entries in the string node table.
if(*get1stcharset((unsigned char *)e, bitvec) == T_END) { // If expression will match anything
find = findall; // Match everything
return; // All done
}
for(j = 0; j < ASCLEN; ++j) { // Loop to examine bit vector
if(bitvec[j >> 3] & (1 << (j & 7))) { // If the bit is set
expr = (EXPR *) alloc(sizeof(EXPR)); // Allocate record
expr->ex_pattern = (unsigned char *)e; // Point it at pattern
if((i = (UCHAR)transtab[j]) == 0) { // If no existing list
if((i = clists++) >= ASCLEN) { // If too many string lists
printmessage(stderr,MSG_FINDSTR_TOO_MANY_STRING_LISTS,NULL); // Error message
exit(2); // Die
} stringlist[i] = NULL; // Initialize
transtab[j] = (char) i; // Set pointer to new list
if(!casesen && isalpha(j)) { tmp[0] = (char)j; if ((unsigned char)(_strlwr(tmp))[0] != (unsigned char)j || (unsigned char)(_strupr(tmp))[0] != (unsigned char)j) transtab[(unsigned char)tmp[0]] = (char)i; // Set pointer for other case
} } expr->ex_next = stringlist[i]; // Link new record into table
stringlist[i] = expr; } }
// if(locflags & DEBUG) exprprint(e,stderr);
// Print the expression if debugging
}
char * findexpr( unsigned char *buffer, // Buffer in which to search
char *bufend // End of buffer
) { EXPR *expr; // Expression list pointer
unsigned char *pattern; // Pattern
int i; // Index
unsigned char *bufbegin; int b;
bufbegin = buffer;
while(buffer < (unsigned char *)bufend) { // Loop to find match
if((i = (UCHAR)transtab[*buffer++]) == 0) continue; // Continue if not valid 1st char
if((expr = (EXPR *) stringlist[i]) == NULL) { // If null pointer
assert(0); exit(2); // Die
} --buffer; // Back up to first character
while(expr != NULL) { // Loop to find match
pattern = expr->ex_pattern; // Point to pattern
expr = expr->ex_next; // Point to next record
if(pattern[0] == '^') { // If match begin line
++pattern; // Skip caret
if(buffer > bufbegin && buffer[-1] != '\n') continue; // Don't bother if not at beginning
} __try { b = exprmatch((char *)buffer, (char *)pattern); } __except (GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION) { b = 0; } if (b) { return((char *)buffer); } } ++buffer; // Skip first character
} return(NULL); // No match
}
|