Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

735 lines
33 KiB

  1. /*static char *SCCSID = "@(#)qmatch.c 13.7 90/08/13";*/
  2. #include <stdio.h>
  3. #include <ctype.h>
  4. #include <assert.h>
  5. #include <stdlib.h>
  6. #include <string.h>
  /* ---- Sizes and limits ------------------------------------------------ */
  #define ASCLEN   128      /* Number of ASCII character codes */
  #define BUFLEN   256      /* Size of the temporary pattern buffers */
  #define PATMAX   512      /* Largest parsed (compiled) pattern, in bytes */
  #define EOS      ('\r')   /* Character that marks the end of a string */

  /* ---- Bits tested in the `flags` word --------------------------------- */
  #define BEGLINE  0x08     /* Match only at the beginning of a line */
  #define ENDLINE  0x10     /* Match only at the end of a line */
  #define DEBUG    0x20     /* Print debugging output */

  /* ---- Token codes that make up a parsed expression -------------------- */
  #define T_END    0        /* Terminates the expression */
  #define T_STRING 1        /* Literal string */
  #define T_SINGLE 2        /* One literal character */
  #define T_CLASS  3        /* Character class */
  #define T_ANY    4        /* Any character */
  #define T_STAR   5        /* Prefix: the following token is a *-expr */
  /* Expression node: one entry in a singly linked list of parsed patterns. */
  typedef struct exprnode EXPR;
  struct exprnode
  {
      EXPR          *ex_next;     /* Following node in the list */
      unsigned char *ex_pattern;  /* Parsed pattern this node refers to */
  };
  26. static int clists = 1; /* One is first available index */
  27. static int toklen[] = /* Table of token lengths */
  28. {
  29. 32767, /* T_END: invalid */
  30. 32767, /* T_STRING: invalid */
  31. 2, /* T_SINGLE */
  32. ASCLEN/8+1, /* T_CLASS */
  33. 1, /* T_ANY */
  34. 32767 /* T_STAR: invalid */
  35. };
  36. int ( __cdecl *ncmp)(const char *,const char *,size_t);
  37. /* String comparison pointer */
  38. unsigned char *exprparse(unsigned char *p);
  39. extern int casesen; /* Case-sensitivity flag */
  40. extern char *(*find)(); /* Pointer to search function */
  41. extern int flags; /* Flags */
  42. extern int strcnt; /* String count */
  43. extern char transtab[]; /* Translation table */
  44. EXPR *stringlist[ASCLEN];
  45. /* String table */
  46. void addexpr(char *e, int n); /* Add expression */
  47. char *get1stcharset(unsigned char *e, char *bitvec);
  48. extern char *alloc(); /* User-defined heap allocator */
  49. unsigned char *simpleprefix();/* Match simple prefix */
  50. char *strnupr(); /* See QGREPSUB.ASM */
  51. unsigned char *simpleprefix(s,pp)
  52. register unsigned char *s; /* String pointer */
  53. unsigned char **pp; /* Pointer to pattern pointer */
  54. {
  55. register unsigned char *p; /* Simple pattern pointer */
  56. register int c; /* Single character */
  57. p = *pp; /* Initialize */
  58. while(*p != T_END && *p != T_STAR) /* While not at end of pattern */
  59. {
  60. switch(*p++) /* Switch on token type */
  61. {
  62. case T_STRING: /* String to compare */
  63. if((*ncmp)(s,p + 1,*p) != 0) return(NULL);
  64. /* Fail if mismatch found */
  65. s += *p; /* Skip matched portion */
  66. p += *p + 1; /* Skip to next token */
  67. break;
  68. case T_SINGLE: /* Single character */
  69. c = *s++; /* Get character */
  70. if(!casesen) c = toupper(c);
  71. /* Map to upper case if necessary */
  72. if(c != (int)*p++) return(NULL);
  73. /* Fail if mismatch found */
  74. break;
  75. case T_CLASS: /* Class of characters */
  76. if(!isascii(*s) || !(p[*s >> 3] & (1 << (*s & 7)))) return(NULL);
  77. /* Failure if bit not set */
  78. p += ASCLEN/8; /* Skip bit vector */
  79. ++s; /* Skip character */
  80. break;
  81. case T_ANY: /* Any character */
  82. if(*s++ == EOS) return(NULL);
  83. /* Match all but end of string */
  84. break;
  85. }
  86. }
  87. *pp = p; /* Update pointer */
  88. return(s); /* Pattern is prefix of s */
  89. }
  90. int match(s,p)
  91. register unsigned char *s; /* String to match */
  92. unsigned char *p; /* Pattern to match against */
  93. {
  94. register unsigned char *q; /* Temporary pointer */
  95. unsigned char *r; /* Temporary pointer */
  96. register int c; /* Character */
  97. if(*p != T_END && *p != T_STAR && (s = simpleprefix(s,&p)) == NULL)
  98. return(0); /* Failure if prefix mismatch */
  99. if(*p++ == T_END) return(1); /* Match if end of pattern */
  100. q = r = p; /* Point to repeated token */
  101. r += toklen[*q]; /* Skip repeated token */
  102. switch(*q++) /* Switch on token type */
  103. {
  104. case T_ANY: /* Any character */
  105. while(match(s,r) == 0) /* While match not found */
  106. {
  107. if(*s++ == EOS) return(0);/* Match all but end of string */
  108. }
  109. return(1); /* Success */
  110. case T_SINGLE: /* Single character */
  111. while(match(s,r) == 0) /* While match not found */
  112. {
  113. c = *s++; /* Get character */
  114. if(!casesen) c = toupper(c);
  115. /* Map to upper case if necessary */
  116. if((unsigned char) c != *q) return(0);
  117. /* Fail if mismatch found */
  118. }
  119. return(1); /* Success */
  120. case T_CLASS: /* Class of characters */
  121. while(match(s,r) == 0) /* While match not found */
  122. {
  123. if(!isascii(*s) || !(q[*s >> 3] & (1 << (*s & 7)))) return(0);
  124. /* Fail if bit not set */
  125. ++s; /* Else skip character */
  126. }
  127. return(1); /* Success */
  128. }
  129. return(0); /* Return failure */
  130. }
  131. int exprmatch(s,p)
  132. char *s; /* String */
  133. char *p; /* Pattern */
  134. {
  135. ncmp = strncmp; /* Assume case-sensitive */
  136. if(!casesen)
  137. {
  138. ncmp = _strnicmp;
  139. } /* Be case-insensitive if flag set */
  140. return(match(s,p)); /* See if pattern matches string */
  141. }
  /*
   * bitset - set or clear the inclusive bit range [first, last].
   *
   * Bit k lives in byte k >> 3 at position k & 7.  Any non-zero bitval
   * sets the bits and zero clears them, on both the whole-byte and the
   * single-bit path.  An empty range (first > last) changes nothing and a
   * negative first is treated as 0.  The caller must make sure the vector
   * is large enough to hold bit `last`.
   */
  void bitset(bitvec,first,last,bitval)
  char *bitvec;                       /* Bit vector */
  int first;                          /* First bit to change */
  int last;                           /* Last bit to change */
  int bitval;                         /* Zero clears, non-zero sets */
  {
      unsigned char *byte;            /* Byte that holds the current bit */
      int bitno;                      /* Position of the current bit in *byte */

      if(first < 0) first = 0;        /* Never index before the vector */
      bitval = (bitval != 0);         /* Normalise to exactly 0 or 1 */

      byte = (unsigned char *) bitvec + (first >> 3);
      bitno = first & 7;
      while(first <= last)
      {
          if(bitno == 0 && last - first >= 7)
          {                           /* A whole aligned byte lies in range */
              *byte++ = (unsigned char)(bitval? 0xFF: 0x00);
              first += 8;
              continue;
          }
          if(bitval)
              *byte |= (unsigned char)(1u << bitno);
          else
              *byte &= (unsigned char) ~(1u << bitno);
          if(++bitno == 8)            /* Wrapped into the next byte */
          {
              ++byte;
              bitno = 0;
          }
          ++first;
      }
  }
  170. unsigned char *exprparse(p)
  171. register unsigned char *p; /* Raw pattern */
  172. {
  173. register char *cp; /* Char pointer */
  174. unsigned char *cp2; /* Char pointer */
  175. int i; /* Counter/index */
  176. int j; /* Counter/index */
  177. int m; /* Counter/index */
  178. int n; /* Counter/index */
  179. int bitval; /* Bit value */
  180. char buffer[PATMAX]; /* Temporary buffer */
  181. if(!casesen) strnupr(p,strlen(p)); /* Force pattern to upper case */
  182. cp = buffer; /* Initialize pointer */
  183. if(*p == '^') *cp++ = *p++; /* Copy leading caret if any */
  184. while(*p != '\0') /* While not end of pattern */
  185. {
  186. i = -2; /* Initialize */
  187. for(n = 0;;) /* Loop to delimit ordinary string */
  188. {
  189. n += strcspn(p + n,".\\[*");/* Look for a special character */
  190. if(p[n] != '\\') break; /* Break if not backslash */
  191. i = n; /* Remember where backslash is */
  192. if(p[++n] == '\0') return(NULL);
  193. /* Cannot be at very end */
  194. ++n; /* Skip escaped character */
  195. }
  196. if(p[n] == '*') /* If we found a *-expr. */
  197. {
  198. if(n-- == 0) return(NULL); /* Illegal first character */
  199. if(i == n - 1) n = i; /* Escaped single-char. *-expr. */
  200. }
  201. if(n > 0) /* If we have string or single */
  202. {
  203. if(n == 1 || (n == 2 && *p == '\\'))
  204. { /* If single character */
  205. *cp++ = T_SINGLE; /* Set type */
  206. if(*p == '\\') ++p; /* Skip escape if any */
  207. *cp++ = *p++; /* Copy single character */
  208. }
  209. else /* Else we have a string */
  210. {
  211. *cp++ = T_STRING; /* Set type */
  212. cp2 = cp++; /* Save pointer to length byte */
  213. while(n-- > 0) /* While bytes to copy remain */
  214. {
  215. if(*p == '\\') /* If escape found */
  216. {
  217. ++p; /* Skip escape */
  218. --n; /* Adjust length */
  219. }
  220. *cp++ = *p++; /* Copy character */
  221. }
  222. *cp2 = (char)((cp - cp2) - 1);
  223. /* Set string length */
  224. }
  225. }
  226. if(*p == '\0') break; /* Break if end of pattern */
  227. if(*p == '.') /* If matching any */
  228. {
  229. if(*++p == '*') /* If star follows any */
  230. {
  231. ++p; /* Skip star, too */
  232. *cp++ = T_STAR; /* Insert prefix ahead of token */
  233. }
  234. *cp++ = T_ANY; /* Match any character */
  235. continue; /* Next iteration */
  236. }
  237. if(*p == '[') /* If character class */
  238. {
  239. if(*++p == '\0') return(NULL);
  240. /* Skip '[' */
  241. *cp++ = T_CLASS; /* Set type */
  242. memset(cp,'\0',ASCLEN/8); /* Clear the vector */
  243. bitval = 1; /* Assume we're setting bits */
  244. if(*p == '^') /* If inverted class */
  245. {
  246. ++p; /* Skip '^' */
  247. memset(cp,'\xFF',ASCLEN/8);
  248. /* Set all bits */
  249. bitset(cp,EOS,EOS,0); /* All except end-of-string */
  250. bitset(cp,'\n','\n',0); /* And linefeed! */
  251. bitval = 0; /* Now we're clearing bits */
  252. }
  253. while(*p != ']') /* Loop to find ']' */
  254. {
  255. if(*p == '\0') return(NULL);
  256. /* Check for malformed string */
  257. if(*p == '\\') /* If escape found */
  258. {
  259. if(*++p == '\0') return(NULL);
  260. /* Skip escape */
  261. }
  262. i = *p++; /* Get first character in range */
  263. if(*p == '-' && p[1] != '\0' && p[1] != ']')
  264. { /* If range found */
  265. ++p; /* Skip hyphen */
  266. if(*p == '\\' && p[1] != '\0') ++p;
  267. /* Skip escape character */
  268. j = *p++; /* Get end of range */
  269. }
  270. else j = i; /* Else just one character */
  271. bitset(cp,i,j,bitval); /* Set bits in vector */
  272. if(!casesen) /* If ignoring case */
  273. {
  274. m = (i < 'A')? 'A': i;
  275. /* m = max(i,'A') */
  276. n = (j > 'Z')? 'Z': j;
  277. /* n = min(j,'Z') */
  278. if(m <= n) bitset(cp,tolower(m),tolower(n),bitval);
  279. /* Whack corresponding lower case */
  280. m = (i < 'a')? 'a': i;
  281. /* m = max(i,'a') */
  282. n = (j > 'z')? 'z': j;
  283. /* n = min(j,'z') */
  284. if(m <= n) bitset(cp,toupper(m),toupper(n),bitval);
  285. /* Whack corresponding upper case */
  286. }
  287. }
  288. if(*++p == '*') /* If repeated class */
  289. {
  290. memmove(cp,cp - 1,ASCLEN/8 + 1);
  291. /* Move vector forward 1 byte */
  292. cp[-1] = T_STAR; /* Insert prefix */
  293. ++cp; /* Skip to start of vector */
  294. ++p; /* Skip star */
  295. }
  296. cp += ASCLEN/8; /* Skip over vector */
  297. continue; /* Next iteration */
  298. }
  299. *cp++ = T_STAR; /* Repeated single character */
  300. *cp++ = T_SINGLE;
  301. if(*p == '\\') ++p; /* Skip escape if any */
  302. *cp++ = *p++; /* Copy the character */
  303. assert(*p == '*'); /* Validate assumption */
  304. ++p; /* Skip the star */
  305. }
  306. *cp++ = T_END; /* Mark end of parsed expression */
  307. cp2 = alloc(cp - buffer); /* Allocate buffer */
  308. memmove(cp2,buffer,(size_t)(cp - buffer)); /* Copy expression to buffer */
  309. return(cp2); /* Return buffer pointer */
  310. }
  /*
   * istoken - decide whether a raw pattern is a "token" expression.
   *
   * A pattern is a token when it begins with the two characters \< or
   * ends with the two characters \> (where that backslash is not itself
   * escaped).  Returns 1 for a token and 0 otherwise.
   */
  int istoken(s,n)
  unsigned char *s;                   /* Raw pattern */
  int n;                              /* Its length */
  {
      int pos;                        /* Index of the character examined */

      if(n >= 2 && s[0] == '\\' && s[1] == '<') return(1);

      pos = 0;
      while(pos < n)
      {
          if(s[pos] != '\\')
          {
              ++pos;                  /* Ordinary character */
              continue;
          }
          /* Backslash: it is a token only if "\>" are the final two bytes */
          if(pos + 2 == n && s[pos + 1] == '>') return(1);
          pos += 2;                   /* Skip the escape and what it escapes */
      }
      return(0);
  }
  328. int isexpr(s,n)
  329. unsigned char *s; /* String */
  330. int n; /* Length */
  331. {
  332. unsigned char *cp; /* Char pointer */
  333. int status; /* Return status */
  334. char buffer[BUFLEN]; /* Temporary buffer */
  335. if(istoken(s,n)) return(1); /* Tokens are exprs */
  336. memmove(buffer,s,n); /* Copy string to buffer */
  337. buffer[n] = '\0'; /* Null-terminate string */
  338. if((s = exprparse(buffer)) == NULL) return(0);
  339. /* Not an expression if parse fails */
  340. status = 1; /* Assume we have an expression */
  341. if(*s != '^' && *s != T_END) /* If no caret and not empty */
  342. {
  343. status = 0; /* Assume not an expression */
  344. cp = s; /* Initialize */
  345. do /* Loop to find special tokens */
  346. {
  347. switch(*cp++) /* Switch on token type */
  348. {
  349. case T_STAR: /* Repeat prefix */
  350. case T_CLASS: /* Character class */
  351. case T_ANY: /* Any character */
  352. ++status; /* This is an expression */
  353. break;
  354. case T_SINGLE: /* Single character */
  355. ++cp; /* Skip character */
  356. break;
  357. case T_STRING: /* String */
  358. cp += *cp + 1; /* Skip string */
  359. break;
  360. }
  361. }
  362. while(!status && *cp != T_END); /* Do while not at end of expression */
  363. }
  364. free(s); /* Free expression */
  365. return(status); /* Return status */
  366. }
  367. void exprprint(p,fo)
  368. unsigned char *p; /* Pointer to expression */
  369. FILE *fo; /* File pointer */
  370. {
  371. int bit; /* Bit value */
  372. int count; /* Count of characters in string */
  373. int first; /* First character in range */
  374. int last; /* Last character in range */
  375. int star; /* Repeat prefix flag */
  376. if(*p == '^') fputc(*p++,fo); /* Print leading caret */
  377. while(*p != T_END) /* While not at end of expression */
  378. {
  379. star = 0; /* Assume no prefix */
  380. if(*p == T_STAR) /* If repeat prefix found */
  381. {
  382. ++star; /* Set flag */
  383. ++p; /* Skip prefix */
  384. }
  385. switch(*p++) /* Switch on token type */
  386. {
  387. case T_END: /* End of expression */
  388. case T_STAR: /* Repeat prefix */
  389. fprintf(stderr,"Internal error: exprprint\n");
  390. /* Not valid */
  391. exit(2); /* Die abnormal death */
  392. case T_STRING: /* String */
  393. count = *p++; /* Get string length */
  394. goto common; /* Forgive me, Djikstra! */
  395. case T_SINGLE: /* Single character */
  396. count = 1; /* Only one character */
  397. common:
  398. while(count-- > 0) /* While bytes remain */
  399. {
  400. if(*p == EOS) /* If end-of-string found */
  401. {
  402. ++p; /* Skip character */
  403. fputc('$',fo); /* Emit special marker */
  404. continue; /* Next iteration */
  405. }
  406. if(strchr("*.[\\$",*p) != NULL) fputc('\\',fo);
  407. /* Emit escape if needed */
  408. fputc(*p++,fo); /* Emit the character */
  409. }
  410. break;
  411. case T_ANY: /* Match any */
  412. fputc('.',fo); /* Emit dot */
  413. break;
  414. case T_CLASS:
  415. first = -1; /* Initialize */
  416. fputc('[',fo); /* Open braces */
  417. for(count = ' '; count <= '~'; ++count)
  418. { /* Loop through printable characters */
  419. if((bit = p[count >> 3] & (1 << (count & 7))) != 0)
  420. { /* If bit is set */
  421. if(first == -1) first = count;
  422. /* Set first bit */
  423. last = count; /* Set last bit */
  424. }
  425. if((!bit || count == '~') && first != -1)
  426. { /* If range to print */
  427. if(strchr("\\]-",first) != NULL) fputc('\\',fo);
  428. /* Emit escape if needed */
  429. fputc(first,fo); /* Print first character in range */
  430. if(last != first) /* If we have a range */
  431. {
  432. if(last > first + 1) fputc('-',fo);
  433. /* Emit hyphen if needed */
  434. if(strchr("\\]-",last) != NULL) fputc('\\',fo);
  435. /* Emit escape if needed */
  436. fputc(last,fo);
  437. /* Print last character in range */
  438. }
  439. first = -1; /* Range printed */
  440. }
  441. }
  442. fputc(']',fo); /* Close braces */
  443. p += ASCLEN/8; /* Skip bit vector */
  444. break;
  445. }
  446. if(star) fputc('*',fo); /* Print star if needed */
  447. }
  448. fputc('\n',fo); /* Print newline */
  449. }
  450. char *get1stcharset(e,bitvec)
  451. unsigned char *e; /* Pointer to expression */
  452. char *bitvec; /* Pointer to bit vector */
  453. {
  454. unsigned char *cp; /* Char pointer */
  455. int i; /* Index/counter */
  456. int star; /* Repeat prefix flag */
  457. if(*e == '^') ++e; /* Skip leading caret if any */
  458. memset(bitvec,'\0',ASCLEN/8); /* Clear bit vector */
  459. cp = e; /* Initialize */
  460. while(*e != T_END) /* Loop to process leading *-expr.s */
  461. {
  462. star = 0; /* Assume no repeat prefix */
  463. if(*e == T_STAR) /* If repeat prefix found */
  464. {
  465. ++star; /* Set flag */
  466. ++e; /* Skip repeat prefix */
  467. }
  468. switch(*e++) /* Switch on token type */
  469. {
  470. case T_END: /* End of expression */
  471. case T_STAR: /* Repeat prefix */
  472. fprintf(stderr,"Internal error: get1stcharset\n");
  473. /* Not valid */
  474. exit(2); /* Die abnormal death */
  475. case T_STRING: /* String */
  476. if(star || *e++ == '\0') /* If repeat prefix or zero count */
  477. {
  478. fprintf(stderr,"Internal error: get1stcharset\n");
  479. /* Not valid */
  480. exit(2); /* Die abnormal death */
  481. }
  482. /* Drop through */
  483. case T_SINGLE: /* Single character */
  484. bitset(bitvec,*e,*e,1); /* Set the bit */
  485. ++e; /* Skip the character */
  486. break;
  487. case T_ANY: /* Match any */
  488. memset(bitvec,'\xFF',ASCLEN/8);
  489. /* Set all the bits */
  490. bitset(bitvec,EOS,EOS,0); /* Except end-of-string */
  491. bitset(bitvec,'\n','\n',0);
  492. /* And linefeed! */
  493. break;
  494. case T_CLASS:
  495. for(i = 0; i < ASCLEN/8; ++i) bitvec[i] |= *e++;
  496. /* Or in all the bits */
  497. break;
  498. }
  499. if(!star) break; /* Break if not repeated */
  500. cp = e; /* Update pointer */
  501. }
  502. return(cp); /* Point to 1st non-repeated expr. */
  503. }
  /*
   * findall - search routine that matches at the first position.
   *
   * Returns buffer itself when at least one byte lies before bufend and
   * NULL when the buffer is empty.
   */
  char *findall(buffer,bufend)
  char *buffer;                       /* Start of the text to search */
  char *bufend;                       /* One past the end of the text */
  {
      if(buffer >= bufend) return(NULL);
      return(buffer);
  }
  511. void addtoken(e,n)
  512. char *e; /* Raw token expression */
  513. int n; /* Length of expression */
  514. {
  515. static char achpref[] = "^";/* Prefix */
  516. static char achprefsuf[] = "[^A-Za-z0-9_]";
  517. /* Prefix/suffix */
  518. static char achsuf[] = "$"; /* Suffix */
  519. char buffer[BUFLEN]; /* Temporary buffer */
  520. assert(n >= 2); /* Must have at least two characters */
  521. if(e[0] == '\\' && e[1] == '<') /* If begin token */
  522. {
  523. if(!(flags & BEGLINE)) /* If not matching at beginning only */
  524. {
  525. memcpy(buffer,achprefsuf,sizeof achprefsuf - 1);
  526. /* Copy first prefix */
  527. memcpy(buffer + sizeof achprefsuf - 1,e + 2,n - 2);
  528. /* Attach expression */
  529. addexpr(buffer,n + sizeof achprefsuf - 3);
  530. /* Add expression */
  531. }
  532. memcpy(buffer,achpref,sizeof achpref - 1);
  533. /* Copy second prefix */
  534. memcpy(buffer + sizeof achpref - 1,e + 2,n - 2);
  535. /* Attach expression */
  536. addexpr(buffer,n + sizeof achpref - 3);
  537. /* Add expression */
  538. return; /* Done */
  539. }
  540. assert(e[n-2] == '\\' && e[n - 1] == '>');
  541. /* Must be end token */
  542. if(!(flags & ENDLINE)) /* If not matching at end only */
  543. {
  544. memcpy(buffer,e,n - 2); /* Copy expression */
  545. memcpy(buffer + n - 2,achprefsuf,sizeof achprefsuf - 1);
  546. /* Attach first suffix */
  547. addexpr(buffer,n + sizeof achprefsuf - 3);
  548. /* Add expression */
  549. }
  550. memcpy(buffer,e,n - 2); /* Copy expression */
  551. memcpy(buffer + n - 2,achsuf,sizeof achsuf - 1);
  552. /* Attach second suffix */
  553. addexpr(buffer,n + sizeof achsuf - 3);
  554. /* Add expression */
  555. }
  556. void addexpr(e,n)
  557. char *e; /* Expression to add */
  558. int n; /* Length of expression */
  559. {
  560. EXPR *expr; /* Expression node pointer */
  561. int i; /* Index */
  562. int j; /* Index */
  563. int locflags; /* Local copy of flags */
  564. char bitvec[ASCLEN/8];
  565. /* First char. bit vector */
  566. char buffer[BUFLEN]; /* Temporary buffer */
  567. if(find == findall) return; /* Return if matching everything */
  568. if(istoken(e,n)) /* If expr is token */
  569. {
  570. addtoken(e,n); /* Convert and add tokens */
  571. return; /* Done */
  572. }
  573. locflags = flags; /* Initialize local copy */
  574. if(*e == '^') locflags |= BEGLINE; /* Set flag if match must begin line */
  575. j = -2; /* Assume no escapes in string */
  576. for(i = 0; i < n - 1; ++i) /* Loop to find last escape */
  577. {
  578. if(e[i] == '\\') j = i++; /* Save index of last escape */
  579. }
  580. if(n > 0 && e[n-1] == '$' && j != n-2)
  581. { /* If expr. ends in unescaped '$' */
  582. --n; /* Skip dollar sign */
  583. locflags |= ENDLINE; /* Match must be at end */
  584. }
  585. strncpy(buffer,e,n); /* Copy pattern to buffer */
  586. if(locflags & ENDLINE) buffer[n++] = EOS;
  587. /* Add end character if needed */
  588. buffer[n] = '\0'; /* Null-terminate string */
  589. if((e = exprparse(buffer)) == NULL) return;
  590. /* Return if invalid expression */
  591. ++strcnt; /* Increment string count */
  592. if(!(locflags & BEGLINE)) /* If match needn't be at beginning */
  593. {
  594. e = get1stcharset(e,bitvec); /* Remove leading *-expr.s */
  595. }
  596. /*
  597. * E now points to a buffer containing a preprocessed expression.
  598. * We need to find the set of allowable first characters and make
  599. * the appropriate entries in the string node table.
  600. */
  601. if(*get1stcharset(e,bitvec) == T_END)
  602. { /* If expression will match anything */
  603. find = findall; /* Match everything */
  604. return; /* All done */
  605. }
  606. for(j = 0; j < ASCLEN; ++j) /* Loop to examine bit vector */
  607. {
  608. if(bitvec[j >> 3] & (1 << (j & 7)))
  609. { /* If the bit is set */
  610. expr = (EXPR *) alloc(sizeof(EXPR));
  611. /* Allocate record */
  612. expr->ex_pattern = e; /* Point it at pattern */
  613. if((i = transtab[j]) == 0) /* If no existing list */
  614. {
  615. if((i = clists++) >= ASCLEN)
  616. { /* If too many string lists */
  617. fprintf(stderr,"Too many string lists\n");
  618. /* Error message */
  619. exit(2); /* Die */
  620. }
  621. stringlist[i] = NULL; /* Initialize */
  622. transtab[j] = (char) i; /* Set pointer to new list */
  623. if(!casesen && isalpha(j)) transtab[j ^ 0x20] = (char) i;
  624. /* Set pointer for other case */
  625. }
  626. expr->ex_next = stringlist[i];
  627. /* Link new record into table */
  628. stringlist[i] = expr;
  629. }
  630. }
  631. if(locflags & DEBUG) exprprint(e,stderr);
  632. /* Print the expression if debugging */
  633. }
  634. char *findexpr(buffer,bufend)
  635. unsigned char *buffer; /* Buffer in which to search */
  636. char *bufend; /* End of buffer */
  637. {
  638. EXPR *expr; /* Expression list pointer */
  639. unsigned char *pattern; /* Pattern */
  640. int i; /* Index */
  641. while(buffer < bufend) /* Loop to find match */
  642. {
  643. if((i = transtab[*buffer++]) == 0) continue;
  644. /* Continue if not valid 1st char */
  645. if((expr = (EXPR *) stringlist[i]) == NULL)
  646. { /* If null pointer */
  647. fprintf(stderr,"Internal error: findexpr\n");
  648. /* Print error message */
  649. exit(2); /* Die */
  650. }
  651. --buffer; /* Back up to first character */
  652. while(expr != NULL) /* Loop to find match */
  653. {
  654. pattern = expr->ex_pattern; /* Point to pattern */
  655. expr = expr->ex_next; /* Point to next record */
  656. if(pattern[0] == '^') /* If match begin line */
  657. {
  658. ++pattern; /* Skip caret */
  659. if(buffer[-1] != '\n') continue;
  660. /* Don't bother if not at beginning */
  661. }
  662. if(exprmatch(buffer,pattern)) return(buffer);
  663. /* Return pointer if match found */
  664. }
  665. ++buffer; /* Skip first character */
  666. }
  667. return(NULL); /* No match */
  668. }