/* prscan.c */
/* lexical analysis */

#include "prtypes.h"
#include "prlex.h"
#include "prstdio.h"
#include <ctype.h>	/* isspace(), isdigit(), islower() are used below */
#include "prextern.h"

/*
 * Forward declarations of the file-local scanner routines.
 * ungetachar() is not declared here: it is a macro defined further down.
 */
static void ini_scan(void);
static ICHAR get_nc_char(void);
static ICHAR lookahead(void);
static void scan_identifier(int c);
static int scan_number(char c);
static void scan_string(void);

extern char *Read_buffer;	/* the text of the current token is built here */
extern char *Print_buffer;	/* scratch area used when formatting error messages */
extern int Max_Readbuffer;	/* used as the length limit when filling Read_buffer */

static char *Rbuffptr;		/* next free position in Read_buffer */
ICHAR Ch = 0;			/* the character most recently read */

#if LOGGING_CAPABILITY
static int Unget_flag = 0;	/* set when a character was pushed back, so it is not logged twice */
#endif

#ifdef CHARACTER
ICHAR Char_scanned;		/* value of the last character constant scanned */
static int scan_character(void);
#endif

#if STRING_READ_CAPABILITY
/* the following two variables let you read from a string */
char *Curr_string_input;	/* this is where we would get the next char */
int String_input_flag = 0;	/* if this is 0 then read from a file */

char *Curr_string_output,	/* put characters here */
     *Curr_string_output_limit;	/* stop here */
int String_output_flag = 0;	/* if this is 1, write to output string */
#endif

/* Character types - see prlex.h.
* There is a bit of guess work once we go past 127.
*
* The table is indexed by character code (0 - 255) and yields one of the
* class codes from prlex.h.  It is only ever read, hence const.
*/
static const int Ctype[256] = {
	CC, CC, CC, CC, CC, CC, CC, CC, CC, SP,	/*   0 -   9 */
	SP, CC, CC, SP, CC, CC, CC, CC, CC, CC,	/*  10 -  19 */
	CC, CC, CC, CC, CC, OT, OT, OT, OT, OT,	/*  20 -  29 */
	OT, OT, SP, OT, QU, OT, OT, OT, OT, AP,	/*  30 -  39 */
	BR, BR, OT, SI, OT, SI, OT, OT, DI, DI,	/*  40 -  49 */
	DI, DI, DI, DI, DI, DI, DI, DI, OT, OT,	/*  50 -  59 */
	OT, OT, OT, QE, OT, AU, AU, AU, AU, AU,	/*  60 -  69 */
	AU, AU, AU, AU, AU, AU, AU, AU, AU, AU,	/*  70 -  79 */
	AU, AU, AU, AU, AU, AU, AU, AU, AU, AU,	/*  80 -  89 */
	AU, OT, OT, OT, OT, US, OT, AL, AL, AL,	/*  90 -  99 */
	AL, AL, AL, AL, AL, AL, AL, AL, AL, AL,	/* 100 - 109 */
	AL, AL, AL, AL, AL, AL, AL, AL, AL, AL,	/* 110 - 119 */
	AL, AL, AL, OT, BA, OT, OT, OT, OT, OT,	/* 120 - 129 */
	AL, AL, AL, AL, AL, AL, AL, AL, AL, AL,	/* 130 - 139 */
	AL, AL, AU, AU, AU, AL, OT, AL, AL, AL,	/* 140 - 149 */
	AL, AL, AL, AU, AU, OT, OT, OT, OT, OT,	/* 150 - 159 */
	OT, OT, OT, OT, OT, OT, OT, OT, OT, OT,	/* 160 - 169 */
	OT, OT, OT, OT, OT, OT, OT, OT, OT, OT,	/* 170 - 179 */
	OT, OT, OT, OT, OT, OT, OT, OT, OT, OT,	/* 180 - 189 */
	OT, OT, OT, OT, OT, OT, OT, OT, OT, OT,	/* 190 - 199 */
	OT, OT, OT, OT, OT, OT, OT, OT, OT, OT,	/* 200 - 209 */
	OT, OT, OT, OT, OT, OT, OT, OT, OT, OT,	/* 210 - 219 */
	OT, OT, OT, OT, OT, OT, OT, OT, OT, OT,	/* 220 - 229 */
	OT, OT, OT, OT, OT, OT, OT, OT, OT, OT,	/* 230 - 239 */
	OT, OT, OT, OT, OT, OT, OT, OT, OT, OT,	/* 240 - 249 */
	OT, OT, OT, OT, OT, OT			/* 250 - 255 */
};

/******************************************************************
			ini_scan()
 Called by scan().
*******************************************************************/ static void ini_scan() { lookahead();/* move to next non blank */ Rbuffptr = Read_buffer; } static ICHAR getbasechar () { #if STRING_READ_CAPABILITY if(String_input_flag) { Ch = *Curr_string_input++; if( Ch <= 0 || Ch == 0x1a ) { Curr_string_input--; Ch = EOF; } } else #endif Ch = prgetc( Curr_infile ) ; return Ch ; } /****************************************************************** ungetbasechar() Put a character back onto the proper input stream. *******************************************************************/ static void ungetbasechar ( ICHAR c ) { #if STRING_READ_CAPABILITY if ( String_input_flag ) { if ( c != EOF ) Curr_string_input--; } else #endif prungetc(c, Curr_infile); #if LOGGING_CAPABILITY Unget_flag = 1; #endif } /****************************************************************** ungetachar() macro Put just one char ('Ch') back on input stream. Cannot be used without a prior call to getachar(). *******************************************************************/ #define ungetachar() ungetbasechar(Ch) /**************************************************************** get_nc_char get non commented character get next char out of comments and not in string Extended by DOH for ANSI // line comments. 
*****************************************************************/
/*
 * Returns the next character that lies outside any comment.
 *
 * Two comment forms are skipped: the block form (slash-star up to
 * star-slash) and the line form (// up to the end of the line).
 * '\n' and '\r' found inside a comment are still returned, so the
 * caller sees every line end.  A '/' that does not start a comment is
 * returned as is, and the character after it is pushed back.
 *
 * EOF inside a block comment is reported through
 * fatal(msgDeref(MSG_EOFINCOMMENT)) and then returned.  EOF simply ends
 * a line comment.
 *
 * The comment state is static because a call can return from the middle
 * of a comment (on a line end) and the next call has to resume there.
 */
static ICHAR get_nc_char(void)
{
	static ICHAR prevc = ' ';	/* previous char inside a block comment */
	static int inside_comment = 0;	/* currently inside any comment */
	static int line_comment = 0;	/* that comment is a // comment */
	ICHAR c;

	for (;;) {
		c = getbasechar();

		if (!inside_comment) {
			if (c != '/')
				return c;	/* ordinary char, line end or EOF */

			c = getbasechar();
			if (c == '*' || c == '/') {
				inside_comment = 1;
				line_comment = (c == '/');
				prevc = ' ';
				continue;
			}
			ungetbasechar(c);	/* just a lone slash */
			return '/';
		}

		if (c == EOF) {
			int unterminated_block = !line_comment;

			/* reset first: fatal() may not return, and the next
			 * input must not start out inside a comment */
			inside_comment = line_comment = 0;
			prevc = ' ';
			if (unterminated_block)
				fatal(msgDeref(MSG_EOFINCOMMENT));
			return EOF;
		}

		if (line_comment) {
			/* only a newline ends a line comment; a star-slash
			 * pair inside it means nothing */
			if (c == '\n') {
				inside_comment = line_comment = 0;
				prevc = ' ';
				return c;
			}
			if (c == '\r')
				return c;
			continue;
		}

		/* block comment */
		if (prevc == '*' && c == '/') {
			inside_comment = 0;
			prevc = ' ';
			continue;
		}
		prevc = c;
		if (c == '\n' || c == '\r')
			return c;
	}
}

/******************************************************************
			getachar()
 The only routine you should use for reading a char.
 Always stores its result into 'Ch' as well as returning it to caller.
*******************************************************************/ #define EvThreshhold 20 ICHAR getachar() { static int echk = 0 ; extern unsigned int Inp_linecount; if ( echk++ > EvThreshhold ) { eventCheck(); echk = 0 ; } Ch = get_nc_char() ; if(Ch == '\n') Inp_linecount ++; #if LOGGING_CAPABILITY /* new version */ if(Ch != 0 && Ch != EOF && Log_file != NULL && !Unget_flag) { char buffer [20] ; sprintf( Log_file,"%c", Ch); prfputs( buffer, Log_file); } Unget_flag = 0; #endif return Ch ; } /****************************************************************** lookahead() Peek at next character, but this character can be read by getachar() ******************************************************************/ static ICHAR lookahead() { do{ getachar(); } while(isspace(Ch)); ungetachar(); return(Ch); } /****************************************************************** scan() See prlex.h for return values other than characters (i.e. > 256) *******************************************************************/ int scan() { ini_scan(); getachar(); if(Ch == EOF) return(EOF); switch(Ctype[Ch]) { case DI: MY_ASSERT(isdigit(Ch)); /* double check */ case SI: MY_ASSERT(isdigit(Ch) || Ch == '-' || Ch == '+'); return (int) scan_number((char)Ch); case QU: MY_ASSERT(Ch == '"'); scan_string(); return(TOKEN_STRING); case BR: MY_ASSERT(Ch == ')' || Ch == '('); return(Ch); #ifdef CLIPS_SYNTAX case QE: scan_identifier(?); return(TOKEN_VAR); case AL: case AU: case OT: scan_identifier(Ch); return(TOKEN_ATOM); #else case AL: MY_ASSERT(islower(Ch)); scan_identifier(Ch); return(TOKEN_ATOM); case US: MY_ASSERT(Ch == '_'); case AU: scan_identifier(Ch); return(TOKEN_VAR); #endif case CC: return(SCAN_ERR); #ifdef CHARACTER case AP: return(scan_character()); #endif default: return(Ch); } } /****************************************************************** scan_identifier() Read an identifier. 
******************************************************************/ static void scan_identifier(c) int c; { int i; *Rbuffptr++ = c; for(i = 0; i < Max_Readbuffer; i++) { *Rbuffptr++ = getachar(); switch(Ctype[Ch]) { case AL: /* lower case alpha */ case AU: /* upper case alpha */ case DI: /* digit */ case US: /* underscore */ continue; default: /* anything else */ Rbuffptr --; if (Ch != EOF) ungetachar(); break; } break; } *Rbuffptr = 0 ; } /****************************************************************** scan_number() *******************************************************************/ #define Expon1 'E' #define Expon2 'e' #define Sign1 '-' #define Sign2 '+' static int scan_number(char c) { int met_dot = 0, met_expon = 0, met_sign = 0, met_dig = 0, met_ok ; int i; if ( isdigit(c) ) met_dig++ ; *Rbuffptr++ = c; for( met_ok = TRUE, i = 0; met_ok && i < MAXREALLENGTH; i++) { *Rbuffptr++ = getachar(); if ( isdigit(Ch) ) { met_dig++ ; } else { switch ( Ch ) { case '.' : if ( met_dot || met_expon ) met_ok = FALSE ; else met_dot++ ; break ; case Sign1: case Sign2: if ( met_sign > 0 || met_expon == 0 ) met_ok = FALSE ; else met_sign++ ; break ; case Expon1: case Expon2: if ( met_expon ) met_ok = FALSE ; else met_expon++ ; break ; default: met_ok = FALSE ; break ; } } } if ( i < MAXREALLENGTH ) { Rbuffptr --; if (Ch != EOF) ungetachar(); } *Rbuffptr = 0 ; switch(met_dot) { case 0: return(TOKEN_INT); case 1: return(TOKEN_REAL); default: return(SCAN_ERR); } } /****************************************************************** scan_string() Read a string and only store the characters between the quotes. To handle embeded quotes double them up. *******************************************************************/ /* a bit crude ... 
*/
/*
 * The opening quote has already been read by the caller.  Characters
 * are stored at Rbuffptr until the closing quote, EOF, or the length
 * limit; a doubled quote stores a single quote character.
 * NOTE(review): the limit leaves room for the NUL on the assumption
 * that Max_Readbuffer is the capacity of Read_buffer - confirm.
 */
static void scan_string(void)
{
	int i = 0;
	int c;

	do {
		c = getachar();
		if (c == EOF)
			break;

		if (c == STRING_QUOTE) {
			c = getachar();
			if (c != STRING_QUOTE) {
				/* it really is the end of the string */
				if (c != EOF)
					ungetachar();
				break;	/* dont store the quote */
			}
		}
		*Rbuffptr++ = (char)c;
	} while (++i < Max_Readbuffer - 1);

	*Rbuffptr = 0;
}

#ifdef CHARACTER
#define APOSTROPHE '\''

int scan_escape(void);
static int scan_nescape(int c);

/*
 * scan character
 * The opening apostrophe has already been read by the caller.  On
 * success the value is left in Char_scanned and TOKEN_CHAR is
 * returned; anything malformed gives SCAN_ERR.
 */
static int scan_character(void)
{
	int c;

	*Rbuffptr++ = '\'';
	c = getachar();
	if (c == EOF) {
		*Rbuffptr = 0;
		/* the message is data, not a format string */
		sprintf(Print_buffer, "%s", msgDeref(MSG_EOFINCHAR));
		errmsg(Print_buffer);
		return SCAN_ERR;
	}

	*Rbuffptr++ = (char)c;
	if (c == '\\')
		return scan_escape();
	Char_scanned = c;

	c = getachar();
	if (c != EOF)
		*Rbuffptr++ = (char)c;
	*Rbuffptr = 0;

	if (c != APOSTROPHE)
		return SCAN_ERR;
	return TOKEN_CHAR;
}

/*
 * have just read a \
 * Handles the single letter escapes here and hands a leading digit on
 * to scan_nescape().
 */
int scan_escape(void)
{
	int c;

	c = getachar();
	if (c == EOF) {
		*Rbuffptr = 0;
		return SCAN_ERR;
	}
	*Rbuffptr++ = (char)c;

	switch (c) {
	case 't':
		Char_scanned = '\t';
		break;
	case 'n':
		Char_scanned = '\n';
		break;
	case 'r':
		Char_scanned = '\r';
		break;
	case '\\':
		Char_scanned = '\\';
		break;
	case '\'':
		Char_scanned = '\'';
		break;
	case 'b':
		Char_scanned = '\b';
		break;
	case '"':
		Char_scanned = '\"';
		break;
	case 'f':
		Char_scanned = '\f';
		break;
	case 'v':
		Char_scanned = '\v';
		break;
	default:
		if (isdigit(c))
			return scan_nescape(c);
		*Rbuffptr = 0;
		return SCAN_ERR;
	}/* end switch */

	c = getachar();
	if (c != EOF)
		*Rbuffptr++ = (char)c;	/* keep the closing quote in the token text */
	*Rbuffptr = 0;

	if (c != APOSTROPHE)
		return SCAN_ERR;
	return TOKEN_CHAR;
}

/******************************************************************************
			scan_nescape()
 Scan rest of character that looks like '\123'
 'c' is the first digit, already read and stored by the caller.  The
 digits are octal, so 8 and 9 are rejected.
 ******************************************************************************/
static int scan_nescape(int c)
{
	int i = 1;

	if (c < '0' || c > '7') {
		*Rbuffptr = 0;
		return SCAN_ERR;
	}
	Char_scanned = c - '0';

	do {
		c = getachar();
		i++;
		if (c == APOSTROPHE) {
			*Rbuffptr++ = (char)c;
			*Rbuffptr = 0;
			return TOKEN_CHAR;
		}
		if (c > '7' || c < '0') {	/* also catches EOF */
			*Rbuffptr = 0;
			return SCAN_ERR;
		}
		Char_scanned = 8 * Char_scanned + c - '0';
		*Rbuffptr++ = (char)c;
	} while (i <= 3);

	/* more digits than allowed before the closing apostrophe */
	*Rbuffptr = 0;
	return SCAN_ERR;
}
#endif /* CHARACTER */

/* end of file */