mirror of https://github.com/lianthony/NT4.0
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
600 lines
14 KiB
600 lines
14 KiB
/* prscan.c */
|
|
/* lexical analysis */
|
|
#include "prtypes.h"
|
|
#include "prlex.h"
|
|
#include "prstdio.h"
|
|
#include <ctype.h>
|
|
|
|
#include "prextern.h"
|
|
static void ini_scan(void);
|
|
static int get_nc_char(void);
|
|
static void ungetachar(void);
|
|
static int lookahead(void);
|
|
static void scan_identifier(int c);
|
|
static int scan_number(char c);
|
|
static void scan_string(void);
|
|
|
|
extern char *Read_buffer;
|
|
extern char *Print_buffer;
|
|
extern int Max_Readbuffer;
|
|
|
|
|
|
static char *Rbuffptr;
|
|
ICHAR Ch = 0 ;
|
|
|
|
#if LOGGING_CAPABILITY
|
|
static int Unget_flag = 0;
|
|
#endif
|
|
|
|
|
|
#ifdef CHARACTER
/* Value of the character constant most recently scanned. */
ICHAR Char_scanned;

/*
 * Character-constant scanners. scan_escape() and scan_nescape() are
 * declared here because they are called before their definitions;
 * without a declaration the implicit "extern int" clashes with the
 * static definition of scan_nescape().
 */
static int scan_character(void);
int scan_escape(void);
static int scan_nescape(int c);
#endif
|
|
|
|
#if STRING_READ_CAPABILITY
/* State that lets the scanner read from (and output go to) a string. */

/* Input side: next character to read, and the switch that selects it. */
char *Curr_string_input;            /* this is where we would get the next char */
int String_input_flag = 0;          /* if this is 0 then read from a file */

/* Output side: write cursor, its limit, and the switch that selects it. */
char *Curr_string_output;           /* put characters here */
char *Curr_string_output_limit;     /* stop here */
int String_output_flag = 0;         /* if this is 1, write to output string */

#endif
|
|
|
|
/* Character types - see prlex.h.
|
|
* There is a bit of guess work once we go past 127
|
|
*/
|
|
static int Ctype[256] = {
|
|
CC, CC, CC, CC, CC, CC, CC, CC, CC, SP, /* O - 9 */
|
|
SP, CC, CC, SP, CC, CC, CC, CC, CC, CC, /* 10 - 19 */
|
|
CC, CC, CC, CC, CC, OT, OT, OT, OT, OT, /* 20 - 29 */
|
|
OT, OT, SP, OT, QU, OT, OT, OT, OT, AP, /* 30 - 39 */
|
|
BR, BR, OT, SI, OT, SI, OT, OT, DI, DI, /* 40 - 49 */
|
|
DI, DI, DI, DI, DI, DI, DI, DI, OT, OT, /* 50 - 59 */
|
|
OT, OT, OT, QE, OT, AU, AU, AU, AU, AU, /* 60 - 69 */
|
|
AU, AU, AU, AU, AU, AU, AU, AU, AU, AU, /* 70 - 79 */
|
|
AU, AU, AU, AU, AU, AU, AU, AU, AU, AU, /* 80 - 89 */
|
|
AU, OT, OT, OT, OT, US, OT, AL, AL, AL, /* 90 - 99 */
|
|
AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, /* 100 - 109 */
|
|
AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, /* 110 - 119 */
|
|
AL, AL, AL, OT, BA, OT, OT, OT, OT, OT, /* 120 - 129 */
|
|
AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, /* 130 - 139 */
|
|
AL, AL, AU, AU, AU, AL, OT, AL, AL, AL, /* 140 - 149 */
|
|
AL, AL, AL, AU, AU, OT, OT, OT, OT, OT, /* 150 - 159 */
|
|
OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, /* 160 - 169 */
|
|
OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, /* 170 - 179 */
|
|
OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, /* 180 - 189 */
|
|
OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, /* 190 - 199 */
|
|
OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, /* 200 - 209 */
|
|
OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, /* 210 - 219 */
|
|
OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, /* 220 - 229 */
|
|
OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, /* 230 - 240 */
|
|
OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, /* 240 - 249 */
|
|
OT, OT, OT, OT, OT, OT
|
|
};
|
|
|
|
/******************************************************************
|
|
ini_scan()
|
|
Called by scan().
|
|
*******************************************************************/
|
|
static void ini_scan()
|
|
{
|
|
|
|
lookahead();/* move to next non blank */
|
|
Rbuffptr = Read_buffer;
|
|
}
|
|
|
|
static ICHAR getbasechar ()
|
|
{
|
|
#if STRING_READ_CAPABILITY
|
|
if(String_input_flag)
|
|
{
|
|
Ch = *Curr_string_input++;
|
|
if( Ch <= 0 || Ch == 0x1a )
|
|
{
|
|
Curr_string_input--;
|
|
Ch = EOF;
|
|
}
|
|
}
|
|
else
|
|
#endif
|
|
Ch = prgetc( Curr_infile ) ;
|
|
return Ch ;
|
|
}
|
|
|
|
/******************************************************************
|
|
ungetbasechar()
|
|
Put a character back onto the proper input stream.
|
|
*******************************************************************/
|
|
static void ungetbasechar ( ICHAR c )
|
|
{
|
|
#if STRING_READ_CAPABILITY
|
|
if ( String_input_flag )
|
|
{
|
|
if ( c != EOF )
|
|
Curr_string_input--;
|
|
} else
|
|
#endif
|
|
prungetc(c, Curr_infile);
|
|
|
|
#if LOGGING_CAPABILITY
|
|
Unget_flag = 1;
|
|
#endif
|
|
}
|
|
|
|
/******************************************************************
			ungetachar()	macro
 Push the current character 'Ch' (and only that one) back onto the
 input stream. Only meaningful after a call to getachar().
 *******************************************************************/
#define ungetachar() (ungetbasechar(Ch))
|
|
|
|
/****************************************************************
|
|
get_nc_char
|
|
get non commented character
|
|
get next char out of comments and not in string
|
|
|
|
Extended by DOH for ANSI // line comments.
|
|
*****************************************************************/
|
|
static ICHAR get_nc_char()
|
|
{
|
|
ICHAR c;
|
|
static ICHAR prevc = ' ';
|
|
static int inside_comment = 0;
|
|
static int line_comment = 0 ;
|
|
|
|
for(;;)
|
|
{
|
|
c = getbasechar() ;
|
|
if(inside_comment)
|
|
{
|
|
if ( line_comment && c == '\n' )
|
|
{
|
|
inside_comment = line_comment = 0 ;
|
|
prevc = ' ';
|
|
return c;
|
|
}
|
|
else
|
|
if( prevc == '*' && c == '/' )
|
|
{
|
|
inside_comment = 0;
|
|
prevc = ' ';
|
|
continue;
|
|
}
|
|
|
|
if(c == '\n' || c == '\r')
|
|
{
|
|
return(c);
|
|
}
|
|
if(c == '*')
|
|
{
|
|
c = getbasechar();
|
|
if(c == '/')
|
|
{
|
|
inside_comment = 0;
|
|
}
|
|
prevc = c;
|
|
continue;
|
|
}
|
|
else
|
|
if(c == EOF)
|
|
{
|
|
fatal(msgDeref(MSG_EOFINCOMMENT));
|
|
return(EOF);
|
|
}
|
|
prevc = c;
|
|
continue;
|
|
}
|
|
else
|
|
if (c == '/') /* not inside comments */
|
|
{
|
|
c = getbasechar() ;
|
|
if( c == '*')
|
|
{
|
|
inside_comment = 1;
|
|
continue;
|
|
}
|
|
else
|
|
if ( c == '/')
|
|
{
|
|
line_comment = inside_comment = 1 ;
|
|
continue;
|
|
}
|
|
else
|
|
{
|
|
ungetbasechar(c);
|
|
return((short)'/');
|
|
}
|
|
}
|
|
else
|
|
if(c == EOF)
|
|
return(EOF);
|
|
else
|
|
if(c == '\n')
|
|
{
|
|
return(c);
|
|
}
|
|
return(c);
|
|
}
|
|
}
|
|
|
|
|
|
/******************************************************************
|
|
getachar()
|
|
The only routine you should use for reading a char. Always stores
|
|
its result into 'Ch' as well as returning it to caller.
|
|
*******************************************************************/
|
|
#define EvThreshhold 20
|
|
|
|
ICHAR getachar()
|
|
{
|
|
static int echk = 0 ;
|
|
extern unsigned int Inp_linecount;
|
|
|
|
if ( echk++ > EvThreshhold ) {
|
|
eventCheck();
|
|
echk = 0 ;
|
|
}
|
|
|
|
Ch = get_nc_char() ;
|
|
|
|
if(Ch == '\n')
|
|
Inp_linecount ++;
|
|
#if LOGGING_CAPABILITY /* new version */
|
|
|
|
if(Ch != 0 && Ch != EOF && Log_file != NULL && !Unget_flag)
|
|
{
|
|
char buffer [20] ;
|
|
sprintf( Log_file,"%c", Ch);
|
|
prfputs( buffer, Log_file);
|
|
}
|
|
Unget_flag = 0;
|
|
#endif
|
|
return Ch ;
|
|
}
|
|
|
|
/******************************************************************
|
|
lookahead()
|
|
Peek at next character,
|
|
but this character can be read by getachar()
|
|
******************************************************************/
|
|
static ICHAR lookahead()
|
|
{
|
|
do{
|
|
getachar();
|
|
} while(isspace(Ch));
|
|
|
|
ungetachar();
|
|
return(Ch);
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************
|
|
scan()
|
|
See prlex.h for return values other than characters (i.e. > 256)
|
|
*******************************************************************/
|
|
int scan()
|
|
{
|
|
ini_scan();
|
|
getachar();
|
|
if(Ch == EOF)
|
|
return(EOF);
|
|
switch(Ctype[Ch])
|
|
{
|
|
|
|
case DI:
|
|
MY_ASSERT(isdigit(Ch)); /* double check */
|
|
case SI:
|
|
MY_ASSERT(isdigit(Ch) || Ch == '-' || Ch == '+');
|
|
return (int) scan_number((char)Ch);
|
|
|
|
case QU:
|
|
MY_ASSERT(Ch == '"');
|
|
scan_string();
|
|
return(TOKEN_STRING);
|
|
|
|
case BR:
|
|
MY_ASSERT(Ch == ')' || Ch == '(');
|
|
return(Ch);
|
|
|
|
#ifdef CLIPS_SYNTAX
|
|
case QE:
|
|
scan_identifier(?);
|
|
return(TOKEN_VAR);
|
|
case AL:
|
|
case AU:
|
|
case OT:
|
|
scan_identifier(Ch);
|
|
return(TOKEN_ATOM);
|
|
#else
|
|
case AL:
|
|
MY_ASSERT(islower(Ch));
|
|
scan_identifier(Ch);
|
|
return(TOKEN_ATOM);
|
|
|
|
case US:
|
|
MY_ASSERT(Ch == '_');
|
|
|
|
case AU:
|
|
scan_identifier(Ch);
|
|
return(TOKEN_VAR);
|
|
#endif
|
|
case CC:
|
|
return(SCAN_ERR);
|
|
#ifdef CHARACTER
|
|
case AP:
|
|
return(scan_character());
|
|
#endif
|
|
default:
|
|
return(Ch);
|
|
}
|
|
}
|
|
|
|
/******************************************************************
|
|
scan_identifier()
|
|
Read an identifier.
|
|
******************************************************************/
|
|
static void scan_identifier(c)
|
|
int c;
|
|
{
|
|
int i;
|
|
*Rbuffptr++ = c;
|
|
|
|
for(i = 0; i < Max_Readbuffer; i++)
|
|
{
|
|
*Rbuffptr++ = getachar();
|
|
switch(Ctype[Ch])
|
|
{
|
|
case AL: /* lower case alpha */
|
|
case AU: /* upper case alpha */
|
|
case DI: /* digit */
|
|
case US: /* underscore */
|
|
continue;
|
|
default: /* anything else */
|
|
Rbuffptr --;
|
|
if (Ch != EOF) ungetachar();
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
*Rbuffptr = 0 ;
|
|
}
|
|
|
|
|
|
/******************************************************************
|
|
scan_number()
|
|
*******************************************************************/
|
|
#define Expon1 'E'
|
|
#define Expon2 'e'
|
|
#define Sign1 '-'
|
|
#define Sign2 '+'
|
|
|
|
static int scan_number(char c)
|
|
{
|
|
int met_dot = 0,
|
|
met_expon = 0,
|
|
met_sign = 0,
|
|
met_dig = 0,
|
|
met_ok ;
|
|
int i;
|
|
|
|
if ( isdigit(c) ) met_dig++ ;
|
|
*Rbuffptr++ = c;
|
|
for( met_ok = TRUE, i = 0; met_ok && i < MAXREALLENGTH; i++)
|
|
{
|
|
*Rbuffptr++ = getachar();
|
|
if ( isdigit(Ch) ) {
|
|
met_dig++ ;
|
|
} else {
|
|
switch ( Ch ) {
|
|
case '.' :
|
|
if ( met_dot || met_expon )
|
|
met_ok = FALSE ;
|
|
else met_dot++ ;
|
|
break ;
|
|
case Sign1:
|
|
case Sign2:
|
|
if ( met_sign > 0 || met_expon == 0 )
|
|
met_ok = FALSE ;
|
|
else met_sign++ ;
|
|
break ;
|
|
case Expon1:
|
|
case Expon2:
|
|
if ( met_expon ) met_ok = FALSE ;
|
|
else met_expon++ ;
|
|
break ;
|
|
default:
|
|
met_ok = FALSE ;
|
|
break ;
|
|
}
|
|
}
|
|
}
|
|
if ( i < MAXREALLENGTH ) {
|
|
Rbuffptr --;
|
|
if (Ch != EOF) ungetachar();
|
|
}
|
|
|
|
*Rbuffptr = 0 ;
|
|
|
|
switch(met_dot)
|
|
{
|
|
case 0:
|
|
return(TOKEN_INT);
|
|
case 1:
|
|
return(TOKEN_REAL);
|
|
default:
|
|
return(SCAN_ERR);
|
|
}
|
|
}
|
|
|
|
/******************************************************************
|
|
scan_string()
|
|
Read a string and only store the characters between the quotes.
|
|
To handle embeded quotes double them up.
|
|
*******************************************************************/
|
|
/* a bit crude ... */
|
|
static void scan_string()
|
|
{
|
|
int i, c;
|
|
|
|
i = 0;
|
|
do{
|
|
c = getachar();
|
|
if(c == EOF)break;
|
|
if(c == STRING_QUOTE)
|
|
{
|
|
c = getachar();
|
|
if(c != '"')/* it really is the end of the string */
|
|
{
|
|
ungetachar();
|
|
break;/* dont store the quote */
|
|
}
|
|
}
|
|
*Rbuffptr++ = c;
|
|
}while(++i < Max_Readbuffer);
|
|
|
|
*Rbuffptr = 0 ;
|
|
}
|
|
|
|
#ifdef CHARACTER
|
|
#define APOSTROPHE '\''
|
|
|
|
/* scan character */
|
|
static int scan_character()
|
|
{
|
|
int c;
|
|
|
|
*Rbuffptr++ = '\'';
|
|
c = getachar();
|
|
*Rbuffptr++ = c;
|
|
|
|
if (c == '\\')
|
|
return(scan_escape());
|
|
else
|
|
Char_scanned = c;
|
|
if(c == EOF){
|
|
sprintf(Print_buffer, msgDeref(MSG_EOFINCHAR) );
|
|
errmsg(Print_buffer);
|
|
return(SCAN_ERR);
|
|
}
|
|
else
|
|
c = getachar();
|
|
*Rbuffptr++ = c;
|
|
*Rbuffptr = 0;
|
|
|
|
if(c != APOSTROPHE)
|
|
return (SCAN_ERR);
|
|
return(TOKEN_CHAR);
|
|
}
|
|
|
|
/* have just read a \ */
|
|
scan_escape(){
|
|
int c;
|
|
c = getachar();
|
|
*Rbuffptr++ = c;
|
|
|
|
switch(c)
|
|
{
|
|
case 't':
|
|
Char_scanned = '\t';
|
|
break;
|
|
|
|
case 'n':
|
|
Char_scanned = '\n';
|
|
break;
|
|
|
|
case 'r':
|
|
Char_scanned = '\r';
|
|
break;
|
|
|
|
case '\\':
|
|
Char_scanned = '\\';
|
|
break;
|
|
|
|
case '\'':
|
|
Char_scanned = '\'';
|
|
break;
|
|
|
|
case 'b':
|
|
Char_scanned = '\b';
|
|
break;
|
|
|
|
case '"':
|
|
Char_scanned = '\"';
|
|
break;
|
|
|
|
case 'f':
|
|
Char_scanned = '\f';
|
|
break;
|
|
|
|
case 'v':
|
|
Char_scanned = '\v';
|
|
break;
|
|
default:
|
|
if(isdigit(c))
|
|
return(scan_nescape(c));
|
|
*Rbuffptr = 0;
|
|
return(SCAN_ERR);
|
|
}/* end switch */
|
|
|
|
c = getachar();
|
|
*Rbuffptr = c;
|
|
*Rbuffptr = 0;
|
|
|
|
if(c != '\'')
|
|
return(SCAN_ERR);
|
|
|
|
return(TOKEN_CHAR);
|
|
}
|
|
|
|
/******************************************************************************
|
|
scan_nescape()
|
|
Scan rest of character that looks like '\123'
|
|
******************************************************************************/
|
|
static int scan_nescape(c)
|
|
char c; /* first digit */
|
|
{
|
|
int i = 1;
|
|
|
|
Char_scanned = c - '0';
|
|
do{
|
|
c = getachar();
|
|
i++;
|
|
if(c == APOSTROPHE)
|
|
{
|
|
*Rbuffptr++ = c;
|
|
*Rbuffptr = 0;
|
|
return TOKEN_CHAR;
|
|
|
|
}
|
|
if((c > '7') || (c < '0'))
|
|
{
|
|
|
|
*Rbuffptr = 0;
|
|
return(SCAN_ERR);
|
|
}
|
|
else
|
|
{
|
|
Char_scanned = 8*Char_scanned + c - '0';
|
|
*Rbuffptr++ = c;
|
|
}
|
|
}while(i <= 3);
|
|
if (i > 3)
|
|
{
|
|
*Rbuffptr = 0;
|
|
return(SCAN_ERR);
|
|
}
|
|
else
|
|
return(SCAN_ERR);
|
|
}
|
|
#endif
|
|
|
|
/* end of file */
|
|
|
|
|
|
|