Windows NT 4.0 source code leak
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

600 lines
14 KiB

/* prscan.c */
/* lexical analysis */
#include "prtypes.h"
#include "prlex.h"
#include "prstdio.h"
#include <ctype.h>
#include "prextern.h"
/*
 * Forward declarations of the file-local scanner helpers.
 * NOTE(review): get_nc_char() and lookahead() are declared here as
 * returning int but are defined below as returning ICHAR; this is only
 * consistent if ICHAR is a typedef for int - confirm in prtypes.h.
 * NOTE(review): ungetachar is declared here as a function, but this file
 * only ever defines it as a macro (see the #define further down).
 */
static void ini_scan(void);
static int get_nc_char(void);
static void ungetachar(void);
static int lookahead(void);
static void scan_identifier(int c);
static int scan_number(char c);
static void scan_string(void);
/* Buffers owned elsewhere. Token text is written into Read_buffer;
 * Max_Readbuffer is used below as the loop bound when storing token text;
 * Print_buffer is used to format an error message in scan_character(). */
extern char *Read_buffer;
extern char *Print_buffer;
extern int Max_Readbuffer;
/* Write cursor into Read_buffer; reset by ini_scan() for every token. */
static char *Rbuffptr;
/* Most recently read character; getachar() always stores its result here. */
ICHAR Ch = 0 ;
#if LOGGING_CAPABILITY
/* Set by ungetbasechar(); tested and cleared by getachar() so that a
 * pushed-back character is not written to the log a second time. */
static int Unget_flag = 0;
#endif
#ifdef CHARACTER
/* Value of the last character literal scanned by scan_character(). */
ICHAR Char_scanned;
static int scan_character();
#endif
#if STRING_READ_CAPABILITY
/* the following two variables let you read from a string */
char *Curr_string_input; /* this is where we would get the next char */
int String_input_flag = 0; /* if this is 0 then read from a file */
/* The output-side variables below are not referenced in this file. */
char *Curr_string_output, /* put characters here */
*Curr_string_output_limit ; /* stop here */
int String_output_flag = 0 ; /* if this is 1, write to output string */
#endif
/* Character types - see prlex.h.
* Indexed by character code (0 - 255); scan() and scan_identifier()
* dispatch on Ctype[Ch]. Each row below covers ten consecutive codes.
* There is a bit of guess work once we go past 127
*/
static int Ctype[256] = {
CC, CC, CC, CC, CC, CC, CC, CC, CC, SP, /* 0 - 9 */
SP, CC, CC, SP, CC, CC, CC, CC, CC, CC, /* 10 - 19 */
CC, CC, CC, CC, CC, OT, OT, OT, OT, OT, /* 20 - 29 */
OT, OT, SP, OT, QU, OT, OT, OT, OT, AP, /* 30 - 39 */
BR, BR, OT, SI, OT, SI, OT, OT, DI, DI, /* 40 - 49 */
DI, DI, DI, DI, DI, DI, DI, DI, OT, OT, /* 50 - 59 */
OT, OT, OT, QE, OT, AU, AU, AU, AU, AU, /* 60 - 69 */
AU, AU, AU, AU, AU, AU, AU, AU, AU, AU, /* 70 - 79 */
AU, AU, AU, AU, AU, AU, AU, AU, AU, AU, /* 80 - 89 */
AU, OT, OT, OT, OT, US, OT, AL, AL, AL, /* 90 - 99 */
AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, /* 100 - 109 */
AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, /* 110 - 119 */
AL, AL, AL, OT, BA, OT, OT, OT, OT, OT, /* 120 - 129 */
AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, /* 130 - 139 */
AL, AL, AU, AU, AU, AL, OT, AL, AL, AL, /* 140 - 149 */
AL, AL, AL, AU, AU, OT, OT, OT, OT, OT, /* 150 - 159 */
OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, /* 160 - 169 */
OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, /* 170 - 179 */
OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, /* 180 - 189 */
OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, /* 190 - 199 */
OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, /* 200 - 209 */
OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, /* 210 - 219 */
OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, /* 220 - 229 */
OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, /* 230 - 239 */
OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, /* 240 - 249 */
OT, OT, OT, OT, OT, OT /* 250 - 255 */
};
/******************************************************************
			ini_scan()
 Prepares for reading one token; invoked at the top of scan().
 Skips any leading white space through lookahead(), then rewinds
 the token write cursor to the beginning of Read_buffer.
 *******************************************************************/
static void ini_scan(void)
{
    (void)lookahead();       /* position input on the next non-blank char */
    Rbuffptr = Read_buffer;  /* token text is accumulated from the start */
}
/******************************************************************
			getbasechar()
 Fetch one raw character from the current input source, store it
 in the global 'Ch' and return it.

 When string input is compiled in and String_input_flag is set,
 the character comes from *Curr_string_input.  A value <= 0 or
 0x1a (Ctrl-Z) is reported as EOF and the string cursor is left
 pointing at that terminating byte; otherwise the cursor advances.
 In every other case the character comes from prgetc(Curr_infile).

 NOTE(review): if plain char is signed, bytes >= 0x80 read from a
 string compare <= 0 and are therefore reported as EOF - confirm
 that this is intended.
 *******************************************************************/
static ICHAR getbasechar ()
{
#if STRING_READ_CAPABILITY
    if ( !String_input_flag )
    {
        Ch = prgetc( Curr_infile );
        return Ch;
    }

    Ch = *Curr_string_input;
    if ( Ch <= 0 || Ch == 0x1a )
        Ch = EOF;               /* stay on the terminator */
    else
        Curr_string_input++;    /* consume the character */
#else
    Ch = prgetc( Curr_infile );
#endif
    return Ch;
}
/******************************************************************
			ungetbasechar()
 Return character 'c' to whichever input source is active.

 File input: 'c' is handed to prungetc() on Curr_infile.
 String input (when compiled in and String_input_flag is set): the
 string cursor is stepped back by one, except for EOF, because
 getbasechar() does not advance the cursor when it reports EOF.

 With logging compiled in, Unget_flag is raised so that getachar()
 does not log the character again when it is re-read.
 *******************************************************************/
static void ungetbasechar ( ICHAR c )
{
#if STRING_READ_CAPABILITY
    if ( !String_input_flag )
        prungetc(c, Curr_infile);
    else if ( c != EOF )
        Curr_string_input--;
#else
    prungetc(c, Curr_infile);
#endif

#if LOGGING_CAPABILITY
    Unget_flag = 1;
#endif
}
/******************************************************************
ungetachar() macro
Put just one char ('Ch') back on input stream.
Expands to ungetbasechar(Ch), so it pushes back whatever the global
'Ch' currently holds; it takes no argument of its own.
Cannot be used without a prior call to getachar().
scan_identifier() and scan_number() skip the call when Ch == EOF.
*******************************************************************/
#define ungetachar() ungetbasechar(Ch)
/****************************************************************
			get_nc_char
 Get the next character that is not part of a comment.

 Two comment forms are recognised: block comments and line
 comments introduced by two slashes (the line form was added by
 DOH).  Characters inside a comment are discarded, except that
 '\n' and '\r' are still returned so that the caller can keep
 counting lines.  Because of that the function can return while
 still inside a comment, which is why the comment state is kept
 in static variables.

 A line comment ends only at '\n' (which is returned) or at EOF;
 a block-comment terminator inside it is ignored.  A block
 comment ends only at its terminator.  EOF inside a block comment
 is reported through fatal(); EOF at the end of a line comment is
 simply returned.  The comment state is always reset on EOF so
 that a later call starts outside any comment.

 A '/' that does not start a comment is returned as is and the
 character following it is pushed back.

 Returns the character read, or EOF.
 *****************************************************************/
static ICHAR get_nc_char()
{
    static ICHAR prevc = ' ';       /* previous char seen in a block comment */
    static int inside_comment = 0;  /* non-zero while inside any comment */
    static int line_comment = 0;    /* non-zero if that comment is a line comment */
    ICHAR c;

    for(;;)
    {
        c = getbasechar();

        if ( !inside_comment )
        {
            if ( c != '/' )
                return(c);          /* ordinary char, newline or EOF */

            c = getbasechar();
            if ( c == '*' || c == '/' )
            {
                inside_comment = 1;
                line_comment = (c == '/');
                prevc = ' ';        /* the opening '*' must not count as a closer */
                continue;
            }
            ungetbasechar(c);       /* lone '/': give the next char back */
            return('/');
        }

        if ( c == EOF )
        {
            int unterminated_block = !line_comment;

            /* reset first: fatal() may not return to this point */
            inside_comment = line_comment = 0;
            prevc = ' ';
            if ( unterminated_block )
                fatal(msgDeref(MSG_EOFINCOMMENT));
            return(EOF);
        }

        if ( line_comment )
        {
            if ( c == '\n' )
            {
                inside_comment = line_comment = 0;
                return(c);
            }
            if ( c == '\r' )
                return(c);
            continue;               /* everything else on the line is skipped */
        }

        /* inside a block comment */
        if ( prevc == '*' && c == '/' )
        {
            inside_comment = 0;
            prevc = ' ';
            continue;
        }
        prevc = c;
        if ( c == '\n' || c == '\r' )
            return(c);              /* let the caller count the line */
    }
}
/******************************************************************
			getachar()
 The only routine you should use for reading a char.  Always stores
 its result into 'Ch' as well as returning it to the caller.

 Side effects:
  - calls eventCheck() once the internal call counter has passed
    EvThreshhold, then restarts the counter;
  - increments Inp_linecount for every '\n' returned;
  - with logging compiled in, appends the character to Log_file
    unless it is 0 or EOF, no log file is open, or the character is
    being re-read after a push-back (Unget_flag).

 NOTE(review): a '\n' that is pushed back and read again is counted
 twice in Inp_linecount; confirm whether callers compensate.
 *******************************************************************/
#define EvThreshhold 20
ICHAR getachar()
{
    static int echk = 0 ;
    extern unsigned int Inp_linecount;

    if ( echk++ > EvThreshhold ) {
        eventCheck();
        echk = 0 ;
    }

    Ch = get_nc_char() ;
    if(Ch == '\n')
        Inp_linecount ++;

#if LOGGING_CAPABILITY /* new version */
    if(Ch != 0 && Ch != EOF && Log_file != NULL && !Unget_flag)
    {
        /* Build the one-character string in a local buffer.  The text
         * must be formatted into this buffer, never into Log_file. */
        char buffer [2] ;

        buffer[0] = (char) Ch;
        buffer[1] = '\0';
        prfputs( buffer, Log_file);
    }
    Unget_flag = 0;
#endif
    return Ch ;
}
/******************************************************************
			lookahead()
 Skip white space, then peek at the first character that follows.
 That character is pushed back, so the next getachar() call reads
 it again; it is also left in 'Ch' and returned.
 ******************************************************************/
static ICHAR lookahead()
{
    for(;;)
    {
        getachar();
        if( !isspace(Ch) )
            break;
    }
    ungetachar();
    return(Ch);
}
/******************************************************************
			scan()
 Read the next token from the input.

 Leading white space is skipped and the token buffer is reset
 (ini_scan), then the first character selects the kind of token
 through the Ctype table.  Token text, where there is any, is left
 NUL-terminated in Read_buffer by the scan_xxx helper.

 Returns EOF at end of input, SCAN_ERR for a control character,
 one of the TOKEN_xxx codes, or the character itself for anything
 that is not the start of a longer token.
 See prlex.h for return values other than characters (i.e. > 256)
 *******************************************************************/
int scan()
{
    ini_scan();
    getachar();
    if(Ch == EOF)
        return(EOF);

    switch(Ctype[Ch])
    {
    case DI:
        MY_ASSERT(isdigit(Ch)); /* double check */
        /* fall through: digits and signs both start a number */
    case SI:
        MY_ASSERT(isdigit(Ch) || Ch == '-' || Ch == '+');
        return (int) scan_number((char)Ch);

    case QU:
        MY_ASSERT(Ch == '"');
        scan_string();
        return(TOKEN_STRING);

    case BR:
        MY_ASSERT(Ch == ')' || Ch == '(');
        return(Ch);

#ifdef CLIPS_SYNTAX
    case QE:
        /* Only '?' is classified QE in Ctype, so Ch is the '?' that
         * introduces the variable name. */
        scan_identifier(Ch);
        return(TOKEN_VAR);

    case AL:
    case AU:
    case OT:
        scan_identifier(Ch);
        return(TOKEN_ATOM);
#else
    case AL:
        MY_ASSERT(islower(Ch));
        scan_identifier(Ch);
        return(TOKEN_ATOM);

    case US:
        MY_ASSERT(Ch == '_');
        /* fall through: '_' starts a variable just like an AU char */
    case AU:
        scan_identifier(Ch);
        return(TOKEN_VAR);
#endif

    case CC:
        return(SCAN_ERR);

#ifdef CHARACTER
    case AP:
        return(scan_character());
#endif

    default:
        return(Ch);
    }
}
/******************************************************************
			scan_identifier()
 Read an identifier into the token buffer.

 'c' is the first character, already consumed by the caller.
 Further characters are appended while their Ctype class is AL,
 AU, DI or US.  The first character of any other class ends the
 identifier and is pushed back (unless it is EOF).  The text is
 NUL-terminated.

 EOF and any value outside the Ctype table end the identifier
 without being used as a table index.

 NOTE(review): up to Max_Readbuffer + 1 characters plus the NUL can
 be stored; confirm that Read_buffer is allocated with room for
 them.
 ******************************************************************/
static void scan_identifier(int c)
{
    int i;

    *Rbuffptr++ = c;
    for(i = 0; i < Max_Readbuffer; i++)
    {
        int part_of_name = 0;

        *Rbuffptr++ = getachar();

        /* Classify only values that are valid Ctype indices; EOF is
         * negative and must never reach the table. */
        if (Ch != EOF && Ch >= 0 && Ch < 256)
        {
            switch(Ctype[Ch])
            {
            case AL: /* lower case alpha */
            case AU: /* upper case alpha */
            case DI: /* digit */
            case US: /* underscore */
                part_of_name = 1;
                break;
            default: /* anything else */
                break;
            }
        }

        if (!part_of_name)
        {
            Rbuffptr --;            /* drop the terminator just stored */
            if (Ch != EOF) ungetachar();
            break;
        }
    }
    *Rbuffptr = 0 ;
}
/******************************************************************
			scan_number()
 Read a number into the token buffer.

 'c' is the first character (a digit or a sign), already consumed
 by the caller.  Accepted after it: digits; at most one '.', and
 only before any exponent; at most one exponent letter (E or e);
 at most one sign, and only once an exponent letter has been seen.
 The first character that breaks these rules ends the number; it
 is removed from the buffer and pushed back (unless it is EOF).
 At most MAXREALLENGTH characters are read after the first one.
 The text is NUL-terminated.

 Returns TOKEN_REAL if a '.' was seen, TOKEN_INT otherwise.

 NOTE(review): a number with an exponent but no '.' (e.g. 1e5) is
 reported as TOKEN_INT, and a lone sign is reported as TOKEN_INT
 with just the sign as text; confirm that callers expect this.
 *******************************************************************/
#define Expon1 'E'
#define Expon2 'e'
#define Sign1 '-'
#define Sign2 '+'
static int scan_number(char c)
{
    int met_dot = 0,
        met_expon = 0,
        met_sign = 0,
        met_ok = TRUE;
    int i;

    *Rbuffptr++ = c;
    for( i = 0; met_ok && i < MAXREALLENGTH; i++)
    {
        *Rbuffptr++ = getachar();
        if ( isdigit(Ch) )
            continue;

        switch ( Ch ) {
        case '.' :
            if ( met_dot || met_expon )
                met_ok = FALSE ;
            else met_dot++ ;
            break ;
        case Sign1:
        case Sign2:
            if ( met_sign > 0 || met_expon == 0 )
                met_ok = FALSE ;
            else met_sign++ ;
            break ;
        case Expon1:
        case Expon2:
            if ( met_expon ) met_ok = FALSE ;
            else met_expon++ ;
            break ;
        default:
            met_ok = FALSE ;
            break ;
        }
    }

    /* Test met_ok, not the loop counter: the counter is incremented once
     * more after the terminating character has been read, so comparing it
     * with MAXREALLENGTH misses a terminator met on the last iteration. */
    if ( !met_ok ) {
        Rbuffptr --;                /* drop the terminator just stored */
        if (Ch != EOF) ungetachar();
    }
    *Rbuffptr = 0 ;

    return met_dot ? TOKEN_REAL : TOKEN_INT;
}
/******************************************************************
			scan_string()
 Read a string and only store the characters between the quotes.
 To handle embedded quotes double them up: two consecutive quote
 characters inside the string store a single one.

 The opening quote has already been consumed by the caller.
 Reading stops at the closing quote, at EOF, or once
 Max_Readbuffer characters have been stored.  The character that
 follows the closing quote is pushed back (unless it is EOF).
 The text is NUL-terminated.  An unterminated string is not
 reported as an error here.
 *******************************************************************/
/* a bit crude ... */
static void scan_string()
{
    int stored = 0;
    int c;

    do{
        c = getachar();
        if(c == EOF)
            break;
        if(c == STRING_QUOTE)
        {
            c = getachar();
            if(c != STRING_QUOTE) /* it really is the end of the string */
            {
                /* same rule as the other scanners: never push EOF back */
                if(c != EOF)
                    ungetachar();
                break; /* dont store the quote */
            }
        }
        *Rbuffptr++ = c;
    }while(++stored < Max_Readbuffer);
    *Rbuffptr = 0 ;
}
#ifdef CHARACTER
#define APOSTROPHE '\''
/* scan character
 * Read a character literal; the opening apostrophe has already been
 * consumed by the caller.  The literal's text, apostrophes included, is
 * stored in the token buffer and its value in Char_scanned.  A backslash
 * hands over to scan_escape().
 * Returns TOKEN_CHAR on success.  Returns SCAN_ERR if the closing
 * apostrophe is missing, or (after reporting through errmsg) if EOF
 * follows the opening apostrophe.
 */
int scan_escape(void);      /* defined further down in this file */

static int scan_character()
{
    int c;

    *Rbuffptr++ = '\'';
    c = getachar();
    if(c == EOF){
        *Rbuffptr = 0;      /* keep the token text terminated */
        /* the message is data, not a format string */
        sprintf(Print_buffer, "%s", msgDeref(MSG_EOFINCHAR) );
        errmsg(Print_buffer);
        return(SCAN_ERR);
    }
    *Rbuffptr++ = c;
    if (c == '\\')
        return(scan_escape());
    Char_scanned = c;

    c = getachar();         /* must be the closing apostrophe */
    *Rbuffptr++ = c;
    *Rbuffptr = 0;
    if(c != APOSTROPHE)
        return (SCAN_ERR);
    return(TOKEN_CHAR);
}
/* have just read a \
 * Read the rest of an escaped character literal.  The escape letter is
 * appended to the token buffer and translated into Char_scanned; a digit
 * hands over to scan_nescape().  The closing apostrophe is then read and
 * appended, and the text is NUL-terminated.
 * Returns TOKEN_CHAR on success, SCAN_ERR for an unknown escape or a
 * missing closing apostrophe.
 * Kept with external linkage because scan_character() calls it without a
 * prior static declaration.
 */
static int scan_nescape(int c);     /* defined further down in this file */

int scan_escape(void)
{
    int c;

    c = getachar();
    *Rbuffptr++ = c;
    switch(c)
    {
    case 't':
        Char_scanned = '\t';
        break;
    case 'n':
        Char_scanned = '\n';
        break;
    case 'r':
        Char_scanned = '\r';
        break;
    case '\\':
        Char_scanned = '\\';
        break;
    case '\'':
        Char_scanned = '\'';
        break;
    case 'b':
        Char_scanned = '\b';
        break;
    case '"':
        Char_scanned = '\"';
        break;
    case 'f':
        Char_scanned = '\f';
        break;
    case 'v':
        Char_scanned = '\v';
        break;
    default:
        if(isdigit(c))
            return(scan_nescape(c));
        *Rbuffptr = 0;
        return(SCAN_ERR);
    }/* end switch */

    c = getachar();
    *Rbuffptr++ = c;    /* store the closing char, as scan_character() does */
    *Rbuffptr = 0;
    if(c != '\'')
        return(SCAN_ERR);
    return(TOKEN_CHAR);
}
/******************************************************************************
			scan_nescape()
 Scan rest of character that looks like '\123'

 'c' is the first digit; the caller has already read it and stored
 it in the token buffer.  Up to two more octal digits may follow,
 then the closing apostrophe.  The octal value is accumulated in
 Char_scanned and the text read is appended to the token buffer,
 which is NUL-terminated on every return path.

 Returns TOKEN_CHAR on success.  Returns SCAN_ERR if any digit is
 not octal (including a first digit of 8 or 9, which the caller's
 isdigit() test lets through), if a non-digit appears before the
 apostrophe, or if more than three digits are given.

 NOTE(review): three digits can encode values up to 0777, which is
 more than 255; no range check is made here.
 ******************************************************************************/
static int scan_nescape(int c)
{
    int ndigits = 1;

    if((c > '7') || (c < '0'))
    {
        *Rbuffptr = 0;
        return(SCAN_ERR);
    }
    Char_scanned = c - '0';

    do{
        c = getachar();
        ndigits++;
        if(c == APOSTROPHE)
        {
            *Rbuffptr++ = c;
            *Rbuffptr = 0;
            return TOKEN_CHAR;
        }
        if((c > '7') || (c < '0'))
        {
            *Rbuffptr = 0;
            return(SCAN_ERR);
        }
        Char_scanned = 8*Char_scanned + c - '0';
        *Rbuffptr++ = c;
    }while(ndigits <= 3);

    /* a fourth digit was read without meeting the closing apostrophe */
    *Rbuffptr = 0;
    return(SCAN_ERR);
}
#endif
/* end of file */