You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
423 lines
17 KiB
423 lines
17 KiB
%{
|
|
//--------------------------------------------------------------------
|
|
// Microsoft Monarch
|
|
//
|
|
// Copyright (c) Microsoft Corporation, 1997 - 1999.
|
|
//
|
|
// @doc OPTIONAL EXTRACTION CODES
|
|
//
|
|
// @module ms-sql.l |
|
|
// LEX tokenizer script
|
|
//
|
|
// @devnotes none
|
|
//
|
|
// @rev 0 | 04-Feb-97 | v-charca | Created
|
|
//
|
|
|
|
/**
|
|
** NOTE : when adding a new token (XXX) modify the following:
|
|
** 1.) Add %token _XXX to sql.y
|
|
** 2.) Add lexeme pattern to sql.l stating whether the token returns a TOKEN
|
|
** or a VALUE. If the token returns a value a node will need to be created to
|
|
** contain the value information. Therefore the VALUE macro will also need to
|
|
** specify a valid VARIANT type for the value.
|
|
**/
|
|
|
|
|
|
#include "msidxtr.h"
|
|
|
|
/*
** YYTRACE(tok)
**   Debug builds: forwards the current lexeme (yytext, yyleng) and the token
**   number to LexerTrace.  The expansion carries its own trailing ';'.
**   Other builds: expands to nothing.
*/
#ifndef DEBUG
#  define YYTRACE(tok)
#else
#  define YYTRACE(tok) LexerTrace(yytext, yyleng, tok);
#endif
|
|
|
|
|
|
/*
** Action helpers used by the lex rules.  Each one traces the token (see
** YYTRACE) and returns from the scanner function.
**
**   TOKEN(tok)                  - return tok; no value node is created.
**   VALUE(tok)                  - build a value node for tok in yylval via
**                                 CreateTknValue, then return tok.
**   STRING_VALUE(tok, ch, fQ)   - like VALUE, additionally passing the filter
**                                 character and the strip-quotes flag.
**   ID_VALUE(tok, ch)           - build the node for tok (passing the filter
**                                 character), but always report _ID to the
**                                 parser.
**
** The multi-statement helpers expand to a brace-enclosed block, so they are
** valid with or without a trailing ';' at the point of use.
*/
#define TOKEN(tok) YYTRACE(tok) return(tok);

#define VALUE(tok) \
    { \
        YYTRACE(tok) \
        CreateTknValue(yylval, tok); \
        return tok; \
    }

#define STRING_VALUE(tok, chFilter, fStrip) \
    { \
        YYTRACE(tok) \
        CreateTknValue(yylval, tok, chFilter, fStrip); \
        return tok; \
    }

#define ID_VALUE(tok, chFilter) \
    { \
        YYTRACE(tok) \
        CreateTknValue(yylval, tok, chFilter); \
        return _ID; \
    }
|
|
|
|
/*
** Replace the scanner's default input macro so that Lex reads from a block
** of data supplied through yybufferinput():
**   buf      - the character buffer to fill
**   result   - variable that receives the number of chars read
**   max_size - the size of the buffer
*/
#undef YY_INPUT
#define YY_INPUT(buf, result, max_size) (result = yybufferinput(buf, max_size))
|
|
|
|
//--------------------------------------------------------------------------------------------
|
|
// @func Makes a new copy of UNICODE string. Filters out double quotes
|
|
// @side Allocates enough bytes to hold string
|
|
// @rdesc Pointer to new UNICODE string
|
|
|
|
LPWSTR PwszDupFilter(
|
|
LPWSTR pwszOrig,
|
|
WCHAR wch )
|
|
{
|
|
LPWSTR pwszCopy = (LPWSTR)CoTaskMemAlloc( (wcslen(pwszOrig)+2)*sizeof(WCHAR) );
|
|
if ( 0 != pwszCopy )
|
|
{
|
|
LPWSTR pwsz = pwszCopy;
|
|
while ( 0 != *pwszOrig )
|
|
{
|
|
if ( *(pwszOrig+1) && *(pwszOrig+1) == *pwszOrig && wch == *pwszOrig )
|
|
pwszOrig++;
|
|
else
|
|
*pwsz++ = *pwszOrig++;
|
|
}
|
|
*pwsz = L'\0';
|
|
}
|
|
|
|
return pwszCopy;
|
|
}
|
|
|
|
//--------------------------------------------------------------------------------------------
|
|
// YYLEXER::CreateTknValue
|
|
// Creates a QUERYTREE node structure which is passed to the YACC value stack.
|
|
// This routines uses the TokenInfo map to determine which opids to create for
|
|
// the given string.
|
|
//
|
|
//
|
|
void YYLEXER::CreateTknValue(
|
|
YYSTYPE *ppct,
|
|
short tknNum,
|
|
YY_CHAR wch,
|
|
BOOL fQuote )
|
|
{
|
|
// Note that values containing variants can only be CONSTANTS or ID's
|
|
// SHOULD BE DONE BY valType
|
|
switch ( tknNum )
|
|
{
|
|
case _ID:
|
|
case _TEMPVIEW:
|
|
{
|
|
// Assume table_name for now. Might have to correct this when I
|
|
// see the context in the parser.
|
|
if ( 0 == (*ppct = PctAllocNode(DBVALUEKIND_WSTR)) )
|
|
throw(E_OUTOFMEMORY);
|
|
|
|
(*ppct)->op = DBOP_table_name;
|
|
(*ppct)->wKind = DBVALUEKIND_WSTR;
|
|
(*ppct)->value.pwszValue = CoTaskStrDup(yytext_ptr);
|
|
if( 0 == (*ppct)->value.pwszValue )
|
|
{
|
|
DeleteDBQT( *ppct );
|
|
*ppct = NULL;
|
|
throw(E_OUTOFMEMORY);
|
|
}
|
|
_wcsupr((*ppct)->value.pwszValue);
|
|
break;
|
|
}
|
|
|
|
case _DELIMITED_ID:
|
|
{
|
|
if ( 0 == (*ppct = PctAllocNode(DBVALUEKIND_WSTR)) )
|
|
throw(E_OUTOFMEMORY);
|
|
|
|
(*ppct)->op = DBOP_table_name;
|
|
(*ppct)->wKind = DBVALUEKIND_WSTR;
|
|
// Strip quotes on delimited identifier
|
|
yytext_ptr[wcslen(yytext_ptr)-1] = L'\0';
|
|
(*ppct)->value.pwszValue = PwszDupFilter(yytext_ptr+1, wch);
|
|
if( 0 == (*ppct)->value.pwszValue )
|
|
{
|
|
DeleteDBQT( *ppct );
|
|
*ppct = NULL;
|
|
throw(E_OUTOFMEMORY);
|
|
}
|
|
break;
|
|
}
|
|
|
|
case _URL:
|
|
case _STRING:
|
|
case _PREFIX_STRING:
|
|
{
|
|
// NOTE: This is really a PROPVARIANT node, but there is no DBVALUEKIND for PROPVARIANT.
|
|
if ( 0 == (*ppct = PctAllocNode(DBVALUEKIND_VARIANT, DBOP_scalar_constant)) )
|
|
throw(E_OUTOFMEMORY);
|
|
|
|
LPWSTR pwsz = yytext_ptr;
|
|
LPWSTR pwszCopy = PwszDupFilter(pwsz, wch);
|
|
|
|
if ( 0 == pwszCopy )
|
|
{
|
|
DeleteDBQT( *ppct );
|
|
*ppct = NULL;
|
|
throw(E_OUTOFMEMORY);
|
|
}
|
|
|
|
LPWSTR pwszTemp = pwszCopy;
|
|
// Strip quotes on literals or
|
|
if ( fQuote && (*pwszCopy == L'\"' || *pwszCopy == L'\'') )
|
|
{
|
|
pwszCopy++;
|
|
Assert(pwszCopy[wcslen(pwszCopy)-1] == L'\"' || pwszCopy[wcslen(pwszCopy)-1] == L'\'');
|
|
pwszCopy[wcslen(pwszCopy)-1] = L'\0';
|
|
}
|
|
|
|
((PROPVARIANT*)(*ppct)->value.pvValue)->bstrVal = SysAllocString( pwszCopy );
|
|
CoTaskMemFree( pwszTemp ); // throw away temporary before testing for out of memory
|
|
((PROPVARIANT*)(*ppct)->value.pvValue)->vt = VT_BSTR;
|
|
if( 0 == ((PROPVARIANT*)(*ppct)->value.pvValue)->bstrVal )
|
|
{
|
|
DeleteDBQT( *ppct );
|
|
*ppct = 0;
|
|
throw(E_OUTOFMEMORY);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case _INTNUM:
|
|
if ( 0 == (*ppct = PctAllocNode(DBVALUEKIND_VARIANT, DBOP_scalar_constant)) )
|
|
throw(E_OUTOFMEMORY);
|
|
|
|
((PROPVARIANT*)(*ppct)->value.pvValue)->bstrVal = SysAllocString( yytext_ptr );
|
|
((PROPVARIANT*)(*ppct)->value.pvValue)->vt = VT_BSTR;
|
|
if ( 0 == ((PROPVARIANT*)(*ppct)->value.pvValue)->bstrVal )
|
|
{
|
|
DeleteDBQT( *ppct );
|
|
*ppct = 0;
|
|
throw(E_OUTOFMEMORY);
|
|
}
|
|
|
|
(*ppct)->hrError = PropVariantChangeTypeI64( (PROPVARIANT*)(*ppct)->value.pvValue );
|
|
if ( FAILED((*ppct)->hrError) )
|
|
{
|
|
HRESULT hr = (*ppct)->hrError;
|
|
DeleteDBQT( *ppct );
|
|
*ppct = 0;
|
|
throw(hr);
|
|
}
|
|
break;
|
|
|
|
case _REALNUM:
|
|
if ( 0 == (*ppct = PctAllocNode(DBVALUEKIND_VARIANT, DBOP_scalar_constant)) )
|
|
throw(E_OUTOFMEMORY);
|
|
|
|
((PROPVARIANT*)(*ppct)->value.pvValue)->bstrVal = SysAllocString( yytext_ptr );
|
|
((PROPVARIANT*)(*ppct)->value.pvValue)->vt = VT_BSTR;
|
|
if ( 0 == ((PROPVARIANT*)(*ppct)->value.pvValue)->bstrVal )
|
|
{
|
|
DeleteDBQT( *ppct );
|
|
*ppct = NULL;
|
|
throw(E_OUTOFMEMORY);
|
|
}
|
|
|
|
(*ppct)->hrError = VariantChangeTypeEx( (*ppct)->value.pvarValue, // convert in place
|
|
(*ppct)->value.pvarValue,
|
|
LOCALE_SYSTEM_DEFAULT,
|
|
0,
|
|
VT_R8 );
|
|
if ( FAILED((*ppct)->hrError) )
|
|
{
|
|
HRESULT hr = (*ppct)->hrError;
|
|
DeleteDBQT( *ppct );
|
|
*ppct = 0;
|
|
throw(hr);
|
|
}
|
|
break;
|
|
|
|
default:
|
|
Assert( !"Unkown token value" );
|
|
}
|
|
}
|
|
|
|
|
|
%}
|
|
%{
/***
 *** Exclusive start conditions.  The rules for each one appear in the rules
 *** section below, together with the rules that switch between them.
 ***/
%}
%x contains
%x cntntsrch
%x scope0
%x scope1
%x scope2
%x view

%{
/***
 *** Named patterns.
 ***   white          run of blanks, tabs, newlines, form feeds, carriage returns
 ***   id             letter followed by letters, digits or underscores
 ***   simpleterm     unquoted search term; '' stands for an embedded quote
 ***   br_id          body of a "..." token; "" stands for an embedded quote
 ***   integer        optionally signed decimal or 0x-prefixed hexadecimal
 ***   real           optionally signed decimal with optional fraction/exponent
 ***   quoted_string  '...' literal; '' stands for an embedded quote
 ***   string         '...' literal without embedded quotes
 ***   comment        -- up to the end of the line
 ***/
%}
white           [ \t\n\f\r]+
id              [a-zA-Z][a-zA-Z0-9_]*
simpleterm      ([^ \n\t\f\r\'\(\)\[\]\&\|\~\!\,]+|\'\')*
br_id           ([^\"\n]*|\"\")*
integer         [-+]?[0-9]+|[-+]?0x[a-fA-F0-9]+
real            [-+]?([0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?|-?\.[0-9]+([eE][-+]?[0-9]+)?
quoted_string   \'([^'\n]*|\'\')*\'
string          \'[^'\n]*\'
comment         --[^\n]*
|
%%
|
|
|
|
%{
/***
 *** Reserved words in every context.
 *** These rules precede the {id} rule; the rule order is significant and
 *** must be kept.  CONTAINS and SCOPE additionally switch start condition.
 ***/
%}
ALL                     { TOKEN(_ALL); }
AND                     { TOKEN(_AND); }
ANY                     { TOKEN(_ANY); }
ARRAY                   { TOKEN(_ARRAY); }
AS                      { TOKEN(_AS); }
ASC                     { TOKEN(_ASC); }
CAST                    { TOKEN(_CAST); }
CREATE                  { TOKEN(_CREATE); }
CONTAINS                { BEGIN contains; TOKEN(_CONTAINS); }
DESC                    { TOKEN(_DESC); }
DROP                    { TOKEN(_DROP); }
FALSE                   { TOKEN(_FALSE); }
FREETEXT                { TOKEN(_FREETEXT); }
FROM                    { TOKEN(_FROM); }
IS                      { TOKEN(_IS); }
IS{white}NOT            { TOKEN(_IS_NOT); }
LIKE                    { TOKEN(_LIKE); }
MATCHES                 { TOKEN(_MATCHES); }
NOT                     { TOKEN(_NOT); }
NOT{white}LIKE          { TOKEN(_NOT_LIKE); }
NULL                    { TOKEN(_NULL); }
OR                      { TOKEN(_OR); }
ORDER{white}BY          { TOKEN(_ORDER_BY); }
PASSTHROUGH             { TOKEN(_PASSTHROUGH); }
PROPERTYNAME            { TOKEN(_PROPERTYNAME); }
PROPID                  { TOKEN(_PROPID); }
RANKMETHOD              { TOKEN(_RANKMETHOD); }
SCOPE                   { BEGIN scope0; TOKEN(_SCOPE); }
SELECT                  { TOKEN(_SELECT); }
SET                     { TOKEN(_SET); }
SOME                    { TOKEN(_SOME); }
TABLE                   { TOKEN(_TABLE); }
TRUE                    { TOKEN(_TRUE); }
TYPE                    { TOKEN(_TYPE); }
UNION                   { TOKEN(_UNION); }
UNKNOWN                 { TOKEN(_UNKNOWN); }
VIEW                    { TOKEN(_VIEW); }
WHERE                   { TOKEN(_WHERE); }

%{
/***
 *** Whitespace and comments are discarded.  Identifiers, temporary view
 *** names (# or ## prefix), delimited identifiers, quoted strings and
 *** numbers produce value nodes.  {integer} is listed before {real}.
 ***/
%}
{white}                 { /* whitespace: nothing to return */ }
{id}                    { VALUE(_ID); }
\#{id}                  { VALUE(_TEMPVIEW); }
\#\#{id}                { VALUE(_TEMPVIEW); }
\"{br_id}\"             { ID_VALUE(_DELIMITED_ID, L'"'); }
{quoted_string}         { STRING_VALUE(_STRING, L'\'', TRUE); }
{integer}               { VALUE(_INTNUM); }
{real}                  { VALUE(_REALNUM); }
{comment}               { /* comment: nothing to return */ }

%{
/***
 *** Operators and punctuation.  A view name is expected after .. or ...,
 *** and a scope definition after ..SCOPE or ...SCOPE.  Any other single
 *** character is returned as its own token value.
 *** NOTE(review): the \S in the ...SCOPE pattern looks like a stray
 *** backslash before a literal S - confirm before changing it.
 ***/
%}
\>\=                    { TOKEN(_GE); }
\<\=                    { TOKEN(_LE); }
\<\>                    { TOKEN(_NE); }
\!\=                    { TOKEN(_NE); }
\.                      { TOKEN(_DOT); }
\.\.                    { BEGIN view; TOKEN(_DOTDOT); }
\.\.\.                  { BEGIN view; TOKEN(_DOTDOTDOT); }
\.\.SCOPE               { BEGIN scope0; TOKEN(_DOTDOT_SCOPE); }
\.\.\.\SCOPE            { BEGIN scope0; TOKEN(_DOTDOTDOT_SCOPE); }
.                       { YYTRACE(yytext[0]); return yytext[0]; }
|
|
|
|
%{
/***
 *** A <contains predicate> has been started.  The only things we should see are:
 ***     (                   - matched by .
 ***     <column reference>  - matched by {id} or "{br_id}"
 ***     ,                   - matched by .
 ***     '                   - matched by \'.  Also switches to the content
 ***                           search state (cntntsrch).
 ***/
%}
<contains>\'            { BEGIN cntntsrch; YYTRACE(yytext[0]); return yytext[0]; }
<contains>{id}          { VALUE(_ID); }
<contains>\"{br_id}\"   { ID_VALUE(_DELIMITED_ID, L'"'); }
<contains>{white}       { /* whitespace: nothing to return */ }
<contains>.             { YYTRACE(yytext[0]); return yytext[0]; }
|
|
|
|
%{
/***
 *** A .. or ... has just been seen.  The only things we should see are:
 ***     <global view name>  - matched by {id}
 ***     _TEMPVIEW           - matched by \#{id} or \#\#{id}
 *** Each of them returns the scanner to the initial state.
 *** NOTE(review): this exclusive state has no rule for whitespace or for any
 *** other character - confirm that the scanner's default handling of
 *** unmatched input is what is wanted here.
 ***/
%}
<view>{id}              { BEGIN INITIAL; VALUE(_ID); }
<view>\#{id}            { BEGIN INITIAL; VALUE(_TEMPVIEW); }
<view>\#\#{id}          { BEGIN INITIAL; VALUE(_TEMPVIEW); }
|
|
|
|
%{
/***
 *** A <content search condition> has been started.  Several keywords can
 *** appear here.  We are also looking for a quoted string, a prefix string
 *** ("...*"), or a simple term.  A single quote (') takes us back to the
 *** initial state.
 ***
 *** AND, NOT and OR are recognized only with whitespace on both sides; one
 *** blank is pushed back with unput() after the match (presumably so that a
 *** keyword rule requiring leading whitespace can match next - verify).
 *** The prefix-string rule is listed before the plain "..." rule; keep that
 *** order.
 ***/
%}
<cntntsrch>{white}AND{white}    { unput(L' '); TOKEN(_AND); }
<cntntsrch>COERCE               { TOKEN(_COERCE); }
<cntntsrch>ISABOUT              { TOKEN(_ISABOUT); }
<cntntsrch>{white}NEAR          { TOKEN(_NEAR); }
<cntntsrch>{white}NOT{white}    { unput(L' '); TOKEN(_NOT); }
<cntntsrch>{white}OR{white}     { unput(L' '); TOKEN(_OR); }
<cntntsrch>FORMSOF              { TOKEN(_FORMSOF); }
<cntntsrch>WEIGHT               { TOKEN(_WEIGHT); }
<cntntsrch>\"{br_id}\*\"        { STRING_VALUE(_PREFIX_STRING, L'\'', TRUE); }
<cntntsrch>\"{br_id}\"          { STRING_VALUE(_STRING, L'\'', TRUE); }
<cntntsrch>\'                   { BEGIN INITIAL; YYTRACE(yytext[0]); return yytext[0]; }
<cntntsrch>{white}              { /* whitespace: nothing to return */ }
<cntntsrch>{simpleterm}         { STRING_VALUE(_STRING, L'\'', FALSE); }
<cntntsrch>.                    { YYTRACE(yytext[0]); return yytext[0]; }
|
|
|
|
%{
/***
 *** A <from clause> has been started.  We've already seen the keyword SCOPE,
 *** so this is not a FROM <view name>.  We're just looking for a ( now to put
 *** us into the next state (scope1).
 ***     (  - matched by \(.  Also switches to the scope1 state.
 *** Whitespace is skipped.  Any other single character is returned as
 *** itself and likewise switches to scope1.
 ***/
%}
<scope0>\(              { BEGIN scope1; YYTRACE(yytext[0]); return yytext[0]; }
<scope0>{white}         { /* whitespace: nothing to return */ }
<scope0>.               { BEGIN scope1; YYTRACE(yytext[0]); return yytext[0]; }
|
|
|
|
%{
/***
 *** We're in the middle of a <from clause>.  We've seen FROM SCOPE(, so now
 *** we need to recognize the various scope definitions that we might see
 *** here.  The two important things to recognize are:
 ***     (  - matched by \(.  Also switches to the scope2 state to match parens.
 ***     )  - matched by \).  Also switches to the initial state
 ***          (finished <from clause>).
 *** A "..." token is returned as a _URL value with its quotes stripped.
 ***/
%}
<scope1>\"{br_id}\"                             { STRING_VALUE(_URL, L'"', TRUE); }
<scope1>ALL                                     { TOKEN(_ALL); }
<scope1>DEEP{white}TRAVERSAL                    { TOKEN(_DEEP_TRAVERSAL); }
<scope1>EXCLUDE{white}SEARCH{white}TRAVERSAL    { TOKEN(_EXCLUDE_SEARCH_TRAVERSAL); }
<scope1>OF                                      { TOKEN(_OF); }
<scope1>SHALLOW{white}TRAVERSAL                 { TOKEN(_SHALLOW_TRAVERSAL); }
<scope1>{white}                                 { /* whitespace: nothing to return */ }
<scope1>\(                                      { BEGIN scope2; YYTRACE(yytext[0]); return yytext[0]; }
<scope1>\)                                      { BEGIN INITIAL; YYTRACE(yytext[0]); return yytext[0]; }
<scope1>.                                       { YYTRACE(yytext[0]); return yytext[0]; }
|
|
|
|
%{
/***
 *** We're still in the middle of a <from clause>.  So far we've seen:
 ***     FROM SCOPE( ... (
 *** We need to find a ')' to finish out the element we're working on:
 ***     )  - matched by \).  Also switches back to the scope1 state.
 *** A "..." token is returned as a _URL value with its quotes stripped.
 ***/
%}
<scope2>{white}         { /* whitespace: nothing to return */ }
<scope2>\"{br_id}\"     { STRING_VALUE(_URL, L'"', TRUE); }
<scope2>\)              { BEGIN scope1; YYTRACE(yytext[0]); return yytext[0]; }
<scope2>.               { YYTRACE(yytext[0]); return yytext[0]; }
|
|
|
|
%%
|