windows-server-2003/inetsrv/query/sqltext/ms-sql.l


								%{

								//--------------------------------------------------------------------

								// Microsoft Monarch

								//

								// Copyright (c) Microsoft Corporation, 1997 - 1999.

								//

								// @doc OPTIONAL EXTRACTION CODES

								//

								// @module  ms-sql.l |

								//          LEX tokenizer script

								//

								// @devnotes none

								//

								// @rev 0 | 04-Feb-97 | v-charca  | Created

								//


								/**

								** NOTE : when adding a new token (XXX) modify the following:

								**      1.) Add %token _XXX to sql.y

								**      2.) Add lexeme pattern to sql.l stating whether the token returns a TOKEN

								**          or a VALUE.  If the token returns a value a node will need to be created to

								**          contain the value information. Therefore the VALUE macro will also need to

								**          specify a valid VARIANT type for the value.

								**/


								#include "msidxtr.h"


								// YYTRACE: in DEBUG builds, hands the current lexeme (yytext, yyleng) and the
								// token number to LexerTrace; in all other builds it expands to nothing.
								// The DEBUG expansion already ends in ';', so uses of YYTRACE are written
								// without a trailing semicolon.
								#ifdef DEBUG

								# define YYTRACE(tknNum) LexerTrace(yytext, yyleng, tknNum);

								#else

								# define YYTRACE(tknNum)

								#endif


								// TOKEN: trace the token, then return its number.  No value node is created.
								// Expands to more than one statement and is not wrapped in braces, so it must
								// only be used inside a braced action.
								#define TOKEN(tknNum) YYTRACE(tknNum) return(tknNum);


								// VALUE: trace the token, build a value node for the current lexeme with
								// CreateTknValue(yylval, tknNum), then return the token number.
								// Only two arguments are passed, so this relies on the remaining parameters of
								// CreateTknValue having defaults -- presumably in the class declaration,
								// which is not part of this file.
								#define VALUE(tknNum)                   \

								    {                                   \

								    YYTRACE(tknNum)                     \

								    CreateTknValue(yylval, tknNum);     \

								    return tknNum;                      \

								    }


								// STRING_VALUE: like VALUE, but also forwards to CreateTknValue
								//    wch    - the quote character whose doubled form is collapsed
								//    fQuote - TRUE to strip the quotes surrounding the lexeme
								// The expansion is a complete braced block, so a trailing ';' at the point
								// of use is optional.
								#define STRING_VALUE(tknNum, wch, fQuote)           \

								    {                                               \

								    YYTRACE(tknNum)                                 \

								    CreateTknValue(yylval, tknNum, wch, fQuote);    \

								    return tknNum;                                  \

								    }


								// ID_VALUE: trace and build the value node using tknNum (with wch as the quote
								// character), but always return _ID rather than tknNum.  The parser
								// therefore receives a delimited identifier as an ordinary _ID token.
								#define ID_VALUE(tknNum, wch)               \

								    {                                       \

								    YYTRACE(tknNum)                         \

								    CreateTknValue(yylval, tknNum, wch);    \

								    return _ID;                             \

								    }


								/*
								** Make Lex read from a block of data.
								** Any YY_INPUT definition already in effect is removed first.
								**    b  is the character buffer to fill,
								**    r  is a variable that receives the value returned by yybufferinput
								**       (the number of chars read),
								**    ms is the size of the buffer
								*/

								#undef YY_INPUT

								#define YY_INPUT(b, r, ms) (r = yybufferinput(b, ms))


								//--------------------------------------------------------------------------------------------
								// @func Makes a new copy of a UNICODE string in which every doubled occurrence
								//       of the quote character wch (for example '' or "") is replaced by a
								//       single one.  All other characters are copied unchanged.
								// @parm pwszOrig | NUL-terminated source string.  It is only read.
								// @parm wch | Quote character whose doubled form stands for one quote.
								//       A value of 0 disables filtering.
								// @side Allocates the copy with CoTaskMemAlloc; ownership passes to the caller.
								// @rdesc Pointer to new UNICODE string, or 0 if the allocation failed.
								LPWSTR PwszDupFilter(
								    LPWSTR  pwszOrig,
								    WCHAR   wch )
								{
								    // The filtered text is never longer than the original, so the original
								    // length (plus terminator) is always enough room.
								    LPWSTR pwszCopy = (LPWSTR)CoTaskMemAlloc( (wcslen(pwszOrig)+2)*sizeof(WCHAR) );
								    if ( 0 != pwszCopy )
								    {
								        LPWSTR pwsz = pwszCopy;
								        while ( 0 != *pwszOrig )
								        {
								            // An escaped quote is exactly two quote characters: step over the
								            // first one here and let the copy below consume the second.  Pairs
								            // are consumed as pairs, so a run such as '''' yields '' and not a
								            // single quote.  wch can only equal the next character when that
								            // character is not the terminator, because *pwszOrig is non-zero.
								            if ( wch == *pwszOrig && wch == *(pwszOrig+1) )
								                pwszOrig++;

								            *pwsz++ = *pwszOrig++;
								        }
								        *pwsz = L'\0';
								    }

								    return pwszCopy;
								}


								//--------------------------------------------------------------------------------------------
								//      YYLEXER::CreateTknValue
								//      Creates a QUERYTREE node structure for the current lexeme (yytext_ptr) and
								//      stores it in *ppct, which is passed to the YACC value stack.  The kind of
								//      node is chosen by switching on the token number:
								//          _ID, _TEMPVIEW      - DBOP_table_name node, upper-cased copy of the lexeme
								//          _DELIMITED_ID       - DBOP_table_name node, text between the quotes
								//          _URL, _STRING,
								//          _PREFIX_STRING      - DBOP_scalar_constant node holding a VT_BSTR
								//          _INTNUM             - DBOP_scalar_constant node, converted with
								//                                PropVariantChangeTypeI64
								//          _REALNUM            - DBOP_scalar_constant node, converted to VT_R8
								//
								//      ppct    - receives the new node
								//      tknNum  - token number of the lexeme
								//      wch     - quote character whose doubled form is collapsed by PwszDupFilter
								//                (used for delimited identifiers and strings only)
								//      fQuote  - TRUE to strip the quotes surrounding a string lexeme
								//
								//      Throws E_OUTOFMEMORY when an allocation fails, or the failing HRESULT when
								//      a numeric conversion fails.  In both cases any node already allocated is
								//      deleted and *ppct is null when the exception leaves this function.
								//
								//      For delimited identifiers and quoted strings the closing quote inside
								//      yytext_ptr is overwritten with a terminator.
								//
								void YYLEXER::CreateTknValue(
								    YYSTYPE *ppct,
								    short tknNum,
								    YY_CHAR wch,
								    BOOL fQuote )
								{
								    // Note that values containing variants can only be CONSTANTS or ID's
								    // SHOULD BE DONE BY valType
								    switch ( tknNum )
								    {
								        case _ID:
								        case _TEMPVIEW:
								            {
								                // Assume table_name for now.  Might have to correct this when I
								                // see the context in the parser.
								                if ( 0 == (*ppct = PctAllocNode(DBVALUEKIND_WSTR)) )
								                    throw(E_OUTOFMEMORY);

								                (*ppct)->op    = DBOP_table_name;
								                (*ppct)->wKind = DBVALUEKIND_WSTR;
								                (*ppct)->value.pwszValue = CoTaskStrDup(yytext_ptr);
								                if( 0 == (*ppct)->value.pwszValue )
								                {
								                    DeleteDBQT( *ppct );
								                    *ppct = NULL;
								                    throw(E_OUTOFMEMORY);
								                }
								                // Upper-case the copy in place.
								                _wcsupr((*ppct)->value.pwszValue);
								                break;
								            }

								        case _DELIMITED_ID:
								            {
								                if ( 0 == (*ppct = PctAllocNode(DBVALUEKIND_WSTR)) )
								                    throw(E_OUTOFMEMORY);

								                (*ppct)->op    = DBOP_table_name;
								                (*ppct)->wKind = DBVALUEKIND_WSTR;
								                // Strip quotes on delimited identifier: drop the closing quote,
								                // skip the opening one, then collapse doubled quotes in between.
								                yytext_ptr[wcslen(yytext_ptr)-1] = L'\0';
								                (*ppct)->value.pwszValue = PwszDupFilter(yytext_ptr+1, wch);
								                if( 0 == (*ppct)->value.pwszValue )
								                {
								                    DeleteDBQT( *ppct );
								                    *ppct = NULL;
								                    throw(E_OUTOFMEMORY);
								                }
								                break;
								            }

								        case _URL:
								        case _STRING:
								        case _PREFIX_STRING:
								            {
								                // NOTE:  This is really a PROPVARIANT node, but there is no DBVALUEKIND for PROPVARIANT.
								                if ( 0 == (*ppct = PctAllocNode(DBVALUEKIND_VARIANT, DBOP_scalar_constant)) )
								                    throw(E_OUTOFMEMORY);

								                // Strip the surrounding quotes on literals BEFORE collapsing doubled
								                // quotes, in the same order as the _DELIMITED_ID case.  Filtering
								                // first would let a delimiter pair up with an adjacent escaped
								                // quote, so 'a''' would come out as a instead of a'.  It would
								                // also leave nothing but a single quote for the empty literal ''.
								                LPWSTR pwsz = yytext_ptr;
								                if ( fQuote && (*pwsz == L'\"' || *pwsz == L'\'') )
								                {
								                    size_t cwc = wcslen(pwsz);
								                    Assert( cwc >= 2 && (pwsz[cwc-1] == L'\"' || pwsz[cwc-1] == L'\'') );
								                    if ( cwc >= 2 )
								                        pwsz[cwc-1] = L'\0';    // drop the closing quote
								                    pwsz++;                     // skip the opening quote
								                }

								                LPWSTR pwszCopy = PwszDupFilter(pwsz, wch);

								                if ( 0 == pwszCopy )
								                {
								                    DeleteDBQT( *ppct );
								                    *ppct = NULL;
								                    throw(E_OUTOFMEMORY);
								                }

								                ((PROPVARIANT*)(*ppct)->value.pvValue)->bstrVal = SysAllocString( pwszCopy );
								                CoTaskMemFree( pwszCopy );  // throw away temporary before testing for out of memory
								                ((PROPVARIANT*)(*ppct)->value.pvValue)->vt = VT_BSTR;
								                if( 0 == ((PROPVARIANT*)(*ppct)->value.pvValue)->bstrVal )
								                {
								                    DeleteDBQT( *ppct );
								                    *ppct = 0;
								                    throw(E_OUTOFMEMORY);
								                }
								            }
								            break;

								        case _INTNUM:
								            if ( 0 == (*ppct = PctAllocNode(DBVALUEKIND_VARIANT, DBOP_scalar_constant)) )
								                throw(E_OUTOFMEMORY);

								            // Store the lexeme as a BSTR first, then convert the variant.
								            ((PROPVARIANT*)(*ppct)->value.pvValue)->bstrVal = SysAllocString( yytext_ptr );
								            ((PROPVARIANT*)(*ppct)->value.pvValue)->vt = VT_BSTR;
								            if ( 0 == ((PROPVARIANT*)(*ppct)->value.pvValue)->bstrVal )
								            {
								                DeleteDBQT( *ppct );
								                *ppct = 0;
								                throw(E_OUTOFMEMORY);
								            }

								            (*ppct)->hrError = PropVariantChangeTypeI64( (PROPVARIANT*)(*ppct)->value.pvValue );
								            if ( FAILED((*ppct)->hrError) )
								            {
								                // Save the error before the node that carries it is deleted.
								                HRESULT hr = (*ppct)->hrError;
								                DeleteDBQT( *ppct );
								                *ppct = 0;
								                throw(hr);
								            }
								            break;

								        case _REALNUM:
								            if ( 0 == (*ppct = PctAllocNode(DBVALUEKIND_VARIANT, DBOP_scalar_constant)) )
								                throw(E_OUTOFMEMORY);

								            // Store the lexeme as a BSTR first, then convert the variant.
								            ((PROPVARIANT*)(*ppct)->value.pvValue)->bstrVal = SysAllocString( yytext_ptr );
								            ((PROPVARIANT*)(*ppct)->value.pvValue)->vt = VT_BSTR;
								            if ( 0 == ((PROPVARIANT*)(*ppct)->value.pvValue)->bstrVal )
								            {
								                DeleteDBQT( *ppct );
								                *ppct = NULL;
								                throw(E_OUTOFMEMORY);
								            }

								            (*ppct)->hrError = VariantChangeTypeEx( (*ppct)->value.pvarValue,  // convert in place
								                                                    (*ppct)->value.pvarValue,
								                                                    LOCALE_SYSTEM_DEFAULT,
								                                                    0,
								                                                    VT_R8 );
								            if ( FAILED((*ppct)->hrError) )
								            {
								                // Save the error before the node that carries it is deleted.
								                HRESULT hr = (*ppct)->hrError;
								                DeleteDBQT( *ppct );
								                *ppct = 0;
								                throw(hr);
								            }
								            break;

								        default:
								            // No node is created for any other token; *ppct is left untouched.
								            Assert( !"Unkown token value" );
								    }
								}


								%}

								/*
								** Start conditions.  All are exclusive (%x): while one is active, only
								** rules prefixed with that condition can match.
								**    contains   - entered after the CONTAINS keyword
								**    cntntsrch  - entered on a ' seen in contains; a ' returns to INITIAL
								**    scope0     - entered after SCOPE, ..SCOPE or ...SCOPE
								**    scope1     - entered from scope0 on the first non-white character
								**    scope2     - entered on a ( seen in scope1; a ) returns to scope1
								**    view       - entered after .. or ...; a view name returns to INITIAL
								**
								** Named patterns.
								**    white          - one or more blanks, tabs, newlines, form feeds or
								**                     carriage returns
								**    id             - a letter followed by letters, digits or underscores
								**    simpleterm     - content search word; may contain '' pairs
								**    br_id          - text between double quotes; may contain "" pairs
								**    integer        - optionally signed decimal digits, or optionally signed
								**                     hex digits after a lower-case 0x
								**    real           - optionally signed decimal number with optional
								**                     fraction and exponent, or a number starting with '.'
								**    quoted_string  - '...' literal that may contain '' pairs
								**    string         - '...' literal without embedded quotes
								**    comment        - two dashes up to the end of the line
								**
								** NOTE(review): no rule in this file refers to {string} -- confirm whether
								** it is still needed.
								** NOTE(review): the first alternative of real accepts a '-' or '+' sign,
								** the second (leading '.') accepts only '-' -- confirm this is intended.
								*/
								%x  contains

								%x  cntntsrch

								%x  scope0

								%x  scope1

								%x  scope2

								%x  view


								white           [ \t\n\f\r]+

								id              [a-zA-Z][a-zA-Z0-9_]*

								simpleterm      ([^ \n\t\f\r\'\(\)\[\]\&\|\~\!\,]+|\'\')*

								br_id           ([^\"\n]*|\"\")*

								integer         [-+]?[0-9]+|[-+]?0x[a-fA-F0-9]+

								real            [-+]?([0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?|-?\.[0-9]+([eE][-+]?[0-9]+)?

								quoted_string   \'([^'\n]*|\'\')*\'

								string          \'[^'\n]*\'

								comment         --[^\n]*

								%%


								%{

								/***
								 *** Reserved words, followed by the value-carrying tokens.
								 ***
								 *** These rules have no start condition.  Every start condition in this file
								 *** is declared exclusive (%x), so they match only in the INITIAL state, not
								 *** inside contains, cntntsrch, scope0, scope1, scope2 or view.
								 ***
								 *** Reserved words return a bare token.  CONTAINS and SCOPE also switch the
								 *** scanner to the contains and scope0 states.  IS NOT, NOT LIKE and ORDER BY
								 *** are each matched as one lexeme, with {white} between the two words.
								 *** The reserved words are listed before {id}, so they win when both match
								 *** a lexeme of the same length.
								 ***
								 *** After the reserved words:
								 ***    {white}, {comment}  - discarded
								 ***    {id}                - _ID with a value node
								 ***    #id, ##id           - _TEMPVIEW with a value node
								 ***    "{br_id}"           - value node built as _DELIMITED_ID, returned as _ID
								 ***    {quoted_string}     - _STRING, surrounding quotes stripped
								 ***    {integer}, {real}   - _INTNUM, _REALNUM.  {integer} is listed first, so
								 ***                          a lexeme matching both patterns is an _INTNUM.
								 ***/

								%}

								ALL                             { TOKEN(_ALL);                      }

								AND                             { TOKEN(_AND);                      }

								ANY                             { TOKEN(_ANY);                      }

								ARRAY                           { TOKEN(_ARRAY);                    }

								AS                              { TOKEN(_AS);                       }

								ASC                             { TOKEN(_ASC);                      }

								CAST                            { TOKEN(_CAST);                     }

								CREATE                          { TOKEN(_CREATE);                   }

								CONTAINS                        { BEGIN contains;TOKEN(_CONTAINS);  }

								DESC                            { TOKEN(_DESC);                     }

								DROP                            { TOKEN(_DROP);                     }

								FALSE                           { TOKEN(_FALSE);                    }

								FREETEXT                        { TOKEN(_FREETEXT);                 }

								FROM                            { TOKEN(_FROM);                     }

								IS                              { TOKEN(_IS);                       }

								IS{white}NOT                    { TOKEN(_IS_NOT);                   }

								LIKE                            { TOKEN(_LIKE);                     }

								MATCHES                         { TOKEN(_MATCHES);                  }

								NOT                             { TOKEN(_NOT);                      }

								NOT{white}LIKE                  { TOKEN(_NOT_LIKE);                 }

								NULL                            { TOKEN(_NULL);                     }

								OR                              { TOKEN(_OR);                       }

								ORDER{white}BY                  { TOKEN(_ORDER_BY);                 }

								PASSTHROUGH                     { TOKEN(_PASSTHROUGH);              }

								PROPERTYNAME                    { TOKEN(_PROPERTYNAME);             }

								PROPID                          { TOKEN(_PROPID);                   }

								RANKMETHOD                      { TOKEN(_RANKMETHOD);               }

								SCOPE                           { BEGIN scope0; TOKEN(_SCOPE);      }

								SELECT                          { TOKEN(_SELECT);                   }

								SET                             { TOKEN(_SET);                      }

								SOME                            { TOKEN(_SOME);                     }

								TABLE                           { TOKEN(_TABLE);                    }

								TRUE                            { TOKEN(_TRUE);                     }

								TYPE                            { TOKEN(_TYPE);                     }

								UNION                           { TOKEN(_UNION);                    }

								UNKNOWN                         { TOKEN(_UNKNOWN);                  }

								VIEW                            { TOKEN(_VIEW);                     }

								WHERE                           { TOKEN(_WHERE);                    }


								{white}                         { /* empty lex rule */              }

								{id}                            { VALUE(_ID);                       }

								\#{id}                          { VALUE(_TEMPVIEW);                 }

								\#\#{id}                        { VALUE(_TEMPVIEW);                 }

								\"{br_id}\"                     { ID_VALUE(_DELIMITED_ID, L'"');        }

								{quoted_string}                 { STRING_VALUE(_STRING, L'\'', TRUE);}

								{integer}                       { VALUE(_INTNUM);                   }

								{real}                          { VALUE(_REALNUM);                  }

								{comment}                       { /* empty lex rule */              }


								%{
								/***
								 *** Operators and punctuation in the INITIAL state.
								 ***    >=  <=              - _GE, _LE
								 ***    <>  !=              - both returned as _NE
								 ***    .                   - _DOT
								 ***    ..  ...             - _DOTDOT, _DOTDOTDOT.  Also switch to the view state.
								 ***    ..SCOPE  ...SCOPE   - _DOTDOT_SCOPE, _DOTDOTDOT_SCOPE.  Also switch to the
								 ***                          scope0 state.  Being longer, these win over .. and ...
								 ***    any other character - returned to the parser as itself
								 ***/
								%}
								\>\=                            { TOKEN(_GE);                       }
								\<\=                            { TOKEN(_LE);                       }
								\<\>                            { TOKEN(_NE);                       }
								\!\=                            { TOKEN(_NE);                       }
								\.                              { TOKEN(_DOT);                      }
								\.\.                            { BEGIN view; TOKEN(_DOTDOT);       }
								\.\.\.                          { BEGIN view; TOKEN(_DOTDOTDOT);    }
								\.\.SCOPE                       { BEGIN scope0; TOKEN(_DOTDOT_SCOPE);}
								\.\.\.SCOPE                     { BEGIN scope0; TOKEN(_DOTDOTDOT_SCOPE);}
								.                               { YYTRACE(yytext[0]); return yytext[0]; }


								%{

								/***
								 *** A <contains predicate> has been started.  The only things we should see are:
								 ***    (                   - matched by .
								 ***    <column reference>  - matched by {id} or "{br_id}"
								 ***    ,                   - matched by .
								 ***    '                   - matched by \'.  Also switch to content search state (cntntsrch).
								 *** White space is skipped.  No rule here switches back to the initial state;
								 *** this state is left only through the ' that starts the content search.
								 ***/

								%}

								<contains>\'                    { BEGIN cntntsrch;YYTRACE(yytext[0]); return yytext[0];}

								<contains>{id}                  { VALUE(_ID);                       }

								<contains>\"{br_id}\"           { ID_VALUE(_DELIMITED_ID, L'"');        }

								<contains>{white}               { /* empty lex rule */              }

								<contains>.                     { YYTRACE(yytext[0]); return yytext[0];}


								%{

								/***
								 *** A .. or ... has just been seen.  The only things we should see are:
								 ***    <global view name>  - matched by {id}
								 ***    _TEMPVIEW           - matched by \#{id} or \#\#{id}
								 *** Each of them switches back to the initial state.
								 *** NOTE(review): this state has no {white} rule and no catch-all . rule, so
								 *** any other input is not handled by a rule in this file -- confirm that
								 *** the view name is always expected to follow the dots immediately.
								 ***/

								%}

								<view>{id}                      { BEGIN INITIAL; VALUE(_ID);        }

								<view>\#{id}                    { BEGIN INITIAL; VALUE(_TEMPVIEW);  }

								<view>\#\#{id}                  { BEGIN INITIAL; VALUE(_TEMPVIEW);  }


								%{

								/***
								 *** A <content search condition> has been started.  There are several keywords we can see here.
								 *** We are also looking for a quoted string, a prefix string, or a simple term.  We are taken
								 *** back to the initial state by a single quote (').
								 ***
								 ***    AND, NOT, OR        - matched only with white space on both sides.  One blank is
								 ***                          pushed back with unput after the match, presumably so that
								 ***                          a following keyword can still see its leading white space.
								 ***    NEAR                - matched only with white space in front of it.
								 ***    COERCE, ISABOUT,
								 ***    FORMSOF, WEIGHT     - matched without any surrounding white space.
								 ***    "{br_id}*"          - _PREFIX_STRING, surrounding quotes stripped.
								 ***    "{br_id}"           - _STRING, surrounding quotes stripped.
								 ***    {simpleterm}        - _STRING, taken as is (no quotes to strip).
								 ***    white space         - skipped.
								 ***    any other character - returned to the parser as itself.
								 *** All three string rules pass ' as the quote character, so it is a doubled '
								 *** (not a doubled ") that is collapsed inside these values.
								 ***/

								%}

								<cntntsrch>{white}AND{white}    { unput(L' '); TOKEN(_AND);         }

								<cntntsrch>COERCE               { TOKEN(_COERCE);                   }

								<cntntsrch>ISABOUT              { TOKEN(_ISABOUT);                  }

								<cntntsrch>{white}NEAR          { TOKEN(_NEAR);                     }

								<cntntsrch>{white}NOT{white}    { unput(L' '); TOKEN(_NOT);         }

								<cntntsrch>{white}OR{white}     { unput(L' '); TOKEN(_OR);          }

								<cntntsrch>FORMSOF              { TOKEN(_FORMSOF);                  }

								<cntntsrch>WEIGHT               { TOKEN(_WEIGHT);                   }

								<cntntsrch>\"{br_id}\*\"        { STRING_VALUE(_PREFIX_STRING, L'\'', TRUE);}

								<cntntsrch>\"{br_id}\"          { STRING_VALUE(_STRING, L'\'', TRUE);}

								<cntntsrch>\'                   { BEGIN INITIAL; YYTRACE(yytext[0]); return yytext[0];}

								<cntntsrch>{white}              { /* empty lex rule */              }

								<cntntsrch>{simpleterm}         { STRING_VALUE(_STRING, L'\'', FALSE)}

								<cntntsrch>.                    { YYTRACE(yytext[0]); return yytext[0];}


								%{

								/***
								 *** A <from clause> has been started.  We've already seen the keyword SCOPE, so this
								 *** is not a FROM <view name>.  We're just looking for a ( now to put us into the
								 *** next state (scope1).
								 ***    (                   - matched by \(.  Also switch to scope1 state.
								 ***    white space         - skipped.
								 ***    any other character - matched by .  Returned as itself, and it switches
								 ***                          to scope1 state as well.
								 ***/

								%}

								<scope0>\(                      { BEGIN scope1; YYTRACE(yytext[0]); return yytext[0];}

								<scope0>{white}                 { /* empty lex rule */              }

								<scope0>.                       { BEGIN scope1; YYTRACE(yytext[0]); return yytext[0];}


								%{

								/***
								 *** We're in the middle of a <from clause>.  We've seen FROM SCOPE(, so now we need to recognize
								 *** the various scope definitions that we might see here.  The two important things to recognize
								 *** are:
								 ***    (                   - matched by \(.  Also switch to scope2 state to match parens.
								 ***    )                   - matched by \).  Also switch to the initial (finished <from clause>).
								 *** The rest:
								 ***    "{br_id}"           - _URL, surrounding quotes stripped, " is the quote character.
								 ***    ALL, OF             - keyword tokens.
								 ***    DEEP TRAVERSAL, SHALLOW TRAVERSAL, EXCLUDE SEARCH TRAVERSAL
								 ***                        - each matched as one lexeme with white space between the words.
								 ***    white space         - skipped.
								 ***    any other character - returned to the parser as itself.
								 ***/

								%}

								<scope1>\"{br_id}\"             { STRING_VALUE(_URL, L'"', TRUE);   }

								<scope1>ALL                     { TOKEN(_ALL);                      }

								<scope1>DEEP{white}TRAVERSAL    { TOKEN(_DEEP_TRAVERSAL);           }

								<scope1>EXCLUDE{white}SEARCH{white}TRAVERSAL { TOKEN(_EXCLUDE_SEARCH_TRAVERSAL);}

								<scope1>OF                      { TOKEN(_OF);                       }

								<scope1>SHALLOW{white}TRAVERSAL { TOKEN(_SHALLOW_TRAVERSAL);        }

								<scope1>{white}                 { /* empty lex rule */              }

								<scope1>\(                      { BEGIN scope2; YYTRACE(yytext[0]); return yytext[0];}

								<scope1>\)                      { BEGIN INITIAL; YYTRACE(yytext[0]); return yytext[0];}

								<scope1>.                       { YYTRACE(yytext[0]); return yytext[0];}


								%{

								/***
								 *** We're still in the middle of a <from clause>.  So far we've seen:
								 ***    FROM SCOPE( ... (
								 *** We need to find a ')' to finish out the element we're working on:
								 ***    )                   - matched by \).  Also switch back to scope1 state.
								 *** Until then:
								 ***    "{br_id}"           - _URL, surrounding quotes stripped, " is the quote character.
								 ***    white space         - skipped.
								 ***    any other character - returned to the parser as itself.
								 *** None of the scope1 keywords (ALL, OF, ... TRAVERSAL) is recognized in this state.
								 ***/

								%}

								<scope2>{white}                 { /* empty lex rule */              }

								<scope2>\"{br_id}\"             { STRING_VALUE(_URL, L'"', TRUE);  }

								<scope2>\)                      { BEGIN scope1; YYTRACE(yytext[0]); return yytext[0];}

								<scope2>.                       { YYTRACE(yytext[0]); return yytext[0];}


								%%