windows-server-2003/inetsrv/query/qutil/triplish/parser.l

%{
//+---------------------------------------------------------------------------
//
//  Microsoft Windows
//  Copyright (C) Microsoft Corporation, 1997 - 2000.
//
//  File:       parser.l
//
//  Contents:   Lex rules for parser
//
//  Notes:      Written for flex version 2.5.4
//
//  History:    10-01-97        emilyb  created
//
//----------------------------------------------------------------------------

class CValueParser;

#include "yybase.hxx"
#include "parser.h" 
#include "parsepl.h"
#include "flexcpp.h"

// TOKEN: return the given token id from the current lexer action.
// The expansion already ends in a semicolon, so "TOKEN (x);" yields an
// extra empty statement.
#define TOKEN(tknNum)  return (tknNum);
// STRING_VALUE: if the matched text (yytext) is not empty/blank, copy its
// value into yylval via CreateTknValue and return the (possibly updated)
// token id.  If the token is empty the macro does not return, so control
// falls out of the bottom of the action.
#define STRING_VALUE(tknNum, fLong, fQuote)     \
        {       \
            if (!IsTokenEmpty())        \
                return CreateTknValue(yylval, tknNum, fLong, fQuote);   \
        }

/*
** Make Lex read from a block of data
**    b  (buffer) is the character buffer,
**    r  (result) is a variable to store the number of chars read
**    ms is the size of the buffer
*/
#undef YY_INPUT
#define YY_INPUT(b, r, ms) (r = yybufferinput(b, ms))

// NOTE(review): presumably declares the debug info level consumed by
// yaccDebugOut -- confirm against the macro definition.
DECLARE_INFOLEVEL(yacc)

//+---------------------------------------------------------------------------
//
//  Function:   YYLEXER::IsTokenEmpty 
//
//  Synopsis:   Reports whether the current token text (yytext) is empty.
//              A token counts as empty when it has no characters at all or
//              consists solely of spaces and tabs.
//
//  Arguments:  None.
//
//  Returns:    TRUE if the token is empty, FALSE as soon as any character
//              other than a space or a tab is seen.
//
//  History:    08-APR-98        KrishnaN  created
//
//----------------------------------------------------------------------------

BOOL YYLEXER::IsTokenEmpty()
{
    Win4Assert(yytext);

    for (LPWSTR pwc = yytext; L'\0' != *pwc; pwc++)
    {
        BOOL fBlank = (L' ' == *pwc) || (L'\t' == *pwc);

        if (!fBlank)
            return FALSE;
    }

    return TRUE;
}


//+---------------------------------------------------------------------------
//
//  Function:   YYLEXER::IsNotOperator 
//
//  Synopsis:   Checks whether the current token text (yytext), after any
//              leading spaces and tabs, begins with "not" (any case)
//              immediately followed by one of the property markers
//              @, # or $.
//
//  Arguments:  None.
//
//  Returns:    TRUE if the text has that form, FALSE otherwise.
//
//  History:    08-DEC-98        KrishnaN  created
//
//----------------------------------------------------------------------------

BOOL YYLEXER::IsNotOperator()
{
    Win4Assert(yytext);

    // Count the leading spaces and tabs.
    int cBlanks = 0;
    while (L' ' == yytext[cBlanks] || L'\t' == yytext[cBlanks])
        cBlanks++;

    // "not" plus the marker character need four characters after the blanks.
    if (yyleng < cBlanks + 4)
        return FALSE;

    LPWSTR pwc = yytext + cBlanks;

    BOOL fNot = (L'n' == pwc[0] || L'N' == pwc[0]) &&
                (L'o' == pwc[1] || L'O' == pwc[1]) &&
                (L't' == pwc[2] || L'T' == pwc[2]);

    BOOL fMarker = (L'@' == pwc[3] || L'#' == pwc[3] || L'$' == pwc[3]);

    return (fNot && fMarker) ? TRUE : FALSE;
}

//+---------------------------------------------------------------------------
//
//  Function:   YYLEXER::CreateTknValue 
//
//  Synopsis:   Allocs a WCHAR string which is passed to the YACC value stack.  
//
//  Arguments:  [ppStg]  -- set to pointer to alloc'd memory
//              [tknNum] -- token id
//              [fLong]  -- true if token is in longhand version
//              [fQuote] -- true if token is quoted
//
//  Returns:    Updated token id
//
//  History:    10-01-97        emilyb  created
//
//----------------------------------------------------------------------------

short YYLEXER::CreateTknValue(YYSTYPE *ppStg, short tknNum, BOOL fLong, BOOL fQuote )
{
    HRESULT hr = S_OK;
    short retTkn = tknNum;
    LPWSTR pwsz = yytext;
    
    if (!fQuote)    
    {
        // If we see a double quote, consider the string quoted.
        while (L' ' == *pwsz)
            pwsz++;
        if (*pwsz == L'"')
        {
            // strip trailing blanks and check if we see a trailing "
            
            LPWSTR pLast = pwsz + wcslen(pwsz) - 1;

            while (pLast >= pwsz && L' ' == *pLast )
            {
                *pLast = L'\0';
                pLast--;
            }
            
            if (*pLast == L'"' && pLast > pwsz )
                fQuote = TRUE;
        }
    }
    
    // start parsing from the beginning of the string
    pwsz = yytext;
    
    if (_PHRASEORREGEX == tknNum)
    {
        // A quoted string is always a phrase.
        if (fQuote)
            retTkn = _PHRASE;
        else
            retTkn = DetermineTokenType();
    }

    switch (retTkn)
    {
    case _PHRASE:

        {    
            LPWSTR pLast;

            pLast = pwsz + wcslen(pwsz) - 1;

            // if long version, find the phrase
            if (fLong)
            {
                pwsz = pwsz + wcslen(L"{phrase}");
                pLast = pLast - wcslen(L"{/phrase}"+1);
                Win4Assert(*pLast == L'{');
                *pLast = L'\0';
            }

            // strip leading and trailing blanks            
            while (L' ' == *pwsz)
                pwsz++;

            pLast = pwsz + wcslen(pwsz) - 1;

            while (pLast >= pwsz && L' ' == *pLast )
            {
                *pLast = L'\0';
                pLast--;
            }    
            // NOTE: Don't strip double quotes here, they will be stripped later
            
            yaccDebugOut((DEB_ITRACE, "Phrase %ws in %ws format\n",  pwsz, fLong ? L"Long" : L"Short"));
        }
        break;
    case _PROPNAME:

        {
            LPWSTR pLast;
            
            if (fLong)  // looks like: { prop name = "prop name"  }
            {
                // find =
                while (L'=' != *pwsz)
                    pwsz++;
                pwsz++; 

                pLast = pwsz + wcslen(pwsz) - 1;
                Win4Assert( *pLast == L'}');
                *pLast-- = L'\0';
            }
            else
            {
                // Strip @ or # or $ token
                Win4Assert(*pwsz == L'@' || *pwsz == L'#' || *pwsz == L'$');
                pwsz = pwsz + 1;
            }

            // strip leading and trailing blanks            
            while (L' ' == *pwsz)
                pwsz++;

            pLast = pwsz + wcslen(pwsz) - 1;

            while (pLast >= pwsz && L' ' == *pLast )
            {
                *pLast--= L'\0';
            }    

            if (fQuote)
            {
                pwsz++;
                *pLast = L'\0';                              
            }

            yaccDebugOut((DEB_ITRACE, "Propname %ws in %ws format and %ws\n", 
                          pwsz, fLong ? L"Long" : L"Short", fQuote ? L"quoted" : L"unquoted"));
        }
        break;                
    case _FREETEXT:
        {    
            LPWSTR pLast;

            // if long version, find the FREETEXT
            if (fLong)
            {
                pwsz = pwsz + wcslen(L"{freetext}");
                pLast = pwsz + wcslen(pwsz) - 1;                
                pLast = pLast - wcslen(L"{/freetext}")+1;
                Win4Assert(*pLast == L'{');
                *pLast = L'\0';
            }

            // strip leading and trailing blanks            
            while (L' ' == *pwsz)
                pwsz++;

            pLast = pwsz + wcslen(pwsz) - 1;

            while (pLast >= pwsz && L' ' == *pLast )
            {
                *pLast = L'\0';
                pLast--;
            }    

            if (fQuote)
            {
                Win4Assert(pLast >= pwsz+1);
                // Strip quotes
                pwsz = pwsz + 1;
                *pLast = L'\0';               
            }
            
            yaccDebugOut((DEB_ITRACE, "Freetext %ws in %ws format\n",  pwsz, fLong ? L"Long" : L"Short"));
        }
        break;

    case _REGEX:
        {
            LPWSTR pLast;

            // if long version, find the regex
            if (fLong)
            {
                pwsz = pwsz + wcslen(L"{regex}");
                pLast = pwsz + wcslen(pwsz);                
                pLast = pLast - wcslen(L"{/regex}");
                Win4Assert(*pLast == L'{');
                *pLast = L'\0';
            }
            
            // strip leading blanks
            while (L' ' == *pwsz)
                pwsz++;
                
            // If the first char is =, ignore it. We only ignore the first
            // = character. This is backward compatible with Triplish1
            if (L'=' == *pwsz)
                pwsz++;

            // strip leading and trailing blanks            
            while (L' ' == *pwsz)
                pwsz++;
                
            pLast = pwsz + wcslen(pwsz) - 1;
            
            while (pLast >= pwsz && L' ' == *pLast )
            {
                *pLast = L'\0';
                pLast--;
            }    
                
            // After we strip a leading =, we might have a quoted phrase
            // Check only if fQuote is false.
            // We don't want to deal with an unpaired double quote.
            if (!fQuote && *pwsz == L'"' && *pLast == L'"' && pLast > pwsz )
                fQuote = TRUE;
                
            if (fQuote)
            {
                Win4Assert(pLast >= pwsz+1);
                // Strip quotes
                pwsz = pwsz + 1;
                *pLast = L'\0';               
            }
            
            yaccDebugOut((DEB_ITRACE, "RegEx %ws in %ws format\n",  pwsz, fLong ? L"Long" : L"Short"));
        }
        break;                

    case _WEIGHT:
        {  
            Assert (fLong);
            Assert(!fQuote);
            if (fLong)  // looks like: {weight value = number }
            {
                // find =
                while (L'=' != *pwsz)
                    pwsz++;
                pwsz++;

                // step past leading blanks
                while (L' ' == *pwsz)
                    pwsz++;

                // remove trailing } and blanks
                LPWSTR pLast = pwsz + wcslen(pwsz) - 1;
                Win4Assert(*pLast == L'}');
                *(pLast--) = L'\0';

                while (pLast >= pwsz && L' ' == *pLast )
                {
                    *(pLast--) = L'\0';
                }    
            }
        }
        break;                

    case _NEARDIST:
        {  
            Assert (fLong);
            Assert(!fQuote);
            if (fLong)  // looks like: dist = number
            {
                // find =
                while (L'=' != *pwsz)
                    pwsz++;
                pwsz++;

                // step past leading blanks
                while (L' ' == *pwsz)
                    pwsz++;
            }
            
            yaccDebugOut((DEB_ITRACE, "NearDist string: %ws in %s format\n",  pwsz, fLong ? L"Long" : L"Short"));
        }
        break;                
    case _NEARUNIT:
        {  
            Assert (fLong);
            Assert(!fQuote);
            if (fLong)  // looks like: unit = blah
            {
                // find =
                while (L'=' != *pwsz)
                    pwsz++;
                pwsz++;

                // step past leading blanks
                while (L' ' == *pwsz)
                    pwsz++;
            }
            
            yaccDebugOut((DEB_ITRACE, "NearUnit string: %ws in %s format\n",  pwsz, fLong ? L"Long" : L"Short"));
        }
        break; 
    case _VECTORELEMENT:
        {              
            // strip leading and trailing blanks            
            while (L' ' == *pwsz)
                pwsz++;

            LPWSTR pTemp = pwsz + wcslen(pwsz) - 1;

            if (fLong)  // strip trailing ; 
            {
                Win4Assert(L';' == *pTemp);
                *pTemp--='\0';
            }

            while (L' ' == *pTemp && pTemp > pwsz)
                *pTemp-- = L'\0';

            if (fQuote)
            {
                // Strip quotes
                pwsz = pwsz + 1;
                pwsz[wcslen(pwsz)-1] = L'\0';               
            }

            yaccDebugOut((DEB_ITRACE, "VectorElem %ws in %ws format\n",  pwsz, fLong ? L"Long" : L"Short"));
        }
        break;                        
    case _VEMETHOD:
        { 
            Assert (fLong);

            LPWSTR pTemp;

            if (fLong)  // looks like: {vector rankmethod= blah}
            {
                // find =
                while (L'=' != *pwsz)
                    pwsz++;
                pwsz++;

                // strip trailing }
                pTemp = pwsz + wcslen(pwsz) - 1;               
                Win4Assert(L'}' == *pTemp);
                *pTemp-- = L'\0';

            }

            // strip leading and trailing blanks and quotes          
            while (L' ' == *pwsz)
                pwsz++;

            pTemp = pwsz + wcslen(pwsz) - 1;

            while (L' ' == *pTemp && pTemp > pwsz)
                *pTemp-- = L'\0';

            if (fQuote)
            {
                // Strip quotes
                pwsz = pwsz + 1;
                pwsz[wcslen(pwsz)-1] = L'\0';               
            }
            
            yaccDebugOut((DEB_ITRACE, "VectorMethod %ws in %ws format\n",  pwsz, fLong ? L"Long" : L"Short"));
        }
        break; 


    }

    int len = wcslen(pwsz);
    XPtrST<WCHAR> xwszRet(new WCHAR[len + 1]);
    
    _allocations.Add(xwszRet.GetPointer(), _allocations.Count());
    
    RtlCopyMemory(xwszRet.GetPointer(), pwsz, (len+1) * sizeof(WCHAR));
    (*ppStg).pwszChar = xwszRet.Acquire();
    
    return retTkn;
}        


//+---------------------------------------------------------------------------
//
//  Function:   YYLEXER::DetermineTokenType 
//
//  Synopsis:   Determines if we have a regular expression or a regular string.  
//              A  regular expression is a string that contains atleast one of
//              *, ?, or | characters.
//
//  Returns:    Token id
//
//  History:    Jun-05-98        KrishnaN  created
//
//----------------------------------------------------------------------------

short YYLEXER::DetermineTokenType()
{
    LPWSTR pwsz = yytext;
    
    LPWSTR pLast = pwsz + wcslen(pwsz) - 1;

    while (pLast >= pwsz)
    {
        if (L'|' == *pwsz || L'*' == *pwsz || L'?' == *pwsz)
            return _REGEX;
            
        pwsz++;
    }    
    
    // None of the regular expression defining characters have been found
    return _PHRASE;
}        


        //
        // 
        //    RULES       
        // 
        // Notes:  Any characters which are not matched cause yylexer to throw.
        //         We can also throw on E_OUTOFMEMORY.
        //         Tokens which need to return more than one value (e.g. {near})
        //         use start states to return each piece of the value. The start
        //         states also emit a "token end" token so that the parser can
        //         check that they are syntactically complete.
        //         Lex chooses the longest match among the rules.  If two matches
        //         have the same length, it picks the rule that is listed first.
%}

    /* Exclusive start conditions.  As far as the rules visible in this   */
    /* part of the file show:                                             */
    /*   innear         - entered after a longhand {near tag              */
    /*   shortgen       - entered after a phrase followed by a trailing * */
    /*   shortregex     - entered after a #propname                       */
    /*   mayberegex     - entered after = when fContinueMaybeRegex is set */
    /*   implicitphrase - entered after @propname, near, ~ and {ve}       */
    /*   infreefreetext - entered after a $propname                       */
    /*   invector       - NOTE(review): not entered by any rule seen here */
%x innear
%x shortgen
%x shortregex
%x mayberegex
%x implicitphrase
%x infreefreetext
%x invector

    /* Named patterns.  Keywords are spelled out with character classes   */
    /* so that they match regardless of case.                             */
white    [ \t\n\f\r]+

begin_freetext  \{[fF][rR][eE][eE][tT][eE][xX][tT]\}[ ]*
end_freetext    [ ]*\{\/[fF][rR][eE][eE][tT][eE][xX][tT]\}
begin_phrase    \{[pP][hH][rR][aA][sS][eE]\}[ ]*
end_phrase      [ ]*\{\/[pP][hH][rR][aA][sS][eE]\}
prop            [pP][rR][oO][pP]
propname        {prop}[ ]+[nN][aA][mM][eE][ ]*
contains        [cC][oO][nN][tT][aA][iI][nN][sS]
and             [aA][nN][dD]
or              [oO][rR]
not             [nN][oO][tT]
near            [nN][eE][aA][rR]
vector          [vV][eE][cC][tT][oO][rR]
vecmethod       {vector}[ ]+[rR][aA][nN][kK][mM][eE][tT][hH][oO][dD][ ]*
ve              [vV][eE]
weight          [wW][eE][iI][gG][hH][tT][ ]+[vV][aA][lL][uU][eE][ ]*
coerce          [cC][oO][eE][rR][cC][eE]
generate        [gG][eE][nN][eE][rR][aA][tT][eE]
genmethod       {generate}[ ]+[mM][eE][tT][hH][oO][dD][ ]*
begin_regex     \{[rR][eE][gG][eE][xX]\}[ ]*
end_regex       [ ]*\{\/[rR][eE][gG][eE][xX]\}
dist            [dD][iI][sS][tT][ ]*
unit            [uU][nN][iI][tT][ ]*
word            [wW][oO][rR][dD]
sent            [sS][eE][nN][tT]
par             [pP][aA][rR]
chap            [cC][hH][aA][pP]
%%

{white}               { /* whitespace between tokens is discarded */ }

\(                 {    fContinueImplicitPhrase = FALSE;
                        // An open paren cancels every pending continue flag,
                        // so a later operator will not re-enter a start state.
                        fContinueRegex = FALSE;
                        fContinueMaybeRegex = FALSE;
                        TOKEN (_OPEN);
                   }
\)                 {
                        // A close paren cancels the pending continue flags
                        // in the same way as an open paren.
                        fContinueImplicitPhrase = FALSE;
                        fContinueRegex = FALSE;
                        fContinueMaybeRegex = FALSE;
                        TOKEN (_CLOSE); 
                   }
  
    %{// ************
      // PROPNAME
      // ************ %}
     
    %{ // If something was treated as a phrase in Triplish 1, it should
       // be treated as such even now. That applies here. For example,
       // @propname caused the following text to be treated as a phrase.
       // The same should apply to {prop name = propname}
       // 
    %}
    
    %{// shorthand, quoted %}
@\"[^"]+\"                              { 
                                            // treat value as a phrase
                                            BEGIN implicitphrase;
                                            STRING_VALUE(_PROPNAME, FALSE, TRUE);
                                        }     
   %{// shorthand, not quoted %}
@[^" <>=!&|~\^]+                        { 
                                            // treat value as a phrase
                                            // (the name ends at a blank, a quote or an operator character)
                                            BEGIN implicitphrase;
                                            STRING_VALUE(_PROPNAME, FALSE, FALSE);
                                        }
                                         
   %{// shorthand, quoted %}
$\"[^"]+\"                              { 
                                            // treat value as freetext
                                            BEGIN infreefreetext;
                                            STRING_VALUE(_PROPNAME, FALSE, TRUE);
                                        }     
   %{// shorthand, not quoted %}
$[^" <>=!&|~\^]+                        { 
                                            // treat value as freetext
                                            BEGIN infreefreetext;
                                            STRING_VALUE(_PROPNAME, FALSE, FALSE);
                                        }    
    
   %{// longhand, quoted %}
\{{propname}=[ ]*\"[^"]*\"[ ]*\}        { 
                                            // treat value as a phrase
                                            BEGIN implicitphrase;
                                            STRING_VALUE(_PROPNAME, TRUE, TRUE);
                                        }
   %{// longhand, not quoted %}
\{{propname}=[ ]*[^"} ][^}]*\}          { 
                                            // treat value as a phrase
                                            BEGIN implicitphrase;
                                            STRING_VALUE(_PROPNAME, TRUE, FALSE);
                                        }
   %{// closing token %}
\{\/{prop}\}                            { TOKEN (_PROPEND); }

   %{// *********
     // OPERATORS
     // ********* %}

{contains}[ ]+   { // If a continue flag is pending, re-enter the matching
                   // start state for the text after the operator and clear
                   // the flag.  At most one flag is honored per operator.
                   if (fContinueImplicitPhrase)
                   {
                       BEGIN implicitphrase;
                       fContinueImplicitPhrase = FALSE;
                   }
                   else if (fContinueRegex)
                   {
                       BEGIN shortregex;
                       fContinueRegex = FALSE;
                   }
                   else if (fContinueMaybeRegex)
                   {
                       BEGIN mayberegex;
                       fContinueMaybeRegex = FALSE;
                   }
                   TOKEN (_CONTAINS); 
                 }
{and}[ ]+        { // Same pending start state handling as for contains.
                   if (fContinueImplicitPhrase)
                   {
                       BEGIN implicitphrase;
                       fContinueImplicitPhrase = FALSE;
                   }
                   else if (fContinueRegex)
                   {
                       BEGIN shortregex;
                       fContinueRegex = FALSE;
                   }
                   else if (fContinueMaybeRegex)
                   {
                       BEGIN mayberegex;
                       fContinueMaybeRegex = FALSE;
                   }
                   TOKEN (_AND);
                 }
{and}\{          { 
		   // The operator is directly followed by an open brace.
		   // Push the brace back so that it is scanned as the start
		   // of the next token.
		   yyless(yyleng-1);
		   if (fContinueImplicitPhrase)
                   {
                       BEGIN implicitphrase;
                       fContinueImplicitPhrase = FALSE;
                   }
                   else if (fContinueRegex)
                   {
                       BEGIN shortregex;
                       fContinueRegex = FALSE;
                   }
                   else if (fContinueMaybeRegex)
                   {
                       BEGIN mayberegex;
                       fContinueMaybeRegex = FALSE;
                   }
                   TOKEN (_AND);
                 }		 
{or}[ ]+         { // If a continue flag is pending, re-enter the matching
                   // start state for the text after the operator and clear
                   // the flag.  At most one flag is honored per operator.
                   if (fContinueImplicitPhrase)
                   {
		       yaccDebugOut(( DEB_ITRACE, "fContinueImplicitPhrase\n" )); 
                       BEGIN implicitphrase;
                       fContinueImplicitPhrase = FALSE;
                   }
                   else if (fContinueRegex)
                   {
		       yaccDebugOut(( DEB_ITRACE, "fContinueRegex\n" ));  
                       BEGIN shortregex;
                       fContinueRegex = FALSE;
                   }
                   else if (fContinueMaybeRegex)
                   {
		       yaccDebugOut(( DEB_ITRACE, "fContinueMaybeRegex\n" ));  
                       BEGIN mayberegex;
                       fContinueMaybeRegex = FALSE;
                   }
		   yaccDebugOut(( DEB_ITRACE, "OR TOKEN found !!!\n" ));
                   TOKEN (_OR); }
{or}\{		 { 
		   // The operator is directly followed by an open brace.
		   // Push the brace back so that it is scanned as the start
		   // of the next token.
		   yyless(yyleng-1);
		   if (fContinueImplicitPhrase)
                   {
		       yaccDebugOut(( DEB_ITRACE, "OR{ fContinueImplicitPhrase\n" )); 
                       BEGIN implicitphrase;
                       fContinueImplicitPhrase = FALSE;
                   }
                   else if (fContinueRegex)
                   {
		       yaccDebugOut(( DEB_ITRACE, "OR{ fContinueRegex\n" ));  
                       BEGIN shortregex;
                       fContinueRegex = FALSE;
                   }
                   else if (fContinueMaybeRegex)
                   {
		       yaccDebugOut(( DEB_ITRACE, "OR{ fContinueMaybeRegex\n" ));  
                       BEGIN mayberegex;
                       fContinueMaybeRegex = FALSE;
                   }
		   yaccDebugOut(( DEB_ITRACE, "OR{ TOKEN found !!!\n" ));
                   TOKEN (_OR); }     	   
{not}[ ]+        { // If a continue flag is pending, re-enter the matching
                   // start state for the text after the operator and clear
                   // the flag.  At most one flag is honored per operator.
                   if (fContinueImplicitPhrase)
                   {
                       BEGIN implicitphrase;
                       fContinueImplicitPhrase = FALSE;
                   }
                   else if (fContinueRegex)
                   {
                       BEGIN shortregex;
                       fContinueRegex = FALSE;
                   }
                   else if (fContinueMaybeRegex)
                   {
                       BEGIN mayberegex;
                       fContinueMaybeRegex = FALSE;
                   }
                   TOKEN (_NOT);}
{not}\{		 { 
		   // The operator is directly followed by an open brace.
		   // Push the brace back so that it is scanned as the start
		   // of the next token.
		   yyless(yyleng-1);
		   if (fContinueImplicitPhrase)
		   {
		       BEGIN implicitphrase;
                       fContinueImplicitPhrase = FALSE;
                   }
                   else if (fContinueRegex)
                   {
                       BEGIN shortregex;
                       fContinueRegex = FALSE;
                   }
                   else if (fContinueMaybeRegex)
                   {
                       BEGIN mayberegex;
                       fContinueMaybeRegex = FALSE;
                   }
                   TOKEN (_NOT);}		   
&                { // Symbolic form of and.  If a continue flag is pending,
                   // re-enter the matching start state and clear the flag.
                   if (fContinueImplicitPhrase)
                   {
                       BEGIN implicitphrase;
                       fContinueImplicitPhrase = FALSE;
                   }
                   else if (fContinueRegex)
                   {
                       BEGIN shortregex;
                       fContinueRegex = FALSE;
                   }
                   else if (fContinueMaybeRegex)
                   {
                       BEGIN mayberegex;
                       fContinueMaybeRegex = FALSE;
                   }
                   TOKEN (_AND);}
\|               { // Symbolic form of or.  Same pending start state
                   // handling as above.
                   if (fContinueImplicitPhrase)
                   {
                       BEGIN implicitphrase;
                       fContinueImplicitPhrase = FALSE;
                   }
                   else if (fContinueRegex)
                   {
                       BEGIN shortregex;
                       fContinueRegex = FALSE;
                   }
                   else if (fContinueMaybeRegex)
                   {
                       BEGIN mayberegex;
                       fContinueMaybeRegex = FALSE;
                   }
                   TOKEN (_OR);}
!                { // Symbolic form of not.  Same pending start state
                   // handling as above.
                   if (fContinueImplicitPhrase)
                   {
                       BEGIN implicitphrase;
                       fContinueImplicitPhrase = FALSE;
                   }
                   else if (fContinueRegex)
                   {
                       BEGIN shortregex;
                       fContinueRegex = FALSE;
                   }
                   else if (fContinueMaybeRegex)
                   {
                       BEGIN mayberegex;
                       fContinueMaybeRegex = FALSE;
                   }
                   TOKEN (_NOT);}
{near}[ ]+       { // Shorthand near.  The text that follows is always scanned
                   // as an implicit phrase, regardless of the continue flags.
                   yaccDebugOut(( DEB_ITRACE, "near[ ]+ _NEAR token, begin implicitphrase\n" ));
		   BEGIN implicitphrase;
                   TOKEN (_NEAR);}
{near}\{	 { yaccDebugOut(( DEB_ITRACE, "near{ _NEAR token, begin implicitphrase\n" ));
		   // Push the open brace back so that it starts the next token.
		   yyless(yyleng-1);
		   BEGIN implicitphrase;
                   TOKEN (_NEAR);}			   
~                { // Symbolic form of near.
                   BEGIN implicitphrase;
                   TOKEN (_NEAR);}
\<               { TOKEN (_LT);}
\>               { TOKEN (_GT);}
\<\=             { TOKEN (_LTE);}
\>\=             { TOKEN (_GTE);}
\=               { if (fContinueMaybeRegex) 
                      {
                          // We are not sure if we are going to find a
                          // regular expression or a phrase.
                          
                          BEGIN mayberegex;
                          fContinueMaybeRegex = FALSE;
                      }
                      TOKEN (_EQ);
                    }
\!\=                { TOKEN (_NE); }
\^a                 { TOKEN (_ALLOF); }
\^s                 { TOKEN (_SOMEOF); }
    %{// A relational operator combined with ^s, in either order and with
      // optional blanks in between, yields a single SOME token. %}
\<[ ]*\^s |
\^s[ ]*\<           { TOKEN (_LTSOME); }
\>[ ]*\^s |
\^s[ ]*\>           { TOKEN (_GTSOME); }
\<\=[ ]*\^s |
\^s[ ]*\<\=         { TOKEN (_LTESOME); }
\>\=[ ]*\^s |
\^s[ ]*\>\=         { TOKEN (_GTESOME); }
\=[ ]*\^s |
\^s[ ]*\=           { TOKEN (_EQSOME); }
\!\=[ ]*\^s |
\^s[ ]*\!\=         { TOKEN (_NESOME); }

\^s[ ]*\^a          { TOKEN (_ALLOFSOME); }
\^s[ ]*\^s          { TOKEN (_SOMEOFSOME); }
    %{// The same combinations with ^a yield a single ALL token.  The first
      // pattern used to be written with a leading caret before the less
      // than sign, so the less than form never produced _LTALL. %}
\<[ ]*\^a  |
\^a[ ]*\<           { TOKEN (_LTALL); }
\>[ ]*\^a   |
\^a[ ]*\>           { TOKEN (_GTALL); }
\<\=[ ]*\^a |
\^a[ ]*\<\=         { TOKEN (_LTEALL); }
\>\=[ ]*\^a |
\^a[ ]*\>\=         { TOKEN (_GTEALL); }
\=[ ]*\^a   |
\^a[ ]*\=           { TOKEN (_EQALL); }
\!\=[ ]*\^a |
\^a[ ]*\!\=         { TOKEN (_NEALL); }
\^a[ ]*\^a          { TOKEN (_ALLOFALL); }
\^a[ ]*\^s          { TOKEN (_SOMEOFALL); }

   %{// *************
     // VECTOR SPACE TOKENS
     // ************* %}
   %{// rank method, quoted value %}
\{{vecmethod}=[ ]*\"[^"]*\"[ ]*\}               { STRING_VALUE(_VEMETHOD, TRUE, TRUE); }
   %{// rank method, any other value up to the closing brace %}
\{{vecmethod}=[^}]*\}                           { STRING_VALUE(_VEMETHOD, TRUE, FALSE); }
\{{ve}\}                                        {
                                                    // makes more sense to enter phrase mode
                                                    // rather than freetext mode.
                                                    // The flag lets a following operator or
                                                    // weight re-enter phrase mode as well.
                                                    fContinueImplicitPhrase = TRUE;
                                                    BEGIN implicitphrase;
                                                    TOKEN (_VE);
                                                }
                                                   
\{\/{vector}\}                                  { TOKEN (_VECTOR_END); }

    %{// *************
      // longhand NEAR
      // ************* %}
   
    %{// must return both unit and distance, so use start state to pull them out, and 
      // return _NEAR_END so parser knows we hit the closing }  
    %}
    %{// Neither rule returns a token; they only switch to the innear state. %}
\{{near}[ ]     { yaccDebugOut(( DEB_ITRACE, "Longhand _NEAR token, begin innear\n" ));
		  BEGIN innear; }
\{{near}\{	{ yaccDebugOut(( DEB_ITRACE, "Longhand _NEAR{ token, begin innear\n" ));
		  // Push the second open brace back so that it is scanned in the innear state.
		  yyless(yyleng-1);
		  BEGIN innear; }

   %{// ************
     // WEIGHT
     // ************ %}
   %{// Accepted values: 0, 1, 0.ddd, 1.000 (zeros only) and .ddd %}
\{{weight}=[ ]*(0|1|0\.[0-9]*|1\.[0]*|\.[0-9]+)[ ]*\} { 
							  // Resume phrase mode after the weight if it was pending.
							  if (fContinueImplicitPhrase)
							  {
							      BEGIN implicitphrase;
							      fContinueImplicitPhrase = FALSE;
							  }
						
							  yaccDebugOut(( DEB_ITRACE, "_WEIGHT TOKEN FOUND!!\n" ));
							  STRING_VALUE(_WEIGHT,TRUE,FALSE);
						      }

\{{coerce}\}    {
                        // Resume phrase mode after the tag if it was pending.
                        if (fContinueImplicitPhrase)
                        {
                            BEGIN implicitphrase;
                            fContinueImplicitPhrase = FALSE;
                        }
                        TOKEN (_COERCE); }

   %{// ****************
     // longhand GENERATE
     // **************** %}

   %{// The method names prefix and inflect are matched in lower case only.
     // Any mix of blanks and double quotes may surround them. %}
\{{genmethod}=[" ]*prefix[" ]*\}                {
                                                    // Resume phrase mode after the tag if it was pending.
                                                    if (fContinueImplicitPhrase)
                                                    {
                                                        BEGIN implicitphrase;
                                                        fContinueImplicitPhrase = FALSE;
                                                    }
                                                    yaccDebugOut((DEB_ITRACE, "Prefix recognized.\n"));
                                                    TOKEN(_GENPREFIX);
                                                }
\{{genmethod}=[" ]*inflect[" ]*\}               {
                                                    // Resume phrase mode after the tag if it was pending.
                                                    if (fContinueImplicitPhrase)
                                                    {
                                                        BEGIN implicitphrase;
                                                        fContinueImplicitPhrase = FALSE;
                                                    }
                                                    yaccDebugOut((DEB_ITRACE, "Inflect recognized.\n"));
                                                    TOKEN(_GENINFLECT);
                                                }
   %{// closing token %}
\{\/{generate}\}                                { TOKEN (_GENNORMAL); }

    %{// ****************
      // longhand REGEX
      // ****************   %}

    %{// quoted body %}
{begin_regex}\"[^"]*\"{end_regex}       { STRING_VALUE(_REGEX,TRUE,TRUE);}
    %{// unquoted body without any open brace %}
{begin_regex}[^{]*{end_regex}           { STRING_VALUE(_REGEX,TRUE,FALSE);}
    %{// unquoted body in which a vertical bar may be followed by one of
      // the special characters, including braces %}
{begin_regex}([^{]*\|[()\[{}\],*?+][^{]*)*{end_regex}       { STRING_VALUE(_REGEX,TRUE,FALSE);}


    %{// ****************
      // shorthand REGEX
      // ****************   %}
     
    %{// shorthand, quoted %}
#\"[^"]+\"                              { 
                                            // Get into short form of reg expression
                                            // The property name itself is returned as _PROPNAME.
                                            BEGIN shortregex;
                                            STRING_VALUE(_PROPNAME, FALSE, TRUE);
                                        }     
    %{// shorthand, not quoted %}
#[^" <>=!&|~\^]+                        {
                                            // Get into short form of reg expression
                                            // The property name itself is returned as _PROPNAME.
                                            BEGIN shortregex;
                                            STRING_VALUE(_PROPNAME, FALSE, FALSE);
                                        }    


    %{// ***************
      // longhand PHRASE                                            
      //
      // Text from the begin_phrase delimiter through the end_phrase
      // delimiter becomes a _PHRASE token with fLong = TRUE. When the
      // closing delimiter is directly followed by *, that * is pushed
      // back and rescanned in the shortgen start condition.
      // *************** %}
    
    %{// quoted, with trailing * or **  %}
{begin_phrase}\"[^"]*\"{end_phrase}\*       {
                                                // trailing * starts a shorthand generate suffix
                                                // (* or **) - process it in shortgen on next pass.
                                                // Grab phrase now.
                                                // yyless(yyleng-1) keeps all but the last matched
                                                // character, so the * goes back to the input.
                                                yyless(yyleng-1); 
                                                BEGIN shortgen; 
                                                STRING_VALUE(_PHRASE,TRUE,TRUE); 
                                            }
    %{// quoted, without trailing * or **  %}                                           
{begin_phrase}\"[^"]*\"{end_phrase}         {   
                                                // no trailing * -- phrase only
                                                STRING_VALUE(_PHRASE,TRUE,TRUE);
                                            }
    %{// unquoted, with trailing * or ** %}                                           
{begin_phrase}[^{]*{end_phrase}\*           {  
                                                // trailing * starts a shorthand generate suffix
                                                // (* or **) - process it in shortgen on next pass.
                                                // Grab phrase now.
                                                yyless(yyleng-1); 
                                                BEGIN shortgen; 
                                                STRING_VALUE(_PHRASE,TRUE,FALSE); 
                                            }
    %{// unquoted, without trailing * or **  %}                                                                                        
{begin_phrase}[^{]*{end_phrase}             {   
                                                // no trailing * -- phrase only
                                                STRING_VALUE(_PHRASE,TRUE,FALSE); 
                                            }
    %{// *************                                        
      // shorthand PHRASE                                            
      //
      // A double-quoted string in the INITIAL state is returned as a
      // _PHRASE token with fLong = FALSE and fQuote = TRUE.
      // *************  %}
    
    %{// with trailing * or **  %}
\"[^"]*\"\*                                 {
                                                // trailing * starts a shorthand generate suffix
                                                // (* or **) - process it in shortgen on next pass.
                                                // Grab phrase now.
                                                // yyless(yyleng-1) returns the * to the input.
                                                yyless(yyleng-1); 
                                                BEGIN shortgen;
                                                STRING_VALUE(_PHRASE, FALSE, TRUE); 
                                            } 
    %{ // without trailing * or ** %}                                           
\"[^"]*\"                                   {
                                                // no trailing * -- phrase only 
                                                STRING_VALUE(_PHRASE, FALSE, TRUE); 
                                            }
                                            
    %{// *****************
    // longhand FREETEXT
    //
    // Text from the begin_freetext delimiter through the end_freetext
    // delimiter becomes a _FREETEXT token with fLong = TRUE. A * directly
    // after the closing delimiter is pushed back and rescanned in the
    // shortgen start condition.
    // *****************          %}
    
    %{// quoted, with trailing * or **  %}
{begin_freetext}\"[^"]*\"{end_freetext}\*       {
                                                    // trailing * starts a shorthand generate suffix
                                                    // (* or **) - process it in shortgen on next pass.
                                                    // Grab freetext now.
                                                    // yyless(yyleng-1) returns the * to the input.
                                                    yyless(yyleng-1); 
                                                    BEGIN shortgen;
                                                    STRING_VALUE(_FREETEXT,TRUE,TRUE); 
                                                }
    %{// quoted, without trailing * or ** %}                                               
{begin_freetext}\"[^"]*\"{end_freetext}         {
                                                    // no trailing * -- freetext  only
                                                    STRING_VALUE(_FREETEXT,TRUE,TRUE); 
                                                }
    %{// unquoted, with trailing * or ** %}                                               
{begin_freetext}[^{]*{end_freetext}\*              {
                                                    // trailing * starts a shorthand generate suffix
                                                    // (* or **) - process it in shortgen on next pass.
                                                    // Grab freetext now.
                                                    yyless(yyleng-1); 
                                                    BEGIN shortgen;
                                                    STRING_VALUE(_FREETEXT,TRUE,FALSE); 
                                                }
    %{// unquoted, without trailing * or **  %}                                                
{begin_freetext}[^{]*{end_freetext}                {
                                                    // no trailing * -- freetext  only
                                                    STRING_VALUE(_FREETEXT,TRUE,FALSE); 
                                                }
    %{// ******************
      // shorthand FREETEXT 
      //
      // Unquoted text that does not start with an operator or mode
      // character. This first rule handles a run that ends in a space.
      // ****************** %}
     
[^#$@~&|<>=!\^*"()\{ ][^&~|{) ]*[ ]     {
                                            // For backward compatibility, we want to special
                                            // case and recognize the "not" operator when it
                                            // is immediately followed by a mode specifier character
                                            // (@, $, #). For example, "not@size > 2" should be treated
                                            // as if we have a "not" operator followed by "@size > 2".
                                            // Without this special case, "not@size > 2" gets recognized
                                            // as free text.
                                            
                                            if (IsNotOperator())
                                            {
                                                // Keep only the first three matched characters,
                                                // return the rest to the input, and report _NOT.
                                                // TOKEN returns, so nothing below runs.
                                                yyless(3);
                                                BEGIN INITIAL;
                                                TOKEN(_NOT);
                                            }
                                            
                                            yaccDebugOut(( DEB_ITRACE, "fTreatFreetextAsPhrase is %d\n", fTreatFreetextAsPhrase ));
                                            // No token is returned from here. The text matched so
                                            // far is kept (yymore below) and extended by the rules
                                            // of the start condition selected by the flag.
                                            if (fTreatFreetextAsPhrase)
                                                BEGIN implicitphrase;
                                            else
                                                BEGIN infreefreetext;
                                        
                                            // The phrase request is consumed by this run.
                                            fTreatFreetextAsPhrase = FALSE;    
                                            yymore();
                                        }  
[^#$@~&|<>=!\^*"()\{ ][^&~|{) ]*        {
                                            // Shorthand free text that is NOT followed by a space
                                            // (it ends at an operator character or at end of input).
                                            //
                                            // IsNotOperator is used here for the same reason as in
                                            // the rule for text followed by a space, except that this
                                            // rule covers situations where we have no spaces in the
                                            // query. E.g. "not@size>2". 
                                            // This should be equivalent to 
                                            // "not@size > 2", which in turn should be equivalent to
                                            // "not @size > 2"
                                            
                                            if (IsNotOperator())
                                            {
                                                // Keep only the first three matched characters,
                                                // return the rest to the input, and report _NOT.
                                                yyless(3);
                                                BEGIN INITIAL;
                                                TOKEN(_NOT);
                                            }
                                            
                                            // fTreatFreetextAsPhrase is a one-shot request to emit
                                            // the next free-text run as a phrase. It has to be
                                            // consumed BEFORE the token is emitted: STRING_VALUE
                                            // returns from the lexer for every token that is not
                                            // just spaces or tabs, so a reset placed after it is
                                            // skipped and the request would leak onto later,
                                            // unrelated tokens.
                                            const BOOL fEmitAsPhrase = fTreatFreetextAsPhrase;
                                            fTreatFreetextAsPhrase = FALSE;
                                            
                                            if (fEmitAsPhrase)
                                            {
                                                STRING_VALUE(_PHRASE,FALSE,FALSE);
                                            }
                                            else
                                            {
                                                STRING_VALUE(_FREETEXT,FALSE,FALSE);
                                            }
                                        }  
  

    %{// *************
      // VECTOR VALUES
      //
      // Both rules only look ahead far enough to see a ; separator after
      // the first element. yyless(1) keeps just the opening parenthesis
      // as matched text and returns the rest to the input, which is then
      // rescanned under the invector start condition. Neither action
      // returns a token.
      // *************  %}
       
    %{// quoted multi-value vector - has ; separator.  Singlets caught in parser  %}
\([ ]*\"[^"]*\"[ ]*;    { BEGIN invector; yyless(1);}
  %{// unquoted multi-value vector - has ; separator.  Singlets caught in parser %}
\([^(;)]+;              { BEGIN invector; yyless(1);}


  %{// 
    // INNEAR: longhand NEAR processing
    //
    // Whitespace and commas are skipped without producing a token.
    // dist = digits is returned as _NEARDIST; unit = one of the word,
    // sent, par or chap definitions is returned as _NEARUNIT (both with
    // fLong = TRUE, so the whole assignment text is handed to
    // CreateTknValue). The closing brace switches to implicitphrase and
    // returns _NEAR_END.
    // 
    %}
<innear>{white}                 {}
<innear>,                       {}
<innear>dist[ ]*=[ ]*[0-9]+     { STRING_VALUE(_NEARDIST,TRUE,FALSE);}
<innear>unit[ ]*=[ ]*{word}     { STRING_VALUE(_NEARUNIT,TRUE,FALSE);}  
<innear>unit[ ]*=[ ]*{sent}     { STRING_VALUE(_NEARUNIT,TRUE,FALSE);} 
<innear>unit[ ]*=[ ]*{par}      { STRING_VALUE(_NEARUNIT,TRUE,FALSE);}
<innear>unit[ ]*=[ ]*{chap}     { STRING_VALUE(_NEARUNIT,TRUE,FALSE);}
<innear>\}                      { BEGIN implicitphrase; TOKEN (_NEAR_END);} 

    %{// 
      // INVECTOR: multi value vector processing
      //
      // Whitespace and ; separators are skipped. Each element, quoted or
      // unquoted, is returned as _VECTORELEMENT. The closing parenthesis
      // returns _VE_END and goes back to the INITIAL state.
      //
      // NOTE(review): the unquoted element that ends in ; is passed with
      // fLong = TRUE while the last unquoted element uses fLong = FALSE.
      // Presumably this is how the trailing ; is dropped from the value;
      // confirm against CreateTknValue.
      // 
    %}
<invector>{white}           {}
<invector>;                 {}
<invector>\"[^"]*\"         { STRING_VALUE(_VECTORELEMENT, FALSE, TRUE);}
<invector>[^ ";)][^;)]*;    { STRING_VALUE(_VECTORELEMENT, TRUE, FALSE);}
<invector>[^ ";)][^;)]*\)   {
                                //  Need to emit _VECTORELEMENT and _VE_END -- so backup 1
                                //  so we can emit _VE_END on next pass                               
                                yyless(yyleng-1);
                                STRING_VALUE(_VECTORELEMENT, FALSE, FALSE);
                            }
<invector>\)                { BEGIN INITIAL; TOKEN (_VE_END); } 

    %{// 
      // INFREEFREETEXT: shorthand FREETEXT processing
      //
      // The rules in this state extend a free-text run with yymore, so
      // yytext and yyleng cover the whole run collected so far. When an
      // operator is met it is pushed back with yyless, the state returns
      // to INITIAL and the collected text is passed to STRING_VALUE,
      // which returns a token unless the text is only spaces or tabs.
      //
      // NOTE:  and, or, near need to be localized %}
<infreefreetext>[ ]+        { yymore(); }
<infreefreetext>{and}[ ]    {
                                // Give back the last four characters: the operator
                                // and the space after it. Assumes the and definition
                                // (declared outside this chunk) matches three characters.
                                yyless(yyleng-4); 
                                BEGIN INITIAL;                        
                                STRING_VALUE(_FREETEXT,FALSE,FALSE); 
                            }  
<infreefreetext>{and}\{     {
                                // Same as above, with an opening brace after the operator.
                                yyless(yyleng-4); 
                                BEGIN INITIAL;                        
                                STRING_VALUE(_FREETEXT,FALSE,FALSE); 
                            }  			    
<infreefreetext>{or}[ ]     { 
                                // Give back the last three characters: the operator
                                // and the space after it. Assumes the or definition
                                // matches two characters.
                                yyless(yyleng-3); 
                                BEGIN INITIAL;
                                STRING_VALUE(_FREETEXT,FALSE,FALSE); 
                            }
<infreefreetext>{or}\{	    { 
                                // Same as above, with an opening brace after the operator.
                                yyless(yyleng-3); 
                                BEGIN INITIAL;
                                STRING_VALUE(_FREETEXT,FALSE,FALSE); 
                            }			    
<infreefreetext>{near}[ ]   { 
				yaccDebugOut(( DEB_ITRACE, "{infreefreetext}{near}[ ]\n" ));
                                // A near operator ends the run. Give back the last five
                                // characters (operator plus the space; assumes the near
                                // definition matches four characters), ask for the NEXT
                                // free-text run to be treated as a phrase, and return
                                // the collected text as _PHRASE instead of _FREETEXT.
                                yyless(yyleng-5); 
                                fTreatFreetextAsPhrase = TRUE;
                                BEGIN INITIAL;
                                STRING_VALUE(_PHRASE,FALSE,FALSE); 
                            }
			    
<infreefreetext>{near}\{    { 
				yaccDebugOut(( DEB_ITRACE, "{infreefreetext}{near}{\n" ));
                                // Same as above, with an opening brace after the operator.
                                yyless(yyleng-5); 
                                fTreatFreetextAsPhrase = TRUE;
                                BEGIN INITIAL;
                                STRING_VALUE(_PHRASE,FALSE,FALSE); 
                            }
<infreefreetext>\{{near}[ ] { 
				yaccDebugOut(( DEB_ITRACE, "{infreefreetext}{{near}\n" ));
                                // Longhand near tag: the opening brace is part of the
                                // match, so six characters are given back.
                                yyless(yyleng-6); 
                                fTreatFreetextAsPhrase = TRUE;
                                BEGIN INITIAL;
                                STRING_VALUE(_PHRASE,FALSE,FALSE); 
                            }
<infreefreetext>\{{near}\{  { 
				yaccDebugOut(( DEB_ITRACE, "{infreefreetext}{{near}{\n" ));
                                // Longhand near tag directly followed by another brace.
                                yyless(yyleng-6); 
                                fTreatFreetextAsPhrase = TRUE;
                                BEGIN INITIAL;
                                STRING_VALUE(_PHRASE,FALSE,FALSE); 
                            }			    
<infreefreetext>&           {
                                // Single-character operators end the run: the character
                                // is given back, the state returns to INITIAL and the
                                // collected text is returned as _FREETEXT.
                                yyless(yyleng-1); 
                                BEGIN INITIAL;
                                STRING_VALUE(_FREETEXT,FALSE,FALSE); 
                            }  
<infreefreetext>\|          { 
                                yyless(yyleng-1); 
                                BEGIN INITIAL;
                                STRING_VALUE(_FREETEXT,FALSE,FALSE); 
                            }
<infreefreetext>~           { 
                                // Handled like the near keyword rules of this state: the
                                // collected text is returned as _PHRASE and the next
                                // free-text run is requested as a phrase too.
                                yyless(yyleng-1); 
                                fTreatFreetextAsPhrase = TRUE;
                                BEGIN INITIAL;
                                STRING_VALUE(_PHRASE,FALSE,FALSE); 
                            }
<infreefreetext>\(          { 
                                yyless(yyleng-1); 
                                BEGIN INITIAL;
                                STRING_VALUE(_FREETEXT,FALSE,FALSE); 
                            }                             
<infreefreetext>\)          { 
                                yyless(yyleng-1); 
                                BEGIN INITIAL;
                                STRING_VALUE(_FREETEXT,FALSE,FALSE); 
                            }                             
<infreefreetext>\{          { 
                                yyless(yyleng-1); 
                                BEGIN INITIAL;
                                STRING_VALUE(_FREETEXT,FALSE,FALSE);
                            }
<infreefreetext>\"[^"]+\"   { 
                                // A quoted, non-empty string ends the run and stays part
                                // of the returned text (no yyless). fQuote is FALSE here.
                                BEGIN INITIAL;
                                STRING_VALUE(_FREETEXT,FALSE,FALSE);
                            }
<infreefreetext>[^~&|{}()" ]+[ ]  { yymore(); }
<infreefreetext>[^~&|{}()" ]+     {
				      // A word with no space after it closes the run.
				      BEGIN INITIAL;
				      STRING_VALUE(_FREETEXT,FALSE,FALSE); 
				  }   
                               
    %{// 
      // SHORTGEN:  * or ** processing
      //
      // can only get here by backing up over *, 
      // so we will always find a match.
      // ** returns _SHGENINFLECT and a single * returns _SHGENPREFIX;
      // both go back to the INITIAL state first.  %}
<shortgen>\*\*  {  
                   BEGIN INITIAL; 
                   TOKEN(_SHGENINFLECT);
                }
<shortgen>\*    {   
                   BEGIN INITIAL; 
                   TOKEN(_SHGENPREFIX); 
                }
                
    %{//
      // SHORTREGEX: #propname processing
      //
      // can only get here when #"propname" or #propname
      // (quoted or unquoted) version is detected.
      // Text is collected with yymore; when an operator is met it is
      // pushed back with yyless, the state returns to INITIAL and the
      // collected text is passed to STRING_VALUE as _REGEX (no token is
      // returned when the text is only spaces or tabs).
      // NOTE:  and, or need to be localized
      // NOTE:  It doesn't make sense to have the near operator following 
      //        a regular expression. A regex is Boolean and doesn't evaluate
      //        to a position value. 
      //
      //
      
    %}
<shortregex>[ ]+        { yymore(); }
<shortregex>=           {
                          // ignore equal operators...
                          // The state is already shortregex, so this only
                          // discards the = character.
                          BEGIN shortregex;
                        }
<shortregex>\"[^"]*\"   { STRING_VALUE(_REGEX, FALSE, TRUE);}
<shortregex>{and}[ ]    {
                            // Give back the operator and the space after it
                            // (four characters, assuming the and definition
                            // matches three) and return the collected regex.
                            fContinueRegex = TRUE;
                            yyless(yyleng-4); 
                            BEGIN INITIAL;
                            STRING_VALUE(_REGEX,FALSE,FALSE); 
                        }  
<shortregex>{or}[ ]     { 
                            // Give back the operator and the space after it
                            // (three characters, assuming the or definition
                            // matches two) and return the collected regex.
                            fContinueRegex = TRUE;
                            yyless(yyleng-3); 
                            BEGIN INITIAL;
                            STRING_VALUE(_REGEX,FALSE,FALSE); 
                        }
                        
<shortregex>{not}[ ]    { 
                                yyless(yyleng-4); 
                                // The only valid way to get here is to
                                // have seen "and" before. Don't recognize
                                // a regex. Back off and let the lexer take its
                                // normal course. No token is returned here.
                                fContinueRegex = TRUE;
                                BEGIN INITIAL;
                        }                           
<shortregex>&           {
                            // Symbol form of a Boolean operator: give the
                            // character back, return the collected regex and
                            // flag that a regex may continue afterwards.
                            fContinueRegex = TRUE;
                            yyless(yyleng-1); 
                            BEGIN INITIAL;                        
                            STRING_VALUE(_REGEX,FALSE,FALSE); 
                        }  
<shortregex>\|          { 
                            fContinueRegex = TRUE;
                            yyless(yyleng-1); 
                            BEGIN INITIAL;
                            STRING_VALUE(_REGEX,FALSE,FALSE); 
                        }
                        
<shortregex>!           { 
                            yyless(yyleng-1); 
                            // The only valid way to get here is to
                            // have seen "and" before. Don't recognize
                            // a regex. Back off and let the lexer take its
                            // normal course. No token is returned here.
                            fContinueRegex = TRUE;
                            BEGIN INITIAL;
                        }                           
%{
    // When we find an operator we should treat it as one. 
    // So back up and get out if you see one.
    // Normally '^' is treated as part of an operator (e.g. ^a), but it also 
    // has a special meaning in regular expression syntax. So we would have to
    // let it through when it is part of a regular expression. As an alternative,
    // we can allow '^' in a regular expression in a limited manner (i.e. only the use
    // in square brackets to exclude a set of chars, "[^abc]", where abc are excluded).
    // This alternative allows the common-case use of '^' in a regular expression
    // while still treating it as part of an operator when it doesn't 
    // occur immediately after a '['.
    // We are implementing the alternative here because our regex capability
    // only allows for the "[^" construct.
%}                           
<shortregex>[\^<>@$#]   { 
                            // Give the operator character back and leave
                            // shortregex without returning a token. The
                            // continue-regex flag is cleared.
                            yyless(yyleng-1);
                            fContinueRegex = FALSE;
                            BEGIN INITIAL;
                        }
<shortregex>\(          { 
                            // Parentheses and an opening brace end the regex:
                            // the character is given back and the collected
                            // text is returned. fContinueRegex is left untouched.
                            yyless(yyleng-1); 
                            BEGIN INITIAL;
                            STRING_VALUE(_REGEX,FALSE,FALSE); 
                        }                             
<shortregex>\)          { 
                            yyless(yyleng-1); 
                            BEGIN INITIAL;
                            STRING_VALUE(_REGEX,FALSE,FALSE); 
                        }                             
<shortregex>\{          { 
                            yyless(yyleng-1); 
                            BEGIN INITIAL;
                            STRING_VALUE(_REGEX,FALSE,FALSE); 
                        }                     
<shortregex>(([^~&|{}()\^<>!@$#= ])*(\|[()\[{}\],*?+])*(\|\[\^)*([^~&|{}()\^<>!@$#= ])*)+[ ]  { yymore(); }
<shortregex>(([^~&|{}()\^<>!@$#= ])*(\|[()\[{}\],*?+])*(\|\[\^)*([^~&|{}()\^<>!@$#= ])*)+ {
                            // Regex text with no space after it closes the
                            // expression. The pattern accepts ordinary characters
                            // plus pipe-escaped special characters; the version
                            // above, ending in a space, keeps collecting instead.
                            fContinueRegex = TRUE;
                            BEGIN INITIAL;
                            STRING_VALUE(_REGEX,FALSE,FALSE); 
                        }


<mayberegex>{and}[ ]    {
                            // MAYBEREGEX rules collect text that is returned as
                            // _PHRASEORREGEX. A Boolean keyword ends the text: the
                            // keyword and the space after it are given back (four
                            // characters, assuming the and definition matches three).
                            yyless(yyleng-4); 
                            fContinueMaybeRegex = TRUE;
                            BEGIN INITIAL;
                            STRING_VALUE(_PHRASEORREGEX,FALSE,FALSE); 
                        }  
<mayberegex>{or}[ ]     { 
                            // Three characters given back, assuming the or
                            // definition matches two.
                            yyless(yyleng-3); 
                            fContinueMaybeRegex = TRUE;
                            BEGIN INITIAL;
                            STRING_VALUE(_PHRASEORREGEX,FALSE,FALSE); 
                        }
                       
<mayberegex>{not}[ ]    { 
                            yyless(yyleng-4); 
                            // The only valid way to get here is to
                            // have seen "and" before. Don't recognize
                            // a regex. Back off and let the lexer take its
                            // normal course. No token is returned here.
                            fContinueMaybeRegex = TRUE;
                            BEGIN INITIAL;
                        }                           
<mayberegex>&           {
                            // Symbol form of a Boolean operator: give the
                            // character back, return the collected text and
                            // flag that the maybe-regex mode should continue.
                            fContinueMaybeRegex = TRUE;
                            yyless(yyleng-1); 
                            BEGIN INITIAL;                        
                            STRING_VALUE(_PHRASEORREGEX,FALSE,FALSE); 
                        }  
<mayberegex>\|          { 
                            fContinueMaybeRegex = TRUE;
                            yyless(yyleng-1); 
                            BEGIN INITIAL;
                            STRING_VALUE(_PHRASEORREGEX,FALSE,FALSE); 
                        }
                        
<mayberegex>!           { 
                            yyless(yyleng-1); 
                            // The only valid way to get here is to
                            // have seen "and" before. Don't recognize
                            // a phrase. Back off and let the lexer take its
                            // normal course. No token is returned here.
                            fContinueMaybeRegex = TRUE;
                            BEGIN INITIAL;
                        }                           
<mayberegex>\(          { 
                            // Parentheses and an opening brace end the text
                            // without touching fContinueMaybeRegex.
                            yyless(yyleng-1); 
                            BEGIN INITIAL;
                            STRING_VALUE(_PHRASEORREGEX,FALSE,FALSE); 
                        }                             
<mayberegex>\)          { 
                            yyless(yyleng-1); 
                            BEGIN INITIAL;
                            STRING_VALUE(_PHRASEORREGEX,FALSE,FALSE); 
                        }                             
<mayberegex>\{          { 
                            yyless(yyleng-1); 
                            BEGIN INITIAL;
                            STRING_VALUE(_PHRASEORREGEX,FALSE,FALSE); 
                        }
<mayberegex>[ ]+            { yymore(); }
<mayberegex>\"[^"]*\"       { /* quoted text is a plain phrase; the state is not changed */ STRING_VALUE(_PHRASE, FALSE, TRUE);}
<mayberegex>(([^~&|{}()\^<>!@$# ])*(\|[()\[{}\],*?+])*(\|\[\^)*([^~&|{}()\^<>!@$# ])*)+[ ]  {   yymore(); }
<mayberegex>(([^~&|{}()\^<>!@$# ])*(\|[()\[{}\],*?+])*(\|\[\^)*([^~&|{}()\^<>!@$# ])*)+ {
                                 // Text with no space after it closes the value.
                                 // Unlike the shortregex version of this pattern,
                                 // the = character is allowed inside the text.
                                 fContinueMaybeRegex = TRUE;
                                 BEGIN INITIAL;
                                 STRING_VALUE(_PHRASEORREGEX,FALSE,FALSE); 
                            }
%{
    // When we find an operator at the start of a phrase, 
    // we should treat it as one. So back up and get out if you see one.
    // No token is returned and the continue flag is cleared.
%}                           
<mayberegex>[\^<>@$#]       { 
                                yyless(yyleng-1);
                                fContinueMaybeRegex = FALSE;
                                BEGIN INITIAL;
                            }
                               

    %{//
      // IMPLICITPHRASE: Where phrase is implied.
      //
      // can only get here when @propname or {prop name = propname} is detected.
      // Text is collected with yymore; an operator ends it, is pushed back
      // with yyless, and the collected text is passed to STRING_VALUE as
      // _PHRASE. fContinueImplicitPhrase records whether the phrase mode
      // should be re-entered after the operator.
      // NOTE:  and, or, not need to be localized when time permits.
      //
      // NTRAID#DB-NTBUG9-84571-2000/07/31-dlee Indexing Service tripolish2 query expressions misinterpreted as strings
      // if expression has trailing blanks, we'll emit a string value
    %}
<implicitphrase>\"[^"]*\"   {   
				// A quoted phrase is complete by itself: leave the
				// state and do not ask for the mode to continue.
				fContinueImplicitPhrase = FALSE;
                                BEGIN INITIAL;
                                STRING_VALUE(_PHRASE, FALSE, TRUE);
                            }    
<implicitphrase>[ ]+        {   yymore(); }
<implicitphrase>{and}[ ]    {
				// Give back the operator and the space after it (four
				// characters, assuming the and definition matches three).
				yyless(yyleng-4);
                                fContinueImplicitPhrase = TRUE;
                                BEGIN INITIAL;
                                STRING_VALUE(_PHRASE,FALSE,FALSE); 
                            }  
<implicitphrase>{or}[ ]     { 
                                // Three characters given back, assuming the or
                                // definition matches two.
                                yyless(yyleng-3);
                                fContinueImplicitPhrase = TRUE;
                                BEGIN INITIAL;
                                STRING_VALUE(_PHRASE,FALSE,FALSE); 
                            }
<implicitphrase>{near}[ ]   { 
                                yyless(yyleng-5); 
                                // We want to treat the following token as a phrase
                                fContinueImplicitPhrase = TRUE;
                                BEGIN INITIAL;
                                STRING_VALUE(_PHRASE,FALSE,FALSE); 
                            }
<implicitphrase>{near}\{    { 
                                yyless(yyleng-5); 
                                // We want to treat the following token as a phrase
                                fContinueImplicitPhrase = TRUE;
                                BEGIN INITIAL;
                                STRING_VALUE(_PHRASE,FALSE,FALSE); 
                            }			    
<implicitphrase>{not}[ ]    { 
                                yyless(yyleng-4); 
                                // The only valid way to get here is to
                                // have seen "and" before. Don't recognize
                                // a phrase. Back off and let the lexer take its
                                // normal course. No token is returned here.
                                fContinueImplicitPhrase = TRUE;
                                BEGIN INITIAL;
                            }                           
<implicitphrase>&           {
                                // Symbol operators: give the character back, return
                                // the collected phrase and ask for the implicit
                                // phrase mode to continue after the operator.
                                yyless(yyleng-1);
                                fContinueImplicitPhrase = TRUE;
                                BEGIN INITIAL;
                                STRING_VALUE(_PHRASE,FALSE,FALSE); 
                            }  
<implicitphrase>~           {
                                yyless(yyleng-1); 
                                // We want to treat the following token as a phrase
                                fContinueImplicitPhrase = TRUE;
                                BEGIN INITIAL;                        
                                STRING_VALUE(_PHRASE,FALSE,FALSE); 
                            }  
                           
<implicitphrase>!           { 
                                yyless(yyleng-1); 
                                // The only valid way to get here is to
                                // have seen "and" before. Don't recognize
                                // a phrase. Back off and let the lexer take its
                                // normal course. No token is returned here.
                                fContinueImplicitPhrase = TRUE;
                                BEGIN INITIAL;
                            }                           
<implicitphrase>\|          { 
                                yyless(yyleng-1); 
                                fContinueImplicitPhrase = TRUE;
                                BEGIN INITIAL;
                                STRING_VALUE(_PHRASE,FALSE,FALSE); 
                            }
<implicitphrase>\(          {
                                // Parentheses end the phrase and also end the
                                // implicit phrase mode (flag cleared).
                                yyless(yyleng-1); 
                                fContinueImplicitPhrase = FALSE;
                                BEGIN INITIAL;
                                STRING_VALUE(_PHRASE,FALSE,FALSE); 
                            }
<implicitphrase>\)          {
                                yyless(yyleng-1); 
                                fContinueImplicitPhrase = FALSE;
                                BEGIN INITIAL;
                                STRING_VALUE(_PHRASE,FALSE,FALSE); 
                            }                             
<implicitphrase>\{          {
                                // An opening brace ends the phrase but keeps the
                                // mode alive for whatever follows the tag.
                                yyless(yyleng-1);
                                fContinueImplicitPhrase = TRUE; 
                                BEGIN INITIAL;
                                STRING_VALUE(_PHRASE,FALSE,FALSE); 
                            }
<implicitphrase>{contains}[ ] {
                                // Give back the keyword and the space after it (nine
                                // characters, assuming the contains definition
                                // matches eight).
                                yyless(yyleng-9);
                                fContinueImplicitPhrase = TRUE;
                                BEGIN INITIAL;
                                STRING_VALUE(_PHRASE,FALSE,FALSE);
                              }                           
%{
    // When we find an operator at the start of an implicit phrase, 
    // we should treat it as one. So back up and get out if you see one.
    // No token is returned and the continue flag is cleared.
%}                           
<implicitphrase>[\^<>@$#]   { 
                                yyless(yyleng-1);
                                fContinueImplicitPhrase = FALSE;
                                BEGIN INITIAL;
                            }
%{
    // Triplish2 uses = to indicate that whatever appears after it may 
    // be using wildcards. Implement that here: the = is given back, the
    // maybe-regex continuation flag is set and no token is returned.
%}                            
<implicitphrase>=           { 
                                yyless(yyleng-1);
                                fContinueMaybeRegex = TRUE;
                                BEGIN INITIAL;
                            }                            
<implicitphrase>[^~&|{}()\^<>=!@$# ]+[ ] { yymore(); }
<implicitphrase>[^~&|{}()\^<>=!@$# ]+    {
					    // A word with no space after it closes the phrase.
					    fContinueImplicitPhrase = TRUE;
                                            BEGIN INITIAL;
                                            STRING_VALUE(_PHRASE,FALSE,FALSE); 
                                         }