windows-server-2003/net/ias/providers/nap/regex/fa.cxx

//+-------------------------------------------------------------------------
//
//  Copyright (C) 1991, Microsoft Corporation.
//
//  File:       FA.cxx
//
//  Contents:   Non-deterministic finite automata
//
//  Classes:    CNFA
//
//  History:    01-20-92  KyleP     Created
//				03-11-97  arunk		Modified for Kessel
//--------------------------------------------------------------------------

#include <fa.hxx>
#include <stateset.hxx>

//+-------------------------------------------------------------------------
//
//  Member:     CFA::CFA, public
//
//  Synopsis:   Copy constructor
//
//  History:    13-Jul-95 KyleP     Created
//
//--------------------------------------------------------------------------

CFA::CFA( CFA const & src )
        : _cTotal( src._cTotal ),
          _ppState( 0 )
{
    //
    // Deep copy: the new table has the same number of slots as the
    // source.  Every occupied slot receives its own clone of the source
    // state; empty slots stay empty.
    //

    _ppState = new CFAState * [ _cTotal ];

    for ( unsigned iSlot = 0; iSlot < _cTotal; ++iSlot )
    {
        CFAState * const pSrcState = src._ppState[iSlot];

        _ppState[iSlot] = ( 0 != pSrcState ) ? new CFAState( *pSrcState )
                                             : 0;
    }
}

//+-------------------------------------------------------------------------
//
//  Member:     CFA::~CFA, protected
//
//  Synopsis:   Frees automata.
//
//  History:    20-Jan-92 KyleP     Created
//
//--------------------------------------------------------------------------

CFA::~CFA()
{
    if( _ppState )
    {
        //
        // Release every state held in the table (deleting a null
        // slot is a no-op).
        //

        for ( UINT i = 0; i < _cTotal; i++ )
        {
            delete _ppState[i];
        }

        //
        // The table itself is allocated with new[] (see the copy
        // constructor and Add), so it must be released with delete [].
        //

        delete [] _ppState;
    }
}

//+-------------------------------------------------------------------------
//
//  Member:     CFA::Add, protected
//
//  Synopsis:   Adds new state to automata.
//
//  Arguments:  [pState] -- New state.  State number is member data.
//
//  History:    20-Jan-92 KyleP     Created
//
//--------------------------------------------------------------------------

void CFA::Add( CFAState * pState )
{
    //
    // State numbers are 1-based; state N lives in slot N-1.
    //

    UINT const iState = pState->StateNumber();

    if ( iState > _cTotal )
    {
        //
        // Grow the table geometrically until the new state fits.
        //

        UINT newTotal = (_cTotal) ? _cTotal * 2 : 1;

        while ( iState > newTotal )
        {
            newTotal *= 2;
        }

        CFAState ** oldState = _ppState;
        CFAState ** newState = new CFAState * [ newTotal ];

        //
        // Carry over the existing slots (nothing to copy the first time
        // through, when there may be no old table) and clear the rest.
        //

        if ( _cTotal > 0 )
        {
            memcpy( newState, oldState,
                    _cTotal * sizeof( CFAState * ) );
        }

        memset( newState + _cTotal,
                0,
                (newTotal - _cTotal) * sizeof( CFAState * ) );

        _ppState = newState;
        _cTotal = newTotal;

        //
        // Only the pointer table is released; the states it pointed to
        // are now referenced from the new table.
        //

        delete [] oldState;
    }

    _ppState[iState - 1] = pState;
}

//+-------------------------------------------------------------------------
//
//  Member:     CFA::Get, protected
//
//  Arguments:  [iState] -- State to fetch.
//
//  Returns:    State [iState].
//
//  History:    20-Jan-92 KyleP     Created
//
//--------------------------------------------------------------------------

CFAState * CFA::Get( UINT iState )
{
    //
    // State numbers are 1-based while the table is 0-based.  No bounds
    // check is performed here.
    //

    CFAState * const pState = *( _ppState + ( iState - 1 ) );

    return pState;
}

//+-------------------------------------------------------------------------
//
//  Member:     CNFA::CNFA, public
//
//  Synopsis:   Converts regular expression string to NFA.
//
//  Arguments:  [pwcs]      -- Regular expression.
//              [fCaseSens] -- true if case sensitive search.
//
//  History:    20-Jan-92 Kyleap    Created
//
//--------------------------------------------------------------------------

CNFA::CNFA( WCHAR const * pwcs, bool fCaseSens )
        : _iNextState( 1 ),
          _iStart( 0 ),
          _chars( fCaseSens ),
          _pState( 0 )
{
    UINT iEnd;

    //
    // _pState initially contains room for 2 * #chars in regex.  According
    // to the Dragon Book pg. 121 this is guaranteed to be sufficient space.
    // Of course the dragon book doesn't completely take DOS or CMS into
    // account. For DOS, we need to treat beginning (and end) of line as
    // 'characters' in the string. For CMS, I agreed to support the
    // {m,n} construct, which clearly violates this rule.
    //

    if ( 0 == pwcs )
    {
        throw ERROR_INVALID_PARAMETER;
    }

    _cState = wcslen( pwcs ) * 2 + 2*2;  // 2*2 for beginning & end of line
    _pState = new CNFAState [ _cState ];

    //
    // FindCharClasses and Parse throw on a malformed expression.  The
    // destructor does not run for an object whose constructor throws, so
    // the state table has to be released here before the exception
    // continues to the caller.
    //

    try
    {
        // Number the states 1.._cState.
        for ( unsigned i = 1 ; i <= _cState; i++ )
            Get(i)->Init(i);

        FindCharClasses( pwcs );
        Parse( pwcs, &_iStart, &iEnd );

        Get( iEnd )->MakeFinal();
    }
    catch ( ... )
    {
        delete [] _pState;
        _pState = 0;
        throw;
    }
}

//+-------------------------------------------------------------------------
//
//  Member:     CNFA::CNFA, public
//
//  Synopsis:   Copy constructor
//
//  Arguments:  [src] -- Source
//
//  History:    13-Jul-95 Kylep    Created
//
//--------------------------------------------------------------------------

CNFA::CNFA( CNFA const & src )
        : _iNextState( src.NumStates() ),
          _iStart( src._iStart ),
          _chars( src._chars ),
          _cState( src._cState ),
          _pState( new CNFAState [ src._cState ] )
{
    //
    // The table was sized in the initializer list; now assign each
    // source state into the matching slot, lowest index first.
    //

    unsigned iSlot = 0;

    while ( iSlot != _cState )
    {
        _pState[iSlot] = src._pState[iSlot];
        ++iSlot;
    }
}

//+-------------------------------------------------------------------------
//
//  Member:     CNFA::~CNFA, public
//
//  Synopsis:   Free state table.
//
//  History:    13-Oct-92 KyleP     Created
//
//--------------------------------------------------------------------------

CNFA::~CNFA()
{
    // _pState is allocated with new[] in both constructors in this file,
    // hence the array form of delete.
    delete [] _pState;
}

//+-------------------------------------------------------------------------
//
//  Member:     CNFA::EpsClosure, public
//
//  Synopsis:   Computes the epsilon closure for state [StateNum]
//
//  Effects:    States in the epsilon closure of state [StateNum]
//              are added to the state set [ssOut].
//
//  Arguments:  [StateNum] -- Initial state.
//              [ssOut]    -- Output state set.
//
//  History:    20-Jan-92 KyleP     Created
//
//--------------------------------------------------------------------------

void CNFA::EpsClosure( UINT StateNum, CStateSet & ssOut )
{
    //
    // ssVisited records the states whose epsilon moves have already
    // been expanded into ssOut.
    //

    CStateSet ssVisited;

    ssOut.Add( StateNum );

    //
    // Sweep ssOut repeatedly until a full pass expands nothing new.
    // Each sweep walks from the current last member down to the first.
    //

    bool fExpanded;

    do
    {
        fExpanded = false;

        UINT iMember = ssOut.Count();

        for ( ; iMember > 0; iMember-- )
        {
            if ( ssVisited.IsMember( ssOut.State( iMember ) ) )
            {
                continue;
            }

            ssVisited.Add( ssOut.State( iMember ) );

            Get( ssOut.State( iMember ) )->Move( ssOut, symEpsilon );

            fExpanded = true;
        }
    }
    while ( fExpanded );
}

//+-------------------------------------------------------------------------
//
//  Member:     CNFA::EpsClosure, public
//
//  Synopsis:   Computes the epsilon closure for state set [ssIn]
//
//  Effects:    States in the epsilon closure of [ssIn]
//              are added to the state set [ssOut].
//
//  Arguments:  [ssIn]  -- Initial state set.
//              [ssOut] -- Output state set.
//
//  History:    20-Jan-92 KyleP     Created
//
//--------------------------------------------------------------------------

void CNFA::EpsClosure( CStateSet & ssIn, CStateSet & ssOut )
{
    //
    // Accumulate the closure of every member of ssIn into ssOut,
    // visiting the members from last to first.
    //

    UINT iMember = ssIn.Count();

    while ( iMember > 0 )
    {
        EpsClosure( ssIn.State( iMember ), ssOut );
        iMember--;
    }
}

//+-------------------------------------------------------------------------
//
//  Member:     CNFA::IsFinal, public
//
//  Arguments:  [ss] -- State set
//
//  Returns:    true if some state in [ss] is final.
//
//  History:    20-Jan-92 Kyleap    Created
//
//--------------------------------------------------------------------------

bool CNFA::IsFinal( CStateSet & ss )
{
    //
    // Scan from the last member down; stop at the first final state.
    //

    for ( UINT iMember = ss.Count(); iMember > 0; iMember-- )
    {
        if ( Get( ss.State( iMember ) )->IsFinal() != NULL )
        {
            return( true );
        }
    }

    return( false );
}

//+-------------------------------------------------------------------------
//
//  Member:     CNFA::Move, public
//
//  Effects:    Performs a non-deterministic move from every state
//              in [ssIn] on [symbol].  The new state set is in
//              [ssOut].
//
//  Arguments:  [ssIn]   -- Initial state set.
//              [ssOut]  -- Final state set.
//              [symbol] -- Transition symbol.
//
//  History:    20-Jan-92 KyleP     Created
//
//--------------------------------------------------------------------------

void CNFA::Move( CStateSet & ssIn, CStateSet & ssOut, UINT symbol )
{
    //
    // Ask each member of ssIn (last to first) to add its targets on
    // [symbol] to ssOut.
    //

    UINT iMember = ssIn.Count();

    while ( iMember > 0 )
    {
        Get( ssIn.State( iMember ) )->Move( ssOut, symbol );
        iMember--;
    }
}

//+-------------------------------------------------------------------------
//
//  Member:     CNFA::FindCharClasses, private
//
//  Effects:    Partitions the Unicode character space (2^16 characters)
//              into equivalence classes such that all characters in
//              a given class will have identical transitions in the NFA.
//
//  Arguments:  [wcs] -- Original regular expression string.
//
//  History:    20-Jan-92 KyleP     Created
//
//  Notes:      If case sensitivity is turned off, two ranges will be
//              added for characters with upper/lower case.  Even though
//              both ranges react identically the mapping algorithm can
//              only deal with contiguous ranges of characters.
//
//--------------------------------------------------------------------------

void CNFA::FindCharClasses( WCHAR const * wcs )
{
    //
    // Scan the regex looking for characters with (potentially)
    // different transitions.  Every such character (or range) is
    // registered with _chars; _chars.Prepare() is called once the whole
    // expression has been scanned.  Throws ERROR_INVALID_PARAMETER on a
    // dangling escape, an unterminated range or an unterminated repeat.
    //

    while ( *wcs )
    {
        switch ( *wcs )
        {
        case wcAnySingle:
        case wcAnyMultiple:
        case wcDOSDot:
            break;

        case wcEscape:
        {
            wcs++;

            switch ( *wcs )
            {
            case 0:
                // Escape character at the very end of the expression.
                throw ERROR_INVALID_PARAMETER;

            case wcAnySingle:
            case wcRepeatZero:
            case wcRepeatOne:
            case wcOr:
            case wcBeginParen:
            case wcEndParen:
                break;

            case wcBeginRepeat:
                //
                // Skip the repeat specification up to its closing
                // escape + wcEndRepeat pair.
                //

                for ( wcs++; *wcs; wcs++ )
                {
                    if ( *wcs == wcEscape && *(wcs+1) == wcEndRepeat )
                    {
                        wcs++;
                        break;
                    }
                }

                //
                // Ran off the end without finding the terminator.  Bail
                // out now; otherwise the increment at the bottom of the
                // outer loop would step past the terminating null.
                //

                if ( *wcs == 0 )
                {
                    throw ERROR_INVALID_PARAMETER;
                }

                break;

            case wcBeginRange:
                wcs++;

                //
                // Check the special cases of ^ and ]
                //

                if ( *wcs == wcInvertRange )
                    wcs++;

                if ( *wcs == wcEndRange )
                {
                    _chars.AddRange( *wcs, *wcs );
                    wcs++;
                }

                for ( ; *wcs && *wcs != wcEndRange; wcs++ )
                {
                    if ( *(wcs + 1) == wcRangeSep )
                    {
                        _chars.AddRange( *wcs, *(wcs+2) );
                    }
                    else
                    {
                        _chars.AddRange( *wcs, *wcs );
                    }
                }

                if ( *wcs != wcEndRange )
                {
                   throw ERROR_INVALID_PARAMETER;
                }

                break;

            default:
                _chars.AddRange( *wcs, *wcs );
                break;
            }

            break;
        }

        default:
            _chars.AddRange( *wcs, *wcs );
            break;
        }

        wcs++;
    }

    _chars.Prepare();
}

//
// Empty wide string.  Parse assigns it to wcsLocalStart when there is
// nothing available to repeat, and may later read through it as a
// WCHAR string.  It therefore has to be a wide literal: a narrow ""
// is only one byte long, so reading its first WCHAR would run past the
// end of the literal.
//
WCHAR * CNFA::_wcsNull = (WCHAR*)L"";

//+-------------------------------------------------------------------------
//
//  Member:     CNFA::Parse, private
//
//  Synopsis:   Creates a NFA from [wcs]
//
//  Effects:    Parses [wcs] until end of string or character wcHalt is
//              encountered.  On exit, [iStart] and [iEnd] contain the
//              starting and ending states of the NFA, respectively.
//              [pwcsEnd], if supplied, points one character past the
//              position at which parsing stopped.
//
//  Arguments:  [wcs]     -- Regular expression.
//              [iStart]  -- Starting state of NFA.
//              [iEnd]    -- Ending state of NFA
//              [pwcsEnd] -- If non-null, receives a pointer one past the halt character.
//              [wcHalt]  -- Stop parsing if this character encountered.
//
//  History:    20-Jan-92 KyleP     Created
//              08-Jun-98 SBens     Fixed so that all top-level OR clauses
//                                  must terminate with symEndLine.
//
//--------------------------------------------------------------------------

void CNFA::Parse( WCHAR const * wcs,
                  UINT * iStart,
                  UINT * iEnd,
                  WCHAR const * * pwcsEnd,
                  WCHAR wcHalt )
{
    //
    // Walks [wcs] one token at a time, adding states and transitions as
    // it goes.  Parenthesized groups and {m,n} repeats are handled by
    // re-entering Parse on a piece of the string with a non-zero
    // [wcHalt].  Throws ERROR_INVALID_PARAMETER on malformed input.
    //

    unsigned iCurrent;
    unsigned iNext;

    unsigned iLocalStart;               // Used for */+/? repositioning
    bool fTopLevel = (*iStart == 0); // true if at top level;

    //
    // Remember where this parse began so the end-of-string handling
    // below never looks at the character before the buffer.
    //

    WCHAR const * const wcsBegin = wcs;

    *iEnd = 0;

    //
    // Get a starting state.  *iStart == 0 implies this is the 'top-level'
    // parse of the regular expression (e.g. we're not parsing a
    // parenthesized subexpression.
    //

    if ( fTopLevel )
    {
        iCurrent = _iNextState;
        *iStart = _iNextState++;
        iLocalStart = 0;

        //
        // non-EGREP (DOS) regex match entire string.
        //

        if ( *wcs != wcAnyMultiple )
        {
            iNext = _iNextState;
            Get( iCurrent )->AddTransition( symBeginLine, _iNextState );
            _iNextState++;
            iCurrent = iNext;
        }
        else
        {
            //
            // Add a 'special' transition on the very first state to
            // eat up characters until we actually jump into the
            // regular expression.
            //

            Get( iCurrent )->AddTransition( symAny, Get( iCurrent )->StateNumber() );
        }
    }
    else
    {
        iCurrent = *iStart;
        iLocalStart = *iStart;
    }

    //
    // Every alternative of an OR restarts from this state.
    //

    unsigned iOrStart = Get( iCurrent )->StateNumber();

    //
    // wcsLocalStart tracks the piece of string to be repeated for wcZeroOrOne, etc.
    //

    WCHAR const * wcsLocalStart = wcs;

    //
    // Parse the regular expression until there is no more or a
    // termination character is hit.
    //

    for ( ; *wcs && *wcs != wcHalt; wcs++ )
    {
        switch ( *wcs )
        {
        case wcAnySingle:
            iNext = _iNextState;
            Get( iCurrent )->AddTransition( symAny, _iNextState );
            iLocalStart = Get( iCurrent )->StateNumber();
            wcsLocalStart = wcs;
            _iNextState++;
            iCurrent = iNext;
            break;

        case wcAnyMultiple:
            //
            // Any single
            //

            iNext = _iNextState;
            Get( iCurrent )->AddTransition( symAny, _iNextState );
            iLocalStart = Get( iCurrent )->StateNumber();
            wcsLocalStart = wcs;
            _iNextState++;
            iCurrent = iNext;

            //
            // Repeat zero or more
            //

            Get( iLocalStart )->AddTransition( symEpsilon,
                                               Get( iCurrent )->StateNumber() );
            Get( iCurrent )->AddTransition( symEpsilon, iLocalStart );
            break;

        case wcEscape:
        {
            wcs++;

            switch ( *wcs )
            {
            case wcBeginParen:
            {
                UINT iLocalEnd;

                iLocalStart = Get( iCurrent )->StateNumber();
                wcsLocalStart = wcs - 1;
                wcs++;                      // Eat '('.
                Parse( wcs, &iLocalStart, &iLocalEnd, &wcs, wcEndParen );
                wcs--;                      // Provide character for loop to eat.
                iCurrent = iLocalEnd;

                break;
            }

            case wcEndParen:
                //
                // Taken care of at outer level.  Just backup so we hit the end.
                //

                wcs--;
                break;

            case wcBeginRepeat:
            {
                if ( wcHalt == wcBeginRepeat )
                {
                    //
                    // Taken care of at outer level.  Just backup so we hit the end.
                    //

                    wcs--;
                }
                else
                {
                    //
                    // Setup: Bounds of repeated regex
                    //

                    WCHAR const * wcsEndRepeat = wcs + 1;

                    //
                    // Setup: Repeat parameters.
                    //

                    unsigned cRepeat1, cRepeat2;
                    wcs++;

                    ParseRepeat( wcs, cRepeat1, cRepeat2 );

                    unsigned iLocalEnd;

                    //
                    // The minimum set has no epsilon transitions.
                    //

                    if ( cRepeat1 > 1 )
                    {
                        iLocalStart = Get( iCurrent )->StateNumber();
                        iLocalEnd = iLocalStart;

                        for ( unsigned i = 1; i < cRepeat1; i++ )
                        {
                            WCHAR const * wcsEnd;

                            iLocalStart = iLocalEnd;
                            iLocalEnd = 0;  // Must be zero!

                            Parse( wcsLocalStart, &iLocalStart, &iLocalEnd, &wcsEnd, wcBeginRepeat );

                            if ( wcsEnd != wcsEndRepeat )
                            {
                               throw ERROR_INVALID_PARAMETER;
                            }
                        }
                    }
                    else
                        iLocalEnd = Get( iCurrent )->StateNumber();

                    if ( cRepeat1 == cRepeat2 )
                    {
                        // Exact count: nothing more to add.
                    }
                    else if ( cRepeat2 == 0 )
                    {
                        //
                        // Unbounded repeat: loop back to the start of the
                        // repeated piece.  State 0 means there is no such
                        // piece, which the other repeat operators below
                        // also reject.
                        //

                        if ( iLocalStart == 0 )
                        {
                           throw ERROR_INVALID_PARAMETER;
                        }

                        Get( iLocalEnd )->AddTransition( symEpsilon, iLocalStart );
                    }
                    else if ( cRepeat2 > cRepeat1 )
                    {
                        //
                        // Each optional copy gets an epsilon bypass from
                        // its start to its end.
                        //

                        for ( unsigned i = cRepeat1; i < cRepeat2; i++ )
                        {
                            WCHAR const * wcsEnd;

                            iLocalStart = iLocalEnd;
                            iLocalEnd = 0;  // Must be zero!

                            Parse( wcsLocalStart, &iLocalStart, &iLocalEnd, &wcsEnd, wcBeginRepeat );
                            Get( iLocalStart )->AddTransition( symEpsilon, iLocalEnd );

                            if ( wcsEnd != wcsEndRepeat )
                            {
                               throw ERROR_INVALID_PARAMETER;
                            }
                        }
                    }
                    else
                    {
                       throw ERROR_INVALID_PARAMETER;
                    }

                    iCurrent = iLocalEnd;
                    iLocalStart = 0;
                    wcsLocalStart = _wcsNull;
                }
                break;
            }

            case wcOr:
                // Top level 'OR' clauses must terminate with symEndLine.
                if ( fTopLevel )
                {
                    iNext = _iNextState;
                    Get( iCurrent )->AddTransition( symEndLine, _iNextState );
                    _iNextState++;
                    iCurrent = iNext;
                }

                if ( *iEnd == 0 )
                {
                    //
                    // First part of OR clause.
                    //

                    *iEnd = Get( iCurrent )->StateNumber();
                }
                else
                {
                    //
                    // Subsequent OR clause.  Epsilon link to end
                    //

                    Get( iCurrent )->AddTransition( symEpsilon, *iEnd );
                }
                iCurrent = iOrStart;
                wcsLocalStart = _wcsNull;
                iLocalStart = 0;
                break;

            case wcBeginRange:
            {
                bool fReverse = false;

                wcsLocalStart = wcs-1;
                iNext = _iNextState;
                wcs++;                      // Eat '['.  ']' eaten by loop.

                //
                // Check the special cases of ^ and ]
                //

                if ( *wcs == wcInvertRange )
                {
                    wcs++;

                    fReverse = true;

                    //
                    // Add all transitions, they will be removed later.
                    //

                    for ( UINT uiNext = _chars.TranslateRange( 1,
                                                               (USHORT) symLastValidChar );
                          uiNext != 0;
                          uiNext = _chars.TranslateRange( 0, (USHORT) symLastValidChar ) )
                    {
                        Get( iCurrent )->AddTransition( uiNext,
                                                  _iNextState );
                    }

                }

                if ( *wcs == wcEndRange )
                {
                    if ( fReverse )
                    {
                        Get( iCurrent )->RemoveTransition( _chars.Translate( *wcs++ ),
                                                     _iNextState );
                    }
                    else
                    {
                        Get( iCurrent )->AddTransition( _chars.Translate( *wcs++ ),
                                                  _iNextState );
                    }
                }

                for ( ; *wcs && *wcs != wcEndRange; wcs++ )
                {
                    if ( *(wcs + 1) == wcRangeSep )
                    {
                        //
                        // A separator with nothing after it is an
                        // unterminated range.  Reject it here; skipping
                        // two characters below would otherwise step
                        // past the terminating null.
                        //

                        if ( *(wcs + 2) == 0 )
                        {
                           throw ERROR_INVALID_PARAMETER;
                        }

                        if ( fReverse )
                        {
                            Get( iCurrent )->RemoveTransition(
                                    _chars.TranslateRange( *wcs, *(wcs+2) ),
                                    _iNextState );
                        }
                        else
                        {
                            Get( iCurrent )->AddTransition(
                                    _chars.TranslateRange( *wcs, *(wcs+2) ),
                                    _iNextState );
                        }

                        for ( UINT uiNext = _chars.TranslateRange( 0,
                                                                   *(wcs+2) );
                              uiNext != 0;
                              uiNext = _chars.TranslateRange( 0, *(wcs+2) ) )
                        {
                            if ( fReverse )
                            {
                                Get( iCurrent )->RemoveTransition( uiNext,
                                                             _iNextState );
                            }
                            else
                            {
                                Get( iCurrent )->AddTransition( uiNext,
                                                          _iNextState );
                            }
                        }

                        wcs += 2;
                    }
                    else
                    {
                        if ( fReverse )
                        {
                            Get( iCurrent )->RemoveTransition(
                                    _chars.Translate( *wcs ),
                                    _iNextState );
                        }
                        else
                        {
                            Get( iCurrent )->AddTransition(
                                    _chars.Translate( *wcs ),
                                    _iNextState );
                        }
                    }
                }

                if ( *wcs != wcEndRange )
                {
                   throw ERROR_INVALID_PARAMETER;
                }

                iLocalStart = Get( iCurrent )->StateNumber();
                _iNextState++;
                iCurrent = iNext;
                break;
            }

            case wcRepeatOne:
                if ( iLocalStart == 0 )
                {
                   throw ERROR_INVALID_PARAMETER;
                }

                Get( iCurrent )->AddTransition( symEpsilon, iLocalStart );
                break;

            case wcRepeatZero:
                if ( iLocalStart == 0 )
                {
                   throw ERROR_INVALID_PARAMETER;
                }
                Get( iLocalStart )->AddTransition( symEpsilon,
                                                   Get( iCurrent )->StateNumber() );
                Get( iCurrent )->AddTransition( symEpsilon, iLocalStart );
                break;

            case wcRepeatZeroOrOne:
            {
                if ( iLocalStart == 0 )
                {
                   throw ERROR_INVALID_PARAMETER;
                }
                Get( iLocalStart )->AddTransition( symEpsilon,
                                                   Get( iCurrent )->StateNumber() );
                break;
            }

            default:
                iNext = _iNextState;

                Get( iCurrent )->AddTransition( _chars.Translate( *wcs ),
                                          _iNextState );

                iLocalStart = Get( iCurrent )->StateNumber();
                wcsLocalStart = wcs - 1;
                _iNextState++;
                iCurrent = iNext;
                break;
            }

            break;  // switch for wcEscape
        }

        default:
            iNext = _iNextState;

            Get( iCurrent )->AddTransition( _chars.Translate( *wcs ),
                                      _iNextState );

            //
            // In non-EGREP (DOS) syntax dot '.' is funny.  It will match
            // a dot, but if you're at the end of string it will also match
            // end.  So *.txt will look for strings with zero or more
            // characters followed by '.txt' but *. will find any names
            // without an extension and with no trailing dot.
            //

            if ( *wcs == wcDOSDot )
            {
                Get( iCurrent )->AddTransition( symEndLine, _iNextState );
            }

            iLocalStart = Get( iCurrent )->StateNumber();
            wcsLocalStart = wcs;
            _iNextState++;
            iCurrent = iNext;
            break;
        }
    }

    //
    // non-EGREP (DOS) regex match entire string.  An empty expression has
    // no previous character to inspect, so it is anchored unconditionally.
    //

    if ( wcHalt == 0 && ( wcs == wcsBegin || *(wcs-1) != wcAnyMultiple ) )
    {
        iNext = _iNextState;
        Get( iCurrent )->AddTransition( symEndLine, _iNextState );
        iLocalStart = 0;
        wcsLocalStart = _wcsNull;
        _iNextState++;
        iCurrent = iNext;
    }

    //
    // If we haven't had an OR clause yet, then set iEnd
    //

    if ( *iEnd == 0 )
    {
        //
        // First part of OR clause.
        //

        *iEnd = Get( iCurrent )->StateNumber();
    }
    else
    {
        //
        // Subsequent OR clause.  Epsilon link to end
        //

        Get( iCurrent )->AddTransition( symEpsilon, *iEnd );
    }

    if ( pwcsEnd )
    {
        *pwcsEnd = wcs + 1;             // Eat halt character.
    }

    //
    // The loop must have stopped on the halt character (the terminating
    // null when wcHalt is 0); anything else is an unterminated construct.
    //

    if( *wcs != wcHalt )
    {
        throw ERROR_INVALID_PARAMETER;
    }
}

//
// Parses the body of a repeat specification starting at [wcs]: either
// "m", "m," or "m,n", followed by wcEscape + wcEndRepeat.
//
//   [wcs]      -- On entry, the first character after the opening brace.
//                 On successful return, points at the wcEndRepeat character.
//   [cRepeat1] -- Receives m (1..255).
//   [cRepeat2] -- Receives n (1..255), m when no comma is present, or 0
//                 when the upper bound is omitted ("m,").
//
// Throws ERROR_INVALID_PARAMETER if a count is missing, zero or greater
// than 255, or if the terminator is missing.
//
void CNFA::ParseRepeat( WCHAR const * & wcs, unsigned & cRepeat1, unsigned & cRepeat2 )
{
    cRepeat1 = 0;
    cRepeat2 = 0;

    //
    // Digits are tested with an explicit range check: isdigit() is only
    // defined for values that fit in an unsigned char, which a WCHAR
    // need not.  The bound is checked on every digit so a long run of
    // digits cannot wrap back into the valid range.
    //

    for ( ; *wcs >= '0' && *wcs <= '9'; wcs++ )
    {
        cRepeat1 *= 10;
        cRepeat1 += *wcs - '0';

        if ( cRepeat1 > 255 )
        {
            throw ERROR_INVALID_PARAMETER;
        }
    }

    if ( cRepeat1 == 0 )
    {
        throw ERROR_INVALID_PARAMETER;
    }

    if ( *wcs == ',' )
    {
        wcs++;

        if ( *wcs == wcEscape && *(wcs+1) == wcEndRepeat )
        {
            // "m," -- no upper bound; cRepeat2 stays 0.
            wcs++;
        }
        else
        {
            for ( ; *wcs >= '0' && *wcs <= '9'; wcs++ )
            {
                cRepeat2 *= 10;
                cRepeat2 += *wcs - '0';

                if ( cRepeat2 > 255 )
                {
                   throw ERROR_INVALID_PARAMETER;
                }
            }

            if ( cRepeat2 == 0 )
            {
               throw ERROR_INVALID_PARAMETER;
            }

            if ( *wcs != wcEscape || *(wcs+1) != wcEndRepeat )
            {
               throw ERROR_INVALID_PARAMETER;
            }
            else
            {
                wcs++;
            }
        }
    }
    else if ( *wcs == wcEscape && *(wcs+1) == wcEndRepeat )
    {
        // "m" -- exact count.
        wcs++;
        cRepeat2 = cRepeat1;
    }
    else
    {
        throw ERROR_INVALID_PARAMETER;
    }
}

//+-------------------------------------------------------------------------
//
//  Member:     CDFA::CDFA, public
//
//  Synopsis:   Constructs a DFA from a NFA.
//
//  Arguments:  [pwcs]      -- Regular expression (passed to NFA)
//              [fCaseSens] -- true if case-sensitive search
//
//  History:    20-Jan-92 KyleP     Created
//
//--------------------------------------------------------------------------

CDFA::CDFA( WCHAR const * pwcs, bool fCaseSens )
        : _nfa( pwcs, fCaseSens ),
          _xs( _nfa.NumStates() ),
          _cState( _nfa.NumStates() ),
          _pStateTrans( 0 ),
          _pStateFinal( 0 )
{
    // The NFA is built from the regex in the initializer list; _xs and
    // _cState are both sized from its state count.
    // NOTE(review): this relies on _nfa being declared before _xs and
    // _cState in the class -- confirm against the header.
    // The transition and final-state tables are allocated in CommonCtor.
    CommonCtor();
}

//+-------------------------------------------------------------------------
//
//  Member:     CDFA::CDFA, public
//
//  Synopsis:   Copy constructor
//
//  Arguments:  [src] -- Source DFA
//
//  History:    20-Jan-92 KyleP     Created
//
//--------------------------------------------------------------------------

CDFA::CDFA( CDFA const & src )
        : _nfa( src._nfa ),
          _xs( src._nfa.NumStates() ),
          _cState( src._nfa.NumStates() ),
          _pStateTrans( 0 ),
          _pStateFinal( 0 )
{
    // Only the NFA is copied from the source.  The transition and
    // final-state tables are not copied; CommonCtor allocates fresh ones
    // and adds just the start state.
    CommonCtor();
}

//+-------------------------------------------------------------------------
//
//  Member:     CDFA::CommonCtor, private
//
//  Synopsis:   Code common to both constructors.
//
//  History:    13-Jul-95 KyleP     Snarfed from constructor
//
//--------------------------------------------------------------------------

void CDFA::CommonCtor()
{
    //
    // The DFA start state is the epsilon closure of the NFA start state,
    // mapped to a single state number by _xs.
    //

    CStateSet ss;

    _nfa.EpsClosure( _nfa.StartState(), ss );

    _stateStart = _xs.XlatToOne( ss );

    //
    // Initialize translation table: (_cState + 1) rows by
    // (number of character classes + 1) columns, plus one final-state
    // flag per row.
    //

    int cEntries = (_cState + 1) * ( _nfa.Translate().NumClasses() + 1 );

    _pStateTrans = new UINT [ cEntries ];
    _pStateFinal = new bool [ _cState + 1 ];

    //
    // Every transition byte starts as 0xFF; every final flag starts
    // cleared.
    //

    memset( _pStateTrans, 0xFF, cEntries * sizeof(_pStateTrans[0]) );
    RtlZeroMemory( _pStateFinal, (_cState + 1) * sizeof(_pStateFinal[0]) );

    //
    // Symbol 0 is marked undefined for every row, highest row first.
    //

    int iRow = _cState;

    while ( iRow >= 0 )
    {
        AddTransition( iRow, 0, stateUndefined );
        iRow--;
    }

    //
    // Finally register the start state, flagged final if its NFA state
    // set contains a final state.
    //

    Add( _stateStart, _nfa.IsFinal( ss ) );
}

//+-------------------------------------------------------------------------
//
//  Member:     CDFA::~CDFA, public
//
//  Synopsis:   Clean up DFA.  Free state tables.
//
//  History:    20-Jun-92 KyleP     Created
//
//--------------------------------------------------------------------------

CDFA::~CDFA()
{
    //
    // Both tables are allocated with new[] in CommonCtor, so they must
    // be released with the array form of delete.  Deleting a null
    // pointer is harmless if CommonCtor never got that far.
    //

    delete [] _pStateTrans;
    delete [] _pStateFinal;
}

//+-------------------------------------------------------------------------
//
//  Member:     CDFA::Recognize, public
//
//  Arguments:  [wcs] -- Input string.
//
//  Returns:    true if [wcs] is matched by the regular expression.
//
//  History:    20-Jan-92 KyleP     Created
//
//--------------------------------------------------------------------------

bool CDFA::Recognize( WCHAR * wcs )
{
    //
    // Runs the DFA over [wcs], building missing transitions on demand
    // from the NFA.  The input is fed as: symBeginLine, then each
    // character of [wcs] (translated through _nfa.Translate()), then
    // symEndLine repeatedly.  The loop ends as soon as a final state is
    // reached, an undefined transition is hit, or end-of-line symbols stop
    // changing the state.
    //

    //////////
    // Modified from original version to handle a NULL string.
    //////////
    if (!wcs) { return false; }

    UINT CurrentState     = _stateStart;
    UINT LastState        = CurrentState;
    bool fFinal        = IsFinal( CurrentState );
    WCHAR wcCurrent       = symBeginLine;   // first symbol fed, before any character of wcs

    while ( !fFinal )
    {
        UINT NextState = Move( CurrentState, wcCurrent );

        //
        // Transition not known yet: compute it from the NFA and record it
        // with AddTransition so it is available on later calls.
        //

        if ( NextState == stateUncomputed )
        {
            CStateSet ssCurrent;
            CStateSet ssNew;
            CStateSet ssClosed;

            // Expand the DFA state into its set of NFA states.
            _xs.XlatToMany( CurrentState, ssCurrent );

            _nfa.Move( ssCurrent, ssNew, wcCurrent );

            if ( ssNew.Count() == 0 )
            {
                // No NFA state is reachable on this symbol: dead transition.
                NextState = stateUndefined;
                AddTransition( CurrentState, wcCurrent, NextState );
            }
            else
            {
                _nfa.EpsClosure( ssNew, ssClosed );

                // Map the closed NFA state set to a single DFA state number.
                NextState = _xs.XlatToOne( ssClosed );

                // Register the DFA state if this is the first time it is seen.
                if ( !IsComputed( NextState ) )
                {
                    Add( NextState, _nfa.IsFinal( ssClosed ) );
                }

                AddTransition( CurrentState, wcCurrent, NextState );

            }

        }

        // No transition exists for this symbol: the string cannot match.
        if ( NextState == stateUndefined )
        {
            return( false );
        }

        LastState    = CurrentState;
        CurrentState = NextState;

        fFinal =       IsFinal( CurrentState );


        //
        // If we ran out of string then just keep going, appending
        // end-of-string symbols.  Unfortunately the string is conceptually
        // a set of characters followed by an arbitrary number of
        // end-of-string symbols.  In non-EGREP the end-of-string symbol
        // may actually cause multiple state transitions before reaching
        // a final state.  In non-EGREP (DOS) mode we stop only when we
        // are no longer 'making progress' (moving to new states) on
        // end-of-string.  I haven't completely convinced myself this
        // algorithm is guaranteed to terminate.
        //

        if ( wcCurrent == symEndLine )
        {
            // End-of-line no longer changes the state: stop and report fFinal.
            if ( LastState == CurrentState )
                break;
        }
        else
        {
            wcCurrent = *wcs++;

            //
            // After we've exhausted the string, append the special
            // end-of-line character.
            //

            if ( wcCurrent == 0 )
            {
                // From here on wcCurrent stays symEndLine, so wcs is not
                // read again past the terminator.
                wcCurrent = symEndLine;
            }
            else
            {
                wcCurrent = (WCHAR)_nfa.Translate().Translate( wcCurrent );
            }
        }

    }

    return( fFinal );
}

//+-------------------------------------------------------------------------
//
//  Member:     CDFA::Add, private
//
//  Synopsis:   Adds a new state to the DFA.
//
//  Arguments:  [state]  -- State number
//              [fFinal] -- true if state is a final state.
//
//  History:    20-Jan-92 KyleP     Created
//
//  Notes:      All transitions for the new state are initially uncomputed.
//
//--------------------------------------------------------------------------

void CDFA::Add( UINT state, bool fFinal )
{
    //
    // Registers [state] in the DFA tables: symbol 0 of the state is marked
    // stateUncomputed and its final flag is set to [fFinal].  The tables
    // hold entries for states 0 .. _cState inclusive and are grown first
    // when [state] does not fit.
    //

    if ( state > _cState )
    {
        //
        // Since the number of states required will probably grow at
        // a slow rate, increase the size of the array in a linear
        // fashion.
        //
        // NOTE(review): a single growth step is performed, so this assumes
        // state <= _cState + DeltaState -- confirm that callers only hand
        // out state numbers incrementally.
        //

        UINT const DeltaState = 10;

        UINT *    oldStateTrans = _pStateTrans;
        bool * oldStateFinal = _pStateFinal;
        UINT      oldcState = _cState;
        UINT      oldcEntries = (oldcState + 1) *
            ( _nfa.Translate().NumClasses() + 1 );

        _cState += DeltaState;
        UINT cEntries = (_cState + 1) * ( _nfa.Translate().NumClasses() + 1 );

        _pStateTrans = new UINT [ cEntries ];
        _pStateFinal = new bool [ _cState + 1 ];

        //
        // Initialize new state tables...
        //
        // Both old tables hold oldcState + 1 states (0 .. oldcState), so
        // that many final flags must be carried over.  Copying only
        // oldcState flags would drop the final flag of state oldcState.
        //

        memcpy( _pStateTrans, oldStateTrans, oldcEntries * sizeof( UINT ) );
        memcpy( _pStateFinal, oldStateFinal, (oldcState + 1) * sizeof( bool ) );

        memset( _pStateTrans + oldcEntries, 0xFF, (cEntries - oldcEntries)*sizeof(_pStateTrans[0]) );
        RtlZeroMemory( _pStateFinal + oldcState + 1, (_cState - oldcState)*sizeof(_pStateFinal[0]) );

        for ( UINT i = oldcState + 1; i <= _cState; i++ )
        {
            AddTransition( i, 0, stateUndefined );
        }

        //
        // ...and destroy the old.  They were allocated with array new, so
        // array delete is required.
        //

        delete [] oldStateTrans;
        delete [] oldStateFinal;
    }

    //
    // All states are set to stateUncomputed above, except the 'undefined' flag-state.
    //

    AddTransition( state, 0, stateUncomputed );
    _pStateFinal[state] = fFinal;
}