//***************************************************************************

//

//  ANALYSER.CPP

//

//  Module: OLE MS Provider Framework

//

// Copyright (c) 1996-2001 Microsoft Corporation, All Rights Reserved
//
//***************************************************************************

#include "precomp.h"
#include <instpath.h>
#include <provexpt.h>
#include <strsafe.h>

/*
 * Constructs an empty lexicon: position 0, no token stream, and no owned
 * string/token buffer.
 */
WbemLexicon :: WbemLexicon () : position ( 0 ) , tokenStream ( NULL ) 
{
    // The destructor switches on `token` and GetToken () returns it, so it
    // must never be left indeterminate (e.g. when the analyser rejects its
    // input before assigning a token kind). EOF_ID owns no buffer, so the
    // destructor has nothing to release for it.
    // NOTE(review): if LexiconToken has a dedicated "invalid" enumerator,
    // prefer that here - confirm against the class declaration.
    token = EOF_ID ;

    value.string = NULL ;
    value.token = NULL ;
}

/*
 * Releases the buffer owned by this lexicon, if any. Only TOKEN_ID and
 * STRING_ID lexicons carry a heap-allocated array; every other kind has
 * nothing to free.
 */
WbemLexicon :: ~WbemLexicon ()
{
    if ( token == TOKEN_ID )
    {
        delete [] value.token ;
    }
    else if ( token == STRING_ID )
    {
        delete [] value.string ;
    }
}

/*
 * Returns the kind of this lexicon.
 */
WbemLexicon :: LexiconToken WbemLexicon :: GetToken ()
{
    return this->token ;
}

/*
 * Returns a pointer to the value stored inside this lexicon. The pointer
 * refers to a member of the lexicon itself.
 */
WbemLexiconValue *WbemLexicon :: GetValue ()
{
    WbemLexiconValue *t_Value = & this->value ;
    return t_Value ;
}

/*
 * Constructs an analyser over a private copy of tokenStream.
 *
 * tokenStream - NUL-terminated text to analyse; may be NULL, in which case
 *               no stream is held.
 */
WbemAnalyser :: WbemAnalyser ( WCHAR *tokenStream ) : status ( TRUE ) , position ( 0 ) , stream ( NULL ) 
{
    if ( tokenStream == NULL )
    {
        return ;
    }

    // Character count including the terminating NUL.
    DWORD t_BufferChars = wcslen ( tokenStream ) + 1 ;

    stream = new WCHAR [ t_BufferChars ] ;
    StringCchCopyW ( stream , t_BufferChars , tokenStream ) ;
}

/*
 * Releases the analyser's private copy of the token stream.
 */
WbemAnalyser :: ~WbemAnalyser () 
{
    // delete [] on a NULL pointer is a no-op, so no guard is required.
    delete [] this->stream ;
}

/*
 * Replaces the text being analysed with a private copy of tokenStream and
 * rewinds the analyser to the start of it.
 *
 * tokenStream - NUL-terminated text to analyse. NULL is accepted (as it is
 *               by the constructor) and leaves the analyser with no stream.
 *
 * The copy is made before the old stream is released, so if the allocation
 * throws the analyser is left untouched rather than holding a dangling
 * pointer that the destructor would delete a second time.
 */
void WbemAnalyser :: Set ( WCHAR *tokenStream ) 
{
    WCHAR *t_Copy = NULL ;

    if ( tokenStream )
    {
        DWORD t_TextLength = wcslen ( tokenStream ) + 1 ;
        t_Copy = new WCHAR [ t_TextLength ] ;
        StringCchCopyW ( t_Copy , t_TextLength , tokenStream ) ;
    }

    // NOTE(review): the constructor starts with status == TRUE, whereas Set
    // resets it to 0 until the next token is read. Preserved as-is; confirm
    // this asymmetry is intended.
    status = 0 ;
    position = 0 ;

    delete [] stream ;
    stream = t_Copy ;
}

/*
 * Rewinds the analyser to the position recorded in the given lexicon, so
 * that the next read starts again from where that lexicon was read.
 *
 * token - lexicon whose recorded position is restored; must not be NULL.
 */
void WbemAnalyser :: PutBack ( WbemLexicon *token ) 
{
    this->position = ( *token ).position ;
}

/*
 * Returns non-zero when token can start a decimal integer, i.e. it is one
 * of the ASCII digits '1'..'9' (a leading '0' introduces hex/octal instead).
 *
 * An explicit ASCII range is used rather than iswdigit because the lexer
 * converts digits with `token - 48`, which is only valid for '0'..'9';
 * iswdigit may also classify non-ASCII digit characters as digits.
 */
BOOL WbemAnalyser :: IsLeadingDecimal ( WCHAR token )
{
    return ( token >= L'1' && token <= L'9' ) ;
}

/*
 * Returns non-zero when token is one of the ASCII digits '0'..'9'.
 *
 * An explicit ASCII range is used rather than iswdigit because the lexer
 * converts digits with `token - 48`, which is only valid for '0'..'9';
 * iswdigit may also classify non-ASCII digit characters as digits.
 */
BOOL WbemAnalyser :: IsDecimal ( WCHAR token )
{
    return ( token >= L'0' && token <= L'9' ) ;
}

/*
 * Returns non-zero when token is an ASCII hexadecimal digit
 * ('0'..'9', 'a'..'f' or 'A'..'F').
 *
 * The accepted set is spelled out so that it matches exactly what HexToDec
 * can convert; a character accepted here but unknown to HexToDec would be
 * silently read as the value 0.
 */
BOOL WbemAnalyser :: IsHex ( WCHAR token )
{
    return ( token >= L'0' && token <= L'9' ) ||
           ( token >= L'a' && token <= L'f' ) ||
           ( token >= L'A' && token <= L'F' ) ;
}
    
/*
 * Returns non-zero when iswspace classifies token as white space.
 */
BOOL WbemAnalyser :: IsWhitespace ( WCHAR token )
{
    BOOL t_IsSpace = iswspace ( token ) ;
    return t_IsSpace ;
}

/*
 * Returns non-zero when token is one of the octal digits '0'..'7'.
 */
BOOL WbemAnalyser :: IsOctal ( WCHAR token )
{
    return ! ( token < L'0' || token > L'7' ) ;
}

/*
 * Returns non-zero when iswalpha classifies token as alphabetic.
 */
BOOL WbemAnalyser :: IsAlpha ( WCHAR token )
{
    BOOL t_IsLetter = iswalpha ( token ) ;
    return t_IsLetter ;
}

/*
 * Returns 1 when token may appear inside an identifier: anything iswalnum
 * accepts, plus underscore and hyphen. Returns 0 otherwise.
 */
BOOL WbemAnalyser :: IsAlphaNumeric ( WCHAR token )
{
    if ( token == L'_' || token == L'-' )
    {
        return 1 ;
    }

    return iswalnum ( token ) ? 1 : 0 ;
}

/*
 * Returns non-zero when token is the terminating NUL of the stream.
 */
BOOL WbemAnalyser :: IsEof ( WCHAR token )
{
    return ( token == L'\0' ) ;
}

/*
 * Converts a digit character to its numeric value by subtracting '0'.
 * No range check is performed; callers are expected to pass a digit.
 */
LONG WbemAnalyser :: OctToDec ( WCHAR token ) 
{
    LONG t_Digit = token - L'0' ;
    return t_Digit ;
}

/*
 * Converts a hexadecimal digit character ('0'..'9', 'a'..'f', 'A'..'F')
 * to its numeric value 0..15. Any other character yields 0.
 */
LONG WbemAnalyser :: HexToDec ( WCHAR token ) 
{
    if ( token >= L'0' && token <= L'9' )
    {
        return token - L'0' ;
    }

    if ( token >= L'a' && token <= L'f' )
    {
        return 10 + ( token - L'a' ) ;
    }

    if ( token >= L'A' && token <= L'F' )
    {
        return 10 + ( token - L'A' ) ;
    }

    // Not a hexadecimal digit.
    return 0 ;
}

/*
 * Allows the analyser to be tested like a pointer: yields this object while
 * status is non-zero and NULL otherwise.
 */
WbemAnalyser :: operator void * () 
{
    if ( ! status )
    {
        return NULL ;
    }

    return this ;
}

// State numbers for the state machine in WbemAnalyser :: GetToken.
// Each *_START value is the first state of a family; GetToken reaches the
// follow-on states of a family by adding small offsets (e.g. OID_START + 1).
#define DEC_INTEGER_START 1000
#define HEX_INTEGER_START 2000
#define OCT_INTEGER_START 3000
#define STRING_START 4000
#define TOKEN_START 5000
#define OID_START 6000

// The *_STATE values below are not referenced anywhere in the code visible
// in this file section.
#define OID_STATE 9000
#define TOKEN_STATE 9001
#define STRING_STATE 9002
#define EOF_STATE 9003
#define DEC_INTEGER_STATE 9004
#define HEX_INTEGER_STATE 9005
#define OCT_INTEGER_STATE 9006

// Terminal states: GetToken's loop stops as soon as either one is reached.
#define ACCEPT_STATE 10000
#define REJECT_STATE 10001

/*
 * Returns the next lexicon from the stream. When the analyser holds no
 * stream at all, a newly allocated EOF_ID lexicon stamped with the current
 * position is returned instead.
 *
 * The returned object is allocated with new.
 */
WbemLexicon *WbemAnalyser :: Get () 
{
    if ( stream )
    {
        return GetToken () ;
    }

    // No text to analyse: report end of input.
    WbemLexicon *t_Eof = new WbemLexicon ;
    t_Eof->position = position ;
    t_Eof->token = WbemLexicon :: EOF_ID ;

    return t_Eof ;
}

/*
 * Scans the next lexicon from `stream`, starting at `position`.
 *
 * The scanner is a hand-written state machine. State 0 skips white space and
 * dispatches on the first significant character; the *_START families then
 * recognise integers (decimal, hex, octal), quoted strings, identifiers and
 * brace-delimited GUIDs. A '{' that is not followed by a well-formed GUID is
 * returned as a plain OPEN_BRACE_ID and the scanner rewinds to just after
 * the brace.
 *
 * Returns a lexicon allocated with new. Its `position` member records where
 * scanning started (see PutBack). `status` is set to FALSE when the input was
 * rejected and TRUE otherwise; on rejection the returned lexicon's token
 * kind is whatever the WbemLexicon constructor left it as.
 *
 * Requires `stream` to be non-NULL (Get () checks this before calling).
 *
 * The scanner never advances `position` beyond the terminating NUL, so
 * calling it again after end of input keeps reporting end of input instead
 * of reading past the buffer.
 *
 * NOTE(review): if realloc_throw throws, `lexicon` and the scratch `string`
 * buffer are not released here.
 */
WbemLexicon *WbemAnalyser :: GetToken () 
{
    WbemLexicon *lexicon = new WbemLexicon ;
    lexicon->position = position ;
    ULONG state = 0 ;

/* 
 * Integer Definitions
 */

    BOOL negative = FALSE ;
    // Largest value that can still be multiplied by 10 (2^31 / 10), and the
    // largest final digit allowed once that value has been reached.
    LONG magicMult = ( LONG ) ( ( ( ULONG ) ( 1L << 31L ) ) / 10L ) ; 
    LONG magicNegDigit = 8 ;
    LONG magicPosDigit = 7 ;
    LONG datum = 0 ;    

/*
 * String Definitions
 */
    ULONG string_start = 0 ;
    ULONG string_length = 0 ;
    // Scratch buffer grown one character at a time by realloc_throw and
    // released with free once the literal is complete or rejected.
    WCHAR *string = NULL ;


/*
 * Token Definitions
 */

    ULONG token_start = 0 ;

/*
 * OID Definitions
 */
    ULONG hex_datum = 0 ;
    ULONG brace_position = 0 ;
    ULONG hex_repetitions = 0 ;
    ULONG nybbleRepetitions = 0 ;

    while ( state != REJECT_STATE && state != ACCEPT_STATE )
    {
        WCHAR token = stream [ position ] ;
        switch ( state )
        {
            case 0:     // start: skip white space, dispatch on first character
            {
                if ( IsLeadingDecimal ( token ) )
                {
                    state = DEC_INTEGER_START + 1  ;
                    datum = ( token - 48 ) ;
                }
                else if ( token == L'@' )
                {
                    lexicon->token = WbemLexicon :: AT_ID ;
                    state = ACCEPT_STATE ;
                } 
                else if ( token == L'\\' )
                {
                    lexicon->token = WbemLexicon :: BACKSLASH_ID ;
                    state = ACCEPT_STATE ;
                } 
                else if ( token == L'\"' )
                {
                    state = STRING_START ;
                    string_start = position + 1 ;
                }
                else if ( token == L'{' )
                {
                    state = OID_START ;
                    hex_datum = 0 ;
                    brace_position = position ;
                    hex_repetitions = 0 ;
                    nybbleRepetitions = 0 ;
                }
                else if ( token == L'}' )
                {
                    lexicon->token = WbemLexicon :: CLOSE_BRACE_ID ;
                    state = ACCEPT_STATE ;
                }
                else if ( token == L'=' )
                {
                    lexicon->token = WbemLexicon :: EQUALS_ID ;
                    state = ACCEPT_STATE ;
                }
                else if ( token == L'.' )
                {
                    lexicon->token = WbemLexicon :: DOT_ID ;
                    state = ACCEPT_STATE ;
                }
                else if ( token == L',' )
                {
                    lexicon->token = WbemLexicon :: COMMA_ID ;
                    state = ACCEPT_STATE ;
                }
                else if ( token == L':' )
                {
                    lexicon->token = WbemLexicon :: COLON_ID ;
                    state = ACCEPT_STATE ;
                }
                else if ( token == L'+' )
                {
                    state = DEC_INTEGER_START ;
                }
                else if ( token == L'-' ) 
                {
                    negative = TRUE ;
                    state = DEC_INTEGER_START ;
                }
                else if ( token == L'0' )
                {
                    state = 1 ;
                }
                else if ( IsWhitespace ( token ) ) 
                {
                    state = 0 ;
                }
                else if ( IsEof ( token ) )
                {
                    lexicon->token = WbemLexicon :: EOF_ID ;
                    state = ACCEPT_STATE ;
                }
                else if ( IsAlpha ( token ) )
                {
                    state = TOKEN_START ;
                    token_start = position ;
                }
                else state = REJECT_STATE ;
            }
            break ;

            case 1:     // seen a leading '0': hex prefix, octal digit, or just zero
            {
                if ( token == L'x' || token == L'X' )
                {
                    state = HEX_INTEGER_START ;             
                }
                else if ( IsOctal ( token ) )
                {
                    state = OCT_INTEGER_START ;
                    datum = ( token - 48 ) ;
                }
                else
                {
                    lexicon->token = WbemLexicon :: INTEGER_ID ;
                    lexicon->value.integer = 0 ;
                    // Give the look-ahead character back.
                    position -- ;
                    state = ACCEPT_STATE ;
                }
            }
            break ;

            case STRING_START:  // inside "..."
            {
                if ( token == L'\"' )
                {
                    lexicon->token = WbemLexicon :: STRING_ID ;
                    state = ACCEPT_STATE ;
        
                    if ( position == string_start )
                    {
                        // Empty literal: nothing was accumulated.
                        lexicon->value.string = new WCHAR [ 1 ] ;
                        lexicon->value.string [ 0 ] = 0 ;
                    }
                    else
                    {
                        lexicon->value.string = new WCHAR [ string_length + 1 ] ;
                        wcsncpy ( 

                            lexicon->value.string , 
                            string ,
                            string_length
                        ) ;

                        lexicon->value.string [ string_length ] = 0 ;
                        free (string);
                    }
                }
                else if ( IsEof ( token ) ) 
                {
                    // Unterminated literal. (This test used to be written as
                    // `token == IsEof ( token )`, which can never be true, so
                    // the scan ran off the end of the buffer.)
                    state = REJECT_STATE ;
                    free (string);                    
                }
                else 
                {
                    realloc_throw ( &string , sizeof ( WCHAR ) * ( string_length + 1 ) ) ;

                    string [ string_length ] = token ;
                    string_length ++ ;
                    state = STRING_START ;
                }
            }
            break ;

            // NOTE(review): no transition visible in this function enters
            // this state; it is kept for parity with the original.
            case STRING_START+1:
            {
                if ( token == L'\"' )
                {
                    realloc_throw ( &string , sizeof ( WCHAR ) * ( string_length + 1 ) ) ;

                    string [ string_length ] = L'\"' ;
                    string_length ++ ;
                    state = STRING_START ;
                }
                else if ( IsEof ( token ) ) 
                {
                    state = REJECT_STATE ;
                    // Release the scratch buffer, as the STRING_START reject
                    // path does.
                    free (string);
                }
                else 
                {
                    realloc_throw ( &string , sizeof ( WCHAR ) * ( string_length + 1 ) ) ;

                    string [ string_length ] = token ;
                    string_length ++ ;
                    state = STRING_START ;
                }
            }
            break ;

            case TOKEN_START:   // inside an identifier
            {
                if ( IsAlphaNumeric ( token ) ) 
                {
                    state = TOKEN_START ;
                }
                else 
                {
                    state = ACCEPT_STATE ;
                    lexicon->token = WbemLexicon :: TOKEN_ID ;
                    lexicon->value.token = new WCHAR [ position - token_start + 1 ] ;
                    wcsncpy ( 

                        lexicon->value.token , 
                        & stream [ token_start ] , 
                        position - token_start 
                    ) ;

                    lexicon->value.token [ position - token_start ] = 0 ;

                    // Give the look-ahead character back.
                    position -- ;
                }
            }
            break ;

            case HEX_INTEGER_START:     // after "0x": at least one hex digit required
            {
                if ( IsHex ( token ) )
                {
                    datum = HexToDec ( token ) ;
                    state = HEX_INTEGER_START + 1 ;
                }
                else
                {
                    state = REJECT_STATE ;
                }
            }
            break ;

            case HEX_INTEGER_START+1:   // further hex digits
            {
                if ( IsHex ( token ) )
                {
                    state = HEX_INTEGER_START + 1 ;

                    // NOTE(review): this guard reuses the decimal limits
                    // (2^31 / 10 and 7) although the accumulator is shifted
                    // by 4 bits, so it does not catch every overflow. Left
                    // unchanged because tightening it would reject inputs
                    // that are accepted today.
                    if ( datum > magicMult )
                    {
                        state = REJECT_STATE ;
                    }
                    else if ( datum == magicMult ) 
                    {
                        if ( HexToDec ( token ) > magicPosDigit ) 
                        {
                            state = REJECT_STATE ;
                        }
                    }

                    datum = ( datum << 4 ) + HexToDec ( token ) ;
                }
                else
                {
                    lexicon->token = WbemLexicon :: INTEGER_ID ;
                    lexicon->value.integer = datum ;
                    state = ACCEPT_STATE ;

                    position -- ;
                }
            }
            break ;

            case OCT_INTEGER_START:     // octal digits after a leading '0'
            {
                if ( IsOctal ( token ) )
                {
                    state = OCT_INTEGER_START ;

                    // NOTE(review): same decimal-based guard as the hex case
                    // above; it does not catch every overflow of a 3-bit
                    // shift. Left unchanged.
                    if ( datum > magicMult )
                    {
                        state = REJECT_STATE ;
                    }
                    else if ( datum == magicMult ) 
                    {
                        if ( OctToDec ( token ) > magicPosDigit ) 
                        {
                            state = REJECT_STATE ;
                        }
                    }

                    datum = ( datum << 3 ) + OctToDec ( token ) ;

                }
                else
                {
                    lexicon->token = WbemLexicon :: INTEGER_ID ;
                    lexicon->value.integer = datum ;
                    state = ACCEPT_STATE ;

                    position -- ;
                }
            }
            break ;

            case DEC_INTEGER_START:     // after a sign: white space, then a digit
            {
                if ( IsDecimal ( token ) )
                {
                    state = DEC_INTEGER_START + 1 ;
                    datum = ( token - 48 ) ;
                }
                else 
                if ( IsWhitespace ( token ) ) 
                {
                    state = DEC_INTEGER_START ;
                }
                else state = REJECT_STATE ;
            }   
            break ;

            case DEC_INTEGER_START+1:   // further decimal digits
            {
                if ( IsDecimal ( token ) )
                {   
                    state = DEC_INTEGER_START + 1 ;

                    // Reject before the multiply below can exceed the
                    // 32-bit range; a negative number is allowed one more
                    // unit of magnitude than a positive one.
                    if ( datum > magicMult )
                    {
                        state = REJECT_STATE ;
                    }
                    else if ( datum == magicMult ) 
                    {
                        if ( negative ) 
                        {
                            if ( ( token - 48 ) > magicNegDigit ) 
                            {
                                state = REJECT_STATE ;
                            }
                        }
                        else
                        {
                            if ( ( token - 48 ) > magicPosDigit ) 
                            {
                                state = REJECT_STATE ;
                            }
                        }
                    }

                    datum = datum * 10 + ( token - 48 ) ;
                }
                else
                {
                    lexicon->token = WbemLexicon :: INTEGER_ID ;
                    if ( negative )
                    {
                        lexicon->value.integer = datum * -1 ;
                    }
                    else
                    {
                        lexicon->value.integer = datum ;
                    }

                    state = ACCEPT_STATE ;

                    position -- ;
                }
            }   
            break ;

            // GUID states. Whenever the text stops looking like a GUID the
            // scanner rewinds to the opening brace and returns OPEN_BRACE_ID
            // (the loop's final increment then leaves `position` just after
            // the brace).

            case OID_START:     // {xxxxxxxx
            {
                if ( IsHex ( token ) )
                {
                    // Digits arrive most-significant first.
                    hex_datum = hex_datum + ( HexToDec ( token ) << ( ( 7 - hex_repetitions ) << 2 ) ) ;
                    hex_repetitions ++ ;
                    if ( hex_repetitions == 8 ) 
                    {
                        state = OID_START + 1 ;
                        lexicon->value.guid.Data1 = hex_datum ;
                        hex_repetitions = 0 ;
                        hex_datum = 0 ;
                    }
                }
                else
                {
                    state = ACCEPT_STATE ;
                    position = brace_position ;
                    lexicon->token = WbemLexicon :: OPEN_BRACE_ID ;
                }
            }
            break ;

            case OID_START+1:   // {xxxxxxxx-
            {
                if ( token == L'-' )
                {
                    state = OID_START + 2 ;
                }
                else
                {
                    state = ACCEPT_STATE ;
                    position = brace_position ;
                    lexicon->token = WbemLexicon :: OPEN_BRACE_ID ;
                }
            }
            break ;

            case OID_START+2:   // {xxxxxxxx-xxxx
            {
                if ( IsHex ( token ) )
                {
                    hex_datum = hex_datum + ( HexToDec ( token ) << ( ( 3 - hex_repetitions ) << 2 ) ) ;
                    hex_repetitions ++ ;
                    if ( hex_repetitions == 4 ) 
                    {
                        lexicon->value.guid.Data2 = ( USHORT ) hex_datum ;
                        hex_repetitions = 0 ;
                        hex_datum = 0 ;
                        state = OID_START + 3 ;
                    }
                }
                else
                {
                    state = ACCEPT_STATE ;
                    position = brace_position ;
                    lexicon->token = WbemLexicon :: OPEN_BRACE_ID ;
                }
            }
            break ;

            case OID_START+3:   // {xxxxxxxx-xxxx-
            {
                if ( token == L'-' )
                {
                    state = OID_START + 4 ;
                }
                else
                {
                    state = ACCEPT_STATE ;
                    position = brace_position ;
                    lexicon->token = WbemLexicon :: OPEN_BRACE_ID ;
                }
            }
            break ;

            case OID_START+4:   // {xxxxxxxx-xxxx-xxxx
            {
                if ( IsHex ( token ) )
                {
                    hex_datum = hex_datum + ( HexToDec ( token ) << ( ( 3 - hex_repetitions ) <<2 ) ) ;
                    hex_repetitions ++ ;
                    if ( hex_repetitions == 4 ) 
                    {
                        lexicon->value.guid.Data3 = ( USHORT ) hex_datum ;
                        hex_repetitions = 0 ;
                        hex_datum = 0 ;
                        state = OID_START + 5 ;
                    }
                }
                else
                {
                    state = ACCEPT_STATE ;
                    position = brace_position ;
                    lexicon->token = WbemLexicon :: OPEN_BRACE_ID ;
                }
            }
            break ;

            case OID_START+5:   // {xxxxxxxx-xxxx-xxxx-
            {
                if ( token == L'-' )
                {
                    state = OID_START + 6 ;
                }
                else
                {
                    state = ACCEPT_STATE ;
                    position = brace_position ;
                    lexicon->token = WbemLexicon :: OPEN_BRACE_ID ;
                }
            }
            break ;

            case OID_START+6:   // {xxxxxxxx-xxxx-xxxx-xxxx
            {
                if ( IsHex ( token ) )
                {
                    // Two nybbles per byte, high nybble first; these four
                    // digits fill Data4 [ 0 ] and Data4 [ 1 ].
                    hex_datum = hex_datum + ( HexToDec ( token ) << ( ( 1 - nybbleRepetitions ) << 2 ) ) ;
                    hex_repetitions ++ ;
                    nybbleRepetitions ++ ;

                    if ( hex_repetitions == 2 ) 
                    {
                        lexicon->value.guid.Data4 [ 0 ] = ( char ) hex_datum ;
                        hex_datum = 0 ;
                        nybbleRepetitions = 0 ;
                    }                       
                    else if ( hex_repetitions == 4 ) 
                    {
                        lexicon->value.guid.Data4 [ 1 ] = ( char ) hex_datum ;
                        hex_repetitions = 0 ;
                        hex_datum = 0 ;
                        nybbleRepetitions = 0 ;
                        state = OID_START + 7 ;
                    }
                }
                else
                {
                    state = ACCEPT_STATE ;
                    position = brace_position ;
                    lexicon->token = WbemLexicon :: OPEN_BRACE_ID ;
                }
            }
            break ;

            case OID_START+7:   // {xxxxxxxx-xxxx-xxxx-xxxx-
            {
                if ( token == L'-' )
                {
                    state = OID_START + 8 ;
                }
                else
                {
                    state = ACCEPT_STATE ;
                    position = brace_position ;
                    lexicon->token = WbemLexicon :: OPEN_BRACE_ID ;
                }
            }
            break ;

            case OID_START+8:   // {xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
            {
                if ( IsHex ( token ) )
                {
                    hex_datum = hex_datum + ( HexToDec ( token ) << ( ( 1 - nybbleRepetitions ) << 2 ) ) ;
                    hex_repetitions ++ ;
                    nybbleRepetitions ++ ;

                    if ( hex_repetitions == 12 ) 
                    {
                        lexicon->value.guid.Data4 [ 7 ] = ( char ) hex_datum ;
                        hex_repetitions = 0 ;
                        nybbleRepetitions = 0 ;
                        hex_datum = 0 ;
                        state = OID_START + 9 ;
                    }
                    else if ( hex_repetitions % 2 == 0 ) 
                    {
                        // Digit pairs 1..5 land in Data4 [ 2 ] .. Data4 [ 6 ].
                        lexicon->value.guid.Data4 [ 1 + ( hex_repetitions >> 1 ) ] = ( char ) hex_datum ;
                        hex_datum = 0 ;
                        nybbleRepetitions = 0 ;
                    }                       
                }
                else
                {
                    state = ACCEPT_STATE ;
                    position = brace_position ;
                    lexicon->token = WbemLexicon :: OPEN_BRACE_ID ;
                }
            }
            break ;

            case OID_START+9:   // {xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx}
            {
                if ( token == L'}' )
                {
                    lexicon->token = WbemLexicon :: OID_ID ;                
                    state = ACCEPT_STATE ;
                }
                else
                {
                    state = ACCEPT_STATE ;
                    position = brace_position ;
                    lexicon->token = WbemLexicon :: OPEN_BRACE_ID ;
                }
            }
            break ;

            case ACCEPT_STATE:
            case REJECT_STATE:
            default:
            {
                state = REJECT_STATE ;
            } ;
            break ;
        }

        // Advance to the next character, but never step past the terminating
        // NUL: every branch that consumes the NUL also ends the loop, and
        // leaving `position` on it keeps later calls inside the buffer.
        // Branches that backed up (position -- / brace_position) now sit on
        // a non-NUL character and advance exactly as before.
        if ( ! IsEof ( stream [ position ] ) )
        {
            position ++ ;
        }
    }

    status = ( state != REJECT_STATE ) ;

    return lexicon ;
}