/*++

Copyright (c) 1997-2000  Microsoft Corporation

Module Name:

    EzParse.cpp

Abstract:

    Poor man's parser for C/C++ (or any similarly structured) source files.
    
Author:

    Gor Nishanov (gorn) 03-Apr-1999

Revision History:

    Gor Nishanov (gorn) 03-Apr-1999 -- hacked together to prove that this can work

    GorN: 29-Sep-2000 - fix enumeration bug
    GorN: 29-Sep-2000 - add support for KdPrintEx like function
    GorN: 09-Oct-2000 - fixed "//" in the string bug 
    GorN: 23-Oct-2000 - IGNORE_CPP_COMMENT, IGNORE_POUND_COMMENT options added 
    GorN: 16-Apr-2001 - Properly handle \" within a string
    
ToDo:

    Clean it up
    

--*/

#define STRICT

#include <stdio.h>
#include <windows.h>

#pragma warning(disable: 4100)
#include <algorithm>
#include <xstring>
#include "ezparse.h"

// Running total of errors reported so far; incremented by
// ExParsePrintErrorPrefix each time an error prefix is printed.
DWORD ErrorCount = 0;

// Parse context of the file or resource currently being processed, or NULL
// when none is active. EzParseEx / EzParseResourceEx point it at their local
// context while the processing callback runs and restore the previous value
// afterwards.
PEZPARSE_CONTEXT EzParseCurrentContext = NULL;

// To force build tool to recognize our errors
// (every error message is preceded by one of these "cl ..." lines;
//  BUILD_PREFIX_FNAME consumes the current file name as its %s argument)

#define BUILD_PREFIX_FNAME "cl %s\n"
#define BUILD_PREFIX "cl wpp\n"

void ExParsePrintErrorPrefix(FILE* f, char * func)
{
    ++ErrorCount;
    if (EzParseCurrentContext) {
        fprintf(f,BUILD_PREFIX_FNAME "%s(%d) : error : (%s)", 
               EzParseCurrentContext->filename, 
               EzParseCurrentContext->filename, 
               EzGetLineNo(EzParseCurrentContext->currentStart, EzParseCurrentContext),
               func);
    } else {
        fprintf(f,BUILD_PREFIX "wpp : error : (%s)", func);
    }
}

/*++
    Backward-scan helper: q points at a quote character (' or ") met while
    walking the buffer from right to left.

    Returns
      - q - 1 when that quote is itself preceded by a backslash (the quote is
        treated as escaped, so the caller simply steps over it);
      - otherwise a pointer to the nearest earlier occurrence of the same
        quote character that is not preceded by a backslash;
      - 0 when the beginning of the buffer is reached without finding one.
--*/
LPCSTR skip_stuff_in_quotes(LPCSTR  q, LPCSTR  begin)
{
    const char quote = *q;

    // An escaped quote does not delimit anything.
    if (q > begin && q[-1] == '\\') {
        return q - 1;
    }

    while (q != begin) {
        --q;
        if (*q != quote) {
            continue;
        }
        const bool escaped = (q != begin) && (q[-1] == '\\');
        if (!escaped) {
            return q;
        }
    }
    return 0;
}


void
adjust_pair( STR_PAIR& str )
/*++
  Shrink the pair to remove leading and trailing whitespace.

  str.beg is advanced and str.end is pulled back until neither end of the
  [beg, end) range is a whitespace character (or the range becomes empty).
 */
{
    // The <ctype.h> classifiers are only defined for values representable as
    // unsigned char (or EOF), so plain char must be converted first; a
    // negative char (e.g. a non-ASCII byte) is undefined behavior otherwise.
    while (str.beg < str.end && isspace(static_cast<unsigned char>(*str.beg))) { ++str.beg; }
    while (str.beg < str.end && isspace(static_cast<unsigned char>(str.end[-1]))) { --str.end; }
}

void
remove_cpp_comment(STR_PAIR& str)
/*++
  Trim "//" comments from an argument range.

  Phase 1 advances str.beg past leading whitespace and past any "//"
  comments that start the text (each such comment runs to the next CR/LF).
  Phase 2 walks forward from there and, at the first "//" found outside a
  double-quoted string, cuts the range by setting str.end to it.

  The range is left as it was at that moment whenever the end of the text
  is reached first (for instance when the text is nothing but a comment).
 */
{
    LPCSTR p = str.beg;

    // Phase 1: cut comments at the beginning of the text.
    for(;;) {
        // skip the whitespace
        for(;;) {
            if (p == str.end) return;
            // classifier arguments must be representable as unsigned char
            if (!isspace(static_cast<unsigned char>(*p))) break;
            ++p;
        }
        str.beg = p;
        if (p + 1 == str.end) return;   // a single character cannot be "//"
        if (p[0] != '/' || p[1] != '/') {
            // no leading comment
            break;
        }

        // we have a comment. Need to get to the end of the comment
        p += 2;
        for(;;) {
            if (p == str.end) return;
            if (*p == '\r' || *p == '\n') {
                str.beg = p;
                break;
            }
            ++p;
        }
    }

    // Phase 2: look for a trailing comment.
    for(;;) {
        if (p == str.end) return;
        if (*p == '"') {
            // don't look for comments within a string
            for(;;) {
                if (++p == str.end) return;
                if (*p == '\\') {
                    // A backslash escapes the next character whatever it is,
                    // so "\\" followed by a quote still ends the string.
                    if (++p == str.end) return;
                    continue;
                }
                if (*p == '"') break;
            }
            ++p;
            continue;
        }

        if (p + 1 == str.end) return;
        if (p[0] == '/' && p[1] == '/') break;

        // Advance one character at a time: stepping over the character that
        // follows a lone '/' would miss a string opening right after it.
        ++p;
    }
    str.end = p;
}

DWORD
ScanForFunctionCallsEx(
    IN LPCSTR begin, 
    IN LPCSTR   end,
    IN EZPARSE_CALLBACK Callback, 
    IN PVOID Context,
    IN OUT PEZPARSE_CONTEXT ParseContext,
    IN DWORD Options
    )
/*++

Routine Description:

    Scan the buffer for expressions that look like function calls,
    i.e name(sd,sdf,sdf,sdf,sdf); . It will treat variable declaration
    with constructor call as a function call as well.

    The scan finds each ';', then walks BACKWARDS from it: past the closing
    ')', across the top-level arguments (nested parentheses and quoted text
    are skipped), to the matching '(' and finally over the identifier in
    front of it. The pieces are handed to Callback as an array of STR_PAIR:
    element 0 is the name, the remaining elements are the arguments in
    source order, each trimmed of whitespace and "//" comments.

Inputs:

    begin, end -- pointers to the beginning and the end of the buffer
    Callback   -- to be called for every function
    Context    -- opaque context to be passed to callback
    ParseContext -- holds current parse state information
                    (macroEnd, currentStart, currentEnd and doubleParent
                    are updated here)
    Options    -- NO_SEMICOLON: treat the whole buffer as one call without a
                  trailing ';' (honoured for the first attempt only);
                  IGNORE_CPP_COMMENT / IGNORE_POUND_COMMENT: skip candidates
                  preceded by "//" on the same line / on lines whose first
                  non-blank character is '#'.

Return Value:

    ERROR_SUCCESS when the buffer has been scanned to the end, otherwise
    the first non-success value returned by Callback (scanning stops there).

Notes:

    A candidate with more top-level arguments than the internal tables can
    hold is skipped instead of overflowing them.

--*/
{
    // Capacity of the boundary table (ends) and of the argument table (strs).
    enum { MaxPieces = 128 };

    LPCSTR p = begin;
    LPCSTR q, funcNameEnd;
    DWORD Status = ERROR_SUCCESS;
    bool double_par = false;

no_match:

    if (Options & NO_SEMICOLON) {
        // One-shot mode: the candidate ends at the end of the buffer.
        // The flag is cleared so that a failed attempt falls back to the
        // regular ';' driven scan below.
        q = end;
        Options &= ~NO_SEMICOLON;
    } else {   
        do {
            ++p;
            // ">=" rather than "==" so that an empty buffer (begin == end)
            // terminates instead of running past the end.
            if (p >= end) {
                return Status;
            }
        } while ( *p != ';' );
        // Ok. Now p points to ';' //

        q = p;
    }    
    
    do {
        if (--q <= begin) {
            goto no_match;
        }
        // ctype classifiers need an unsigned char value
    } while ( isspace(static_cast<unsigned char>(*q)) );
    
    // Now q points on the first non white space character        //
    // If it is not a ')' then we need to search for the next ';' //

    if (*q != ')') {
        goto no_match;
    }

    ParseContext->macroEnd = q;

    // Ok. This is a function call (definition).
    // Now, let's go and collect all the arguments of the first level and
    // get to the name of the function

    // HACKHACK
    // We need a special case for functions that looks like
    //   KdPrintEx((Level, Indent, Msg, ...));
    //   Essentially, we need to treat them as
    //   KdPrintEx(Level, Indent, Msg, ...);

    const char *r = q;

    // check if we have ));

    do {
        if (--r <= begin) break; // no "));"
    } while ( isspace(static_cast<unsigned char>(*r)) );

    double_par = r > begin && *r == ')';
    if (double_par) {
        q = r;
        // we assume that this is KdPrint((a,b,c,d,...)); at the moment
        // if our assumption is wrong, we will retry the loop below
    }

retry: 
    {
        int level = 0;

        // ends[0] is the closing ')', followed by each top-level ',' (right
        // to left) and finally the opening '('.
        LPCSTR   ends[MaxPieces], *current = ends;
        STR_PAIR strs[MaxPieces];

        *current = q;
        
        for(;;) {
            --q;
            if (q <= begin) {
                goto no_match;
            }
            switch (*q) {
            case ',':
                if (!level) {
                    // One slot must stay free for the opening parenthesis
                    // stored at maybe_match; give up on this candidate
                    // rather than write past the end of ends[] / strs[].
                    if (current - ends >= MaxPieces - 2) {
                        goto no_match;
                    }
                    *++current = q;
                }
                break;
            case '(':  if (level) --level; else goto maybe_match; break;
            case ')':  ++level; break;
            case '\'': 
            case '"':  
                q = skip_stuff_in_quotes(q, begin); if(!q) goto no_match;
            }
        }
maybe_match:
        *++current = q;
        funcNameEnd = q;

        // now q point to '(' we need to find name of the function //
        do {
            --q;
            if (q <= begin) {
                goto no_match;
            }

        } while(isspace(static_cast<unsigned char>(*q)));

        // now q points to first not white character

        if (double_par) {
            // if we see )); and found a matching
            // parenthesis for the inner one, we can have
            // one of two cases
            //   1) KdPrint((a,b,c,d,...));
            //      or
            //   2) DebugPrint(a,b,(c,d));
            // If it is the latter, we just need to
            // retry the scanning, now using leftmost bracket as a starting point

            if (*q != '(') {
                // restore q to the rightmost parenthesis
                q = ParseContext->macroEnd;
                double_par = false;
                goto retry;
            }
            funcNameEnd = q;
            // now q point to '(' we need to find name of the function //
            do {
                --q;
                if (q <= begin) {
                    goto no_match;
                }

            } while(isspace(static_cast<unsigned char>(*q)));
        }
        
        // now q points to first non white character
        // BUGBUG '{' and '}' are allowed only in config files

        if (*q == '}') {
            // skip a "{...}" group placed between the name and '('
            for(;;) {
                if (--q < begin) goto no_match;
                if (*q == '{') break;
            }
            if (--q < begin) goto no_match;
        }

        if (!(isalpha(static_cast<unsigned char>(*q)) ||
              isdigit(static_cast<unsigned char>(*q)) || *q == '_')) {
            goto no_match;
        }
        do {
            --q;
            if (q <= begin) {
                goto found;
            }
        } while ( isalpha(static_cast<unsigned char>(*q)) ||
                  isdigit(static_cast<unsigned char>(*q)) || *q == '_');
        ++q;

        // an identifier cannot start with a digit
        if (isdigit(static_cast<unsigned char>(*q))) {
            goto no_match;
        }

found:
        if (Options & IGNORE_COMMENT)
        // Verify that it is not a comment
        //   # sign in the beginning of the line

        {
            LPCSTR line = q;
            //
            // Find the beginning of the line or file
            //

            for(;;) {
                if (line == begin) {
                    // Beginning of the file. Good enough
                    break;
                }
                if ((Options & IGNORE_CPP_COMMENT) && line[0] == '/' && line[1] == '/') {
                    // C++ comment. Ignore
                    goto no_match;
                }
                if (*line == 13 || *line == 10) {
                    ++line;
                    break;
                }
                --line;
            }

            //
            // If the first non-white character is #, ignore it
            //
            while (line <= q) {
                if ( *line != ' ' && *line != '\t' ) {
                    break;
                }
                ++line;
            }

            if ((Options & IGNORE_POUND_COMMENT) && *line == '#') {
                goto no_match;
            }
        }


        {
            int i = 0;

            strs[0].beg  = q;
            strs[0].end = funcNameEnd;
            adjust_pair(strs[0]);

            // Boundaries were collected right to left; walking current back
            // towards ends[0] yields the arguments in source order.
            while (current != ends) {
                ++i;
                strs[i].beg = current[0]+1;
                --current;
                strs[i].end = current[0];
                adjust_pair(strs[i]);
                remove_cpp_comment(strs[i]);
            }

            ParseContext->currentStart = strs[0].beg;
            ParseContext->currentEnd = strs[0].end;
            ParseContext->doubleParent = double_par;

            Status = Callback(strs, i+1, Context, ParseContext);
            if (Status != ERROR_SUCCESS) {
                return Status;
            }
                
        }
        goto no_match;
    }
    // return ERROR_SUCCESS; // unreachable code
}

DWORD
ScanForFunctionCalls(
    IN LPCSTR begin, 
    IN LPCSTR   end,
    IN EZPARSE_CALLBACK Callback, 
    IN PVOID Context,
    IN OUT PEZPARSE_CONTEXT ParseContext    
    )
/*++
    Scans [begin, end) with ScanForFunctionCallsEx using the IGNORE_COMMENT
    option set and returns whatever that scan returns.
--*/
{
    const DWORD defaultOptions = IGNORE_COMMENT;

    return ScanForFunctionCallsEx(begin, end,
                                  Callback, Context,
                                  ParseContext,
                                  defaultOptions);
}

DWORD 
EzGetLineNo(
    IN LPCSTR Ptr,
    IN OUT PEZPARSE_CONTEXT ParseContext
    )
/*++
    Returns the line number that corresponds to Ptr, a pointer into the
    buffer described by ParseContext.

    The result of the previous call (pointer + line number) is kept in
    ParseContext, so only the '\n' characters between that pointer and Ptr
    are counted. If Ptr lies before the cached pointer, counting restarts
    from ParseContext->start with line 1.
*/
{
    LPCSTR floor = ParseContext->lastScanned;
    int    lines = ParseContext->scannedLineCount;

    // The cache is only usable when moving forward.
    if (Ptr < floor) {
        floor = ParseContext->start;
        lines = 1;
    }

    // Count newlines in (floor, Ptr].
    for (LPCSTR cursor = Ptr; cursor > floor; --cursor) {
        if (*cursor == '\n') {
            ++lines;
        }
    }

    ParseContext->lastScanned = Ptr;
    ParseContext->scannedLineCount = lines;

    return lines;
}

// Keywords looked for by SmartScan and DoEnum:
//   begin_wpp / end_wpp delimit an annotated block,
//   "#define" introduces a function declaration inside a "func" block,
//   "enum " introduces an enumeration inside an "enum" block.
const char begin_wpp[] = "begin_wpp"; 
const char end_wpp[]   = "end_wpp";  
const char define_[]   = "#define";
const char enum_[]     = "enum ";
// Lengths of the keywords above, not counting the terminating NUL.
enum { 
    begin_wpp_size = (sizeof(begin_wpp)-1),
    end_wpp_size   = (sizeof(end_wpp)-1),
    define_size    = (sizeof(define_)-1),
    enum_size      = (sizeof(enum_)-1),
};

// State handed from SmartScan to its helpers (DoEnum / DoEnumItems).
typedef struct _SmartContext {
    // Callback and its opaque argument, forwarded to ScanForFunctionCallsEx.
    EZPARSE_CALLBACK Callback;
    PVOID Context;
    // Parse state of the file being scanned, forwarded as well.
    OUT PEZPARSE_CONTEXT ParseContext;
    // Scratch buffer in which a synthetic call text is built before it is
    // scanned with the NO_SEMICOLON option.
    std::string buf;
} SMART_CONTEXT, *PSMART_CONTEXT;

/*++
    Turns the body of an enum into the text

        CUSTOM_TYPE(<name>, ItemListLong(<item>,<item>,...) )

    (built in ctx->buf) and feeds that text to ScanForFunctionCallsEx with
    the NO_SEMICOLON option, so that the callback sees it as a call.

    name       -- enum name
    begin, end -- text between '{' and '}' of the enum
    ctx        -- callback information and the scratch buffer

    Each item is the enumerator name only; an "= value" part is skipped.
    A trailing comma after the last enumerator is ignored. Collection stops
    at the first empty item.
--*/
void DoEnumItems(PSTR_PAIR name, LPCSTR begin, LPCSTR end, PSMART_CONTEXT ctx)
{
    LPCSTR p,q;
    STR_PAIR Item;
    BOOL First = TRUE;
    ctx->buf.assign("CUSTOM_TYPE(");
    ctx->buf.append(name->beg, name->end);
    ctx->buf.append(", ItemListLong");

    // Trim trailing whitespace, then one trailing comma, so that end is the
    // exclusive end of the last enumerator. Looking at end[-1] (instead of
    // pre-decrementing end) keeps a body whose only non-blank character is
    // its very first one from being treated as empty.
    while (begin < end && isspace(static_cast<unsigned char>(end[-1]))) --end;
    if (begin < end && end[-1] == ',') --end;

    for (p = begin; p < end; p = q + 1) {
        Item.beg = p;

        // The item name runs up to the next ',', '}' or '=' -- or up to the
        // end of the body for the last enumerator, which must not be lost.
        q = p;
        while (q != end && *q != ',' && *q != '}' && *q != '=') ++q;
        Item.end = q;

        if (q != end && *q == '=') {
            // explicit value: not evaluated, just skipped up to the next ','
            while (q < end && *q != ',') ++q;
        }

        adjust_pair(Item);
        if (Item.beg == Item.end) {
            break;
        }
        if (First) {ctx->buf.append("("); First = FALSE;} else ctx->buf.append(",");
        ctx->buf.append(Item.beg, Item.end);
        if (q == end) break;
    }

    ctx->buf.append(") )");
    ScanForFunctionCallsEx(
        ctx->buf.c_str(), ctx->buf.c_str() + ctx->buf.size(), ctx->Callback, ctx->Context,
        ctx->ParseContext, NO_SEMICOLON);
    Flood("enum %s\n", ctx->buf.c_str());
}

/*++
    Processes every "enum <name> { ... }" found in [begin, end).

    For each one the name (text between "enum " and '{', trimmed, with one
    leading underscore dropped) and the body (text between '{' and '}') are
    passed to DoEnumItems. An enum without a name is reported as an error.
    Processing stops at the first enum that has no '{' or no '}'.
--*/
void DoEnum(LPCSTR begin, LPCSTR end, PSMART_CONTEXT Ctx)
{
    LPCSTR cursor = begin;

    for(;;) {
        LPCSTR keyword = std::search(cursor, end, enum_, enum_ + enum_size);
        if (keyword == end) {
            return;
        }

        LPCSTR open = std::find(keyword, end, '{');
        if (open == end) {
            return;
        }

        // the name sits between the keyword and the opening brace
        STR_PAIR name;
        name.beg = keyword + enum_size;
        name.end = open;
        adjust_pair(name);
        if ( *name.beg == '_' ) {
            ++name.beg;
        }

        LPCSTR body  = open + 1; // past "{"
        LPCSTR close = std::find(body, end, '}');
        if (close == end) {
            return;
        }

        if (name.beg < name.end) {
            DoEnumItems(&name, body, close, Ctx);
        } else {
            ReportError("Cannot handle tagless enums yet");
        }

        cursor = close;
    }
}


/*++
    Scans [begin, end) for blocks delimited by "begin_wpp" ... "end_wpp" and
    processes each one according to the keyword that follows "begin_wpp":

        enum   -- every enum in the block is handed to DoEnum
        config -- the block is scanned for calls (IGNORE_POUND_COMMENT)
        func   -- every "#define NAME" / "#define NAME(args)" is turned into
                  the text "NAME(MSGARGS)" / "NAME(args)" and scanned as a
                  single call (NO_SEMICOLON)
        define -- accepted, nothing is done

    Any other keyword is reported as an error. Scanning of blocks stops at
    the first begin_wpp that has no matching end_wpp.

    When the scan position never moved off begin (no marked block was
    processed), the whole buffer is scanned with ScanForFunctionCalls and
    the result of that scan is returned. Otherwise ERROR_SUCCESS is returned.
--*/
DWORD
SmartScan(
    IN LPCSTR begin, 
    IN LPCSTR   end,
    IN EZPARSE_CALLBACK Callback, 
    IN PVOID Context,
    IN OUT PEZPARSE_CONTEXT ParseContext
    )
{
    LPCSTR block_start, block_end, current = begin;
    SMART_CONTEXT Ctx;
    Ctx.Callback = Callback;
    Ctx.Context  = Context;
    Ctx.ParseContext = ParseContext;
    
    for(;;) {
        block_start = std::search(current, end, begin_wpp, begin_wpp + begin_wpp_size);
        if (block_start == end) break;
        
        current = block_start;
        
        block_end = std::search(block_start, end, end_wpp, end_wpp + end_wpp_size);
        if (block_end == end) break;

        Flood("Block Found\n");
        // determine block type //
        
        //  begin_wpp enum
        //  begin_wpp config
        //  begin_wpp func
        //  begin_wpp define
        
        // the keyword starts one separator character after "begin_wpp"
        LPCSTR block_type = block_start + begin_wpp_size + 1;
        Flood("block_type = %c%c%c%c\n", block_type[0],block_type[1],block_type[2],block_type[3]); 
        
        if        (memcmp(block_type, "enum",   4) == 0) {
            // do enum block //
            DoEnum( block_type + 4, block_end, &Ctx );
            
        } else if (memcmp(block_type, "config", 6) == 0) {
            // do config block //
            ScanForFunctionCallsEx(block_type + 6, block_end, Callback, Context, ParseContext, IGNORE_POUND_COMMENT);

        } else if (memcmp(block_type, "func", 4) == 0) {
            LPCSTR func_start, func_end;

            // Skip exactly the 4 characters of "func"; a larger offset could
            // place the search start beyond block_end.
            current = block_type + 4;
            for(;;) {
                func_start = std::search(current, block_end, define_, define_ + define_size);
                if (func_start == block_end) break;
                func_start += define_size;

                // Skip the blanks after "#define". block_end is tested before
                // every dereference so the scan never leaves the block.
                while (func_start != block_end &&
                       isspace(static_cast<unsigned char>(*func_start))) {
                    ++func_start;
                }
                if (func_start == block_end) break;

                // The name ends at the first blank or '('.
                func_end = func_start;
                while (func_end != block_end && *func_end != '(' &&
                       !isspace(static_cast<unsigned char>(*func_end))) {
                    ++func_end;
                }
                if (func_end == block_end) break;

                if(*func_end != '(') {
                    // object-like macro: give it a synthetic argument list
                    Ctx.buf.assign(func_start, func_end);
                    Ctx.buf.append("(MSGARGS)");
                } else {
                    func_end = std::find(func_start, block_end, ')');
                    if (func_end == block_end) break;

                    ++func_end; // include ")"
                    Ctx.buf.assign(func_start, func_end); 
                }
                Flood("Func %s\n", Ctx.buf.c_str());

                // Pass character pointers (as DoEnumItems does); std::string
                // iterators are not guaranteed to convert to LPCSTR.
                ScanForFunctionCallsEx(
                    Ctx.buf.c_str(), Ctx.buf.c_str() + Ctx.buf.size(), Callback, Context,
                    ParseContext, NO_SEMICOLON);
                current = func_end;
            }            
        } else if (memcmp(block_type, "define", 6) == 0) {
            // do define block
        } else {
            ReportError("Unknown block");
        }

        current = block_end + end_wpp_size;
    }
    if (current == begin) {
        // file without marking, let's do default processing
        Unusual("Reverting back to plain scan\n");
        // behave exactly like a plain scan, including its status
        return ScanForFunctionCalls(begin, end, Callback, Context, ParseContext);
    }

    return ERROR_SUCCESS;
}

DWORD
EzParse(
    IN LPCSTR filename, 
    IN EZPARSE_CALLBACK Callback, 
    IN PVOID Context)
/*++
    Parses one file with the plain function-call scanner
    (ScanForFunctionCalls) and the IGNORE_POUND_COMMENT option, returning
    the status produced by EzParseEx.

    SmartScan is available as an alternative scanner but is not used here.
--*/
{
    const DWORD status = EzParseEx(filename,
                                   ScanForFunctionCalls,
                                   Callback,
                                   Context,
                                   IGNORE_POUND_COMMENT);
    return status;
}

DWORD
EzParseWithOptions(
    IN LPCSTR filename, 
    IN EZPARSE_CALLBACK Callback, 
    IN PVOID Context,
    IN DWORD Options)
/*++
    Same as EzParse, except that the caller supplies the Options value that
    is handed to EzParseEx (which records it in the parse context).
--*/
{
    const DWORD status = EzParseEx(filename,
                                   ScanForFunctionCalls,
                                   Callback,
                                   Context,
                                   Options);
    return status;
}

/*++
    Maps a file read-only into memory and runs ProcessData over its
    contents.

    filename    -- file to open
    ProcessData -- scanner invoked as
                   ProcessData(buf, buf + size, Callback, Context, &ctx)
    Callback, Context -- passed through to ProcessData
    Options     -- stored in the parse context (ctx.Options)

    While ProcessData runs, EzParseCurrentContext points to the context of
    this file; the previous value is restored afterwards, so calls may nest.

    Returns the status of ProcessData, or the Win32 error code of the step
    that failed (each failure is also reported through ReportError).
    An empty file is not an error: there is nothing to scan and
    ERROR_SUCCESS is returned without calling ProcessData.
--*/
DWORD
EzParseEx(
    IN LPCSTR filename, 
    IN PROCESSFILE_CALLBACK ProcessData,
    IN EZPARSE_CALLBACK Callback, 
    IN PVOID Context,
    IN DWORD Options
    )
{    
    DWORD  Status = ERROR_SUCCESS;
    HANDLE mapping;
    HANDLE file = CreateFileA(filename, 
                              GENERIC_READ, FILE_SHARE_READ, NULL,
                              OPEN_EXISTING, 0, 0);
    if (file == INVALID_HANDLE_VALUE) {
        Status = GetLastError();
        ReportError("Cannot open file %s, error %u\n", filename, Status );
        return Status;
    }

    DWORD size = GetFileSize(file, 0);
    if (size == INVALID_FILE_SIZE) {
        // Without this check a failed query would be used as a buffer length.
        Status = GetLastError();
        ReportError("Cannot get size of file %s, error %u\n", filename, Status );
        CloseHandle(file);
        return Status;
    }
    if (size == 0) {
        // A zero-length file cannot be mapped (CreateFileMapping fails with
        // ERROR_FILE_INVALID) and has nothing to scan anyway.
        CloseHandle(file);
        return ERROR_SUCCESS;
    }

    mapping = CreateFileMapping(file,0,PAGE_READONLY,0,0, 0);
    if (!mapping) {
        Status = GetLastError();
        ReportError("Cannot create mapping, error %u\n", Status );
        CloseHandle(file);
        return Status;
    }
    PCHAR buf = (PCHAR)MapViewOfFileEx(mapping, FILE_MAP_READ,0,0,0,0);
    if (buf) {

        EZPARSE_CONTEXT ParseContext;
        ZeroMemory(&ParseContext, sizeof(ParseContext) );
    
        ParseContext.start = buf;
        ParseContext.filename = filename;
        ParseContext.scannedLineCount = 1;
        ParseContext.lastScanned = buf;
        ParseContext.previousContext = EzParseCurrentContext;
        ParseContext.Options = Options;
        EzParseCurrentContext = &ParseContext;
    
        Status = (*ProcessData)(buf, buf + size, Callback, Context, &ParseContext);

        // pop our context before the view it points into goes away
        EzParseCurrentContext = ParseContext.previousContext;
        UnmapViewOfFile( buf );

    } else {
        Status = GetLastError();
        ReportError("MapViewOfFileEx failed, error %u\n", Status );
    }
    CloseHandle(mapping);
    CloseHandle(file);
    return Status;
}

/*++
    Runs ProcessData over the contents of an RT_RCDATA resource of the
    current module.

    ResName     -- name of the resource
    ProcessData -- scanner invoked as
                   ProcessData(buf, buf + size, Callback, Context, &ctx)
    Callback, Context -- passed through to ProcessData

    While ProcessData runs, EzParseCurrentContext points to the context of
    this resource (with ResName as its file name); the previous value is
    restored afterwards.

    Returns the status of ProcessData, or the Win32 error code of the step
    that failed (each failure is also reported through ReportError).
--*/
DWORD
EzParseResourceEx(
    IN LPCSTR ResName, 
    IN PROCESSFILE_CALLBACK ProcessData,
    IN EZPARSE_CALLBACK Callback, 
    IN PVOID Context)
{    
    DWORD  Status = ERROR_SUCCESS;
    HRSRC hRsrc;

    hRsrc = FindResource(
        NULL, //this Module
        ResName, 
        RT_RCDATA);
        
    if (hRsrc == NULL) {
        Status = GetLastError();
        ReportError("Cannot open resource %s, error %u\n", ResName, Status );
        return Status;
    }

    HGLOBAL hGlobal = LoadResource(NULL, hRsrc);
    if (!hGlobal) {
        Status = GetLastError();
        // name the call that actually failed
        ReportError("LoadResource failed, error %u\n", Status );
        return Status;
    }

    DWORD size = SizeofResource(NULL, hRsrc);
    
    PCHAR buf = (PCHAR)LockResource(hGlobal);
    if (buf) {

        EZPARSE_CONTEXT ParseContext;
        ZeroMemory(&ParseContext, sizeof(ParseContext) );
    
        ParseContext.start = buf;
        ParseContext.filename = ResName;
        ParseContext.scannedLineCount = 1;
        ParseContext.lastScanned = buf;
        ParseContext.previousContext = EzParseCurrentContext;
        EzParseCurrentContext = &ParseContext;
    
        Status = (*ProcessData)(buf, buf + size, Callback, Context, &ParseContext);
        EzParseCurrentContext = ParseContext.previousContext;
    } else {
        Status = GetLastError();
        ReportError("LockResource failed, error %u\n", Status );
    }
    // According to MSDN. There is no need to call Unlock/Free Resource
    return Status;
}