windows-server-2003/shell/tools/cleaninf/parse.cpp



								#include "priv.h"


								// Character classification helpers.  The argument is parenthesized so
								// that an expression argument (e.g. IS_NEWLINE(a ? b : c)) is compared
								// as a whole.  IS_WHITESPACE evaluates its argument twice, so do not
								// pass an expression with side effects.
								#define IS_WHITESPACE(ch)   (' ' == (ch) || '\t' == (ch))
								#define IS_NEWLINE(ch)      ('\n' == (ch))

								// Flags for _ReadChar
								#define RCF_NEXTLINE        0x0001      // skip to next line
								#define RCF_NEXTNWS         0x0002      // skip to next non-whitespace
								#define RCF_SKIPTRAILING    0x0004      // skip trailing whitespace


								// Constructor.  Deliberately empty: Parse() assigns the file pointers
								// and the read/write positions at the start of every run.
								CParseFile::CParseFile()
								{
								    // nothing to do
								}


								/*-------------------------------------------------------------------------

								Purpose: Parse the given file according to the provided flags.

								*/

								void CParseFile::Parse(FILE * pfileSrc, FILE * pfileDest, DWORD dwFlags)

								{

								    _bSkipWhitespace = BOOLIFY(dwFlags & PFF_WHITESPACE);


								    _pfileSrc = pfileSrc;

								    _pfileDest = pfileDest;

								    _ichRead = 0;

								    _cchRead = 0;


								    _ichWrite = 0;


								    _ch = 0;


								    if (dwFlags & PFF_HTML)

								        _ParseHtml();

								    else if (dwFlags & PFF_HTC)

								        _ParseHtc();

								    else if (dwFlags & PFF_JS)

								        _ParseJS();

								    else

								        _ParseInf();


								    _FlushWriteBuffer();

								}


								/*-------------------------------------------------------------------------
								Purpose: Read the next character in the file.  Sets _ch.

								         dwFlags is a combination of RCF_* flags:

								           RCF_NEXTLINE  discard the remainder of the current line and
								                         return the first character of the next line
								           RCF_NEXTNWS   keep reading until the character is neither a
								                         space nor a tab; the "first character" state
								                         the caller had on entry is retained

								         _bFirstChar is set to TRUE when the character returned is the
								         first one of a line, FALSE otherwise.

								         Returns the new _ch, which is CHAR_EOF once the input is
								         exhausted.
								*/
								char CParseFile::_ReadChar(DWORD dwFlags)
								{
								    BOOL bFirstCharSav = _bFirstChar;

								    do
								    {
								        _ichRead++;
								        _bFirstChar = FALSE;

								        // Are we past the buffer, or do we skip to next line?
								        if (_ichRead >= _cchRead || (dwFlags & RCF_NEXTLINE))
								        {
								            // Yes; read in more.
								            //
								            // fgets hands back at most one buffer's worth of a line, so
								            // a line longer than the buffer arrives in several chunks.
								            // bMidLine is TRUE while the chunk most recently read
								            // stopped short of its newline.
								            BOOL bMidLine = (_cchRead > 0 && !IS_NEWLINE(_szReadBuf[_cchRead - 1]));
								            BOOL bGotChunk = FALSE;

								            for (;;)
								            {
								                if (!fgets(_szReadBuf, SIZECHARS(_szReadBuf), _pfileSrc))
								                {
								                    // End of file (or a read error)
								                    _cchRead = 0;
								                    break;
								                }

								                _cchRead = strlen(_szReadBuf);

								                if (!bMidLine || !(dwFlags & RCF_NEXTLINE))
								                {
								                    bGotChunk = TRUE;
								                    break;
								                }

								                // We are skipping to the next line and this chunk still
								                // belongs to the line being skipped: drop it and go on.
								                bMidLine = (_cchRead > 0 && !IS_NEWLINE(_szReadBuf[_cchRead - 1]));
								            }

								            _ichRead = 0;

								            // Only a chunk that begins a line has a "first character";
								            // a continuation of a long line does not.
								            if (bGotChunk && !bMidLine)
								                _bFirstChar = TRUE;
								        }

								        if (_ichRead < _cchRead)
								            _ch = _szReadBuf[_ichRead];
								        else
								            _ch = CHAR_EOF;
								    } while ((dwFlags & RCF_NEXTNWS) && IS_WHITESPACE(_ch));

								    // Are we supposed to skip to the next non-whitespace?
								    if (dwFlags & RCF_NEXTNWS)
								    {
								        // Yes; then retain the "first character" state
								        _bFirstChar = bFirstCharSav;
								    }

								    return _ch;
								}


								/*-------------------------------------------------------------------------
								Purpose: Peek at the character ichAhead positions past the current read
								         position.  Neither _ch nor the read pointer is changed.

								         Only the text currently held in the read buffer can be seen;
								         0 is returned when the requested position lies beyond it.
								*/
								char CParseFile::_SniffChar(int ichAhead)
								{
								    char chAhead = 0;

								    if (_ichRead + ichAhead < _cchRead)
								    {
								        chAhead = _szReadBuf[_ichRead + ichAhead];
								    }

								    return chAhead;
								}


								/*-------------------------------------------------------------------------

								Purpose: Write the character to the file

								*/

								void CParseFile::_WriteChar(char ch)

								{

								    _szWriteBuf[_ichWrite++] = ch;

								    _szWriteBuf[_ichWrite] = 0;


								    if ('\n' == ch || SIZECHARS(_szWriteBuf)-1 == _ichWrite)

								    {

								        fputs(_szWriteBuf, _pfileDest);

								        _ichWrite = 0;

								    }

								}


								/*-------------------------------------------------------------------------
								Purpose: Write any characters still held in the write buffer to the
								         destination file and mark the buffer empty.
								*/
								void CParseFile::_FlushWriteBuffer(void)
								{
								    // Nothing buffered?
								    if (_ichWrite <= 0)
								        return;

								    fputs(_szWriteBuf, _pfileDest);
								    _ichWrite = 0;
								}


								/*-------------------------------------------------------------------------

								Purpose: Parse a .inf file.

								*/

								void CParseFile::_ParseInf(void)

								{

								    _ReadChar(0);


								    while (CHAR_EOF != _ch)

								    {

								        if (_bFirstChar)

								        {

								            // Is this a comment?

								            if (';' == _ch)

								            {

								                // Yes; skip to next line

								                _ReadChar(RCF_NEXTLINE);

								                continue;

								            }


								            if (_SkipWhitespace())

								                continue;

								        }


								        _WriteChar(_ch);

								        _ReadChar(0);

								    }

								}


								/*-------------------------------------------------------------------------
								Purpose: Write the current character and the rest of the tag.  Assumes
								         _ch is the beginning of the tag ('<').

								         There are some parts of the tag which may be compacted if
								         _bSkipWhitespace is TRUE.  The general rule is only one space
								         is required between attributes, and newlines are converted to
								         spaces if necessary.  Anything in quotes (single or double) is
								         left alone.

								         On return _ch is the closing '>' (which has been written), or
								         CHAR_EOF if the input ended before the tag was closed.
								*/
								void CParseFile::_WriteTag(void)
								{
								    BOOL bSingleQuotes = FALSE;
								    BOOL bDblQuotes = FALSE;

								    // The end of the tag is the next '>' that is not in single or double-quotes.

								    while (CHAR_EOF != _ch)
								    {
								        // A quote character only opens or closes a quoted value of its
								        // own kind.  An apostrophe inside "..." (or a double quote
								        // inside '...') is ordinary text and must not change the state,
								        // otherwise the closing '>' of the tag would be missed.
								        if ('\'' == _ch)
								        {
								            if (!bDblQuotes)
								                bSingleQuotes ^= TRUE;
								        }
								        else if ('"' == _ch)
								        {
								            if (!bSingleQuotes)
								                bDblQuotes ^= TRUE;
								        }

								        if (!bSingleQuotes && !bDblQuotes)
								        {
								            // _SkipWhitespace returns TRUE if it skips any whitespace,
								            // which means we've read some more input, which means we should
								            // go to the top of the loop and check for EOF and quotes.
								            if (_bSkipWhitespace && _SkipWhitespace(TRUE))
								                continue;

								            // End of tag?
								            if ('>' == _ch)
								            {
								                // Yes
								                _WriteChar(_ch);
								                break;
								            }
								        }

								        _WriteChar(_ch);
								        _ReadChar(0);
								    }
								}


								/*-------------------------------------------------------------------------

								Purpose: Skip the current comment tag.  Assumes _ch is the beginning of

								         the tag ('<').

								*/

								void CParseFile::_SkipCommentTag(void)

								{

								    // The end of the tag is the next '-->'


								    while (CHAR_EOF != _ch)

								    {

								        // Is the end of the comment coming up?

								        if ('-' == _ch && _SniffChar(1) == '-' && _SniffChar(2) == '>')

								        {

								            // Yes

								            _ReadChar(0);   // skip '-'

								            _ReadChar(0);   // skip '>'

								            break;

								        }


								        _ReadChar(0);

								    }

								}


								/*-------------------------------------------------------------------------
								Purpose: Skip whitespace starting at the current character.  Does
								         nothing unless _bSkipWhitespace is set.

								         A run of spaces/tabs is skipped up to the next other character.
								         A newline moves to the next line; further newlines and one run
								         of leading spaces/tabs on the line reached are skipped as well.

								         If bPreserveOneSpace is TRUE and something was skipped, a
								         single space is written in its place.

								         Returns TRUE if anything was skipped
								*/
								BOOL CParseFile::_SkipWhitespace(BOOL bPreserveOneSpace)
								{
								    if (!_bSkipWhitespace)
								        return FALSE;

								    BOOL bSkipped = FALSE;

								    if (IS_WHITESPACE(_ch))
								    {
								        // Skip the run of spaces/tabs
								        _ReadChar(RCF_NEXTNWS);
								        bSkipped = TRUE;
								    }
								    else
								    {
								        while (IS_NEWLINE(_ch))
								        {
								            // Move to the next line
								            _ReadChar(RCF_NEXTLINE);
								            bSkipped = TRUE;

								            // Leading whitespace on that line goes too; once it has
								            // been skipped we are done, whatever follows it.
								            if (IS_WHITESPACE(_ch))
								            {
								                _ReadChar(RCF_NEXTNWS);
								                break;
								            }
								        }
								    }

								    // Stand in a single space for everything skipped, if the caller
								    // asked for one.
								    if (bSkipped && bPreserveOneSpace)
								        _WriteChar(' ');

								    return bSkipped;
								}


								/*-------------------------------------------------------------------------

								Purpose: Skip a C or C++ style comment


								         Returns TRUE if a comment boundary was encountered.

								*/

								BOOL CParseFile::_SkipComment(int * pcNestedComment)

								{

								    BOOL bRet = FALSE;


								    if ('/' == _ch)

								    {

								        // Is this a C++ comment?

								        if ('/' == _SniffChar(1))

								        {

								            // Yes; skip it to end of line

								            if (!_bFirstChar || !_bSkipWhitespace)

								                _WriteChar('\n');


								            _ReadChar(RCF_NEXTLINE);

								            bRet = TRUE;

								        }

								        // Is this a C comment?

								        else if ('*' == _SniffChar(1))

								        {

								            // Yes; skip to respective '*/'

								            _ReadChar(0);       // skip '/'

								            _ReadChar(0);       // skip '*'

								            (*pcNestedComment)++;

								            bRet = TRUE;

								        }

								    }

								    else if ('*' == _ch)

								    {

								        // Is this the end of a C comment?

								        if ('/' == _SniffChar(1))

								        {

								            // Yes

								            _ReadChar(0);       // skip '*'

								            _ReadChar(0);       // skip '/'

								            (*pcNestedComment)--;


								            // Prevent writing an unnecessary '\n'

								            _bFirstChar = TRUE;

								            bRet = TRUE;

								        }

								    }

								    return bRet;

								}


								/*-------------------------------------------------------------------------

								Purpose: Parse the innertext of the STYLE tag, remove any comments

								*/

								void CParseFile::_ParseInnerStyle(void)

								{

								    int cNestedComment = 0;


								    // The end of the tag is the next '</STYLE>'


								    _ReadChar(0);


								    while (CHAR_EOF != _ch)

								    {

								        if (_bFirstChar && _SkipWhitespace())

								            continue;


								        // Is the end of the styletag section coming up?

								        if ('<' == _ch && _IsTagEqual("/STYLE"))

								        {

								            // Yes

								            break;

								        }


								        if (_SkipComment(&cNestedComment))

								            continue;


								        if (0 == cNestedComment && !IS_NEWLINE(_ch))

								            _WriteChar(_ch);


								        _ReadChar(0);

								    }

								}


								/*-------------------------------------------------------------------------
								Purpose: Returns TRUE if the given tagname matches the currently parsed
								         token.  Assumes _ch is the '<' that opens the tag; pszTag is
								         the name to look for, without the '<'.

								         ASCII letters are compared without regard to case, since HTML
								         tag names are case-insensitive.  The name must be followed by
								         a space, tab, newline, '>' or '<' to count as a match.

								         Only the text currently held in the read buffer is examined
								         (see _SniffChar).
								*/
								BOOL CParseFile::_IsTagEqual(LPSTR pszTag)
								{
								    int ich = 1;        // offset 0 is the '<' itself

								    for ( ; *pszTag; pszTag++, ich++)
								    {
								        char chSrc = _SniffChar(ich);
								        char chTag = *pszTag;

								        // Fold ASCII lower case to upper case on both sides
								        if ('a' <= chSrc && chSrc <= 'z')
								            chSrc = static_cast<char>(chSrc - 'a' + 'A');

								        if ('a' <= chTag && chTag <= 'z')
								            chTag = static_cast<char>(chTag - 'a' + 'A');

								        if (chSrc != chTag)
								            return FALSE;
								    }

								    // We should verify we've come to the end of the tagName
								    char chEnd = _SniffChar(ich);

								    return (IS_WHITESPACE(chEnd) || IS_NEWLINE(chEnd) || '>' == chEnd || '<' == chEnd);
								}


								/*-------------------------------------------------------------------------
								Purpose: Returns TRUE if the current tag is an end tag, i.e. the
								         character right after the current one is '/'.
								*/
								BOOL CParseFile::_IsEndTag(void)
								{
								    char chNext = _SniffChar(1);

								    return ('/' == chNext);
								}


								/*-------------------------------------------------------------------------

								Purpose: Parse a .htm or .hta file.

								*/

								void CParseFile::_ParseHtml(void)

								{

								    BOOL bFollowingTag = FALSE;

								    BOOL bFollowingEndTag = FALSE;


								    _ReadChar(0);


								    while (CHAR_EOF != _ch)

								    {

								        // Anytime we read another char, we should go to the top of the loop

								        // to check for EOF and skip leading whitespace if it's a new line.

								        //

								        // Note that _SkipWhitespace returns TRUE if it has skipped something,

								        // which also involves reading a new char.


								        if (_bFirstChar && _SkipWhitespace())

								            continue;


								        // Is this a tag?

								        if ('<' == _ch)

								        {

								            // Yes; looks like it


								            // Since we've found a new tag, no need to remember if we just saw

								            // an end tag. That only matters for text content following an end

								            // tag. For example, given "<SPAN>foo</SPAN>  bar", we need to

								            // preserve a space before the word "bar".

								            bFollowingEndTag = FALSE;


								            if (_IsTagEqual("!--"))

								            {

								                // Comment; skip it

								                _SkipCommentTag();

								            }

								            else if (_IsTagEqual("SCRIPT"))

								            {

								                // Parse the script

								                _WriteTag();        // write the <SCRIPT> tag


								                // FEATURE (scotth): we always assume javascript, maybe we should support something else

								                _ParseJS();


								                _WriteTag();        // write the </SCRIPT> tag

								            }

								            else if (_IsTagEqual("STYLE"))

								            {

								                _WriteTag();        // write the <STYLE> tag

								                _ParseInnerStyle();

								                _WriteTag();        // write the </STYLE> tag

								            }

								            else

								            {

								                // Check for end tag ("</") before calling _WriteTag

								                bFollowingEndTag = _IsEndTag();


								                // Any other tag: write the tag and go to the next one

								                _WriteTag();

								            }


								            bFollowingTag = TRUE;

								            _ReadChar(0);

								            continue;

								        }


								        if (bFollowingTag && _bSkipWhitespace)

								        {

								            // We can't entirely skip whitespace following tags such as </SPAN>

								            // or </A>, but we can at least collapse it down to a single space.

								            BOOL bPreserveOneSpace = bFollowingEndTag;


								            bFollowingEndTag = FALSE;

								            bFollowingTag = FALSE;


								            if (_SkipWhitespace(bPreserveOneSpace))

								                continue;

								        }


								        _WriteChar(_ch);

								        _ReadChar(0);

								    }

								}


								/*-------------------------------------------------------------------------

								Purpose: Parse a .js file.

								*/

								void CParseFile::_ParseJS(void)

								{

								    BOOL bDblQuotes = FALSE;

								    BOOL bSingleQuotes = FALSE;

								    int cNestedComment = 0;


								    _ReadChar(0);


								    while (CHAR_EOF != _ch)

								    {

								        // Are we in a comment?

								        if (0 == cNestedComment)

								        {

								            // No; (we only pay attention to strings when they're not in comments)

								            if ('\'' == _ch)

								                bSingleQuotes ^= TRUE;

								            else if ('"' == _ch)

								                bDblQuotes ^= TRUE;


								            if (_bSkipWhitespace && !bDblQuotes && !bSingleQuotes)

								            {

								                if (IS_WHITESPACE(_ch))

								                {

								                    // Skip whitespace

								                    if (!_bFirstChar)

								                        _WriteChar(' ');


								                    _ReadChar(RCF_NEXTNWS);

								                    continue;

								                }

								                else if (IS_NEWLINE(_ch))

								                {

								                    // Since javascript doesn't require a ';' at the end of a statement,

								                    // we should at least replace the newline with a space so tokens don't

								                    // get appended accidentally.


								                    // The javascript engine has a line-length limit.  So don't replace

								                    // a newline with a space.

								                    if (!_bFirstChar)

								                        _WriteChar('\n');


								                    _ReadChar(RCF_NEXTLINE);

								                    continue;

								                }

								            }


								            // Are we in a string?

								            if (!bDblQuotes && !bSingleQuotes)

								            {

								                // No; look for the terminating SCRIPT tag

								                if ('<' == _ch)

								                {

								                    if (_IsTagEqual("/SCRIPT"))

								                    {

								                        // We've reached the end of the script block

								                        break;

								                    }

								                }

								            }

								        }


								        // Are we in a string?

								        if (!bDblQuotes && !bSingleQuotes)

								        {

								            // No; look for comments...

								            if (_SkipComment(&cNestedComment))

								                continue;

								        }


								        if (0 == cNestedComment)

								            _WriteChar(_ch);


								        _ReadChar(0);

								    }

								}


								/*-------------------------------------------------------------------------

								Purpose: Parse a .htc file.

								*/

								void CParseFile::_ParseHtc(void)

								{

								    BOOL bFollowingTag = FALSE;

								    int cNestedComment = 0;


								    _ReadChar(0);


								    while (CHAR_EOF != _ch)

								    {

								        if (_bFirstChar && _SkipWhitespace())

								            continue;


								        // Is this a tag?

								        if ('<' == _ch)

								        {

								            // Yes; is it a script tag?

								            if (_IsTagEqual("SCRIPT"))

								            {

								                // Yes; parse the script

								                _WriteTag();        // write the <SCRIPT> tag


								                // FEATURE (scotth): we always assume javascript

								                _ParseJS();


								                _WriteTag();        // write the </SCRIPT> tag


								                _ReadChar(0);

								                bFollowingTag = TRUE;

								                continue;

								            }

								            else

								            {

								                _WriteTag();

								                _ReadChar(0);

								                bFollowingTag = TRUE;

								                continue;


								            }

								        }


								        // Look for comments outside the SCRIPT block...

								        if (_SkipComment(&cNestedComment))

								            continue;


								        if (bFollowingTag && _bSkipWhitespace)

								        {

								            bFollowingTag = FALSE;


								            if (_SkipWhitespace())

								                continue;

								        }


								        if (0 == cNestedComment)

								             _WriteChar(_ch);


								        _ReadChar(0);

								    }

								}