#include "priv.h" #define IS_WHITESPACE(ch) (' ' == ch || '\t' == ch) #define IS_NEWLINE(ch) ('\n' == ch) // Flags for _ReadChar #define RCF_NEXTLINE 0x0001 // skip to next line #define RCF_NEXTNWS 0x0002 // skip to next non-whitespace #define RCF_SKIPTRAILING 0x0004 // skip trailing whitespace // constructor CParseFile::CParseFile() { } /*------------------------------------------------------------------------- Purpose: Parse the given file according to the provided flags. */ void CParseFile::Parse(FILE * pfileSrc, FILE * pfileDest, DWORD dwFlags) { _bSkipWhitespace = BOOLIFY(dwFlags & PFF_WHITESPACE); _pfileSrc = pfileSrc; _pfileDest = pfileDest; _ichRead = 0; _cchRead = 0; _ichWrite = 0; _ch = 0; if (dwFlags & PFF_HTML) _ParseHtml(); else if (dwFlags & PFF_HTC) _ParseHtc(); else if (dwFlags & PFF_JS) _ParseJS(); else _ParseInf(); _FlushWriteBuffer(); } /*------------------------------------------------------------------------- Purpose: Read the next character in the file. Sets _ch. */ char CParseFile::_ReadChar(DWORD dwFlags) { BOOL bFirstCharSav = _bFirstChar; do { _ichRead++; _bFirstChar = FALSE; // Are we past the buffer, or do we skip to next line? if (_ichRead >= _cchRead || dwFlags & RCF_NEXTLINE) { // Yes; read in more if (fgets(_szReadBuf, SIZECHARS(_szReadBuf), _pfileSrc)) { _ichRead = 0; _cchRead = strlen(_szReadBuf); _bFirstChar = TRUE; } else { _ichRead = 0; _cchRead = 0; } } if (_ichRead < _cchRead) _ch = _szReadBuf[_ichRead]; else _ch = CHAR_EOF; } while ((dwFlags & RCF_NEXTNWS) && IS_WHITESPACE(_ch)); // Are we supposed to skip to the next non-whitespace? if (dwFlags & RCF_NEXTNWS) { // Yes; then retain the "first character" state _bFirstChar = bFirstCharSav; } return _ch; } /*------------------------------------------------------------------------- Purpose: Read ahead to the next character in the buffer and return its value, but don't set _ch or increment the read pointer. */ char CParseFile::_SniffChar(int ichAhead) { if (_ichRead + ichAhead < _cchRead) return _szReadBuf[_ichRead + ichAhead]; return 0; } /*------------------------------------------------------------------------- Purpose: Write the character to the file */ void CParseFile::_WriteChar(char ch) { _szWriteBuf[_ichWrite++] = ch; _szWriteBuf[_ichWrite] = 0; if ('\n' == ch || SIZECHARS(_szWriteBuf)-1 == _ichWrite) { fputs(_szWriteBuf, _pfileDest); _ichWrite = 0; } } /*------------------------------------------------------------------------- Purpose: Flushes the write buffer to the file */ void CParseFile::_FlushWriteBuffer(void) { if (_ichWrite > 0) { fputs(_szWriteBuf, _pfileDest); _ichWrite = 0; } } /*------------------------------------------------------------------------- Purpose: Parse a .inf file. */ void CParseFile::_ParseInf(void) { _ReadChar(0); while (CHAR_EOF != _ch) { if (_bFirstChar) { // Is this a comment? if (';' == _ch) { // Yes; skip to next line _ReadChar(RCF_NEXTLINE); continue; } if (_SkipWhitespace()) continue; } _WriteChar(_ch); _ReadChar(0); } } /*------------------------------------------------------------------------- Purpose: Write the current character and the rest of the tag. Assumes _ch is the beginning of the tag ('<'). There are some parts of the tag which may be compacted if _bSkipWhitespace is TRUE. The general rule is only one space is required between attributes, and newlines are converted to spaces if necessary. Anything in quotes (single or double) are left alone. */ void CParseFile::_WriteTag(void) { BOOL bSingleQuotes = FALSE; BOOL bDblQuotes = FALSE; // The end of the tag is the next '>' that is not in single or double-quotes. while (CHAR_EOF != _ch) { if ('\'' == _ch) bSingleQuotes ^= TRUE; else if ('"' == _ch) bDblQuotes ^= TRUE; if (!bSingleQuotes && !bDblQuotes) { // _SkipWhitespace returns TRUE if it skips any whitespace, // which means we've read some more input, which means we should // go to the top of the loop and check for EOF and quotes. if (_bSkipWhitespace && _SkipWhitespace(TRUE)) continue; // End of tag? if ('>' == _ch) { // Yes _WriteChar(_ch); break; } } _WriteChar(_ch); _ReadChar(0); } } /*------------------------------------------------------------------------- Purpose: Skip the current comment tag. Assumes _ch is the beginning of the tag ('<'). */ void CParseFile::_SkipCommentTag(void) { // The end of the tag is the next '-->' while (CHAR_EOF != _ch) { // Is the end of the comment coming up? if ('-' == _ch && _SniffChar(1) == '-' && _SniffChar(2) == '>') { // Yes _ReadChar(0); // skip '-' _ReadChar(0); // skip '>' break; } _ReadChar(0); } } /*------------------------------------------------------------------------- Purpose: Skip leading whitespace. Returns TRUE if anything was skipped */ BOOL CParseFile::_SkipWhitespace(BOOL bPreserveOneSpace) { BOOL bRet = FALSE; if (_bSkipWhitespace) { if (IS_WHITESPACE(_ch)) { // Skip leading whitespace in line _ReadChar(RCF_NEXTNWS); bRet = TRUE; } else if (IS_NEWLINE(_ch)) { // Move to the next line _ReadChar(RCF_NEXTLINE); // Skip leading whitespace on the next line, but don't write // another space char (we'll do that here if necessary) and // ignore the return value since we've already skipped some // whitespace here (return TRUE). _SkipWhitespace(FALSE); bRet = TRUE; } // Write a single space char if we skipped something and the caller // asked us to preserve a space. if (bRet && bPreserveOneSpace) _WriteChar(' '); } return bRet; } /*------------------------------------------------------------------------- Purpose: Skip a C or C++ style comment Returns TRUE if a comment boundary was encountered. */ BOOL CParseFile::_SkipComment(int * pcNestedComment) { BOOL bRet = FALSE; if ('/' == _ch) { // Is this a C++ comment? if ('/' == _SniffChar(1)) { // Yes; skip it to end of line if (!_bFirstChar || !_bSkipWhitespace) _WriteChar('\n'); _ReadChar(RCF_NEXTLINE); bRet = TRUE; } // Is this a C comment? else if ('*' == _SniffChar(1)) { // Yes; skip to respective '*/' _ReadChar(0); // skip '/' _ReadChar(0); // skip '*' (*pcNestedComment)++; bRet = TRUE; } } else if ('*' == _ch) { // Is this the end of a C comment? if ('/' == _SniffChar(1)) { // Yes _ReadChar(0); // skip '*' _ReadChar(0); // skip '/' (*pcNestedComment)--; // Prevent writing an unnecessary '\n' _bFirstChar = TRUE; bRet = TRUE; } } return bRet; } /*------------------------------------------------------------------------- Purpose: Parse the innertext of the STYLE tag, remove any comments */ void CParseFile::_ParseInnerStyle(void) { int cNestedComment = 0; // The end of the tag is the next '' _ReadChar(0); while (CHAR_EOF != _ch) { if (_bFirstChar && _SkipWhitespace()) continue; // Is the end of the styletag section coming up? if ('<' == _ch && _IsTagEqual("/STYLE")) { // Yes break; } if (_SkipComment(&cNestedComment)) continue; if (0 == cNestedComment && !IS_NEWLINE(_ch)) _WriteChar(_ch); _ReadChar(0); } } /*------------------------------------------------------------------------- Purpose: Returns TRUE if the given tagname matches the currently parsed token */ BOOL CParseFile::_IsTagEqual(LPSTR pszTag) { int ich = 1; while (*pszTag) { if (_SniffChar(ich++) != *pszTag++) return FALSE; } // We should verify we've come to the end of the tagName char chEnd = _SniffChar(ich); return (' ' == chEnd || '>' == chEnd || '<' == chEnd); } /*------------------------------------------------------------------------- Purpose: Returns TRUE if the current tag is an end tag */ BOOL CParseFile::_IsEndTag(void) { return (_SniffChar(1) == '/'); } /*------------------------------------------------------------------------- Purpose: Parse a .htm or .hta file. */ void CParseFile::_ParseHtml(void) { BOOL bFollowingTag = FALSE; BOOL bFollowingEndTag = FALSE; _ReadChar(0); while (CHAR_EOF != _ch) { // Anytime we read another char, we should go to the top of the loop // to check for EOF and skip leading whitespace if it's a new line. // // Note that _SkipWhitespace returns TRUE if it has skipped something, // which also involves reading a new char. if (_bFirstChar && _SkipWhitespace()) continue; // Is this a tag? if ('<' == _ch) { // Yes; looks like it // Since we've found a new tag, no need to remember if we just saw // an end tag. That only matters for text content following an end // tag. For example, given "foo bar", we need to // preserve a space before the word "bar". bFollowingEndTag = FALSE; if (_IsTagEqual("!--")) { // Comment; skip it _SkipCommentTag(); } else if (_IsTagEqual("SCRIPT")) { // Parse the script _WriteTag(); // write the tag } else if (_IsTagEqual("STYLE")) { _WriteTag(); // write the tag } else { // Check for end tag (" // or , but we can at least collapse it down to a single space. BOOL bPreserveOneSpace = bFollowingEndTag; bFollowingEndTag = FALSE; bFollowingTag = FALSE; if (_SkipWhitespace(bPreserveOneSpace)) continue; } _WriteChar(_ch); _ReadChar(0); } } /*------------------------------------------------------------------------- Purpose: Parse a .js file. */ void CParseFile::_ParseJS(void) { BOOL bDblQuotes = FALSE; BOOL bSingleQuotes = FALSE; int cNestedComment = 0; _ReadChar(0); while (CHAR_EOF != _ch) { // Are we in a comment? if (0 == cNestedComment) { // No; (we only pay attention to strings when they're not in comments) if ('\'' == _ch) bSingleQuotes ^= TRUE; else if ('"' == _ch) bDblQuotes ^= TRUE; if (_bSkipWhitespace && !bDblQuotes && !bSingleQuotes) { if (IS_WHITESPACE(_ch)) { // Skip whitespace if (!_bFirstChar) _WriteChar(' '); _ReadChar(RCF_NEXTNWS); continue; } else if (IS_NEWLINE(_ch)) { // Since javascript doesn't require a ';' at the end of a statement, // we should at least replace the newline with a space so tokens don't // get appended accidentally. // The javascript engine has a line-length limit. So don't replace // a newline with a space. if (!_bFirstChar) _WriteChar('\n'); _ReadChar(RCF_NEXTLINE); continue; } } // Are we in a string? if (!bDblQuotes && !bSingleQuotes) { // No; look for the terminating SCRIPT tag if ('<' == _ch) { if (_IsTagEqual("/SCRIPT")) { // We've reached the end of the script block break; } } } } // Are we in a string? if (!bDblQuotes && !bSingleQuotes) { // No; look for comments... if (_SkipComment(&cNestedComment)) continue; } if (0 == cNestedComment) _WriteChar(_ch); _ReadChar(0); } } /*------------------------------------------------------------------------- Purpose: Parse a .htc file. */ void CParseFile::_ParseHtc(void) { BOOL bFollowingTag = FALSE; int cNestedComment = 0; _ReadChar(0); while (CHAR_EOF != _ch) { if (_bFirstChar && _SkipWhitespace()) continue; // Is this a tag? if ('<' == _ch) { // Yes; is it a script tag? if (_IsTagEqual("SCRIPT")) { // Yes; parse the script _WriteTag(); // write the tag _ReadChar(0); bFollowingTag = TRUE; continue; } else { _WriteTag(); _ReadChar(0); bFollowingTag = TRUE; continue; } } // Look for comments outside the SCRIPT block... if (_SkipComment(&cNestedComment)) continue; if (bFollowingTag && _bSkipWhitespace) { bFollowingTag = FALSE; if (_SkipWhitespace()) continue; } if (0 == cNestedComment) _WriteChar(_ch); _ReadChar(0); } }