Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

624 lines
16 KiB

  1. #include "priv.h"
  // Character classification helpers.
  // The argument is parenthesized so any expression (for example a
  // conditional) can be passed safely.  Note that IS_WHITESPACE evaluates
  // its argument twice, so do not pass an expression with side effects.
  #define IS_WHITESPACE(ch)   (' ' == (ch) || '\t' == (ch))
  #define IS_NEWLINE(ch)      ('\n' == (ch))

  // Flags for _ReadChar
  #define RCF_NEXTLINE        0x0001      // skip to next line
  #define RCF_NEXTNWS         0x0002      // skip to next non-whitespace
  #define RCF_SKIPTRAILING    0x0004      // skip trailing whitespace
  8. // constructor
  9. CParseFile::CParseFile()
  10. {
  11. }
  12. /*-------------------------------------------------------------------------
  13. Purpose: Parse the given file according to the provided flags.
  14. */
  15. void CParseFile::Parse(FILE * pfileSrc, FILE * pfileDest, DWORD dwFlags)
  16. {
  17. _bSkipWhitespace = BOOLIFY(dwFlags & PFF_WHITESPACE);
  18. _pfileSrc = pfileSrc;
  19. _pfileDest = pfileDest;
  20. _ichRead = 0;
  21. _cchRead = 0;
  22. _ichWrite = 0;
  23. _ch = 0;
  24. if (dwFlags & PFF_HTML)
  25. _ParseHtml();
  26. else if (dwFlags & PFF_HTC)
  27. _ParseHtc();
  28. else if (dwFlags & PFF_JS)
  29. _ParseJS();
  30. else
  31. _ParseInf();
  32. _FlushWriteBuffer();
  33. }
  34. /*-------------------------------------------------------------------------
  35. Purpose: Read the next character in the file. Sets _ch.
  36. */
  37. char CParseFile::_ReadChar(DWORD dwFlags)
  38. {
  39. BOOL bFirstCharSav = _bFirstChar;
  40. do
  41. {
  42. _ichRead++;
  43. _bFirstChar = FALSE;
  44. // Are we past the buffer, or do we skip to next line?
  45. if (_ichRead >= _cchRead || dwFlags & RCF_NEXTLINE)
  46. {
  47. // Yes; read in more
  48. if (fgets(_szReadBuf, SIZECHARS(_szReadBuf), _pfileSrc))
  49. {
  50. _ichRead = 0;
  51. _cchRead = strlen(_szReadBuf);
  52. _bFirstChar = TRUE;
  53. }
  54. else
  55. {
  56. _ichRead = 0;
  57. _cchRead = 0;
  58. }
  59. }
  60. if (_ichRead < _cchRead)
  61. _ch = _szReadBuf[_ichRead];
  62. else
  63. _ch = CHAR_EOF;
  64. } while ((dwFlags & RCF_NEXTNWS) && IS_WHITESPACE(_ch));
  65. // Are we supposed to skip to the next non-whitespace?
  66. if (dwFlags & RCF_NEXTNWS)
  67. {
  68. // Yes; then retain the "first character" state
  69. _bFirstChar = bFirstCharSav;
  70. }
  71. return _ch;
  72. }
  73. /*-------------------------------------------------------------------------
  74. Purpose: Read ahead to the next character in the buffer and return its
  75. value, but don't set _ch or increment the read pointer.
  76. */
  77. char CParseFile::_SniffChar(int ichAhead)
  78. {
  79. if (_ichRead + ichAhead < _cchRead)
  80. return _szReadBuf[_ichRead + ichAhead];
  81. return 0;
  82. }
  83. /*-------------------------------------------------------------------------
  84. Purpose: Write the character to the file
  85. */
  86. void CParseFile::_WriteChar(char ch)
  87. {
  88. _szWriteBuf[_ichWrite++] = ch;
  89. _szWriteBuf[_ichWrite] = 0;
  90. if ('\n' == ch || SIZECHARS(_szWriteBuf)-1 == _ichWrite)
  91. {
  92. fputs(_szWriteBuf, _pfileDest);
  93. _ichWrite = 0;
  94. }
  95. }
  96. /*-------------------------------------------------------------------------
  97. Purpose: Flushes the write buffer to the file
  98. */
  99. void CParseFile::_FlushWriteBuffer(void)
  100. {
  101. if (_ichWrite > 0)
  102. {
  103. fputs(_szWriteBuf, _pfileDest);
  104. _ichWrite = 0;
  105. }
  106. }
  107. /*-------------------------------------------------------------------------
  108. Purpose: Parse a .inf file.
  109. */
  110. void CParseFile::_ParseInf(void)
  111. {
  112. _ReadChar(0);
  113. while (CHAR_EOF != _ch)
  114. {
  115. if (_bFirstChar)
  116. {
  117. // Is this a comment?
  118. if (';' == _ch)
  119. {
  120. // Yes; skip to next line
  121. _ReadChar(RCF_NEXTLINE);
  122. continue;
  123. }
  124. if (_SkipWhitespace())
  125. continue;
  126. }
  127. _WriteChar(_ch);
  128. _ReadChar(0);
  129. }
  130. }
  131. /*-------------------------------------------------------------------------
  132. Purpose: Write the current character and the rest of the tag. Assumes
  133. _ch is the beginning of the tag ('<').
  134. There are some parts of the tag which may be compacted if _bSkipWhitespace
  135. is TRUE. The general rule is only one space is required between attributes,
  136. and newlines are converted to spaces if necessary. Anything in quotes
  137. (single or double) are left alone.
  138. */
  139. void CParseFile::_WriteTag(void)
  140. {
  141. BOOL bSingleQuotes = FALSE;
  142. BOOL bDblQuotes = FALSE;
  143. // The end of the tag is the next '>' that is not in single or double-quotes.
  144. while (CHAR_EOF != _ch)
  145. {
  146. if ('\'' == _ch)
  147. bSingleQuotes ^= TRUE;
  148. else if ('"' == _ch)
  149. bDblQuotes ^= TRUE;
  150. if (!bSingleQuotes && !bDblQuotes)
  151. {
  152. // _SkipWhitespace returns TRUE if it skips any whitespace,
  153. // which means we've read some more input, which means we should
  154. // go to the top of the loop and check for EOF and quotes.
  155. if (_bSkipWhitespace && _SkipWhitespace(TRUE))
  156. continue;
  157. // End of tag?
  158. if ('>' == _ch)
  159. {
  160. // Yes
  161. _WriteChar(_ch);
  162. break;
  163. }
  164. }
  165. _WriteChar(_ch);
  166. _ReadChar(0);
  167. }
  168. }
  169. /*-------------------------------------------------------------------------
  170. Purpose: Skip the current comment tag. Assumes _ch is the beginning of
  171. the tag ('<').
  172. */
  173. void CParseFile::_SkipCommentTag(void)
  174. {
  175. // The end of the tag is the next '-->'
  176. while (CHAR_EOF != _ch)
  177. {
  178. // Is the end of the comment coming up?
  179. if ('-' == _ch && _SniffChar(1) == '-' && _SniffChar(2) == '>')
  180. {
  181. // Yes
  182. _ReadChar(0); // skip '-'
  183. _ReadChar(0); // skip '>'
  184. break;
  185. }
  186. _ReadChar(0);
  187. }
  188. }
  189. /*-------------------------------------------------------------------------
  190. Purpose: Skip leading whitespace.
  191. Returns TRUE if anything was skipped
  192. */
  193. BOOL CParseFile::_SkipWhitespace(BOOL bPreserveOneSpace)
  194. {
  195. BOOL bRet = FALSE;
  196. if (_bSkipWhitespace)
  197. {
  198. if (IS_WHITESPACE(_ch))
  199. {
  200. // Skip leading whitespace in line
  201. _ReadChar(RCF_NEXTNWS);
  202. bRet = TRUE;
  203. }
  204. else if (IS_NEWLINE(_ch))
  205. {
  206. // Move to the next line
  207. _ReadChar(RCF_NEXTLINE);
  208. // Skip leading whitespace on the next line, but don't write
  209. // another space char (we'll do that here if necessary) and
  210. // ignore the return value since we've already skipped some
  211. // whitespace here (return TRUE).
  212. _SkipWhitespace(FALSE);
  213. bRet = TRUE;
  214. }
  215. // Write a single space char if we skipped something and the caller
  216. // asked us to preserve a space.
  217. if (bRet && bPreserveOneSpace)
  218. _WriteChar(' ');
  219. }
  220. return bRet;
  221. }
  222. /*-------------------------------------------------------------------------
  223. Purpose: Skip a C or C++ style comment
  224. Returns TRUE if a comment boundary was encountered.
  225. */
  226. BOOL CParseFile::_SkipComment(int * pcNestedComment)
  227. {
  228. BOOL bRet = FALSE;
  229. if ('/' == _ch)
  230. {
  231. // Is this a C++ comment?
  232. if ('/' == _SniffChar(1))
  233. {
  234. // Yes; skip it to end of line
  235. if (!_bFirstChar || !_bSkipWhitespace)
  236. _WriteChar('\n');
  237. _ReadChar(RCF_NEXTLINE);
  238. bRet = TRUE;
  239. }
  240. // Is this a C comment?
  241. else if ('*' == _SniffChar(1))
  242. {
  243. // Yes; skip to respective '*/'
  244. _ReadChar(0); // skip '/'
  245. _ReadChar(0); // skip '*'
  246. (*pcNestedComment)++;
  247. bRet = TRUE;
  248. }
  249. }
  250. else if ('*' == _ch)
  251. {
  252. // Is this the end of a C comment?
  253. if ('/' == _SniffChar(1))
  254. {
  255. // Yes
  256. _ReadChar(0); // skip '*'
  257. _ReadChar(0); // skip '/'
  258. (*pcNestedComment)--;
  259. // Prevent writing an unnecessary '\n'
  260. _bFirstChar = TRUE;
  261. bRet = TRUE;
  262. }
  263. }
  264. return bRet;
  265. }
  266. /*-------------------------------------------------------------------------
  267. Purpose: Parse the innertext of the STYLE tag, remove any comments
  268. */
  269. void CParseFile::_ParseInnerStyle(void)
  270. {
  271. int cNestedComment = 0;
  272. // The end of the tag is the next '</STYLE>'
  273. _ReadChar(0);
  274. while (CHAR_EOF != _ch)
  275. {
  276. if (_bFirstChar && _SkipWhitespace())
  277. continue;
  278. // Is the end of the styletag section coming up?
  279. if ('<' == _ch && _IsTagEqual("/STYLE"))
  280. {
  281. // Yes
  282. break;
  283. }
  284. if (_SkipComment(&cNestedComment))
  285. continue;
  286. if (0 == cNestedComment && !IS_NEWLINE(_ch))
  287. _WriteChar(_ch);
  288. _ReadChar(0);
  289. }
  290. }
  291. /*-------------------------------------------------------------------------
  292. Purpose: Returns TRUE if the given tagname matches the currently parsed token
  293. */
  294. BOOL CParseFile::_IsTagEqual(LPSTR pszTag)
  295. {
  296. int ich = 1;
  297. while (*pszTag)
  298. {
  299. if (_SniffChar(ich++) != *pszTag++)
  300. return FALSE;
  301. }
  302. // We should verify we've come to the end of the tagName
  303. char chEnd = _SniffChar(ich);
  304. return (' ' == chEnd || '>' == chEnd || '<' == chEnd);
  305. }
  306. /*-------------------------------------------------------------------------
  307. Purpose: Returns TRUE if the current tag is an end tag
  308. */
  309. BOOL CParseFile::_IsEndTag(void)
  310. {
  311. return (_SniffChar(1) == '/');
  312. }
  313. /*-------------------------------------------------------------------------
  314. Purpose: Parse a .htm or .hta file.
  315. */
  316. void CParseFile::_ParseHtml(void)
  317. {
  318. BOOL bFollowingTag = FALSE;
  319. BOOL bFollowingEndTag = FALSE;
  320. _ReadChar(0);
  321. while (CHAR_EOF != _ch)
  322. {
  323. // Anytime we read another char, we should go to the top of the loop
  324. // to check for EOF and skip leading whitespace if it's a new line.
  325. //
  326. // Note that _SkipWhitespace returns TRUE if it has skipped something,
  327. // which also involves reading a new char.
  328. if (_bFirstChar && _SkipWhitespace())
  329. continue;
  330. // Is this a tag?
  331. if ('<' == _ch)
  332. {
  333. // Yes; looks like it
  334. // Since we've found a new tag, no need to remember if we just saw
  335. // an end tag. That only matters for text content following an end
  336. // tag. For example, given "<SPAN>foo</SPAN> bar", we need to
  337. // preserve a space before the word "bar".
  338. bFollowingEndTag = FALSE;
  339. if (_IsTagEqual("!--"))
  340. {
  341. // Comment; skip it
  342. _SkipCommentTag();
  343. }
  344. else if (_IsTagEqual("SCRIPT"))
  345. {
  346. // Parse the script
  347. _WriteTag(); // write the <SCRIPT> tag
  348. // FEATURE (scotth): we always assume javascript, maybe we should support something else
  349. _ParseJS();
  350. _WriteTag(); // write the </SCRIPT> tag
  351. }
  352. else if (_IsTagEqual("STYLE"))
  353. {
  354. _WriteTag(); // write the <STYLE> tag
  355. _ParseInnerStyle();
  356. _WriteTag(); // write the </STYLE> tag
  357. }
  358. else
  359. {
  360. // Check for end tag ("</") before calling _WriteTag
  361. bFollowingEndTag = _IsEndTag();
  362. // Any other tag: write the tag and go to the next one
  363. _WriteTag();
  364. }
  365. bFollowingTag = TRUE;
  366. _ReadChar(0);
  367. continue;
  368. }
  369. if (bFollowingTag && _bSkipWhitespace)
  370. {
  371. // We can't entirely skip whitespace following tags such as </SPAN>
  372. // or </A>, but we can at least collapse it down to a single space.
  373. BOOL bPreserveOneSpace = bFollowingEndTag;
  374. bFollowingEndTag = FALSE;
  375. bFollowingTag = FALSE;
  376. if (_SkipWhitespace(bPreserveOneSpace))
  377. continue;
  378. }
  379. _WriteChar(_ch);
  380. _ReadChar(0);
  381. }
  382. }
  383. /*-------------------------------------------------------------------------
  384. Purpose: Parse a .js file.
  385. */
  386. void CParseFile::_ParseJS(void)
  387. {
  388. BOOL bDblQuotes = FALSE;
  389. BOOL bSingleQuotes = FALSE;
  390. int cNestedComment = 0;
  391. _ReadChar(0);
  392. while (CHAR_EOF != _ch)
  393. {
  394. // Are we in a comment?
  395. if (0 == cNestedComment)
  396. {
  397. // No; (we only pay attention to strings when they're not in comments)
  398. if ('\'' == _ch)
  399. bSingleQuotes ^= TRUE;
  400. else if ('"' == _ch)
  401. bDblQuotes ^= TRUE;
  402. if (_bSkipWhitespace && !bDblQuotes && !bSingleQuotes)
  403. {
  404. if (IS_WHITESPACE(_ch))
  405. {
  406. // Skip whitespace
  407. if (!_bFirstChar)
  408. _WriteChar(' ');
  409. _ReadChar(RCF_NEXTNWS);
  410. continue;
  411. }
  412. else if (IS_NEWLINE(_ch))
  413. {
  414. // Since javascript doesn't require a ';' at the end of a statement,
  415. // we should at least replace the newline with a space so tokens don't
  416. // get appended accidentally.
  417. // The javascript engine has a line-length limit. So don't replace
  418. // a newline with a space.
  419. if (!_bFirstChar)
  420. _WriteChar('\n');
  421. _ReadChar(RCF_NEXTLINE);
  422. continue;
  423. }
  424. }
  425. // Are we in a string?
  426. if (!bDblQuotes && !bSingleQuotes)
  427. {
  428. // No; look for the terminating SCRIPT tag
  429. if ('<' == _ch)
  430. {
  431. if (_IsTagEqual("/SCRIPT"))
  432. {
  433. // We've reached the end of the script block
  434. break;
  435. }
  436. }
  437. }
  438. }
  439. // Are we in a string?
  440. if (!bDblQuotes && !bSingleQuotes)
  441. {
  442. // No; look for comments...
  443. if (_SkipComment(&cNestedComment))
  444. continue;
  445. }
  446. if (0 == cNestedComment)
  447. _WriteChar(_ch);
  448. _ReadChar(0);
  449. }
  450. }
  451. /*-------------------------------------------------------------------------
  452. Purpose: Parse a .htc file.
  453. */
  454. void CParseFile::_ParseHtc(void)
  455. {
  456. BOOL bFollowingTag = FALSE;
  457. int cNestedComment = 0;
  458. _ReadChar(0);
  459. while (CHAR_EOF != _ch)
  460. {
  461. if (_bFirstChar && _SkipWhitespace())
  462. continue;
  463. // Is this a tag?
  464. if ('<' == _ch)
  465. {
  466. // Yes; is it a script tag?
  467. if (_IsTagEqual("SCRIPT"))
  468. {
  469. // Yes; parse the script
  470. _WriteTag(); // write the <SCRIPT> tag
  471. // FEATURE (scotth): we always assume javascript
  472. _ParseJS();
  473. _WriteTag(); // write the </SCRIPT> tag
  474. _ReadChar(0);
  475. bFollowingTag = TRUE;
  476. continue;
  477. }
  478. else
  479. {
  480. _WriteTag();
  481. _ReadChar(0);
  482. bFollowingTag = TRUE;
  483. continue;
  484. }
  485. }
  486. // Look for comments outside the SCRIPT block...
  487. if (_SkipComment(&cNestedComment))
  488. continue;
  489. if (bFollowingTag && _bSkipWhitespace)
  490. {
  491. bFollowingTag = FALSE;
  492. if (_SkipWhitespace())
  493. continue;
  494. }
  495. if (0 == cNestedComment)
  496. _WriteChar(_ch);
  497. _ReadChar(0);
  498. }
  499. }