Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

668 lines
9.2 KiB

  1. /*++
  2. Copyright (c) 1995 Microsoft Corporation
  3. Module Name :
  4. parse.hxx
  5. Abstract:
  6. Simple parser class for extrapolating HTTP headers information
  7. Author:
  8. John Ludeman (JohnL) 18-Jan-1995
  9. Project:
  10. HTTP server
  11. Revision History:
  12. --*/
  13. #include <tcpdllp.hxx>
  14. # include <parse.hxx>
  15. INET_PARSER::INET_PARSER(
  16. CHAR * pszStart
  17. )
  18. /*++
  19. Routine Description:
  20. Sets the initial position of the buffer for parsing
  21. Arguments:
  22. pszStart - start of character buffer
  23. pszEnd - End of buffer
  24. Return Value:
  25. --*/
  26. : m_fListMode ( FALSE ),
  27. m_pszPos ( pszStart ),
  28. m_pszTokenTerm( NULL ),
  29. m_pszLineTerm ( NULL )
  30. {
  31. DBG_ASSERT( pszStart );
  32. //
  33. // Chew up any initial white space at the beginning of the buffer
  34. // and terminate the first token in the string.
  35. //
  36. EatWhite();
  37. TerminateToken();
  38. }
  39. INET_PARSER::~INET_PARSER(
  40. VOID
  41. )
  42. /*++
  43. Routine Description:
  44. Restores any changes we made to the string while parsing
  45. Arguments:
  46. --*/
  47. {
  48. RestoreBuffer();
  49. }
  50. CHAR *
  51. INET_PARSER::QueryPos(
  52. VOID
  53. )
  54. /*++
  55. Routine Description:
  56. Removes the terminators and returns the current parser position
  57. Arguments:
  58. Return Value:
  59. Zero terminated string if we've reached the end of the buffer
  60. --*/
  61. {
  62. RestoreToken();
  63. RestoreLine();
  64. return m_pszPos;
  65. }
  66. VOID
  67. INET_PARSER::SetPtr(
  68. CHAR * pch
  69. )
  70. /*++
  71. Routine Description:
  72. Sets the parser to point at a new location
  73. Arguments:
  74. pch - New position for parser to start parsing from
  75. Return Value:
  76. --*/
  77. {
  78. RestoreToken();
  79. RestoreLine();
  80. m_pszPos = pch;
  81. }
  82. CHAR *
  83. INET_PARSER::QueryToken(
  84. VOID
  85. )
  86. /*++
  87. Routine Description:
  88. Returns a pointer to the current zero terminated token
  89. If list mode is on, then a comma is considered a delimiter.
  90. Arguments:
  91. Return Value:
  92. Zero terminated string if we've reached the end of the buffer
  93. --*/
  94. {
  95. if ( !m_pszTokenTerm )
  96. TerminateToken( m_fListMode ? ',' : '\0' );
  97. return m_pszPos;
  98. }
  99. CHAR *
  100. INET_PARSER::QueryLine(
  101. VOID
  102. )
  103. /*++
  104. Routine Description:
  105. Returns a pointer to the current zero terminated line
  106. Arguments:
  107. Return Value:
  108. Zero terminated string if we've reached the end of the buffer
  109. --*/
  110. {
  111. RestoreToken();
  112. if ( !m_pszLineTerm )
  113. TerminateLine();
  114. return m_pszPos;
  115. }
  116. BOOL
  117. INET_PARSER::CopyToken(
  118. STR * pStr,
  119. BOOL fAdvanceToken
  120. )
  121. /*++
  122. Routine Description:
  123. Copies the token at the current position to *pStr
  124. Arguments:
  125. pStr - Receives token
  126. fAdvanceToken - True if we should advance to the next token
  127. Return Value:
  128. TRUE if successful, FALSE otherwise
  129. --*/
  130. {
  131. BOOL fRet;
  132. DBG_ASSERT( pStr );
  133. if ( !m_pszTokenTerm )
  134. TerminateToken();
  135. fRet = pStr->Copy( m_pszPos );
  136. if ( fAdvanceToken )
  137. NextToken();
  138. return fRet;
  139. }
  140. BOOL
  141. INET_PARSER::CopyToEOL(
  142. STR * pstr,
  143. BOOL fAdvance
  144. )
  145. /*++
  146. Routine Description:
  147. Copies the token at the current character position
  148. Arguments:
  149. --*/
  150. {
  151. BOOL fRet;
  152. RestoreToken();
  153. if ( !m_pszLineTerm )
  154. TerminateLine();
  155. fRet = pstr->Copy( m_pszPos );
  156. if ( fAdvance )
  157. NextLine();
  158. return fRet;
  159. }
  160. BOOL
  161. INET_PARSER::AppendToEOL(
  162. STR * pstr,
  163. BOOL fAdvance
  164. )
  165. /*++
  166. Routine Description:
  167. Same as CopyToEOL except the text from the current line is appended to
  168. pstr
  169. Arguments:
  170. --*/
  171. {
  172. BOOL fRet;
  173. RestoreToken();
  174. if ( !m_pszLineTerm )
  175. TerminateLine();
  176. fRet = pstr->Append( m_pszPos );
  177. if ( fAdvance )
  178. NextLine();
  179. return fRet;
  180. }
  181. CHAR *
  182. INET_PARSER::NextLine(
  183. VOID
  184. )
  185. /*++
  186. Routine Description:
  187. Sets the current position to the first non-white character after the
  188. next '\n' (or terminating '\0').
  189. --*/
  190. {
  191. RestoreToken();
  192. RestoreLine();
  193. m_pszPos = AuxSkipTo( '\n' );
  194. if ( *m_pszPos )
  195. m_pszPos++;
  196. return EatWhite();
  197. }
  198. CHAR *
  199. INET_PARSER::NextToken(
  200. VOID
  201. )
  202. /*++
  203. Routine Description:
  204. Sets the current position to the next non-white character after the
  205. current token
  206. --*/
  207. {
  208. //
  209. // Make sure the line is terminated so a '\0' will be returned after
  210. // the last token is found on this line
  211. //
  212. RestoreToken();
  213. if ( !m_pszLineTerm )
  214. TerminateLine();
  215. //
  216. // Skip the current token
  217. //
  218. EatNonWhite();
  219. EatWhite();
  220. TerminateToken();
  221. return m_pszPos;
  222. }
  223. CHAR *
  224. INET_PARSER::NextToken(
  225. CHAR ch
  226. )
  227. /*++
  228. Routine Description:
  229. Advances the position to the next token after ch (stopping
  230. at the end of the line)
  231. --*/
  232. {
  233. //
  234. // Make sure the line is terminated so a '\0' will be returned after
  235. // the last token is found on this line
  236. //
  237. RestoreToken();
  238. if ( !m_pszLineTerm )
  239. TerminateLine();
  240. //
  241. // Look for the specified character (generally ',' or ';')
  242. //
  243. SkipTo( ch );
  244. if ( *m_pszPos )
  245. m_pszPos++;
  246. EatWhite();
  247. TerminateToken( ch );
  248. return m_pszPos;
  249. }
  250. CHAR *
  251. INET_PARSER::SkipTo(
  252. CHAR ch
  253. )
  254. /*++
  255. Routine Description:
  256. Skips to the specified character or returns a null terminated string
  257. if the end of the line is reached
  258. --*/
  259. {
  260. //
  261. // Make sure the line is terminated so a '\0' will be returned after
  262. // the last token is found on this line
  263. //
  264. RestoreToken();
  265. if ( !m_pszLineTerm )
  266. TerminateLine();
  267. m_pszPos = AuxSkipTo( ch );
  268. return m_pszPos;
  269. }
  270. VOID
  271. INET_PARSER::SetListMode(
  272. BOOL fListMode
  273. )
  274. /*++
  275. Routine Description:
  276. Resets the parser mode to list mode or non-list mode
  277. Arguments:
  278. --*/
  279. {
  280. RestoreToken();
  281. if ( !m_pszLineTerm )
  282. TerminateLine();
  283. m_fListMode = fListMode;
  284. }
  285. VOID
  286. INET_PARSER::TerminateToken(
  287. CHAR ch
  288. )
  289. /*++
  290. Routine Description:
  291. Zero terminates after the white space of the current token
  292. Arguments:
  293. --*/
  294. {
  295. DBG_ASSERT( !m_pszTokenTerm );
  296. m_pszTokenTerm = AuxEatNonWhite( ch );
  297. m_chTokenTerm = *m_pszTokenTerm;
  298. *m_pszTokenTerm = '\0';
  299. }
  300. VOID
  301. INET_PARSER::RestoreToken(
  302. VOID
  303. )
  304. /*++
  305. Routine Description:
  306. Restores the character replaced by the zero terminator
  307. Arguments:
  308. --*/
  309. {
  310. if ( m_pszTokenTerm )
  311. {
  312. *m_pszTokenTerm = m_chTokenTerm;
  313. m_pszTokenTerm = NULL;
  314. }
  315. }
  316. VOID
  317. INET_PARSER::TerminateLine(
  318. VOID
  319. )
  320. /*++
  321. Routine Description:
  322. Zero terminates at the end of this line
  323. Arguments:
  324. --*/
  325. {
  326. DBG_ASSERT( !m_pszLineTerm );
  327. m_pszLineTerm = AuxSkipTo( '\n' );
  328. //
  329. // Now trim any trailing white space on the line
  330. //
  331. if ( m_pszLineTerm > m_pszPos )
  332. {
  333. m_pszLineTerm--;
  334. while ( m_pszLineTerm >= m_pszPos &&
  335. ISWHITEA( *m_pszLineTerm ))
  336. {
  337. m_pszLineTerm--;
  338. }
  339. }
  340. //
  341. // Go forward one (trimming found the last non-white
  342. // character)
  343. //
  344. if ( *m_pszLineTerm &&
  345. *m_pszLineTerm != '\n' &&
  346. !ISWHITEA( *m_pszLineTerm ))
  347. {
  348. m_pszLineTerm++;
  349. }
  350. m_chLineTerm = *m_pszLineTerm;
  351. *m_pszLineTerm = '\0';
  352. }
  353. VOID
  354. INET_PARSER::RestoreLine(
  355. VOID
  356. )
  357. /*++
  358. Routine Description:
  359. Restores the character replaced by the zero terminator
  360. Arguments:
  361. --*/
  362. {
  363. if ( m_pszLineTerm )
  364. {
  365. *m_pszLineTerm = m_chLineTerm;
  366. m_pszLineTerm = NULL;
  367. }
  368. }
  369. CHAR *
  370. INET_PARSER::AuxEatNonWhite(
  371. CHAR ch
  372. )
  373. /*++
  374. Routine Description:
  375. In non list mode returns the first white space character after
  376. the current parse position
  377. In list mode returns the first delimiter ( "';\n" ) character after
  378. the current parse position
  379. Arguments:
  380. ch - Optional character that is considered white space (such as ',' or ';'
  381. when doing list processing).
  382. --*/
  383. {
  384. CHAR * psz = m_pszPos;
  385. //
  386. // Note that ISWHITEA includes '\r'. In list mode, comma and semi-colon
  387. // are considered delimiters
  388. //
  389. if ( !m_fListMode )
  390. {
  391. while ( *psz &&
  392. *psz != '\n' &&
  393. !ISWHITEA(*psz)&&
  394. *psz != ch )
  395. {
  396. psz++;
  397. }
  398. return psz;
  399. }
  400. else
  401. {
  402. while ( *psz &&
  403. *psz != '\n' &&
  404. #if 0
  405. // fix #20931
  406. !ISWHITEA(*psz)&&
  407. #endif
  408. *psz != ',' &&
  409. *psz != ';' &&
  410. *psz != ch )
  411. {
  412. psz++;
  413. }
  414. return psz;
  415. }
  416. }
  417. CHAR *
  418. INET_PARSER::AuxEatWhite(
  419. VOID
  420. )
  421. /*++
  422. Routine Description:
  423. Returns the first non-white space character after the current parse
  424. position
  425. Arguments:
  426. --*/
  427. {
  428. CHAR * psz = m_pszPos;
  429. //
  430. // Note that ISWHITEA includes '\r'
  431. //
  432. while ( *psz &&
  433. *psz != '\n' &&
  434. ISWHITEA(*psz))
  435. {
  436. psz++;
  437. }
  438. return psz;
  439. }
  440. CHAR *
  441. INET_PARSER::AuxSkipTo(
  442. CHAR ch
  443. )
  444. /*++
  445. Routine Description:
  446. Skips to the specified character or returns a null terminated string
  447. if the end of the line is reached
  448. --*/
  449. {
  450. CHAR * psz = m_pszPos;
  451. while ( *psz &&
  452. *psz != '\n' &&
  453. *psz != ch )
  454. {
  455. psz++;
  456. }
  457. return psz;
  458. }