Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

560 lines
15 KiB

  1. #include <wininetp.h>
  2. #include <perfdiag.hxx>
  3. #include "httpp.h"
  4. //
  5. // HTTP_HEADER_PARSER implementation
  6. //
  7. HTTP_HEADER_PARSER::HTTP_HEADER_PARSER(
  8. IN LPSTR szHeaders,
  9. IN DWORD cbHeaders
  10. ) : HTTP_HEADERS()
  11. /*++
  12. Routine Description:
  13. Constructor for the HTTP_HEADER_PARSER object. Calls ParseHeaders to
  14. build a parsed version of the header string passed in.
  15. Arguments:
  16. szHeaders - pointer to the headers to parse
  17. cbHeaders - length of the headers
  18. Return Value:
  19. None.
  20. --*/
  21. {
  22. DWORD dwBytesScaned = 0;
  23. BOOL fFoundCompleteLine;
  24. BOOL fFoundEndOfHeaders;
  25. DWORD error;
  26. error = ParseHeaders(
  27. szHeaders,
  28. cbHeaders,
  29. TRUE, // Eof
  30. &dwBytesScaned,
  31. &fFoundCompleteLine,
  32. &fFoundEndOfHeaders
  33. );
  34. INET_ASSERT(error == ERROR_SUCCESS);
  35. INET_ASSERT(fFoundCompleteLine);
  36. INET_ASSERT(fFoundEndOfHeaders);
  37. }
  38. BOOL
  39. HTTP_HEADER_PARSER::ParseStatusLine(
  40. IN LPSTR lpHeaderBase,
  41. IN DWORD dwBufferLength,
  42. IN BOOL fEof,
  43. IN OUT DWORD *lpdwBufferLengthScanned,
  44. OUT DWORD *lpdwStatusCode,
  45. OUT DWORD *lpdwMajorVersion,
  46. OUT DWORD *lpdwMinorVersion
  47. )
  48. /*++
  49. Routine Description:
  50. Parses the Status line of an HTTP server response. Takes care of adding the status
  51. line to HTTP header array.
  52. Arguments:
  53. lpszHeader - pointer to the header to check
  54. dwHeaderLength - length of the header
  55. Return Value:
  56. BOOL - TRUE if line was successively parsed and processed, FALSE otherwise
  57. --*/
  58. {
  59. #define BEFORE_VERSION_NUMBERS 0
  60. #define MAJOR_VERSION_NUMBER 1
  61. #define MINOR_VERSION_NUMBER 2
  62. #define STATUS_CODE_NUMBER 3
  63. #define AFTER_STATUS_CODE 4
  64. #define MAX_STATUS_INTS 4
  65. LPSTR lpszEnd = lpHeaderBase + dwBufferLength;
  66. LPSTR response = lpHeaderBase + *lpdwBufferLengthScanned;
  67. DWORD dwBytesScanned = 0;
  68. DWORD dwStatusLineSize = 0;
  69. LPSTR lpszStatusLine;
  70. int ver_state = BEFORE_VERSION_NUMBERS;
  71. DWORD adwStatusInts[MAX_STATUS_INTS];
  72. BOOL success = TRUE;
  73. for ( int i = 0; i < MAX_STATUS_INTS; i++)
  74. adwStatusInts[i] = 0;
  75. lpszStatusLine = response;
  76. //
  77. // While walking the Status Line looking for terminating \r\n,
  78. // we extract the Major.Minor Versions and Status Code in that order.
  79. // text and spaces will lie between/before/after the three numbers
  80. // but the idea is to remeber which number we're calculating based on a numeric state
  81. // If all goes well the loop will churn out an array with the 3 numbers plugged in as DWORDs
  82. //
  83. while ((response < lpszEnd) && (*response != '\r') && (*response != '\n'))
  84. {
  85. // below should be wrapped in while (response[i] != ' ') to be more robust???
  86. switch (ver_state)
  87. {
  88. case BEFORE_VERSION_NUMBERS:
  89. if (*response == '/')
  90. {
  91. INET_ASSERT(ver_state == BEFORE_VERSION_NUMBERS);
  92. ver_state++; // = MAJOR_VERSION_NUMBER
  93. }
  94. else if (*response == ' ')
  95. {
  96. ver_state = STATUS_CODE_NUMBER;
  97. }
  98. break;
  99. case MAJOR_VERSION_NUMBER:
  100. if (*response == '.')
  101. {
  102. INET_ASSERT(ver_state == MAJOR_VERSION_NUMBER);
  103. ver_state++; // = MINOR_VERSION_NUMBER
  104. break;
  105. }
  106. // fall through
  107. case MINOR_VERSION_NUMBER:
  108. if (*response == ' ')
  109. {
  110. INET_ASSERT(ver_state == MINOR_VERSION_NUMBER);
  111. ver_state++; // = STATUS_CODE_NUMBER
  112. break;
  113. }
  114. // fall through
  115. case STATUS_CODE_NUMBER:
  116. if (isdigit(*response)) {
  117. int val = *response - '0';
  118. adwStatusInts[ver_state] = adwStatusInts[ver_state] * 10 + val;
  119. }
  120. else if ( adwStatusInts[STATUS_CODE_NUMBER] > 0 )
  121. {
  122. //
  123. // we eat spaces before status code is found,
  124. // once we have the status code we can go on to the next
  125. // state on the next non-digit. This is done
  126. // to cover cases with several spaces between version
  127. // and the status code number.
  128. //
  129. INET_ASSERT(ver_state == STATUS_CODE_NUMBER);
  130. ver_state++; // = AFTER_STATUS_CODE
  131. break;
  132. } else if (!isspace(*response)) {
  133. adwStatusInts[ver_state] = (DWORD)-1;
  134. }
  135. break;
  136. case AFTER_STATUS_CODE:
  137. break;
  138. }
  139. ++response;
  140. ++dwBytesScanned;
  141. }
  142. dwStatusLineSize = dwBytesScanned;
  143. if (response == lpszEnd) {
  144. //
  145. // response now points one past the end of the buffer. We may be looking
  146. // over the edge...
  147. //
  148. // if we're at the end of the connection then the server sent us an
  149. // incorrectly formatted response. Probably an error.
  150. //
  151. // Otherwise its a partial response. We need more
  152. //
  153. DEBUG_PRINT(HTTP,
  154. INFO,
  155. ("found end of short response in status line\n"
  156. ));
  157. success = fEof ? TRUE : FALSE;
  158. //
  159. // if we really hit the end of the response then update the amount of
  160. // headers scanned
  161. //
  162. if (!success) {
  163. dwBytesScanned = 0;
  164. }
  165. goto quit;
  166. }
  167. while ((response < lpszEnd)
  168. && ((*response == '\r') || (*response == ' '))) {
  169. ++response;
  170. ++dwBytesScanned;
  171. }
  172. if (response == lpszEnd) {
  173. //
  174. // hit end of buffer without finding LF
  175. //
  176. success = FALSE;
  177. DEBUG_PRINT(HTTP,
  178. WARNING,
  179. ("hit end of buffer without finding LF\n"
  180. ));
  181. goto quit;
  182. } else if (*response == '\n') {
  183. ++response;
  184. ++dwBytesScanned;
  185. //
  186. // if we found the empty line then we are done
  187. //
  188. success = TRUE;
  189. }
  190. INET_ASSERT(success);
  191. //
  192. // Now we have our parsed header to add to the array
  193. //
  194. HEADER_STRING * freeHeader;
  195. DWORD iSlot;
  196. freeHeader = FindFreeSlot(&iSlot);
  197. if (freeHeader == NULL) {
  198. INET_ASSERT(FALSE);
  199. success = FALSE;
  200. goto quit;
  201. } else {
  202. INET_ASSERT(iSlot == 0); // status line should always be first
  203. freeHeader->CreateOffsetString((DWORD)(lpszStatusLine - lpHeaderBase), dwStatusLineSize);
  204. freeHeader->SetHash(0); // status line has no hash value.
  205. }
  206. quit:
  207. *lpdwStatusCode = adwStatusInts[STATUS_CODE_NUMBER];
  208. *lpdwMajorVersion = adwStatusInts[MAJOR_VERSION_NUMBER];
  209. *lpdwMinorVersion = adwStatusInts[MINOR_VERSION_NUMBER];
  210. *lpdwBufferLengthScanned += dwBytesScanned;
  211. return success;
  212. }
  213. DWORD
  214. HTTP_HEADER_PARSER::ParseHeaders(
  215. IN LPSTR lpHeaderBase,
  216. IN DWORD dwBufferLength,
  217. IN BOOL fEof,
  218. IN OUT DWORD *lpdwBufferLengthScanned,
  219. OUT LPBOOL pfFoundCompleteLine,
  220. OUT LPBOOL pfFoundEndOfHeaders
  221. )
  222. /*++
  223. Routine Description:
  224. Loads headers into HTTP_HEADERS member for subsequent parsing.
  225. Parses string based headers and adds their parts to an internally stored
  226. array of HTTP_HEADERS.
  227. Input is assumed to be well formed Header Name/Value pairs, each deliminated
  228. by ':' and '\r\n'.
  229. Arguments:
  230. lpszHeader - pointer to the header to check
  231. dwHeaderLength - length of the header
  232. Return Value:
  233. None.
  234. --*/
  235. {
  236. LPSTR lpszEnd = lpHeaderBase + dwBufferLength;
  237. LPSTR response = lpHeaderBase + *lpdwBufferLengthScanned;
  238. DWORD dwBytesScanned = 0;
  239. BOOL success = FALSE;
  240. DWORD error = ERROR_SUCCESS;
  241. *pfFoundEndOfHeaders = FALSE;
  242. //
  243. // Each iteration of the following loop
  244. // walks an HTTP header line of the form:
  245. // HeaderName: HeaderValue\r\n
  246. //
  247. do
  248. {
  249. DWORD dwHash = HEADER_HASH_SEED;
  250. LPSTR lpszHeaderName;
  251. DWORD dwHeaderNameLength = 0;
  252. DWORD dwHeaderLineLength = 0;
  253. DWORD dwPreviousAmountOfBytesScanned = dwBytesScanned;
  254. //
  255. // Remove leading whitespace from header
  256. //
  257. while ( (response < lpszEnd) && ((*response == ' ') || (*response == '\t')) )
  258. {
  259. ++response;
  260. ++dwBytesScanned;
  261. }
  262. //
  263. // Scan for HeaderName:
  264. //
  265. lpszHeaderName = response;
  266. dwPreviousAmountOfBytesScanned = dwBytesScanned;
  267. while ((response < lpszEnd) && (*response != ':') && (*response != '\r') && (*response != '\n'))
  268. {
  269. //
  270. // This code incapsulates CalculateHashNoCase as an optimization,
  271. // we attempt to calculate the Hash value as we parse the header.
  272. //
  273. CHAR ch = *response;
  274. if ((ch >= 'A') && (ch <= 'Z')) {
  275. ch = MAKE_LOWER(ch);
  276. }
  277. dwHash += (DWORD)(dwHash << 5) + ch;
  278. ++response;
  279. ++dwBytesScanned;
  280. }
  281. dwHeaderNameLength = (DWORD) (response - lpszHeaderName);
  282. //
  283. // catch bogus responses: if we find what looks like one of a (very)
  284. // small set of HTML tags, then assume the previous header was the
  285. // last
  286. //
  287. if ((dwHeaderNameLength >= sizeof("<HTML>") - 1)
  288. && (*lpszHeaderName == '<')
  289. && (!strnicmp(lpszHeaderName, "<HTML>", sizeof("<HTML>") - 1)
  290. || !strnicmp(lpszHeaderName, "<HEAD>", sizeof("<HEAD>") - 1))) {
  291. *pfFoundEndOfHeaders = TRUE;
  292. break;
  293. }
  294. //
  295. // Keep scanning till end of the line.
  296. //
  297. while ((response < lpszEnd) && (*response != '\r') && (*response != '\n'))
  298. {
  299. ++response;
  300. ++dwBytesScanned;
  301. }
  302. dwHeaderLineLength = (DWORD) (response - lpszHeaderName); // note: this headerLINElength
  303. if (response == lpszEnd) {
  304. //
  305. // response now points one past the end of the buffer. We may be looking
  306. // over the edge...
  307. //
  308. // if we're at the end of the connection then the server sent us an
  309. // incorrectly formatted response. Probably an error.
  310. //
  311. // Otherwise its a partial response. We need more
  312. //
  313. DEBUG_PRINT(HTTP,
  314. INFO,
  315. ("found end of short response\n"
  316. ));
  317. success = fEof ? TRUE : FALSE;
  318. //
  319. // if we really hit the end of the response then update the amount of
  320. // headers scanned
  321. //
  322. if (!success) {
  323. dwBytesScanned = dwPreviousAmountOfBytesScanned;
  324. }
  325. break;
  326. }
  327. else
  328. {
  329. //
  330. // we reached a CR or LF. This is the end of this current header. Find
  331. // the start of the next one
  332. //
  333. //
  334. // first, strip off any trailing spaces from the current header. We do
  335. // this by simply reducing the string length. We only look for space
  336. // and tab characters. Only do this if we have a non-zero length header
  337. //
  338. if (dwHeaderLineLength != 0) {
  339. for (int i = -1; response[i] == ' ' || response[i] == '\t'; --i) {
  340. --dwHeaderLineLength;
  341. }
  342. }
  343. INET_ASSERT((int)dwHeaderLineLength >= 0);
  344. //
  345. // some servers respond with "\r\r\n". Lame
  346. // A new twist: "\r \r\n". Lamer
  347. //
  348. while ((response < lpszEnd)
  349. && ((*response == '\r') || (*response == ' '))) {
  350. ++response;
  351. ++dwBytesScanned;
  352. }
  353. if (response == lpszEnd) {
  354. //
  355. // hit end of buffer without finding LF
  356. //
  357. success = FALSE;
  358. DEBUG_PRINT(HTTP,
  359. WARNING,
  360. ("hit end of buffer without finding LF\n"
  361. ));
  362. //
  363. // get more data, reparse this line
  364. //
  365. dwBytesScanned = dwPreviousAmountOfBytesScanned;
  366. break;
  367. } else if (*response == '\n') {
  368. ++response;
  369. ++dwBytesScanned;
  370. //
  371. // if we found the empty line then we are done
  372. //
  373. if (dwHeaderLineLength == 0) {
  374. *pfFoundEndOfHeaders = TRUE;
  375. break;
  376. }
  377. success = TRUE;
  378. }
  379. }
  380. //
  381. // Now we have our parsed header to add to the array
  382. //
  383. HEADER_STRING * freeHeader;
  384. DWORD iSlot;
  385. freeHeader = FindFreeSlot(&iSlot);
  386. if (freeHeader == NULL) {
  387. error = GetError();
  388. INET_ASSERT(error != ERROR_SUCCESS);
  389. goto quit;
  390. } else {
  391. freeHeader->CreateOffsetString((DWORD) (lpszHeaderName - lpHeaderBase), dwHeaderLineLength);
  392. freeHeader->SetHash(dwHash);
  393. }
  394. //CHAR szTemp[256];
  395. //
  396. //memcpy(szTemp, lpszHeaderName, dwHeaderLineLength);
  397. //lpszHeaderName[dwHeaderLineLength] = '\0';
  398. //DEBUG_PRINT(HTTP,
  399. // INFO,
  400. // ("ParseHeaders: adding=%q\n", lpszHeaderName
  401. // ));
  402. //
  403. // Now see if this is a known header we are adding, if so then we note that fact
  404. //
  405. DWORD dwKnownQueryIndex;
  406. if (HeaderMatch(dwHash, lpszHeaderName, dwHeaderNameLength, &dwKnownQueryIndex) )
  407. {
  408. freeHeader->SetNextKnownIndex(FastAdd(dwKnownQueryIndex, iSlot));
  409. }
  410. } while (TRUE);
  411. quit:
  412. *lpdwBufferLengthScanned += dwBytesScanned;
  413. *pfFoundCompleteLine = success;
  414. return error;
  415. }