#include #include #include "httpp.h" // // HTTP_HEADER_PARSER implementation // HTTP_HEADER_PARSER::HTTP_HEADER_PARSER( IN LPSTR szHeaders, IN DWORD cbHeaders ) : HTTP_HEADERS() /*++ Routine Description: Constructor for the HTTP_HEADER_PARSER object. Calls ParseHeaders to build a parsed version of the header string passed in. Arguments: szHeaders - pointer to the headers to parse cbHeaders - length of the headers Return Value: None. --*/ { DWORD dwBytesScaned = 0; BOOL fFoundCompleteLine; BOOL fFoundEndOfHeaders; DWORD error; error = ParseHeaders( szHeaders, cbHeaders, TRUE, // Eof &dwBytesScaned, &fFoundCompleteLine, &fFoundEndOfHeaders ); INET_ASSERT(error == ERROR_SUCCESS); INET_ASSERT(fFoundCompleteLine); INET_ASSERT(fFoundEndOfHeaders); } BOOL HTTP_HEADER_PARSER::ParseStatusLine( IN LPSTR lpHeaderBase, IN DWORD dwBufferLength, IN BOOL fEof, IN OUT DWORD *lpdwBufferLengthScanned, OUT DWORD *lpdwStatusCode, OUT DWORD *lpdwMajorVersion, OUT DWORD *lpdwMinorVersion ) /*++ Routine Description: Parses the Status line of an HTTP server response. Takes care of adding the status line to HTTP header array. Arguments: lpszHeader - pointer to the header to check dwHeaderLength - length of the header Return Value: BOOL - TRUE if line was successively parsed and processed, FALSE otherwise --*/ { #define BEFORE_VERSION_NUMBERS 0 #define MAJOR_VERSION_NUMBER 1 #define MINOR_VERSION_NUMBER 2 #define STATUS_CODE_NUMBER 3 #define AFTER_STATUS_CODE 4 #define MAX_STATUS_INTS 4 LPSTR lpszEnd = lpHeaderBase + dwBufferLength; LPSTR response = lpHeaderBase + *lpdwBufferLengthScanned; DWORD dwBytesScanned = 0; DWORD dwStatusLineSize = 0; LPSTR lpszStatusLine; int ver_state = BEFORE_VERSION_NUMBERS; DWORD adwStatusInts[MAX_STATUS_INTS]; BOOL success = TRUE; for ( int i = 0; i < MAX_STATUS_INTS; i++) adwStatusInts[i] = 0; lpszStatusLine = response; // // While walking the Status Line looking for terminating \r\n, // we extract the Major.Minor Versions and Status Code in that order. // text and spaces will lie between/before/after the three numbers // but the idea is to remeber which number we're calculating based on a numeric state // If all goes well the loop will churn out an array with the 3 numbers plugged in as DWORDs // while ((response < lpszEnd) && (*response != '\r') && (*response != '\n')) { // below should be wrapped in while (response[i] != ' ') to be more robust??? switch (ver_state) { case BEFORE_VERSION_NUMBERS: if (*response == '/') { INET_ASSERT(ver_state == BEFORE_VERSION_NUMBERS); ver_state++; // = MAJOR_VERSION_NUMBER } else if (*response == ' ') { ver_state = STATUS_CODE_NUMBER; } break; case MAJOR_VERSION_NUMBER: if (*response == '.') { INET_ASSERT(ver_state == MAJOR_VERSION_NUMBER); ver_state++; // = MINOR_VERSION_NUMBER break; } // fall through case MINOR_VERSION_NUMBER: if (*response == ' ') { INET_ASSERT(ver_state == MINOR_VERSION_NUMBER); ver_state++; // = STATUS_CODE_NUMBER break; } // fall through case STATUS_CODE_NUMBER: if (isdigit(*response)) { int val = *response - '0'; adwStatusInts[ver_state] = adwStatusInts[ver_state] * 10 + val; } else if ( adwStatusInts[STATUS_CODE_NUMBER] > 0 ) { // // we eat spaces before status code is found, // once we have the status code we can go on to the next // state on the next non-digit. This is done // to cover cases with several spaces between version // and the status code number. // INET_ASSERT(ver_state == STATUS_CODE_NUMBER); ver_state++; // = AFTER_STATUS_CODE break; } else if (!isspace(*response)) { adwStatusInts[ver_state] = (DWORD)-1; } break; case AFTER_STATUS_CODE: break; } ++response; ++dwBytesScanned; } dwStatusLineSize = dwBytesScanned; if (response == lpszEnd) { // // response now points one past the end of the buffer. We may be looking // over the edge... // // if we're at the end of the connection then the server sent us an // incorrectly formatted response. Probably an error. // // Otherwise its a partial response. We need more // DEBUG_PRINT(HTTP, INFO, ("found end of short response in status line\n" )); success = fEof ? TRUE : FALSE; // // if we really hit the end of the response then update the amount of // headers scanned // if (!success) { dwBytesScanned = 0; } goto quit; } while ((response < lpszEnd) && ((*response == '\r') || (*response == ' '))) { ++response; ++dwBytesScanned; } if (response == lpszEnd) { // // hit end of buffer without finding LF // success = FALSE; DEBUG_PRINT(HTTP, WARNING, ("hit end of buffer without finding LF\n" )); goto quit; } else if (*response == '\n') { ++response; ++dwBytesScanned; // // if we found the empty line then we are done // success = TRUE; } INET_ASSERT(success); // // Now we have our parsed header to add to the array // HEADER_STRING * freeHeader; DWORD iSlot; freeHeader = FindFreeSlot(&iSlot); if (freeHeader == NULL) { INET_ASSERT(FALSE); success = FALSE; goto quit; } else { INET_ASSERT(iSlot == 0); // status line should always be first freeHeader->CreateOffsetString((DWORD)(lpszStatusLine - lpHeaderBase), dwStatusLineSize); freeHeader->SetHash(0); // status line has no hash value. } quit: *lpdwStatusCode = adwStatusInts[STATUS_CODE_NUMBER]; *lpdwMajorVersion = adwStatusInts[MAJOR_VERSION_NUMBER]; *lpdwMinorVersion = adwStatusInts[MINOR_VERSION_NUMBER]; *lpdwBufferLengthScanned += dwBytesScanned; return success; } DWORD HTTP_HEADER_PARSER::ParseHeaders( IN LPSTR lpHeaderBase, IN DWORD dwBufferLength, IN BOOL fEof, IN OUT DWORD *lpdwBufferLengthScanned, OUT LPBOOL pfFoundCompleteLine, OUT LPBOOL pfFoundEndOfHeaders ) /*++ Routine Description: Loads headers into HTTP_HEADERS member for subsequent parsing. Parses string based headers and adds their parts to an internally stored array of HTTP_HEADERS. Input is assumed to be well formed Header Name/Value pairs, each deliminated by ':' and '\r\n'. Arguments: lpszHeader - pointer to the header to check dwHeaderLength - length of the header Return Value: None. --*/ { LPSTR lpszEnd = lpHeaderBase + dwBufferLength; LPSTR response = lpHeaderBase + *lpdwBufferLengthScanned; DWORD dwBytesScanned = 0; BOOL success = FALSE; DWORD error = ERROR_SUCCESS; *pfFoundEndOfHeaders = FALSE; // // Each iteration of the following loop // walks an HTTP header line of the form: // HeaderName: HeaderValue\r\n // do { DWORD dwHash = HEADER_HASH_SEED; LPSTR lpszHeaderName; DWORD dwHeaderNameLength = 0; DWORD dwHeaderLineLength = 0; DWORD dwPreviousAmountOfBytesScanned = dwBytesScanned; // // Remove leading whitespace from header // while ( (response < lpszEnd) && ((*response == ' ') || (*response == '\t')) ) { ++response; ++dwBytesScanned; } // // Scan for HeaderName: // lpszHeaderName = response; dwPreviousAmountOfBytesScanned = dwBytesScanned; while ((response < lpszEnd) && (*response != ':') && (*response != '\r') && (*response != '\n')) { // // This code incapsulates CalculateHashNoCase as an optimization, // we attempt to calculate the Hash value as we parse the header. // CHAR ch = *response; if ((ch >= 'A') && (ch <= 'Z')) { ch = MAKE_LOWER(ch); } dwHash += (DWORD)(dwHash << 5) + ch; ++response; ++dwBytesScanned; } dwHeaderNameLength = (DWORD) (response - lpszHeaderName); // // catch bogus responses: if we find what looks like one of a (very) // small set of HTML tags, then assume the previous header was the // last // if ((dwHeaderNameLength >= sizeof("") - 1) && (*lpszHeaderName == '<') && (!strnicmp(lpszHeaderName, "", sizeof("") - 1) || !strnicmp(lpszHeaderName, "", sizeof("") - 1))) { *pfFoundEndOfHeaders = TRUE; break; } // // Keep scanning till end of the line. // while ((response < lpszEnd) && (*response != '\r') && (*response != '\n')) { ++response; ++dwBytesScanned; } dwHeaderLineLength = (DWORD) (response - lpszHeaderName); // note: this headerLINElength if (response == lpszEnd) { // // response now points one past the end of the buffer. We may be looking // over the edge... // // if we're at the end of the connection then the server sent us an // incorrectly formatted response. Probably an error. // // Otherwise its a partial response. We need more // DEBUG_PRINT(HTTP, INFO, ("found end of short response\n" )); success = fEof ? TRUE : FALSE; // // if we really hit the end of the response then update the amount of // headers scanned // if (!success) { dwBytesScanned = dwPreviousAmountOfBytesScanned; } break; } else { // // we reached a CR or LF. This is the end of this current header. Find // the start of the next one // // // first, strip off any trailing spaces from the current header. We do // this by simply reducing the string length. We only look for space // and tab characters. Only do this if we have a non-zero length header // if (dwHeaderLineLength != 0) { for (int i = -1; response[i] == ' ' || response[i] == '\t'; --i) { --dwHeaderLineLength; } } INET_ASSERT((int)dwHeaderLineLength >= 0); // // some servers respond with "\r\r\n". Lame // A new twist: "\r \r\n". Lamer // while ((response < lpszEnd) && ((*response == '\r') || (*response == ' '))) { ++response; ++dwBytesScanned; } if (response == lpszEnd) { // // hit end of buffer without finding LF // success = FALSE; DEBUG_PRINT(HTTP, WARNING, ("hit end of buffer without finding LF\n" )); // // get more data, reparse this line // dwBytesScanned = dwPreviousAmountOfBytesScanned; break; } else if (*response == '\n') { ++response; ++dwBytesScanned; // // if we found the empty line then we are done // if (dwHeaderLineLength == 0) { *pfFoundEndOfHeaders = TRUE; break; } success = TRUE; } } // // Now we have our parsed header to add to the array // HEADER_STRING * freeHeader; DWORD iSlot; freeHeader = FindFreeSlot(&iSlot); if (freeHeader == NULL) { error = GetError(); INET_ASSERT(error != ERROR_SUCCESS); goto quit; } else { freeHeader->CreateOffsetString((DWORD) (lpszHeaderName - lpHeaderBase), dwHeaderLineLength); freeHeader->SetHash(dwHash); } //CHAR szTemp[256]; // //memcpy(szTemp, lpszHeaderName, dwHeaderLineLength); //lpszHeaderName[dwHeaderLineLength] = '\0'; //DEBUG_PRINT(HTTP, // INFO, // ("ParseHeaders: adding=%q\n", lpszHeaderName // )); // // Now see if this is a known header we are adding, if so then we note that fact // DWORD dwKnownQueryIndex; if (HeaderMatch(dwHash, lpszHeaderName, dwHeaderNameLength, &dwKnownQueryIndex) ) { freeHeader->SetNextKnownIndex(FastAdd(dwKnownQueryIndex, iSlot)); } } while (TRUE); quit: *lpdwBufferLengthScanned += dwBytesScanned; *pfFoundCompleteLine = success; return error; }