/**********************************************************************
  Cache Search Stuff (simple strstr)

  Marc Miller (t-marcmi) - 1998
 **********************************************************************/
#include "cachesrch.h"

// System page size, queried lazily on the first block read and shared by
// every CacheStreamWrapper instance.  It doubles as the read-buffer size.
DWORD CacheSearchEngine::CacheStreamWrapper::s_dwPageSize = 0;

// Reads the next page-sized block of the cache stream into _pbBuff and
// resets the buffer cursor to the start of the new data.
//
// On the very first read the buffer is allocated and the data type is
// sniffed from a leading byte-order mark (the mark itself is skipped).
//
// Returns TRUE if at least one byte was read; FALSE on end of file,
// allocation failure, read failure, or a zero-byte read.
BOOL CacheSearchEngine::CacheStreamWrapper::_ReadNextBlock()
{
    if (_fEndOfFile)
        return FALSE;

    if (!s_dwPageSize) {
        SYSTEM_INFO sysInfo;
        GetSystemInfo(&sysInfo);
        s_dwPageSize = sysInfo.dwPageSize;
    }

    BOOL fNewRead = FALSE;   // is this our first look at this file?
    if (!_pbBuff) {
        // Allocate a page of memory.
        // Note: find out why VirtualAlloc returned error code #87 here:
        //_pbBuff = (LPBYTE)(VirtualAlloc(NULL, s_dwPageSize, MEM_COMMIT, PAGE_READWRITE));
        _pbBuff = (LPBYTE)(LocalAlloc(LPTR, s_dwPageSize));
        if (!_pbBuff)
            return FALSE;

        fNewRead          = TRUE;
        _dwCacheStreamLoc = 0;
    }

    DWORD dwSizeRead = s_dwPageSize;
    BOOL  fSuccess   = ReadUrlCacheEntryStream(_hCacheStream, _dwCacheStreamLoc,
                                               _pbBuff, &dwSizeRead, 0);

    if (fSuccess && dwSizeRead) {
        // A short read means this was the last block of the stream.
        _fEndOfFile        = (dwSizeRead < s_dwPageSize);
        _dwCacheStreamLoc += dwSizeRead;
        _dwBuffSize        = dwSizeRead;
        _pbBuffPos         = _pbBuff;
        _pbBuffLast        = _pbBuff + dwSizeRead;

        if (fNewRead) {
            // Determine the data type from the first block only.  The type
            // must NOT be touched on later blocks: resetting it there would
            // make every Unicode stream longer than one page decode as ANSI
            // from the second page onwards.
            _dataType = ASCII_DATA;   // default
            if (_dwBuffSize >= sizeof(USHORT)) {
                const USHORT usSignature = *((USHORT *)_pbBuff);

                if (usSignature == UNICODE_SIGNATURE)
                    _dataType = UNICODE_DATA;
                else if (usSignature == UNICODE_SIGNATURE_BACKWARDS)
                    _dataType = UNICODE_BACKWARDS_DATA;

                // Step over the byte-order mark so it is never searched.
                if (s_IsUnicode(_dataType))
                    _pbBuffPos += s_Charsize(_dataType);
            }
        }
    }
    else {
        // Only consult the last-error value when the API really failed;
        // after a successful zero-byte read it would be stale.
        if (!fSuccess) {
            ASSERT(GetLastError() != ERROR_INSUFFICIENT_BUFFER);
        }
        fSuccess = FALSE;
    }
    return fSuccess;
}

// Wraps an already-open cache stream handle and primes the buffer with the
// first block.  The handle is stored, not closed, by this class.
CacheSearchEngine::CacheStreamWrapper::CacheStreamWrapper(HANDLE hCacheStream)
{
    // this class can be allocated on the stack, so set every member the
    // read path looks at before the first read:
    _pbBuff           = NULL;
    _pbBuffPos        = NULL;
    _pbBuffLast       = NULL;
    _dwBuffSize       = 0;
    _dwCacheStreamLoc = 0;
    _dataType         = ASCII_DATA;   // GetNextChar reads this even if the first read fails
    _hCacheStream     = hCacheStream;
    _fEndOfFile       = FALSE;

    // Read in preliminary block of data --
    //  Die on next read to handle failure
    _fEndOfFile = !(_ReadNextBlock());
}

// Releases the read buffer, if one was allocated.
CacheSearchEngine::CacheStreamWrapper::~CacheStreamWrapper()
{
    if (_pbBuff) {
        //VirtualFree(_pbBuff);
        LocalFree(_pbBuff);
        _pbBuff = NULL;
    }
}

// Read next byte from cache stream, reading in next block if necessary.
// Returns FALSE (leaving b untouched) when no more data is available.
BOOL CacheSearchEngine::CacheStreamWrapper::_GetNextByte(BYTE &b)
{
    //
    // If the initial read fails _pbBuffPos will be NULL.  Don't
    //  allow it to be dereffed.
    //
    BOOL fSuccess = _pbBuffPos ? TRUE : FALSE;

    // Buffer exhausted (or never filled): try to pull in the next block.
    if (_pbBuffPos == _pbBuffLast)
        fSuccess = _ReadNextBlock();

    if (fSuccess)
        b = *(_pbBuffPos++);

    return fSuccess;
}

// Fetches the next character of the stream as a WCHAR.
//
// Unicode streams yield two bytes per character, swapped when the stream
// carried the byte-reversed signature.  Other streams are treated as ANSI
// (CP_ACP) text: one byte, or two when the first is a DBCS lead byte,
// converted with MultiByteToWideChar.
//
// Returns FALSE at end of data or if the conversion fails.
BOOL CacheSearchEngine::CacheStreamWrapper::GetNextChar(WCHAR &wc)
{
    if (s_IsUnicode(_dataType)) {
        BYTE b1, b2;
        if (!_GetNextByte(b1) || !_GetNextByte(b2))
            return FALSE;

        LPBYTE bs = (LPBYTE)&wc;
        switch (_dataType) {
        case UNICODE_DATA:
            bs[0] = b1;
            bs[1] = b2;
            break;
        case UNICODE_BACKWARDS_DATA:
            bs[0] = b2;
            bs[1] = b1;
            break;
        default:
            ASSERT(0);
            break;
        }
        return TRUE;
    }

    BYTE szData[2];
    if (!_GetNextByte(szData[0]))
        return FALSE;

    int cch = 1;
    if (IsDBCSLeadByte(szData[0])) {
        // A lead byte with no trail byte is a truncated character.
        if (!_GetNextByte(szData[1]))
            return FALSE;
        cch = 2;
    }

    return (MultiByteToWideChar(CP_ACP, 0, (LPSTR)szData, cch, &wc, 1) > 0);
}

// Prepare a search target string for searching --
//
// Stores in _pwszPreparedSearchTarget a LocalAlloc'd copy of the target with
// leading and trailing whitespace stripped and each interior run of
// whitespace compressed into a single literal space.  On allocation failure
// the member is left NULL; a NULL input yields an empty prepared target.
void CacheSearchEngine::StreamSearcher::_PrepareSearchTarget(LPCWSTR pwszSearchTarget)
{
    // lstrlenW returns 0 for a NULL string, so the buffer is then one
    // zero-initialised WCHAR, i.e. an empty string.
    UINT uStrLen = lstrlenW(pwszSearchTarget);
    _pwszPreparedSearchTarget = ((LPWSTR)LocalAlloc(LPTR, (uStrLen + 1) * sizeof(WCHAR)));

    if (!_pwszPreparedSearchTarget || !pwszSearchTarget)
        return;

    LPWSTR pwszOut      = _pwszPreparedSearchTarget;
    BOOL   fPendingSpace = FALSE;

    // Drop leading whitespace.
    pwszSearchTarget = s_SkipWhiteSpace(pwszSearchTarget);

    while (*pwszSearchTarget) {
        if (s_IsWhiteSpace(*pwszSearchTarget)) {
            // Remember the run but only emit a space once a following
            // non-whitespace character proves it is not trailing.
            fPendingSpace    = TRUE;
            pwszSearchTarget = s_SkipWhiteSpace(pwszSearchTarget);
        }
        else {
            if (fPendingSpace) {
                *(pwszOut++)  = L' ';
                fPendingSpace = FALSE;
            }
            *(pwszOut++) = *(pwszSearchTarget++);
        }
    }
    *pwszOut = L'\0';
}

// Search a character stream for a searchtarget
//  Does a simple strstr, but tries to be smart about whitespace and
//  ignores HTML where possible...
//
// Matching is case-insensitive (ChrCmpIW) and any run of whitespace in the
// stream matches a single whitespace character in the prepared target.
//
// wsrs    - stream to read characters from; consumed up to the match or
//           to its end.
// fIsHTML - only referenced by the HTML-tag skipping code, which is
//           currently compiled out.
//
// Returns TRUE if the prepared target occurs in the stream; FALSE if it does
// not, or if there is no (or an empty) prepared target.
BOOL CacheSearchEngine::StreamSearcher::SearchCharStream(CacheSearchEngine::IWideSequentialReadStream &wsrs,
                                                         BOOL fIsHTML/* = FALSE*/)
{
    BOOL fFound = FALSE;

    if (_pwszPreparedSearchTarget && *_pwszPreparedSearchTarget) {
        LPCWSTR pwszTarget  = _pwszPreparedSearchTarget;
        UINT    cchMatched  = 0;       // target characters matched so far
        BOOL    fPrevWasWS  = FALSE;   // previous stream character was whitespace
        WCHAR   wc;
#if 0
        BOOL    fIgnoreHTMLTag = FALSE;
#endif

        while (pwszTarget[cchMatched] && wsrs.GetNextChar(wc)) {
#if 0
            // Disabled: skip everything between '<' and '>' in HTML.
            if (fIsHTML && (wc == L'<')) {
                fIgnoreHTMLTag = TRUE;
                continue;
            }
            if (fIgnoreHTMLTag) {
                if (wc == L'>')
                    fIgnoreHTMLTag = FALSE;
                continue;
            }
#endif
            const BOOL fIsWS = s_IsWhiteSpace(wc);

            // Collapse a run of stream whitespace into one whitespace token.
            if (fIsWS && fPrevWasWS)
                continue;
            fPrevWasWS = fIsWS;

            for (;;) {
                const WCHAR wcTarget = pwszTarget[cchMatched];
                const BOOL  fMatch   = fIsWS ? s_IsWhiteSpace(wcTarget)
                                             : !ChrCmpIW(wcTarget, wc);
                if (fMatch) {
                    ++cchMatched;
                    break;
                }
                if (!cchMatched)
                    break;      // nothing matched and nothing to fall back to

                // Mismatch after a partial match.  Restarting from scratch
                // would throw away stream characters that may begin the real
                // match (e.g. "ab" in "aab").  Instead fall back to the
                // longest proper prefix of the matched part that is also its
                // suffix, then retry the current character against it.
                UINT cchBorder = cchMatched - 1;
                for (; cchBorder > 0; --cchBorder) {
                    UINT i = 0;
                    while (i < cchBorder &&
                           !ChrCmpIW(pwszTarget[i], pwszTarget[cchMatched - cchBorder + i]))
                        ++i;
                    if (i == cchBorder)
                        break;
                }
                cchMatched = cchBorder;
            }
        }
        fFound = !pwszTarget[cchMatched];
    }
    return fFound;
}

// Convenience entry point: wraps hCacheStream in a stack CacheStreamWrapper
// and runs the given searcher over it.  Returns the searcher's result.
BOOL CacheSearchEngine::SearchCacheStream(CacheSearchEngine::StreamSearcher &cse,
                                          HANDLE hCacheStream,
                                          BOOL fIsHTML/* = FALSE*/)
{
    CacheStreamWrapper csw(hCacheStream);
    return cse.SearchCharStream(csw, fIsHTML);
}