Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

250 lines
7.4 KiB

  1. /**********************************************************************
  2. Cache Search Stuff (simple strstr)
  3. Marc Miller (t-marcmi) - 1998
  4. **********************************************************************/
  5. #include "cachesrch.h"
  6. DWORD CacheSearchEngine::CacheStreamWrapper::s_dwPageSize = 0;
  7. BOOL CacheSearchEngine::CacheStreamWrapper::_ReadNextBlock() {
  8. if (_fEndOfFile)
  9. return FALSE;
  10. if (!s_dwPageSize) {
  11. SYSTEM_INFO sysInfo;
  12. GetSystemInfo(&sysInfo);
  13. s_dwPageSize = sysInfo.dwPageSize;
  14. }
  15. BOOL fNewRead = FALSE; // is this our first look at this file?
  16. if (!_pbBuff) {
  17. // Allocate a page of memory
  18. // Note: find out why this returned error code #87
  19. //_pbBuff = (LPBYTE)(VirtualAlloc(NULL, s_dwPageSize, MEM_COMMIT, PAGE_READWRITE));
  20. _pbBuff = (LPBYTE)(LocalAlloc(LPTR, s_dwPageSize));
  21. if (!_pbBuff) {
  22. //DWORD dwError = GetLastError();
  23. return FALSE;
  24. }
  25. fNewRead = TRUE;
  26. _dwCacheStreamLoc = 0;
  27. }
  28. BOOL fSuccess;
  29. DWORD dwSizeRead = s_dwPageSize;
  30. if ((fSuccess = ReadUrlCacheEntryStream(_hCacheStream, _dwCacheStreamLoc,
  31. _pbBuff, &dwSizeRead, 0)) && dwSizeRead)
  32. {
  33. _fEndOfFile = (dwSizeRead < s_dwPageSize);
  34. _dwCacheStreamLoc += dwSizeRead;
  35. _dwBuffSize = dwSizeRead;
  36. _pbBuffPos = _pbBuff;
  37. _pbBuffLast = _pbBuff + dwSizeRead;
  38. _dataType = ASCII_DATA; // default
  39. if (fNewRead) {
  40. // deterine data type
  41. if (_dwBuffSize >= sizeof(USHORT)) {
  42. if (*((USHORT *)_pbBuff) == UNICODE_SIGNATURE)
  43. _dataType = UNICODE_DATA;
  44. else if (*((USHORT *)_pbBuff) == UNICODE_SIGNATURE_BACKWARDS)
  45. _dataType = UNICODE_BACKWARDS_DATA;
  46. if (s_IsUnicode(_dataType))
  47. _pbBuffPos += s_Charsize(_dataType);
  48. }
  49. }
  50. }
  51. else {
  52. fSuccess = FALSE;
  53. DWORD dwError = GetLastError();
  54. ASSERT(dwError != ERROR_INSUFFICIENT_BUFFER);
  55. }
  56. return fSuccess;
  57. }
  58. CacheSearchEngine::CacheStreamWrapper::CacheStreamWrapper(HANDLE hCacheStream) {
  59. // this class can be allocated on the stack:
  60. _pbBuff = NULL;
  61. _pbBuffPos = NULL;
  62. _pbBuffLast = NULL;
  63. _dwBuffSize = 0;
  64. _hCacheStream = hCacheStream;
  65. _fEndOfFile = FALSE;
  66. // Read in preliminary block of data --
  67. // Die on next read to handle failure
  68. _fEndOfFile = !(_ReadNextBlock());
  69. }
  70. CacheSearchEngine::CacheStreamWrapper::~CacheStreamWrapper() {
  71. if (_pbBuff) {
  72. //VirtualFree(_pbBuff);
  73. LocalFree(_pbBuff);;
  74. _pbBuff = NULL;
  75. }
  76. }
  77. // Read next byte from cache stream, reading in next block if necessary
  78. BOOL CacheSearchEngine::CacheStreamWrapper::_GetNextByte(BYTE &b)
  79. {
  80. //
  81. // If the initial read fails _pbBuffPos will be NULL. Don't
  82. // allow it to be dereffed.
  83. //
  84. BOOL fSuccess = _pbBuffPos ? TRUE : FALSE;
  85. if (_pbBuffPos == _pbBuffLast)
  86. fSuccess = _ReadNextBlock();
  87. if (fSuccess)
  88. b = *(_pbBuffPos++);
  89. return fSuccess;
  90. }
  91. BOOL CacheSearchEngine::CacheStreamWrapper::GetNextChar(WCHAR &wc) {
  92. BOOL fSuccess = TRUE;
  93. if (s_IsUnicode(_dataType)) {
  94. BYTE b1, b2;
  95. LPBYTE bs = (LPBYTE)&wc;
  96. if (_GetNextByte(b1) && _GetNextByte(b2)) {
  97. switch (_dataType) {
  98. case UNICODE_DATA:
  99. bs[0] = b1;
  100. bs[1] = b2;
  101. break;
  102. case UNICODE_BACKWARDS_DATA:
  103. bs[0] = b2;
  104. bs[1] = b1;
  105. break;
  106. default: ASSERT(0);
  107. }
  108. }
  109. else
  110. fSuccess = FALSE;
  111. }
  112. else
  113. {
  114. BYTE szData[2];
  115. if (_GetNextByte(szData[0]))
  116. {
  117. int cch = 1;
  118. if (IsDBCSLeadByte(szData[0]))
  119. {
  120. if (!_GetNextByte(szData[1]))
  121. {
  122. fSuccess = FALSE;
  123. }
  124. cch++;
  125. }
  126. if (fSuccess)
  127. {
  128. fSuccess = (MultiByteToWideChar(CP_ACP, 0, (LPSTR)szData, cch, &wc, 1) > 0);
  129. }
  130. }
  131. else
  132. {
  133. fSuccess = FALSE;
  134. }
  135. }
  136. return fSuccess;
  137. }
  138. // Prepare a search target string for searching --
  139. void CacheSearchEngine::StreamSearcher::_PrepareSearchTarget(LPCWSTR pwszSearchTarget)
  140. {
  141. UINT uStrLen = lstrlenW(pwszSearchTarget);
  142. _pwszPreparedSearchTarget = ((LPWSTR)LocalAlloc(LPTR, (uStrLen + 1) * sizeof(WCHAR)));
  143. if (_pwszPreparedSearchTarget) {
  144. // Strip leading and trailing whitespace and compress adjacent whitespace characters
  145. // into literal spaces
  146. LPWSTR pwszTemp = _pwszPreparedSearchTarget;
  147. pwszSearchTarget = s_SkipWhiteSpace(pwszSearchTarget);
  148. BOOL fAddWs = FALSE;
  149. while(*pwszSearchTarget) {
  150. if (s_IsWhiteSpace(*pwszSearchTarget)) {
  151. fAddWs = TRUE;
  152. pwszSearchTarget = s_SkipWhiteSpace(pwszSearchTarget);
  153. }
  154. else {
  155. if (fAddWs) {
  156. *(pwszTemp++) = L' ';
  157. fAddWs = FALSE;
  158. }
  159. *(pwszTemp++) = *(pwszSearchTarget++);
  160. }
  161. }
  162. *pwszTemp = L'\0';
  163. }
  164. }
  165. // Search a character stream for a searchtarget
  166. // Does a simple strstr, but tries to be smart about whitespace and
  167. // ignores HTML where possible...
  168. BOOL CacheSearchEngine::StreamSearcher::SearchCharStream(CacheSearchEngine::IWideSequentialReadStream &wsrs,
  169. BOOL fIsHTML/* = FALSE*/)
  170. {
  171. BOOL fFound = FALSE;
  172. if (_pwszPreparedSearchTarget && *_pwszPreparedSearchTarget)
  173. {
  174. WCHAR wc;
  175. LPCWSTR pwszCurrent = _pwszPreparedSearchTarget;
  176. BOOL fMatchedWS = FALSE;
  177. #if 0
  178. BOOL fIgnoreHTMLTag = FALSE;
  179. #endif
  180. while(*pwszCurrent && wsrs.GetNextChar(wc)) {
  181. #if 0
  182. if (fIsHTML && (wc == L'<'))
  183. fIgnoreHTMLTag = TRUE;
  184. else if (fIgnoreHTMLTag) {
  185. if (wc == L'>')
  186. fIgnoreHTMLTag = FALSE;
  187. }
  188. else
  189. #endif
  190. if (s_IsWhiteSpace(wc)) {
  191. // matched whitespace in search stream, look for
  192. // matching whitespace in target string
  193. if (!fMatchedWS) {
  194. if (s_IsWhiteSpace(*pwszCurrent)) {
  195. fMatchedWS = TRUE;
  196. ++pwszCurrent;
  197. }
  198. else
  199. pwszCurrent = _pwszPreparedSearchTarget;
  200. }
  201. }
  202. else {
  203. fMatchedWS = FALSE;
  204. if (!ChrCmpIW(*pwszCurrent, wc)) {
  205. ++pwszCurrent;
  206. }
  207. else {
  208. pwszCurrent = _pwszPreparedSearchTarget;
  209. }
  210. }
  211. }
  212. fFound = !*pwszCurrent;
  213. }
  214. return fFound;
  215. }
  216. BOOL CacheSearchEngine::SearchCacheStream(CacheSearchEngine::StreamSearcher &cse, HANDLE hCacheStream,
  217. BOOL fIsHTML/* = FALSE*/)
  218. {
  219. CacheStreamWrapper csw(hCacheStream);
  220. return cse.SearchCharStream(csw, fIsHTML);
  221. }