Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

565 lines
16 KiB

  1. /////////////////////////////////////////////////////////////////////////////////
  2. //
  3. // fusion\xmlparser\BufferedStream.cxx
  4. //
  5. /////////////////////////////////////////////////////////////////////////////////
  6. #include "stdinc.h"
  7. #include "core.hxx"
  8. #pragma hdrstop
  9. #include <memory.h>
  10. //#include <shlwapip.h>
  11. #include <ole2.h>
  12. #include <xmlparser.h>
  13. #include "bufferedstream.hxx"
  14. #include "xmlstream.hxx"
  15. #include "encodingstream.hxx"
  16. #include "xmlhelper.hxx"
  // Sizing unit, in WCHARs, for the character buffer: it is the size of the
  // first allocation and the minimum amount of free room that
  // prepareForInput() insists on before more input is read.
  const long BLOCK_SIZE = 4096;

  // Upper bound on how far back (in characters) the start of the marked
  // line may lie and still be retained when the buffer contents are shifted
  // down.  Anything longer is not worth keeping for error reporting.
  const long MAX_LINE_BUFFER = 512;
  21. BufferedStream::BufferedStream(XMLStream *pXMLStream)
  22. {
  23. _pchBuffer = NULL;
  24. _lSize = 0;
  25. _pXMLStream = pXMLStream;
  26. init();
  27. }
  28. /////////////////////////////////////////////////////////////////////////////
  29. void BufferedStream::init()
  30. {
  31. _lCurrent = _lUsed = _lMark = 0;
  32. _lLine = 1; // lines start at 1.
  33. _lMarkedline = 1;
  34. _lLinepos = 0;
  35. _lMarkedlinepos = 0;
  36. _chLast = 0;
  37. _lStartAt = 0;
  38. _fEof = false;
  39. _lLockedPos = -1;
  40. _lLastWhiteSpace = -1;
  41. _lLockCount = 0;
  42. _fNotified = false;
  43. _fFrozen = false;
  44. _pPendingEncoding = NULL;
  45. }
  46. /////////////////////////////////////////////////////////////////////////////
  47. BufferedStream::~BufferedStream()
  48. {
  49. delete [] _pchBuffer;
  50. _pStmInput = NULL;
  51. delete _pPendingEncoding;
  52. _pPendingEncoding = NULL;
  53. }
  54. /////////////////////////////////////////////////////////////////////////////
  55. HRESULT BufferedStream::Reset()
  56. {
  57. init();
  58. delete[] _pchBuffer;
  59. _pchBuffer = NULL;
  60. _lSize = 0;
  61. _pStmInput = NULL;
  62. _lLockedPos = -1;
  63. _lLockCount = 0;
  64. _fFrozen = false;
  65. delete _pPendingEncoding;
  66. _pPendingEncoding = NULL;
  67. return S_OK;
  68. }
  69. /////////////////////////////////////////////////////////////////////////////
  70. HRESULT
  71. BufferedStream::Load(
  72. /* [unique][in] */ EncodingStream __RPC_FAR *pStm)
  73. {
  74. if (pStm != NULL)
  75. {
  76. init();
  77. _pStmInput = pStm;
  78. return S_OK;
  79. }
  80. else
  81. {
  82. _pStmInput = NULL;
  83. }
  84. return S_OK;
  85. }
  86. /////////////////////////////////////////////////////////////////////////////
  87. HRESULT
  88. BufferedStream::AppendData( const BYTE* in, ULONG length, BOOL lastBuffer)
  89. {
  90. HRESULT hr;
  91. if (_fEof)
  92. {
  93. init();
  94. }
  95. if (!_pStmInput)
  96. {
  97. EncodingStream* stream = (EncodingStream*)EncodingStream::newEncodingStream(NULL);
  98. if (stream == NULL)
  99. return E_OUTOFMEMORY;
  100. _pStmInput = stream;
  101. stream->Release(); // Smart pointer is holding a ref
  102. }
  103. checkhr2(_pStmInput->AppendData(in, length, lastBuffer));
  104. return S_OK;
  105. }
  106. /////////////////////////////////////////////////////////////////////////////
  107. HRESULT
  108. BufferedStream::nextChar(
  109. /* [out] */ WCHAR* ch,
  110. /* [out] */ bool* fEOF)
  111. {
  112. HRESULT hr;
  113. if (_lCurrent >= _lUsed)
  114. {
  115. if (_fEof)
  116. {
  117. *fEOF = true;
  118. return S_OK;
  119. }
  120. if (! _fNotified && _lUsed > 0)
  121. {
  122. _fNotified = true; // notify data available BEFORE blowing
  123. // NOTE: this code approximates what prepareForInput does
  124. // in order to accurately predict when the buffer is about to
  125. // be re-allocated.
  126. long shift = _fFrozen ? 0 : getNewStart(); // is data about to shift?
  127. long used = _lUsed - shift; // this is how much is really used after shift
  128. if (_lSize - used < BLOCK_SIZE + 1) // +1 for null termination.
  129. {
  130. // we will reallocate !! So return a special
  131. // return code
  132. hr = E_DATA_REALLOCATE;
  133. }
  134. else
  135. hr = E_DATA_AVAILABLE; // away the old data so parser can save it if need be.
  136. checkhr2( _pXMLStream->ErrorCallback(hr) );
  137. }
  138. checkhr2( fillBuffer() );
  139. if (_fEof)
  140. {
  141. *fEOF = true;
  142. return S_OK;
  143. }
  144. _fNotified = false;
  145. }
  146. WCHAR result = _pchBuffer[_lCurrent++];
  147. switch (result)
  148. {
  149. case 0xa:
  150. case 0xd:
  151. if (result == 0xd || _chLast != 0xd)
  152. _lLine++;
  153. _lLinepos = _lCurrent;
  154. _chLast = result;
  155. _lLastWhiteSpace = _lCurrent;
  156. break;
  157. case 0x20:
  158. case 0x9:
  159. _lLastWhiteSpace = _lCurrent;
  160. break;
  161. case 0xfffe:
  162. case 0xffff:
  163. //case 0xfeff:
  164. ::FusionpDbgPrintEx(
  165. FUSION_DBG_LEVEL_ERROR,
  166. "SXS.DLL: XML Parser found either 0xfffe or 0xffff\n");
  167. return XML_E_BADCHARDATA;
  168. }
  169. *ch = result;
  170. return S_OK;
  171. }
  172. /////////////////////////////////////////////////////////////////////////////
  173. HRESULT BufferedStream::scanPCData(
  174. /* [out] */ WCHAR* ch,
  175. /* [out] */ bool* fWhitespace)
  176. {
  177. WCHAR result;
  178. bool foundNonWhiteSpace = false;
  179. if (! isWhiteSpace(*ch))
  180. foundNonWhiteSpace = true;
  181. // Then skip the data until we find '<', '>' or '&'
  182. while (_lCurrent < _lUsed)
  183. {
  184. result = _pchBuffer[_lCurrent++];
  185. switch (result)
  186. {
  187. case ']': // xiaoyu : the specified chars can be changed for our own purpose
  188. case '>':
  189. case '<':
  190. case '&':
  191. case '\'': // so this can be used to scan attribute values also.
  192. case '"': // so this can be used to scan attribute values also.
  193. *ch = result;
  194. if (foundNonWhiteSpace)
  195. *fWhitespace = false;
  196. return S_OK;
  197. break;
  198. case 0xa:
  199. case 0xd:
  200. if (result == 0xd || _chLast != 0xd)
  201. _lLine++;
  202. _lLinepos = _lCurrent;
  203. _chLast = result;
  204. _lLastWhiteSpace = _lCurrent;
  205. break;
  206. case 0x20:
  207. case 0x9:
  208. _lLastWhiteSpace = _lCurrent;
  209. break;
  210. case 0xfffe:
  211. case 0xffff:
  212. ::FusionpDbgPrintEx(
  213. FUSION_DBG_LEVEL_ERROR,
  214. "SXS.DLL: XML Parser found either 0xfffe or 0xffff\n");
  215. return XML_E_BADCHARDATA;
  216. default:
  217. foundNonWhiteSpace = true;
  218. break;
  219. }
  220. }
  221. // And just return E_PENDING if we run out of buffer.
  222. if (foundNonWhiteSpace)
  223. *fWhitespace = false;
  224. return E_PENDING;
  225. }
  226. /////////////////////////////////////////////////////////////////////////////
  227. long BufferedStream::getLine()
  228. {
  229. return _lMarkedline;
  230. }
  231. /////////////////////////////////////////////////////////////////////////////
  232. long BufferedStream::getLinePos()
  233. {
  234. // _lMarkedlinepos is the position of the beginning of the marked line
  235. // relative to the beginning of the buffer, and _lMark is the
  236. // position of the marked token relative to the beginning of the
  237. // buffer, So the position of the marked token relative to the
  238. // current line is the difference between the two.
  239. // We also return a 1-based position so that the start of the
  240. // line = column 1. This is consistent with the line numbers
  241. // which are also 1-based.
  242. return (_lMarkedlinepos > _lMark+1) ? 0 : _lMark+1-_lMarkedlinepos;
  243. }
  244. /////////////////////////////////////////////////////////////////////////////
  245. long BufferedStream::getInputPos()
  246. {
  247. return _lStartAt+_lMark;
  248. }
  249. /////////////////////////////////////////////////////////////////////////////
  250. WCHAR* BufferedStream::getLineBuf(ULONG* len, ULONG* startpos)
  251. {
  252. *len = 0;
  253. if (_pchBuffer == NULL)
  254. return NULL;
  255. WCHAR* result = &_pchBuffer[_lMarkedlinepos];
  256. ULONG i = 0;
  257. // internal _pchBuffer is guarenteed to be null terminated.
  258. WCHAR ch = result[i];
  259. while (ch != 0 && ch != L'\n' && ch != L'\r')
  260. {
  261. i++;
  262. ch = result[i];
  263. }
  264. *len = i;
  265. // also return the line position relative to start of
  266. // returned buffer.
  267. *startpos = (_lMarkedlinepos > _lMark+1) ? 0 : _lMark+1-_lMarkedlinepos;
  268. return result;
  269. }
  270. /////////////////////////////////////////////////////////////////////////////
  271. HRESULT BufferedStream::switchEncoding(const WCHAR * charset, ULONG len)
  272. {
  273. HRESULT hr = S_OK;
  274. if (!_pStmInput)
  275. {
  276. hr = E_FAIL;
  277. goto CleanUp;
  278. }
  279. else
  280. {
  281. _pPendingEncoding = Encoding::newEncoding(charset, len);
  282. if (_pPendingEncoding == NULL)
  283. {
  284. hr = E_OUTOFMEMORY;
  285. goto CleanUp;
  286. }
  287. if (! _fFrozen)
  288. {
  289. hr = doSwitchEncoding();
  290. }
  291. }
  292. CleanUp:
  293. return hr;
  294. }
  295. /////////////////////////////////////////////////////////////////////////////
  296. HRESULT BufferedStream::doSwitchEncoding()
  297. {
  298. Encoding* encoding = _pPendingEncoding;
  299. _pPendingEncoding = NULL;
  300. HRESULT hr = _pStmInput->switchEncodingAt(encoding, _lStartAt + _lCurrent);
  301. if (hr == S_FALSE)
  302. {
  303. // need to re-read to force re-decode into new encoding.
  304. // In other words we have to forget that we read past this
  305. // position already so that the next call to nextChar
  306. // will call FillBuffer again.
  307. // (+1 so that nextChar works correctly).
  308. _lUsed = _lStartAt + _lCurrent;
  309. hr = S_OK;
  310. }
  311. else if (FAILED(hr))
  312. {
  313. hr = (hr == E_INVALIDARG) ? XML_E_INVALIDENCODING : XML_E_INVALIDSWITCH;
  314. }
  315. return hr;
  316. }
  317. /////////////////////////////////////////////////////////////////////////////
  318. // Returns a pointer to a contiguous block of text accumulated
  319. // from the last time Mark() was called up to but not including
  320. // the last character read. (This allows a parser to have a
  321. // lookahead character that is not included in the token).
  322. HRESULT
  323. BufferedStream::getToken(const WCHAR**p, long* len)
  324. {
  325. if (_pchBuffer == NULL)
  326. return E_FAIL;
  327. if (_lCurrent != _lCurrent2)
  328. {
  329. // need to fix up buffer since it is no
  330. // out of sync since we've been compressing
  331. // whitespace.
  332. }
  333. *p = &_pchBuffer[_lMark];
  334. *len = getTokenLength();
  335. return S_OK;
  336. }
  337. /////////////////////////////////////////////////////////////////////////////
  338. void
  339. BufferedStream::Lock()
  340. {
  341. // We allow nested locking - where the outer lock wins - unlock only
  342. // really unlocks when the outer lock is unlocked.
  343. if (++_lLockCount == 1)
  344. {
  345. _lLockedPos = _lMark;
  346. _lLockedLine = _lMarkedline;
  347. _lLockedLinePos = _lMarkedlinepos;
  348. }
  349. }
  350. /////////////////////////////////////////////////////////////////////////////
  351. void
  352. BufferedStream::UnLock()
  353. {
  354. if (--_lLockCount == 0)
  355. {
  356. _lMark = _lLockedPos;
  357. _lMarkedline = _lLockedLine;
  358. _lMarkedlinepos = _lLockedLinePos;
  359. _lLockedPos = -1;
  360. }
  361. }
  362. /////////////////////////////////////////////////////////////////////////////
  363. HRESULT
  364. BufferedStream::Freeze()
  365. {
  366. HRESULT hr;
  367. if (_lCurrent > _lMidPoint)
  368. {
  369. // Since we freeze the buffer a lot now (any time we're inside
  370. // a tag) we need to shift the bytes down in the buffer more
  371. // frequently in order to guarentee we have space in the buffer
  372. // when we need it. Otherwize the buffer would tend to just
  373. // keep growing and growing. So we shift the buffer when we
  374. // go past the midpoint.
  375. checkhr2( prepareForInput() );
  376. }
  377. _fFrozen = true;
  378. return S_OK;
  379. }
  380. /////////////////////////////////////////////////////////////////////////////
  381. HRESULT
  382. BufferedStream::UnFreeze()
  383. {
  384. _fFrozen = false;
  385. if (_pPendingEncoding)
  386. {
  387. return doSwitchEncoding();
  388. }
  389. return S_OK;
  390. }
  391. /////////////////////////////////////////////////////////////////////////////
  392. HRESULT
  393. BufferedStream::fillBuffer()
  394. {
  395. HRESULT hr;
  396. checkhr2( prepareForInput() );
  397. if (_pStmInput)
  398. {
  399. long space = _lSize - _lUsed - 1; // reserve 1 for NULL termination
  400. // get more bytes.
  401. ULONG read = 0;
  402. HRESULT rc = _pStmInput->Read(&_pchBuffer[_lUsed], space*sizeof(WCHAR), &read);
  403. _lUsed += read/sizeof(WCHAR); // stream must return unicode characters.
  404. _pchBuffer[_lUsed] = 0; // NULL terminate the _pchBuffer.
  405. if (FAILED(rc))
  406. return rc;
  407. if (read == 0)
  408. {
  409. _fEof = true;
  410. // increment _lCurrent, so that getToken returns
  411. // last character in file.
  412. _lCurrent++; _lCurrent2++;
  413. }
  414. }
  415. else
  416. {
  417. // SetInput or AppendData hasn't been called yet.
  418. return E_PENDING;
  419. }
  420. return S_OK;
  421. }
  422. /////////////////////////////////////////////////////////////////////////////
  423. HRESULT
  424. BufferedStream::prepareForInput()
  425. {
  426. // move the currently used section of the _pchBuffer
  427. // (buf[mark] to buf[used]) down to the beginning of
  428. // the _pchBuffer.
  429. long newstart = 0;
  430. // BUGBUG - if this code is changed BufferedStream::nextChar has to
  431. // be updated also so that they stay in sync, otherwise we might
  432. // re-allocated the buffer without generating an E_DATA_REALLOCATE
  433. // notification - which would be very bad (causes GPF's in the parser).
  434. if (! _fFrozen) // can't shift bits if the buffer is frozen.
  435. {
  436. newstart = getNewStart();
  437. if (newstart > 0)
  438. {
  439. WCHAR* src = &_pchBuffer[newstart];
  440. _lUsed -= newstart;
  441. _lStartAt += newstart;
  442. ::memmove(_pchBuffer,src,_lUsed*sizeof(WCHAR));
  443. _lCurrent -= newstart;
  444. _lCurrent2 -= newstart;
  445. _lLastWhiteSpace -= newstart;
  446. _lLinepos = (_lLinepos > newstart) ? _lLinepos-newstart : 0;
  447. _lMarkedlinepos = (_lLinepos > newstart) ? _lMarkedlinepos-newstart : 0;
  448. _lMark -= newstart;
  449. _lLockedLinePos = (_lLockedLinePos > newstart) ? _lLockedLinePos-newstart : 0;
  450. _lLockedPos -= newstart;
  451. }
  452. }
  453. // make sure we have a reasonable amount of space
  454. // left in the _pchBuffer.
  455. long space = _lSize - _lUsed;
  456. if (space > 0) space--; // reserve 1 for NULL termination
  457. if (_pchBuffer == NULL || space < BLOCK_SIZE)
  458. {
  459. // double the size of the buffer.
  460. long newsize = (_lSize == 0) ? BLOCK_SIZE : (_lSize*2);
  461. WCHAR* newbuf = NEW (WCHAR[newsize]);
  462. if (newbuf == NULL)
  463. {
  464. // try more conservative allocation.
  465. newsize = _lSize + BLOCK_SIZE;
  466. newbuf = NEW (WCHAR[newsize]);
  467. }
  468. if (newbuf == NULL && space == 0)
  469. return E_OUTOFMEMORY;
  470. if (newbuf != NULL)
  471. {
  472. if (_pchBuffer != NULL)
  473. {
  474. // copy old bytes to new _pchBuffer.
  475. ::memcpy(newbuf,_pchBuffer,_lUsed*sizeof(WCHAR));
  476. delete [] _pchBuffer;
  477. }
  478. newbuf[_lUsed] = 0; // make sure it's null terminated.
  479. _pchBuffer = newbuf;
  480. _lSize = newsize;
  481. _lMidPoint = newsize / 2;
  482. }
  483. }
  484. return S_OK;
  485. }
  486. /////////////////////////////////////////////////////////////////////////////
  487. long
  488. BufferedStream::getNewStart()
  489. {
  490. long newstart = 0;
  491. // Unless the buffer is frozen, in which case we just reallocate and
  492. // do no shifting of data.
  493. if (_lLockedPos > 0)
  494. {
  495. // and try and preserve the beginning of the marked line if we can
  496. if (_lLockedLinePos < _lLockedPos &&
  497. _lLockedPos - _lLockedLinePos < MAX_LINE_BUFFER)
  498. {
  499. newstart = _lLockedLinePos;
  500. }
  501. }
  502. else if (_lMark > 0)
  503. {
  504. // and try and preserve the beginning of the marked line if we can
  505. newstart = _lMark;
  506. if (_lMarkedlinepos < _lMark &&
  507. _lMark - _lMarkedlinepos < MAX_LINE_BUFFER) // watch out for long lines
  508. {
  509. newstart = _lMarkedlinepos;
  510. }
  511. }
  512. return newstart;
  513. }