Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

646 lines
18 KiB

  1. //
  2. // fstream.cpp
  3. // Implements a file stream
  4. // for reading text files line by line.
  5. // (the standard C streams only support
  6. // unicode as binary streams, which are a pain to work
  7. // with).
  8. //
  9. // This class reads/writes both ANSI and UNICODE files
  10. // and converts to/from UNICODE internally
  11. //
  12. // Does not do any CR/LF translations either on input
  13. // or output.
  14. //
  15. // Copyright(C) Microsoft Corporation 2000
  16. // Author: Nadim Abdo (nadima)
  17. //
  18. #include "stdafx.h"
  19. #define TRC_GROUP TRC_GROUP_UI
  20. #define TRC_FILE "fstream.cpp"
  21. #include <atrcapi.h>
  22. #include "fstream.h"
  23. #ifndef UNICODE
  24. //
  25. // Adding ansi support is just a matter of converting
  26. // from UNICODE file to ANSI internal if the file
  27. // has a UNICODE BOM
  28. //
  29. #error THIS MODULE ASSUMES BEING COMPILED UNICODE, ADD ANSI IF NEEDED
  30. #endif
  31. CTscFileStream::CTscFileStream()
  32. {
  33. DC_BEGIN_FN("~CFileStream");
  34. _hFile = INVALID_HANDLE_VALUE;
  35. _pBuffer = NULL;
  36. _fOpenForRead = FALSE;
  37. _fOpenForWrite = FALSE;
  38. _fReadToEOF = FALSE;
  39. _fFileIsUnicode = FALSE;
  40. _fAtStartOfFile = TRUE;
  41. _pAnsiLineBuf = NULL;
  42. _cbAnsiBufSize = 0;
  43. DC_END_FN();
  44. }
  45. CTscFileStream::~CTscFileStream()
  46. {
  47. DC_BEGIN_FN("~CFileStream");
  48. Close();
  49. if(_hFile != INVALID_HANDLE_VALUE)
  50. {
  51. CloseHandle(_hFile);
  52. _hFile = INVALID_HANDLE_VALUE;
  53. }
  54. if(_pBuffer)
  55. {
  56. LocalFree(_pBuffer);
  57. _pBuffer = NULL;
  58. }
  59. if(_pAnsiLineBuf)
  60. {
  61. LocalFree(_pAnsiLineBuf);
  62. _pAnsiLineBuf = NULL;
  63. }
  64. DC_END_FN();
  65. }
  66. INT CTscFileStream::OpenForRead(LPTSTR szFileName)
  67. {
  68. DC_BEGIN_FN("OpenForRead");
  69. INT err;
  70. err = Close();
  71. if(err != ERR_SUCCESS)
  72. {
  73. return err;
  74. }
  75. //Alloc read buffers
  76. if(!_pBuffer)
  77. {
  78. _pBuffer = (PBYTE)LocalAlloc(LPTR, READ_BUF_SIZE);
  79. if(!_pBuffer)
  80. {
  81. return ERR_OUT_OF_MEM;
  82. }
  83. }
  84. if(!_pAnsiLineBuf)
  85. {
  86. _pAnsiLineBuf = (PBYTE)LocalAlloc(LPTR, LINEBUF_SIZE);
  87. if(!_pAnsiLineBuf)
  88. {
  89. return ERR_OUT_OF_MEM;
  90. }
  91. _cbAnsiBufSize = LINEBUF_SIZE;
  92. }
  93. memset(_pBuffer, 0, READ_BUF_SIZE);
  94. memset(_pAnsiLineBuf, 0, LINEBUF_SIZE);
  95. _hFile = CreateFile( szFileName,
  96. GENERIC_READ,
  97. FILE_SHARE_READ,
  98. NULL,
  99. OPEN_ALWAYS, //Creates if !exist
  100. FILE_ATTRIBUTE_NORMAL,
  101. NULL);
  102. if(INVALID_HANDLE_VALUE == _hFile)
  103. {
  104. TRC_ERR((TB, _T("CreateFile failed: %s - err:%x"),
  105. szFileName, GetLastError()));
  106. return ERR_CREATEFILE;
  107. }
  108. #ifdef OS_WINCE
  109. DWORD dwRes;
  110. dwRes = SetFilePointer( _hFile, 0, NULL, FILE_BEGIN);
  111. if (dwRes == (DWORD)0xffffffff) {
  112. DWORD dwErr = GetLastError();
  113. TRC_ERR((TB, _T("CreateFile failed to reset: %s - err:%x"),
  114. szFileName, GetLastError()));
  115. return ERR_CREATEFILE;
  116. }
  117. #endif
  118. _curBytePtr = 0;
  119. _curBufSize = 0;
  120. _tcsncpy(_szFileName, szFileName, MAX_PATH-1);
  121. //Yes this is ok, the size is MAX_PATH+1 ;-)
  122. _szFileName[MAX_PATH] = 0;
  123. _fOpenForRead = TRUE;
  124. _fFileIsUnicode = FALSE;
  125. _fAtStartOfFile = TRUE;
  126. DC_END_FN();
  127. return ERR_SUCCESS;
  128. }
  129. //
  130. // Opens the stream for writing
  131. // always nukes the existing file contents
  132. //
  133. INT CTscFileStream::OpenForWrite(LPTSTR szFileName, BOOL fWriteUnicode)
  134. {
  135. DC_BEGIN_FN("OpenForWrite");
  136. INT err;
  137. DWORD dwAttributes = 0;
  138. err = Close();
  139. if(err != ERR_SUCCESS)
  140. {
  141. return err;
  142. }
  143. if(_pAnsiLineBuf)
  144. {
  145. LocalFree(_pAnsiLineBuf);
  146. _pAnsiLineBuf = NULL;
  147. }
  148. _pAnsiLineBuf = (PBYTE)LocalAlloc(LPTR, LINEBUF_SIZE);
  149. if(!_pAnsiLineBuf)
  150. {
  151. return ERR_OUT_OF_MEM;
  152. }
  153. _cbAnsiBufSize = LINEBUF_SIZE;
  154. //
  155. // Preserve any existing attributes
  156. //
  157. dwAttributes = GetFileAttributes(szFileName);
  158. if (-1 == dwAttributes)
  159. {
  160. TRC_ERR((TB,_T("GetFileAttributes for %s failed 0x%x"),
  161. szFileName, GetLastError()));
  162. dwAttributes = FILE_ATTRIBUTE_NORMAL;
  163. }
  164. _hFile = CreateFile( szFileName,
  165. GENERIC_WRITE,
  166. FILE_SHARE_READ,
  167. NULL,
  168. CREATE_ALWAYS, //Creates and reset
  169. dwAttributes,
  170. NULL);
  171. if(INVALID_HANDLE_VALUE == _hFile)
  172. {
  173. TRC_ERR((TB, _T("CreateFile failed: %s - err:%x"),
  174. szFileName, GetLastError()));
  175. return ERR_CREATEFILE;
  176. }
  177. _tcsncpy(_szFileName, szFileName, MAX_PATH-1);
  178. //Yes this is ok, the size is MAX_PATH+1 ;-)
  179. _szFileName[MAX_PATH] = 0;
  180. _fOpenForWrite = TRUE;
  181. _fFileIsUnicode = fWriteUnicode;
  182. _fAtStartOfFile = TRUE;
  183. DC_END_FN();
  184. return ERR_SUCCESS;
  185. }
  186. INT CTscFileStream::Close()
  187. {
  188. DC_BEGIN_FN("Close");
  189. if(_hFile != INVALID_HANDLE_VALUE)
  190. {
  191. CloseHandle(_hFile);
  192. _hFile = INVALID_HANDLE_VALUE;
  193. }
  194. _fOpenForRead = _fOpenForWrite = FALSE;
  195. _fReadToEOF = FALSE;
  196. _tcscpy(_szFileName, _T(""));
  197. //Don't free the read buffers
  198. //they'll be cached for subsequent use
  199. DC_END_FN();
  200. return ERR_SUCCESS;
  201. }
  202. //
  203. // Read a line from the file and return it as UNICODE
  204. //
  205. // Read up to the next newline, or till cbLineSize/sizeof(WCHAR) or
  206. // untill the EOF. Whichever comes first.
  207. //
  208. //
  209. INT CTscFileStream::ReadNextLine(LPWSTR szLine, INT cbLineSize)
  210. {
  211. BOOL bRet = FALSE;
  212. INT cbBytesCopied = 0;
  213. INT cbOutputSize = 0;
  214. BOOL fDone = FALSE;
  215. PBYTE pOutBuf = NULL; //where to write the result
  216. BOOL fFirstIter = TRUE;
  217. DC_BEGIN_FN("ReadNextLine");
  218. TRC_ASSERT(_hFile != INVALID_HANDLE_VALUE,
  219. (TB,_T("No file handle")));
  220. TRC_ASSERT(_pBuffer, (TB,_T("NO buffer")));
  221. if(_fOpenForRead && !_fReadToEOF && cbLineSize && szLine)
  222. {
  223. //
  224. //Read up to a line's worth (terminated by \n)
  225. //but stop short if szLine is too small
  226. //
  227. //
  228. //Check if we've got enough buffered bytes to read from
  229. //if not go ahead and read another buffer's worth
  230. //
  231. while(!fDone)
  232. {
  233. if(_curBytePtr >= _curBufSize)
  234. {
  235. //Read next buffer full
  236. DWORD cbRead = 0;
  237. bRet = ReadFile(_hFile,
  238. _pBuffer,
  239. READ_BUF_SIZE,
  240. &cbRead,
  241. NULL);
  242. if(!bRet && GetLastError() == ERROR_HANDLE_EOF)
  243. {
  244. //cancel error
  245. bRet = TRUE;
  246. _fReadToEOF = TRUE;
  247. }
  248. if(bRet)
  249. {
  250. if(cbRead)
  251. {
  252. _curBufSize = cbRead;
  253. _curBytePtr = 0;
  254. }
  255. else
  256. {
  257. _fReadToEOF = TRUE;
  258. if(cbBytesCopied)
  259. {
  260. //reached EOF but we've returned at least
  261. //some data
  262. return ERR_SUCCESS;
  263. }
  264. else
  265. {
  266. //EOF can't read any data
  267. return ERR_EOF;
  268. }
  269. }
  270. }
  271. else
  272. {
  273. TRC_NRM((TB,_T("ReadFile returned fail:%x"),
  274. GetLastError()));
  275. return ERR_FILEOP;
  276. }
  277. }
  278. TRC_ASSERT(_curBytePtr < READ_BUF_SIZE,
  279. (TB,_T("_curBytePtr %d exceeds buf size"),
  280. _curBytePtr));
  281. //
  282. // If we're at the start of the file,
  283. //
  284. if(_fAtStartOfFile)
  285. {
  286. //CAREFULL this could update the current byte ptr
  287. CheckFirstBufMarkedUnicode();
  288. _fAtStartOfFile = FALSE;
  289. }
  290. if(fFirstIter)
  291. {
  292. if(_fFileIsUnicode)
  293. {
  294. //file is unicode output directly into user buffer
  295. pOutBuf = (PBYTE)szLine;
  296. //leave a space for a trailing WCHAR null
  297. cbOutputSize = cbLineSize - sizeof(WCHAR);
  298. }
  299. else
  300. {
  301. //read half as many chars as there are bytes in the output
  302. //buf because conversion doubles.
  303. //leave a space for a trailing WCHAR null
  304. cbOutputSize = cbLineSize/sizeof(WCHAR) - 2;
  305. //Alloc ANSI buffer for this line
  306. //if cached buffer is too small
  307. if(cbOutputSize + 2 > _cbAnsiBufSize)
  308. {
  309. if ( _pAnsiLineBuf)
  310. {
  311. LocalFree( _pAnsiLineBuf);
  312. _pAnsiLineBuf = NULL;
  313. }
  314. _pAnsiLineBuf = (PBYTE)LocalAlloc(LPTR,
  315. cbOutputSize + 2);
  316. if(!_pAnsiLineBuf)
  317. {
  318. return ERR_OUT_OF_MEM;
  319. }
  320. _cbAnsiBufSize = cbOutputSize + 2;
  321. }
  322. //file is ANSI output into temporary buffer for conversion
  323. pOutBuf = _pAnsiLineBuf;
  324. }
  325. fFirstIter = FALSE;
  326. }
  327. PBYTE pStartByte = (PBYTE)_pBuffer + _curBytePtr;
  328. PBYTE pReadByte = pStartByte;
  329. PBYTE pNewLine = NULL;
  330. //Find newline. Don't bother scanning further than we can
  331. //write in the input buffer
  332. int maxreaddist = min(_curBufSize-_curBytePtr,
  333. cbOutputSize-cbBytesCopied);
  334. PBYTE pEndByte = (PBYTE)pStartByte + maxreaddist;
  335. for(;pReadByte<pEndByte;pReadByte++)
  336. {
  337. if(*pReadByte == '\n')
  338. {
  339. if(_fFileIsUnicode)
  340. {
  341. //
  342. // Check if the previous byte was a zero
  343. // if so we've hit the '0x0 0xa' byte pair
  344. // for a unicode '\n'
  345. //
  346. if(pReadByte != pStartByte &&
  347. *(pReadByte - 1) == 0)
  348. {
  349. pNewLine = pReadByte;
  350. break;
  351. }
  352. }
  353. else
  354. {
  355. pNewLine = pReadByte;
  356. break;
  357. }
  358. }
  359. }
  360. if(pNewLine)
  361. {
  362. int cbBytesToCopy = (pNewLine - pStartByte) +
  363. (_fFileIsUnicode ? sizeof(WCHAR) : sizeof(CHAR));
  364. if(cbBytesToCopy <= (cbOutputSize-cbBytesCopied))
  365. {
  366. memcpy( pOutBuf + cbBytesCopied, pStartByte,
  367. cbBytesToCopy);
  368. _curBytePtr += cbBytesToCopy;
  369. cbBytesCopied += cbBytesToCopy;
  370. fDone = TRUE;
  371. }
  372. }
  373. else
  374. {
  375. //Didn't find a newline
  376. memcpy( pOutBuf + cbBytesCopied, pStartByte,
  377. maxreaddist);
  378. //we're done if we filled up the output
  379. _curBytePtr += maxreaddist;
  380. cbBytesCopied += maxreaddist;
  381. if(cbBytesCopied == cbOutputSize)
  382. {
  383. fDone = TRUE;
  384. }
  385. }
  386. } // iterate over file buffer chunks
  387. //Ensure trailing null
  388. pOutBuf[cbBytesCopied] = 0;
  389. if(_fFileIsUnicode)
  390. {
  391. pOutBuf[cbBytesCopied+1] = 0;
  392. }
  393. //Done reading line
  394. if(_fFileIsUnicode)
  395. {
  396. EatCRLF( (LPWSTR)szLine, cbBytesCopied/sizeof(WCHAR));
  397. return ERR_SUCCESS;
  398. }
  399. else
  400. {
  401. //The file is ANSI. Conv to UNICODE,
  402. //first copy the contents out of the output
  403. //Now convert to UNICODE
  404. int ret =
  405. MultiByteToWideChar(CP_ACP,
  406. MB_PRECOMPOSED,
  407. (LPCSTR)_pAnsiLineBuf,
  408. -1,
  409. szLine,
  410. cbLineSize/sizeof(WCHAR));
  411. if(ret)
  412. {
  413. EatCRLF( (LPWSTR)szLine, ret - 1);
  414. return ERR_SUCCESS;
  415. }
  416. else
  417. {
  418. TRC_ERR((TB,_T("MultiByteToWideChar failed: %x"),
  419. GetLastError()));
  420. DWORD dwErr = GetLastError();
  421. if(ERROR_INSUFFICIENT_BUFFER == dwErr)
  422. {
  423. return ERR_BUFTOOSMALL;
  424. }
  425. else
  426. {
  427. return ERR_UNKNOWN;
  428. }
  429. }
  430. }
  431. }
  432. else
  433. {
  434. //error path
  435. if(_fReadToEOF)
  436. {
  437. return ERR_EOF;
  438. }
  439. if(!_fOpenForRead)
  440. {
  441. return ERR_NOTOPENFORREAD;
  442. }
  443. else if (!_pBuffer)
  444. {
  445. return ERR_OUT_OF_MEM;
  446. }
  447. else
  448. {
  449. return ERR_UNKNOWN;
  450. }
  451. }
  452. DC_END_FN();
  453. }
  454. // check for the UNICODE BOM and eat it
  455. void CTscFileStream::CheckFirstBufMarkedUnicode()
  456. {
  457. DC_BEGIN_FN("CheckFirstBufMarkedUnicode");
  458. TRC_ASSERT(_pBuffer, (TB,_T("NO buffer")));
  459. if(_curBufSize >= sizeof(WCHAR))
  460. {
  461. LPWSTR pwsz = (LPWSTR)_pBuffer;
  462. if(UNICODE_BOM == *pwsz)
  463. {
  464. TRC_NRM((TB,_T("File is UNICODE")));
  465. _fFileIsUnicode = TRUE;
  466. _curBytePtr += sizeof(WCHAR);
  467. }
  468. else
  469. {
  470. TRC_NRM((TB,_T("File is ANSI")));
  471. _fFileIsUnicode = FALSE;
  472. }
  473. }
  474. else
  475. {
  476. //File to small (less than 2 bytes)
  477. //can't be unicode
  478. _fFileIsUnicode = FALSE;
  479. }
  480. DC_END_FN();
  481. }
  482. //
  483. // Write string szLine to the file
  484. // converting to ANSI if the file is not a unicode file
  485. // also writeout the UNICODE BOM at the start of the
  486. // the file
  487. //
  488. INT CTscFileStream::Write(LPWSTR szLine)
  489. {
  490. DC_BEGIN_FN("WriteNext");
  491. BOOL bRet = FALSE;
  492. DWORD cbWrite = 0;
  493. PBYTE pDataOut = NULL;
  494. DWORD dwWritten;
  495. if(_fOpenForWrite && szLine)
  496. {
  497. TRC_ASSERT(_hFile != INVALID_HANDLE_VALUE,
  498. (TB,_T("No file handle")));
  499. if(_fFileIsUnicode)
  500. {
  501. if(_fAtStartOfFile)
  502. {
  503. //Write the BOM
  504. WCHAR wcBOM = UNICODE_BOM;
  505. bRet = WriteFile( _hFile, &wcBOM, sizeof(wcBOM),
  506. &dwWritten, NULL);
  507. if(!bRet || dwWritten != sizeof(wcBOM))
  508. {
  509. TRC_NRM((TB,_T("WriteFile returned fail:%x"),
  510. GetLastError()));
  511. return ERR_FILEOP;
  512. }
  513. _fAtStartOfFile = FALSE;
  514. }
  515. //Write UNICODE data out directly
  516. pDataOut = (PBYTE)szLine;
  517. cbWrite = wcslen(szLine) * sizeof(WCHAR);
  518. }
  519. else
  520. {
  521. //Convert UNICODE data to ANSI
  522. //before writing it out
  523. TRC_ASSERT(_pAnsiLineBuf && _cbAnsiBufSize,
  524. (TB,_T("ANSI conversion buffer should be allocated")));
  525. INT ret = WideCharToMultiByte(
  526. CP_ACP,
  527. WC_COMPOSITECHECK | WC_DEFAULTCHAR,
  528. szLine,
  529. -1,
  530. (LPSTR)_pAnsiLineBuf,
  531. _cbAnsiBufSize,
  532. NULL, // system default character.
  533. NULL); // no notification of conversion failure.
  534. if(ret)
  535. {
  536. pDataOut = _pAnsiLineBuf;
  537. cbWrite = ret - 1; //don't write out the NULL
  538. }
  539. else
  540. {
  541. TRC_ERR((TB,_T("MultiByteToWideChar failed: %x"),
  542. GetLastError()));
  543. DWORD dwErr = GetLastError();
  544. if(ERROR_INSUFFICIENT_BUFFER == dwErr)
  545. {
  546. return ERR_BUFTOOSMALL;
  547. }
  548. else
  549. {
  550. return ERR_UNKNOWN;
  551. }
  552. }
  553. }
  554. bRet = WriteFile( _hFile, pDataOut, cbWrite,
  555. &dwWritten, NULL);
  556. if(bRet && dwWritten == cbWrite)
  557. {
  558. return ERR_SUCCESS;
  559. }
  560. else
  561. {
  562. TRC_NRM((TB,_T("WriteFile returned fail:%x"),
  563. GetLastError()));
  564. return ERR_FILEOP;
  565. }
  566. }
  567. else
  568. {
  569. if(!_fOpenForWrite)
  570. {
  571. return ERR_NOTOPENFORWRITE;
  572. }
  573. else
  574. {
  575. return ERR_UNKNOWN;
  576. }
  577. }
  578. DC_END_FN();
  579. }
  580. //
  581. // Remap a \r\n pair from the end of the line
  582. // to a \n
  583. //
  584. void CTscFileStream::EatCRLF(LPWSTR szLine, INT nChars)
  585. {
  586. if(szLine && nChars >= 2)
  587. {
  588. if(szLine[nChars-1] == _T('\n') &&
  589. szLine[nChars-2] == _T('\r'))
  590. {
  591. szLine[nChars-2] = _T('\n');
  592. //this adds a double NULL to the end of the string
  593. szLine[nChars-1] = 0;
  594. }
  595. }
  596. }