Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

382 lines
8.6 KiB

  1. /* cat - conCATenate STDIN to STDOUT
  2. *
  3. * 24-Apr 1998 hiroyama
  4. *
  5. */
  #include "precomp.h"
  #pragma hdrstop
  #include <limits.h>
  #include "fnreg.h"
  9. #if !defined(UNICODE)
  10. #error please compile me for UNICODE
  11. #endif
  12. #ifndef _T
  13. #define _T TEXT
  14. #endif
  // Bit flags collected in the global `options` word by parse_option().
  // -n: prefix every output line with a line number
  #define LINENUMBER 0x0001
  // -e: print '$' in front of each end-of-line
  #define EOL_MARK 0x0002
  // -t: print a tab character as "^I"
  #define TAB_SPECIAL 0x0004
  // -v: print characters below 0x20 as '^' followed by (character + '@')
  #define CTRL_SPECIAL 0x0008
  // -o: switch stdout to unbuffered mode
  #define NO_BUFFERED_OUTPUT 0x0010
  // -u: read the input as wide (UNICODE) characters
  #define UNICODE_INPUT 0x0020
  // -a: behave like -u when the first word of the input is a BOM
  #define AUTO_UNICODE_DETECT 0x0040
  // Per-input I/O state built by complex_cat() and passed by value to the
  // getcaw()/ungetaw()/putcharaw() helpers.  complex_cat() fills it with a
  // positional aggregate initializer, so the member order must not change.
  struct InOutMode {
  // true: input is read with fgetwc(); false: input is read with fgetc()
  BOOLEAN fUnicodeInput;
  // true: wide characters are converted with wctomb() before being written
  // (complex_cat() sets this when stdout is a tty)
  BOOLEAN fMbcsOutput;
  // true: the two bytes of every input character are exchanged after reading
  // (complex_cat() sets this when the first word read is 0xfffe)
  BOOLEAN fNeedSwab;
  // true: stdout is flushed after every end-of-line
  BOOLEAN fLineBuffer;
  };
  // Bitwise OR of the option flags (LINENUMBER ... AUTO_UNICODE_DETECT)
  // selected on the command line; written by parse_option().
  DWORD options = 0;
  // Size passed to setvbuf() for the input file buffer and for the fully
  // buffered stdout.
  #define LARGE_BUFSIZE (512)
  // Print the command-line synopsis to stderr and terminate the process
  // with EXIT_FAILURE.  This function never returns.
  void usage()
  {
      static const char* const help[] = {
          "cat [-aenotuvV] [filename ...]\n",
          "-a same as -u if input begins with BOM\n",
          "-e add '$' at the end of line\n",
          "-n add line number\n",
          "-o suppress buffering for output\n",
          "-t show tab character as '^I'\n",
          "-u unicode text processing\n",
          "-v show control characters as '^' + alphabet, except tab and newline.\n",
          "-V show version\n",
          "-- declare end of options\n",
      };
      const size_t count = sizeof help / sizeof help[0];

      for (size_t i = 0; i < count; ++i) {
          fputs(help[i], stderr);
      }
      exit(EXIT_FAILURE);
  }
  // Format `n` as exactly `width` decimal digits, zero-padded on the left,
  // followed by a terminating NUL.
  //
  //   p     - destination buffer of character type T; must have room for
  //           width + 1 elements
  //   n     - value to format; only its magnitude is rendered (no sign), and
  //           digits that do not fit in `width` are dropped from the left
  //   width - number of digits to emit; a negative width is treated as 0
  //
  // The buffer is filled by index so that no pointer before the start of the
  // buffer is ever formed.
  template <class T>
  inline void ntoa(T* p, int n, int width)
  {
      if (width < 0) {
          width = 0;
      }
      // work on the magnitude so that every emitted character is '0'..'9'
      unsigned int value = (n < 0) ? (0u - (unsigned int)n) : (unsigned int)n;

      p[width] = '\0';
      for (int i = width - 1; i >= 0; --i) {
          p[i] = (T)('0' + (value % 10));
          value /= 10;
      }
  }
  // Exchange the values of `a` and `b`.
  //
  // A temporary is used instead of the XOR trick: the chained
  // `a ^= b ^= a ^= b` modifies `a` twice without sequencing, clears the
  // value when both arguments refer to the same object, and only compiles
  // for integral types.
  template <class T>
  inline void swap(T& a, T& b)
  {
      T tmp = a;
      a = b;
      b = tmp;
  }
  61. /////////////////////////////////////////////////////////
  62. // Complex cat UNICODE helpers
  63. /////////////////////////////////////////////////////////
  64. inline int getcaw(InOutMode mode, FILE* fp)
  65. {
  66. if (mode.fUnicodeInput) {
  67. wchar_t c = fgetwc(fp);
  68. if (c == WEOF)
  69. return EOF;
  70. return c;
  71. }
  72. return fgetc(fp);
  73. }
  74. inline void ungetaw(InOutMode mode, int c, FILE* fp)
  75. {
  76. if (mode.fUnicodeInput)
  77. ungetwc((wchar_t)c, fp);
  78. else
  79. ungetc(c, fp);
  80. }
  81. inline void putcharaw(InOutMode mode, int c)
  82. {
  83. if (mode.fUnicodeInput) {
  84. // if output is dbcs (i.e. tty output), we need to
  85. // translate the unicode character
  86. if (mode.fMbcsOutput) {
  87. // prevent to print BOM
  88. if (c != 0xfeff) {
  89. // translate the output
  90. char buf[2];
  91. int n = wctomb(buf, (wchar_t)c);
  92. putchar(buf[0]);
  93. if (n == 2) {
  94. putchar(buf[1]);
  95. }
  96. }
  97. }
  98. else {
  99. putwchar((wchar_t)c);
  100. }
  101. }
  102. else
  103. putchar(c);
  104. }
  // Exchange the two low-order bytes of `c` in place.
  // Any bits above bit 15 are discarded from the result.
  inline void swab(int& c)
  {
      const int high = (c & 0xff00) >> 8;
      const int low = c & 0x00ff;
      c = (low << 8) | high;
  }
  109. /////////////////////////////////////////////////////////
  110. // Complex cat
  111. /////////////////////////////////////////////////////////
  112. void complex_cat(const TCHAR* fname)
  113. {
  114. FILE* fp;
  115. static char* large_buf;
  116. if (fname) {
  117. if ((fp = _tfopen(fname, _T("rb"))) == NULL) {
  118. _tperror(fname);
  119. exit(EXIT_FAILURE);
  120. }
  121. large_buf = (char*)alloca(LARGE_BUFSIZE);
  122. if (setvbuf(fp, large_buf, _IOFBF, LARGE_BUFSIZE))
  123. perror("setvbuf");
  124. }
  125. else {
  126. // if fname == NULL, take input from stdin.
  127. fp = stdin;
  128. }
  129. static bool tol = true; // Top Of Line
  130. static long lineno = 0;
  131. int c, c2;
  132. // Initialize In/Out mode
  133. InOutMode inOutMode = {
  134. !!(options & UNICODE_INPUT),
  135. false,
  136. false,
  137. // if buffered mode and stdout is tty, flush buffer at each EOL
  138. !(options & NO_BUFFERED_OUTPUT) && _isatty(_fileno(stdout)),
  139. };
  140. // UNICODE initialization
  141. if (inOutMode.fUnicodeInput) {
  142. // sample the first word for BOM detection
  143. c = fgetwc(fp);
  144. init_unicode:
  145. _setmode(_fileno(fp), _O_BINARY);
  146. if (_isatty(_fileno(stdout))) {
  147. // if the output is tty,
  148. // need to convert UNICODE to MBCS on output
  149. inOutMode.fMbcsOutput = true;
  150. }
  151. // try to process the BOM
  152. if (c == 0xfeff) {
  153. putcharaw(inOutMode, c);
  154. }
  155. else if (c == 0xfffe) {
  156. inOutMode.fNeedSwab = true;
  157. swab(c);
  158. putcharaw(inOutMode, c);
  159. }
  160. else {
  161. ungetwc((wchar_t)c, fp);
  162. }
  163. }
  164. else if (options & AUTO_UNICODE_DETECT) {
  165. // sample and examine the first word to see if it's UNICODE BOM
  166. c = fgetwc(fp);
  167. if (c == 0xfffe || c == 0xfeff) {
  168. inOutMode.fUnicodeInput = true;
  169. goto init_unicode;
  170. }
  171. ungetwc((wchar_t)c, fp);
  172. }
  173. #ifdef MEASURE_PERF
  174. DWORD start = ::GetTickCount();
  175. #endif
  176. while ((c = getcaw(inOutMode, fp)) != EOF) {
  177. if (tol) {
  178. // process line number
  179. tol = false;
  180. if (options & LINENUMBER) {
  181. if (inOutMode.fUnicodeInput && !inOutMode.fMbcsOutput) {
  182. wchar_t buf[5];
  183. ntoa(buf, ++lineno, 4);
  184. fputws(buf, stdout);
  185. fputws(L": ", stdout);
  186. }
  187. else {
  188. char buf[5];
  189. ntoa(buf, ++lineno, 4);
  190. fputs(buf, stdout);
  191. fputs(": ", stdout);
  192. }
  193. }
  194. }
  195. if (inOutMode.fNeedSwab)
  196. swab(c);
  197. switch (c) {
  198. case '\r':
  199. c2 = getcaw(inOutMode, fp);
  200. if (c2 != '\n') {
  201. ungetaw(inOutMode, c2, fp);
  202. goto normal_input;
  203. }
  204. // fall through
  205. case '\n':
  206. if (options & EOL_MARK) {
  207. putcharaw(inOutMode, '$');
  208. }
  209. if (c != '\n') {
  210. putcharaw(inOutMode, c);
  211. c = c2;
  212. }
  213. putcharaw(inOutMode, c);
  214. if (inOutMode.fLineBuffer) {
  215. // if line buffer mode, flush it
  216. fflush(stdout);
  217. }
  218. tol = true;
  219. break;
  220. case '\t':
  221. if (options & TAB_SPECIAL) {
  222. fputs("^I", stdout);
  223. }
  224. else {
  225. putcharaw(inOutMode, c);
  226. }
  227. break;
  228. default:
  229. normal_input:
  230. if (c < 0x20 && (options & CTRL_SPECIAL)) {
  231. putcharaw(inOutMode, '^');
  232. c += '@';
  233. }
  234. putcharaw(inOutMode, c);
  235. break;
  236. }
  237. }
  238. if (fname) {
  239. fclose(fp);
  240. }
  241. #ifdef MEASURE_PERF
  242. DWORD end = ::GetTickCount();
  243. fprintf(stderr, "delta=%u\n", end - start);
  244. #endif
  245. }
  246. void cat(const TCHAR* fname = NULL)
  247. {
  248. static bool is1st = true;
  249. if (is1st) {
  250. is1st = false;
  251. if (options & NO_BUFFERED_OUTPUT) {
  252. // non buffered mode
  253. if (setvbuf(stdout, NULL, _IONBF, 0))
  254. perror("setvbuf");
  255. }
  256. else {
  257. if (setvbuf(stdout, NULL, _IOFBF, LARGE_BUFSIZE))
  258. perror("setvbuf");
  259. }
  260. }
  261. complex_cat(fname);
  262. }
  263. void parse_option(const TCHAR* s, bool& eoo) // eoo: end of options
  264. {
  265. extern char version[];
  266. while (*++s) {
  267. switch (*s) {
  268. case _T('-'):
  269. eoo = true;
  270. return;
  271. case _T('a'):
  272. options |= AUTO_UNICODE_DETECT;
  273. break;
  274. case _T('e'):
  275. options |= EOL_MARK;
  276. break;
  277. case _T('n'):
  278. options |= LINENUMBER;
  279. break;
  280. case _T('v'):
  281. options |= CTRL_SPECIAL;
  282. break;
  283. case _T('t'):
  284. options |= TAB_SPECIAL;
  285. break;
  286. case _T('o'):
  287. options |= NO_BUFFERED_OUTPUT;
  288. break;
  289. case _T('u'):
  290. options |= UNICODE_INPUT;
  291. break;
  292. case _T('V'):
  293. fputs(version, stderr);
  294. exit(EXIT_SUCCESS);
  295. default:
  296. usage(); // never returns
  297. }
  298. }
  299. }
  300. #ifdef UNICODE
  301. #define main wmain
  302. #endif
  303. extern "C"
  304. int __cdecl main(int argc, TCHAR** argv)
  305. {
  306. int n = 0;
  307. bool eoo = false;
  308. fnexpand(&argc, &argv);
  309. setlocale(LC_ALL, "");
  310. #if 1
  311. // set stdout binary mode
  312. _setmode(_fileno(stdout), _O_BINARY);
  313. #else // TEST
  314. for (int i = 0; i < argc; ++i) {
  315. _putts(argv[i]);
  316. }
  317. exit(0);
  318. #endif
  319. while (--argc) {
  320. if (**++argv == _T('-') && !eoo) {
  321. parse_option(*argv, eoo);
  322. }
  323. else {
  324. ++n;
  325. eoo = true;
  326. cat(*argv);
  327. }
  328. }
  329. if (n == 0) {
  330. _setmode(_fileno(stdin), _O_BINARY);
  331. cat();
  332. }
  333. return EXIT_SUCCESS;
  334. }