Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

650 lines
16 KiB

  1. /* $Source: /u/mark/src/pax/RCS/extract.c,v $
  2. *
  3. * $Revision: 1.3 $
  4. *
  5. * extract.c - Extract files from a tar archive.
  6. *
  7. * DESCRIPTION
  8. *
  9. * AUTHOR
  10. *
  11. * Mark H. Colburn, NAPS International (mark@jhereg.mn.org)
  12. *
  13. * Sponsored by The USENIX Association for public distribution.
  14. *
  15. * Copyright (c) 1989 Mark H. Colburn.
  16. * All rights reserved.
  17. *
  18. * Redistribution and use in source and binary forms are permitted
  19. * provided that the above copyright notice is duplicated in all such
  20. * forms and that any documentation, advertising materials, and other
  21. * materials related to such distribution and use acknowledge that the
  22. * software was developed * by Mark H. Colburn and sponsored by The
  23. * USENIX Association.
  24. *
  25. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
  26. * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
  27. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  28. *
  29. * $Log: extract.c,v $
  30. * Revision 1.3 89/02/12 10:29:43 mark
  31. * Fixed misspelling of Replstr
  32. *
  33. * Revision 1.2 89/02/12 10:04:24 mark
  34. * 1.2 release fixes
  35. *
  36. * Revision 1.1 88/12/23 18:02:07 mark
  37. * Initial revision
  38. *
  39. */
  #ifndef lint
  /* RCS identification and copyright strings; omitted when lint is defined. */
  static char *ident =
      "$Id: extract.c,v 1.3 89/02/12 10:29:43 mark Exp Locker: mark $";
  static char *copyright =
      "Copyright (c) 1989 Mark H. Colburn.\nAll rights reserved.\n";
  #endif /* ! lint */
  /* Headers */
  #include "pax.h"
  #include <string.h>
  46. /* Defines */
  /*
   * SWAB - exchange the two bytes of a 16-bit quantity.
   *
   * The argument is truncated to ushort before the bytes are exchanged.
   * It is evaluated twice, so it must not have side effects.
   */
  #define SWAB(n) \
      ((((ushort)(n) << 8) & 0xff00) | (((ushort)(n) >> 8) & 0xff))
  51. /* Function Prototypes */
  52. #ifdef __STDC__
  53. static int inbinary(char *, char *, Stat *);
  54. static int inascii(char *, char *, Stat *);
  55. static int inswab(char *, char *, Stat *);
  56. static int readtar(char *, Stat *);
  57. static int readcpio(char *, Stat *);
  58. #else /* !__STDC__ */
  59. static int inbinary();
  60. static int inascii();
  61. static int inswab();
  62. static int readtar();
  63. static int readcpio();
  64. #endif /* __STDC__ */
  65. /* read_archive - read in an archive
  66. *
  67. * DESCRIPTION
  68. *
  69. * Read_archive is the central entry point for reading archives.
  70. * Read_archive determines the proper archive functions to call
  71. * based upon the archive type being processed.
  72. *
  73. * RETURNS
  74. *
  75. */
  76. #ifdef __STDC__
  77. void read_archive(void) /* Xn */
  78. #else
  79. void read_archive() /* Xn */
  80. #endif
  81. {
  82. Stat sb;
  83. char name[PATH_MAX + 1];
  84. int match;
  85. int pad;
  86. #ifdef DF_TRACE_DEBUG
  87. printf("DF_TRACE_DEBUG: void read_archive() in extract.c\n");
  88. #endif
  89. name_gather(); /* get names from command line */
  90. name[0] = '\0';
  91. while (get_header(name, &sb) == 0) {
  92. match = name_match(name) ^ f_reverse_match;
  93. if (f_list) { /* only wanted a table of contents */
  94. if (match) {
  95. print_entry(name, &sb);
  96. }
  97. if (((ar_format == TAR)
  98. ? buf_skip(ROUNDUP((OFFSET) sb.sb_size, BLOCKSIZE))
  99. : buf_skip((OFFSET) sb.sb_size)) < 0) {
  100. warn(name, "File data is corrupt");
  101. }
  102. } else if (match) {
  103. if (rplhead != (Replstr *)NULL) {
  104. rpl_name(name);
  105. if (strlen(name) == 0) {
  106. continue;
  107. }
  108. }
  109. if (get_disposition("extract", name) ||
  110. get_newname(name, sizeof(name))) {
  111. /* skip file... */
  112. if (((ar_format == TAR)
  113. ? buf_skip(ROUNDUP((OFFSET) sb.sb_size, BLOCKSIZE))
  114. : buf_skip((OFFSET) sb.sb_size)) < 0) {
  115. warn(name, "File data is corrupt");
  116. }
  117. continue;
  118. }
  119. if (inentry(name, &sb) < 0) {
  120. warn(name, "File data is corrupt");
  121. }
  122. if (f_verbose) {
  123. print_entry(name, &sb);
  124. }
  125. #if 0 /* NIST-PCTS */
  126. if (ar_format == TAR && sb.sb_nlink > 1)
  127. #else /* NIST-PCTS */
  128. if (ar_format == TAR && sb.sb_nlink > 1 && /* NIST-PCTS */
  129. (sb.sb_mode & S_IFMT) != S_IFDIR) /* NIST-PCTS */
  130. #endif /* NIST-PCTS */
  131. {
  132. /*
  133. * This kludge makes sure that the link table is cleared
  134. * before attempting to process any other links.
  135. */
  136. #ifdef DF_TRACE_DEBUG
  137. printf("DF_TRACE_DEBUG: () in extract.c\n");
  138. #endif
  139. if (sb.sb_nlink > 1) {
  140. linkfrom(name, &sb);
  141. }
  142. }
  143. if (ar_format == TAR && (pad = sb.sb_size % BLOCKSIZE) != 0) {
  144. pad = BLOCKSIZE - pad;
  145. buf_skip((OFFSET) pad);
  146. }
  147. } else {
  148. if (((ar_format == TAR)
  149. ? buf_skip(ROUNDUP((OFFSET) sb.sb_size, BLOCKSIZE))
  150. : buf_skip((OFFSET) sb.sb_size)) < 0) {
  151. warn(name, "File data is corrupt");
  152. }
  153. }
  154. }
  155. close_archive();
  156. }
  157. /* get_header - figures which type of header needs to be read.
  158. *
  159. * DESCRIPTION
  160. *
  161. * This is merely a single entry point for the two types of archive
  162. * headers which are supported. The correct header is selected
  163. * depending on the archive type.
  164. *
  165. * PARAMETERS
  166. *
  167. * char *name - name of the file (passed to header routine)
  168. * Stat *asb - Stat block for the file (passed to header routine)
  169. *
  170. * RETURNS
  171. *
  172. * Returns the value which was returned by the proper header
  173. * function.
  174. */
  175. #ifdef __STDC__
  176. int get_header(char *name, Stat *asb)
  177. #else
  178. int get_header(name, asb)
  179. char *name;
  180. Stat *asb;
  181. #endif
  182. {
  183. #ifdef DF_TRACE_DEBUG
  184. printf("DF_TRACE_DEBUG: int get_header() in extract.c\n");
  185. #endif
  186. if (ar_format == TAR) {
  187. return(readtar(name, asb));
  188. } else {
  189. return(readcpio(name, asb));
  190. }
  191. }
  192. /* readtar - read a tar header
  193. *
  194. * DESCRIPTION
  195. *
  196. * Tar_head read a tar format header from the archive. The name
  197. * and asb parameters are modified as appropriate for the file listed
  198. * in the header. Name is assumed to be a pointer to an array of
  199. * at least PATH_MAX bytes.
  200. *
  201. * PARAMETERS
  202. *
  203. * char *name - name of the file for which the header is
  204. * for. This is modified and passed back to
  205. * the caller.
  206. * Stat *asb - Stat block for the file for which the header
  207. * is for. The fields of the stat structure are
  208. * extracted from the archive header. This is
  209. * also passed back to the caller.
  210. *
  211. * RETURNS
  212. *
  213. * Returns 0 if a valid header was found, or -1 if EOF is
  214. * encountered.
  215. */
  216. #ifdef __STDC__
  217. static int readtar(char *name, Stat *asb)
  218. #else
  219. static int readtar(name, asb)
  220. char *name;
  221. Stat *asb;
  222. #endif
  223. {
  224. int status = 3; /* Initial status at start of archive */
  225. static int prev_status;
  226. #ifdef DF_TRACE_DEBUG
  227. printf("DF_TRACE_DEBUG: static int readtar() in extract.c\n");
  228. #endif
  229. for (;;) {
  230. prev_status = status;
  231. status = read_header(name, asb);
  232. switch (status) {
  233. case 1: /* Valid header */
  234. return(0);
  235. case 0: /* Invalid header */
  236. switch (prev_status) {
  237. case 3: /* Error on first record */
  238. warn(ar_file, "This doesn't look like a tar archive");
  239. /* FALLTHRU */
  240. case 2: /* Error after record of zeroes */
  241. case 1: /* Error after header rec */
  242. warn(ar_file, "Skipping to next file...");
  243. /* FALLTHRU */
  244. default:
  245. case 0: /* Error after error */
  246. break;
  247. }
  248. break;
  249. case 2: /* Record of zeroes */
  250. case EOF: /* End of archive */
  251. default:
  252. return(-1);
  253. }
  254. }
  255. }
  256. /* readcpio - read a CPIO header
  257. *
  258. * DESCRIPTION
  259. *
  260. * Read in a cpio header. Understands how to determine and read ASCII,
  261. * binary and byte-swapped binary headers. Quietly translates
  262. * old-fashioned binary cpio headers (and arranges to skip the possible
  263. * alignment byte). Returns zero if successful, -1 upon archive trailer.
  264. *
  265. * PARAMETERS
  266. *
  267. * char *name - name of the file for which the header is
  268. * for. This is modified and passed back to
  269. * the caller.
  270. * Stat *asb - Stat block for the file for which the header
  271. * is for. The fields of the stat structure are
  272. * extracted from the archive header. This is
  273. * also passed back to the caller.
  274. *
  275. * RETURNS
  276. *
  277. * Returns 0 if a valid header was found, or -1 if EOF is
  278. * encountered.
  279. */
  280. #ifdef __STDC__
  281. static int readcpio(char *name, Stat *asb)
  282. #else
  283. static int readcpio(name, asb)
  284. char *name;
  285. Stat *asb;
  286. #endif
  287. {
  288. OFFSET skipped;
  289. char magic[M_STRLEN];
  290. static int align = 0; /* Xn */
  291. #ifdef DF_TRACE_DEBUG
  292. printf("DF_TRACE_DEBUG: static int readcpio() in extract.c\n");
  293. #endif
  294. if (align > 0) {
  295. buf_skip((OFFSET) align);
  296. }
  297. align = 0;
  298. for (;;) {
  299. buf_read(magic, M_STRLEN);
  300. skipped = 0;
  301. while ((align = inascii(magic, name, asb)) < 0
  302. && (align = inbinary(magic, name, asb)) < 0
  303. && (align = inswab(magic, name, asb)) < 0) {
  304. if (++skipped == 1) {
  305. if (total - sizeof(magic) == 0) {
  306. fatal("Unrecognizable archive");
  307. }
  308. warnarch("Bad magic number", (OFFSET) sizeof(magic));
  309. if (name[0]) {
  310. warn(name, "May be corrupt");
  311. }
  312. }
  313. memcpy(magic, magic + 1, sizeof(magic) - 1);
  314. buf_read(magic + sizeof(magic) - 1, 1);
  315. }
  316. if (skipped) {
  317. warnarch("Apparently resynchronized", (OFFSET) sizeof(magic));
  318. warn(name, "Continuing");
  319. }
  320. if (strcmp(name, TRAILER) == 0) {
  321. return (-1);
  322. }
  323. if (nameopt(name) >= 0) {
  324. break;
  325. }
  326. #if 0 /* NIST-PCTS */
  327. buf_skip((OFFSET) asb->sb_size + align);
  328. #else /* NIST-PCTS */
  329. if (asb->sb_nlink > 1 && islink(name, asb)) /* NIST-PCTS */
  330. buf_skip((OFFSET) 0 + align); /* NIST-PCTS */
  331. else /* NIST-PCTS */
  332. buf_skip((OFFSET) asb->sb_size + align); /* NIST-PCTS */
  333. #endif /* NIST-PCTS */
  334. }
  335. #ifdef S_IFLNK
  336. if ((asb->sb_mode & S_IFMT) == S_IFLNK) {
  337. if (buf_read(asb->sb_link, (uint) asb->sb_size) < 0) {
  338. warn(name, "Corrupt symbolic link");
  339. return (readcpio(name, asb));
  340. }
  341. asb->sb_link[asb->sb_size] = '\0';
  342. asb->sb_size = 0;
  343. }
  344. #endif /* S_IFLNK */
  345. /* destroy absolute pathnames for security reasons */
  346. if (name[0] == '/') {
  347. if (name[1]) {
  348. while (name[0] = name[1]) {
  349. ++name;
  350. }
  351. } else {
  352. name[0] = '.';
  353. }
  354. }
  355. asb->sb_atime = asb->sb_ctime = asb->sb_mtime;
  356. #if 0 /* NIST-PCTS */
  357. if (asb->sb_nlink > 1) {
  358. #else /* NIST-PCTS */
  359. if (asb->sb_nlink > 1 && (asb->sb_mode & S_IFMT) != S_IFDIR) { /* NIST-PCTS */
  360. #endif /* NIST-PCTS */
  361. linkto(name, asb);
  362. }
  363. return (0);
  364. }
  365. /* inswab - read a reversed by order binary header
  366. *
  367. * DESCRIPTIONS
  368. *
  369. * Reads a byte-swapped CPIO binary archive header
  370. *
  371. * PARMAMETERS
  372. *
  373. * char *magic - magic number to match
  374. * char *name - name of the file which is stored in the header.
  375. * (modified and passed back to caller).
  376. * Stat *asb - stat block for the file (modified and passed back
  377. * to the caller).
  378. *
  379. *
  380. * RETURNS
  381. *
  382. * Returns the number of trailing alignment bytes to skip; -1 if
  383. * unsuccessful.
  384. *
  385. */
  386. #ifdef __STDC__
  387. static int inswab(char *magic, char *name, Stat *asb)
  388. #else
  389. static int inswab(magic, name, asb)
  390. char *magic;
  391. char *name;
  392. Stat *asb;
  393. #endif
  394. {
  395. ushort namesize;
  396. uint namefull;
  397. Binary binary;
  398. #ifdef DF_TRACE_DEBUG
  399. printf("DF_TRACE_DEBUG: static int inswab() in extract.c\n");
  400. #endif
  401. if (*((ushort *) magic) != SWAB(M_BINARY)) {
  402. return (-1);
  403. }
  404. memcpy((char *) &binary,
  405. magic + sizeof(ushort),
  406. M_STRLEN - sizeof(ushort));
  407. if (buf_read((char *) &binary + M_STRLEN - sizeof(ushort),
  408. sizeof(binary) - (M_STRLEN - sizeof(ushort))) < 0) {
  409. warnarch("Corrupt swapped header",
  410. (OFFSET) sizeof(binary) - (M_STRLEN - sizeof(ushort)));
  411. return (-1);
  412. }
  413. asb->sb_dev = (dev_t) SWAB(binary.b_dev);
  414. asb->sb_ino = (ino_t) SWAB(binary.b_ino);
  415. asb->sb_mode = SWAB(binary.b_mode);
  416. asb->sb_uid = SWAB(binary.b_uid);
  417. asb->sb_gid = SWAB(binary.b_gid);
  418. asb->sb_nlink = SWAB(binary.b_nlink);
  419. #ifndef _POSIX_SOURCE
  420. asb->sb_rdev = (dev_t) SWAB(binary.b_rdev);
  421. #endif
  422. asb->sb_mtime = (time_t) SWAB(binary.b_mtime[0]) << 16 | SWAB(binary.b_mtime[1]); /* Xn */
  423. asb->sb_size = (long) SWAB(binary.b_size[0]) << 16 | SWAB(binary.b_size[1]); /* Xn */
  424. if ((namesize = SWAB(binary.b_name)) == 0 || namesize >= PATH_MAX) {
  425. warnarch("Bad swapped pathname length",
  426. (OFFSET) sizeof(binary) - (M_STRLEN - sizeof(ushort)));
  427. return (-1);
  428. }
  429. if (buf_read(name, namefull = namesize + namesize % 2) < 0) {
  430. warnarch("Corrupt swapped pathname", (OFFSET) namefull);
  431. return (-1);
  432. }
  433. if (name[namesize - 1] != '\0') {
  434. warnarch("Bad swapped pathname", (OFFSET) namefull);
  435. return (-1);
  436. }
  437. return (asb->sb_size % 2);
  438. }
  439. /* inascii - read in an ASCII cpio header
  440. *
  441. * DESCRIPTION
  442. *
  443. * Reads an ASCII format cpio header
  444. *
  445. * PARAMETERS
  446. *
  447. * char *magic - magic number to match
  448. * char *name - name of the file which is stored in the header.
  449. * (modified and passed back to caller).
  450. * Stat *asb - stat block for the file (modified and passed back
  451. * to the caller).
  452. *
  453. * RETURNS
  454. *
  455. * Returns zero if successful; -1 otherwise. Assumes that the entire
  456. * magic number has been read.
  457. */
  458. #ifdef __STDC__
  459. static int inascii(char *magic, char *name, Stat *asb)
  460. #else
  461. static int inascii(magic, name, asb)
  462. char *magic;
  463. char *name;
  464. Stat *asb;
  465. #endif
  466. {
  467. uint namelen;
  468. char header[H_STRLEN + 1];
  469. dev_t unused;
  470. #ifdef DF_TRACE_DEBUG
  471. printf("DF_TRACE_DEBUG: static int inascii() in extract.c\n");
  472. #endif
  473. if (strncmp(magic, M_ASCII, M_STRLEN) != 0) {
  474. return (-1);
  475. }
  476. if (buf_read(header, H_STRLEN) < 0) {
  477. warnarch("Corrupt ASCII header", (OFFSET) H_STRLEN);
  478. return (-1);
  479. }
  480. header[H_STRLEN] = '\0';
  481. asb->sb_mode = 0;
  482. if (sscanf(header, H_SCAN, &asb->sb_dev,
  483. &asb->sb_ino, &asb->sb_mode, &asb->sb_uid,
  484. #ifdef _POSIX_SOURCE
  485. &asb->sb_gid, &asb->sb_nlink, &unused,
  486. #else
  487. &asb->sb_gid, &asb->sb_nlink, &asb->sb_rdev,
  488. #endif
  489. &asb->sb_mtime, &namelen, &asb->sb_size) != H_COUNT) {
  490. warnarch("Bad ASCII header", (OFFSET) H_STRLEN);
  491. return (-1);
  492. }
  493. if (namelen == 0 || namelen >= PATH_MAX) {
  494. warnarch("Bad ASCII pathname length", (OFFSET) H_STRLEN);
  495. return (-1);
  496. }
  497. if (buf_read(name, namelen) < 0) {
  498. warnarch("Corrupt ASCII pathname", (OFFSET) namelen);
  499. return (-1);
  500. }
  501. if (name[namelen - 1] != '\0') {
  502. warnarch("Bad ASCII pathname", (OFFSET) namelen);
  503. return (-1);
  504. }
  505. return (0);
  506. }
  507. /* inbinary - read a binary header
  508. *
  509. * DESCRIPTION
  510. *
  511. * Reads a CPIO format binary header.
  512. *
  513. * PARAMETERS
  514. *
  515. * char *magic - magic number to match
  516. * char *name - name of the file which is stored in the header.
  517. * (modified and passed back to caller).
  518. * Stat *asb - stat block for the file (modified and passed back
  519. * to the caller).
  520. *
  521. * RETURNS
  522. *
  523. * Returns the number of trailing alignment bytes to skip; -1 if
  524. * unsuccessful.
  525. */
  526. #ifdef __STDC__
  527. static int inbinary(char *magic, char *name, Stat *asb)
  528. #else
  529. static int inbinary(magic, name, asb)
  530. char *magic;
  531. char *name;
  532. Stat *asb;
  533. #endif
  534. {
  535. uint namefull;
  536. Binary binary;
  537. #ifdef DF_TRACE_DEBUG
  538. printf("DF_TRACE_DEBUG: static int inbinary() in extract.c\n");
  539. #endif
  540. if (*((ushort *) magic) != M_BINARY) {
  541. return (-1);
  542. }
  543. memcpy((char *) &binary,
  544. magic + sizeof(ushort),
  545. M_STRLEN - sizeof(ushort));
  546. if (buf_read((char *) &binary + M_STRLEN - sizeof(ushort),
  547. sizeof(binary) - (M_STRLEN - sizeof(ushort))) < 0) {
  548. warnarch("Corrupt binary header",
  549. (OFFSET) sizeof(binary) - (M_STRLEN - sizeof(ushort)));
  550. return (-1);
  551. }
  552. asb->sb_dev = binary.b_dev;
  553. asb->sb_ino = binary.b_ino;
  554. asb->sb_mode = binary.b_mode;
  555. asb->sb_uid = binary.b_uid;
  556. asb->sb_gid = binary.b_gid;
  557. asb->sb_nlink = binary.b_nlink;
  558. #ifndef _POSIX_SOURCE
  559. asb->sb_rdev = binary.b_rdev;
  560. #endif
  561. asb->sb_mtime = (time_t) binary.b_mtime[0] << 16 | binary.b_mtime[1]; /* Xn */
  562. asb->sb_size = (long) binary.b_size[0] << 16 | binary.b_size[1]; /* Xn */
  563. if (binary.b_name == 0 || binary.b_name >= PATH_MAX) {
  564. warnarch("Bad binary pathname length",
  565. (OFFSET) sizeof(binary) - (M_STRLEN - sizeof(ushort)));
  566. return (-1);
  567. }
  568. if (buf_read(name, namefull = binary.b_name + binary.b_name % 2) < 0) {
  569. warnarch("Corrupt binary pathname", (OFFSET) namefull);
  570. return (-1);
  571. }
  572. if (name[binary.b_name - 1] != '\0') {
  573. warnarch("Bad binary pathname", (OFFSET) namefull);
  574. return (-1);
  575. }
  576. return (asb->sb_size % 2);
  577. }