/*
 * 512restor (1024restor)
 *
 * Scott Narveson (...!umn_cs!stolaf!narveson)
 * June 1988
 *
 * Program to restore files from V7-style dump tapes with 1/2 K
 * (512restor) or 1 K (1024restor) blocks created on PDP 11
 * computers.  Tapes created on a PDP 11/70 running version 7 Unix
 * and a PDP 11/84 running Ultrix 11 are readable by this
 * program; similar formats "should" be readable but have not
 * been tested.
 *
 * Usage: 512restor -ixtv [-a archive] [name ... ]
 *
 *	where:
 *		-i	Read directory information from tape and
 *			enter an interactive mode similar to BSD 4.3's
 *			restore program.  Typing "help" at the interactive
 *			prompt gives more info.
 *		-x	Extract named objects from tape.  Objects may
 *			be files or directories.  Naming a directory
 *			implies a full recursive restore of that directory
 *			from tape.
 *		-t	List table of contents from tape.  Directory info
 *			from tape is consulted to provide this listing;
 *			a full pass of the tape is not made.
 *		-v	Be more verbose with status messages.
 *		-a archive	look for dump information in the file
 *			"archive" instead of TAPE (see rstr.h).  Current
 *			default for TAPE is /dev/rmt0.
 *
 * Features, Comments, and Caveats:
 *
 *	This program was developed on a VAX 11/780 running BSD 4.3 and
 *	is intended to read tapes created by PDP 11s.  Machine
 *	dependencies resulting from this combination exist in the
 *	routines read_hdr(), ptovl(), and in the definition of the
 *	struct spcl.  (More dependencies may well exist, but don't
 *	occur to me offhand.)
 *
 *	The program will attempt to properly restore file "holes" as
 *	holes (using an lseek in favor of writing a buffer full of
 *	zeroes), but since the block sizes involved differ rather greatly
 *	(i.e., 512 or 1024 bytes on tape, and 8K blocks are written on
 *	the vax) such a file may or may not consume more actual file
 *	system space when restored.
 *
 *	This program never deals with raw file systems.  Directories are
 *	restored with mkdir(2), and files are restored using only open(2),
 *	write(2) and lseek(2).
 *
 *	Setuid and setgid information is restored only to the superuser.
 *
 * Agenda:
 *
 *	This program will choke on (physically) bad tapes (i.e.,
 *	if a read of the archive fails the program just exits).
 *	Being able to read past bad tape blocks (especially when we're
 *	past the directory info on the tape) would be desirable.
 *
 *	(Related to the previous.)  Being able to restore files in
 *	the absence of directory information (bad tape) would be
 *	nice.  Something like "restore by inode #" or "restore by
 *	uid" is possible even if coherent directory information
 *	is not present.
 */

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/time.h>
#include <signal.h>
#include "rstr.h"

/* GLOBALS */

int	(*do_inode)();	/* Pointer to routine used to handle TS_INODE */
			/* records.  process_tape() starts it out as the */
			/* directory-handling routine read_dir; it is meant */
			/* to be switched to the regular file handler when */
			/* the first regular file is seen */

/*
 * archive/volume-related
 */

short	cur_vol;		/* current tape volume: taken from the first */
				/* volume label, bumped by read_block() at */
				/* end-of-volume, verified by nextvol() */

ushort	dumpmap[MSIZ],		/* TS_BITS bitmap (first one seen is kept) */
	clrimap[MSIZ],		/* TS_CLRI bitmap (read but otherwise unused) */
	dummymap[MSIZ];		/* later TS_BITS maps are read into this */
				/* and ignored */

char	*fname = TAPE;		/* name of archive (global so nextvol() can */
				/* re-open multivolume archives in midstream); */
				/* overridden by "-a archive" */

/*
 * command-line arguments and modes
 */

char		*progname,	/* av[0], used as prefix in diagnostics */
		*file_list[MAXNAME];	/* names given on the command line */

int		file_count = 0,	/* number of file arguments on command line */
		verbose = 0,	/* be wordy (-v) */
		ext_count,	/* number of files on extraction list */
		ext_ena = 0,	/* extraction of files enabled (-x) */
		ena_all = 0,	/* extract or list everything (no names given) */
		listing = 0,	/* table of contents (-t) */
		interact = 0;	/* interactive mode (-i) */
uid_t		uid,		/* real uid of invoker */
		euid;		/* ...effective uid */

extern int	errno;
extern char	*sys_errlist[];

/*
 * Some globals (to this module).
 * Needed so nextvol and read_block can survive together:
 * read_block() hands out logical blocks from tapebuf, and
 * flush_cache() resets the state so the next call re-reads
 * the archive.
 */

static char	tapebuf[BSIZE*NTREC],	/* buffer for archive reads */
		*tp;			/* points to undelivered stuff */
static int	delivered = NTREC;	/* number of logical blocks */
					/* returned from this physical block; */
					/* NTREC means "buffer exhausted", so */
					/* the very first read_block() call */
					/* reads from the archive */

/*
 * main
 *
 */

main(ac, av)
char	**av;
{
	char		*key;
	int		fd;
	struct spcl	h;
	int		clean_up();
	uid_t		getuid(), geteuid();

	if (ac == 1) {
		fprintf(stderr, "usage: %s -xitv [-a archive] [name ...]\n",
			av[0]);
		exit(1);
	}
	progname = *av;

	uid = getuid();
	euid = geteuid();

	/*
	 * Get the key arg first.
	 */
	--ac;
	if (**++av == '-')
		++*av;

	/*
	 * Process the separate characters in the key.
	 * For odd commands like -itvxittvit the last key wins.
	 */
	for (key = *av; *key; key++)
		switch (*key) {
		case 'x':
			ext_ena = 1;
			listing = 0;
			interact = 0;
			break;
		case 't':
			listing = 1;
			ext_ena = 0;
			interact = 0;
			break;
		case 'v':
			verbose = 1;
			break;
		case 'i':
			interact = 1;
			ext_ena = 0;
			listing = 0;
			break;
		default:
			fprintf(stderr, "%s: unknown key (%c)\n", *key);
			exit(1);
		}
	
	/*
	 * Process the other args.
	 */
	while (--ac) {
		switch (**++av) {
		case '-':
			if (!strcmp(*av, "-a")) {
				if (!ac) {
					fprintf(stderr,
					   "%s: no archive specified with -a\n",
					    progname);
					exit(1);
				} else {
					--ac;
					fname = *++av;
				}
			} else {
				fprintf(stderr,"%s: unknown option (%s)\n",
					progname, *av);
				exit(1);
			}
			break;
		default:
			if (ext_ena || listing)
				file_list[file_count++] = *av;
			break;
		}
	}
	if ((ext_ena || listing) && !file_count)
		ena_all = 1;

	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
		(void) signal(SIGINT, clean_up);
	if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
		(void) signal(SIGHUP, clean_up);
	if (signal(SIGPIPE, SIG_IGN) != SIG_IGN)
		(void) signal(SIGPIPE, clean_up);
	if (signal(SIGTERM, SIG_IGN) != SIG_IGN)
		(void) signal(SIGTERM, clean_up);

	if ((fd = open(fname, O_RDONLY)) <= 0) {
		perror(fname);
		exit(1);
	}

	if (read_hdr(fd, &h) == -1) {
		fprintf(stderr, "%s: archive not in dump format\n", progname);
		exit(1);
	}

	if (h.c_type != TS_TAPE) {
		fprintf(stderr, "%s: first record not volume label\n",
			progname);
		exit(1);
	}

	cur_vol = h.c_volume;

	ident(&h);

	process_tape(fd);
}

/*
 * begin_extract
 *
 * Called when the first non-directory file is seen
 * on tape.  Main duty is, if we're in interactive mode,
 * to allow an alternative tape volume to be mounted.
 *
 * An empty answer (or end-of-file on stdin) keeps the current
 * volume.  An answer below the current volume or above 10 is
 * rejected and the current volume is kept as well.  Otherwise
 * cur_vol is updated and nextvol() asks for that volume to be
 * mounted.
 *
 * Returns whatever read_regfile() returns for the file described
 * by hp.
 */
begin_extract(fd, hp)
struct spcl	*hp;
{
	char	buf[BUFSIZ];
	char	*p;
	short	new_vol;

	if (interact) {
		printf("Enter beginning tape volume <%d> ", cur_vol);
		(void) fflush(stdout);
		/*
		 * fgets bounds the read to buf; on EOF or error
		 * behave as if an empty line had been typed.
		 */
		if (fgets(buf, sizeof buf, stdin) == NULL)
			*buf = '\0';
		/* drop the newline that fgets keeps */
		for (p = buf; *p && *p != '\n'; p++)
			;
		*p = '\0';
		if (*buf) {
			if ((new_vol = (short) atoi(buf)) < cur_vol ||
					new_vol > 10) {
				printf("Unreasonable answer.  ");
			} else {
				cur_vol = new_vol;
				if (nextvol(fd) == -1) {
					fprintf(stderr,
					 "%s: can't mount next tape volume.\n",
					 progname);
					clean_up(1);
				}
			}
		}
		printf("Starting with volume %d\n", cur_vol);
	}
	if (verbose)
		printf("Extracting files.\n");
	return read_regfile(fd, hp);
}

/*
 * checksum
 *
 * Verify that the BSIZE-byte block starting at b looks like a
 * header block: the 16-bit words of the block, added up in a
 * short, must equal CHECKSUM.
 *
 * Returns 0 when the sum matches.  On a mismatch a complaint
 * (with the computed sum in octal) goes to stderr and -1 is
 * returned; nothing more drastic is done here.
 */
checksum(b)
short	*b;
{
	register short	sum;
	register short	*end;

	sum = 0;
	for (end = b + BSIZE/sizeof(short); b < end; b++)
		sum += *b;

	if (sum == CHECKSUM)
		return 0;

	fprintf(stderr, "Not a header block: checksum error (%o)\n",
		sum & 0xFFFF);
	return -1;
}

/*
 * flush_cache
 *
 * Force read_block to actually read from the archive
 * when it's called next.  This is done by marking the
 * physical-block buffer as fully delivered, which is the
 * condition read_block tests before calling read(2).
 * Anything still undelivered in the buffer is abandoned.
 */
flush_cache()
{
	delivered = NTREC;	/* same value as the initial "empty" state */
}

/*
 * ident
 *
 * Print the two dates recorded in the volume label hp (c_date,
 * then c_ddate), surrounded by blank lines.  Output is produced
 * only in interactive or verbose mode; otherwise this is a no-op.
 */
ident(hp)
struct spcl	*hp;
{
	char	*ctime();

	if (interact || verbose) {
		printf("\n");
		/* ctime() supplies the trailing newline of each line */
		printf("dump date:   %s", ctime(&hp->c_date));
		printf("dumped from: %s", ctime(&hp->c_ddate));
		printf("\n");
	}
}

/*
 * nextvol
 *
 * fd is the file descriptor of the opened archive.
 * Close that file descriptor and attempt to open the next
 * volume of the archive.  We use the global cur_vol to
 * verify correctness, and make sure that the value of the
 * file descriptor for the new volume is the same as for the
 * previous volume (so that this routine's activities are
 * invisible to "higher" layers).
 *
 * The operator gets up to 10 attempts.  Each attempt waits for
 * a line on stdin, reopens fname and checks that the first record
 * is a TS_TAPE label carrying volume number cur_vol.
 *
 * Returns 0 once the right volume is open on the original
 * descriptor number, -1 when all attempts have failed.
 */
nextvol(fd)
{
	struct spcl	hdr;
	int		oldfd = fd,
			tries = 10;
	int		c;

	(void) close(fd);

	while (tries--) {
		/* drop whatever is left of the previous volume's buffer */
		flush_cache();
		if (!tries)
			printf("Last try -- ");

		printf("Mount volume %d and press <RETURN> ", cur_vol);
		(void) fflush(stdout);

		/*
		 * Wait for the end of the operator's line.  The text
		 * itself is not used, so there is no need to store it
		 * (and no buffer to overflow).  EOF just falls through
		 * to the open attempt.
		 */
		while ((c = getchar()) != EOF && c != '\n')
			;

		/* open() reports failure with -1; descriptor 0 is valid */
		if ((fd = open(fname, O_RDONLY)) < 0) {
			fprintf(stderr, "nextvol: can't open %s (%s)\n", fname,
				sys_errlist[errno]);
		/*
		 * This call of read_hdr is BAAAAD programming practice --
		 * it will call read_block, you see, which is what called
		 * this routine in the first place.  However, it is an
		 * easy way to get the first header from the next volume, and
		 * with our kludgy routine flush_cache above we survive the
		 * subtleties of the recursion.  Programmer gets a C-.
		 */
		} else if (read_hdr(fd, &hdr) == -1) {
			fprintf(stderr, "nextvol: can't read header\n");
			(void) close(fd);
		} else if (hdr.c_type != TS_TAPE) {
			fprintf(stderr, "nextvol: didn't get TS_TAPE\n");
			(void) close(fd);
		} else if (hdr.c_volume != cur_vol) {
			fprintf(stderr, "nextvol: wrong volume -- wanted ");
			fprintf(stderr, "%d, got %d\n", cur_vol, hdr.c_volume);
			(void) close(fd);
		} else {
			if (fd != oldfd) {
				if (dup2(fd, oldfd) == -1) {
					perror("nextvol: problems with dup2");
					clean_up(1);
				}
				/*
				 * oldfd now refers to the new volume;
				 * don't leak the extra descriptor.
				 */
				(void) close(fd);
			}
			return 0;
		}
	}
	return -1;
}

/*
 * process_tape
 *
 * Overall control loop for reading the archive.  This routine expects
 * to see only sequences of (in varying order):
 *
 *	TS_INODE ... file
 *	TS_BITS  ... bitmap
 *	TS_CLRI  ... bitmap
 *	TS_END
 *
 * TS_ADDR records are handled via getfile() (see getfile.c),
 * and TS_TAPE records are seen in nextvol(), whose invocations
 * are transparent to this module.
 *
 * fd is the open archive.  The loop only ends through clean_up():
 * with status 0 on TS_END, with status 1 on an unreadable bitmap,
 * an unknown record type, or a failed inode handler that did not
 * leave a TS_INODE header behind.
 */
process_tape(fd)
{
	struct spcl	hdr;
	int		tries;
	extern int	read_dir();
	static int	seen_dumpmap = 0;

	do_inode = read_dir;	/* to begin with, TS_INODE records */
				/* describe directories */

	init_dir();

	for ( ;; ) {

		if (read_hdr(fd, &hdr) != 0) {
			fprintf(stderr, "%s: didn't get header.\n", progname);
			fprintf(stderr, "Scanning archive for header record ");
			fprintf(stderr, "(this may be fruitless).\n");
			/* skip block by block until something checksums */
			while (read_hdr(fd, &hdr) != 0)
				;
		}

		switch (hdr.c_type) {
		case TS_TAPE:
			fprintf(stderr, "process_tape: got volume label ");
			fprintf(stderr, "in mid_tape (continuing).\n");
			break;
		case TS_INODE:
			for (tries = 5; tries; tries--) {
				if ((*do_inode)(fd, &hdr) == 0)
					break;
				/*
				 * some archives will not have a TS_ADDR
				 * record where we expect it (and hence the
				 * do_inode routine will return early), but
				 * the next file (TS_INODE record) will start
				 * where the TS_ADDR record was expected --
				 * this loop lets us read what was present
				 * in the bad(?) file and survive to extract
				 * other files
				 */
				if (hdr.c_type != TS_INODE)
					clean_up(1);
			}
			break;
		case TS_BITS:
			/*
			 * We'll just stick with the first version of
			 * the dumpmap record -- "duplicate" maps appearing
			 * later in multi-volume archives differ, and
			 * screw up this program's assumptions.
			 */
			if (!seen_dumpmap) {
				if (read_bits(fd, &hdr, dumpmap) == -1) {
				  fprintf(stderr, "process_tape: can't read ");
				  fprintf(stderr, "inode dump map\n");
				  clean_up(1);
				}
				seen_dumpmap = 1;
				init_dump();
			} else {
				/* still has to be consumed from the tape */
				if (read_bits(fd, &hdr, dummymap) == -1) {
				  fprintf(stderr, "process_tape: can't read ");
				  fprintf(stderr, "duplicate inode dump map\n");
				  clean_up(1);
				}
			}
			break;
		case TS_ADDR:
			/* the two pieces form one line; keep them apart */
			fprintf(stderr, "%s warning: unexpected ", progname);
			fprintf(stderr, "TS_ADDR record (continuing).\n");
			break;
		case TS_END:
			if (verbose)
				printf("Done.\n");
			clean_up(0);
			/*
			 * clean_up() is not expected to return; the break
			 * keeps us out of the TS_CLRI code if it ever does.
			 */
			break;
		case TS_CLRI:
			if (read_bits(fd, &hdr, clrimap) == -1) {
				fprintf(stderr, "process_tape: can't read ");
				fprintf(stderr, "empty inode map\n");
				clean_up(1);
			}
			break;
		default:
			fprintf(stderr, "process_tape: unknown dump record\n");
			clean_up(1);
		}
	}
}

/*
 * ptovl
 *
 * Convert a pdp11-format long to vax format.  Byte-order is not
 * the problem; we need to swap the 16-bit words within the 32-bit
 * long.
 *
 * l is the value as it was copied from tape; the return value has
 * its first two 16-bit words (in memory order) exchanged.  Applying
 * the routine twice gives back the original value.
 *
 * The words are reached through a union rather than by casting
 * &l to a short pointer, so the compiler is not entitled to assume
 * the long and the shorts are unrelated objects.
 */
long
ptovl(l)
long	l;
{
	union {
		long	whole;
		short	half[2];
	} u;
	short	tmp;

	u.whole = l;

	tmp = u.half[0];
	u.half[0] = u.half[1];
	u.half[1] = tmp;

	return u.whole;
}

/*
 * read_bits
 *
 * Read the bitmap described in the header record hp from the file
 * descriptor fd into the buffer referred to by m.  The c_count
 * field of the header record gives the number of blocks in the
 * bitmap.
 */
read_bits(fd, hp, m)
struct spcl	*hp;
ushort		*m;
{
	register i;

	i = hp->c_count;

	while (i--) {
		if (read_block(fd, (char *) m) == -1) {
			fprintf(stderr, "read_bits: read_block problem\n");
			return -1;
		}
		m += BSIZE / sizeof(short);
	}
	return 0;
}

/*
 * read_block
 *
 * Read physical blocks from the tape archive, and parcel out
 * logical blocks into the user-supplied buffer buf.
 *
 * A physical block is BSIZE*NTREC bytes and is kept in tapebuf;
 * each call copies the next BSIZE bytes of it to buf.  A new
 * physical block is read from fd only when the previous one has
 * been handed out completely (delivered == NTREC).
 *
 * Returns 0 on success and -1 when read(2) fails.  Running out of
 * volumes ends the program through clean_up(1).
 */
read_block(fd, buf)
char	*buf;
{
	int		nread;

	if (delivered == NTREC) {	/* tapebuf empty; read the archive */
		switch (nread = read(fd, tapebuf, BSIZE*NTREC)) {
		case BSIZE*NTREC:
			/*
			 * normal case: a full physical block arrived
			 */
			delivered = 0;
			break;
		case 0:
			/*
			 * EOF.  For this application, we assume
			 * that this means end-of-volume.
			 */
			printf("Done with volume %d -- \n", cur_vol);

			cur_vol++;
			/*
			 * nextvol() itself reads the new volume's label
			 * through read_hdr()/read_block(), so the buffer
			 * state is already set up when we recurse here.
			 */
			if (nextvol(fd) == 0)
				return read_block(fd, buf);

			fprintf(stderr, "%s: can't mount next tape volume.\n",
				progname);
			clean_up(1);
			/*
			 * NOTE(review): no break here -- this relies on
			 * clean_up() not returning; if it did, control
			 * would fall into the read-error case below.
			 */
		case -1:
			perror("read_block: read");
			return -1;
		default:
			fprintf(stderr, "read_block warning: wanted ");
			fprintf(stderr, "%d bytes, got %d\n", BSIZE*NTREC,
				nread);
			/*
			 * Try and parcel out whatever we read, if possible.
			 * This may be inappropriate -- would dump ever create
			 * such a tape?
			 *
			 * Pretending that the missing blocks were already
			 * delivered makes the counter reach NTREC exactly
			 * when the nread/BSIZE whole blocks we did get
			 * have been handed out.
			 */
			delivered = NTREC - (nread / BSIZE);
			if (delivered == NTREC) {
				/*
				 * Less than one logical block was read.
				 * NOTE(review): unlike the EOF case,
				 * cur_vol is not advanced before asking
				 * for a volume -- confirm this is meant
				 * as "mount the same volume again".
				 */
				if (nextvol(fd) == 0)
					return read_block(fd, buf);
				fprintf(stderr,
					"%s: can't mount next tape volume.\n",
					progname);
				clean_up(1);
			}
			break;
		}
		tp = tapebuf;	/* start handing out from the front */
	}

	/* hand out the next logical block and advance */
	bcopy(tp, buf, BSIZE);
	tp += BSIZE;
	delivered++;

	return 0;
}

/*
 * read_hdr
 *
 * Fetch the next logical block from the archive, verify that it
 * checksums as a header, and unpack it into the struct spcl at hp.
 *
 * Most of the pdp/vax trouble is dealt with here.  Example:
 *
 *	struct foo {		6 bytes on a pdp, but 8 bytes on a
 *		short foo1;	vax: the vax cc puts the long on the
 *		long foo2;	next 32-bit boundary and so skips
 *	};			2 bytes of memory.
 *
 * For the same reason a struct spcl declared on the vax is larger
 * than the record on tape.  The block is therefore read into a plain
 * byte buffer and copied out in pieces, each piece ending just before
 * a spot where the vax compiler inserts padding.
 *
 * Afterwards every long in the header and in the embedded inode is
 * converted from pdp to vax word order with ptovl().
 *
 * Returns 0 on success, -1 if the block can't be read or fails the
 * checksum (a message is printed in both cases).
 */
read_hdr(fd, hp)
struct spcl	*hp;
{
	char		raw[BSIZE];
	struct dinode	*ino;
	long		ptovl();

	if (read_block(fd, raw) == -1) {
		fprintf(stderr, "read_hdr: read_block problem\n");
		return -1;
	}
	if (checksum((short *) raw) == -1) {
		fprintf(stderr, "read_hdr: checksum error\n");
		return -1;
	}

	/*
	 * Tape layout, by byte offset within the block (see the
	 * declaration of struct spcl for why the pieces break
	 * where they do):
	 *
	 *	 0	 2 bytes, stored at c_type
	 *	 2	10 bytes, stored starting at c_date
	 *	12	10 bytes, stored starting at c_tapea
	 *	22	66 bytes, stored starting at c_dinode
	 *	88	PAD bytes, stored at c_addr
	 */
	bcopy(raw, (char *) &hp->c_type, 2);
	bcopy(raw + 2, (char *) &hp->c_date, 10);
	bcopy(raw + 12, (char *) &hp->c_tapea, 10);
	bcopy(raw + 22, (char *) &hp->c_dinode, 66);
	bcopy(raw + 88, hp->c_addr, PAD);

	/* longs of the header proper */
	hp->c_date = ptovl(hp->c_date);
	hp->c_ddate = ptovl(hp->c_ddate);
	hp->c_tapea = ptovl(hp->c_tapea);

	/* longs of the inode copy carried in the header */
	ino = &hp->c_dinode;
	ino->di_size = ptovl(ino->di_size);
	ino->di_atime = ptovl(ino->di_atime);
	ino->di_mtime = ptovl(ino->di_mtime);
	ino->di_ctime = ptovl(ino->di_ctime);

	return 0;
}

/*
 * read_regfile
 *
 * Central routine to read a regular (i.e., non directory) file
 * from the archive.  This routine relies heavily on utilities
 * defined in getfile.c -- open_file(), write_block(), flush_out(),
 * and close_file().
 *
 * hp is the TS_INODE header of the file; fd is the archive.
 * Whatever happens, the file's data is consumed from the archive
 * exactly once: either it is written out through getfile() with
 * write_block/flush_out, or it is skipped through getfile() with
 * the do-nothing routine null.
 *
 *	- inodes that aren't regular files are skipped;
 *	- inodes that aren't marked for extraction are skipped;
 *	- marked inodes are extracted under the name given by inam()
 *	  and get their modes restored by rest_mods(); if the name
 *	  can't be found or the file can't be created the data is
 *	  skipped instead.
 *
 * When the extraction list runs empty the program ends through
 * clean_up(0).
 *
 * Returns 0 normally, -1 if getfile() fails.
 */
read_regfile(fd, hp)
struct spcl	*hp;
{
	struct dinode	*dp = &hp->c_dinode;
	char		*name, *inam();
	extern int	write_block(), flush_out(), null();

	if ((dp->di_mode & S_IFMT) != S_IFREG) {
		/*
		 * We see inodes for special (device) files on dumps of,
		 * say, root file systems.  The size is typically 0, but
		 * just to be sure we flush the file.
		 */
		if (getfile(fd, hp, null, null) == -1) {
			fprintf(stderr, "read_regfile: can't flush file\n");
			return -1;
		}
		return 0;
	}

	if (marked(hp->c_inumber)) {
		if ((name = inam(hp->c_inumber)) == NULL) {
			fprintf(stderr, "read_regfile: inam problem\n");
			/*
			 * The data still sits between us and the next
			 * header; skip it so the caller stays in step
			 * with the tape.
			 */
			if (getfile(fd, hp, null, null) == -1) {
				fprintf(stderr,
				 "read_regfile: can't flush file\n");
				return -1;
			}
			return 0;
		}
		if (open_file(name) == -1) {
			fprintf(stderr, "extract: couldn't open %s\n",
				name);
			/*
			 * "shouldn't happen", but let's
			 * flush the file anyway.  Having done that we
			 * must NOT go on to extract: the data is gone
			 * from the stream and there is no output file.
			 */
			if (getfile(fd, hp, null, null) == -1) {
				fprintf(stderr,
				 "read_regfile: can't flush file\n");
				return -1;
			}
		} else {
			if (verbose)
				printf("x %s\n", name);
			if (getfile(fd, hp, write_block, flush_out) == -1) {
				fprintf(stderr,
					"read_regfile: getfile problem\n");
				/* keep what we got, then give up */
				(void) flush_out();
				close_file();
				rest_mods(name, &hp->c_dinode);
				return -1;
			}
			close_file();
			rest_mods(name, &hp->c_dinode);
		}
		/*
		 * Either way this entry of the extraction list has been
		 * dealt with; it will not show up on the tape again.
		 */
		if (--ext_count <= 0) {
			if (verbose)
				printf("Done (extraction list empty).\n");
			clean_up(0);
		}
	} else if (getfile(fd, hp, null, null) == -1) {
		fprintf(stderr, "read_regfile: can't flush file\n");
		return -1;
	}
	return 0;
}

/*
 * rest_mods
 *
 * Restore the original file ownership, timestamp, and
 * permissions using the copy of the disk inode in dp.
 *
 * name is the path of the restored file; one leading '/' is
 * dropped so the calls below work on a relative path.
 *
 * If we're not root (effective uid), don't bother with syscalls
 * that will fail: access/modification times and ownership are
 * only restored when euid is 0.  The permission bits are always
 * restored, but the setuid/setgid/sticky bits are kept only when
 * the real uid is 0.
 *
 * Failures are reported as warnings on stderr; nothing is returned.
 * A warning is also printed for every file whose inode had the
 * setuid or setgid bit, saying whether the bit was set.
 */
rest_mods(name, dp)
char		*name;
struct dinode	*dp;
{
	struct timeval	tvp[2];
	int		mask;

	/*
	 * be paranoid to make sure we don't
	 * unwittingly use absolute pathnames
	 */
	if (*name == '/')
		name++;

	/*
	 * only root-invoked restores will reset setuid info
	 */
	mask = uid ? 0777 : 07777;

	if (!euid) {	/* don't waste time on syscalls that won't work */
		/*
		 * The inode has whole seconds only; the microsecond
		 * fields must still be given a valid value, otherwise
		 * utimes() is handed stack garbage.
		 */
		tvp[0].tv_sec = dp->di_atime;
		tvp[0].tv_usec = 0;
		tvp[1].tv_sec = dp->di_mtime;
		tvp[1].tv_usec = 0;
		if (utimes(name, tvp) == -1)
			fprintf(stderr, "%s warning: utimes: %s\n",
				progname, sys_errlist[errno]);
		if (chown(name, dp->di_uid, dp->di_gid) == -1)
			fprintf(stderr, "%s warning: chown: %s\n",
				progname, sys_errlist[errno]);
	}
	/* mode goes last, after the ownership change */
	if (chmod(name, (int) (dp->di_mode & mask)) == -1)
		fprintf(stderr, "%s warning: chmod: %s\n",
			progname, sys_errlist[errno]);
	if (dp->di_mode & S_ISUID)
		fprintf(stderr, "%s warning: setuid bit on %s %sset.\n",
			progname, name, uid ? "not " : "");
	if (dp->di_mode & S_ISGID)
		fprintf(stderr, "%s warning: setgid bit on %s %sset.\n",
			progname, name, uid ? "not " : "");
}
