
/*
 *		lzcomp [-options] infile outfile
 */
#ifdef	DOCUMENTATION

title	lzcomp	File Compression
index		File compression

synopsis
	.s.nf
	lzcomp [-options] [infile [outfile]]
	.s.f
description

	lzcomp implements the Lempel-Ziv file compression algorithm.
	(Files compressed by lzcomp are uncompressed by lzdcmp.)
	It essentially finds common substrings and replaces them
	with a variable-size code.  This is deterministic, and
	can be done with a single pass over the file.  Thus,
	the decompression procedure needs no input table, but
	can track the way the table was built.

	Options may be given in either case.
	.lm +8
	.p -8
	-B	Input file is "binary", not "human readable text".
	This is necessary on Dec operating systems, such as VMS,
	that treat these files differently.  (Note that binary
	support is rudimentary and probably insufficient as yet.)
	.p -8
	-C	Suppress block compression for compatibility with
	past versions of compress.
	.p -8
	-M bits	Write using the specified number of bits in the
	code -- useful for big machines making files for little
	machines.
	.p -8
	-N	Write in a format compatible with old compress.
	.p -8
	-Q	Quiet -- don't write out status messages.
	.p -8
	-Dval	Debug (if compiled in) (value is debug level; it must
	follow the -D directly, and the level is 1 if it is omitted).
	.p -8
	-V	Verbose (if compiled in), for debugging.
	.s.lm -16
	The other two arguments are the input and output
	filenames, respectively.  Redirection is supported;
	however, the output should be a disk/tape file.

	The file format is almost identical to the original
	Unix implementation of compress (V3.0).  Files written
	by Unix compress should be readable by lzdcmp.

Authors
	The algorithm is from "A Technique for High Performance
	Data Compression."  Terry A. Welch. IEEE Computer Vol 17,
	No. 6 (June 1984), pp 8-19.

	This revision is by Martin Minow.

	Unix Compress authors are as follows:
	.s.nf
	Spencer W. Thomas	(decvax!harpo!utah-cs!utah-gr!thomas)
	Jim McKie		(decvax!mcvax!jim)
	Steve Davies		(decvax!vax135!petsd!peora!srd)
	Ken Turkowski		(decvax!decwrl!turtlevax!ken)
	James A. Woods		(decvax!ihnp4!ames!jaw)
	Joe Orost		(decvax!vax135!petsd!joe)
	.s.f
#endif

/*
 * Compatible with compress.c, v3.0 84/11/27
 */

/*)BUILD
 *		$(PROGRAM) = lzcomp
 *		$(INCLUDE) = lz.h
 *		$(FILES) = { lzcmp1.c lzcmp2.c lzio.c }
 */

#include	"lz.h"
#ifndef	decus
#include	<signal.h>
extern int	abort();
#endif
#ifdef DEBUG
#ifdef unix
#include <sys/types.h>
#include <sys/timeb.h>
#else
#include <timeb.h>
#endif
static struct timeb start_time;
static struct timeb end_time;
#endif

/*
 * Compression parameters.  maxbits and block_compress are combined
 * into the third header byte of the compressed file (see main()),
 * following the two magic bytes, so that the decompressor can
 * recover them.
 */
short		maxbits = BITS;		/* settable max # bits/code	*/
					/* (-M option; main() clamps it	*/
					/* to INIT_BITS .. BITS)	*/
short		block_compress = BLOCK_MASK; /* Header flag, 0 after -C	*/
code_int	maxmaxcode = 1 << BITS;	/* main() resets: 1 << maxbits	*/
static char_type magic_header[] =	/* First 2 bytes of compressed	*/
    { HEAD1_MAGIC, HEAD2_MAGIC, 0 };	/* data file.			*/

code_int	hsize = HSIZE;		/* Actual hash table size	*/
					/* (getsize() may reduce it for	*/
					/* small input files)		*/

/*
 * Option flags, set by setup() from the command line.
 */
flag		binary = FALSE;		/* Read text if false		*/
flag		nomagic = FALSE;	/* No magic header if TRUE	*/
flag		quiet = FALSE;		/* don't talk about compression	*/
#ifdef	DEBUG
flag		debug = 0;		/* Debug level (-D option)	*/
#endif
long		fsize;			/* Input file size in bytes	*/
					/* (0 if getsize() can't tell)	*/
char		*infilename = NULL;	/* For error printouts		*/
char		*outfilename = NULL;	/* For openoutput and errors	*/

main(argc, argv)
int		argc;
char		*argv[];
/*
 * Compress mainline
 */
{
#ifdef DEBUG
	int		msec;
	extern long	in_count;
#endif

	setup(argc, argv);
	getsize();
	openoutput();
#ifndef	decus
	signal(SIGINT, abort);
#endif
#ifdef DEBUG
	ftime(&start_time);
#endif
	if (maxbits < INIT_BITS)		/* maxbits is set by	*/
	    maxbits = INIT_BITS;		/* the -M option.  Make	*/
	if (maxbits > BITS)			/* sure it's within a	*/
	    maxbits = BITS;			/* reasonable range	*/
	maxmaxcode = 1 << maxbits;		/* Truly biggest code	*/
#ifndef COMPATIBLE
	if (!nomagic) {
	    PUT(magic_header[0]);
	    PUT(magic_header[1]);
	    PUT((char) (maxbits | block_compress));
	}
#endif
	if (!compress()) {
	    fprintf(stderr, "Warning, compression unsuccessful.\n");
	    /*
	     * Hook here to delete output file.
	     */
	}
#ifdef DEBUG
	if (!quiet) {
	    ftime(&end_time);
	    end_time.time -= start_time.time;
	    msec = end_time.millitm - start_time.millitm;
	    if (msec < 0) {
		msec += 1000;
		end_time.time--;
	    }
	    fprintf(stderr, "%ld.%03d seconds for compression.",
		end_time.time, msec);
	    if (msec >= 500)			/* Roundoff		*/
		end_time.time++;
	    start_time.time = (in_count * 100) / end_time.time;
	    fprintf(stderr, "  %ld.%02ld input bytes per second.\n",
		start_time.time / 100, start_time.time % 100);
	}
#endif
	exit(IO_SUCCESS);
}

/*
 * Hash table tuning.  getsize() scans tunetab[] in order and uses the
 * hsize of the first entry whose fsize limit is greater than the input
 * file size; the entry with fsize == 0 ends the table.  Each entry is
 * compiled in only if its hash size is smaller than HSIZE, so the
 * table can never select a hash size above HSIZE.  (The 50021 entry
 * has its own guard for that reason.)  The limits are written as long
 * constants so that they do not depend on the width of int.
 */
typedef struct TUNETAB {
    long	fsize;		/* Entry applies to files smaller than this */
    code_int	hsize;		/* Hash table size to use for such files    */
} TUNETAB;

static TUNETAB tunetab[] = {
#if HSIZE > 5003
    {	1L << 12,	 5003	},
#endif
#if HSIZE > 9001
    {	1L << 13,	 9001	},
#endif
#if HSIZE > 18013
    {	1L << 14,	18013	},
#endif
#if HSIZE > 35023
    {	1L << 15,	35023	},
#endif
#if HSIZE > 50021
    {	47000L,		50021	},
#endif
    {	      0,	    0	},
};

#ifdef unix
#include	<sys/types.h>
#include	<sys/stat.h>
#endif
#ifdef vms
#if L_cuserid >= 16
/*
 * VMS Version 4.1 or later (by inspection of L_cuserid, which is
 * defined in <stdio.h>).
 */
#include	<types.h>
#include	<stat.h>
#endif
#endif

#ifdef	unix
#ifndef	S_ISREG
#define	S_ISREG(m)	(((m) & S_IFMT) == S_IFREG)
#endif
#endif

getsize()
/*
 * Tune the hash table parameters for small files.
 * Sets the global fsize to the size of the file open on stdin, in
 * bytes, and then sets the global hsize: HSIZE by default, or the
 * smaller value from tunetab[] if the size is known and below one of
 * the table's limits.
 * We don't have a good way to find the file size on vms.
 * fsize is set to zero if we can't find it.
 */
{
	register TUNETAB	*tunep;
#ifdef	rsx
	extern char	f_efbk;	/* F.EFBK -- highest block in file	*/
#define	fdb(p,offset)	(stdin->io_fdb[((int) &p + offset)] & 0xFF)
#define efbk(offset)	fdb(f_efbk, offset)
	extern char	f_rtyp;	/* F.RTYP -- Record type		*/
	extern char	f_ratt;	/* F.RATT -- Record attributes		*/
	/*
	 * Note: Block number is stored high-order word first.
	 * Each byte is widened to long before it is shifted so that
	 * the result does not depend on the width of int.
	 */
	fsize = (long) efbk(2)
	    + ((long) efbk(3) << 8)
	    + ((long) efbk(0) << 16)
	    + ((long) efbk(1) << 24);
	fsize *= 512;			/* Blocks to bytes		*/
#endif
#ifdef	rt11
	fsize = stdin->io_size;		/* Set by Decus C		*/
	fsize *= 512;			/* Blocks to bytes		*/
#endif
#ifdef	vms
#if L_cuserid >= 16
	/*
	 * VMS Version 4 -- see note above.
 	 */
	struct stat	statbuf;

	fsize = 0;
	if (fstat(fileno(stdin), &statbuf) == 0)
	    fsize = (long) statbuf.st_size;
#else
	fsize = 0;
#endif
#endif
#ifdef	unix
	struct stat	statbuf;

	/*
	 * Only believe st_size for regular files.  For pipes and
	 * devices it need not be the amount of data that will be
	 * read, and a bogus small value would select a hash table
	 * that is too small for the real input.
	 */
	fsize = 0;
	if (fstat(fileno(stdin), &statbuf) == 0
	 && S_ISREG(statbuf.st_mode))
	    fsize = (long) statbuf.st_size;
#endif
	hsize = HSIZE;
	if (fsize > 0) {
	    for (tunep = tunetab; tunep->fsize != 0; tunep++) {
		if (fsize < tunep->fsize) {
		    hsize = tunep->hsize;
		    break;
		}
	    }
	}
#ifdef	DEBUG
	if (!quiet) {
	    /*
	     * hsize is a code_int, whose width is not known here,
	     * so convert it to long to match the format.
	     */
	    if (fsize > 0) {
		fprintf(stderr, "%s: size %ld bytes, hash size %ld\n",
		    infilename, fsize, (long) hsize);
	    }
	    else {
		fprintf(stderr, "%s: size unknown, hash size %ld\n",
		    infilename, (long) hsize);
	    }
	}
#endif
}

static
setup(argc, argv)
int		argc;
char		*argv[];
/*
 * Get parameters and open files.  Exit fatally on errors.
 *
 * Arguments that start with '-' followed by at least one more byte
 * are options; only the first byte after the '-' is examined, in
 * either case.  Everything else (including a lone "-") is a file
 * argument and is compacted to the front of argv[].  The first file
 * argument is reopened as stdin and remembered in infilename; the
 * second is only remembered in outfilename (openoutput() opens it).
 * A file argument of "-" selects the standard stream instead.
 * An unknown option prints the option summary and exits with
 * IO_ERROR.
 */
{
	register char	*ap;
	register int	c;
	int		i, j;
	char		*arg;
#ifndef unix
	/* NOTE(review): fgetname() is given no length; confirm that	*/
	/* 80 bytes is enough for the longest name it can return.	*/
	char		filename[80];
#endif

#ifdef	vms
	argc = getredirection(argc, argv);
#endif
	for (i = j = 1; i < argc; i++) {
	    arg = ap = argv[i];
	    if (*ap++ != '-' || *ap == EOS)	/* Filename?		*/
		argv[j++] = argv[i];		/* Just copy it		*/
	    else {
		/*
		 * The <ctype.h> functions need a value in the range
		 * of unsigned char; plain char may be negative.
		 */
		c = (unsigned char) *ap++;	/* Option byte		*/
		if (islower(c))
		    c = toupper(c);
		switch (c) {
		case 'B':
		    binary = TRUE;
		    break;

		case 'C':
		    block_compress = 0;
		    break;

		case 'M':
		    if (*ap == EOS) {		/* Value is next arg.	*/
			if (++i >= argc) {	/* Check before use	*/
			    fprintf(stderr, "-M needs a value\n");
			    goto usage;
			}
			ap = argv[i];
		    }
		    maxbits = atoi(ap);		/* main() range-checks	*/
		    goto nextarg;

#ifdef COMPATIBLE
		case 'N':
		    nomagic = TRUE;
		    break;
#endif

		case 'Q':
		    quiet = TRUE;
		    break;

#ifdef DEBUG
		case 'D':
		    if (isdigit((unsigned char) *ap)) {
			debug = atoi(ap);
			goto nextarg;
		    }
		    debug = 1;			/* No level given	*/
		    break;

#endif
		default:
		    fprintf(stderr, "Unknown option \"%s\"\n", arg);
usage:		    fprintf(stderr, "The following options are valid:\n\
-B\tBinary file (important on VMS/RSX, ignored on Unix)\n\
-C\tNo block compress (compatible with compress 2.0)\n\
-M val\tExplicitly set the maximum number of code bits\n");
#ifdef COMPATIBLE
		    fprintf(stderr, "\
-N\tNo header (file is readable by old compress)\n");
#endif
		    fprintf(stderr, "\
-Q\tNo output to stderr, unless error.\n");
#ifdef DEBUG
		    fprintf(stderr, "-Dn\tDebug (n == level)\n");
#endif
		    exit(IO_ERROR);
		}				/* Switch on options	*/
	    }					/* If -option		*/
nextarg:    ;					/* Go to next argv[]	*/
	}					/* For all argc's	*/
	/*
	 * j is now one more than the number of file arguments, which
	 * have been moved down to argv[1] and argv[2].
	 */
	/* outfilename = NULL; */		/* Set "stdout" signal	*/
	switch (j) {				/* Any file arguments?	*/
	case 3:					/* both files given	*/
	    if (!streq(argv[2], "-"))		/* But - means stdout	*/
		outfilename = argv[2];
	    /* FALLTHROUGH -- the input file is handled next		*/
	case 2:					/* Input file given	*/
	    if (!streq(argv[1], "-")) {
#ifdef decus
		if (freopen(argv[1], (binary) ? "rn" : "r", stdin) == NULL) {
		    perror(argv[1]);
		    exit(IO_ERROR);
		}
#else
		/*
		 * Special case for vms too?
		 */
		if (freopen(argv[1], "r", stdin) == NULL) {
		    perror(argv[1]);
		    exit(IO_ERROR);
		}		    
#endif
		infilename = argv[1];
		break;
	    }
	    /* Else "-": explicitly read from stdin.			*/
	    /* FALLTHROUGH						*/
	case 0:					/* None!		*/
	case 1:					/* No file arguments	*/
	    /*
	     * Reading the stdin we were started with.  Where the
	     * library can name it, keep a private copy of the name;
	     * if that copy cannot be allocated, fall back to the
	     * literal used on other systems rather than copying
	     * through a NULL pointer.
	     */
#ifdef	vms
	    fgetname(stdin, filename);
	    infilename = malloc(strlen(filename) + 1);
	    if (infilename == NULL)
		infilename = "stdin";
	    else
		strcpy(infilename, filename);
#else
#ifdef decus
	    fgetname(stdin, filename);
	    infilename = malloc(strlen(filename) + 1);
	    if (infilename == NULL)
		infilename = "stdin";
	    else
		strcpy(infilename, filename);
#else
	    infilename = "stdin";
#endif
#endif
	    break;

	default:
	    fprintf(stderr, "Too many file arguments\n");
	    exit(IO_ERROR);
	}
}

openoutput()
/*
 * Open the output file (after the input file has been opened).
 * if outfilename == NULL, it's already open on stdout; in that case
 * outfilename is only given a printable value for messages.
 * Otherwise stdout is reopened on the named file.  Exits with
 * IO_ERROR if the file cannot be opened or if stdout turns out to
 * be a terminal.
 */
{
#ifndef	unix
	/* NOTE(review): fgetname() is given no length; confirm that	*/
	/* 80 bytes is enough for the longest name it can return.	*/
	char		filename[80];
#endif

	if (outfilename == NULL) {
	    /*
	     * Where the library can name stdout, keep a private copy
	     * of the name.  If the copy cannot be allocated, fall
	     * back to the literal used on other systems rather than
	     * copying through a NULL pointer.
	     */
#ifdef	vms
	    fgetname(stdout, filename);
	    outfilename = malloc(strlen(filename) + 1);
	    if (outfilename == NULL)
		outfilename = "<stdout>";
	    else
		strcpy(outfilename, filename);
#else
#ifdef decus
	    fgetname(stdout, filename);
	    outfilename = malloc(strlen(filename) + 1);
	    if (outfilename == NULL)
		outfilename = "<stdout>";
	    else
		strcpy(outfilename, filename);
#else
	    outfilename = "<stdout>";
#endif
#endif
	}
	else {
#ifdef decus
	    if (freopen(outfilename, "wn", stdout) == NULL) {
		perror(outfilename);
		exit(IO_ERROR);
	    }
#else
	    if (freopen(outfilename, "w", stdout) == NULL) {
		perror(outfilename);
		exit(IO_ERROR);
	    }
#endif
	}
	/*
	 * Refuse to write compressed data to a terminal.
	 */
	if (isatty(fileno(stdout))) {
	    fprintf(stderr, "%s: is a terminal.  We object.\n",
		outfilename);
	    exit(IO_ERROR);
	}
}
