
/*****************************************************************************

	compd.c 	dictionary compress/decompress		V1.0
			copyright Harold Z. Bencowitz
			changed most recently 31-oct-86

******************************************************************************

description:

	compd is a program to compress word lists into dictionaries
	and decompress them back into a word list. it is written in
	whitesmith's C to run on rt11 and tsx+. it has been tested
	on v5.3 and v6.01 respectively. a word list is defined as a
	file containing one word per line in alphabetical order and
	containing only the characters a through z and '. words in
	the input file are compressed by preceding each word with an
	ascii character 0 - @ indicating that the first 0 - 16
	characters of a word are the same as the preceding word.
	only the first different character and the following ones
	are listed in the dictionary. the dictionary has
	approximately 66 characters per line.

operating instructions:

	instructions are also explained by the help (/h) command. the
	operator is prompted by '*' from the rt11 command string
	interpreter (csi). in response enter:

     [outfile1][,outfile2][,outfile3]=infile1[,infile2][,infile3][/o][/p]

	all but the first input file and the first output file are ignored.
	if no output file is specified, the output file default is
	*.dic (*.wrd when decompressing) where the name is that of the
	input file. the default input filetype is .wrd. exactly one of the
	two options must be specified: /d (decompress) or /c (compress).

revision history:

	v1.0 completed 31-oct-86	

limitations:

	word list files must be alphabetized, all in lower case, and
	each word separated by <cr><lf> including the last word. no ascii
	character below 'a' (141 octal, 97 decimal) may be used except for '.
	the default file types work completely for compression only.
	for decompression, the correct default file type is applied to
	the output file only but not to the input file.

installation and building:

	compile compd.c and link with hclib.obj (my c library) and clib
	(whitesmith's c library).

implementation notes:

ttt
*****************************************************************************/

#include <std.h>
#include <rt11.h>

/* compress() ends an output line once at least this many characters
   have been written to it */
#define WRAPSIZE 66
/* number of block buffers in dbuff[][] */
#define NBUFFERS 4
/* count passed to bread() for every block read by getword() */
#define WORDPERBLOCK 256
/* size in bytes of each block buffer in dbuff[][] */
#define BYTEPERBLOCK 512
/* size of the word buffers, including the terminating '\0' */
#define MAXWORD 80
/* channel number compress() uses for its input file */
#define INCHAN 14
/* not referenced by the code in this file */
#define OUTCHAN 15

/*
 *	_main - command loop. each pass reads one command line with rtcsi(),
 *	checks the options, fills in a default output file name when none
 *	was given, runs compress() or decompress() on the first input and
 *	first output file name, and prints the elapsed time. a rejected
 *	command simply starts the next pass; the loop has no exit.
 */
_main()
{
	char in_files[6][15], out_files[3][15], opts[10];
	char *cpystr();
    static char deftype[12]={'W','R','D','D','I','C','D','I','C','D','I','C'};
	register int dot;
	int minutes;
	int compress(), decompress(), greet(), rtcsi(), option(), help();
	int instr();
	long seconds, tics();

	/* banner and hint, printed once */
	greet("\t\tcompress/decompress word lists into dictionaries\tV1.0\n");
	errfmt("enter \"/H<cr>\" for help\n\n");

	FOREVER {
	   /* read and parse the next command line */
	   rtcsi(out_files, in_files, deftype, opts);

	   /* result discarded here; presumably this marks the start of
	      the interval measured by the tics() call below - confirm
	      against the library */
	   tics();

	   /* help request */
	   if(option(opts, 'H')) {
	      help();
	      continue;
	   }

	   /* an input file is required */
	   if(in_files[0][0] == '\0') {
	      errfmt("comp - no input file name entered\n");
	      continue;
	   }

	   /* exactly one of /C and /D must be present */
	   if(!option(opts, 'C') && !option(opts, 'D')) {
	      errfmt("comp - either option C or D must be specified\n");
	      continue;
	   }
	   if(option(opts, 'C') && option(opts, 'D')) {
	      errfmt("comp - options C and D are mutually exclusive\n");
	      continue;
	   }

	   /* no output file given: copy the input name and overwrite it,
	      from the index instr() returns for ".", with the file type
	      that matches the selected operation */
	   if(out_files[0][0] == '\0') {
	      cpystr(out_files[0], in_files[0], NULL);
	      dot = instr(out_files[0], ".");
	      cpystr(&out_files[0][dot],
	       option(opts, 'C') ? ".DIC" : ".WRD", NULL);
	   }

	   /* do the work */
	   if(option(opts, 'C'))
	      compress(in_files[0], out_files[0]);
	   if(option(opts, 'D'))
	      decompress(in_files[0], out_files[0]);

	   /* report the run time as minutes:seconds; tics() is divided
	      by 60 to get seconds */
	   seconds = tics() / 60;
	   minutes = seconds / 60;
	   seconds = seconds - minutes * 60;
	   errfmt("elapsed time %i:%l\n", minutes, seconds);
	}
}

/****************************************************************************/

int help()		/* print the help text with errfmt(), one output line
			per call. to review help text, edit file spell.hlp.
no arguments; no value is returned. */

{
	/* title */
	errfmt("\n\t\t\t      compd\t\tv1.0\n\n");

	/* what the program does and what the two file formats are */
	errfmt("This program takes a word list and compresses it into a dictionary or\n");
	errfmt("decompresses a dictionary into a word list. a word list is a list of words with\n");
	errfmt("all characters in lower case, containing only characters a - z and '. each\n");
	errfmt("word is separated by carriage return/line feed and the words are in\n");
	errfmt("alphabetical order. a dictionary is made by preceding each word with an ascii\n");
	errfmt("character 0 - @ indicating that 0 - 16 of the leading characters are the\n");
	errfmt("same as the preceding word. only the characters including and trailing the\n");
	errfmt("first one different from the preceding word are then placed in the dictionary\n");
	errfmt("file. a new line is started approximately every 66 characters. The rt11\n");

	/* command line, default file types and options */
	errfmt("command string interpreter is used. Up to three output files and up to six\n");
	errfmt("input files can be specified in the command string, however all but the first \n");
	errfmt("output file and the first input file are ignored. for compression the default \n");
	errfmt("input filetype is .wrd and the default output filetype is .dic. for\n");
	errfmt("decompression the default output filetype is .wrd. Note that the input\n");
	errfmt("filetype (.dic) must be entered. exactly one option (of /c and /d) must be\n");
	errfmt("selected by including \"/o\" anywhere in the command string where \"o\" represents\n");
	errfmt("a single character option indicator.\n\n");

	/* command summary */
	errfmt("\t\t\t[op1]=ip1[/o]\n\n");
	errfmt("/h type this help message\t/c compress\t\t/d decompress\n");
}

/****************************************************************************/

int getword(channel, wrdbuf, n)			/* get the next word from the
						file open on channel number
channel (bopen()) and store it, '\0' terminated, in char[] wrdbuf, which must
hold at least MAXWORD characters. n must be the number of blocks in the file
on the first call for a file and must be zero on every later call. returns 1
when a word was stored, and EOF when the next byte is '\0' or all n blocks
have been used up. a word ends at the first byte that is not greater than
<cr> (015 octal); two bytes are then skipped as the <cr><lf> pair, so every
word, including the last, must be terminated by <cr><lf>. a word longer than
MAXWORD - 1 characters is cut there, two bytes are skipped at the cut in the
same way, and the remainder is returned as a new word by the next call. */

char	*wrdbuf;
int	channel, n;

{
	register char c;
	register int i;
	/* state kept between calls:
	   nbuff   - number of blocks completely used up so far
	   block   - number of the next block to request with bread()
	   nblocks - number of blocks in the file (n of the first call)
	   p_buff  - index of the dbuff[] buffer being read
	   p_char  - index of the next byte in that buffer */
	static int nbuff, block, nblocks, p_buff, p_char;
	int bread();
	extern char dbuff[NBUFFERS][BYTEPERBLOCK];
/*
 *		first call for this file: reset the state and fill the
 *		buffers with block 0 and up to NBUFFERS - 1 further blocks.
 *		NOTE(review): both calls pass 0 as the last argument although
 *		the comments name different requests, and the refill calls
 *		below pass 1 - confirm the meaning against bread() in hclib.
 */
	if(n > 0) {
	   nbuff = 0;
	   nblocks = n;
	   block = 0;
	   bread(channel, &dbuff[0][0], WORDPERBLOCK, block++, 0);/* .readw */
	   for(i = 1; i < NBUFFERS && i < nblocks; i++)
	      bread(channel, &dbuff[i][0], WORDPERBLOCK, block++, 0);/*.read*/
	   p_buff = p_char = 0;
	}
/*
 *		test for end of file
 */
	if(dbuff[p_buff][p_char] == '\0' || nbuff >= nblocks)
	   return(EOF);
/*
 *		read in the word. the loop only runs while c > <cr>, so c can
 *		never be '\0' inside it and no separate test is needed.
 */
	for(i = 0;(c = dbuff[p_buff][p_char]) > '\015' && i < MAXWORD - 1;) {
	   wrdbuf[i++] = c;
	   if(++p_char >= BYTEPERBLOCK) {
	      /* this buffer is used up: request the next unread block,
	         if any, into it and move on to the following buffer */
	      if(block < nblocks)
	         bread(channel, &dbuff[p_buff][0], WORDPERBLOCK, block++, 1);
	      p_char = 0;
	      nbuff++;
	      p_buff++;
	      if(p_buff >= NBUFFERS)
	         p_buff = 0;
	   }
	}
	wrdbuf[i] = '\0';
/*
 *		set pointers for next word: step over two bytes (<cr><lf>)
 */
	p_char += 2;

	if(p_char >= BYTEPERBLOCK) {

	   /* the step crossed into the next buffer; keep the overshoot */
	   p_char %= BYTEPERBLOCK;
	   if(block < nblocks)
	      bread(channel, &dbuff[p_buff][0], WORDPERBLOCK, block++, 1);
	   nbuff++;
	   p_buff++;
	   if(p_buff >= NBUFFERS)
	      p_buff = 0;
	}

	return(1);
}

/***************************************************************************/

int decompress(input, output)		/* decompress the dictionary file
					named input into the word list file
named output. the file must begin with the code character '0'. every later
code character '0' - '@' ends the current word, which is written with
putword(), and keeps that many (0 - 16) leading characters of it for the next
word. the characters a - z and ' are appended to the current word and every
other character is ignored. both files are closed on every path that got as
far as opening them. no value is returned. */

char	*input, *output;

{
	char wrdbuf[MAXWORD];
	register int c, cp = 0;		/* cp: index of the next free slot
					   in wrdbuf */
	FIO *fopen(), *fclose(), *fcreate();
	int putword();
	extern FIO fpo, fpi;
/*
 *		open/create input/output files
 */
	if(!fopen(&fpi, input, READ)) {		/* open input file */
	   errfmt("comp - unable to open input file %p\n", input);
	   fclose(&fpi);
	   return;
	}
	if(!fcreate(&fpo, output, WRITE)) {	/* create output file */
	   errfmt("comp - unable to create output file %p\n", output);
	   fclose(&fpi);
	   fclose(&fpo);
	   return;
	}

	errfmt("decompressing input dictionary file %p into file %p\n",
	 input, output);
/*
 *		test the file: a dictionary starts with the code '0'. both
 *		files are open at this point, so close them before giving up;
 *		the caller loops and will want to open files again.
 */
	c = getc(&fpi);
	if(c != '0') {
	   errfmt("comp - %p is not a dictionary file\n", input);
	   fclose(&fpi);
	   fclose(&fpo);
	   return;
	}
	wrdbuf[0] = '\0';
/*
 *		get the words
 */
	FOREVER {
	   c = getc(&fpi);
	   if(c == EOF) {
	      /* write out the last word if it has any characters */
	      if(cp > 0) {
	         wrdbuf[cp] = '\0';
	         putword(&fpo, wrdbuf, MAXWORD);
	      }
	      fclose(&fpi);
	      fclose(&fpo);
	      return;
	   }
	   if(c >= '0' && c <= '@') {
	      /* code character: finish the current word, then keep its
	         first c - '0' characters as the start of the next one */
	      wrdbuf[cp] = '\0';
	      putword(&fpo, wrdbuf, MAXWORD);
	      cp = c - '0';
	   }
	   else if(c == '\n' || c > 'z' || (c < 'a' && c != '\''))
	      ;				/* not a word character: ignore */
	   else {
	      wrdbuf[cp++] = c;
	      /* word too long for the buffer: start over at index 0
	         rather than write past the end */
	      if(cp >= MAXWORD)
	         cp = 0;
	   }
	}
}

/***************************************************************************/

int compress(input, output)		/* compress the word list file named
					input into the dictionary file named
output. each word is written as one code character '0' - '@' giving the
number (0 - 16) of leading characters it shares with the word read before it,
followed by its remaining characters. a word that has no remaining characters
is not written. a line feed is written after a word once the current output
line holds at least WRAPSIZE characters. no value is returned. */

char	*input, *output;

{
	char words[2][MAXWORD];		/* the current and the previous word */
	register int cur = 1, column = 0, k;
	int prev = 0, nblocks;
	int getword(), bopen(), bclose();
	FIO *fcreate(), *fclose();
	extern FIO fpo;
/*
 *		open the input file. a result >= 0 is treated as failure;
 *		otherwise the negated result is handed to getword() as the
 *		number of blocks in the file.
 */
	nblocks = bopen(INCHAN, input);
	if(nblocks >= 0) {
	   errfmt("comp - unable to open input file %p\n", input);
	   return;
	}
/*
 *		create the output file
 */
	if(!fcreate(&fpo, output, WRITE)) {
	   errfmt("comp - unable to create output file %p\n", output);
	   bclose(INCHAN);
	   fclose(&fpo);
	   return;
	}

	errfmt("compressing input file %p into dictionary file %p\n",
	 input, output);

	nblocks = -nblocks;
/*
 *		the first word is compared against an empty word. cur is
 *		flipped to 0 at the top of the first pass, so slot 1 is the
 *		first "previous" word.
 */
	words[1][0] = '\0';

	FOREVER {
/*
 *		swap the roles of the two word slots
 */
	   prev = cur;
	   cur = 1 - cur;
/*
 *		read the next word; leave the loop at end of file
 */
	   if(getword(INCHAN, words[cur], nblocks) == EOF)
	      break;
	   nblocks = 0;			/* must be zero after the first call */
/*
 *		count the leading characters shared with the previous word,
 *		16 at most
 */
	   k = 0;
	   while(k <= 15 && words[cur][k] != '\0'
	    && words[cur][k] == words[prev][k])
	      k++;
/*
 *		nothing left to write (e.g. the same word twice): skip it
 */
	   if(words[cur][k] == '\0')
	      continue;
/*
 *		write the code character and then the rest of the word
 */
	   putc(&fpo, k + '0');
	   column++;
	   while(words[cur][k] != '\0') {
	      putc(&fpo, words[cur][k]);
	      k++;
	      column++;
	   }
/*
 *		start a new output line when this one is long enough
 */
	   if(column >= WRAPSIZE) {
	      putc(&fpo, '\n');
	      column = 0;
	   }
	}
/*
 *		end of input: close both files
 */
	bclose(INCHAN);
	fclose(&fpo);
	return;
}

/***************************************************************************/
/*			external definitions				   */

int		xxx = 0;   /* necessary to prevent an odd address for dbuff */
/* block buffers that getword() fills with bread() and takes words from */
char		dbuff[NBUFFERS][BYTEPERBLOCK] = { 0 };
/* file control blocks: fpi is the input file of decompress(); fpo is the
   output file of both compress() and decompress() */
FIO		fpi = {0}, fpo = {0};

