
/*****************************************************************************

	spell.c 	spelling checker			V2.3
			copyright Harold Z. Bencowitz
			changed most recently 13-feb-87

******************************************************************************

description:

	spell is a spelling checker written in whitesmith's C which
	runs on rt11 and tsx+. it has been tested on v5.3 and v6.01
	respectively. words in the input file are compared to one
	or more dictionaries (files of alphabetized words) and an
	alphabetized list of the unmatched words is sent to the output
	file(s). the output list can list each word as many times as it
	is used or optionally only once. spell can be used as a tool to
	alphabetize a list of words (eg a dictionary) or to produce an
	alphabetized list of the words in a text file without comparing
	to a dictionary.

operating instructions:

	instructions are also explained by the help (\h) command. the
	operator is prompted by '*' from the rt11 command string
	interpretor (csi). in response enter:

     [outfile1][,outfile2][,outfile3]=infile1[,infile2][,infile3][/o][/p]

	where there can be 0-3 output files (can be tt:, lp: etc) and 1-6
	input files. if none is specified the output file default is
	*.spl where the name is that of input file 1. input files 2-6 are
	optional dictionaries (in addition to the main dictionary
	sy:spell.wrd). the default input filetype is .wrd for both the
	file being checked and user dictionaries. note that if a
	device name is specified in the output or input file lists, that
	device is used as the default device for any other files in the
	same list. the default dictionary is sy:spell.dic.
	there are several options which can be combined in any
	combination (except with /h help, the rest of the command is
	ignored). other options are: /d (do not use the default
	dictionary), /u (unique - output file contains unique words ie
	only one copy of each unmatched word), /r (runoff - ignores lines
	in the input file starting with '.', /i (input file is a word list,
	ie one word per line), and /s (shell sort - much
	faster for input files already sorted or nearly sorted, eg a
	dictionary). spell can be used (if no dictionary specified and /d
	option) to convert a text file to an alphabetized list of the
	words contained or to alphabetize a list of words.

possible future changes:

	a display in context feature, possibly with automatic dictionary
	updates will be considered.

revision history:

	v1.1 completed 16-oct-83	

	v1.2 completed 06-sep-86	minor code changes for compatibility
					with newer versions of Whitesmith's

	v2.0 completed 31-oct-86	a complete revision with most of
					the code rewritten. the basic
					algorithm was completely changed to
	read input file into memory and access dictionaries from disk
	(opposite to v1.2). bugs fixed in input of hyphenated words. bugs
	fixed in handling of runoff files. default output filetype added.
	timing added. quick sort added. faster copy routine added. char/word
	optimized. default dictionaries combined into one. output routine
	changed. new faster input routine to read dictionary files.

	v2.1 completed 11-dec-86	all input/output routines rewritten.
					external sort (merge added). words
	less than 2 characters excluded. enter/leave and other changes for
	improved error handling.

	v2.2 completed 19-dec-86	/i option for faster input of word
					lists. better quicksort. default
	dictionary compressed. bugs in getword() and hgetc() fixed.
	strip out common words in load().

	v2.3 completed 13-feb-87	use of memory altered to use
					all of memory without fixed
	char/word ratio (pointer array grows down from memtop).	bug in
	sp_out() fixed. skip most dictionary word comparisons. bugs in
	hopen() and merge() fixed.

limitations:

	the default dictionary must be on SY: and in compressed format.
	before compression it must be in the user dictionary format.
	user dictionary files must be alphabetized, all in lower case, and
	each word separated by <cr><lf> including the last word. only a - z
	and ' characters may be used. the output list of words is
	always alphabetized and can not be obtained in order of appearance.
	uppercase letters are converted to lower case before comparing to
	the dictionaries. words can not be displayed in context. there is
	no limit to the size of the input file and the dictionary files
	other than available disk space. there is no specific terminal
	requirement.

installation and building:

	compile spell.c and link with hclib.obj (my c library) and clib
	(whitesmith's c library). to install place spell.sav wherever you
	wish and the default dictionary (spell.dic) on the systems disk
	(sy:). spell.dic can be replaced by any ascii file list of words
	containing only alphabetic characters (a-z) in lower case and the
	apostrophe character (') with each word separated by
	carriage return-line feed (ie '\n') and in alphabetical
	order if this list is compressed using program compd.sav.
	it is probably easier to use this default dictionary and
	either add words to it as needed or else put additional words
	into special user dictionaries (format as above but not compressed),
	although if a large user dictionary is to be used it may be faster
	to add the words to spell.dic since a compressed dictionary is read
	faster.

implementation notes:

	the program sizes available memory at run time and uses all
	of it to store words. as the input file is read it is parsed
	to remove all characters but ' and a-z. upper case letters
	are converted to lower case. hyphenated words at the end of
	a line are joined. hyphenated words within a line are treated
	as two separate words. runoff '.' commands are ignored if
	option /r is selected. the input words are loaded into memory.
	unless /d is specified each input word is compared to a list
	of the most common english words and not loaded if matched.
	words of less than 2 characters are also not loaded. the number
	of words and characters loaded will not include those rejected
	because of a match or one character. if the input file is a
	word list it will be loaded faster with the /i option which
	uses getword() instead of parsing the file a character at a time.
	the words are then alphabetized. quick sort is used but shell sort
	(option /s) is twice as fast if the words are close to
	alphabetical. if option /u is selected, duplicate words are
	removed. remaining words in memory are then compared to the
	default dictionary (unless option /d selected) and any other
	dictionaries specified. the routine which reads the dictionary
	files makes very strict assumptions about the format of the
	file as described above. dictionary files must only contain
	a-z [lower case], ', <cr>, and <lf>. unless /d or /i is
	selected, the input words are compared to a list of common words
	and rejected if matched before loading them into memory.
	spell can be used as a tool to prepare dictionaries since its
	output is in the correct format. the output is written to a
	temporary disk file. this file is copied to the specified
	output files or renamed if on the same disk.

ttt
*****************************************************************************/

#include <std.h>
#include <rt11.h>

#define NBUFFERS 2
#define WORDPERBLOCK 256
#define BYTEPERBLOCK 512
#define MAXWORD 32
#define OFFSET 0
#define LOWCHAN 8
#define NCHAN 6
#define READ 1
#define WRITE 2
#define MERGEORDER 5
#define MINCHAR 2

typedef struct hio
	{
	int	chan;		/* channel number */
	char	mode;		/* mode, READ or WRITE or NULL (not open) */
	char	ugetc;		/* start of block unget character */
	int	nblocks;	/* number of blocks in the file */
	int	block;		/* next block to read or write */
	int	bkount;		/* number of blocks completed */
	int	p_buff;		/* pointer to which buffer is active */
	int	p_char;		/* pointer to next character */
	char	dbuff[NBUFFERS][BYTEPERBLOCK];	/* buffers */
	} HIO;

_main()

{
	int loop(), limits(), greet();
	unsigned int enter();
	extern unsigned pbot, membot, memtop;
/*
 *		start
 */
	greet("\t\tspelling checker for rt-11 and tsx+\t\tV2.3\n");
	errfmt("enter \"/H<cr>\" for help\n\n");
/*
 *		get memory limits, divide available high memory into space
 *		for pointers to dictionary words (pwrd) and space for
 *		characters (ie actual words). membot is the address just
 *		above the program image, pointers start here. 
 */
	limits(OFFSET, &pbot, &membot, &memtop);	/* size memory */
	if((membot % 2) == 1)
	   membot += 1;
	if((memtop % 2) == 1)
	   memtop -= 1;
/*
 *		main body of program
 */
	FOREVER
	   enter(&loop);
}

/****************************************************************************/

int loop()		/* "the main" operating part of the program */

{
	char in_files[6][15], out_files[3][15], opts[10];
	static char temp_file[] = "DK:TEMP00.TMP";
    static char deftype[12]={'W','R','D','S','P','L','S','P','L','S','P','L'};
	char *gname(), *cpystr();
	int ropt, min, flag, kount;
	int leave(), tmp_out(), load(), rtcsi(), option(), help();
	int quickr(), cmpwrd(), shell(), r_alpord(), exchange(), sp_out();
	int ext_sort(), hopen(), hclose(), hcreate();
	register int i;
	long ltics, sec, tics();
	unsigned totalc, nwords, unique();
	extern char **pwrd;
	extern HIO	hfiles[];
/*
 *
 */
	FOREVER {
/*
 *		get and interpret command line
 */
label1:
	   rtcsi(out_files, in_files, deftype, opts);
	   ltics = tics();
	   if(option(opts, 'H')) {		/* help selected */
	      help();
	      goto label1;
	   }
	   if(in_files[0][0] == '\0') {
	      errfmt("spell - no input file name entered\n");
	      goto label1;
	   }
	   if(out_files[0][0] == '\0') {	/* default output name */
	      cpystr(out_files[0], in_files[0], NULL);
	      i = instr(out_files[0], ".");
	      cpystr(&out_files[0][i], ".SPL", NULL);
	   }
/*
 *		open input files
 */
	   if(!hopen(&hfiles[0], in_files[0])) {	/* open input file */
	   errfmt("spell - unable to open input file %p\n",in_files[0]);
	      hclose(&hfiles[0]);
	      goto label1;
	   }
/*
 *		load input into memory, alphabetize, unique, compare
 */
	   flag = NO;		/* YES if all of input file read in */
	   kount = 0;
	   while(!flag) {
/*
 *		create temporary output file
 */
	     if(!hcreate(&hfiles[1],gname(temp_file,kount),0)){
	     errfmt("spell - unable to create temporary output file\n");
	         hclose(&hfiles[0]);
	         hclose(&hfiles[1]);
	         goto label1;
	      }
/*
 *		parse input file and load into memory
 */
	      flag = load(&hfiles[0], &nwords, &totalc, kount, \
	       option(opts, 'D'), option(opts, 'R'), option(opts, 'I'));
	      errfmt("%i words (%i characters) loaded into memory\n",\
		 nwords, totalc);
	      kount++;			/* increment number of loads */
/*
 *		set address of word array
 */
	      pwrd = memtop - 2 * (nwords - 1);
/*
 *		alphabetize
 */
	      if(option(opts, 'S'))
	         shell(nwords, r_alpord, exchange, 0);
	      else
	         quickr(nwords, r_alpord, exchange, 0);
/*
 *		strip down to unique words
 */
	      if(option(opts, 'U'))
	         unique(nwords);
/*
 *		check spelling against dictionaries
 */
	      if(!option(opts, 'D'))
	         cmpwrd("SY:SPELL.DIC", in_files[0], nwords, YES);

	      for(i = 1; in_files[i][0] > '\0'; i++)
	         cmpwrd(in_files[i], in_files[0], nwords, NO);
/*
 *		write remaining unmatched words to temporary file
 */
	      tmp_out(&hfiles[1], nwords);
/*
 *		close output file
 */
	      if(!hclose(&hfiles[1])) {
	         errfmt("spell - unable to close temporary file\n");
	         leave();
	      }
	   }
/*
 *		close input file
 */
	   if(!hclose(&hfiles[0])) {
	      errfmt("spell - unable to close input file\n");
	      leave();
	   }
/*
 *		alphabetize and unique output file if > 1 batch of input
 */
	   if(kount > 1)
	      ext_sort(--kount, temp_file, option(opts, 'U'));
/*
 *		send output to the output files
 */
	   sp_out(temp_file, out_files, in_files[0]);
/*
 *		time the run
 */
	   sec = tics() / 60;
	   min = sec / 60;
	   sec -= min * 60;
	   errfmt("elapsed time %i:%l\n", min, sec);
	}
}

/****************************************************************************/

char *gname(file_name, n)	/* returns file name in file_name where the
				name is tempNN.tmp. the "NN" represents n
which is constrained to be < 100. */

register char	*file_name;
int	n;

{
	unsigned decode();

	decode(file_name + 7, 2, "%+02i", n);
	return(file_name);
}

/****************************************************************************/

int tmp_out(f_p, nwords) /* write the words in memory to a temporary file */

HIO	*f_p;
int	nwords;

{
	register char *pc, **pw;
	int putword();
	extern unsigned memtop;

	for(pw = memtop; nwords-- > 0; pw--) { /* while words left */
	   if(*pw != NULL) {			/* if a good word */
	      pc = *pw;				/* initialize pointer */
	      putword(f_p, pc);
	   }
	}
}

/****************************************************************************/

int help()		/* to review help text, edit file spell.hlp */

{
	errfmt("\t\t\t      spell v2.3\n\nThis program matches text or a");
	errfmt(" list of words against up to 7 dictionary files.\nThe ou");
	errfmt("tput is an alphabetized list of words not found in any d");
	errfmt("ictionary. Words\nfrom the input file are defined as str");
	errfmt("ings of characters a - z and ' termina-\nted by any othe");
	errfmt("r character (all other characters are ignored). Upper ca");
	errfmt("se\nletters are converted to lower case. If a hyphen end");
	errfmt("s a line, the word is\njoined. The rt11 command string i");
	errfmt("nterpretor is used. Up to three output and\n6 input file");
	errfmt("s can be specified. input files 2-6 are dictionaries aga");
	errfmt("inst which\nto check input file 1. the default dictionar");
	errfmt("y is dk:spell.dic in compressed\nascii format. Other dic");
	errfmt("tionaries are ASCII files containing alphabetical word\n");
	errfmt("lists (one word per line). the default input filetype is");
	errfmt(" .wrd and the default\noutput filetype is .spl. Note tha");
	errfmt("t if a device name is given in either the\ninput or outp");
	errfmt("ut file list, that device is the default for the remaind");
	errfmt("er of the\ncommand string (ie not dk:). options can be s");
	errfmt("elected by including \"/O\" any-\nwhere in the command s");
	errfmt("tring where \"O\" represents a single character option.\n");
	errfmt("\n[op1][,op2][,op3]=ip1[,ip2][,ip3][,ip4][,ip5][,ip6][/O]");
	errfmt("\n\n/D do not use the default dictionary\t/S shell sort (");
	errfmt("faster if presorted)\n/H type this help message\t\t/U un");
	errfmt("ique, list output words once only\n/I input file in dict");
	errfmt("ionary format (one word per line), for faster input\n/R ");
	errfmt("input file in runoff format (ignore lines starting with ");
	errfmt("'.')\n");
}

/****************************************************************************/

unsigned unique(nwords)		/* strip out duplicate words */

unsigned	nwords;

{
	char **pw2;
	register char **pw;
	register int i, k;
	int alpho();
	extern char **pwrd;
/*
 *		start, no words skipped yet, pointers adjacent
 */
	pw2 = pw = pwrd;
	pw++;
	for(i = 1; i < nwords; pw++, i++) {
	   k = alpho(*pw, *pw2);
	   if(k == 0)
	      *pw = NULL;
	   else
	      pw2 = pw;
	}
}

/****************************************************************************/

int load(f_p, nwords, totalc, n, dopt, ropt, iopt) 	/* read the input file
							into memory, returns
YES if the entire file is in, NO otherwise. nwords and totalc are the numbers
of words and characters loaded respectively. if iot is YES, the input file is
assumed to be a dictionary and loaded with a faster algorithm. ropt indicates
a runoff file in which case lines beginning with '.' are ignored. dopt
indicates whether or not the /d option is active. */

HIO		*f_p;
unsigned	*nwords, *totalc;
int		n, dopt, ropt, iopt;

{
	register char c, *pc, **pw;
	static char lastc;
	int wflag;
	int wlist(), hgetc(), hungetc();
	unsigned nc;
	extern unsigned membot, memtop;
/*
 *		set up pointers
 */
	*totalc = nc = 0;
	*nwords = 0;
	pc = membot;
	pw = memtop;
/*
 *		read in a word
 */
	if(n == 0)			/* initialize first call to load */
	   lastc = '\n';

	if(iopt) {
	   FOREVER {
	      if((nc = getword(f_p, pc)) == EOF)
	         return(YES);
	      else {
	         *pw-- = pc++;
	         pc += nc;
	         *totalc += nc;
	         (*nwords)++;
	         if(pc + MAXWORD >= pw)
	            return(NO);
	      }
	   }
	}

	FOREVER {
	   wflag = NO;
	   while(nc < MAXWORD - 1) {
label1:	      c = hgetc(f_p);
	      if(c == EOF) {
	         if(wflag == YES) {
	            *totalc += nc;
	            *pc = '\0';
	            (*nwords)++;
	         }
	         return(YES);
	      }
	      if(c >= 'A' && c <= 'Z')	/* convert lower case to upper */
	         c += 32;
	      if((c >= 'a' && c <= 'z') || c == '\'') {
	         if(wflag == NO) {	/* store address of word */
	            *pw-- = pc;
	         }
	         *pc++ = c;		/* store character */
	         nc++;
	         wflag = YES;		/* in word */
	      }
	      else if(c == '-' && wflag == YES) {	/* hyphenation */
	         if((c = hgetc(f_p)) != '\n') {	/* not at end of line */
	            hungetc(f_p, c);
	            lastc = '-';	/* this line not currently needed */
	            break;
	         }
	         else {
	            while((c = hgetc(f_p)) == ' ' || c == '\t' || c == '\n')
	               ;
	            hungetc(f_p, c);
	            c = '\n';
	         }
	      }
	      else if(c == '.' && ropt == TRUE && lastc == '\n') {
	         while((c= hgetc(f_p)) != '\n') {/* deal with runoff option */
	            if(c == EOF) {		/*burn rest of line */
	               if(wflag == YES) {
	                  *totalc += nc;
	                  *pc = '\0';
	                  (*nwords)++;
	               }
	               return(YES);
	            }
	         }
	         if(wflag == YES)
	            break;
	      }
	      else if(wflag == YES) {		/* if word delimiter */
	         lastc = c;
	         break;
	      }
	      lastc = c;
	   }
	   *pc++ = '\0';
	   if(nc < MINCHAR || (!dopt && wlist(*(pw+1)))) {
	      nc = 0;				/* word too small */
	      pw++;				/* reset pointers */
	      pc = *pw;
	   }
	   else {
	      *totalc += nc;
	      nc = 0;
	      (*nwords)++;
	      if(pc + MAXWORD >= pw)
	         return(NO);
	   }
	}
}

/****************************************************************************/

int r_alpord(i, j)	/* compares two words as to reverse alphabetic
			order. i and j are the array indexes (for pwrd)
which point at the two words to compare. returns < 0 if word[i] < word[j];
0 if they are the same word; or > 0 if word[i] > word[j]. */

unsigned	i, j;

{
	register char	*s, *t;
	extern char **pwrd;

	s = pwrd[i];
	t = pwrd[j];

	for( ; *s == *t; s++, t++) {
	   if(*s == '\0')
	      return(0);
	}
	return(*t - *s);
}

/****************************************************************************/

int alpord(i, j)	/* compares two words as to alphabetic
			order. i and j are the array indexes (for pwrd)
which point at the two words to compare. returns < 0 if word[i] < word[j];
0 if they are the same word; or > 0 if word[i] > word[j]. */

unsigned	i, j;

{
	register char	*s, *t;
	extern char **pwrd;

	s = pwrd[i];
	t = pwrd[j];

	for( ; *s == *t; s++, t++) {
	   if(*s == '\0')
	      return(0);
	}
	return(*s - *t);
}

/****************************************************************************/

int exchange(i, j)		/* exchanges two word in memory by exchanging
				the order in which their addresses are stored
in pwrd. i and j are the pwrd indexes of the two word to be exchanged. */

unsigned	i, j;
{
	register char *t;
	extern char **pwrd;

	t = pwrd[i];
	pwrd[i] = pwrd[j];
	pwrd[j] = t;
}

/****************************************************************************/

int sp_out(filename, out_files, infile)	/* send result to the specified
					output files. the results are in
the file named filename. if it is already on the appropriate device, it is
renamed instead of copied. */

char	*filename, *infile;
char	out_files[3][15];
{
	char *cpystr();
	int channel, inchan, outchan, copyok = YES;
	register int rnflag = NO, i, k;
	int leave(), remove(), rename(), same_device(), copy();
	extern char chan_n[];
/*
 *
 */
	for(i = 0; i < 3; i++) {
	   if(out_files[i][0] == '\0')
	      break;
   if((i==2||out_files[i+1][0]=='\0')&&same_device(filename,out_files[i])>=0){
	      for(k = 0; k < NCHAN; k++) {
	         if(chan_n[k] == NO) {
	            channel = k + LOWCHAN;
	            break;
	         }
	      }
	      if(k >= NCHAN) {
	     errfmt("spell - channel not available for output file rename\n");
	         leave();
	      }
	      rename(filename, out_files[i], channel);
	      rnflag = YES;
	   }
	   else {
	      for(k = 0; k < NCHAN; k++) {
	         if(chan_n[k] == NO) {
	            inchan = k + LOWCHAN;
	            break;
	         }
	      }
	      for(k++; k < NCHAN; k++) {
	         if(chan_n[k] == NO) {
	            outchan = k + LOWCHAN;
	            break;
	         }
	      }
	      if(k >= NCHAN) {
	     errfmt("spell - channels not available for output file copy\n");
	         leave();
	      }
	      if(!copy(filename, out_files[i], inchan, outchan))
	         copyok = NO;
	   }
	}

	if(rnflag == NO && copyok == YES)
	   remove(filename);

	return(YES);
}

/****************************************************************************/

int copy(fname1, fname2, inchan, outchan)	/* copies file fname1 to file 
						fname2 block by block returns 
1 if successful and 0 if not. */

char	*fname1, *fname2;
int	inchan, outchan;

{
	int buff[WORDPERBLOCK];
	register int i, nblocks;
	int leave(), bread(), bwrite(), bopen(), bcreate(), bclose();
/*
 *		open input file
 */
	if((nblocks = bopen(inchan, fname1)) >= 0) { /* open input file */
	   errfmt("spell - unable to open input file %p\n", fname1);
	   bclose(inchan);
	   leave();
	}

	nblocks = -nblocks;
/*
 *		create output file
 */
	if(bcreate(outchan, fname2, nblocks) >= 0) { /* create output file */
	   errfmt("spell - unable to create output file %p\n", fname2);
	   bclose(inchan);
	   bclose(outchan);
	   leave();
	}
/*
 *		copy (read, write)
 */
	for(i = 0; i < nblocks; i++) {
	   if(bread(inchan, buff, WORDPERBLOCK, i, 0)>=0 || \
	    bwrite(outchan, buff, WORDPERBLOCK, i, 0)>=0) {
	      errfmt("spell - unable to copy output file\n");
	      bclose(inchan);
	      bclose(outchan);
	      leave();
	   }
	}
/*
 *		close up
 */
	bclose(inchan);
	bclose(outchan);
	return(YES);
}

/****************************************************************************/

int same_device(fname1, fname2)		/* compares two strings up to the
					first ":". if they are equal and 
each contain ":", returns 1; if one string starts with "DK:" and the other
contains no ":", returns 1; if strings are equal without ":", returns 0;
and otherwise returns -1. used to determine if two filenames start with
the same device name. the two filenames are converted to upper case before
starting. */

register char	*fname1, *fname2;
{
	int	i, c;
	unsigned	instr(), substr();
/*
 *		convert filenames to upper case
 */
	for(i = 0; (c = fname1[i]) != '\0'; i++) {
	   if(c <= 'z' && c >= 'a')
	      fname1[i] -= 32;
	}
	for(i = 0; (c = fname2[i]) != '\0'; i++) {
	   if(c <= 'z' && c >= 'a')
	      fname2[i] -= 32;
	}
/*
 *		one is "DK:", the other no device
 */
	if(substr(fname1, "DK:") == 0 && fname1[0] != '\0'\
	 && fname2[instr(fname2, ":")] == '\0')	
	   return(1);
	if(substr(fname2, "DK:") == 0 && fname2[0] != '\0'\
	 && fname1[instr(fname1, ":")] == '\0')
	   return(1);
/*
 *		equal to the ":"?
 */
	for( ; *fname1 == *fname2; fname1++, fname2++) {
	   if(*fname1 == '\0') {
	      if(*fname2 == '\0')
	         return(0);
	      return(-1);
	   }
	   if(*fname1 == ':')
	      return(1);
	}
/*
 *		unequal
 */
	return(-1);
}

/***************************************************************************/

int cmpwrd(dict, in_file, nwords, cdic)	/* compare the words in memory to
					those in dictionary dict. assumes
that dict contains a list of alphabetized words containing lower case letters
and ' only and each separated by \n. returns nothing.		*/

char		*dict, in_file;
unsigned	nwords;
int		cdic;

{
	register char **pw;
	char wrdbuf[MAXWORD];
	register int i;
	int nchar = 0, same_chars = 0;
	int leave(), bclose(), new_alpho(), getword(), hopen(), hclose();
	int getdw();
	extern char **pwrd;
	extern unsigned memtop;
	extern HIO hfiles[];

	if(!hopen(&hfiles[2], dict)) {
	   errfmt("spell - unable to open dictionary file %p\n", dict);
	   bclose(hfiles[0].chan);
	   leave();
	}
	errfmt("comparing input file %p to dictionary %p\n", in_file, dict);

	pw = memtop;

	if(cdic) {
	   if(getdw(&hfiles[2], wrdbuf) == EOF)		/* get dict word */
	      return;
	}
	else if(getword(&hfiles[2], wrdbuf) == EOF)	/* get dict word */
	      return;

	while(*pw == NULL) {
	   pw--;
	   if(pw < pwrd) {
	      hclose(&hfiles[2]);
	      return;
	   }
	}

	FOREVER {
	   i = new_alpho(*pw, wrdbuf, &nchar);
	   if(i <= 0) {
	      if(i == 0)
	         *pw = NULL;
	      pw--;
	      while(*pw == NULL) {
	         pw--;
	         if(pw < pwrd)
	            break;
	      }
	      if(pw < pwrd)
	         break;
	   }
	   else {
	      if(cdic) {
	         while((same_chars = getdw(&hfiles[2], wrdbuf)) > nchar)
	            ;
	         if(same_chars == EOF)
	            break;
	      }
	      else if(getword(&hfiles[2], wrdbuf) == EOF) /* get dict word */
	         break;
	   }
	}
	hclose(&hfiles[2]);
}

/****************************************************************************/

int hgetc(x)		/* get a character from the file with HIO of x. use of
			this routine will not precisely duplicate the input
file if it contains a carriage return (13, octal 15) alone without a line feed
(10, octal 12) since all CRs are ignored. */

HIO	*x;

{
	char junk;
	register int c, tempc;
	int getsub(), leave();
/*
 *		get character and test for end of file
 */
	if(x->mode != READ)
	   leave();
	if(!x->nblocks)
	   return(EOF);
/*
 *		unget character?
 */
	if(x->p_char < 0)
	   c = x->ugetc;		/* start of block unget character */
	else
	   c = x->dbuff[x->p_buff][x->p_char];
/*
 *		if beginning of file go until good character
 */
	if(!x->p_char && !x->bkount) {
	   while(c == '\0' || c == '\015') {
	      if(++(x->p_char) >= BYTEPERBLOCK) {
	         if(getsub(x, 0, &junk))
	            return(c);
	      }
	      c = x->dbuff[x->p_buff][x->p_char];
	   }
	}
/*
 *		leave pointer on next character
 */
	for(tempc = '\0'; tempc == '\0' || tempc == '\015'; ) {
	   if(++(x->p_char) >= BYTEPERBLOCK) {
	      if(getsub(x, 0, &junk))
	         return(c);
	   }
	   tempc = x->dbuff[x->p_buff][x->p_char];
	}

	return(c);
}

/****************************************************************************/

int getsub(x, n, wbuf)	/* subroutine used by getword() and getdw() */

HIO	*x;
int	n;
char	*wbuf;

{
	int bread(), bclose(), leave();

	x->bkount++;
	if(x->block >= x->nblocks) {
	   if(x->bkount >= x->nblocks) {
	      wbuf[n] = '\0';
	      x->nblocks = 0;
	      return(YES);
	   }
	}
	else {
      if(bread(x->chan,&x->dbuff[x->p_buff][0],WORDPERBLOCK,x->block++,0)>=0){
	      errfmt("spell - unable to read\n");
	      bclose(x->chan);
	      leave();
	   }
	}
	x->p_char = 0;
	if(++x->p_buff >= NBUFFERS)
	   x->p_buff = 0;

	return(NO);
}

/****************************************************************************/

int getdw(x, wrdbuf)				/* get a word from the READ
						file opened with HIO x and
copy it to char wrdbuf[] limiting length to MAXWORD characters. the input file
must be in compressed dictionary format ie each word preceded by a single
character '0' - '@' indicating the number of beginning characters in common
with the previous word (alphabetized). if the word exceeds MAXWORD it is
truncated and the extra characters are ignored. returns EOF or number of
characters unchanged form the previous word (excluding the '\0').
this may be 0 even though a valid word is returned (EOF must be negative!).
leave() called on all errors. */

HIO	*x;
char	*wrdbuf;

{
	static char tbuf[MAXWORD];
	register char c;
	register int k, i;
	int same_chars;
	int getsub(), leave();
/*
 *		test for READ file and end of file
 */
	if(x->mode != READ)
	   leave();
	if(!x->nblocks)
	   return(EOF);
	c = x->dbuff[x->p_buff][x->p_char];
/*
 *		get first character
 */
	if(c < '0' || c > '@') {
	   errfmt("spell - improper dictionary, not compressed format\n");
	   leave();
	}	   
/*
 *		copy same characters from temporary buffer
 */
	for(i = 0, k = c - '0'; i < k; i++)
	   wrdbuf[i] = tbuf[i];
	same_chars = i;
/*
 *		get new characters
 */
	for(c = 'a'; (c >= 'a' || c == '\'') && i < MAXWORD - 1; ) {
	   if(++(x->p_char) >= BYTEPERBLOCK) {
	      if(getsub(x, i, wrdbuf))
	         return(same_chars);
	   }
	   c=x->dbuff[x->p_buff][x->p_char];
	   tbuf[i] = wrdbuf[i] = c;
	   i++;
	}
	wrdbuf[--i] = '\0';
/*
 *		set pointers for next word
 */
	while((c = x->dbuff[x->p_buff][x->p_char]) < '0' || c > '@') {
	   if(++(x->p_char) >= BYTEPERBLOCK) {
	      if(getsub(x, i, wrdbuf))
	         return(same_chars);
	   }
	}

	return(same_chars);
}

/****************************************************************************/

int getword(x, wrdbuf)				/* get a word from the READ
						file opened with HIO x and
copy it to char wrdbuf[] limiting length to MAXWORD characters. the input file
must have each word terminated by '\n' even if it is the last word. if a
word is cut off at MAXWORD characters and the remainder
becomes a new word. returns EOF or the number of characters excluding
the '\0'. if there is any error leave() is called. */

HIO	*x;
char	*wrdbuf;

{
	register char c;
	register int i;
	int getsub(), leave();
/*
 *		test for READ file and end of file
 */
	if(x->mode != READ)
	   leave();
	if(!x->nblocks)
	   return(EOF);
/*
 *		read in the word
 */
	for(i=0;(c=x->dbuff[x->p_buff][x->p_char])>'\015' && i<MAXWORD - 1;) {
	   wrdbuf[i++] = c;
	   if(++(x->p_char) >= BYTEPERBLOCK) {
	      if(getsub(x, i, wrdbuf))
	         return(i);
	   }
	}
	wrdbuf[i] = '\0';
/*
 *		set pointers for next word
 */
	while(((c=x->dbuff[x->p_buff][x->p_char])<'a'||c>'z') && c!='\'') {
	   if(++(x->p_char) >= BYTEPERBLOCK) {
	      if(getsub(x, i, wrdbuf))
	         return(i);
	   }
	}
/*
 *		finish
 */
	return(i);
}

/****************************************************************************/

int hopen(x, fname)	/* open a file for buffered i/o */

HIO	*x;
char	*fname;

{
	register int i;
	int bopen(), bclose(), bread();
	extern char chan_n[];
/*
 *		test if this HIO is unused
 */
	if(x->mode != NO)
	   return(NO);
/*
 *		initialize file parameters
 */
	for(i = 0; i < NCHAN; i++) {
	   if(chan_n[i] == NO) {
	      x->chan = i + LOWCHAN;
	      break;
	   }
	}
	if(i >= NCHAN) {
	   errfmt("no channel available\n");
	   return(NO);
	}

	x->bkount = x->block = x->p_buff = x->p_char = 0;
/*
 *		issue lookup emt
 */
	if((i = bopen(x->chan, fname)) > 0)
	   return(NO);			/* if 0 blocks, it will succeed */

	x->nblocks = -i;
/*
 *		initialize (read to) buffers
 */
	for(i = 0; i < NBUFFERS && i < x->nblocks; i++) {
	   if(bread(x->chan,&x->dbuff[i][0],WORDPERBLOCK,(x->block)++, 0)>=0){
	      bclose(x->chan);
	      return(NO);
	   }
	}

	x->mode = READ;
	chan_n[x->chan - LOWCHAN] = YES;

	return(YES);
}

/****************************************************************************/

int putword(x, word)			/* write a word to the WRITE file
					created with HIO x. each word is
terminated with '\n'. returns YES or NO (proper file not created. */

HIO	*x;
char	*word;

{
	register int flag, i;
	int leave(), bclose(), bwrite();
/*
 *		test for WRITE file
 */
	if(x->mode != WRITE)
	   leave();
/*
 *		copy word into buffer, write buffer when full
 */
	flag = NO;
	for(i = 0; word[i] != '\0' && i < MAXWORD-1; i++) {
	   x->dbuff[x->p_buff][x->p_char++] = word[i];
	   if(x->p_char >= BYTEPERBLOCK) {
	      flag = YES;
	      x->p_char = 0;
	      x->p_buff++;
	      if(x->p_buff >= NBUFFERS)
	         x->p_buff = 0;
	   }
	}
	for(i = 0; i < 2; i++) {
	   if(!i)
	      x->dbuff[x->p_buff][x->p_char++] = '\015';
	   else
	      x->dbuff[x->p_buff][x->p_char++] = '\012';
	   if(x->p_char >= BYTEPERBLOCK) {
	      flag = YES;
	      x->p_char = 0;
	      x->p_buff++;
	      if(x->p_buff >= NBUFFERS)
	         x->p_buff = 0;
	   }
	}
	if(flag) {
	   i = (!x->p_buff ? NBUFFERS - 1: x->p_buff - 1);
   if(bwrite(x->chan, &x->dbuff[i][0], WORDPERBLOCK, x->block++, 0)>=0) {
	      errfmt("spell - unable to write\n");
	      bclose(x->chan);
	      leave();
	   }
	}
	return(YES);
}

/****************************************************************************/

int hcreate(x, fname, nb)	/* create a buffer file of nblocks */

HIO	*x;
char	*fname;
unsigned nb;

{	
	register int i;
	int bcreate();
	extern char chan_n[];
/*
 *		test if this HIO unused
 */
	if(x->mode != NO)
	   return(NO);
/*
 *		initialize file parameters
 */
	for(i = 0; i < NCHAN; i++) {
	   if(chan_n[i] == NO) {
	      x->chan = i + LOWCHAN;
	      break;
	   }
	}
	if(i >= NCHAN) {
	   errfmt("no channel available\n");
	   return(NO);
	}

	x->nblocks = nb;
	x->block = x->p_buff = x->p_char = 0;
/*
 *		create file, .enter
 */
	if((bcreate(x->chan, fname, nb)) >= 0)
	   return(NO);

	x->mode = WRITE;
	chan_n[x->chan - LOWCHAN] = YES;
	return(YES);
}

/****************************************************************************/

int hclose(x)		/* close a buffered file */

HIO	*x;

{
	int bclose(), bwrite();
	extern char chan_n[];
/*
 *		finish with output buffer, zero and write
 */
	if(x->mode == NULL)
	   return(NO);

	if(x->mode == WRITE && x->p_char > 0) {
	   for( ; x->p_char < BYTEPERBLOCK; x->p_char++)
	      x->dbuff[x->p_buff][x->p_char] = '\0';
if(bwrite(x->chan, &x->dbuff[x->p_buff][0], WORDPERBLOCK, x->block, 0) >= 0) {
	      errfmt("spell - unable to write\n");
	      bclose(x->chan);
	      return(NO);
	   }
	}
/*
 *		close file
 */
	if(bclose(x->chan) >= 0)
	   return(NO);
	else {
	   x->mode = 0;
	   chan_n[x->chan - LOWCHAN] = NO;	/* release channel number */
	   return(YES);
	}
}

/****************************************************************************/

int hungetc(x, c)		/* unget a character to file of HIO x */

HIO	*x;
int	c;

{
	if(x->p_char > 0) {
	   (x->p_char)--;
	   x->dbuff[x->p_buff][x->p_char] = c;
	}
	else if(x->p_char == 0) {
	   (x->p_char)--;
	   x->ugetc = c;
	}
	else
	   x->ugetc = c;
}

/****************************************************************************/

int gremov(low, lim, fname)	/* deletes temporary files numbered from low
				to lim inclusively; names are tempNN.tmp. */

int	low, lim;
char	*fname;

{
	char *gname();
	register int i;
	int remove(), hclose();
	extern HIO hfiles[];

	for(i = 0; i <= lim - low; i++) {
	   if(!hclose(&hfiles[i]))
	      errfmt("spell - unable to close temporary file\n");
	   else if(remove(gname(fname, low + i)) != 0)
	      errfmt("spell - unable to remove temporary file\n");
	}
}

/****************************************************************************/

int ext_sort(high, fname, unique)	/* perform external merged sort on
					the temporary output files to combine
them into a single final output file. high is the number of temporary files-1.
fname is the address of a string into which to deposit the name of the final
file. */

int	high;
char	*fname;
int	unique;

{
	char *gname();
	register int i, low, lim;
	int leave(), gremov(), merge(), hopen(), hclose(), hcreate();
	extern HIO hfiles[];
/*
 *		merge the temporary files
 */
	for(low = 0; low < high; low += MERGEORDER) {
	   lim = min(low + MERGEORDER - 1, high);
	   for(i = low; i <= lim; i++) {
	      if(!hopen(&hfiles[i - low], gname(fname, i))) {
	        errfmt("spell - unable to open temporary output files\n");
	         leave();
	      }
	   }
	   high++;
	   if(!hcreate(&hfiles[MERGEORDER], gname(fname, high), 0)) {
	      errfmt("spell - unable to create merge file\n");
	      leave();
	   }
	   merge(lim - low + 1, unique);
	   if(!hclose(&hfiles[MERGEORDER]))	/* close output file */
	      errfmt("spell - unable to close merge file\n");
	   gremov(low, lim, fname);		/* delete temporary files */
	}
	gname(fname, high);
}

/****************************************************************************/

int merge(nfiles, unique)		/* merges n files together. */

int	nfiles, unique;

{
	char lastwd[MAXWORD], *cpystr();
	register int i, nf = 0;
	int alpord(), shell(), exchange();
	int reheap(), getword(), putword(), alpho();
	char *cc;
	extern char **pwrd;
	extern unsigned membot;
	extern HIO hfiles[];
/*
 *		set up pointers
 */
	if((memtop - membot) < (MAXWORD * MERGEORDER + MERGEORDER)) {
	   errfmt("spell - inadequate memory for merge\n");
	   return;
	}

	pwrd = membot;

	cc = membot + 4 * MERGEORDER;		/* initialize char pointer */
/*
 *		start the heap set up
 */
	for(i = 0; i < nfiles; i++) {
	   if(getword(&hfiles[i], &cc[MAXWORD * i]) > 0)
	      pwrd[nf++] = &cc[MAXWORD * i];
	}
/*
 *		sort the heap
 */
	shell(nf, alpord, exchange, 0);
/*
 *		output top line from heap and replace
 */
	lastwd[0] = '\0';		/* initialize last word */
	while(nf > 0) {
	   if(unique) {
	      if(alpho(lastwd, pwrd[0]) != 0) {
	         putword(&hfiles[MERGEORDER], pwrd[0]);
	         cpystr(lastwd, pwrd[0], NULL);
	      }
	   }
	   else
	      putword(&hfiles[MERGEORDER], pwrd[0]);

	   i = (pwrd[0] - cc) / MAXWORD;
	   if(getword(&hfiles[i], pwrd[0]) == EOF)
	      pwrd[0] = pwrd[nf-- - 1];

	   reheap(nf, alpord, exchange);
	}
}

/****************************************************************************/

int reheap(nf, pord, pexch)		/* reorder the heap after putting
					a new member in the first slot. 
nf is the number of members in the heap. pord is the address of a comparison
function returning < 0 if the first argument is less than the second, > 0 if
the second argument is less than the first, or 0 if the two arguments are
equal. pexch is the address of a function which will exchange its two
arguments. */

int	nf;
int	(*pord)(), (*pexch)();
{
	register int	i, j;

	for(i = 1; i * 2 <= nf; i = j) {
	   j = i * 2;
	   if(j < nf) {
	      if((*pord)(j - 1, j) > 0)
	         j += 1;
	   }
	   if((*pord)(i - 1, j - 1) <= 0)
	      break;
	   (*pexch)(i - 1, j - 1);
	}
}

/****************************************************************************/

int quickr(u, pord, pexch, l)		/* recursive quicksort. based
					on "Programming Pearls" page 175.
pord is a routine with compares two items and pexch exchanges them. each
is called with two arguments which are the indices (0 - n-1) of the items.
u is the index of the last item. l is the index of the first item (offset).	
*/

register int	u;
int		l;
int	(*pord)(), (*pexch)();

{
/*	float rng();
	int sd1 = 69, sd2 = 1594;
*/
	register int	i, m;

	u--;
	if(u - l > 15) {
/*	   (*pexch)(l, l + (int)((float) (u-l+1) * rng(&sd1, &sd2)));	*/
	   (*pexch)(l, (u + l) / 2);
	   m = l;
	   for(i = l + 1; i <= u; i++) {
	      if((*pord)(i, l) < 0) {
	         (*pexch)(++m, i);
	      }
	   }
	   (*pexch)(l, m);
	   quickr(m, pord, pexch, l);
	   quickr(u+1, pord, pexch, m+1);
	}
	else
	   shell(u - l + 1, pord, pexch, l);
}

/****************************************************************************/

int wlist(s)	/* test whether the word in the string s is present in
		the list contained in this subroutine. */

char	*s;

{
	int alpho(), bin_search();

	static *wl[] = {
	"about", "after", "all", "also", "an", "and", "any", "are",
	"as", "at", "be", "been", "before", "but", "by", "can",
	"could", "did", "do", "does", "down", "each", "for", "from",
	"go", "had", "has", "have", "he", "her", "him", "his", "how",
	"if", "in", "into", "is", "it", "its", "like", "many", "may",
	"me", "more", "much", "my", "new", "no", "not", "now", "of",
	"off", "on", "one", "or", "other", "our", "out", "over",
	"right", "said", "see", "she", "so", "some", "than", "that",
	"the", "their", "them", "then", "there", "these", "they",
	"this", "time", "to", "too", "two", "up", "use", "was", "we",
	"were", "what", "when", "where", "which", "who", "will",
	"with", "would", "you", "your" };

/*	static char *wl[] = { "about", "after", "again", "all", "along",
	"also", "always", "an", "and", "another", "any", "are", "around",
	"as", "asked", "at", "away", "back", "be", "because", "been",
	"before", "below", "between", "big", "both", "but", "by", "came",
	"can", "come", "could", "day", "did", "do", "does", "don't", "down",
	"each", "end", "even", "every", "few", "find", "first", "for",
	"found", "from", "get", "give", "go", "good", "great", "had", "has",
	"have", "he", "help", "her", "here", "him", "his", "home", "how",
	"if", "in", "into", "is", "it", "its", "just", "know", "large",
	"last", "left", "like", "long", "look", "made", "make", "man",
	"many", "may", "me", "men", "might", "more", "most", "mr", "much",
	"must", "my", "name", "never", "new", "next", "no", "not", "now",
	"number", "of", "off", "often", "old", "on", "one", "only", "or",
	"other", "our", "out", "over", "own", "part", "place", "put",
	"read", "right", "said", "same", "saw", "say", "see", "set", "she",
	"should", "show", "small", "so", "some", "still", "such", "take",
	"tell", "than", "that", "the", "their", "them", "then", "there",
	"these", "they", "things", "think", "this", "those", "three",
	"through", "time", "to", "too", "two", "under", "up", "us", "use",
	"used", "very", "was", "way", "we", "well", "went", "were", "what",
	"when", "where", "which", "while", "who", "why", "will", "with",
	"word", "work", "would", "write", "years", "you", "your" };
n181 */

	return(bin_search(s, wl, alpho, 0, 93));
}

/****************************************************************************/

int bin_search(s, wl, pcomp, l, u) /* binary search routine for a string s.
				l is the index of the first (low) array
member and u is the index of the last (upper) array member. */

int		(*pcomp)();
char		*s, *wl[];
register int	l;
int		u;

{
	register int k, pivot;

	while (l <= u) {
	   pivot = (l + u) / 2;
	   k = (*pcomp)(wl[pivot], s);
	   if(k == 0)
	      return(YES);
	   else if(k < 0)
	      l = pivot + 1;
	   else 				/* (k > 0) */
	      u = pivot - 1;
	}

	return(NO);
}

/****************************************************************************/

int new_alpho(s, t, n)	/* compares two strings s and t as to alphabetic
			order. returns < 0 if s < t, 0 if they are equal,
or > 0 if s > t. n is changed to the index of the character first
mismatched. */

register char	*s, *t;
int	*n;

{
	register int	k;

	for(k = 0; s[k] == t[k]; k++) {
	   if(s[k] == '\0')
	      return(0);
	}
	*n = k;
	return(s[k] - t[k]);
}

/***************************************************************************/
/*			external definitions				   */

char 		chan_n[NCHAN] = { 0 };
char		**pwrd = NULL;
unsigned	pwsize = 0, pbot = NULL, membot = NULL, memtop = NULL;
HIO		hfiles[MERGEORDER + 1] = { 0 };

