/* #define	DEBUG */
/*
 *				p h b o o k . c
 */

/*)BUILD
*/

#ifdef	DOCUMENTATION

title	phbook	Phone Book Search
index		Phone book search

synopsis
	.s.nf
	phbook [-f file] arg[s]
	.s.f
description

	Phbook searches a user-defined phone book file for any
	entry that matches any argument in the argument list.
	The phone book, normally stored in file phbook.txt, may be
	created by any standard editor.  It contains lines
	of text, such as:

	  Digital Equipment	DEC	(617) 897-5111

	Note that phbook does not impose any format on the text,
	only that the text consist of words, separated by blanks.
	If a line starts with whitespace (blank or tab), it is
	considered to be a continuation of the previous line:

	  Digital Equipment	DEC	(617) 897-5111
	    146 Main Street, Maynard, MA 01754

	The example line would be printed by, e.g.,

	    phbook dig

	or

	    phbook 617

	(Searches skip over continuation lines).
	In a search argument, two "wild-cards" are recognized:
	.lm +4
	.s.i-2;* matches any string of characters, even null.
	.s.i-2;? matches a single non-null character.
	.s.lm -4
	Note that "phbook *" will print the entire phone book.
	Each match argument will be implicitly terminated by '*'.
	Thus, a search argument of "ma" will match "Martin".
	Upper- and lower-case are ignored in comparisons.

	If no wild-card characters are present in an argument,
	a "Soundex" match will be used. This permits matches
	even if you have (slightly) misspelled the name for
	which you are searching.

	If phbook is invoked without an argument, it will
	prompt for arguments, outputting all matches.  If
	the standard input is redirected and no argument is given,
	phbook may be used as a filter.

	If a "-f filename" argument is given on the command line,
	phbook searches the indicated file.  Otherwise, it tries to
	locate a phone book file using the following search list:

	(On DEC operating systems):
	  getenv("PHBOOK") (Vax/VMS only)
	  phbook:
	  phbook.txt
	  phbook:phbook.txt
	  root:phbook.txt
	  getenv("HOME")+"phbook.txt" (Vax/VMS only)
	(On Unix)
	  getenv("PHBOOK")
	  phbook.txt
	  getenv("HOME")+"phbook.txt"

	Note that, on Vax/VMS, your login.com file can assign
	any file to logical PHBOOK:, allowing you to override
	the file name specification.

Acknowledgements

	The Soundex algorithm was invented by Margaret K. Odell and
	Robert C. Russell.  U.S. patents 1261167 (1918) and 1435663
	(1922).  The version used here was modified from one described
	in Donald Knuth, Sorting and Searching.

Author

	Martin Minow

#endif

#include	<stdio.h>
#include	<ctype.h>
#ifdef	vms
#include	<ssdef.h>
#include	<stsdef.h>
#define	IO_NORMAL	(SS$_NORMAL | STS$M_INHIB_MSG)
#define	IO_ERROR	SS$_ABORT
#endif
/*
 * Note: IO_NORMAL and IO_ERROR are defined in the Decus C stdio.h file
 */
#ifndef	IO_NORMAL
#define	IO_NORMAL	0
#endif
#ifndef	IO_ERROR
#define	IO_ERROR	1
#endif

#ifdef	tolower
/*
 * Note: tolower is wrong on some Unix systems.  This one is
 * correct, but requires that the argument not have side-effects.
 */
#undef	tolower
#define tolower(c)	(isupper(c) ? (c + ('a' - 'A')) : c)
#endif
#ifdef	isprint
#ifndef	isgraph
#define	isgraph(c)	(isprint(c) && c != ' ')
#endif
#endif

#ifdef	decus
int	$$narg	=	1;			/* Don't require arg's.	*/
#endif

#define	TRUE		1
#define	FALSE		0
#define	EOS		0

#define	NFIELDS		100
#define	LINESIZE	257

char		*search[NFIELDS];		/* search text		*/
char		*word[NFIELDS];			/* file text split up	*/
int		issoundex[NFIELDS];		/* TRUE for soundex	*/
unsigned int	hash[NFIELDS];			/* soundex hash value	*/
char		line[LINESIZE];			/* text line as read	*/
char		work[LINESIZE];			/* text line in words	*/
char		argtext[LINESIZE];		/* prompted text	*/
FILE		*fd;				/* Phone book file	*/
int		debug = 0;

#ifndef	unix
/*
 * This search list is used to locate the phone book.
 */

static char	*searchlist[] = {
	"phbook.txt",				/* Current directory	*/
#ifdef	vms
	"phbook:",				/* Assigned logical	*/
	"phbook:phbook.txt",			/* Logical, this file	*/
	"root:phbook.txt",			/* User login dir.	*/
#endif
#ifdef	rsx
	"[userfiles]phbook.txt",
	"lb:[1,2]phbook.txt",
	"lb:[1,1]phbook.txt",
#endif
#ifdef	rt11
	"sy:phbook.txt",
#endif
	NULL
};
#endif

/*
 * Usage message
 */
static char	*helpmessage[] = {
	"Search phone book for an argument string.",
	"  '*' in a string matches any string of characters.",
	"  '?' in a string matches any non-null character.",
	"Any left-most match will succeed and multiple search",
	"arguments are permitted.  Soundex (match if names sound",
	"similar) will be used if you don't use '*' or '?'.",
	NULL,
};
main(argc, argv)
int		argc;
char		*argv[];
{
	openphonebook(argc, argv);
	if (argc > 1) {
	    findargs(argc, argv);
	    setsoundexflag();
	    findmatch();
	}
	else {
	    /*
	     * No arguments were given.  Prompt and read
	     * arguments from standard input.
	     */
	    if (isatty(fileno(stdin))) {
		fprintf(stderr,
		    "Phone book search program, <return> for help\n");
	    }
	    while (!feof(stdin)) {
		if (isatty(fileno(stdin))) {
		    fprintf(stderr, "search for: ");
		    fflush(stderr);
		}
		if (gets(argtext) == NULL)
		    break;
		if (argtext[0] == EOS) {
		    usage();
		}
		else {
		    packtext(argtext, search);
		    setsoundexflag();
		    findmatch();
		    rewind(fd);
		}
	    }
	}
	exit(IO_NORMAL);
}
openphonebook(argc, argv)
int		argc;		/* Arg count from operating system	*/
register char	*argv[];	/* Arg vector from operating system	*/
/*
 * Open the phone book file -- exit to system if failure.
 */
{
	register char	**namep;
	register char	*cp;
#ifdef	unix
	extern char	*getenv();
	char		working_filename[81];
#endif
#ifdef	vms
	extern char	*getenv();
	char		working_filename[81];
#endif

	while (--argc > 0) {
	    cp = *++argv;
	    if (*cp++ == '-') {
#ifdef	DEBUG
		if ((*cp == 'd' || *cp == 'D') && cp[1] == EOS) {
		    debug++;
		    *argv = NULL;
	        }
		else
#endif
		if ((*cp == 'f' || *cp == 'F') && cp[1] == EOS) {
		    *argv++ = NULL;
		    cp = *argv;
		    if (--argc <= 0) {
			fprintf(stderr, "-f must be followed by a file name\n");
			exit(IO_ERROR);
		    }
		    else if ((fd = fopen(cp, "r")) == NULL) {
			perror(cp);
			fprintf(stderr,
			    "Phbook could not open your phone book file\n");
			exit(IO_ERROR);
		    }
		    else {
			*argv = NULL;
			return;
		    }
		}
	    }
	}
#ifdef	unix
	/*
	 * Look for getenv("PHBOOK"), then getenv("HOME")+"phbook.txt"
	 */
	if ((cp = getenv("PHBOOK")) != NULL)
	    return (openit(cp, TRUE));
	else if (openit("phbook.txt", FALSE))
	    return;
	else if ((cp = getenv("HOME")) != NULL) {
	    strcpy(working_filename, cp);
	    strcat(working_filename, "/phbook.txt");
	    if (openit(working_filename, FALSE))
		return;
	}
#else
	/*
	 * On VMS, RSTS, RSX, or whathaveyou, try the search list
	 */
#ifdef	vms
	if ((cp = getenv("PHBOOK")) != NULL)
	    return (openit(cp, TRUE));
#endif
	for (namep = searchlist; *namep != NULL; namep++) {
	    if (openit(*namep, FALSE))
		return;
	}
#ifdef	vms
	if ((cp = getenv("HOME")) != NULL) {
	    strcpy(working_filename, cp);
	    strcat(working_filename, "phbook.txt");
	    if (openit(working_filename, FALSE))
		return;
	}
#endif
#endif
	fprintf(stderr, "Could not locate phone book (phbook.txt)\n");
	exit(IO_ERROR);
}

static int
openit(filename, fatal)
char	*filename;		/* Actual filename to open		*/
int	fatal;			/* TRUE if failure is fatal		*/
/*
 * Open the indicated file.  Return TRUE if success.  If failure and
 * fatal is TRUE, exit to the operating system with an appropriate
 * error message, else, return FALSE.
 */
{
	if ((fd = fopen(filename, "r")) != NULL)
	    return (TRUE);
	else if (!fatal)
	    return (FALSE);
	else {
	    perror(filename);
	    fprintf(stderr, "Couldn't open phone book, fatal\n");
	    exit(IO_ERROR);
	}
}
findargs(argc, argv)
int		argc;		/* Arg count from operating system	*/
char		*argv[];	/* Arg vector from operating system	*/
/*
 * Build the search arguments in field[].
 */
{
	register char		*cp;
	register int		fieldindex;
	register char		c;

	fieldindex = 0;
	while (--argc > 0) {
	    cp = *++argv;
	    if (cp == NULL)
		continue;
	    search[fieldindex] = cp;
	    while ((c = *cp) != EOS) {
		*cp++ = tolower(c);
	    }
	    fieldindex++;
	}
	search[fieldindex] = NULL;		/* terminate the list	*/
	setsoundexflag();
}
setsoundexflag()
/*
 * Examine the search arguments (in search[]), setting issoundex[]
 * appropriately.  If this argument should be "soundex'ed", calculate
 * the hash value.
 */
{
	register char		*cp;
	register int		fieldindex;
	register char		c;
	extern unsigned int	soundex();

	for (fieldindex = 0; (cp = search[fieldindex]) != NULL; fieldindex++) {
	    issoundex[fieldindex] = TRUE;
	    while ((c = *cp++) != EOS) {
		if (c == '*' || c == '?') {
		    issoundex[fieldindex] = FALSE;
		    break;
		}
	    }
	    if (issoundex[fieldindex])
		hash[fieldindex] = soundex(search[fieldindex]);
	}
}
findmatch()
/*
 * Read the file, print matching lines.
 */
{
	register int		fieldindex;	/* Field pointer	*/
	register char		**wp;		/* Word pointer		*/
	register int		matchcount;	/* Counts matches	*/
	extern unsigned int	soundex();

	matchcount = 0;
	line[0] = EOS;
	for (;;) {
	    if (line[0] == EOS && fgets(line, sizeof (line), fd) == NULL)
		break;
	    if (isspace(line[0])) {
		line[0] = EOS;			/* Force read next time	*/
		continue;
	    }
	    strcpy(work, line);
	    packtext(work, word);
	    for (fieldindex = 0; search[fieldindex] != NULL; fieldindex++) {
		for (wp = word; *wp != NULL; wp++) {
		    if (issoundex[fieldindex] != FALSE
		     && soundex(*wp) == hash[fieldindex])
			goto gotcha;
		    if (matchtest(*wp, search[fieldindex]))
			goto gotcha;
		}
	    }
	    line[0] = EOS;			/* Force read next time	*/
	    continue;				/* Not found		*/

gotcha:	    matchcount++;
	    do {
		fputs(line, stdout);
		if (fgets(line, sizeof (line), fd) == NULL)
		    goto done;
	    } while (isspace(line[0]));
	}
done:	if (matchcount == 0)
	    printf("No match found\n");
}
packtext(text, wordlist)
register char	*text;			/* This text line is packed	*/
register char	**wordlist;		/* Into this array of words	*/
/*
 * Build wordlist.  Modifies text.
 */
{
	register char		c;

	c = *text;
	while (c != EOS) {
	    while ((c = *text) != EOS && !isgraph(c))
		++text;				/* Skip over whitespace	*/
	    if (c == EOS)
		break;
	    *wordlist++ = text;			/* Start of new word	*/
	    while ((c = *text), isgraph(c))
		*text++ = tolower(c);
	    *text++ = EOS;			/* Terminate the word	*/
	}
	*wordlist = NULL;
}
int
matchtest(name, pattern)
register char	*name;		/* What to look for			*/
register char	*pattern;	/* May have wildcard			*/
/*
 * Recursive routine to match "name" against "pattern".
 * Returns TRUE if successful, FALSE if failure.
 *
 * pattern follows Dec filename wildcard conventions:  '*' matches any
 * string (even null), '?' matches any single (non-null) byte.
 *
 */
{
	register char	pattbyte;

	for (;;) {
	    /* fprintf(stderr, "(%s) (%s), namebyte = '%c', pattbyte = '%c'\n",
		name, pattern, *name, *pattern);
	    */
#ifdef	FAIL_AT_END_OF_PATTERN
	    /*
	     * First check for one-byte pattern "*" -- this must succeed
	     */
	    if ((pattbyte = *pattern++) == '*' && *pattern == EOS)
		return (TRUE);
	    /*
	     * If not, then if both strings finish equally, it succeeds.
	     */
	    if (*name == EOS && pattbyte == EOS)
		return (TRUE);
#else
	    /*
	     * Assume that patterns are terminated -- implicitly --
	     * by '*', allowing all left-matches to succeed.
	     */
	    if ((pattbyte = *pattern++) == EOS
	     || (pattbyte == '*' && *pattern == EOS))
		return (TRUE);
#endif
	    /*
	     * Not at end of the name string.
	     */
	    switch (pattbyte) {
#ifdef	FAIL_AT_END_OF_PATTERN
	    case EOS:			/* End of pattern -> failure	*/
		return (FALSE);
#endif
	    case '*':			/* Wild card means "advance"	*/
		do {
		    if (matchtest(name, pattern))
			return (TRUE);
		} while (*name++ != EOS);
		return (FALSE);		/* Did our best			*/

	    case '?':			/* One byte joker		*/
		break;			/* succeeds with this byte	*/

	    default:			/* Others much match exactly	*/
		if (*name != pattbyte)
		    return (FALSE);
		break;
	    }
	    name++;			/* Advance name byte		*/
	}
}
/*
 * soundex(string)
 *
 * Return the soundex hash value for the argument string.
 * S_V should be zero for efficiency.
 */

#define	S_V	0	/* Vowels: a e i o u (maybe y, h, w)		*/
#define	S_SK	1	/* "hard" consonants: s c z x k q		*/
#define	S_TD	2	/* Dental stops: t d				*/
#define	S_FV	3	/* f v 						*/
#define	S_GJ	4	/* g j						*/
#define	S_B	5	/* b						*/
#define	S_L	6	/* l						*/
#define	S_M	7	/* m						*/
#define	S_N	8	/* n						*/
#define	S_P	9	/* p						*/
#define	S_R	10	/* r						*/
#define	S_MAX	11	/* "radix" of soundex values			*/

/*
 * The following are the hash values for each letter.
 */

static char	sx_letters[] = {
/*	 a	 b	 c	 d	 e	 f	 g	 h	*/
	S_V,	S_B,	S_SK,	S_TD,	S_V,	S_FV,	S_GJ,	S_V,
/*	 i	 j	 k	 l	 m	 n	 o	 p	*/
	S_V,	S_GJ,	S_SK,	S_L,	S_M,	S_N,	S_V,	S_P,
/*	 q	 r	 s	 t	 u	 v	 w	 x	*/
	S_SK,	S_R,	S_SK,	S_TD,	S_V,	S_FV,	S_V,	S_SK,
/*	 y	 z							*/
	S_V,	S_SK,
};

/*
 * The first letter of the following consonant pairs is silent.
 */

static char	*sx_leading_silent = "tstzghknpnptpf";

unsigned int
soundex(string)
register char		*string;
/*
 * Compute the soundex hash for the argument string.
 * -- somewhat modified from the original algorithm.
 * "Margaret K. Odell and Robert C. Russell.  U.S.
 * patents 1261167 (1918) and 1435663 (1922)."  as
 * reprinted in Donald Knuth, Sorting and Searching.
 */
{
	register int	c;	/* Current character			*/
	register char	*sxp;	/* -> leading silent string		*/
	int		last;	/* Previous character for doubles	*/
	int		next;	/* Next character in the string		*/
	int		value;	/* Soundex value			*/
	int		temp;	/* Hash for this character		*/
	int		count;	/* Want only three digits		*/

#ifdef	DEBUG
	if (debug) {
	    printf("\"%s\" = ", string);
	}
#endif
	while (c = *string++, !isalpha(c)) {
	    if (c == EOS)
		return (0);
	}
	last = c = tolower(c);
	value = c - 'a' + 1;	/* Initial hash == first letter value	*/
#ifdef	DEBUG
	if (debug) {
	    printf("(%c %d)", c, value);
	}
#endif
	next = *string++;
	next = tolower(next);
	count = 3;		/* Maximum times through this loop	*/
	while (--count >= 0 && (c = next) != EOS) {
	    next = *string++;
	    next = tolower(next);
	    if (next != EOS) {
		/*
		 * Ignore the first letter in a silent-pair.
		 */
		for (sxp = sx_leading_silent; *sxp != EOS; sxp++) {
		    if (*sxp++ == c && *sxp == next) {
			goto reject;	/* reject silent first letter	*/
		    }
		}
		/*
		 * Change 'ph' to 'f'
		 */
		if (c == 'p' && next == 'h') {
		    c = 'f';
		    next = *string++;
		    next = tolower(next);
		}
	    }
	    if (islower(c)) {
		if ((temp = sx_letters[c - 'a']) != S_V && c != last) {
		    value *= S_MAX;
		    value += temp;
#ifdef	DEBUG
		    if (debug) {
			printf("(%c=%d %d)", c, temp, value);
		    }
#endif
		}
		last = c;
	    }
reject:	;				/* Jump here on silent pairs	*/
	}
	while (--count >= 0)		/* Finish off the hash code	*/
	    value *= S_MAX;
	return (value);
}
usage()
/*
 * Help message
 */
{
	register char		**hp;

	for (hp = helpmessage; *hp != NULL; hp++) {
	    fprintf(stderr, "%s\n", *hp);
	}
}

                                                                                                                                                                                                                                                                                                                                                                                                                          