/*
 *			R S G E N
 *
 * Build (and test) the database for rsent() which generates random
 * text messages.  From James Gimpel, "Algorithms in SNOBOL4".
 */

/*)BUILD	$(PROGRAM) = rsgen
		$(FILES) = { rsgen rsent rsout rstest }
*/

#ifdef	DOCUMENTATION

title	rsgen	Random sentence generator compiler
index		Random sentence generator compiler

synopsis
	.s.nf
	rsgen input_file output_file table_name
	.s.f
description

	If all three arguments are given, rsgen reads
	the input file, compiling the grammar.  It then
	writes the C language table to the output file,
	using the third argument to specify the table name.
	.s
	If only an input_file is specified, the file is
	compiled and the program enters a dialog mode
	so you can generate sample sentences.  A C table
	can be generated from dialog mode.

grammar format

	The random sentence generator accepts sentences in
	a context-free format (also called "Backus-Naur" form
	by computer scientists).  In this format, there are
	"terminal symbols" -- words to be output and "non-terminal
	symbols" -- names of grammar rules.  This grammar also
	allows specification of "rule weights" so that certain
	rule choices are more likely than others.

	"Terminal symbols" are strings of words which are copied
	to the output device without interpretation.

	"Non-terminal symbols" are words enclosed in angle brackets
	(where the word is found in the dictionary of non-terminals).

	For example, here is a simple grammar:

	    ; comments have ';' in column 1
	    ; but they can't appear within a rule.
	    <HELLO> = Hello there.	|
		Hi there.		|
		Hello <NICE> <PERSON>.
	    ; note that a rule is continued by '|'
	    ; at the end of the line.  The '|' separates
	    ; alternative rule expansions.
	    ;
	    <NICE> = #4# | nice | <VERY> nice
	    ; 4 times out of six, <NICE> does nothing.
	    ; 1 time out of six, <NICE> outputs 'nice'.
	    ; 1 time out of six, <NICE> outputs '<VERY> nice'
	    ;
	    <VERY> = #4# very | <VERY> <VERY>
	    ; <VERY> illustrates a "recursive" rule that
	    ; may expand to "very" or "very very" or similar.
	    ; See Gimpel's book for an explanation of how
	    ; to prevent these from expanding forever.
	    ;
	    <PERSON> = man | woman | child | computer

	See the file RSTEST.GRM for an extensive sample.

diagnostics
	
	Various, self-explanatory, use a sample grammar that
	is at times quite insulting.

author

	Martin Minow

	From an algorithm published by James Gimpel in
	"Algorithms in SNOBOL4".

bugs

#endif
#include <stdio.h>
#include <ctype.h>
#ifdef	vms
#include <ssdef.h>
#define	NORMAL	SS$_NORMAL
#define	FAILURE	SS$_ABORT
extern FILE	*fdopen();
#define	CREATE(f, m) fdopen(creat(f, 0, "rat=cr", "rfm=var"), m)
#else
#define	CREATE	fopen
#ifdef	decus
#define	NORMAL	IO_SUCCESS
#define	FAILURE	IO_ERROR
#else
#define	NORMAL	0
#define	FAILURE	2
#endif
#endif
#ifndef decus
#define	streq(st1, st2)	(strcmp(st1, st2) == 0)
#endif

#define	EOS		'\0'
#define	EOL		'\n'
#define	TRUE		1
#define	FALSE		0
#define	NRULE		50		/* Number of rules		*/
#define	NTERM		400		/* Rule bodies			*/
#define TEMPMAX		256		/* Temp buffer size		*/

/*
 * One compiled grammar rule.  The same layout is written into the
 * generated C file (see header[]), so the two must stay in step.
 */
typedef struct rs_rule {
	char	*r_name;		/* Rule name, without the <>	*/
	int	r_weightsum;		/* Sum of all weights		*/
	char	**r_term;		/* -> first of this rule's	*/
					/* terms; a NULL entry ends the	*/
					/* list.  Byte 0 of each term	*/
					/* is its weight, text follows.	*/
} RS_RULE;

RS_RULE		rule[NRULE];		/* Compiled rules (getrule)	*/
static char	*term[NTERM];		/* All rule terms (getbody)	*/

extern RS_RULE	rstest[];		/* For nasty error messages	*/
extern int	wr_word();		/* Output routine given to rsent */

int	rindex = 0;			/* Rule index			*/
int	tindex = 0;			/* Term index			*/
int	debug = 0;			/* Magic printout hack flag	*/
extern long	seed;			/* Magic for debugging only	*/
long	oldseed;			/* More magic for debugging	*/
/*
 * line[] holds the grammar line being parsed, but it is also used
 * as the reply buffer for every interactive prompt.
 */
char	line[133];			/* Input text line		*/
char	*linep = line;			/* -> current input text	*/
char	temp[TEMPMAX];			/* Working text			*/
FILE	*infd = NULL;			/* Grammar input file		*/

/*
 * Text to output to define praise subroutine.
 *
 * outgrammar() copies these lines, one per output line, to the top of
 * the generated C file ahead of the term and rule tables.  The typedef
 * spelled out here must match RS_RULE as declared in this file.  A NULL
 * entry ends the list.
 */

char	*header[] = {
	"#include\t\t<stdio.h>",
	"#ifdef\tM68000",
	"ROM_SECT(_C_D00)",
	"#endif",
	"typedef struct rs_rule {",
	"\tchar\t*r_name;",
	"\tint\tr_weightsum;",
	"\tchar\t**r_term;",
	"} RS_RULE;",
	NULL,
};
main(argc, argv)
int		argc;			/* Argument counter		*/
char		*argv[];		/* Argument vector		*/
/*
 * Get grammar, then generate random sentences.
 *
 *	<grammar>	::=	<rule>
 *			||	<rule> <grammar>
 */
{
	char		*outtabname;
	char		*outfilename;

	time(&seed);			/* Randomize		*/
	if (argc > 1 && argv[1][0] == '-' && tolower(argv[1][1]) == 'd') {
	    debug++;
	    argc--;
	    argv++;
	}
	outtabname = NULL;
	outfilename = NULL;

	switch (argc) {
	case 0:
	case 1:				/* No arguments given	*/
	    if ((infd = fopen("rstest.grm", "r")) != NULL)
		printf("Reading default grammar \"rstest.grm\"\n");
	    else {
		do {
		    printf("Grammar input file: ");
		    fflush(stdout);
		    if (gets(line) == NULL)
			exit(1);
		    if ((infd = fopen(line, "r")) == NULL) {
			perror(line);
			nastygram("Can't find your grammar file. ",
			"You're <REALLY> a <STUPID> <FOOL>.");
			nastygram("but I'll give you another chance.", "");
		    }
		} while (infd == NULL);
	    }
	    getgrammar();
	    process();
	    break;

	case 4:			/* All arguments specified		*/
	    outtabname = argv[3];
	case 3:			/* Output file name specified		*/
	    outfilename = argv[2];
	case 2:			/* Grammar file specified		*/
	    if (outfilename == NULL) {
		outfilename = "temp.c";
		fprintf(stderr, "Grammar output to \"%s\"\n", outfilename);
	    }
	    if (outtabname == NULL) {
		outtabname = "rs_table";
		fprintf(stderr, "Table name is \"%s\"\n", outtabname);
	    }
	    if ((infd = fopen(argv[1], "r")) == NULL) {
		perror(argv[1]);
		sprintf(line,
		    "Your rule file \"%s\" wasn't found.  ", argv[1]);
		nastygram(line,
		    "Try again some other day, you <STUPID> <FOOL>.");
		exit(FAILURE);
	    }
	    getgrammar();
	    outgrammar(outfilename, outtabname);
	    stats();
	    break;

	default:
		nastygram("Only a <STUPID> <FOOL> would specify",
			"too many arguments.  Goodbye.");
		exit(FAILURE);
	}
}

nastygram(why1, why2)
char		*why1;
char		*why2;
/*
 * Print a two-part complaint.  Each part is handed to rsent() with
 * the rstest[] table, so any <RULE> names in the text are expanded
 * from that grammar rather than the user's.  The closing
 * wr_word(NULL) call presumably ends the output line -- wr_word()
 * is not defined in this file.
 */
{
	char		*part[2];
	register int	i;

	part[0] = why1;
	part[1] = why2;
	for (i = 0; i < 2; i++) {
	    rsent(part[i], rstest, wr_word, stdout);
	}
	wr_word(NULL, stdout);
}

stats()
/*
 * Report how many rule and term table entries have been used.
 */
{
	fprintf(stdout, "%d rules, %d terms\n", rindex, tindex);
}

process()
/*
 * Get command and do it
 */
{
	register char	*lp;
	register int	howmany;
	extern int	out();

	stats();
	printf("<return> for rule names,\n'?' for grammar dump,\n");
	printf("'.' <filename> to write grammar in C\n");
	printf("or (<number>)rulename\n");
	for (;;) {
	    printf("* ");
	    fflush(stdout);
	    if (gets((lp = line)) == NULL)
		break;
	    if (*lp == EOS)
		dumpnames();
	    else if (streq(lp, "??"))
		debug++;
	    else if (streq(lp, "?"))
		dumpgrammar();
	    else if (*lp == '.') {
		for (lp++; *lp == ' '; lp++)
		    ;
		outgrammar(lp, NULL);
	    }
	    else {
		if (isdigit(*lp)) {
		    howmany = atoi(lp);
		    while (isdigit(*lp))
			lp++;
		    if (*lp == EOS) {
			rsent("Gotta have a rule, you <STUPID> <FOOL>.",
			    rstest, wr_word, stdout);
			continue;
		    }
		}
		else {
		    howmany = 1;
		}
		while (--howmany >= 0) {
		    rsent(lp, rule, wr_word, stdout);
		    wr_word(NULL, stdout);
		}
	    }
	}
}
outgrammar(filename, tablename)
char		*filename;
char		*tablename;
{
	register char	**hp;
	register int	i;
	register FILE	*outfd;

	outfd = NULL;
	if (*filename != EOS) {
	    if ((outfd = CREATE(filename, "w")) == NULL) {
		perror(filename);
		printf("Can't create \"%s\"\n", filename);
	    }
	}
	while (outfd == NULL) {
	    printf("Output C grammar to? ");
	    fflush(stdout);
	    if (gets(line) == NULL)
		exit(NORMAL);
	    if ((outfd = CREATE(line, "w")) == NULL) {
		perror(line);
		printf("Can't create \"%s\".\n", line);
	    }
	}
	if (tablename != NULL)
	    strcpy(line, tablename);
	else line[0] = EOS;
	while (line[0] == EOS) {
	    printf("Rule vector name: ");
	    fflush(stdout);
	    if (gets(line) == NULL)
		exit(NORMAL);
	};
	for (hp = header; *hp != NULL; hp++) {
	    fprintf(outfd, "%s\n", *hp);
	}
	fprintf(outfd, "static char *term[%d] = {\n", tindex);
	for (i = 0; i < tindex; i++) {
	    if (term[i] == NULL) {
		fprintf(outfd, " NULL,\n");
	    }
	    else {
		fprintf(outfd, " \"\\%03o%s\",\n", term[i][0], &term[i][1]);
	    }
	}
	fprintf(outfd, "};\n");
	fprintf(outfd, "RS_RULE %s[%d] = {\n", line, rindex + 1);
	for (i = 0; i <= rindex; i++) {
	    if (rule[i].r_name == NULL)
		fprintf(outfd, "{ NULL,");
	    else
		fprintf(outfd, "{ \"%s\",", rule[i].r_name);
	    fprintf(outfd, "\t%d, ", rule[i].r_weightsum);
	    if (rule[i].r_term == NULL)
		fprintf(outfd, "NULL },\n");
	    else
		fprintf(outfd, " &term[%d] },\n", rule[i].r_term - &term[0]);
	}
	fprintf(outfd, "};\n");
	fclose(outfd);
}
dumpnames()
/*
 * Dump all rule names
 */
{
	register int		r;
	register RS_RULE	*rp;
	register int	len;

	for (r = 0, rp = &rule[0]; r < rindex; r++, rp++) {
	    sprintf(line, "<%s> ", rp->r_name);
	    wr_word(line, stdout);
	}
	wr_word(NULL, stdout);
}

dumpgrammar()
/*
 * Dump the entire grammar.  For each rule this prints its index,
 * name and weight sum, then every alternative as "#weight# text",
 * with the alternatives separated by " |".
 */
{
	int			r;
	register RS_RULE	*rp;
	register char		**termp;
	register char		*tp;

	for (r = 0, rp = &rule[0]; r < rindex; r++, rp++) {
	    printf("%3d <%s> [%d] ::=\n", r, rp->r_name, rp->r_weightsum);
	    for (termp = rp->r_term; (tp = *termp++) != NULL;) {
		/*
		 * Byte 0 is the weight.  Mask it so that a weight
		 * above 127 is not printed as a negative number
		 * where plain char is signed.
		 */
		printf(" #%d# ", *tp++ & 0377);
		printf(" %s", tp);
		if (*termp != NULL)
		    printf(" |\n");
	    }
	    printf("\n");
	}
}
getgrammar()
/*
 * Read the grammar
 */
{
	line[0] = EOS;
	while (!feof(infd)) {
	    getrule();
	}
	rule[rindex].r_name = NULL;		/* terminate rules	*/
	rule[rindex].r_weightsum = 0;
	rule[rindex].r_term = NULL;
}

getrule()
/*
 * Read one rule:
 *
 *	<rule>		::=	<rule_name> '=' <rule_bodies>
 *	<rule_name>	::=	'<' <text_string> '>'
 *	<rule_bodies>	::=	<rule_body>
 *			||	<rule_body> '|' <rule_bodies>
 *
 * '|' at the end of the line continues rule_bodies on the next line.
 * else, the rule ends at the end of line.
 *
 * ';' outside a rule is a comment line.
 */
{
	register RS_RULE	*rp;
	register int		c;
	char		*readname();
	int		getbyte();

	for (;;) {		/* Loop until a rule is read in		*/
	    while ((c = peek()) == ' ' || c == EOL)
		getbyte();
	    if (c == ';') {
		skipeol();
		continue;
	    }
	    if (c == EOF) {
		return (FALSE);
	    }
	    if (rindex >= (NRULE - 1)) {
		error("More than %d rules\n", NRULE);
	    }
	    rp = &rule[rindex++];		/* rp -> new rule	*/
	    if ((rp->r_name = readname()) == NULL) {
		bug("E", "no name for rule");
		skipeol();
		rindex--;
		continue;
	    }
	    skipwhite();
	    if (getbyte() != '=') {
		bug("W", "expecting '=' after rule name");
	    }
	    rp->r_term = &term[tindex];
	    rp->r_weightsum = getbody();
	    return (TRUE);
	}
}
int
getbody()
/*
 * Read all bodies for this rule:
 *
 *	<rule_body>	::=	(<weight>) <terms>
 *	<weight>	::=	'#' <digit_string> '#'
 *	<term>		::=	text string with <rule_name>s
 *
 * Each body is stashed as one string in term[]: byte 0 holds the
 * weight (1 if none was given) and the text follows, with leading
 * and trailing blanks removed.  A NULL entry is added after the
 * last body.  Returns the sum of the weights.
 *
 * A weight has to fit in that one byte and must not be zero, since
 * a zero first byte would make the stored string look empty.
 * Weights outside 1..255 are reported and forced into range.
 * NOTE(review): whether rsent() copes with weights above 127 where
 * plain char is signed cannot be seen from this file.
 */
{
	int		wsum;		/* Weight sum			*/
	register char	*tp;		/* Text pointer			*/
	register int	c;		/* Current character		*/
	int		value;		/* Working value		*/
	int		getbyte();
	char		*stash();

	wsum = 0;
	while ((c = skipwhite()) != EOL && c != EOF) {
	    if (tindex >= (NTERM-1)) {
		error("More than %d rule terms\n", NTERM);
	    }
	    if (c == '#') {
		/*
		 * Specific weight:  <name> = #number# body
		 */
		getbyte();
		value = 0;
		while (isdigit((c = getbyte()))) {
		    /*
		     * Stop adding digits once the value is out of
		     * range, so a long number cannot overflow.
		     */
		    if (value <= 255)
			value = value * 10 + (c - '0');
		}
		if (c != '#')		/* c was read and is gone	*/
		    bug("W", "expecting '#' after rule weight");
		if (value < 1 || value > 255) {
		    bug("W", "rule weight must be between 1 and 255");
		    value = (value < 1) ? 1 : 255;
		}
	    }
	    else value = 1;
	    wsum += value;
	    tp = &temp[0];
	    *tp++ = value;
	    while (tp < &temp[TEMPMAX-2]) {
		switch (peek()) {
		case EOF:
		case EOL:
		case '|':
		    goto breakout;

		case ' ':		/* Trash leading blanks	*/
		    if (tp == &temp[1]) {
			getbyte();
			break;
		    }
		    /* Fall through: an inner blank is kept	*/
		default:
		    *tp++ = getbyte();
		}
	    }
breakout:
	    while (tp > &temp[1] && tp[-1] == ' ')
		tp--;				/* Trailing blanks too	*/
	    *tp = EOS;
	    term[tindex++] = stash(temp);
	    if (skipwhite() == '|') {
		getbyte();
		if (skipwhite() == EOL)
		    getbyte();		/* '|' at EOL: continue	*/
	    }
	}
	/*
	 * A rule with no bodies gets here without passing the check
	 * in the loop, so make sure the terminator still fits.
	 */
	if (tindex >= NTERM) {
	    error("More than %d rule terms\n", NTERM);
	}
	term[tindex++] = NULL;			/* Terminate rule terms	*/
	return (wsum);
}
char *
readname()
/*
 * Read a rule <name>
 */
{
	register char	*tp;
	register int	c;
	char		*stash();
	int		getbyte();

	if (getbyte() != '<') {
	    bug("E", "Name must start with an '<'");
	    return (NULL);
	}
	for (tp = &temp[0]; tp < &temp[TEMPMAX - 2];) {
	    if ((c = getbyte()) == '>' || c == EOL || c == EOF)
		break;
	    *tp++ = c;
	}
	*tp = EOS;
	if (c != '>') {
	    bug("E", "Bad or long rule name -- must end with '>'");
	    fprintf(stderr, "name as read = \"%s\"\n", temp);
	    return (NULL);
	}
	return (stash(temp));
}
/*
 * Stuff
 */

char *
myalloc(size)
int		size;
/*
 * Return size bytes of zeroed memory.  If calloc() cannot supply
 * them, error() is called with a message.
 */
{
	register char	*block;

	block = calloc(size, 1);
	if (block == NULL) {
	    error("?-Praise-Can't allocate %d bytes\n", size);
	}
	return (block);
}

char *
stash(string)
char		*string;
/*
 * Store a string away: return a malloc()'ed copy of string.  If
 * the memory cannot be had, error() is called with a message.
 */
{
	register char	*s;
	register int	len;

	/*
	 * Keep the length as an int: error() takes an int for its
	 * %d, which the raw result of strlen() need not be.
	 */
	len = strlen(string);
	if ((s = malloc(len + 1)) == NULL)
	    error("?-Praise-Can't store %d byte string.\n", len);
	return(strcpy(s, string));
}

/*
 * I/O
 */

int
getbyte()
/*
 * Read one byte of grammar text.  Bytes come from line[]; when that
 * is used up the next line is read from infd.  Returns the byte as
 * a value in 0..255, or EOF when nothing more can be read.
 */
{
	if (*linep == EOS) {
	    if (fgets(line, sizeof line, infd) == NULL)
		return (EOF);
	    linep = line;
	}
	/*
	 * Mask off any sign extension.  Without this, where plain
	 * char is signed, a 0377 byte in the input would come back
	 * equal to EOF and the other high-bit bytes would be
	 * negative.
	 */
	return (*linep++ & 0377);
}

int
peek()
/*
 * Return the next byte without using it up: read it with getbyte()
 * and then step linep back over it.  EOF is returned as is, with
 * nothing to step back over.
 */
{
	register int	next;

	next = getbyte();
	if (next == EOF)
	    return (EOF);
	linep--;
	return (next);
}

int
skipwhite()
/*
 * Discard blanks and tabs.  Returns the first byte that is neither,
 * as peek() sees it, so that byte is still there to be read.
 */
{
	register int	next;

	for (;;) {
	    next = peek();
	    if (next != ' ' && next != '\t')
		return (next);
	    getbyte();
	}
}

skipeol()
/*
 * Throw away input up to and including the next end of line, or up
 * to the end of the input if that comes first.
 */
{
	register int	ch;

	do {
	    ch = getbyte();
	} while (ch != EOL && ch != EOF);
}

bug(severity, message)
char		*severity;
char		*message;
{

	fprintf(stderr, "?%s-Praise-%s", severity, message);
	if (!feof(infd)) {
	    fprintf(stderr, " at line:\n%.*s\n", strlen(line) - 1, line);
	    fprintf(stderr, "the error is near byte %d", linep - line);
	    if (linep > line && linep[-1] >= ' ') {
		fprintf(stderr, ": '%c'\n", linep[-1]);
	    }
	    else
		fprintf(stderr, "\n");
	}
	else
	    fprintf(stderr, " at end of input\n");
	if (tolower(severity[0]) == 'f')
	    error("can't continue", 0);
}

#ifndef	decus
error(format, arg)
char		*format;
int		arg;
/*
 * Pull the plug: print a message on stderr and exit with FAILURE.
 * format is a printf format that may use the one int argument.
 */
{
	register FILE	*errfd;

	errfd = stderr;
	fprintf(errfd, format, arg);
	exit(FAILURE);
}
#endif
                                                                                                                                                                                                                                                                                                                                                                            