/*

        Words.c
        A word counter that prints out counts of individual words
        within a document, with their lengths. 
        Includes total words and average length.

        This is a filter program, so invoke it by 
        >words <{infile}

        (c) John Buginas 1986
        All for the public domain

        This was programmed using lattice c.

        It may serve as the skeleton of a system of word analysis tools.

        It works by constructing a binary search tree in memory.

*/




/*
 * Pascal-flavoured spellings for C punctuation and operators.  The code
 * below uses them in place of braces and of ==, !=, && and !.
 * ENDWHILE, ENDIF, ENDFUNCT and END all expand to the same closing brace;
 * the different names only document which construct is being closed.
 */
#define ENDWHILE }
#define ENDIF }
#define ENDFUNCT }
#define EQ ==
#define BEGIN {
#define END }
#define AND &&
#define OR ||
#define NE !=
#define NOT !


#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Size of the word buffer in main, also passed to getword as its limit. */
#define MAXWORD 20
/* ALLOCSIZE, LETTER and DIGIT are not referenced in the visible source. */
#define ALLOCSIZE 2000
#define LETTER 'a'
#define DIGIT  '0'

int unique = 0;
int totallen = 0;
int col = 0;
/* One node of the binary tree of words built by tree(). */
struct tnode 
        BEGIN
                char *word;             /* copy of the word made by strsave */
                int count;              /* number of times the word was seen */
                struct tnode *left;     /* words that strcmp orders before word */
                struct tnode *right;    /* words that strcmp orders after word */
        END;


struct tnode *talloc()
BEGIN
        char *getmem();
        return ((struct tnode *) getmem(sizeof(struct tnode)));
END

/*
 * strsave: make a private copy of the NUL-terminated string s.
 *
 * Returns a pointer to newly allocated storage holding the copy, or NULL
 * when no memory is available.  Uses the standard malloc instead of the
 * compiler-specific getmem so the program links on any C library.
 */
char *strsave(s)
char *s;
{
        char *p;

        p = malloc(strlen(s) + 1);      /* +1 for the terminating '\0' */
        if (p != NULL)
                strcpy(p, s);
        return (p);
}

/*
 * getword: read the next word (a run of alphabetic characters) from stdin.
 *
 * Non-alphabetic input in front of the word is skipped.  The word is stored
 * in w as a NUL-terminated string of at most lim-1 letters; a longer run of
 * letters is delivered in pieces, one piece per call.  The character that
 * stopped the scan is pushed back onto stdin.
 *
 * Returns EOF when the input is exhausted before a word starts, or when lim
 * is too small to hold even one letter.  Otherwise returns a value other
 * than EOF: the pushed-back character, or the last letter of the word when
 * the word ran up to the end of the input.
 */
int getword(w, lim)
char *w;
int lim;
{
        int c;
        int room;

        /* A buffer without space for one letter plus '\0' cannot hold a word. */
        if (lim < 2) {
                if (lim == 1)
                        *w = '\0';
                return (EOF);
        }

        /* Skip everything that cannot start a word. */
        do {
                c = getc(stdin);
        } while (c != EOF && !isalpha(c));
        if (c == EOF)
                return (EOF);

        /*
         * c stays an int for the whole scan: narrowing it through a char
         * would make EOF indistinguishable from the byte 0xFF and would
         * hand isalpha a possibly negative value.
         */
        *w++ = c;
        for (room = lim - 2; ; room--) {
                c = getc(stdin);
                if (c == EOF || !isalpha(c) || room <= 0)
                        break;
                *w++ = c;
        }
        *w = '\0';

        /* The word is complete even if the input ended right behind it. */
        if (c == EOF)
                return ((unsigned char) w[-1]);
        ungetc(c, stdin);
        return (c);
}

struct tnode *tree(p,w)
struct tnode *p;
char *w;
BEGIN
        struct tnode *talloc();
        char *strsave();
        int cond;
        if (p EQ NULL)
                BEGIN
                        p = talloc();
                        p->word = strsave(w);
                        p->count = 1;
                        p->left = p->right = NULL;
                END
        else
        if ((cond = strcmp(w,p->word)) EQ 0)
                p->count++;
        else
        if (cond < 0)
                p->left = tree(p->left,w);
        else
                p->right = tree(p->right,w);
        return(p);
END

treeprint(p)
struct tnode *p;
BEGIN
        if (p NE NULL)
        BEGIN
                treeprint (p->left);
                unique++;
                totallen += (strlen(p->word) * p->count);
                printf ("%5d %s\n", p->count, p->word);
                treeprint (p->right);
        END
END

/*
 * uppercase: convert the NUL-terminated string word to upper case in place.
 *
 * Characters that toupper does not map are left unchanged.  The function
 * has no meaningful return value.
 */
int uppercase(word)
char word[];
{
        char *p;

        /*
         * toupper is only defined for EOF and values representable as
         * unsigned char, so a plain char that may be negative is converted
         * first.
         */
        for (p = word; *p != '\0'; p++)
                *p = toupper((unsigned char) *p);
}


main()
BEGIN
        struct tnode *root, *tree();
        static char copywrite[]="Copywrite john buginas (c) 1986";
        static char word[MAXWORD];
        int t;
        int subtotal = 0;
        root = NULL;
        while ((t = getword(word, MAXWORD)) NE EOF)
        BEGIN
               subtotal++;
               uppercase(word);
               root = tree(root,word);
        END 
        treeprint(root);
        printf("unique %d, total %d\n ", unique, subtotal);
        printf("averagelen %f\n", (float) totallen / subtotal);


END






