/*
 * 			t r . c
 *
 * Character transliterator program.
 *
 * tr is based on the RATFOR version distributed by Berkeley as part of the
 * Virtual Operating System toolkit.  That tr is, in turn, derived from the
 * program translit described in Kernighan and Plauger's Software Tools,
 * which derives from tr in C on UNIX.  Thus, this version is a translation
 * back to C of C code translated to RATFOR!
 *
)EDITLEVEL=05
 */

/*)BUILD
	$(TKBOPTIONS) = {
		TASK	=	...TRX
	}
*/

#ifdef	DOCUMENTATION

title	tr	Character Transliterator Program
index		Character transliterator program

Synopsis

	tr from-chars to-chars

Description

	tr copies the standard input to the standard output with substitution
	or deletion of selected characters.  It is used as follows:

	.lm +8
	tr FROM TO
	.lm -8

	where FROM and TO are strings of characters.  The effect of this
	command depends on the lengths of the two strings.

	If FROM and TO have the same length, characters in TO are substituted
	for characters in FROM.  Thus,

	.lm +8
	tr "abcd" "wxyz"
	.lm -8

	changes all a's to w's, b's to x's, and so on.  Note that the quotes
	are needed to ensure that tr receives lower-case characters.  tr DOES
	distinguish letters by case; the above command has no effect on A's,
	B's, C's, or D's.

	If TO is left out or explicitly null, any characters in FROM are
	deleted.

	If TO is non-null but shorter than FROM, all characters in FROM beyond
	the last one to match up with one in TO translate into the last
	character in TO; BUT any stream of consecutive characters so
	translated is reduced to just one occurrence of the resulting
	character.  Thus,

	.lm +8
	tr "abcde12345" "ABCDE-"
	.lm -8

	changes "aAbB1234cCdD5678eEfF" into "AABB-CCDD-678EEfF".

	If a character appears more than once in FROM, the leftmost
	equivalence prevails.  Thus, to change all strings of *'s into a
	single *, use tr "**" "*".

	In addition to simple characters, FROM and TO may include:
	.lm +8

 Escapes
	.br
	\x is x except that \b is a backspace, \f is a form feed,
	\n is a newline, \r is a return, \t is a tab, and \ddd
	is the character whose OCTAL value is ddd.  (Only 8 bits of
	ddd are retained, and it is ok for there to be only one or
	two valid octal digits present.  Using \0 will cause unpredictable
	results.).  The letters b, f and so on must be lower case to
	be recognized.

 Ranges
	.br
	a-e is the same as abcde, etc.  Note that the range e-a, while legal,
	is empty.

 Classes
	.br
	A number of built-in character classes are available:

		:a 	is the same as a-zA-Z
		:d 	is the same as 0-9
		:n 	is the same as a-zA-Z0-9
		:	(:<SPACE>)  is  the  range  from
			CTRL/A to <SPACE>  in increasing
			numerical order
		:.	is the range including all ASCII
			characters other than NUL (\0).
			Thus,

				tr ":a:." "A-ZA-Z "

			translates all letters to upper-
			case and  reduces all streams of
			non-letters to a single space.

	Class characters a, d and n must be lower case.

 Negation
	.br
	If the first character of FROM is "^", any characters
	not in FROM match.  In this case, TO must be null or only a
	single character.

	.lm -8
	"\" and ":" lose their special meaning if they appear as the last
	character of FROM or TO; "-" loses it if it is the first or last; "^"
	loses it if it is not the first character of FROM; and all characters
	lose their special meaning if preceded by an escape ("\").

	.tp 4
I/O Peculiarities

	The C I/O system terminates records read with a single newline; hence
	the \r character will never be found for replacement.  Also, in RSX
	modes, output files must be broken into records; removing newlines can
	produce huge records that cannot be processed by RMS.

Diagnostics
	.lm +8

 ?TR-F-Not enough memory

 ?TR-F-Unknown class type <character>
	.br
	<character> appeared after a ":", but is not a known class type.

	.lm -8

Bugs

Author

	Jerry Leichter, based on the RATFOR version in Software Tools.

#endif

/*
 * Edit history
 * 0.0	23-Apr-81 JSL	Original implementation, based on the RATFOR version
 *			in Software Tools.
 * 1.0	 1-May-81 JSL	Conversion to use the new Utility Library.
 * 1.1	 4-May-81 JSL	Cleanups; use 'u' mode to leave <CR>'s alone.
 * 1.2	20-May-81 JSL	'u' mode was a bad idea; now a compile-time option.
 * 1.3	 4-Jun-81 JSL	Bad bug - negation caused an infinite loop on EOF!
 *			(Too much optimization.)
 * 1.4	29-Jun-81 JSL	Track change of "blocks" to "vstrings".
 * 2.0	30-Jan-83 JSL	Converted to new standard tool format.  index() ==>
 *			inchr(), which is now in the standard library.
 * 2.0d	31-Jan-83 JSL	Included text for needed CU functions.  This avoids a
 *			bug in BUILD with $(LIBS) and, more important, is con-
 *			sistent with the current system build command files,
 *			which build the tools before they build the Utility
 *			Library.  This should eventually be changed back.
 * 2.1  22-Jun-83 MM	Replaced inchr() by strchr() for portability.
 *			Added redirection for vax native.
 */

/*
 * On-line help text: one string per output line, terminated by a null
 * pointer.  help() prints the strings in order, each followed by a newline;
 * main() calls help() when the only argument is "?".
 */
char	*documentation[] = {
"tr copies the standard input to the standard output with substitution or",
"deletion of selected characters.  It is used as follows:",
"",
"	tr FROM TO <input >output",
"",
"where FROM and TO are strings of characters.  The effect of this command",
"depends on the lengths of the two strings.",
"",
"If FROM and TO have the same length, characters in TO are substituted for",
"characters in FROM.  Thus,",
"",
"	tr \"abcd\" \"wxyz\"",
"",
"changes all a's to w's, b's to x's, and so on.  Note that the quotes are",
"needed to ensure that tr receives lower-case characters.  tr DOES distinguish",
"letters by case; the above command has no effect on A's, B's, C's, or D's.",
"",
"If TO is left out or explicitly null, any characters in FROM are deleted.",
"If TO is non-null but shorter than FROM, all characters in FROM beyond the",
"last one to match up with one in TO translate into the last character in TO;",
"BUT any stream of consecutive characters so translated is reduced to just one",
"occurence of the resulting character.  Thus,",
"",
"	tr \"abcde12345\" \"ABCDE-\"",
"",
"changes \"aAbB1234cCdD5678eEfF\" into \"AABB-CCDD-678EEfF\".",
"",
"If a character appears more than once in FROM, the leftmost equivalence",
"prevails.  Thus, to change all strings of *'s into a single *, use tr ** *.",
"",
"In addition to simple characters, FROM and TO may include:",
"",
"	Escapes   \\x is x except that \\b is a backspace, \\f is a form feed,",
"		  \\n is a newline, \\r is a return, \\t is a tab, and \\ddd",
"		  is the character whose OCTAL value is ddd.  (Only 8 bits of",
"		  ddd are retained, and it is ok for there to be only one or",
"		  two valid octal digits present.  Using \\0 will cause random",
"		  results.).  The letters b, f and so on must be lower case to",
"		  be recognized.",
"	Ranges    a-e is the same as abcde, etc.  Note that e-a is empty.",
"	Classes   A number of built-in character classes are available:",
"			:a  is the same as a-zA-Z",
"			:d  is the same as 0-9",
"			:n  is the same as a-zA-Z0-9",
"			:   (:<SPACE>) is the range from CTRL/A to <SPACE>",
"			      in increasing numerical order",
"			:.  is the range including all ASCII characters other",
"			      than NUL.  Thus,",
"",
"				tr \":a:.\" \"A-ZA-Z \"",
"",
"			      translates all letters to uppercase and reduces",
"			      all streams of non-letters to a single space.",
"		  Class characters a, d and n must be lower case.",
"	Negation  If the first character of FROM is \"^\", any characters",
"		  not in FROM match.  In this case, TO must be null or only a",
"		  single character.",
"",
"\"\\\" and \":\" lose their special meaning if they appear as the last",
"character; \"-\" loses it if it is the first or last; \"^\" loses it if",
"it is not the first of FROM; and all characters lose their special meaning if",
"preceded by an escape (\"\\\").",
"",
"Note:  The C I/O system terminates records read with a single newline; hence",
"the \\r character will never be found for replacement.  Also, in RSX modes,",
"output files must be broken into records; removing newlines can produce huge",
"records that cannot be processed by RMS.",
0 };

#include <stdio.h>
/* #include <vstrin.h> */
/*
 *			v s t r i n g . h
 */

#ifdef	DOCUMENTATION

title	vstring		Header file for vstrings
index		Header file for using vstrings

synopsis

	 #ifdef vms
	 #include "c:vstrin.h"
	 #else
	 #include <vstrin.h>
	 #endif

description

	This header file is used to allow programs to access vstrings.  See
	vstring.c for additional details.

	This file defines the structure of a vstring. The fields are:
	.lm+8

 char *vsdata

	Pointer to the actual data.  This will always be the first field, so 
	that a vstring can be considered to be of type char ** if you only
	want to read it.  vsdata will be NULL if the vstring has been damaged.
	
 unsigned vslen

	Number of bytes stored in the vstring.  

 unsigned vsdim

	Total number of bytes there is room for.

 unsigned vsext

	Allocation quantum - the amount the vstring is to grow by if it fills.
	If vsext==0, the vstring cannot grow.

	.lm-8
	The only fields generally directly accessed by programs are vsdata and
	vslen. Note that vsdata may change over time as items are added to and
	removed from the vstring.

	The only field you should normally change is vsext, the extension
	quantum.  The value current the next time the vstring must grow will
	determine how much it grows by.

	vstring.h also includes external declarations of the appropriate type
	for the functions in vstring.c.

bugs

	A vstring is really just a special case of a flex with item size 1.
	It has been retained as a separate datatype because the type of
	operations done on strings are not always generalizable.  For example,
	there is no obvious use for an fxadds().

author
	Jerry Leichter

#endif

/*
)EDITLEVEL=16
 *
 * Edit history
 * 0.0 30-Apr-81 JSL	Invention
 * 0.1  4-May-81 JSL	Better error handling
 * 0.2 12-May-81 JSL	Decided to keep blocks after all; balloc() is now
 *			block().
 * 0.3 13-May-81 JSL	Added _BLOCKS_
 * 0.4 18-May-81 JSL	Fix up documentation so GETRNO can find it.
 * 1.0 26-Jun-81 JSL	Changed name to vstring, which makes much more sense.
 * 1.1 29-Jun-81 JSL	More name changes, get consistent with flexes.
 * 1.2 14-Jul-82 JSL	Fixed some ancient typos.
 */

#ifndef _VSTRING_		/* Don't do this twice */
#define _VSTRING_

/*
 * Header of a vstring, a dynamically expandable string.  vsdata is kept as
 * the first field so that a vstring can be read through a char **.
 */
typedef struct vheader
	{ char *vsdata;		/* the stored bytes; NULL once damaged	*/
	  unsigned vslen;	/* number of bytes currently stored	*/
	  unsigned vsdim;	/* number of bytes there is room for	*/
	  unsigned vsext;	/* growth quantum; 0 means cannot grow	*/
	} VSTRING;

extern VSTRING *vstring(), *vsaddc(), *vsadds();

#endif

/*
 * Special characters recognized while expanding the FROM and TO arguments.
 */
#define EOS	'\0'	/* end of string */
#define ESCAPE	'\\'	/* escape character */
#define NOT	'^'	/* match complement of class */
#define PCLASS	':'	/* pre-defined class */
#define RANGE	'-'	/* range specifier */

/*
 * TRUE and FALSE are used by this program but are not defined anywhere in
 * this file; presumably the original target's <stdio.h> supplied them.
 * Provide fallbacks so the program still builds where <stdio.h> does not.
 */
#ifndef TRUE
#define TRUE	1
#endif
#ifndef FALSE
#define FALSE	0
#endif

extern char esc();	/* Utility library escape handler */

extern VSTRING *makttb();	/* get makttb to be the right type */

static char filename[80];
			/* space for a filename for reopening stdin, stdout */
/*
 * main - character transliterator driver.
 *
 * Picks FROM and (optionally) TO out of the command line, expands each into
 * a translation table with makttb(), and then copies standard input to
 * standard output, translating, collapsing or deleting characters as the
 * two tables dictate.  "tr ?" alone prints the help text instead.
 *
 * Exits through usage() when there are no arguments or more than two, and
 * through error() when a table cannot be built.
 */
main(argc, argv)
int	argc;		/* Number of arguments				*/
char	*argv[];	/* Argument vector				*/
{
	register int	c;	/* int, not char: must be able to hold EOF */
	register int	i;
	char		*froms;
	char		*tos;
	VSTRING		*from, *to;
	int		allbut, collapse, lastto;
#ifdef	vms
	extern int	errno;
#endif

#ifdef	vms
	/*
	 * VAX native: do our own <file and >file redirection.  Arguments
	 * consumed here are replaced by NULL so the scan below skips them.
	 * The whole loop lives inside the #ifdef so that its braces balance
	 * on every system.
	 */
	for (i = 1; i < argc; i++) {
	    if (argv[i][0] == '<') {
		if (freopen(&argv[i][1], "r", stdin) == NULL) {
		    perror(&argv[i][1]);
		    exit(errno);
		}
		argv[i] = NULL;
	    }
	    else if (argv[i][0] == '>') {
		if (freopen(&argv[i][1], "w", stdout) == NULL) {
		    perror(&argv[i][1]);
		    exit(errno);
		}
		argv[i] = NULL;
	    }
	}
#endif
	/*
	 * The first remaining argument is FROM, the second is TO.
	 */
	froms = tos = NULL;
	for (i = 1; i < argc; i++) {
	    if (argv[i] == NULL)
		continue;
	    if (froms == NULL)
		froms = argv[i];
	    else if (tos == NULL)
		tos = argv[i];
	    else {
		usage("More than two arguments");
	    }
	}
	if (froms == NULL) {
	    usage("No arguments");
	}
	if (tos == NULL && strcmp(froms, "?") == 0) {
	    help();
	    return;
	}
	if (*froms == NOT)		/* leading '^' negates FROM */
	{	allbut = TRUE;
		froms++;
	}
	else
		allbut = FALSE;
	from = makttb(froms, vstring(strlen(froms), 4));
	to = vstring(16,2);
	if (tos == NULL)		/* no TO at all: empty table */
		to = vsaddc(to, EOS);
	else 	to = makttb(tos, to);
	if (from == NULL || to == NULL)
		error("?TR-F-Not enough memory\n");
	tos = to->vsdata;
	froms = from->vsdata;
	/*
	 * lastto is the index of the last character of TO, or -1 when TO is
	 * empty (pure deletion).  The lengths are compared as ints so that
	 * an empty table yields -1 rather than a huge unsigned value.
	 */
	lastto = (int)strlen(tos) - 1;
	if ((int)strlen(froms) - 1 > lastto || allbut == TRUE)
		collapse = TRUE;
	else	collapse = FALSE;

#ifdef DEBUG
	printf("collapse %d, allbut %d, lastto %d\n",collapse,allbut,lastto);
	printf("to array: %s\n",tos);
	printf("from array: %s\n",froms);
#endif

#ifdef USE_U_MODE
#ifdef rt11
	/* 			R T - 1 1

	   Re-open our input and output files in non-stream mode so that
	   things like carriage returns get left alone.

	 */

	fgetname(stdin, filename);
	if (freopen(filename,"rn",stdin) == NULL)
		error("?TR-F-Can't reopen \"%s\"\n",filename);
	fgetname(stdout, filename);
	if (freopen(filename,"wn",stdout) == NULL)
		error("?TR-F-Can't reopen \"%s\"\n",filename);
#endif
#endif

	while ((c = getchar()) != EOF)
	{	i = xindex(froms,c,allbut,lastto);
		if (collapse && i>=lastto && lastto>=0)	/* collapse */
		{	putchar(tos[lastto]);
			/*
			 * Swallow the rest of the run.  EOF is tested before
			 * the character is looked up, so it is never treated
			 * as data.
			 */
			do {
				if ((c = getchar()) == EOF)
					break;
				i = xindex(froms,c,allbut,lastto);
			} while (i >= lastto);
			if (c == EOF)		/* input ended inside the run */
				break;
		}
		if (i>=0 && lastto>=0)			/* translate */
			putchar(tos[i]);
		else if (i < 0)				/* copy */
			putchar(c);
						/* else    delete */
	}
}

VSTRING *
makttb(array,ttb)
char array[];
VSTRING *ttb;
/* makttb - make a ttb (translation table) from the expansion of array in ttb;
 * return ttb or NULL on error.
 *
 * array is the FROM or TO argument text; ttb is the vstring that receives
 * the expansion, followed by a terminating EOS.  A NULL ttb (for instance a
 * failed vstring() call passed straight through) is reported as an error at
 * once, because filttb() would otherwise look inside it.
 */
{
	if (ttb == NULL)
		return(NULL);
	filttb(EOS,array,ttb);
	return(vsaddc(ttb,EOS));
}

/*
 * xindex - look c up in the EOS-terminated table array.
 *
 * When allbut is zero: return the index of the leftmost occurrence of c in
 * array, or -1 if c is not there.
 * When allbut is non-zero the sense is inverted: return -1 if c IS in
 * array, otherwise lastto+1.
 *
 * A NUL character is never considered a member of array.  strchr() would
 * otherwise match the terminating EOS and report NUL as present at index
 * strlen(array).
 */
int
xindex(array,c,allbut,lastto)
char array[],c;
int allbut,lastto;
{
	extern char	*strchr();
	register char	*where;		/* -> c within array, or NULL	*/

	if (c == '\0')
		where = NULL;		/* the terminator is not a member */
	else
		where = strchr(array, c);

	if (allbut)
		return((where != NULL) ? -1 : lastto + 1);
	if (where == NULL)
		return(-1);
	return((int)(where - array));
}

/*	Utility routines */

filttb(delim,array,ttb)
char delim,array[];
VSTRING *ttb;
/* filttb - expand the characters in array into ttb, stopping at delim */
{
	char *a;		/* NOT register - Lvalue needed for esc() */

	for (a = array; *a != delim && *a != EOS; a++)
	{
		if (*a == ESCAPE)
			vsaddc(ttb,esc(&a));
		else if (*a == PCLASS && *(a+1) != EOS)
			switch(*++a)
			{
			case 'a':
			case 'n':
				insrange('a','z',ttb);
				insrange('A','Z',ttb);
				if (*a == 'a')
					break;
			case 'd':
				insrange('0','9',ttb);
				break;
			case ' ':
				insrange('\001',' ',ttb);
				break;
			case '.':
				insrange('\001','\177',ttb);
				break;
			default:
				error("?TR-F-Unknown class type \"%c\"\n",*a);
			}
		else if (*a == RANGE && ttb->vslen > 0 && *(a+1) != EOS)
				dorange(&a,ttb);
		else	vsaddc(ttb,*a);
	}
	return;
}

dorange(ps,ttb)
char **ps;
VSTRING *ttb;
/*
 * dorange - replace the last character stored in ttb by the whole range
 * running from that character through the character following the RANGE
 * mark at *ps.  The upper end is read with esc(), so it may be an escape
 * sequence; *ps is left wherever esc() leaves it.
 */
{
	int first;			/* low end: last char already in ttb */
	int last;			/* high end: char after the RANGE    */

	++*ps;				/* step over the RANGE char */

	/* take the low end back out; insrange() puts it in again */
	ttb->vslen -= 1;
	first = ttb->vsdata[ttb->vslen];

	last = esc(ps);
	insrange(first,last,ttb);
	return;
}

insrange(lo,hi,ttb)
VSTRING *ttb;
int lo,hi;
/*
 * insrange - append every character code from lo through hi, inclusive,
 * to ttb.  Both bounds are first reduced to their low 8 bits; nothing is
 * added when lo is greater than hi.
 */
{
	register int next,stop;

	next = lo & 0377;
	stop = hi & 0377;
	while (next <= stop)
	{
		vsaddc(ttb,next);
		next++;
	}
	return;
}

help()
/*
 * Print the help text: every string in documentation[], one per line, up
 * to the terminating null entry.
 */
{
	register int	line;

	line = 0;
	while (documentation[line]) {
		printf("%s\n", documentation[line]);
		line++;
	}
}

/*
 * usage - report a command-line error on stderr: the message s tagged
 * with "?TR-E-", then a one-line usage summary.  Exits with status 1 and
 * so never returns to the caller.
 */
usage(s)
char	*s;
{
	fprintf(stderr, "?TR-E-%s\n", s);
	fputs("Usage: tr from to.  tr ? for help.\n", stderr);
	exit(1);
}

/*
 * The following are exact copies of text from the Utility Library.  See
 * the revision history (for version 2.0d) for an explanation of why it's
 * here.
 */

/*
 *				e s c . c
 */

/*)LIBRARY
*/

#ifdef	DOCUMENTATION

title	esc	Process Escaped Characters
index		Process escaped characters

synopsis

	 char esc(ppc)
	 char **ppc;

	 int esc_msk;

description

	esc(ppc) recognizes and processes the escaped characters recognized by
	the C compiler.  If *ppc points to any character other than '\', esc()
	just returns that character.  Otherwise, it examines the next
	character.  If it finds a member of the set {b,f,n,r,t}, the
	appropriate character is returned, e.g., a <TAB> for '\t'. Also, \ddd,
	ddd being up to three octal digits, returns the character with value
	ddd.  Only the bottom 8 bits are retained, so the result is always a
	"proper" character.  This can be changed by changing the global
	esc_msk to contain whatever mask you prefer.  (The default is 0377.)

	The special characters b, f, and so on are recognized in lower case
	only.

	If '\' is the last character of the string, '\' itself is returned.

	If '\' is followed by any other character x, x itself is returned.

	In all cases, *ppc is left pointing at the last character that esc()
	has "eaten", e.g., at the 't' if it is returning a <TAB> for '\t'.

bugs

author

	Jerry Leichter

#endif

/*
 * )EDITLEVEL=17
 * Edit history
 * 0.0  1-May-81 JSL	Invention
 * 0.1 28-May-81 JSL	Conversion to new comment convention
 * 0.2 23-Jun-81 JSL	escmsk ==> esc_msk
 * 0.3 29-Dec-81 MM	Redone for vax c
 * 0.4 13-Jul-82 JSL	Change esc_msk default to 0377 to support XASCII
 */

#define ESCAPE '\\'
#define EOS '\0'

int esc_msk = 0377;		/* mask applied to the value of a \ddd escape */

/*
 * esc - return the character denoted by the text at *ppc.
 *
 * If *ppc does not point at a '\', or the '\' is the last character of the
 * string, that character itself is returned and *ppc is left alone.
 * Otherwise the character after the '\' is examined:
 *	b f n r t	yield backspace, form feed, newline, return, tab;
 *	0 .. 7		start an octal value of up to three digits, which
 *			is returned ANDed with esc_msk;
 *	anything else	is returned unchanged.
 * On return *ppc points at the last character consumed.
 */
char
esc(ppc)
char **ppc;
{
	register char	*cursor;
	register char	value, digit;
	register int	more;

	cursor = *ppc;
	value = cursor[0];
	if (value != ESCAPE || cursor[1] == EOS)
		return(value);		/* plain character or trailing '\' */

	cursor++;			/* onto the character after the '\' */
	value = *cursor;

	if (value >= '0' && value <= '7')
	{
		/* first octal digit in hand; accept at most two more */
		value -= '0';
		for (more = 2; more > 0; more--)
		{
			digit = cursor[1];
			if (digit < '0' || digit > '7')
				break;
			value = (value << 3) | (digit - '0');
			cursor++;
		}
		*ppc = cursor;
		return(value & esc_msk);
	}

	*ppc = cursor;
	switch (value)
	{
	case 'b':
		value = '\b';
		break;
	case 'f':
		value = '\f';
		break;
	case 'n':
		value = '\n';
		break;
	case 'r':
		value = '\r';
		break;
	case 't':
		value = '\t';
		break;
	default:
		break;			/* \x is just x */
	}
	return(value);
}

/*
 *				v s t r i n g . c
 */

/*)LIBRARY
*/

#ifdef	DOCUMENTATION

title	vstring	Functions to Manipulate Vstrings
index		Create a new vstring
index		Add a character to a vstring
index		Add a string to a vstring
index		Free a vstring

synopsis

	 #ifdef vms
	 #include "c:vstrin.h"
	 #else
	 #include <vstrin.h>
	 #endif

	 VSTRING *
	 vstring(dim,ext)
	 unsigned dim;
	 unsigned ext;

	 VSTRING *
	 vsaddc(pv,c)
	 VSTRING *pv;
	 char c;

	 VSTRING *
	 vsadds(pv,s)
	 VSTRING *pv;
	 char *s;

	 vsfree(pv)
	 VSTRING *pv;

description

	Vstrings are dynamically expandable strings.  They have room for some
	fixed number of characters, but can grow, obtaining memory through
	malloc(), if necessary.

	vstring(dim,ext) returns a pointer to a new vstring, or NULL if there
	is no space for one.  The vstring initially has room for dim bytes,
	and will grow by ext bytes if it fills.

	vsaddc() adds a character to a vstring.

	vsadds() adds a string to a vstring.  It merely calls vsaddc() in a
	loop. The trailing EOS is NOT added.	

	Both vsaddc() and vsadds() return NULL if the vstring would have had
	to grow but couldn't due to lack of memory or a zero growth quantum.
	In this case, the data in the vstring is lost and the vstring itself
	is marked damaged and cannot be used again; further attempts always
	return NULL.

	If all went well, the vstring's address as passed is returned.

	It is occasionally necessary to recover the information in a damaged
	vstring (in most cases, there is no reasonable way to continue once
	the program runs out of memory).  This can be done by making use of
	realloc()'s ability to reallocate the most recently freed block of
	core.  Before making the potentially damaging call, save the current
	values of the vsdata and vslen elements of the vstring.  Should the
	vstring become damaged, the vsdata value can be passed to realloc()
	safely.  Note that this recovers the data, but not the vstring's
	header; you still cannot apply any of the vstring-handling functions
	to it.

	vsfree() frees a vstring.  Attempting to free something that was not
	created with vstring() will cause trouble.  However, NULL or a vstring
	that was filled and marked damaged may be safely passed.

bugs

	A vstring is a special case of a flex which always has an item size
	of 1 byte.  It is convenient to maintain it as a separate data type
	because the kinds of operations done on the two differ.  For example,
	fxadd() takes the address of the item to add; vsaddc() takes the
	character itself, which is often more convenient.  The basic set of
	functions provided for vstrings is more limited; perhaps more purely
	string-oriented functions that work on vstrings will be defined later.

author

	Jerry Leichter

#endif

/*
 *)EDITLEVEL=10
 * Edit history
 * 0.0 28-Apr-81 JSL	Invention
 * 0.1  4-May-81 JSL	Better error handling
 * 0.2 12-May-81 JSL	Decided to keep blocks around after all; changed the
 *			name of balloc() to block() to match flex().
 * 0.3 23-Jun-81 JSL	Conversion to the new documentation conventions.
 * 1.0 26-Jun-81 JSL	Changed block to vstring, which makes more sense.
 * 1.1 29-Jun-81 JSL	More name changes; get consistent with flexes.
 */

#ifdef	vms
/* #include "c:vstrin.h" */
#else
/* #include <vstrin.h> */
#endif

/*
 * <stdio.h> is included earlier in this file and normally defines NULL
 * already.  Only supply a definition when none exists, so the existing
 * macro is not redefined with a different value.
 */
#ifndef NULL
#define NULL 0
#endif

extern char *malloc(), *realloc();

/*
 * vstring - create a new, empty vstring.
 *
 * dim is the number of bytes of room to start with and ext the number of
 * bytes to grow by each time the vstring fills (0 = may never grow).
 * Returns the new vstring, or NULL if memory could not be obtained.
 */
VSTRING *
vstring(dim,ext)
unsigned dim;		/* initial size for this vstring */
unsigned ext;		/* growth quantum */
{	register VSTRING *pv;

	if ((pv = (VSTRING *)malloc(sizeof(VSTRING))) == NULL)
		return(NULL);
	/*
	 * malloc(0) is allowed to return NULL, which would be mistaken for
	 * running out of memory, so always ask for at least one byte.  The
	 * recorded capacity stays at dim.
	 */
	if ((pv->vsdata = malloc(dim > 0 ? dim : 1)) == NULL)
	{	free(pv);
		return(NULL);
	}
	pv->vsdim = dim;
	pv->vsext = ext;
	pv->vslen = 0;
	return(pv);
}

/*
 * vsaddc - append the character c to the vstring pv.
 *
 * When pv is full it is first grown by its quantum vsext.  If it cannot
 * grow (zero quantum, no memory, or it was already damaged) its data is
 * released, it is marked damaged (vsdata == NULL, vsdim == 0) and NULL is
 * returned; every later call on it fails the same way.
 *
 * Returns pv on success, NULL on failure.  A NULL pv is passed straight
 * back.
 */
VSTRING *
vsaddc(pv,c)
register VSTRING *pv;
char c;
{
	register char *grown;

	if (pv == NULL)
		return(pv);
	if (pv->vsdim <= pv->vslen)		/* no room left */
	{	pv->vsdim += pv->vsext;
		grown = NULL;
		if (pv->vsdata != NULL && pv->vsdim > pv->vslen)
			grown = realloc(pv->vsdata,pv->vsdim);
		if (grown == NULL)
		{	/*
			 * A failed realloc() leaves the old block
			 * allocated, and with a zero quantum realloc() is
			 * never even called, so release the block here
			 * rather than leaking it.
			 */
			if (pv->vsdata != NULL)
				free(pv->vsdata);
			pv->vsdata = NULL;
			pv->vsdim = 0;
			return(NULL);
		}
		pv->vsdata = grown;
	}
	(pv->vsdata)[pv->vslen++] = c;
	return(pv);
}

/*
 * vsadds - append the characters of the string s to pv, one vsaddc() call
 * per character.  The terminating EOS of s is not added.  Returns the
 * result of the last vsaddc() call, or pv itself if s is empty.
 */
VSTRING *
vsadds(pv,s)
register VSTRING *pv;
register char *s;
{
	for ( ; *s != '\0'; s++)
		pv = vsaddc(pv,*s);
	return(pv);
}

/*
 * vsfree - release a vstring: its data, if it still has any, and then its
 * header.  A NULL pv is ignored.
 */
vsfree(pv)
register VSTRING *pv;
{
	if (pv == NULL)
		return;
	if (pv->vsdata != NULL)		/* a damaged vstring has no data */
		free(pv->vsdata);
	free(pv);
}
                                                                                                                                                                                                                                                                                                                                                                                   