Subject: C compiler generates very inefficient code for u_char
Index:	lib/ccom/c10.c,optable 2.11BSD

Description:
	The C compiler generates what might best be described as
	"inefficient" code when the "unsigned char" data type is
	used.

	One problem in c10.c was that the 'efftab' was never
	used if the data type was UNCHAR even though there were 
	'aub' table entries in 'efftab' to handle u_char data.  The
	other problem was in the handling of the simple post increment
	and decrement cases (simple means "b++;" without any further
	use in an expression).

	In 'optable' the |= and &= tables were broken out from the
	+= and -= table because, while the pdp-11 does not have byte
	forms of the "add" and "sub" instructions, it definitely
	has byte forms of the "bis" and "bic" instructions and can
	generate much better code using them than by going through a
	u_char/int conversion.

	In quite a number of cases the sequence "movb <operand>,r0; bic $!377,r0"
	was replaced by "clr r0; bisb <operand>,r0".  The second sequence
	is one word shorter and a little faster.

	For the u_char divide case the "bic $!377,r0" is not required
	because the dividend was no greater than 255 going into the divide.
	Dividing anything no greater than 255 by any number will result in
	a quotient no greater than 255 - the masking of the high byte was
	not needed.

	vm_text.c in the kernel is particularly rife with u_char
	bit testing and setting - this module shrank by about 400
	bytes when recompiled with the new compiler.

Repeat-By:
	Compile the test program with "cc -O -S x.c" and examine
	the x.s file.

	------------------------test program 1-------------------
	/*
	 * Test program 1: assignment, increment and decrement of an
	 * unsigned char global.  'b' is unsigned char, 'd' is plain char.
	 */
	unsigned char b;
	char	d;

main()
	{

	d = b;			/* u_char to char assignment */
	b++;			/* post increment, result unused */
	d = ++b;		/* pre increment, result assigned */
	if	(b && --b)	/* u_char truth test, then pre decrement test */
		exit();
	}
	--------------------------end test program 1-----------------
Generated code comparison between old and new compilers for test program 1:

	OLD			NEW			C
	===			===			=
_main:
	clr	r0		---
	bisb	_b,r0		---
	movb	r0,_d		movb	_b,_d		d = b;

	clr	r0		---
	bisb	_b,r0		---
	incb	_b		incb	_b		b++;

	incb	_b		incb	_b
	clr	r0		---
	bisb	_b,r0		---
	movb	r0,_d		movb	_b,_d		d = ++b;

	movb	_b,r0		---
	bic	$!377,r0	tstb	_b		if (b
	jeq	L3		jeq	L3			&&

	clr	r0		---
	bisb	_b,r0		---
	sub	$1,r0		---
	movb	r0,_b		---
	bic	$!377,r0	decb	_b		--b)
	tst	r0		---
	jeq	L3		jeq	L3
	jsr	pc,_exit	jsr	pc,_exit
L3:	jmp	cret		jmp	cret


	NOTE the verbose pre-decrement (--b) code previously generated!

	------------------test program 2 (bit operators on u_char)------
	/*
	 * Test program 2: bit operators applied to unsigned char operands.
	 * 'a' is plain char; 'b' and 'x' are unsigned char; 'c' points to
	 * an unsigned char.
	 */
	char	a;
	unsigned char b, x, *c;

main()
	{

	b |= 7;			/* |= with a constant */
	if	(x)		/* u_char truth test */
		exit();
	if	(b & 020)	/* bit test against an octal constant */
		exit();
	if	(a & b)		/* bit test, char against u_char */
		exit();
	b |= x;			/* |= between two u_chars */
	*c ^= a;		/* ^= through a u_char pointer */
	}
----------------------------end test program 2------------------------
Generated code comparison between old and new compilers for test program 2:
	OLD			NEW			C
	--------		----------		---------
_main:
	clr	r0		---
	bisb	_b,r0		---
	bis	$7,r0		---
	movb	r0,_b		---
	bic	$!377,r0	bisb	$7,_b		b |= 7;

	movb	_x,r0		---
	bic	$!377,r0	tstb	_x		if (x)
	jeq	L4		jeq	L4
	jsr	pc,_exit	jsr	pc,_exit	   exit();

L4:	clr	r0		---
	bisb	_b,r0		---
	bit	$20,r0		bitb	$20,_b		if (b & 020)
	jeq	L5		jeq	L5
	jsr	pc,_exit	jsr	pc,_exit

L5:	movb	_a,r0		clr	r0
	clr	r1		bisb	_b,r0
	bisb	_b,r1		---
	bit	r1,r0		bitb	r0,_a		if (a & b)
	jeq	L6		jeq	L6
	jsr	pc,_exit	jsr	pc,_exit

L6:	clr	-(sp)		---
	bisb	_x,(sp)		---
	clr	r0		---
	bisb	_b,r0		---
	bis	(sp)+,r0	---
	movb	r0,_b		---
	bic	$!377,r0	bisb	_x,_b		b |= x;

	movb	_a,r0		movb	_a,r0
	mov	r0,-(sp)	mov	r0,-(sp)
	clr	r0		---
	bisb	*_c,r0		movb	*_c,r0
	xor	r0,(sp)		xor	r0,(sp)
	mov	(sp)+,r0	clr	r0
	movb	r0,*_c		bisb	(sp)+,r0
	bic	$!377,r0	movb	r0,*_c		*c ^= a;
	jmp	cret		jmp	cret

Fix:
	Apply the following patch to c10.c and optable.  Save the existing
	C compiler if you wish (mkdir /lib/o;cp /lib/c* /lib/o).  Recompile
	and install the C compiler with "make all;make install; make clean".

	Then at leisure recompile the kernel, libraries and the remainder
	of the system.

---------------------------------------------------------------------------
*** c10.c.old	Sun Feb 15 22:24:15 1987
--- c10.c	Sat Oct  5 16:41:38 1991
***************
*** 527,533 ****
  	 */
  	r = nreg - reg + areg - reg1 + 1;
  	if (table!=cctab || c==INCAFT || c==DECAFT || tree->t.type==LONG
! 	 || c==ASRSH || c==ASLSH || c==ASULSH || tree->t.tr1->t.type==UNCHAR
  	 || (opt = match(tree, efftab, r, 0)) == 0)
  		if ((opt=match(tree, table, r, 0))==0)
  			return(-1);
--- 527,534 ----
  	 */
  	r = nreg - reg + areg - reg1 + 1;
  	if (table!=cctab || c==INCAFT || c==DECAFT || tree->t.type==LONG
! /*	 || c==ASRSH || c==ASLSH || c==ASULSH || tree->t.tr1->t.type==UNCHAR */
! 	 || c==ASRSH || c==ASLSH || c==ASULSH
  	 || (opt = match(tree, efftab, r, 0)) == 0)
  		if ((opt=match(tree, table, r, 0))==0)
  			return(-1);
***************
*** 1001,1007 ****
  	p = *treep;
  	if ((p->t.op==INCAFT||p->t.op==DECAFT)
  	 && p->t.tr1->t.op==NAME) {
! 		return(1+rcexpr(paint(p->t.tr1, p->t.type), table, reg));
  	}
  	p1 = 0;
  /*
--- 1002,1013 ----
  	p = *treep;
  	if ((p->t.op==INCAFT||p->t.op==DECAFT)
  	 && p->t.tr1->t.op==NAME) {
! 		r = p->t.tr1->n.class;
! 		if (r == EXTERN || r == OFFS || r == STATIC &&
! 				p->t.tr1->t.type == UNCHAR)
! 			return(1+rcexpr(p->t.tr1, table, reg));
! 		else
! 			return(1+rcexpr(paint(p->t.tr1, p->t.type), table,reg));
  	}
  	p1 = 0;
  /*
*** optable.old	Fri Oct  7 17:12:25 1988
--- optable	Thu Oct  3 16:39:24 1991
***************
*** 32,39 ****
  	{74,cr74},
  	{75,cr75},
  	{76,cr72},
! 	{78,cr70},
! 	{85,cr70},
  	{79,cr79},
  	{102,cr102},
  	{51,cr51},
--- 32,39 ----
  	{74,cr74},
  	{75,cr75},
  	{76,cr72},
! 	{78,cr78},	/* |= */
! 	{85,cr78},	/* &= */
  	{79,cr79},
  	{102,cr102},
  	{51,cr51},
***************
*** 255,262 ****
  	F*
  	S1
  	movB1	R1,#1(R)
! 	mov	R1,R
! 	bic	$!377,R
  
  %ed*,nf
  	S
--- 255,262 ----
  	F*
  	S1
  	movB1	R1,#1(R)
! 	clr	R
! 	bisb	R1,R
  
  %ed*,nf
  	S
***************
*** 486,492 ****
  	F!
  	div	A2,R
  
! /* =+, =-, =|, =&~ */
  cr70:
  %[addq1:]
  %aw,aw
--- 486,492 ----
  	F!
  	div	A2,R
  
! /* =+, =- */
  cr70:
  %[addq1:]
  %aw,aw
***************
*** 607,617 ****
--- 607,619 ----
  	movfo	R1,#1(R)
  	movf	R1,R
  
+ %[addq11:]
  %al,c
  	I	A2,A1+
  	V	A1
  	F
  
+ %[addq12:]
  %al,al
  	I	A2+,A1+
  	V	A1
***************
*** 618,623 ****
--- 620,626 ----
  	I	A2,A1
  	F
  
+ %[addq13:]
  %al,nl
  	S
  	I	R+,A1+
***************
*** 625,630 ****
--- 628,634 ----
  	I	R,A1
  	F
  
+ %[addq14:]
  %nl*,c
  	F*
  	I	A2,#1+2(R)
***************
*** 632,637 ****
--- 636,642 ----
  	mov	#1+2(R),R+
  	mov	#1(R),R
  
+ %[addq15:]
  %nl*,al
  	F*
  	I	A2+,#1+2(R)
***************
*** 640,645 ****
--- 645,651 ----
  	mov	#1+2(R),R+
  	mov	#1(R),R
  
+ %[addq16:]
  %nl*,nl
  	SS
  	F*
***************
*** 815,826 ****
  
  %aub,n
  	SS
! 	clr	R
! 	bisb	A1',R
  	xor	R,(sp)
! 	mov	(sp)+,R
  	movb	R,A1''
- 	bic	$!377,R
  
  %n*,n
  	FS*
--- 821,831 ----
  
  %aub,n
  	SS
! 	movb	A1',R
  	xor	R,(sp)
! 	clr	R
! 	bisb	(sp)+,R
  	movb	R,A1''
  
  %n*,n
  	FS*
***************
*** 851,856 ****
--- 856,941 ----
  	asrB1	#1(R)
  	movB1	#1(R),R
  
+ /* =|, =&~ */
+ cr78:
+ %aw,aw
+ %	[addq1]
+ 
+ %aub,a
+ 	IBE	A2,A1'
+ 	clr	R
+ 	bisb	A1'',R
+ 
+ %a,aw
+ %ad,ad
+ %	[addq1a]
+ 
+ %aw,nw*
+ %	[addq2]
+ 
+ %aw,n
+ %	[addq3]
+ 
+ %aub,n
+ 	SS
+ 	IBE	(sp)+,A1'
+ 	clr	R
+ 	bisb	A1'',R
+ 
+ %ew*,nw*
+ %	[addq4]
+ 
+ %ad,ef
+ %	[addq4a]
+ 
+ %a,n
+ %ad,nf
+ %	[addq5]
+ 
+ %af,nf
+ %	[addq6]
+ 
+ %ew*,n
+ %	[addq7]
+ 
+ %nw*,n
+ %	[addq8]
+ 
+ %n*,n
+ %	[addq9]
+ 
+ %nub*,n
+ 	FS*
+ 	SS
+ 	IBE	(sp),*2(sp)
+ 	tst	(sp)+
+ 	clr	R
+ 	bisb	*(sp)+,R
+ 
+ %nd*,nf
+ %	[addq9a]
+ 
+ %nf*,nf
+ %	[addq10]
+ 
+ %al,c
+ %	[addq11]
+ 
+ %al,al
+ %	[addq12]
+ 
+ %al,nl
+ %	[addq13]
+ 
+ %nl*,c
+ %	[addq14]
+ 
+ %nl*,al
+ %	[addq15]
+ 
+ %nl*,nl
+ %	[addq16]
+ 
  /* << for longs */
  cr91:
  %nl,aw
***************
*** 996,1002 ****
  	S1!
  	jsr	pc,I
  	movb	R,A1''
- 	bic	$!377,R
  
  %aw,n
  %ab,n
--- 1081,1086 ----
***************
*** 1022,1028 ****
  	mov	(sp)+,R1
  	jsr	pc,I
  	movB1	R,A1''
- 	bic	$!377,R
  
  %nub*,n
  	FS*
--- 1106,1111 ----
***************
*** 1032,1038 ****
  	bisb	*(sp),R
  	jsr	pc,I
  	movB1	R,*(sp)+
- 	bic	$!377,R
  
  /* (int *) - (int *) */
  cr107:
--- 1115,1120 ----
***************
*** 1083,1089 ****
--- 1165,1173 ----
  %[move3:]
  %a,aw
  %ab,a
+ %ab,aub
  %aub,a
+ %aub,ab
  	IBE	A2,A1
  
  %[move4:]
***************
*** 1254,1264 ****
  
  /* =| and =& ~ */
  ci78:
! %a,aw
  %ab,a
  %aub,ab
  %	[move3]
  
  %a,n
  %	[move5]
  
--- 1338,1358 ----
  
  /* =| and =& ~ */
  ci78:
! %a,a
! %a,ab
! %a,aub
  %ab,a
+ %ab,ab
+ %ab,aub
+ %aub,a
  %aub,ab
+ %aub,aub
  %	[move3]
  
+ %aub,n
+ 	S
+ 	IBE	R,A1
+ 
  %a,n
  %	[move5]
  
***************
*** 1335,1340 ****
--- 1429,1435 ----
  ci70:
  %n*,z
  %a,z
+ %ab,1
  %aub,1
  %a,1
  	I'B1	A1
***************
*** 1561,1584 ****
  cc60:
  %a,z
  %ad,zf
  %	[move1]
  
- %aub,z
- 	movb	A1,R
- 	bic	$!377,R
- 
  %af,z
  	movof	A1,R
  
  %n*,z
  %nd*,zf
  %	[move2]
  
- %nub*,z
- 	F*
- 	movB1	#1(R),R
- 	bic	$!377,R
- 
  %nf*,z
  	F*
  	movof	#1(R),R
--- 1656,1672 ----
  cc60:
  %a,z
  %ad,zf
+ %aub,z
  %	[move1]
  
  %af,z
  	movof	A1,R
  
  %n*,z
  %nd*,zf
+ %nub*,z
  %	[move2]
  
  %nf*,z
  	F*
  	movof	#1(R),R
***************
*** 1728,1735 ****
--- 1816,1842 ----
  /* & as in "if ((a&b) ==0)" */
  cc81:
  %a,a
+ %a,ab
+ %a,aub
+ %ab,a
+ %ab,ab
+ %ab,aub
+ %aub,a
+ %aub,ab
+ %aub,aub
  %	[move3]
  
+ /*
+    special case. apparently "u_char b; [u_]char a; if (b & a)..." was
+    too complicated.  the resulting code was horrid.  this cuts the
+    waste by 33%
+ */
+    
+ %a,e
+ %aub,e
+ 	S
+ 	IBE	R,A1
+ 
  %n*,a
  %	[move6]
  
***************
*** 1804,1810 ****
  cs106:
  %z,n
  %zf,n
! 	clrB1	-(sp)
  
  %aw,n
  	mov	A1,-(sp)
--- 1911,1917 ----
  cs106:
  %z,n
  %zf,n
! 	clr	-(sp)
  
  %aw,n
  	mov	A1,-(sp)
