Subject: C compiler generates very inefficient code for u_char Index: lib/ccom/c10.c,optable 2.11BSD Description: The C compiler generates what might best be described as "inefficient" code when the "unsigned char" data type is used. One problem in c10.c was that the 'efftab' was never used if the data type was UNCHAR even though there were 'aub' table entries in 'efftab' to handle u_char data. The other problem was in the handling of the simple post increment and decrement cases (simple means "b++;" without any further use in an expression). In 'optable' the |= and &= tables were broken out from the += and -= table because while the pdp-11 does not have byte forms of the "add" and "sub" instructions it definitely has byte forms of the "bis" and "bic" instructions and can generate much better code using them than going thru a u_char/int conversion. In quite a number of cases the sequence "movb , r0; bic $!377,r0" was replaced by "clr r0; bisb , r0". The second sequence is one word shorter and a little faster. For the u_char divide case the "bic $!377,r0" is not required because the dividend was no greater than 255 going into the divide. Dividing anything no greater than 255 by any number will result in a quotient no greater than 255 - the masking of the high byte was not needed. vm_text.c in the kernel is particularly rife with u_char bit testing and setting - this module shrank by about 400 bytes when recompiled with the new compiler. Repeat-By: Compile the test programs with "cc -O -S x.c" and examine the x.s file. 
------------------------test program 1------------------- unsigned char b; char d; main() { d = b; b++; d = ++b; if (b && --b) exit(); } --------------------------end test program 1----------------- Generated code comparison between old and new compilers for test program 1: OLD NEW C === === = _main: clr r0 --- bisb _b,r0 --- movb r0,_d movb _b,_d d = b; clr r0 --- bisb _b,r0 --- incb _b incb _b b++; incb _b incb _b clr r0 --- bisb _b,r0 --- movb r0,_d movb _b,_d d = ++b; movb _b,r0 --- bic $!377,r0 tstb _b if (b jeq L3 jeq L3 && clr r0 --- bisb _b,r0 --- sub $1,r0 --- movb r0,_b --- bic $!377,r0 decb _b --b) tst r0 --- jeq L3 jeq L3 jsr pc,_exit jsr pc,_exit L3: jmp cret jmp cret NOTE the verbose autodecrement code previously generated! ------------------test program 2 (bit operators on u_char)------ char a; unsigned char b, x, *c; main() { b |= 7; if (x) exit(); if (b & 020) exit(); if (a & b) exit(); b |= x; *c ^= a; } ----------------------------end test program 2------------------------ Generated code comparison between old and new compilers for test program 2: OLD NEW C -------- ---------- --------- _main: clr r0 --- bisb _b,r0 --- bis $7,r0 --- movb r0,_b --- bic $!377,r0 bisb $7,_b b |= 7; movb _x,r0 --- bic $!377,r0 tstb _x if (x) jeq L4 jeq L4 jsr pc,_exit jsr pc,_exit exit(); L4: clr r0 --- bisb _b,r0 --- bit $20,r0 bitb $20,_b if (b & 020) jeq L5 jeq L5 jsr pc,_exit jsr pc,_exit L5: movb _a,r0 clr r0 clr r1 bisb _b,r0 bisb _b,r1 --- bit r1,r0 bitb r0,_a if (a & b) jeq L6 jeq L6 jsr pc,_exit jsr pc,_exit L6: clr -(sp) --- bisb _x,(sp) --- clr r0 --- bisb _b,r0 --- bis (sp)+,r0 --- movb r0,_b --- bic $!377,r0 bisb _x,_b b |= x; movb _a,r0 movb _a,r0 mov r0,-(sp) mov r0,-(sp) clr r0 --- bisb *_c,r0 movb *_c,r0 xor r0,(sp) xor r0,(sp) mov (sp)+,r0 clr r0 movb r0,*_c bisb (sp)+,r0 bic $!377,r0 --- *c ^= a; jmp cret jmp cret Fix: Apply the following patch to c10.c and optable. Save the existing C compiler if you wish (mkdir /lib/o;cp /lib/c* /lib/o). 
Recompile and install the C compiler with "make all;make install; make clean". Then at leisure recompile the kernel, libraries and the remainder of the system. --------------------------------------------------------------------------- *** c10.c.old Sun Feb 15 22:24:15 1987 --- c10.c Sat Oct 5 16:41:38 1991 *************** *** 527,533 **** */ r = nreg - reg + areg - reg1 + 1; if (table!=cctab || c==INCAFT || c==DECAFT || tree->t.type==LONG ! || c==ASRSH || c==ASLSH || c==ASULSH || tree->t.tr1->t.type==UNCHAR || (opt = match(tree, efftab, r, 0)) == 0) if ((opt=match(tree, table, r, 0))==0) return(-1); --- 527,534 ---- */ r = nreg - reg + areg - reg1 + 1; if (table!=cctab || c==INCAFT || c==DECAFT || tree->t.type==LONG ! /* || c==ASRSH || c==ASLSH || c==ASULSH || tree->t.tr1->t.type==UNCHAR */ ! || c==ASRSH || c==ASLSH || c==ASULSH || (opt = match(tree, efftab, r, 0)) == 0) if ((opt=match(tree, table, r, 0))==0) return(-1); *************** *** 1001,1007 **** p = *treep; if ((p->t.op==INCAFT||p->t.op==DECAFT) && p->t.tr1->t.op==NAME) { ! return(1+rcexpr(paint(p->t.tr1, p->t.type), table, reg)); } p1 = 0; /* --- 1002,1013 ---- p = *treep; if ((p->t.op==INCAFT||p->t.op==DECAFT) && p->t.tr1->t.op==NAME) { ! r = p->t.tr1->n.class; ! if (r == EXTERN || r == OFFS || r == STATIC && ! p->t.tr1->t.type == UNCHAR) ! return(1+rcexpr(p->t.tr1, table, reg)); ! else ! return(1+rcexpr(paint(p->t.tr1, p->t.type), table,reg)); } p1 = 0; /* *** optable.old Fri Oct 7 17:12:25 1988 --- optable Thu Oct 3 16:39:24 1991 *************** *** 32,39 **** {74,cr74}, {75,cr75}, {76,cr72}, ! {78,cr70}, ! {85,cr70}, {79,cr79}, {102,cr102}, {51,cr51}, --- 32,39 ---- {74,cr74}, {75,cr75}, {76,cr72}, ! {78,cr78}, /* |= */ ! {85,cr78}, /* &= */ {79,cr79}, {102,cr102}, {51,cr51}, *************** *** 255,262 **** F* S1 movB1 R1,#1(R) ! mov R1,R ! bic $!377,R %ed*,nf S --- 255,262 ---- F* S1 movB1 R1,#1(R) ! clr R ! bisb R1,R %ed*,nf S *************** *** 486,492 **** F! div A2,R ! 
/* =+, =-, =|, =&~ */ cr70: %[addq1:] %aw,aw --- 486,492 ---- F! div A2,R ! /* =+, =- */ cr70: %[addq1:] %aw,aw *************** *** 607,617 **** --- 607,619 ---- movfo R1,#1(R) movf R1,R + %[addq11:] %al,c I A2,A1+ V A1 F + %[addq12:] %al,al I A2+,A1+ V A1 *************** *** 618,623 **** --- 620,626 ---- I A2,A1 F + %[addq13:] %al,nl S I R+,A1+ *************** *** 625,630 **** --- 628,634 ---- I R,A1 F + %[addq14:] %nl*,c F* I A2,#1+2(R) *************** *** 632,637 **** --- 636,642 ---- mov #1+2(R),R+ mov #1(R),R + %[addq15:] %nl*,al F* I A2+,#1+2(R) *************** *** 640,645 **** --- 645,651 ---- mov #1+2(R),R+ mov #1(R),R + %[addq16:] %nl*,nl SS F* *************** *** 815,826 **** %aub,n SS ! clr R ! bisb A1',R xor R,(sp) ! mov (sp)+,R movb R,A1'' - bic $!377,R %n*,n FS* --- 821,831 ---- %aub,n SS ! movb A1',R xor R,(sp) ! clr R ! bisb (sp)+,R movb R,A1'' %n*,n FS* *************** *** 851,856 **** --- 856,941 ---- asrB1 #1(R) movB1 #1(R),R + /* =|, =&~ */ + cr78: + %aw,aw + % [addq1] + + %aub,a + IBE A2,A1' + clr R + bisb A1'',R + + %a,aw + %ad,ad + % [addq1a] + + %aw,nw* + % [addq2] + + %aw,n + % [addq3] + + %aub,n + SS + IBE (sp)+,A1' + clr R + bisb A1'',R + + %ew*,nw* + % [addq4] + + %ad,ef + % [addq4a] + + %a,n + %ad,nf + % [addq5] + + %af,nf + % [addq6] + + %ew*,n + % [addq7] + + %nw*,n + % [addq8] + + %n*,n + % [addq9] + + %nub*,n + FS* + SS + IBE (sp),*2(sp) + tst (sp)+ + clr R + bisb *(sp)+,R + + %nd*,nf + % [addq9a] + + %nf*,nf + % [addq10] + + %al,c + % [addq11] + + %al,al + % [addq12] + + %al,nl + % [addq13] + + %nl*,c + % [addq14] + + %nl*,al + % [addq15] + + %nl*,nl + % [addq16] + /* << for longs */ cr91: %nl,aw *************** *** 996,1002 **** S1! 
jsr pc,I movb R,A1'' - bic $!377,R %aw,n %ab,n --- 1081,1086 ---- *************** *** 1022,1028 **** mov (sp)+,R1 jsr pc,I movB1 R,A1'' - bic $!377,R %nub*,n FS* --- 1106,1111 ---- *************** *** 1032,1038 **** bisb *(sp),R jsr pc,I movB1 R,*(sp)+ - bic $!377,R /* (int *) - (int *) */ cr107: --- 1115,1120 ---- *************** *** 1083,1089 **** --- 1165,1173 ---- %[move3:] %a,aw %ab,a + %ab,aub %aub,a + %aub,ab IBE A2,A1 %[move4:] *************** *** 1254,1264 **** /* =| and =& ~ */ ci78: ! %a,aw %ab,a %aub,ab % [move3] %a,n % [move5] --- 1338,1358 ---- /* =| and =& ~ */ ci78: ! %a,a ! %a,ab ! %a,aub %ab,a + %ab,ab + %ab,aub + %aub,a %aub,ab + %aub,aub % [move3] + %aub,n + S + IBE R,A1 + %a,n % [move5] *************** *** 1335,1340 **** --- 1429,1435 ---- ci70: %n*,z %a,z + %ab,1 %aub,1 %a,1 I'B1 A1 *************** *** 1561,1584 **** cc60: %a,z %ad,zf % [move1] - %aub,z - movb A1,R - bic $!377,R - %af,z movof A1,R %n*,z %nd*,zf % [move2] - %nub*,z - F* - movB1 #1(R),R - bic $!377,R - %nf*,z F* movof #1(R),R --- 1656,1672 ---- cc60: %a,z %ad,zf + %aub,z % [move1] %af,z movof A1,R %n*,z %nd*,zf + %nub*,z % [move2] %nf*,z F* movof #1(R),R *************** *** 1728,1735 **** --- 1816,1842 ---- /* & as in "if ((a&b) ==0)" */ cc81: %a,a + %a,ab + %a,aub + %ab,a + %ab,ab + %ab,aub + %aub,a + %aub,ab + %aub,aub % [move3] + /* + special case. apparently "u_char b; [u_]char a; if (b & a)..." was + too complicated. the resulting code was horrid. this cuts the + waste by 33% + */ + + %a,e + %aub,e + S + IBE R,A1 + %n*,a % [move6] *************** *** 1804,1810 **** cs106: %z,n %zf,n ! clrB1 -(sp) %aw,n mov A1,-(sp) --- 1911,1917 ---- cs106: %z,n %zf,n ! clr -(sp) %aw,n mov A1,-(sp)