Subject: No 'unsigned long' math routines in libc.a (#134) Index: lib/pdp/crt/{ultof,ulsh,uldiv,ulrem}.s 2.11BSD Description: There is a total lack of support for "unsigned long" in both the C compiler and the runtime library. Repeat-By: Observation? ;-) Fix: The enclosed update consists of two parts. The first is a patch to the /usr/src/lib/libc/pdp/crt/Makefile. The second part is a set of routines which implement the 'shift', 'divide', 'remainder' and 'convert to floating point' operations. These routines are being posted in advance of the changes to the compiler itself. Yes, I have 'u_long' implemented in the C compiler but want to do some more testing (at present the kernel and networking work when compiled with the new compiler). You will need to install these routines before the changes to the compiler are made. You can use these routines prior to the compiler changes because there is both a C interface as well as an internal compiler interface to 'ultof', 'ulsh', 'uldiv', 'ulrem'. Example: long a, b, c; float d, ultof(); a = uldiv(b, c); /* a = b/c */ d = ultof(b); The ulrem and uldiv routines are slow and bulky for divisors greater than or equal to 2^15. Feel free to improve them - I'd like a copy ;-) NOTE: It will be necessary to copy (or better yet make symlinks) these routines to the sys/pdp directory for kernel use. Instructions for this will come later. After applying the patch and unshar'ing the included file you need to: cd /usr/src/lib/libc/pdp/crt make ar rv /lib/libc.a *.o cd profiled ar rv /usr/lib/libc_p.a *.o cd .. make clean ranlib /lib/libc.a /usr/lib/libc_p.a ===============================cut here=========================== *** /usr/src/lib/libc/pdp/crt/Makefile.old Wed Feb 4 21:19:28 1987 --- /usr/src/lib/libc/pdp/crt/Makefile Sun Jun 6 20:49:46 1993 *************** *** 3,13 **** # All rights reserved. The Berkeley software License Agreement # specifies the terms and conditions for redistribution. # ! 
# @(#)Makefile 5.6 (Berkeley) 1/28/87 # # ! SRCS= aldiv.s almul.s alrem.s csv.s ldiv.s lmul.s lrem.s udiv.s ! OBJS= aldiv.o almul.o alrem.o csv.o ldiv.o lmul.o lrem.o udiv.o CFLAGS= -O ${DEFS} TAGSFILE=tags --- 3,16 ---- # All rights reserved. The Berkeley software License Agreement # specifies the terms and conditions for redistribution. # ! # @(#)Makefile 5.7 (2.11BSD GTE) 6/6/93 # # ! SRCS= aldiv.s almul.s alrem.s csv.s ldiv.s lmul.s lrem.s udiv.s \ ! uldiv.s ulrem.s ulsh.s ultof.s ! OBJS= aldiv.o almul.o alrem.o csv.o ldiv.o lmul.o lrem.o udiv.o \ ! uldiv.o ulrem.o ulsh.o ultof.o ! CFLAGS= -O ${DEFS} TAGSFILE=tags *************** *** 69,74 **** --- 72,81 ---- lmul.o: lmul.s ./DEFS.h lrem.o: lrem.s ./DEFS.h udiv.o: udiv.s ./DEFS.h + uldiv.o: uldiv.s ./DEFS.h + ulrem.o: ulrem.s ./DEFS.h + ulsh.o: ulsh.s ./DEFS.h + ultof.o: ultof.s ./DEFS.h # DEPENDENCIES MUST END AT END OF FILE # IF YOU PUT STUFF HERE IT WILL GO AWAY # see make depend above ============================cut here==================================== #! /bin/sh # This is a shell archive, meaning: # 1. Remove everything above the #! /bin/sh line. # 2. Save the resulting text in a file. # 3. Execute the file with /bin/sh (not csh) to create: # /usr/src/lib/libc/pdp/crt/ultof.s # /usr/src/lib/libc/pdp/crt/ulsh.s # /usr/src/lib/libc/pdp/crt/ulrem.s # /usr/src/lib/libc/pdp/crt/uldiv.s # This archive created: Fri Jun 11 21:46:01 1993 export PATH; PATH=/bin:/usr/bin:$PATH if test -f '/usr/src/lib/libc/pdp/crt/ultof.s' then echo shar: "will not over-write existing file '/usr/src/lib/libc/pdp/crt/ultof.s'" else sed 's/^X//' << \SHAR_EOF > '/usr/src/lib/libc/pdp/crt/ultof.s' X/* X * Program: ultof.s X * Copyright 1993, GTE Government Systems X * Author: Steven M. Schultz X * X * Version Date Modification X * 0.0 02Feb91 1. Initial inspiration struck. X * 1.0 05Jun93 2. Released into the Public Domain. X*/ X X#include "DEFS.h" X X/* X * All routines have both a C interface and an assembly interface. 
X * Normally the two are the same.  In the case of 'ulsh' the compiler has
X * placed one of the operands in r0 and r1 so the assembly interface differs
X * from the C interface.
X*/
X
X/*
X * twogig is the first word of the floating point constant 2^31.  The
X * conversion helpers below add it back after masking the top bit off an
X * unsigned long so that the remaining 31 bits convert as a positive number.
X*/
X#define	twogig	050000
X
X#if	!defined(KERNEL)
X/*
X * float ultof(lhs)
X *	u_long	lhs;
X *
X * unsigned 32-bit long to floating conversion.  Calls to ultof generated
X * automatically by the C compiler.  This routine is purposefully
X * not defined for the kernel since the kernel shouldn't (can't) do
X * FP arithmetic.
X *
X * The result is left in fr0.
X */
X
X	.globl	ultof
Xultof:
XENTRY(ultof)
X	jsr	pc,l2f		/ 2(sp) -> fr0
X	seti			/ back to short integer mode (l2f did a setl)
X	rts	pc
X
X/*
X * Common sequences used more than once.  Moved here to save space at the
X * expense of a jsr+rts.  Both do a 'setl', the caller must do a 'seti'.
X * Not for the kernel until the kernel can do FP arithmetic.
X *
X * l2f converts the long that its caller sees at 2(sp) into fr0, l6f converts
X * the long that its caller sees at 6(sp) into fr3.  Inside the helpers the
X * offsets are 2 larger (4(sp) and 8.(sp)) because of the return address
X * pushed by the jsr.
X *
X * NOTE: when the top bit of the long is set the helpers clear it in place,
X * i.e. the argument on the caller's stack is modified.
X*/
X
XASENTRY(l2f)
X	setl			/ movif is to convert a 32-bit long
X	tst	4(sp)		/ top bit of hi(lhs) set?
X	bpl	1f		/ no - converts correctly as is
X	bic	$100000,4(sp)	/ yes - clear it (in the stacked argument),
X	movif	4(sp),fr0	/   convert the low 31 bits
X	addf	$twogig,fr0	/   and add the 2^31 back in
X	rts	pc
X1:
X	movif	4(sp),fr0
X	rts	pc
X
XASENTRY(l6f)
X	setl			/ movif is to convert a 32-bit long
X	tst	8.(sp)		/ top bit of hi(rhs) set?
X	bpl	1f		/ no - converts correctly as is
X	bic	$100000,8.(sp)	/ yes - clear it (in the stacked argument),
X	movif	8.(sp),fr3	/   convert the low 31 bits
X	addf	$twogig,fr3	/   and add the 2^31 back in
X	rts	pc
X1:
X	movif	8.(sp),fr3
X	rts	pc
X#endif	KERNEL
SHAR_EOF
fi
if test -f '/usr/src/lib/libc/pdp/crt/ulsh.s'
then
	echo shar: "will not over-write existing file '/usr/src/lib/libc/pdp/crt/ulsh.s'"
else
sed 's/^X//' << \SHAR_EOF > '/usr/src/lib/libc/pdp/crt/ulsh.s'
X/*
X * Program: ulsh.s
X * Copyright 1993, GTE Government Systems
X * Author:  Steven M. Schultz
X *
X *  Version	Date		Modification
X *	0.0	02Feb91		1. Initial inspiration struck.
X *	1.0	05Jun93		2. Released into the Public Domain.
X*/
X
X#include "DEFS.h"
X
X/*
X * All routines have both a C interface and an assembly interface.  Normally
X * the two are the same.  In the case of 'ulsh' the compiler has placed one
X * of the operands in r0 and r1 so the assembly interface differs from the
X * C interface.
X*/
X
X/*
X * u_long ulsh(lhs, count)
X *	u_long	lhs;
X *	short	count;
X *
X * 32-bit "<<" and ">>" routines.
X * Calls to ulsh are generated automatically by the C compiler.
X *
X * A positive count shifts left, a negative count shifts right.  The result
X * is returned in r0 (high word) and r1 (low word).
X */
X
X/*
X * Assembly interface: the long is already in r0+r1, only the count is on
X * the stack.  The count word on the stack is modified for right shifts.
X */
XASENTRY(ulsh)
X	tst	2(sp)		/ shift count is on stack, long is in r0+r1
X	bpl	1f		/ positive count - ashc does it all
X	ror	r0		/ tst left the C bit clear, so this pair
X	ror	r1		/   shifts a zero into the top bit
X	inc	2(sp)		/ one right shift done, bump count towards zero
X1:
X	ashc	2(sp),r0	/ do the rest of the shifting
X	rts	pc
X
X/*
X * C interface: long at 2(sp)/4(sp), count at 6(sp).
X */
XENTRY(ulsh)
X	mov	2(sp),r0	/ r0 = hi(lhs)
X	mov	4(sp),r1	/ r1 = lo(lhs)
X	tst	6(sp)		/ positive count?
X	bpl	1f		/ yes - br
X	ror	r0		/ do the first shift
X	ror	r1		/   the hard way
X	inc	6(sp)		/ bump count towards zero
X1:
X	ashc	6(sp),r0	/ do the rest of the shifting
X	rts	pc
X
X/*
X * u_long ualsh(lhs, count)
X *	u_long	*lhs;
X *	short	count;
X *
X * 32-bit "<<=" and ">>=" routines.  Calls to ualsh are generated
X * automatically by the C compiler.
X *
X * The shifted value is stored back through lhs and is also left in r0+r1.
X */
X	.globl	ualsh
Xualsh:
XENTRY(ualsh)
X	mov	r2,-(sp)	/ save a register
X	mov	4(sp),r2	/ *lhs
X	mov	(r2)+,r0	/ r0 = hi(*lhs)
X	mov	(r2)+,r1	/ r1 = lo(*lhs), r2 now points past the long
X	tst	6(sp)		/ positive count?
X	bpl	1f		/ yes - br
X	ror	r0		/ do the first shift
X	ror	r1		/   the hard way
X	inc	6(sp)		/ bump count towards zero
X1:
X	ashc	6(sp),r0	/ do the rest of the shifting
X	mov	r1,-(r2)	/ store the low word,
X	mov	r0,-(r2)	/   then the high word back into *lhs
X	mov	(sp)+,r2
X	rts	pc
SHAR_EOF
fi
if test -f '/usr/src/lib/libc/pdp/crt/ulrem.s'
then
	echo shar: "will not over-write existing file '/usr/src/lib/libc/pdp/crt/ulrem.s'"
else
sed 's/^X//' << \SHAR_EOF > '/usr/src/lib/libc/pdp/crt/ulrem.s'
X/*
X * Program: ulrem.s
X * Copyright 1993, GTE Government Systems
X * Author:  Steven M. Schultz
X *
X *  Version	Date		Modification
X *	0.0	02Feb91		1. Initial inspiration struck.
X *	1.0	05Jun93		2. Released into the Public Domain.
X*/
X
X#include "DEFS.h"
X
X/*
X * All routines have both a C interface and an assembly interface.  Normally
X * the two are the same.  In the case of 'ulsh' the compiler has placed one
X * of the operands in r0 and r1 so the assembly interface differs from the
X * C interface.
X*/
X
X#define	one	040200
X
X/*
X * u_long ulrem(lhs, rhs)
X *	u_long	lhs, rhs;
X *
X * 32-bit "%" routine.  Calls to ulrem are generated automatically by the C
X * compiler.
X */
X
X#if	!defined(KERNEL)
X/*
X * ulrem for applications (uses floating point).
X */
X
X/*
X * twogig is the first word of the floating point constant 2^31 (the same
X * value ultof.s defines).  movfi can only convert values that fit in a
X * signed long, so a remainder >= 2^31 has 2^31 taken off before the
X * conversion and the top bit OR'd back into the integer result.
X */
X#define	twogig	050000
X
X	.globl	ulrem
X	.globl	l2f, l6f
X
X/*
X * Returns lhs % rhs in r0 (high word) and r1 (low word).  If rhs is zero
X * lhs is returned unchanged.  Uses fr0 - fr3.
X */
Xulrem:
XENTRY(ulrem)
X	jsr	pc,l2f		/ 2(sp) -> fr0
X	movf	fr0,fr2		/ put in right place (fr2)
X	jsr	pc,l6f		/ 6(sp) -> fr3
X	tstf	fr3		/ check for division by zero
X	cfcc			/   don't want FP trap during
X	beq	1f		/   integer arithmetic
X	divf	fr3,fr0		/ fr0 = lhs/rhs
X	modf	$one,fr0	/ fr1 = integer((lhs/rhs) * 1.0), fr0 = fraction
X	mulf	fr3,fr1		/ fr1 = integer(lhs/rhs) * rhs
X	subf	fr1,fr2		/ fr2 = lhs - (integer(lhs/rhs) * rhs)
X1:
X	clr	r0		/ r0 = bit to put back into hi(result)
X	subf	$twogig,fr2	/ try taking 2^31 off the result
X	cfcc
X	bge	2f		/ still >= 0 - result has its top bit set
X	addf	$twogig,fr2	/ went negative - result < 2^31, undo
X	br	3f
X2:
X	mov	$100000,r0	/ remember to restore the top bit
X3:
X	movfi	fr2,-(sp)	/ (result, less 2^31 if that was needed)
X	bis	r0,(sp)		/ put the top bit back into the high word
X	mov	(sp)+,r0
X	mov	(sp)+,r1
X	seti
X	rts	pc
X#else
X/*
X * ulrem for the kernel (uses only fixed point - no FP)
X *
X * Returns lhs % rhs in r0 (high word) and r1 (low word).  If rhs is zero
X * lhs is returned unchanged.
X *
X * The "scale" of an operand is the number of left shifts needed to push its
X * most significant one bit out of the 32-bit value.  rhs is shifted left by
X * (scale of rhs - scale of lhs) to line it up with lhs and is then
X * repeatedly compared/subtracted and shifted right until it is back at its
X * original position.
X*/
X	.globl	ulrem
Xulrem:
XENTRY(ulrem)
X	mov	r2,-(sp)	/ faster than csv/cret ...
X	mov	r3,-(sp)
X	mov	r4,-(sp)
X	mov	8.(sp),r0	/ r0 = hi(lhs)
X	mov	10.(sp),r1	/ r1 = lo(lhs)
X	mov	12.(sp),r2	/ r2 = hi(rhs)
X	mov	14.(sp),r3	/ r3 = lo(rhs)
X	bne	3f
X	tst	r2
X	beq	9f		/ check for divide by 0
X3:
X	clr	r4		/ init scale of lhs
X2:
X	ashc	$1,r0
X	blos	1f		/ check for zero at same time
X	inc	r4
X	br	2b
X1:
X	mov	r4,-(sp)	/ save scale of lhs
X	clr	r4
X2:
X	asl	r3
X	rol	r2
X	bcs	1f
X	inc	r4		/ bump rhs scale
X	br	2b
X1:
X	sub	(sp)+,r4	/ difference in scale (rhs - lhs)
X	bpl	4f		/ rhs is no wider than lhs - go divide
X	mov	8.(sp),r0	/ lhs has fewer significant bits than rhs so
X	mov	10.(sp),r1	/   lhs < rhs and lhs is the remainder.  (A
X	br	9f		/   negative count would shift rhs RIGHT below
X				/   and subtract the wrong value from lhs.)
X4:
X	clr	r0
X	mov	$1,r1
X	ashc	r4,r0		/ initial quotient adder
X	mov	r1,-(sp)	/ quoadder lo
X	mov	r0,-(sp)	/ quoadder hi
X	mov	12.(sp),r0	/ r0 = hi(lhs)
X	mov	14.(sp),r1	/ r1 = lo(lhs)
X	mov	16.(sp),r2	/ r2 = hi(rhs)
X	mov	18.(sp),r3	/ r3 = lo(rhs)
X
X	ashc	r4,r2		/ scale rhs up for repetitive subtraction
X	clr	r4		/ quo lo
X	clr	-(sp)		/ quo hi
Xdocmp1:
X	cmp	r2,r0
X	bhi	noadd1
X	blo	dosub1
X	cmp	r3,r1
X	bhi	noadd1
Xdosub1:
X	sub	r3,r1
X	sbc	r0
X	sub	r2,r0
X	add	4(sp),r4	/ quo lo += quoadder lo
X	adc	(sp)		/ quo hi
X	add	2(sp),(sp)	/ quo hi += quoadder hi
X	br	docmp1
Xnoadd1:
X	clc			/ right shift rhs
X	ror	r2
X	ror	r3
X	clc			/ right shift quotient adder
X	ror	2(sp)
X	ror	4(sp)
X	bne	docmp1		/ quo adder not 0 means more to do
X	tst	2(sp)
X	bne	docmp1
X	add	$6,sp		/ remove quo adder and quo high
X9:
X	mov	(sp)+,r4	/ r0,r1 have remainder
X	mov	(sp)+,r3
X	mov	(sp)+,r2
X	rts	pc
X#endif	KERNEL
X
X/*
X * u_long ualrem(lhs, rhs)
X *	u_long	*lhs, rhs;
X *
X * 32-bit "%=" routine.  Calls to ualrem are generated automatically by the C
X * compiler.
X */
X
X	.globl	ualrem
Xualrem:
XENTRY(ualrem)
X	mov	r2,-(sp)	/ need a register to point at the lhs
X	mov	8.(sp),-(sp)	/ The rem algorithm is long
X	mov	8.(sp),-(sp)	/ enough that it just doesn't make sense
X	mov	8.(sp),r2	/ to bother repeating it.  We just translate
X	mov	2(r2),-(sp)	/ the call for ulrem and let it do the work
X	mov	(r2),-(sp)	/ and return its results (also stuffing it
X	jsr	pc,ulrem	/ into *lhs)
X	add	$8.,sp		/ clean up stack
X	mov	r0,(r2)+	/ store high word,
X	mov	r1,(r2)		/ and low
X	mov	(sp)+,r2	/ restore r2
X	rts	pc		/ and return
SHAR_EOF
fi
if test -f '/usr/src/lib/libc/pdp/crt/uldiv.s'
then
	echo shar: "will not over-write existing file '/usr/src/lib/libc/pdp/crt/uldiv.s'"
else
sed 's/^X//' << \SHAR_EOF > '/usr/src/lib/libc/pdp/crt/uldiv.s'
X/*
X * Program: uldiv.s
X * Copyright 1993, GTE Government Systems
X * Author:  Steven M. Schultz
X *
X *  Version	Date		Modification
X *	0.0	02Feb91		1. Initial inspiration struck.
X *	1.0	05Jun93		2. Released into the Public Domain.
X*/
X
X#include "DEFS.h"
X
X/*
X * All routines have both a C interface and an assembly interface.  Normally
X * the two are the same.  In the case of 'ulsh' the compiler has placed one
X * of the operands in r0 and r1 so the assembly interface differs from the
X * C interface.
X*/
X
X/*
X * u_long uldiv(lhs, rhs)
X *	u_long	lhs, rhs;
X *
X * unsigned 32-bit "/" routine.  Calls to uldiv are generated automatically
X * by the C compiler.
X */
X
X#if	!defined(KERNEL)
X/*
X * uldiv for applications (uses floating point)
X *
X * Returns lhs / rhs in r0 (high word) and r1 (low word).  If rhs is zero
X * lhs is returned unchanged.  Uses fr0 and fr3.
X *
X * twogig is the first word of the floating point constant 2^31 (the same
X * value ultof.s defines).  movfi can only convert values that fit in a
X * signed long, so a quotient >= 2^31 (e.g. 0x80000000 / 1) has 2^31 taken
X * off before the conversion and the top bit OR'd back into the result.
X */
X#define	twogig	050000
X
X	.globl	l2f, l6f
X	.globl	uldiv
Xuldiv:
XENTRY(uldiv)
X	jsr	pc,l2f		/ 2(sp) -> fr0
X	jsr	pc,l6f		/ 6(sp) -> fr3
X	tstf	fr3		/ check for zero divisor
X	cfcc			/   don't want to have an FP fault
X	beq	1f		/   in integer arithmetic
X	divf	fr3,fr0		/ fr0 /= rhs
X1:
X	clr	r0		/ r0 = bit to put back into hi(result)
X	subf	$twogig,fr0	/ try taking 2^31 off the result
X	cfcc
X	bge	2f		/ still >= 0 - result has its top bit set
X	addf	$twogig,fr0	/ went negative - result < 2^31, undo
X	br	3f
X2:
X	mov	$100000,r0	/ remember to restore the top bit
X3:
X	movfi	fr0,-(sp)	/ (result, less 2^31 if that was needed)
X	bis	r0,(sp)		/ put the top bit back into the high word
X	mov	(sp)+,r0	/ return result
X	mov	(sp)+,r1
X	seti
X	rts	pc
X#else
X/*
X * uldiv for the kernel (fixed point only - no FP)
X *
X * Returns lhs / rhs in r0 (high word) and r1 (low word).  A divisor of 0
X * or 1 returns lhs unchanged: for 0 this matches the floating point version
X * and ulrem, for 1 it is simply the answer.  Both must be kept away from
X * the 'div' instructions below - 0 for the obvious reason, 1 because
X * hi(lhs) / 1 does not fit in a signed 16-bit quotient when hi(lhs) >= 2^15.
X */
X
X	.globl	uldiv
Xuldiv:
XENTRY(uldiv)
X	mov	r2,-(sp)	/ faster than csv/cret ...
X	mov	r3,-(sp)
X	mov	r4,-(sp)
X	mov	14.(sp),r3	/ r3 = lo(rhs)
X	bmi	slowuldiv	/  rhs >= 2^15
X	tst	12.(sp)		/ hi(rhs) empty?
X	bne	slowuldiv	/   no, rhs >= 2^16
X	cmp	r3,$1		/ rhs 0 or 1?
X	bhi	4f		/   no - go divide
X	mov	8.(sp),r0	/ yes - result is lhs, see above
X	mov	10.(sp),r1
X	br	9f
X4:
X	mov	10.(sp),r2	/ r2 = lo(lhs)
X	mov	8.(sp),r1	/ r1 = hi(lhs)
X
X	clr	r0		/ r0 = hi(lhs) / lo(rhs)
X	div	r3,r0		/ r1 = hi(lhs) % lo(rhs)
X	mov	r0,r4		/ save high quotient
X	mov	r1,-(sp)	/ stash hi(tmp)
X	mov	r1,r0		/ tmp=(hi(lhs)%lo(rhs))<<16 | lo(lhs)
X	mov	r2,r1		/ (r0:r1 = tmp)
X	div	r3,r0		/ r0 = tmp / lo(rhs)
X	bvc	3f		/ done if tmp/lo(rhs) < 2^15
X
X	mov	(sp),r0		/ reload r0:r1 with tmp (regs may be
X	mov	r2,r1		/   clobbered by failed div)
X	sub	r3,r0		/ r0:r1 -= 2^16 * lo(rhs)
X	div	r3,r0
X	tst	r1		/ if (negative) remainder, subtract one from
X	sxt	r1		/   quotient
X	add	r1,r0		/ cannot overflow!
X3:
X	tst	(sp)+		/ pop hi(tmp) off stack
X	mov	r0,r1		/ r1 (lo(quo)) = tmp / lo(rhs)
X	mov	r4,r0		/ r0 (hi(quo)) = hi(lhs) / lo(rhs)
X9:
X	mov	(sp)+,r4	/ restore registers
X	mov	(sp)+,r3
X	mov	(sp)+,r2
X	rts	pc
X
X/*
X * The divisor (rhs) is known to be >= 2^15 so we perform a shift and
X * subtract algorithm.  It's slow - feel free to improve it.
X *
X * The algorithm for signed divide broke down for unsigned operands, a slower
X * larger, more painful algorithm was implemented using scaling and
X * repetitive subtraction/shifting.  Works best for large numbers (fewer
X * shifts that way).
X *
X * The "scale" of an operand is the number of left shifts needed to push its
X * most significant one bit out of the 32-bit value.  When lhs has fewer
X * significant bits than rhs the scale difference is negative, the quotient
X * adder shifts to zero and the quotient comes out as 0.
X */
Xslowuldiv:
X	mov	8.(sp),r0	/ r0 = hi(lhs)
X	mov	10.(sp),r1	/ r1 = lo(lhs)
X	mov	12.(sp),r2	/ r2 = hi(rhs)
X				/ r3 = lo(rhs) - already done
X
X	clr	r4		/ init scale of lhs
X2:
X	ashc	$1,r0
X	blos	1f		/ check for zero at same time
X	inc	r4
X	br	2b
X1:
X	mov	r4,-(sp)	/ save scale of lhs
X	clr	r4
X2:
X	asl	r3
X	rol	r2
X	bcs	1f
X	inc	r4		/ bump rhs scale
X	br	2b
X1:
X	clr	r0
X	mov	$1,r1
X	sub	(sp)+,r4	/ difference in scale (rhs - lhs)
X	ashc	r4,r0		/ initial quotient adder
X	mov	r1,-(sp)	/ quoadder lo
X	mov	r0,-(sp)	/ quoadder hi
X	mov	12.(sp),r0	/ r0 = hi(lhs)
X	mov	14.(sp),r1	/ r1 = lo(lhs)
X	mov	16.(sp),r2	/ r2 = hi(rhs)
X	mov	18.(sp),r3	/ r3 = lo(rhs)
X
X	ashc	r4,r2		/ scale rhs up for repetitive subtraction
X	clr	r4		/ quo lo
X	clr	-(sp)		/ quo hi
Xdocmp:
X	cmp	r2,r0
X	bhi	noadd
X	blo	dosub
X	cmp	r3,r1
X	bhi	noadd
Xdosub:
X	sub	r3,r1
X	sbc	r0
X	sub	r2,r0
X	add	4(sp),r4	/ quo lo += quoadder lo
X	adc	(sp)		/ quo hi
X	add	2(sp),(sp)	/ quo hi += quoadder hi
X	br	docmp
Xnoadd:
X	clc			/ right shift rhs
X	ror	r2
X	ror	r3
X	clc			/ right shift quotient adder
X	ror	2(sp)
X	ror	4(sp)
X	bne	docmp		/ quo adder not 0 means more to do
X	tst	2(sp)
X	bne	docmp
X	mov	(sp)+,r0	/ quo hi
X	mov	r4,r1		/ quo lo
X	cmp	(sp)+,(sp)+	/ remove quot adder
X	br	9b
X#endif	KERNEL
X
X/*
X * u_long ualdiv(lhs, rhs)
X *	u_long	*lhs, rhs;
X *
X * 32-bit "/=" routine.  Calls to ualdiv are generated automatically by the C
X * compiler.
X */
X
X	.globl	ualdiv
Xualdiv:
XENTRY(ualdiv)
X	mov	r2,-(sp)	/ need a register to point at the lhs
X	mov	8.(sp),-(sp)	/ The divide algorithm is long
X	mov	8.(sp),-(sp)	/ enough that it just doesn't make sense
X	mov	8.(sp),r2	/ to bother repeating it.  We just translate
X	mov	2(r2),-(sp)	/ the call for uldiv and let it do the work
X	mov	(r2),-(sp)	/ and return its results (also stuffing it
X	jsr	pc,uldiv	/ into *lhs)
X	add	$8.,sp		/ clean up stack
X	mov	r0,(r2)+	/ store high word,
X	mov	r1,(r2)		/ and low
X	mov	(sp)+,r2	/ restore r2
X	rts	pc		/ and return
SHAR_EOF
fi
exit 0
#	End of shell archive