Subject: New and improved 'strcompact', 'symcompact' tools (#180) Index: ucb/strcompact,symcompact,symdump 2.11BSD Description: The utilities which compact symbol and string tables have been rewritten. Almost a factor of four speedup was seen in the 'strcompact' run time. Repeat-By: N/A Fix: All updates are available via anonymous FTP to ftp.iipo.gtegsc.com in the directory /pub/2.11BSD. The patch below includes three parts: 1) a 'script' file containing the 'rm' commands to remove the old versions of the sources. 2) a 'patchfile' which updates the ucb/Makefile. 3) a shar file of the replacement sources for 'strcompact', 'symcompact' and the new utility 'symdump' There is a new utility present: 'symdump'. This program is used by strcompact. 'symdump' reads the symbol+string tables of an object file and produces lines of the form: symbol_string type overlay value These lines are well suited to being processed by a multi-key 'sort' and indeed this is what is done now. In addition to being much faster the new versions of symcompact and strcompact realize a dramatic reduction in the symbol and string table sizes of /unix and /netnix. The new versions removed about *an additional* 4kb from the kernel and about another 3kb from the networking image - resulting in programs such as 'netstat' and 'vmstat' running faster. The following steps will install the new versions of the utilities: 1) save the file below to /tmp/foo 2) cd /tmp 3) sh foo 4) ./script 5) patch -p0 < patchfile 6) sh new.sources 7) cd /usr/src/ucb 8) make strcompact symcompact symdump 9) install -s strcompact symcompact symdump /usr/ucb 10) cd /tmp 11) rm foo script patchfile new.sources ===== cut here #! /bin/sh # This is a shell archive, meaning: # 1. Remove everything above the #! /bin/sh line. # 2. Save the resulting text in a file. # 3. Execute the file with /bin/sh (not csh) to create: # /tmp/script # /tmp/patchfile # /tmp/new.sources # This archive created: Mon Feb 21 20:23:35 1994 export PATH; PATH=/bin:/usr/bin:$PATH if test -f '/tmp/script' then echo shar: "will not over-write existing file '/tmp/script'" else sed 's/^X//' << \SHAR_EOF > '/tmp/script' X#! /bin/sh Xrm -f /usr/src/ucb/symcompact.c Xrm -f /usr/src/ucb/strcompact.c Xrm -f /usr/src/ucb/symdump.c SHAR_EOF chmod 755 '/tmp/script' fi if test -f '/tmp/patchfile' then echo shar: "will not over-write existing file '/tmp/patchfile'" else sed 's/^X//' << \SHAR_EOF > '/tmp/patchfile' X*** /usr/src/ucb/Makefile.old Sat Jan 22 10:55:20 1994 X--- /usr/src/ucb/Makefile Sat Feb 12 22:24:53 1994 X*************** X*** 3,9 **** X # All rights reserved. The Berkeley software License Agreement X # specifies the terms and conditions for redistribution. X # X! # @(#)Makefile 5.17.1 (2.11BSD GTE) 1/22/94 X # X DESTDIR= X CFLAGS= -O X--- 3,9 ---- X # All rights reserved. The Berkeley software License Agreement X # specifies the terms and conditions for redistribution. X # X! # @(#)Makefile 5.17.2 (2.11BSD GTE) 2/12/94 X # X DESTDIR= X CFLAGS= -O X*************** X*** 25,31 **** X STD= apply biff checknr colcrt colrm ctags expand fold \ X from gprof grep groups head last lastcomm leave logger mkstr \ X printenv ruptime rwho sccs script soelim strings strcompact \ X! symcompact symorder tail tcopy telnet unexpand unifdef users \ X whereis whoami whois what wc xstr yes X X # C programs that live in the current directory and need explicit make lines. X--- 25,31 ---- X STD= apply biff checknr colcrt colrm ctags expand fold \ X from gprof grep groups head last lastcomm leave logger mkstr \ X printenv ruptime rwho sccs script soelim strings strcompact \ X! symcompact symdump symorder tail tcopy telnet unexpand unifdef users \ X whereis whoami whois what wc xstr yes X X # C programs that live in the current directory and need explicit make lines. SHAR_EOF chmod 644 '/tmp/patchfile' fi if test -f '/tmp/new.sources' then echo shar: "will not over-write existing file '/tmp/new.sources'" else sed 's/^X//' << \SHAR_EOF > '/tmp/new.sources' X#! /bin/sh X# This is a shell archive, meaning: X# 1. Remove everything above the #! /bin/sh line. X# 2. Save the resulting text in a file. X# 3. Execute the file with /bin/sh (not csh) to create: X# /usr/src/ucb/symcompact.c X# /usr/src/ucb/symdump.c X# /usr/src/ucb/strcompact.c X# This archive created: Mon Feb 21 20:20:24 1994 Xexport PATH; PATH=/bin:/usr/bin:$PATH Xif test -f '/usr/src/ucb/symcompact.c' Xthen X echo shar: "will not over-write existing file '/usr/src/ucb/symcompact.c'" Xelse Xsed 's/^X//' << \SHAR_EOF > '/usr/src/ucb/symcompact.c' XX/* XX * Program Name: symcompact.c XX * Date: January 21, 1994 XX * Author: S.M. Schultz XX * XX * ----------------- Modification History --------------- XX * Version Date Reason For Modification XX * 1.0 21Jan94 1. Initial release into the public domain. XX * 1.1 11Feb94 2. Remove register symbols to save memory. XX*/ XX XX/* XX * This program compacts the symbol table of an executable. This is XX * done by removing '~symbol' references when _both_ the '~symbol' and XX * '_symbol' have an overlay number of 0. The assembler always generates XX * both forms. The only time both forms are needed is in an overlaid XX * program and the routine has been relocated by the linker, in that event XX * the '_' form is the overlay "thunk" and the '~' form is the actual XX * routine itself. Only 'text' symbols have both forms. Reducing the XX * number of symbols greatly speeds up 'nlist' processing as well as XX * cutting down memory requirements for programs such as 'adb' and 'nm'. XX * XX * NOTE: This program attempts to hold both the string and symbol tables XX * in memory. For the kernel which has not been 'strcompact'd this XX * amounts to about 49kb. IF this program runs out of memory you should XX * run 'strcompact' first - that program removes redundant strings, XX * significantly reducing the amount of memory needed. Alas, this program XX * will undo some of strcompact's work and you may/will need to run XX * strcompact once more after removing excess symbols. XX * XX * Register symbols are removed to save memory. This program was initially XX * used with a smaller kernel, adding an additional driver caused the symbol XX * table to grow enough that memory couldn't be allocated for strings. See XX * the comments in 'symorder.c' - they explain why register variables are XX * no big loss. XX*/ XX XX#include XX#include XX#include XX#include XX#include XX#include XX#include XX XX char *Pgm; XXstatic char strtmp[20]; XX XXmain(argc, argv) XX int argc; XX char **argv; XX { XX FILE *fp, *strfp; XX int cnt, nsyms, len, c, symsremoved = 0, i; XX void cleanup(); XX char *strtab; XX char fbuf1[BUFSIZ], fbuf2[BUFSIZ]; XX off_t symoff, stroff, ltmp; XX long strsiz; XX register struct nlist *sp, *sp2; XX struct nlist *symtab, *symtabend, syment; XX struct xexec xhdr; XX XX Pgm = argv[0]; XX signal(SIGQUIT, cleanup); XX signal(SIGINT, cleanup); XX signal(SIGHUP, cleanup); XX XX if (argc != 2) XX { XX fprintf(stderr, "%s: filename argument missing\n", Pgm); XX exit(EX_USAGE); XX } XX fp = fopen(argv[1], "r+"); XX if (!fp) XX { XX fprintf(stderr, "%s: can't open '%s' for update\n", Pgm, XX argv[1]); XX exit(EX_NOINPUT); XX } XX setbuf(fp, fbuf1); XX cnt = fread(&xhdr, 1, sizeof (xhdr), fp); XX if (cnt < sizeof (xhdr.e)) XX { XX fprintf(stderr, "%s: Premature EOF reading header\n", Pgm); XX exit(EX_DATAERR); XX } XX if (N_BADMAG(xhdr.e)) XX { XX fprintf(stderr, "%s: Bad magic number\n", Pgm); XX exit(EX_DATAERR); XX } XX nsyms = xhdr.e.a_syms / sizeof (struct nlist); XX if (!nsyms) XX { XX fprintf(stderr, "%s: '%s' stripped\n", Pgm); XX exit(EX_OK); XX } XX stroff = N_STROFF(xhdr); XX symoff = N_SYMOFF(xhdr); XX/* XX * Seek to the string table size longword and read it. Then attempt to XX * malloc memory to hold the string table. First make a sanity check on XX * the size. XX*/ XX fseek(fp, stroff, L_SET); XX fread(&strsiz, sizeof (long), 1, fp); XX if (strsiz > 48 * 1024L) XX { XX fprintf(stderr, "%s: string table > 48kb\n", Pgm); XX exit(EX_DATAERR); XX } XX strtab = (char *)malloc((int)strsiz); XX if (!strtab) XX { XX fprintf(stderr, "%s: no memory for strings\n", Pgm); XX exit(EX_OSERR); XX } XX/* XX * Now read the string table into memory. Reduce the size read because XX * we've already retrieved the string table size longword. Adjust the XX * address used so that we don't have to adjust each symbol table entry's XX * string offset. XX*/ XX cnt = fread(strtab + sizeof (long), 1, (int)strsiz - sizeof (long), fp); XX if (cnt != (int)strsiz - sizeof (long)) XX { XX fprintf(stderr, "%s: Premature EOF reading strings\n", Pgm); XX exit(EX_DATAERR); XX } XX/* XX * Seek to the symbol table. Scan it and count how many symbols are XX * significant. XX*/ XX fseek(fp, symoff, L_SET); XX cnt = 0; XX for (i = 0; i < nsyms; i++) XX { XX fread(&syment, sizeof (syment), 1, fp); XX if (exclude(&syment)) XX continue; XX cnt++; XX } XX XX/* XX * Allocate memory for the symbol table. XX*/ XX symtab = (struct nlist *)malloc(cnt * sizeof (struct nlist)); XX if (!symtab) XX { XX fprintf(stderr, "%s: no memory for symbols\n", Pgm); XX exit(EX_OSERR); XX } XX XX/* XX * Now read the symbols in, excluding the same ones as before, and XX * assign the in-memory string addresses at the same time XX*/ XX sp = symtab; XX fseek(fp, symoff, L_SET); XX XX for (i = 0; i < nsyms; i++) XX { XX fread(&syment, sizeof (syment), 1, fp); XX if (exclude(&syment)) XX continue; XX bcopy(&syment, sp, sizeof (syment)); XX sp->n_un.n_name = strtab + (int)sp->n_un.n_strx; XX sp++; XX } XX symtabend = &symtab[cnt]; XX XX/* XX * Now look for symbols with overlay numbers of 0 (root/base segment) and XX * of type 'text'. For each symbol found check if there exists both a '~' XX * and '_' prefixed form of the symbol. Preserve the '_' form and clear XX * the '~' entry by zeroing the string address of the '~' symbol. XX*/ XX for (sp = symtab; sp < symtabend; sp++) XX { XX if (sp->n_ovly) XX continue; XX if ((sp->n_type & N_TYPE) != N_TEXT) XX continue; XX if (sp->n_un.n_name[0] != '~') XX continue; XX/* XX * At this point we have the '~' form of a non overlaid text symbol. Look XX * thru the symbol table for the '_' form. All of 1) symbol type, 2) Symbol XX * value and 3) symbol name (starting after the first character) must match. XX*/ XX for (sp2 = symtab; sp2 < symtabend; sp2++) XX { XX if (sp2->n_ovly) XX continue; XX if ((sp2->n_type & N_TYPE) != N_TEXT) XX continue; XX if (sp2->n_un.n_name[0] != '_') XX continue; XX if (sp2->n_value != sp->n_value) XX continue; XX if (strcmp(sp->n_un.n_name+1, sp2->n_un.n_name+1)) XX continue; XX/* XX * Found a match. Null out the '~' symbol's string address. XX*/ XX symsremoved++; XX sp->n_un.n_strx = NULL; XX break; XX } XX } XX/* XX * Done with the nested scanning of the symbol table. Now create a new XX * string table (from the remaining symbols) in a temporary file. XX*/ XX strcpy(strtmp, "/tmp/strXXXXXX"); XX mktemp(strtmp); XX strfp = fopen(strtmp, "w+"); XX if (!strfp) XX { XX fprintf(stderr, "%s: can't create '%s'\n", Pgm, strtmp); XX exit(EX_CANTCREAT); XX } XX setbuf(strfp, fbuf2); XX XX/* XX * As each symbol is written to the tmp file the symbol's string offset XX * is updated with the new file string table offset. XX*/ XX ltmp = sizeof (long); XX for (sp = symtab; sp < symtabend; sp++) XX { XX if (!sp->n_un.n_name) XX continue; XX len = strlen(sp->n_un.n_name) + 1; XX fwrite(sp->n_un.n_name, len, 1, strfp); XX sp->n_un.n_strx = ltmp; XX ltmp += len; XX } XX/* XX * We're done with the memory string table - give it back. Then reposition XX * the new string table file to the beginning. XX*/ XX free(strtab); XX rewind(strfp); XX XX/* XX * Position the executable file to where the symbol table begins. Truncate XX * the file. Write out the valid symbols, counting each one so that the XX * a.out header can be updated when we're done. XX*/ XX nsyms = 0; XX fseek(fp, symoff, L_SET); XX ftruncate(fileno(fp), ftell(fp)); XX for (sp = symtab; sp < symtabend; sp++) XX { XX if (sp->n_un.n_strx == 0) XX continue; XX nsyms++; XX fwrite(sp, sizeof (struct nlist), 1, fp); XX } XX/* XX * Next write out the string table size longword. XX*/ XX fwrite(<mp, sizeof (long), 1, fp); XX/* XX * We're done with the in memory symbol table, release it. Then append XX * the string table to the executable file. XX*/ XX free(symtab); XX while ((c = getc(strfp)) != EOF) XX putc(c, fp); XX fclose(strfp); XX rewind(fp); XX xhdr.e.a_syms = nsyms * sizeof (struct nlist); XX fwrite(&xhdr.e, sizeof (xhdr.e), 1, fp); XX fclose(fp); XX printf("%s: %d symbols removed\n", Pgm, symsremoved); XX cleanup(); XX } XX XXvoid XXcleanup() XX { XX if (strtmp[0]) XX unlink(strtmp); XX exit(EX_OK); XX } XX XX/* XX * Place any symbol exclusion rules in this routine, return 1 if the XX * symbol is to be excluded, 0 if the symbol is to be retained. XX*/ XX XXexclude(sp) XX register struct nlist *sp; XX { XX XX if (sp->n_type == N_REG) XX return(1); XX if (sp->n_un.n_strx == 0) XX return(1); XX return(0); XX } XSHAR_EOF Xchmod 644 '/usr/src/ucb/symcompact.c' Xfi Xif test -f '/usr/src/ucb/symdump.c' Xthen X echo shar: "will not over-write existing file '/usr/src/ucb/symdump.c'" Xelse Xsed 's/^X//' << \SHAR_EOF > '/usr/src/ucb/symdump.c' XX/* XX * Program Name: strdump.c XX * Date: January 21, 1994 XX * Author: S.M. Schultz XX * XX * ----------------- Modification History --------------- XX * Version Date Reason For Modification XX * 1.0 12Feb94 1. Initial release into the public domain. XX*/ XX XX/* XX * Dump the symbol table of a program to stdout, one symbol per line in XX * the form: XX * XX * symbol_string type overlay value XX * XX * Typical use is to feed the output of this program into: XX * XX * "sort +0 -1 +1n -2 +2n -3 +3n -4 -u" XX * XX * This program is used by 'strcompact' to compress the string (and XX * symbol) tables of an executable. XX*/ XX XX#include XX#include XX#include XX#include XX#include XX#include XX#include XX XX char **xargv; /* global copy of argv */ XX char *strp; /* pointer to in-memory string table */ XX struct xexec xhdr; /* the extended a.out header */ XX XXextern char *malloc(); XX XXmain(argc, argv) XX int argc; XX char **argv; XX { XX XX if (argc != 2) XX { XX fprintf(stderr, "%s: need a file name\n", argv[0]); XX exit(1); XX } XX xargv = ++argv; XX namelist(); XX exit(0); XX } XX XXnamelist() XX { XX char ibuf[BUFSIZ]; XX register FILE *fi; XX off_t o, stroff; XX long strsiz; XX register int n; XX XX fi = fopen(*xargv, "r"); XX if (fi == NULL) XX error("cannot open"); XX setbuf(fi, ibuf); XX XX fread((char *)&xhdr, 1, sizeof(xhdr), fi); XX if (N_BADMAG(xhdr.e)) XX error("bad format"); XX rewind(fi); XX XX o = N_SYMOFF(xhdr); XX fseek(fi, o, L_SET); XX n = xhdr.e.a_syms / sizeof(struct nlist); XX if (n == 0) XX error("no name list"); XX XX stroff = N_STROFF(xhdr); XX fseek(fi, stroff, L_SET); XX if (fread(&strsiz, sizeof (long), 1, fi) != 1) XX error("no string table"); XX strp = (char *)malloc((int)strsiz); XX if (strp == NULL || strsiz > 48 * 1024L) XX error("no memory for strings"); XX if (fread(strp+sizeof(strsiz),(int)strsiz-sizeof(strsiz),1,fi)!=1) XX error("error reading strings"); XX XX fseek(fi, o, L_SET); XX dumpsyms(fi, n); XX free((char *)strp); XX fclose(fi); XX } XX XXdumpsyms(fi, nsyms) XX register FILE *fi; XX int nsyms; XX { XX register int n; XX struct nlist sym; XX register struct nlist *sp; XX XX sp = &sym; XX for (n = 0; n < nsyms; n++) XX { XX fread(&sym, sizeof sym, 1, fi); XX printf("%s %u %u %u\n", strp + (int)sp->n_un.n_strx, sp->n_type, XX sp->n_ovly, sp->n_value); XX } XX } XX XXerror(s) XX char *s; XX { XX fprintf(stderr, "syms: %s: %s\n", *xargv, s); XX exit(1); XX } XSHAR_EOF Xchmod 644 '/usr/src/ucb/symdump.c' Xfi Xif test -f '/usr/src/ucb/strcompact.c' Xthen X echo shar: "will not over-write existing file '/usr/src/ucb/strcompact.c'" Xelse Xsed 's/^X//' << \SHAR_EOF > '/usr/src/ucb/strcompact.c' XX/* XX * Program Name: strcompact.c XX * Date: February 12, 1994 XX * Author: S.M. Schultz XX * XX * ----------------- Modification History --------------- XX * Version Date Reason For Modification XX * 1.0 21Jan94 1. Initial release into the public domain. XX * 2.0 12Feb94 2. Rewrite. Use new utility program 'symdump' XX * and a multi-key sort to not only create XX * shared symbol strings but remove identical XX * symbols (identical absolute local symbols XX * are quite common). Execution speed was XX * speed up by about a factor of 4. XX*/ XX XX/* XX * This program compacts the string table of an executable image by XX * preserving only a single string definition of a symbol and updating XX * the symbol table string offsets. Multiple symbols having the same XX * string are very common - local symbols in a function often have the XX * same name ('int error' inside a function for example). This program XX * reduced the string table size of the kernel at least 25%! XX * XX * In addition, local symbols with the same value (frame offset within XX * a function) are very common. By retaining only a single '~error=2' XX * for example the symbol table is reduced even further (about 500 symbols XX * are removed from a typical kernel). XX*/ XX XX#include XX#include XX#include XX#include XX#include XX#include XX XX char *Pgm; XX char *Sort = "/usr/bin/sort"; XX char *Symdump = "/usr/ucb/symdump"; XXstatic char strfn[32], symfn[32]; XX XXmain(argc, argv) XX int argc; XX char **argv; XX { XX struct nlist sym; XX char buf1[128], symname[64], savedname[64]; XX struct xexec xhdr; XX int nsyms, len; XX FILE *symfp, *strfp, *sortfp; XXregister FILE *fpin; XX long stroff; XX unsigned short type, value, ovly; XX void cleanup(); XX XX Pgm = argv[0]; XX signal(SIGQUIT, cleanup); XX signal(SIGINT, cleanup); XX signal(SIGHUP, cleanup); XX XX if (argc != 2) XX { XX fprintf(stderr, "%s: missing filename argument\n", Pgm); XX exit(EX_USAGE); XX } XX fpin = fopen(argv[1], "r+"); XX if (!fpin) XX { XX fprintf(stderr, "%s: can not open '%s' for update\n", XX Pgm, argv[1]); XX exit(EX_NOINPUT); XX } XX if (fread(&xhdr, 1, sizeof (xhdr), fpin) < sizeof (xhdr.e)) XX { XX fprintf(stderr, "%s: premature EOF\n", Pgm); XX exit(EX_DATAERR); XX } XX if (N_BADMAG(xhdr.e)) XX { XX fprintf(stderr, "%s: Bad magic number\n", Pgm); XX exit(EX_DATAERR); XX } XX nsyms = xhdr.e.a_syms / sizeof (struct nlist); XX if (!nsyms) XX { XX fprintf(stderr, "%s: '%s' stripped\n", Pgm, argv[1]); XX exit(EX_OK); XX } XX XX strcpy(strfn, "/tmp/strXXXXXX"); XX mktemp(strfn); XX strcpy(symfn, "/tmp/symXXXXXX"); XX mktemp(symfn); XX XX sprintf(buf1, "%s %s | %s +0 -1 +1n -2 +2n -3 +3n -4 -u", Symdump, XX argv[1], Sort); XX sortfp = popen(buf1, "r"); XX if (!sortfp) XX { XX fprintf(stderr, "%s: symdump | sort failed\n", Pgm); XX exit(EX_SOFTWARE); XX } XX symfp = fopen(symfn, "w+"); XX strfp = fopen(strfn, "w+"); XX if (!symfp || !strfp) XX { XX fprintf(stderr, "%s: can't create %s or %s\n", symfn, strfn); XX exit(EX_CANTCREAT); XX } XX XX stroff = sizeof (long); XX len = 0; XX nsyms = 0; XX while (fscanf(sortfp, "%s %u %u %u\n", symname, &type, &ovly, XX &value) == 4) XX { XX if (strcmp(symname, savedname)) XX { XX stroff += len; XX len = strlen(symname) + 1; XX fwrite(symname, len, 1, strfp); XX strcpy(savedname, symname); XX } XX sym.n_un.n_strx = stroff; XX sym.n_type = type; XX sym.n_ovly = ovly; XX sym.n_value = value; XX fwrite(&sym, sizeof (sym), 1, symfp); XX nsyms++; XX } XX stroff += len; XX XX pclose(sortfp); XX rewind(symfp); XX rewind(strfp); XX XX if (nsyms == 0) XX { XX fprintf(stderr, "%s: No symbols - %s not modified\n", argv[1]); XX cleanup(); XX } XX XX fseek(fpin, N_SYMOFF(xhdr), L_SET); XX XX/* XX * Now append the new symbol table. Then write the string table length XX * followed by the string table. Finally truncate the file to the new XX * length, reflecting the smaller string table. XX*/ XX copyfile(symfp, fpin); XX fwrite(&stroff, sizeof (long), 1, fpin); XX copyfile(strfp, fpin); XX ftruncate(fileno(fpin), ftell(fpin)); XX XX/* XX * Update the header with the correct symbol table size. XX*/ XX rewind(fpin); XX xhdr.e.a_syms = nsyms * sizeof (sym); XX fwrite(&xhdr, sizeof (xhdr.e), 1, fpin); XX XX fclose(fpin); XX fclose(symfp); XX fclose(strfp); XX cleanup(); XX } XX XXcopyfile(in, out) XX register FILE *in, *out; XX { XX register int c; XX XX while ((c = getc(in)) != EOF) XX putc(c, out); XX } XX XXfatal(str) XX char *str; XX { XX XX if (strfn[0]) XX unlink(strfn); XX if (symfn[0]) XX unlink(symfn); XX if (!str) XX exit(EX_OK); XX fprintf(stderr, "%s: %s\n", str); XX exit(EX_SOFTWARE); XX } XX XXvoid XXcleanup() XX { XX fatal((char *)NULL); XX } XSHAR_EOF Xchmod 644 '/usr/src/ucb/strcompact.c' Xfi Xexit 0 X# End of shell archive SHAR_EOF chmod 644 '/tmp/new.sources' fi exit 0 # End of shell archive