/*
 *
 * Sort utility.
 *
 * Author:
 *	David Conroy
 *
 * Edits:
 *    - Very slightly modified by Martin Minow (13-Mar-81 for new library)
 *    - Heapsort algorithm changed, program commented by Bob Denny
 *	 (old algorithm failed on large files) (09-Jul-81)
 *    - Quicksort algorithm changed to non-recursive version by Tim Coad
 *       and Bob Denny. This version requires only a tiny stack, as it
 *       manually manages the sort 'request' stack, and always stacks the
 *       larger segment.  Now you only need log(2) of NLINE cells of stack,
 *       so we upped NLINE to 512. Finally, the "pivot" point was changed
 *       so as to minimize worst case performance degradation. For more
 *       info, see "Algorithms + Data Structures = Programs" by Niklaus
 *       Wirth, Prentice Hall, 1976, Section 2.2.6. While we were at it,
 *       we cleaned up the error handling to conform to DEC-ish standards.
 *
 *       Drop us a line if you have other improvements or you find bugs:
 *
 *       Bob Denny & Tim Coad
 *       Creative System Design Co.
 *       3452 E. Foothill Blvd.  Suite 601
 *       Pasadena, CA  91107
 *       (213) 792-9474  (24 hrs)
 *
 *
 * sort [-nr] [-o outputfile] [file ...]
 *
 * Sort sorts all of the named files together and writes the  result
 * to  the standard output.  The standard input is sorted if no file
 * names are supplied;  sort may be used as a filter.
 *
 * The `-o' option causes the sorted output to  be  written  to  the
 * named  output file instead of to the standard output.  The output
 * file may be the same as one of the input files.
 *
 * The default sort key is the entire  line.   Default  ordering  is
 * lexicographic  in  ASCII  collating sequence with upper and lower
 * case considered different.
 *
 * The `-n' option changes the ordering to ascending arithmetic on a
 * leading numeric string consisting of digits and an optional sign.
 *
 * The `-r' option reverses the sense of the comparisons.
 *
 * Error messages:
 *      The following messages occur on a non-severe error. SORTC
 *      will exit with "error" status.
 *
 *      "?SORT-F-Cannot create temp. file"
 *             The required temporary file cannot be created  in  the
 *             current directory.
 *      "?SORT-F-Cannot open input file."
 *             An input file cannot be accessed for reading.
 *      "?SORT-F-Cannot create output file."
 *             An output file cannot be  created  for writing.
 *      "?SORT-F-Out of space."
 *             There was insufficient memory space for the sort.
 *
 *      The following messages occur on a severe error. SORT will
 *      exit with "severe error" status. Get help.
 *
 *      "?SORT-U-Stack overflow."
 *      "?SORT-U-Unexpected end of temp. file"
 *      "?SORT-U-Empty run encountered."
 *      "?SORT-U-Cannot reopen temp. file."
 *
 * Bugs:
 *
 * Dynamic memory could be better managed.
 *
 */

#include <stdio.h>

#define cycle while(1)                 /* Custom control structure: endless loop, left via break */

/*** #define debug ***/                /* Uncomment to compile in the trace output */

#define  NLINE  512                    /* Max. number of lines held in core per run */
#define  TEMP   "sort.tmp"             /* Name of the temporary (run) file */

struct  run    {                       /* RUN DESCRIPTOR NODE */
               struct  run *r_rp;      /* Forward link (NULL on the last run) */
               long    r_seek;         /* Temp file position of the next unread line */
               int     r_size;         /* Number of lines still unread in the run */
               };

                                       /* RUN DESCRIPTOR LINKED LIST */
struct  run *crp  = NULL;              /* Current run pointer (preallocated node) */
struct  run *frp  = NULL;              /* First run pointer (NULL until a run is saved) */
struct  run *lrp  = NULL;              /* Last run pointer */

struct  heap   {                       /* HEAP DESCRIPTOR NODE */
               struct  run *h_rp;      /* Run this node draws its lines from */
               char    *h_lp;          /* Current line of that run */
               };

struct  heap *heap;                    /* Merge heap, one node per run */

char *line[NLINE];                     /* This is static now: lines of the run in core */

                                       /* FILES */
FILE    *ofp;                          /* Output file */
FILE    *tfp    = NULL;                /* Temporary file */
FILE    *ifp    = NULL;                /* Input file */

char    *ofn    = NULL;                /* Output file name from -o (NULL = stdout) */

long tstart;                           /* Start time mark */
int nline  = 0;                        /* # lines in this run */
int nruns  = 0;                        /* # runs in this sort */
int nrec   = 0;                        /* # records in this sort */

char    lbuf[256];                     /* Line buffer shared by all readers */

int     nflag;                         /* Numeric sort flag */
int     rflag;                         /* Reverse sort flag */

extern  char *getline();               /* Forward declaration */
extern  char *nalloc();                /* Forward declaration */
extern  long ftell();
extern  long time();

/*
 * Non-recursive quicksort's little stack.
 *
 * The stack grows downward from the end of the array: 'stackptr'
 * addresses the most recently pushed request, and the stack is empty
 * when 'stackptr' equals 'stack + STACKSIZE' (this is the condition
 * quick() uses to decide that it has finished). It must therefore
 * start out at 'stack + STACKSIZE'; starting one cell lower makes the
 * first sort pop a phantom all-zero request and leaves it one cell
 * short.
 */

#define STACKSIZE 10  /* LOG2 of NLINE plus 1 */

struct stack {
                    int rght, lft;     /* Inclusive bounds of a pending partition */
                    };

static struct stack stack[STACKSIZE];
static struct stack *stackptr = stack + STACKSIZE;

/*
 * Main program.
 *
 * Parses the switches, then reads every input line (from the named
 * files, or from stdin when no file is named). Lines are collected
 * NLINE at a time, sorted in core and appended to the temp file as a
 * "run". If everything fits in the first run the sorted lines go
 * straight to the output; otherwise the runs are merged through a
 * heap. Every path ends in quit(), errxit(), panic() or usage().
 *
 * Each line is stored in exactly strlen+1 bytes so that the copy,
 * including its terminating NUL, fits the allocation.
 */
main(argc, argv)
int argc;
char *argv[];
   {
   register char *cp;
   register struct run *rp;
   register struct heap *hp;
   int nlbuf;                          /* # cells needed for this line */
   int i, nf;
   char c;

   /*
    * Process command line. Set switches.
    */
   tstart = time(0);                   /* Mark starting time */
   nf = argc - 1;                      /* Assume all args are files */
   for (i=1; i<argc; ++i)              /* For all args (except cmd) */
      {
      cp = argv[i];                    /* Make a fast pointer to arg */
      if (*cp == '?')                  /* "?" gets help */
         usage();
      if (*cp == '-')                  /* If it starts with "-" ... */
         {
         --nf;                         /* ...it's not a file */
         argv[i] = NULL;               /* (null this arg) */
         ++cp;                         /* Scan by the "-", and ... */
         while ((c = *cp++) != '\0')   /* ... process switch */
            switch (c)
               {

               case 'n':
               case 'N':
                  ++nflag;
                  break;

               case 'o':
               case 'O':
                  if (++i >= argc)     /* -o consumes the next arg */
                     usage();
                  ofn = argv[i];
                  --nf;                /* The output name is no input file */
                  argv[i] = NULL;
                  break;

               case 'r':
               case 'R':
                  ++rflag;
                  break;

               default:
                  usage();
               }
         }
      }
   if (nf == 0)                        /* Default input from stdin */
      ifp = stdin;

   /*
    * Open the temporary file
    */
   if ((tfp = fopen(TEMP, "w")) == NULL)
      {
      errxit("Cannot create temp. file.\n");
      }

   /*
    * Allocate a current run descriptor node.
    */
   crp = (struct run *) nalloc(sizeof(struct run));

   /*
    * Step 1. Create the temp file and the linked list of run
    *         descriptors. Read all given files, breaking them
    *         up into NLINE line blocks (runs), sorting each run,
    *         writing the sorted block of lines to the temp file,
    *         filling in a 'run descriptor', and linking it in
    *         to the run list (whew!).
    */
   cycle                               /* For each file in the list ... */
      {

      /*
       * If no input file open, open the file on 'ifp'. If no more
       * files on the list, break out of this cycle loop.
       */
      if (ifp == NULL)                 /* If not running from stdin, */
         {                             /*  open the next file in the list */
         for (i=1; i<argc; ++i)
            if ((cp = argv[i]) != NULL)
               break;                  /* (found an unused name) */
         if (i >= argc)
            break;                     /* (end of file list) */
         argv[i] = NULL;               /* Mark the name as used */
         if ((ifp = fopen(cp, "r")) == NULL)
            {
            fprintf(stderr, "\"%s\"\n", cp);
            errxit("Cannot open input file.\n");
            }
         }

      /*
       * Read a line into lbuf. Handle EOF condition.
       */
      if (fgets(lbuf, sizeof lbuf, ifp) == NULL) /* Get a line or EOF */
         {                             /* EOF */
         if (nf == 0)                  /* If using stdin, break. */
            break;
         fclose(ifp);                  /* Else close this file */
         ifp = NULL;                   /* Mark no input file open */
         continue;                     /* Go for another input file */
         }

      nlbuf = strlen(lbuf) + 1;        /* # cells needed, incl. the NUL */
      nrec++;                          /* Count the record */

      /*
       * If we've read NLINE lines, or have run out of dynamic memory,
       * sort this run and save it, then allocate a new current run
       * descriptor node.
       *
       * In either case, allocate cp --> dynamic area for just read line.
       */
      if (nline >= NLINE || (cp = malloc(nlbuf)) == NULL)
         {                             /* End of this run ... */
         if (nline > 1)                /* (0 or 1 lines need no sorting) */
            quick(0, nline-1);         /* Sort this batch of lines */
         saverun();                    /* Link in this run descriptor */
         putline(tfp);                 /* File the sorted lines */
         crp = (struct run *) nalloc(sizeof(struct run)); /* New run node */
         cp  = nalloc(nlbuf);          /* Reserve room for line in lbuf */
         }

      /*
       * Copy the line into the new space. Fill in the next pointer
       * in the 'line' pointer array.
       */
      strcpy(cp, lbuf);
      line[nline++] = cp;
      }

   /*
    * Nothing more to read in. There is a (probably) partial run in
    * core now. Sort its lines. An empty input leaves nothing to sort.
    */
   if (nline > 1)
      quick(0, nline-1);

   /*
    * Here we handle tiny input files, those so small that we are
    * still in the first run. If this is the case, just write the
    * sorted lines to the output file, clean up and we're done.
    */
   if (frp == NULL)
      {
#ifdef debug
      msg ("Small file. In-core sort was done.");
#endif
      openoutput();
      putline(ofp);
      quit();
      }

   /*
    * Multi-run sort. Save the (partial) last run we just sorted;
    * close the temp file.
    */
   saverun();
   putline(tfp);
   fclose(tfp);

#ifdef debug
   printf("There are %d records in %d runs.\n", nrec, nruns);
#endif

   /*
    * Temp file is constructed. It consists of the concatenation of
    * run blocks from (all of) the input file(s).
    */
   openoutput();                       /* Open the output file */
   if ((tfp = fopen(TEMP, "r")) == NULL) /* Reopen temp file for read */
      panic("Cannot reopen temp. file.\n");

   /*
    * Step 2. - Merge runs via heap filter for output.
    *
    * Here we do a 'heapsort' on the data in the runs, extracting
    * lines from the runs one at a time and sifting them through
    * the heap. This process is described in
    * "Algorithms + Data Structures = Programs" by N.Wirth, Prentice Hall
    * 1976, section 2.2.5, pp73-76.
    *
    * Construct a 'heap' with the number of heap nodes equal to the
    * number of runs. Set up fast pointers to the first run descriptor
    * node and the first heap descriptor node.
    */

#ifdef debug
   puts("Build the initial heap.");
#endif

   heap = (struct heap *) nalloc(nruns * sizeof(struct heap)); /* Make the heap */
   rp = frp;                           /* Fast run pointer --> first run */
   hp = heap;
   /*
    * Now build the initial heap. This is done in 2 steps,
    * per the "R. W. Floyd" method given by Wirth (loc. cit.).
    * First, read the first line from each run into a node in
    * the heap.
    */
   while (rp != NULL)
      {
      hp->h_rp = rp;
      if (((hp++)->h_lp = getline(rp)) == NULL)
         panic("Empty run encountered.\n");
      rp = rp->r_rp;
      }
#ifdef debug
   printf("Raw heap:\n");
   for (i=0; i<nruns; ++i)             /* 'hp' is past the end by now */
      printf("N%4d|%s", i, heap[i].h_lp);
#endif

   /*
    * Next, sift the top half of the heap.
    */
   i = nruns/2;
   while (i>0)
      sift(--i);

#ifdef debug
      puts("\nPerform heapsort procedure:");
#endif

   /*
    * The top of the heap is always the next line to output. Replace
    * it with the next line of the same run; when that run is used
    * up, shrink the heap by moving its last node to the top. Either
    * way, sift the new top back into place.
    */
   hp = heap;
   while (nruns)
      {
      cp = hp[0].h_lp;
      fputs(cp, ofp);
      mfree(cp);
      if ((hp[0].h_lp = getline(hp[0].h_rp)) == NULL)
         {
         --nruns;
         hp[0].h_rp = hp[nruns].h_rp;
         hp[0].h_lp = hp[nruns].h_lp;
#ifdef debug
         printf("Run exhausted. Nruns now = %d\n", nruns);
#endif
         }
      sift(0);
      }
   quit();
   } /* END OF MAIN */

/*
 * Select the output stream and leave it in `ofp'.
 * With no `-o' name the output is `stdout'; otherwise
 * the named file is created for writing, and failure
 * to create it names the file on stderr and ends the
 * program through errxit().
 */
openoutput()
   {
   if (ofn != NULL)
      {
      ofp = fopen(ofn, "w");
      if (ofp == NULL)
         {
         fprintf(stderr, "\"%s\"\n", ofn);
         errxit("Cannot create output file.\n");
         }
      }
   else
      ofp = stdout;
   }


/*
 * Quicksort as described in N. Wirths's
 * "Algorithms + Data Structures = Programs"
 * A pearl of software engineering.
 *
 * Sorts line[0] .. line[nline-1] in place, ordering by compare().
 * Callers pass (0, nline-1), but the arguments are not used: the
 * bounds are always taken from 'nline'.
 *
 * Non-recursive: pending partitions are kept on the small explicit
 * 'stack'. After each split the larger part is stacked and the
 * smaller part is sorted next. The stack is reset on entry, so the
 * routine does not depend on the state left by an earlier call, and
 * fewer than two lines are left untouched (there is no pivot to pick
 * from an empty batch).
 *
 * Ends the program through panic() if the request stack fills up.
 */

quick()
   {
   register int i, j;
   int l, r;
   char *t;                             /* swap temporary */
   char *p;                             /* pivot line */

   stackptr = stack + STACKSIZE;        /* start with an empty stack */
   if (nline > 1)
      {
      stackptr--;                       /* push initial partition on stack */
      stackptr->lft = 0;
      stackptr->rght = nline - 1;
      }
   while (stackptr != stack + STACKSIZE)
      {
      l = stackptr->lft;                /* pop top partition from stack */
      r = stackptr->rght;
      stackptr++;
      do
         {
         i = l;
         j = r;
         p = line[(l + r) / 2];       /* split partition */
         do
            {
            while (compare(line[i], p) < 0) i++;
            while (compare(p, line[j]) < 0) j--;
            if (i <= j)
               {
               t = line[i];           /* swap position of recs */
               line[i] = line[j];
               line[j] = t;
               i++;
               j--;
               }
            } while (i <= j);
         if (j-l < r-i)                /* continue sorting smaller section */
            {
            if (i < r)
               {                       /* stack request for */
               if (stackptr == stack)  /* sorting right partition */
                  panic("Stack overflow.\n");
               stackptr--;
               stackptr->lft = i;
               stackptr->rght = r;
               }
            r = j;                     /* continue sorting left */
            }
         else
            {
            if (l < j)
               {                       /* stack request for */
               if (stackptr == stack)  /* sorting left partition */
                  panic("Stack overflow.\n");
               stackptr--;
               stackptr->lft = l;
               stackptr->rght = j;
               }
            l = i;                     /* continue sorting right */
            }
         } while (l < r);
      }
   }

/*
 * Sift the item at heap node 'n' down to its proper place.
 * Only nodes 0 .. nruns-1 take part, so the routine follows the
 * heap as it shrinks. The item is held aside while smaller
 * children (as ordered by compare()) are moved up one level, and
 * is stored once at the node where the descent stops.
 * Algorithm due to R.W. Floyd., described in Wirth (loc. cit.)
 */
sift(n)
int n;                                 /* Index of current top node */
   {
   register int parent, child;
   register struct heap *node;         /* Fast heap node pointer */
   struct run *heldrun;                /* Run of the item being sifted */
   char *heldline;                     /* Line of the item being sifted */

#ifdef debug
   printf("Next|%s", heap[n].h_lp);
#endif
   node = heap;
   parent = n;
   heldline = node[parent].h_lp;
   heldrun = node[parent].h_rp;

   for (child = 2*parent+1; child < nruns; child = 2*parent+1)
      {
      /*
       * Pick the smaller of the two children, if there are two.
       */
      if (child+1 < nruns)
         {
         if (compare(node[child+1].h_lp, node[child].h_lp) < 0)
            child++;
         }
      /*
       * Stop as soon as the held item is no larger than that child.
       */
      if (compare(heldline, node[child].h_lp) <= 0)
         break;
      node[parent].h_lp = node[child].h_lp;   /* Move the child up */
      node[parent].h_rp = node[child].h_rp;
      parent = child;
      }
   node[parent].h_lp = heldline;
   node[parent].h_rp = heldrun;
#ifdef debug
   puts("Sifted. Heap now is:");
   for(child=0; child<nruns-1 ;)
      printf("N%4d|%s", child, heap[child++].h_lp);
#endif
   }

/*
 * Save a run.
 * The run block has been preallocated
 * because there may not be enough space
 * to allocate it now.
 */
saverun()
   {
   crp->r_rp = NULL;
   crp->r_seek = ftell(tfp);
   crp->r_size = nline;
   if (frp == NULL)
      frp = crp;
   else
      lrp->r_rp = crp;
   lrp = crp;
   ++nruns;
   }

/*
 * Get a line from the specified run
 * on the temp. file.
 * Pack the line into allocated storage
 * and return a pointer to it; the caller
 * owns (and releases) that storage.
 * Return NULL if there are no lines left
 * in the run; real end of file is an
 * internal botch and ends in panic().
 *
 * The run descriptor is advanced: its seek
 * position moves past the line just read and
 * its line count drops by one.
 */
char *
getline(rp)
register struct run *rp;
   {
   register char *cp;

   if (rp->r_size == 0)
      return (NULL);
   flseek(tfp, rp->r_seek, 0);         /* Runs are interleaved: reposition */
   if (fgets(lbuf, sizeof lbuf, tfp) == NULL)
      panic("Unexpected end of temp. file\n");
   rp->r_seek = ftell(tfp);
   --rp->r_size;
   cp = nalloc((int) (strlen(lbuf) + 1)); /* Room for the text AND its NUL */
   strcpy(cp, lbuf);
   return (cp);
   }

/*
 * Write every line held in the array `line' to the
 * given file, in array order, releasing each line's
 * storage as it goes. The array is left empty
 * (`nline' is reset to zero).
 */
putline(fp)
register FILE *fp;
   {
   register char **lp;
   register char **endp;

   endp = line + nline;
   for (lp = line; lp < endp; lp++)
      {
      fputs(*lp, fp);
      mfree(*lp);
      }
   nline = 0;
   }

/*
 * Compare routine.
 *
 * Returns a negative, zero or positive value as line 'a' sorts
 * before, equal to or after line 'b'.
 *
 * With the numeric flag set the lines are ordered by the value
 * atol() finds at their start; otherwise by strcmp(). The reverse
 * flag negates the result.
 *
 * The two numeric values are compared directly rather than
 * subtracted, because the difference of two longs can overflow
 * and come out with the wrong sign.
 */
compare(a, b)
char *a, *b;
   {
   register int c;
   long x, y, atol();

   if (nflag)
      {
      x = atol(a);
      y = atol(b);
      if (x < y)
         c = -1;
      else if (x > y)
         c = 1;
      else
         c = 0;
      }
   else
      c = strcmp(a, b);
   if (rflag)
      c = -c;
   return (c);
   }

/*
 * Allocate 'n' cells of storage and return a pointer to them.
 * When malloc() has nothing left to give, the program is ended
 * through errxit() with the "Out of space." message.
 */
char *
nalloc(n)
   {
   register char *area;

   area = malloc(n);
   if (area == NULL)
      errxit("Out of space.\n");
   return (area);
   }

/*
 * Quit.
 * Get rid of the temp. file (if one is open), report the
 * record count and elapsed time on stderr, and exit with
 * "success" status.
 *
 * The success code depends on the host, in the same way as the
 * error codes in errxit() and panic(): 1 under DECUS C, 0 elsewhere
 * (where a non-zero exit status reports failure).
 */
quit()
   {
   if (tfp != NULL)
      fmkdl(tfp);
   fprintf(stderr,"SORT-I-Complete. %d records sorted in %ld sec.\n",
             nrec, time(0)-tstart);
#ifdef decus
   exit(1);
#else
   exit(0);
#endif
   }

/*
 * Tell the user just what is expected
 * of him, exit with "error" status.
 *
 * The message is handed to error(), which is not defined in this
 * file. NOTE(review): presumably a library routine that prints the
 * text and terminates the program -- main() carries on as if
 * usage() never returns, so confirm this for the target library.
 */
usage()
   {
   error("Usage: sort [-nr] [-o outputfile] [file ...]\n");
   }

/*
 * Errors.
 * Print the message 'a' on stderr behind the "?SORT-F-" prefix and
 * die with "error" status: 4 on RT-11, 2 on RSX, 1 elsewhere.
 *
 * 'a' is a plain message string (every caller passes its own
 * trailing newline); it is printed as text, not used as a format.
 */
errxit(a)
char *a;
   {
   fprintf(stderr,"?SORT-F-%s", a);
#ifdef decus                           /* Exit status per rt/rsx/unix */
#ifdef rt11
   exit(4);
#endif
#ifdef rsx
   exit(2);
#endif
#else
   exit(1);                            /* Non-zero: 0 would report success */
#endif
   }

/*
 * Severe errors.
 * Print the message 'a' on stderr behind the "?SORT-U-" prefix and
 * die with "severe error" status: 8 on RT-11, 4 on RSX, 1 elsewhere.
 *
 * 'a' is a plain message string (every caller passes its own
 * trailing newline); it is printed as text, not used as a format.
 */
panic(a)
char *a;
   {
   fprintf(stderr,"?SORT-U-%s", a);
#ifdef decus                           /* Exit status per rt/rsx/unix */
#ifdef rt11
   exit(8);
#endif
#ifdef rsx
   exit(4);
#endif
#else
   exit(1);                            /* Non-zero: 0 would report success */
#endif
   }
