/*
 * GRESDX - grep soundex match routines
 *
 * Edits:
 *
 *       10-Oct-85   lmf   Consolidate with multi-parameter match
 *       20-Oct-85   lmf   Fix soundex algorithm
 *       25-Jan-86   lmf   BOL/EOL match for SOUNDEX & speedup
 */
#define IDENT "v1.3.c"
#include <stdio.h>
#include "grep.h"

/*
 * Soundex match can be exact length or allow trailing consonants.
 * For example, 'john' would exact match:  john, jon, jonn, and johann,
 * but not johnson, etc.
 * Define the following line to enable exact, restrictive matching
 * characteristic.
 */
/**** #define EXACT_SDX_MATCH ****/

#define SD_BOL    (1)                  /* soundex match only at start */
#define SD_EOL    (2)                  /* soundex match only at end */

extern int debug ;                     /* True for debug displays */
extern char *pat_beg ;                 /* Start of matched pattern */
extern char *pat_end ;                 /*  and its end */
extern char lbuf[LMAX] ;               /* Input line buffer */

int   sdxflags[NPATS] ;                /* special match flags: BOL, EOL */
long  sdxhash[NPATS] ;                 /* soundex hash for patterns */
long  sdxrange[NPATS] ;                /* variance for short patterns */
long  sdx_var ;                        /* variance returned by soundex() */

/*++
 * SCOMPILE(pointer, index)
 *
 * Compile argument string into soundex hash and flags.  Store resultant
 * data into soundex match vectors at index given in patno.
 */
scompile(p, patno)
char  *p ;
int   patno ;
{
   long soundex() ;

   sdxflags[patno] = 0 ;
   if(*p == BOL_CH) {
      sdxflags[patno] |= SD_BOL ;      /* flag start of line only */
      p++ ;                            /* skip over BOL_CH */
   }
   if(p[strlen(p)-1] == EOL_CH) {      /* end of line match wanted? */
      sdxflags[patno] |= SD_EOL ;      /* indicate with flag word */
   }
   sdxhash[patno] = soundex(p) ;       /* collect word hash */
   sdxrange[patno] = sdx_var ;     /* and allowed range for short patterns */

   if(debug)
      printf("SC: pn=%d flg=%d pat=%s\n", patno, sdxflags[patno], p ) ;
}

/*++
 * SOUNDEX(string)
 *
 * Return the soundex hash value for the argument string.
 *   S_V should be zero for efficiency.
 */

#define  S_V   0                       /* Vowels: a e i o u (maybe y, h, w) */
#define  S_SK  1                       /* "hard" consonants: s c z x k q */
#define  S_TD  2                       /* Dental stops: t d         */
#define  S_FV  3                       /* f v                       */
#define  S_GJ  4                       /* g j                       */
#define  S_B   5                       /* b                         */
#define  S_L   6                       /* l                         */
#define  S_M   7                       /* m                         */
#define  S_N   8                       /* n                         */
#define  S_P   9                       /* p                         */
#define  S_R   10                      /* r                         */
#define  S_MAX 11                      /* "radix" of soundex values */

/*
 * The following are the hash values for each letter.
 */

static char sx_letters[] = {
/* a     b     c     d     e     f     g     h     i     j     k     l     m */
  S_V,  S_B,  S_SK, S_TD, S_V,  S_FV, S_GJ, S_V,  S_V,  S_GJ, S_SK, S_L,  S_M,
/* n     o     p     q     r     s     t     u     v     w     x     y     z */
  S_N,  S_V,  S_P,  S_SK, S_R,  S_SK, S_TD, S_V,  S_FV, S_V,  S_SK, S_V,  S_SK
};

/*
 * The first letter of the following consonant pairs is silent.
 */
static char *sx_leading_silent = "cztstzghgnknpnptpf";

/*
 * Compute the soundex hash for the argument string
 * -- somewhat modified from the original algorithm.
 * "Margaret K. Odell and Robert C. Russell.  U.S.
 * patents 1261167 (1918) and 1435663 (1922)."  as
 * reprinted in Donald Knuth, Sorting and Searching.
 */
long
soundex(instring)
char  *instring ;
{
   register int   c;                   /* Current character */
   register int   next;                /* Next character in the string */
   register char  *string ;            /* working string pointer */
   char     *sxp;                      /* -> leading silent string */
   int      last;                      /* Previous character for doubles */
   long     value;                     /* Soundex value */
   int      temp;                      /* Hash for this character */
   int      count;                     /* Want only three digits */
   int      first ;                    /* First character flag */

#ifdef DEBUG
   if (debug)
      printf("SDX: %s :\n", string);
#endif
   string = instring ;                 /* get register copy of pointer */
   first = TRUE ;                      /* first character */
   last = ' ' ;
   c = tolower(*string) ;
   string++ ;
   count = 5 ;                         /* Maximum consonants for this loop */

   while (count && isalpha(c)) {       /* build hash code */
      next = tolower(*string) ;
      string++ ;
      if (next != EOS) {
/*
 * Ignore the first letter in a silent-pair.
 */
         for (sxp = sx_leading_silent; *sxp ; sxp++) {
            if (*sxp++ == c && *sxp == next) {
               c = next ;
               next = tolower(*string) ;
               string++ ;
            }
         }
/*
 * Change 'ph' to 'f'
 */
         if (c == 'p' && next == 'h') {
            c = 'f';
            next = tolower(*string) ;
            string++ ;
         }
/*
 * Check for consonant pairs
 */
         if (c == next && sx_letters[c - 'a'] != S_V ) {
            next = tolower(*string) ;
            string++ ;
         }
      }
      if (first) {
         value = c - 'a' + 1 ;         /* Initial hash == first letter value */
         first = FALSE ;
      }
      else {
         if ((temp = sx_letters[c - 'a']) != S_V && c != last) {
            value *= S_MAX;
            value += temp;
            count-- ;
         }
      }
#ifdef DEBUG
      if (debug)
         printf("%d: %c=%d %ld\n", count, c, temp, value) ;
#endif
      last = c ;
      c = next ;
   }
   sdx_var = 1 ;
   while (count-- > 0) {               /* Finish off the hash code */
      value *= S_MAX ;                 /* Hash code */
      sdx_var *= S_MAX ;               /* Allowance for trailing chars */
   }
#ifdef DEBUG
   if (debug > 1)
       printf("SDX hash = %ld %ld\n", value, value+sdx_var);
#endif

   return (value) ;
}

/*++
 * SMATCH - Match the current line using soundex, return 1 if it does
 */
smatch(patno)
int   patno ;                          /* Argument number to match */
{
   register char *l;                   /* Line pointer */
   char *ptbeg ;                       /* saved pattern pointer */
   long sndl ;                         /* soundex hash code for word */

   for (l = lbuf; *l; l++) {           /* step through line */
      while (!isalpha(*l) && *l != EOS) /* skip any leading trash */
         l++ ;
      if((l != lbuf) && (sdxflags[patno] & SD_BOL)) /* at front of line? */
         return(FALSE) ;
      if(*l == EOS)                    /* nothing worth checking */
         return(FALSE) ;
      ptbeg = l ;                      /* start of match */
      sndl = soundex(l) ;              /* collect hash code */
      while (isalpha(*l))              /* skip rest of word */
         l++ ;

#ifdef DEBUG
      if (debug)
         printf("%ld %ld %ld: %s\n", sdxhash[patno], sndl, sdxrange[patno], l) ;
#endif

#ifdef EXACT_SDX_MATCH
      if (sndl == sdxhash[patno]) {
#else
      if (sdxhash[patno] <= sndl && sndl < sdxhash[patno] + sdxrange[patno]) {
#endif
         if(*l != EOS && (sdxflags[patno] & SD_EOL)) /* not at end of line  */
            return(FALSE) ;
         pat_beg = ptbeg ;             /* start of matched string */
         pat_end = l ;                 /* end of match */
         return(TRUE);                 /* yeah, got one */
      }
   }
   return(FALSE);                      /* no good */
}
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              