{$Y-}
program typo;

{
  Computer Detection of Typographical Errors

  		Author : C. E. Chew
  		Date   : April 1985

  Link with base of 3000 octal.
  Underline escape sequences for LX80 printer.
}



const
  wordsize = 20;			{maximum size of words}
  wordsperline = 3;

  lx80 = true;				{LX80 printer support}
  vt100 = false;			{VT100 support}

  typo = "TYPO";
  version = 'TYPO  2.1  December 1985';

  optx = 1;				{options}
  inpx = 2;				{input file}
  outx = 3;				{output file}
  dicx = 4;				{dictionary file}

  debug = false;

  nul = chr(0);
  hyphen = '-';
  apostrophe = '''';
  space = ' ';
  esc = chr(27);

  disize = 541;
  trisize = 2741;
  wsize = 1987;

  radix = 28;				{character compression radix}
  radixc0 = '`';			{dummy character}

  tfudge = 20257;
  dfudge = 20259;
  wfudge = 20259;


type
  relation = (lt, le, eq, ge, gt, ne);
  string = array [1..wordsize+1] of char;

  wordarr = record
              w : string
            end;

  wordrec = record
              word : @wordarr;
              index : real
            end;

  wordptr = @word;
  word = record
           word : array [0..maxint] of wordrec
         end;

  xgram = integer;

  gramrec = record
              gram : xgram;
              occu : integer
            end;

  freqptr = @freqrec;
  freqrec = record
              gram : array [0..maxint] of gramrec
            end;



var
  digram, trigram : freqptr;		{binary tree of xgrams}
  words : wordptr;			{binary tree of words}
  wordinx : integer;
  dlongest, dnumber, dtotal, dsearch : integer;
  tlongest, tnumber, ttotal, tsearch : integer;
  wlongest, wnumber, wtotal, wsearch : integer;

  initopt : boolean;			{initialise xgram tables}
  statopt : boolean;			{display statistics}
  unlnopt : boolean;			{underline weirdest trigram}

  inix : integer;			{xgram initialisation file}


procedure rerun; external;

function match(s1, s2 : string):relation; external;

function length(s : string):integer; external;

procedure fatal(s,m : string); external;



function initialise:boolean;

var
  i : integer;
  ch : char;

begin
  initialise := false;
  initopt := false;
  statopt := false;
  unlnopt := false;
  dlongest := 0;
  dnumber := 0;
  dtotal := 0;
  dsearch := 0;
  tlongest := 0;
  tnumber := 0;
  ttotal := 0;
  tsearch := 0;
  wlongest := 0;
  wnumber := 0;
  wtotal := 0;
  wsearch := 0;

  if argc > dicx then begin
    if argv[optx]@[0] = hyphen then begin
      i := 0;
      repeat
	ch := argv[optx]@[i];
	if ch = 'I' then
	  initopt := true
	else if ch = 'S' then
	  statopt := true
	else if ch = 'U' then
	  unlnopt := true;
	i := i + 1
      until ch = nul
    end;
    if initopt then begin
      argc := argc - 1;
      inix := argc
    end;
    if argc > dicx then begin
      new(digram, disize);
      new(trigram, trisize);
      new(words, wsize);
      with digram@ do
	for i := 0 to disize-1 do
	  gram[i].occu := 0;
      with trigram@ do
	for i := 0 to trisize-1 do
	  gram[i].occu := 0;
      with words@ do
	for i := 0 to wsize-1 do
	  word[i].word := nil;
      initialise := true
    end
  end
end;



procedure setunderline(var ch : char);

begin
  if (ch > nul) and (ch < chr(128)) then
    ch := chr(ord(ch) + 128)
end;



procedure printchar(var lst : text; ch : char);

var
  underline : boolean;

begin
  underline := false;
  if ch >= chr(128) then begin
    ch := chr(ord(ch) - 128);
    underline := true;
    if unlnopt then begin
      if lx80 then
	write(lst, esc, '-1')	{underline mode for LX80 printer}
      else if vt100 then
	write(lst, esc, '[4m')	{underline mode for VT100}
    end
  end;
  if ch = '{' then
    ch := apostrophe;
  write(lst, ch);
  if unlnopt and underline then begin
    if lx80 then
      write(lst, esc, '-0')	{normal mode for LX80 printer}
    else if vt100 then
      write(lst, esc, '[0m')	{normal mode for VT100}
  end
end;



function insertgram(g : xgram; var t : freqptr; i, s, fudge : integer;
                    var longest, number, total, search : integer):integer;
var
  h, q, r, k : integer;

begin
  h := g * fudge;
  if h < 0 then
    h := h - 32768;
  q := h div s;
  if q = 0 then
    q := 1;
  r := h mod s;
  k := 0;
  if statopt then
    search := search + 1;
  repeat
    k := k + 1;
    with t@.gram[r] do begin
      if occu = 0 then begin
	gram := g;
	occu := i;
	insertgram := occu;
	if statopt then begin
	  longest := max(longest, k);
	  number := number + 1;
	  total := total + k
	end;
	k := 0
      end
      else if gram = g then begin
	occu := occu + i;
	insertgram := occu;
	if statopt then
	  total := total + k;
	k := 0
      end
      else begin
	r := r - q;
	if r < 0 then
	  r := r + s
      end
    end
  until (k = 0) or (k > s);
  if k <> 0 then
    fatal(typo, "Hash table overflow")
end;



procedure insertword(var w : string; var words : wordptr; s, fudge : integer;
                     var longest, number, total, search : integer);

var
  y : @wordarr;
  h, r, q, i, k : integer;
  ch : char;

begin
  i := 1;
  ch := w[i];
  h := 1;
  k := 1;
  while ch <> nul do begin
    h := h + k*ord(ch);
    k := k*2;
    if k = 512 then
      k := 1;
    i := i + 1;
    ch := w[i]
  end;
  if h < 0 then
    h := h - 32768;
  h := h * fudge;
  if h < 0 then
    h := h - 32768;
  q := h div s;
  if q = 0 then
    q := 1;
  r := h mod s;
  k := 0;
  if statopt then
    search := search + 1;
  repeat
    k := k + 1;
    with words@.word[r] do begin
      if word = nil then begin
	i := length(w) + 1;
	new(y, i);
	i := 0;
	repeat
	  i := i + 1;
	  ch := w[i];
	  y@.w[i] := ch
	until ch = nul;
	word := y;
	if statopt then begin
	  longest := max(longest, k);
	  number := number + 1;
	  total := total + k
	end;
	k := 0
      end
      else if match(w, word@.w) = eq then begin
	if statopt then
	  total := total + k;
	k := 0
      end
      else begin
	r := r - q;
	if r < 0 then
	  r := r + s
      end
    end
  until (k = 0) or (k > s);
  if k <> 0 then
    fatal(typo, "Word hash table overflow")
end;



procedure sortwords(words : wordptr; wordinx : integer; index : boolean);

procedure sorting(l, r : integer);

var
  xword : string;
  xr : real;
  t : wordrec;
  i, j : integer;

begin
  i := l;
  j := r;
  if index then
    xr := words@.word[(i+j) div 2].index
  else
    xword := words@.word[(i+j) div 2].word@.w;
  repeat
    if index then begin
      while words@.word[i].index > xr do
	i := i + 1;
      while words@.word[j].index < xr do
	j := j - 1
    end
    else begin
      while match(words@.word[i].word@.w, xword) = lt do
	i := i + 1;
      while match(words@.word[j].word@.w, xword) = gt do
	j := j - 1
    end;
    if i < j then begin
      t := words@.word[j];
      words@.word[j] := words@.word[i];
      words@.word[i] := t
    end;
    if i <= j then begin
      i := i + 1;
      j := j - 1
    end
  until i > j;
  if l < j then
    sorting(l, j);
  if i < r then
    sorting(i, r)
end;

begin
  if wordinx > 0 then
    sorting(0, wordinx)
end;



procedure initgrams;

var
  grm : file of gramrec;
  p : freqptr;

begin
  reset(grm, argv[inix]@, "GRM");
  while grm@.gram <> 0 do begin
    if insertgram(grm@.gram, trigram, grm@.occu, trisize, tfudge,
                  tlongest, tnumber, ttotal, tsearch) = 0 then ;
    get(grm)
  end;
  get(grm);
  while grm@.gram <> 0 do begin
    if insertgram(grm@.gram, digram, grm@.occu, disize, dfudge,
                  dlongest, dnumber, dtotal, dsearch) = 0 then ;
    get(grm)
  end
end;



procedure scaninput;

var
  inp : text;
  di, tri, i, c, cl : integer;
  p : freqptr;
  w : string;


procedure getword(var w : string);

var
  ch : char;
  i : integer;


procedure getchar(var ch : char);

begin
  if eof(inp) then
    ch := nul
  else begin
    read(inp, ch);
    if (ch >= 'A') and (ch <= 'Z') then
      ch := chr(ord(ch) + (ord('a')-ord('A')))
    else if ch < space then
      ch := space
  end
end;


begin
  repeat
    getchar(ch)
  until ((ch >= 'a') and (ch <= 'z')) or (ch = nul);

  i := 1;
  while (ch >= 'a') and (ch <= '{') do begin
    w[i] := ch;
    i := i + 1;
    if i > wordsize then
      ch := nul
    else begin
      getchar(ch);
      if ch = apostrophe then
	ch := '{'
      else if ch = hyphen then
	repeat
	  getchar(ch)
	until ch <> space
    end
  end;
  w[i] := nul
end;


begin
  reset(inp, argv[inpx]@, "RNO");

  getword(w);
  while w[1] <> nul do begin
    insertword(w, words, wsize, wfudge,
               wlongest, wnumber, wtotal, wsearch);
    c := ord(w[1]) - ord(radixc0);
    cl := 0;
    i := 2;
    repeat
      di := cl * radix + c;
      cl := c;
      c := max(ord(w[i]) - ord(radixc0), 0);
      tri := di * radix + c;
      if insertgram(di, digram, 1, disize, dfudge,
                    dlongest, dnumber, dtotal, dsearch)= 0 then ;
      if insertgram(tri, trigram, 1, trisize, tfudge,
                    tlongest, tnumber, ttotal, tsearch) = 0 then ;
      i := i + 1
    until c = 0;
    if insertgram(cl * radix, digram, 1, disize, dfudge,
                  dlongest, dnumber, dtotal, dsearch) = 0 then ;
    getword(w)
  end
end;



procedure linearwords;

var
  i, j : integer;

begin
  wordinx := -1;
  for i := 0 to wsize-1 do begin
    if words@.word[i].word <> nil then begin
      wordinx := wordinx + 1;
      if i <> wordinx then
	words@.word[wordinx] := words@.word[i]
    end
  end;
  sortwords(words, wordinx, false);
  if debug then begin
    writeln('List of Unique Words');
    for i := 0 to wordinx do
      writeln(words@.word[i].word@.w);
  end
end;



procedure dictionaryscan;

var
  dic : text;
  i, wordinxx : integer;
  d : string;
  r : relation;

procedure nextword;

var
  i, j : integer;
  dd : string;
  ch : char;

begin {nextword}
  if eof(dic) then
    d[1] := nul
  else begin
    readln(dic, dd);
    if dd[1] >= 'a' then
      d := dd
    else begin
      i := ord(dd[1]) - ord(space);
      j := 1;
      repeat
	j := j + 1;
	i := i + 1;
	ch := dd[j];
	d[i] := ch
      until ch = nul
    end;
    if debug then
      writeln('Dictionary : ', d)
  end
end;

procedure peculiarity(words : wordptr; n : integer);

var
  c, cl, i, dil, dir, tri : integer;
  ldir, ldil, ltri, sum : real;
  strangest, pinx : real;
  mostpeculiar : integer;

function count(g : xgram; t : freqptr; s, fudge : integer;
               var longest, number, total, search : integer):real;

const
  loge = 0.4342944819;

var
  kount : integer;

begin {count}
  kount := insertgram(g, t, 0, s, fudge, longest, number, total, search);
  if kount = 1 then
    count := -10.0
  else
    count := ln(float(kount - 1)) * loge
end;

begin {peculiarity}
  with words@.word[n].word@ do begin
    if debug then
      writeln('Peculiar : ', w);
    strangest := -1.0;
    sum := 0.0;
    c := ord(w[1]) - ord(radixc0);
    cl := 0;
    dir := c;
    ldir := count(dir, digram, disize, dfudge,
                  dlongest, dnumber, dtotal, dsearch);
    i := 1;
    repeat
      i := i + 1;
      dil := dir;
      ldil := ldir;
      cl := c;
      c := max(ord(w[i]) - ord(radixc0), 0);
      dir := cl * radix + c;
      ldir := count(dir, digram, disize, dfudge,
                    dlongest, dnumber, dtotal, dsearch);
      tri := dil * radix + c;
      ltri := count(tri, trigram, trisize, tfudge,
                    tlongest, tnumber, ttotal, tsearch);
      pinx := sqr((ldil + ldir)/2.0 - ltri);
      if pinx > strangest then begin
	strangest := pinx;
	mostpeculiar := i
      end;
      sum := sum + pinx
    until c = 0;
    sum := sqrt(sum / float(i-1));
    words@.word[n].index := sum;
    setunderline(w[mostpeculiar - 0]);
    setunderline(w[mostpeculiar - 1]);
    if mostpeculiar > 2 then
      setunderline(w[mostpeculiar - 2])
  end
end;

procedure move(words : wordptr; var i, j : integer);

var
  t : wordrec;

begin
  j := j + 1;
  if j < i then begin
    t := words@.word[j];
    words@.word[j] := words@.word[i];
    words@.word[i] := t
  end;
  i := i + 1
end;

begin {dictionaryscan}
  reset(dic, argv[dicx]@, "DIC");
  i := 0;
  wordinxx := -1;
  nextword;
  while (d[1] <> nul) and (i <= wordinx) do begin
    r := match(words@.word[i].word@.w, d);
    if r = lt then begin
      peculiarity(words, i);
      move(words, i, wordinxx)
    end
    else begin
      if r = eq then
	i := i + 1;
      nextword
    end
  end;
  while i <= wordinx do begin
    peculiarity(words, i);
    move(words, i, wordinxx)
  end;
  wordinx := wordinxx
end;



procedure orderlist;

begin
  sortwords(words, wordinx, true)
end;



procedure listing;

var
  lst : text;

procedure list(words : wordptr; wordinx : integer);

var
  i, j : integer;
  ch : char;

begin
  for i := 0 to wordinx do begin
    if i mod wordsperline = 0 then
      writeln(lst);
    with words@.word[i] do begin
      write(lst, index:5:1, ' ');
      j := 1;
      ch := word@.w[1];
      repeat
	printchar(lst, ch);
	j := j + 1;
	ch := word@.w[j]
      until ch = nul;
      if i mod wordsperline <> wordsperline - 1 then
	write(lst, space:(wordsize-j+2))
    end
  end
end;

begin
  rewrite(lst, argv[outx]@, "LST");
  writeln(lst);
  writeln(lst, version);
  writeln(lst);
  write(lst, 'List of Peculiar Words - Source: ');
  writeln(lst, argv[inpx]@, ' - Dictionary: ', argv[dicx]@);
  writeln(lst);
  list(words, wordinx);
  writeln(lst)
end;



procedure statistics;

begin
  if statopt then begin
    writeln('Statistics');
    writeln('----------');
    writeln;
    writeln('Digram Longest Probe        : ',
	    dlongest);
    writeln('Digram Average Probe        : ',
	    float(dtotal)/float(dsearch):8:2);
    writeln('Digram Number of Keys       : ',
	    dnumber);
    writeln('Digram Storage Utilisation  : ',
	    float(dnumber)/float(disize)*100.0:8:1, '%');
    writeln;
    writeln('Trigram Longest Probe       : ',
	    tlongest);
    writeln('Trigram Average Probe       : ',
	    float(ttotal)/float(tsearch):8:2);
    writeln('Trigram Number of Keys      : ',
	    tnumber);
    writeln('Trigram Storage Utilisation : ',
	    float(tnumber)/float(trisize)*100.0:8:1, '%');
    writeln;
    writeln('Word Longest Probe          : ',
	    wlongest);
    writeln('Word Average Probe          : ',
	    float(wtotal)/float(wsearch):8:2);
    writeln('Word Number of Keys         : ',
	    wnumber);
    writeln('Word Storage Utilisation    : ',
	    float(wnumber)/float(wsize)*100.0:8:1, '%')
  end
end;



begin
  if not initialise then begin
    writeln(version);
    writeln;
    writeln('$options input output dictionary [gram]');
    writeln;
    writeln('Option I	Initialise trigram and digram tables');
    writeln('Option S	Gather statistics and print on terminal');
    writeln('Option U	Underline weirdest trigram in each word');
    writeln;
    rerun
  end
  else begin
    writeln;
    if initopt then begin
      writeln('Initialising Tables');
      initgrams
    end;
    writeln('Scanning Document');
    scaninput;
    writeln('Resolving Internal Word List');
    linearwords;
    writeln('Scanning Dictionary');
    dictionaryscan;
    writeln('Sorting Results');
    orderlist;
    writeln('Listing Words');
    listing;
    statistics
  end
end.
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       