#!/usr/bin/env python3

import sys
import os
import collections
import gzip

# Filler blocks, every word all ones (0o777777).  They stand in for
# blocks that could not be read.  FILL18 has the 256 words of an 18-bit
# format block, FILL12 the 86 words of a 12-bit format block.
FILL18 = [ 0o777777 ] * 256
FILL12 = [ 0o777777 ] * 86

# Mapping table from the data track bits (1-3 in the dump file) to the
# actual data.  It appears they are in reverse order (track 3 is LSB)
# and complemented, so each 3-bit index has its bits mirrored and then
# inverted (XOR with 7).
bmap = tuple ((((i & 1) << 2) | (i & 2) | ((i & 4) >> 2)) ^ 7
              for i in range (8))

def oc (d):
    # Obverse complement of an 18-bit value: invert every bit, then
    # reverse the order of the six 3-bit groups (the lowest group of
    # the inverted value becomes the highest group of the result).
    inverted = ~d
    groups = [(inverted >> (3 * pos)) & 7 for pos in range (6)]
    result = 0
    for group in groups:
        result = result * 8 + group
    return result

def rdata (fn):
    # Read a raw dump file and return (data, name).
    #
    # A file whose name ends in ".gz" is decompressed on the fly, and
    # the returned name has that suffix removed; otherwise the name is
    # returned unchanged.  The returned data has its final byte dropped
    # and then any trailing NUL bytes stripped.
    # NOTE(review): the reason for dropping the final byte is not
    # visible here; presumably the capture tool appends a terminator.
    bfn, x = os.path.splitext (fn)
    compressed = (x == ".gz")
    # The "with" block guarantees the file is closed even if read ()
    # raises (e.g. a truncated or corrupt gzip stream).
    with (gzip.GzipFile (fn) if compressed else open (fn, "rb")) as f:
        ret = f.read ()
    if compressed:
        fn = bfn
    return ret[:-1].rstrip (b"\000"), fn

def pstats (d):
    # Print per-bit statistics for the byte sequence d: for each of the
    # 8 bit positions, the number of bytes that have the bit set and
    # that count as a percentage of all bytes.
    #
    # An empty d prints zero counts with 0.000% rather than dividing by
    # zero.
    stats = collections.Counter ()
    dlen = len (d)
    for b in d:
        for i in range (8):
            if b & (1 << i):
                stats[i] += 1
    for i in range (8):
        pct = stats[i] * 100 / dlen if dlen else 0.0
        print ("{}: {:>7d} {:>7.3f}%".format (i, stats[i], pct))
    
def writevcd (fn, d):
    # Dump the raw samples in d as a VCD waveform file.  The output
    # name is fn with its extension replaced by ".vcd".  Each sample is
    # recorded as one change of an 8-bit integer variable "track" in
    # scope "module", timestamped with the sample's index (timescale
    # "1 us").
    #
    # The vcd module is imported here so that it is only required when
    # VCD output is actually requested.
    import vcd
    base, _ext = os.path.splitext (fn)
    with open (base + ".vcd", "wt") as out:
        with vcd.writer.VCDWriter (out, timescale = "1 us") as writer:
            track = writer.register_var ("module", "track",
                                         vcd.writer.VarType.integer, 8)
            for when, sample in enumerate (d):
                writer.change (track, when, sample)

# Decoder state shared by frame (), word () and block ():
#   mark     - the six most recent mark track bits
#   data     - the 18 most recent data bits
#   checksum - running 6-bit XOR checksum of the current block
mark = data = checksum = 0

def frame (di):
    # Read one frame (one sample from the iterator di) and shift it
    # into the current mark and data words.  Bit 0 of the sample is the
    # mark track bit; bits 1-3 are the data track bits, translated
    # through bmap.  mark keeps the last 6 bits, data the last 18.
    # StopIteration from next () propagates when the input runs out.
    global mark, data
    sample = next (di)
    mark_bit = sample & 1
    data_bits = bmap [(sample >> 1) & 7]
    mark = ((mark << 1) | mark_bit) & 0o77
    data = ((data << 3) | data_bits) & 0o777777

def word (di):
    # Read an 18 bit word, i.e. six frames, then XOR each of its three
    # 6-bit groups into the running checksum.  This needs block sync,
    # so it is only used once the start of a block (the block number
    # field) has been recognized.
    global checksum
    remaining = 6
    while remaining:
        frame (di)
        remaining -= 1
    for shift in (12, 6, 0):
        checksum ^= (data >> shift) & 0o77

def endzone (di):
    # Search for the reverse endzone, i.e., the beginning of the
    # portion of the tape that has been formatted.  There is typically
    # noise before this point, which is skipped silently.
    #
    # Frames are consumed one at a time until the mark word is 55
    # (octal); then five further whole words must each also end with
    # mark 55.  If any of them does not, the search starts over from
    # where it stopped.  Returns once five confirming words were seen.
    while True:
        while mark != 0o55:
            frame (di)
        confirmed = 0
        while confirmed < 5:
            word (di)
            if mark != 0o55:
                break
            confirmed += 1
        if confirmed == 5:
            return

class MTE (Exception):
    # Mark track error: the mark word read from the tape was not the
    # one expected at this position in the block.
    #
    # got - the mark word that was read
    # exp - the expected mark word, or a set of acceptable mark words
    def __init__ (self, got, exp):
        self.got = got
        self.exp = exp

    def __str__ (self):
        exp = self.exp
        if isinstance (exp, (set, frozenset)):
            # Sort so the message does not depend on set iteration
            # order, which is unspecified.
            exp = " or ".join ("{:0>2o}".format (i) for i in sorted (exp))
        else:
            exp = "{:0>2o}".format (exp)
        return "Mark track error, got {:0>2o}, expected {}" \
                .format (self.got, exp)

def rmark (exp):
    # Require the current mark word to be exp; raise MTE otherwise.
    if mark == exp:
        return
    raise MTE (mark, exp)
    
def block (di, eblk):
    # Find and read the next block from the frame iterator di.  eblk is
    # the block number the caller expects next.
    #
    # Returns one of:
    #   (eblk, False) - the end zone was reached instead of a block
    #   (bnum, None)  - a block header was found but its number is not
    #                   eblk; the block body is not read
    #   (bnum, bd)    - block bnum was read; bd is the list of data words
    # Raises MTE if a mark word inside the block is not the expected
    # one.  StopIteration propagates when the input runs out.
    global checksum

    # Hunt for a block header, one frame at a time.
    while True:
        if mark == 0o26:
            # Block number, save that.  Then read the next word to
            # confirm block framing: noise can produce a false 26 mark
            # word, but requiring the following word to carry mark 32
            # avoids that.  Sometimes high order bits are set in the
            # block number for unknown reasons, so keep only 12 bits.
            bnum = data & 0o7777
            word (di)
            if mark == 0o32:
                break
        elif mark == 0o22:
            # Looks like endzone, see if it's real: five more words
            # must follow with an end zone mark.  Some tapes have end
            # zone marks alternating octal 22 and 26.
            for _ in range (5):
                word (di)
                if mark not in (0o22, 0o26):
                    break
            else:
                return eblk, False
        frame (di)

    if bnum != eblk:
        print ("Unexpected block, got {:0>6o}, expected {:0>6o}".format (bnum, eblk))
        # Skip a frame to restart the block number search
        frame (di)
        return bnum, None

    # Two words with mark 10 follow the header.  The low 6 bits of the
    # second one seed the running checksum.
    for _ in range (2):
        word (di)
        rmark (0o10)
    checksum = data & 0o77

    # Data words: the first two must carry mark 10, the rest mark 70
    # or 73.  The loop ends after the first word with mark 73.
    bd = [ ]
    while mark != 0o73:
        word (di)
        exp = { 0o10 } if len (bd) < 2 else { 0o70, 0o73 }
        if mark not in exp:
            raise MTE (mark, exp)
        bd.append (data)
    # Get last word
    word (di)
    rmark (0o73)
    bd.append (data)

    # Checksum word.  word () folds all 18 bits into checksum, but only
    # the top 6 bits of this word take part in the check, so the value
    # from before the read is used.  A good block yields octal 77.
    checksum_hold = checksum
    word (di)
    rmark (0o73)
    checksum = checksum_hold ^ (data >> 12)
    if checksum != 0o77:
        print("Checksum mismatch on decimal block", bnum)

    # Trailer: three more words with marks 73, 51 and 45.  The last one
    # holds the block number in obverse complement form.
    for expected in (0o73, 0o51, 0o45):
        word (di)
        rmark (expected)
    brev = oc (data) & 0o7777
    if bnum != brev:
        print ("block {:0>6o} rev mismatch {:0>6o}".format (bnum, brev))
    return bnum, bd

def process (d):
    # Decode the raw dump d (a sequence of frame bytes) into a list of
    # blocks indexed by block number.  Entries are lists of 18-bit
    # words, or None for blocks that were never seen.
    #
    # The tape format is taken from the length of the first block
    # obtained: 86 words means 12-bit format (1474 blocks), 256 words
    # means 18-bit format (578 blocks).  The result is cut to that
    # block count, so it is empty if no format was established.
    #
    # Blocks that fail with a mark track error, or whose header carries
    # an unexpected number, are replaced by a filler block.
    # NOTE(review): while the block length is still unknown the filler
    # is FILL12, which then fixes the format as 12-bit - confirm that
    # this is intended for 18-bit tapes with a bad first block.
    blks = [ None ] * 1474
    di = iter (d)
    bs = None
    curblk = blkcnt = 0
    try:
        endzone (di)
    except StopIteration:
        # The dump ended before the formatted area was found.  Report
        # it instead of letting StopIteration escape as a traceback.
        print ("Unexpected EOF, no reverse end zone found")
        return [ ]
    while True:
        try:
            bnum, bd = block (di, curblk)
        except MTE as e:
            print ("block {:0>4o} {!s}".format (curblk, e))
            # Treat as the expected block, to be replaced by filler
            bnum, bd = curblk, None
        except StopIteration:
            print ("Unexpected EOF, expected block", curblk)
            break
        if bd is False:
            # End zone.  Only worth a message if it is not where the
            # block count says it should be.
            if bnum != blkcnt:
                print ("Endzone reached at block", bnum)
            break
        if bd is None:
            bd = FILL18 if bs == 256 else FILL12
        curblk = bnum + 1
        if bs:
            if len (bd) != bs:
                print ("Block length mismatch, got {}, expected {}, block {}".format (len (bd), bs, bnum))
                bs = len (bd)
        else:
            bs = len (bd)
            if bs == 86:
                # PDP 5/8/12 format
                blkcnt = 1474
            elif bs == 256:
                # PDP 1/4/7/9/10/11/15 format
                blkcnt = 578
            else:
                print ("Strange block length", bs)
        if bnum < len (blks):
            blks[bnum] = bd
    return blks[:blkcnt]

def write12 (fn, blks):
    # Write a SIMH DECtape image, 12 bit format.  The output name is fn
    # with its extension replaced by ".t12".
    #
    # blks is a list of blocks, each a list of 18-bit words; a None
    # entry is written as a filler block.  Every pair of 18-bit words
    # is repacked as three 12-bit words, each stored as a 2-byte
    # little-endian value.
    bfn, x = os.path.splitext (fn)
    with open (bfn + ".t12", "wb") as f:
        for blk in blks:
            if blk is None:
                blk = FILL12
            bi = iter (blk)
            for w in bi:
                # Pair up with the following word.  A block with an odd
                # word count is padded with a zero word rather than
                # aborting with StopIteration and leaving a truncated
                # image behind.
                w = (w << 18) | next (bi, 0)
                for shift in (24, 12, 0):
                    f.write (((w >> shift) & 0o7777).to_bytes (2, "little"))
                
def write16 (fn, blks):
    # Write a SIMH DECtape image, 16 bit format.  The output name is fn
    # with its extension replaced by ".t16".  Only the low 16 bits of
    # each word are kept, stored as a 2-byte little-endian value.  A
    # None entry in blks is written as a filler block.
    stem = os.path.splitext (fn)[0]
    with open (stem + ".t16", "wb") as out:
        for blk in blks:
            words = FILL18 if blk is None else blk
            out.writelines ((val & 0o177777).to_bytes (2, "little")
                            for val in words)
                
def write18 (fn, blks):
    # Write a SIMH DECtape image, 18 bit format.  The output name is fn
    # with its extension replaced by ".t18".  Each word is stored
    # unmodified as a 4-byte little-endian value.  A None entry in blks
    # is written as a filler block.
    stem = os.path.splitext (fn)[0]
    with open (stem + ".t18", "wb") as out:
        for blk in blks:
            words = FILL18 if blk is None else blk
            out.writelines (val.to_bytes (4, "little") for val in words)
                
def main (args):
    # Process each dump file named in args.  The switch "-v" turns on
    # VCD waveform output for all files that follow it.
    #
    # For every file the decoded blocks are written as image files next
    # to the input: ".t18" and ".t16" for 18-bit format tapes, ".t12"
    # for 12-bit format tapes.
    vcdsw = False
    for fn in args:
        if fn == "-v":
            vcdsw = True
            continue
        print (fn)
        d, fn = rdata (fn)
        if vcdsw:
            writevcd (fn, d)
        blks = process (d)
        # Take the format from the first block that was actually
        # stored.  Block 0 itself may be None if it was never found on
        # the tape, and len (None) would raise TypeError.
        first = next ((b for b in blks if b is not None), None)
        if first is None:
            print ("No readable blocks found in", fn)
            continue
        if len (first) == 256:
            # 18 bit format
            print (fn, "is 16/18/36 bit format")
            if len (blks) != 578:
                print ("block count is off, expecting 578, got",
                       len (blks),"in", fn)
            write18 (fn, blks)
            write16 (fn, blks)
        else:
            print (fn, "is 12 bit format")
            if len (blks) != 1474:
                print ("block count is off, expecting 1474, got",
                       len (blks),"in", fn)
            write12 (fn, blks)

if __name__ == "__main__":
    main (sys.argv[1:])
