#include "../h/local.h"

#ifdef  SCCS_ID
static char SCCS_ID [] = "@(#)bio.c    	5.3	 15:24:18 - 83/03/16 ";
#endif  SCCS_ID

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/sysmon.h" /**/
#ifdef  UCB_METER
#include "../h/vm.h"
#endif  UCB_METER

/*
 * swap IO headers.
 * they are filled in to point
 * at the desired IO operation.
 * swap() uses swbuf1 unless that header is busy and nobody
 * is already waiting for swbuf2, in which case it uses swbuf2.
 */
struct  buf     swbuf1;
struct  buf     swbuf2;

/*
 * The following several routines allocate and free
 * buffers with various side effects.  In general the
 * arguments to an allocate routine are a device and
 * a block number, and the value is a pointer
 * to the buffer header; the buffer is marked "busy"
 * so that no one else can touch it.  If the block was
 * already in core, no I/O need be done; if it is
 * already busy, the process waits until it becomes free.
 * The following routines allocate a buffer:
 *      getblk
 *      bread
 *      breada
 * Eventually the buffer must be released, possibly with the
 * side effect of writing it out, by using one of
 *      bwrite
 *      bdwrite
 *      bawrite
 *      brelse
 */

#ifdef  UCB_BHASH
#define BUFHSZ  64      /* number of hash chains; must be power of 2 */
/*
 * Map a block number onto a hash chain index in the range
 * 0 .. BUFHSZ-1.  The argument is parenthesized so that an
 * expression containing an operator of lower precedence than
 * '&' (for example '|') is masked as a whole.
 */
#define BUFHASH(blkno)  ((blkno) & (BUFHSZ-1))

struct  buf     *bhash[BUFHSZ];

/*
 * Empty every buffer hash chain by setting
 * each bhash[] head to the null pointer.
 */
bhinit()
{
	register struct buf **hp;

	for (hp = &bhash[0]; hp < &bhash[BUFHSZ]; hp++)
		*hp = (struct buf *) NULL;
}
#endif  UCB_BHASH

/*
 * Return a busy buffer for block blkno of device dev.
 * If getblk hands back a buffer already marked B_DONE it is
 * returned as is; otherwise a read is started through the
 * device strategy routine and waited for with iowait
 * (which also passes any error to the user).
 */
struct buf *
bread(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;

	bp = getblk(dev, blkno);
	if ((bp->b_flags & B_DONE) == 0) {
		/* contents not valid: start the read, then wait for it */
		bp->b_flags |= B_READ;
		bp->b_bcount = BSIZE;
		(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef  DISKMON
		io_info.nread++;
#endif  /* DISKMON */
#ifdef  CGL_ACCT
		u.u_cgl.cgl_inblk++;
#endif  /* CGL_ACCT */
		iowait(bp);
		return(bp);
	}
#ifdef  DISKMON
	io_info.ncache++;	/* no I/O was needed */
#endif  /* DISKMON */
	return(bp);
}

/*
 * Like bread, but in addition start an asynchronous read of
 * block rablkno (when it is non-zero and not already associated
 * with a buffer).  The read-ahead buffer is not handed to the
 * caller; only the buffer for blkno is returned.
 */
struct buf *
breada(dev, blkno, rablkno)
dev_t dev;
daddr_t blkno, rablkno;
{
	register struct buf *bp, *ahead;
#ifdef  CGL_ECS
	extern struct buf ecstab;
#endif  /* CGL_ECS */

#ifdef  CGL_ECS
	/* no read-ahead on ecs dev */
	if (bdevsw[major(dev)].d_tab == &ecstab)
		return(bread(dev, blkno));
#endif  /* CGL_ECS */
#ifdef  TWG_BD
	if (bdevsw[major(dev)].d_flags & BD_NOCACHE)
		return (bread(dev, blkno));
#endif  /* TWG_BD */

	/*
	 * Start the read of the wanted block first, but only when it
	 * is not in core; bp stays NULL if nothing was started here.
	 */
	bp = NULL;
	if (incore(dev, blkno) == 0) {
		bp = getblk(dev, blkno);
		if (!(bp->b_flags & B_DONE)) {
			bp->b_flags |= B_READ;
			bp->b_bcount = BSIZE;
			(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef  DISKMON
			io_info.nread++;
#endif  /* DISKMON */
#ifdef  MONITORING
			sysmon.nraheads++;
#endif  /* MONITORING */
#ifdef  CGL_ACCT
			u.u_cgl.cgl_inblk++;
#endif  /* CGL_ACCT */
		}
	}

	/* Now the read-ahead block. */
	if (rablkno != 0 && incore(dev, rablkno) == 0) {
		ahead = getblk(dev, rablkno);
		if ((ahead->b_flags & B_DONE) == 0) {
			ahead->b_flags |= B_READ|B_ASYNC;
			ahead->b_bcount = BSIZE;
			(*bdevsw[major(dev)].d_strategy)(ahead);
#ifdef  DISKMON
			io_info.nreada++;
#endif  /* DISKMON */
#ifdef  CGL_ACCT
			u.u_cgl.cgl_inblk++;
#endif  /* CGL_ACCT */
		} else
			brelse(ahead);	/* already valid; give it back */
	}

	if (bp != NULL) {
		iowait(bp);
		return(bp);
	}
	/* the block was in core above: let bread fetch it */
	return(bread(dev, blkno));
}

/*
 * Write the buffer through the device strategy routine.
 * If B_ASYNC was not set on entry, wait for completion and
 * release the buffer.  If it was set, do not wait: a buffer
 * that had been a delayed write is marked B_AGE, any other
 * has its error status picked up with geterror.
 */
bwrite(bp)
register struct buf *bp;
{
	register int oflags;

	oflags = bp->b_flags;		/* flags as they were on entry */
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
	bp->b_bcount = BSIZE;
#ifdef  DISKMON
	io_info.nwrite++;
#endif  /* DISKMON */
#ifdef  CGL_ACCT
	if (!(oflags & B_DELWRI))
		u.u_cgl.cgl_oublk++;
#endif  /* CGL_ACCT */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	if (oflags & B_ASYNC) {
		if (oflags & B_DELWRI)
			bp->b_flags |= B_AGE;
		else
			geterror(bp);
		return;
	}
	iowait(bp);
	brelse(bp);
}

/*
 * Delayed write: mark the buffer B_DELWRI|B_DONE and release it
 * without starting I/O, so that it is written out only when it
 * is grabbed for another purpose (e.g. when writing a partial
 * block where another write to the same block is expected soon).
 * This can't be done for magtape, since writes must be done in
 * the same order as requested; such devices (and, when configured,
 * BD_NOCACHE devices and the ecs device) get an immediate bawrite.
 */
bdwrite(bp)
register struct buf *bp;
{
	register struct buf *dp;
#ifdef	CGL_ECS
	extern struct buf ecstab;
#endif	/* CGL_ECS */

#ifdef  TWG_BD
	if (bdevsw[major(bp->b_dev)].d_flags & BD_NOCACHE) {
		bawrite(bp);
		return;
	}
#endif  /* TWG_BD */

	dp = bdevsw[major(bp->b_dev)].d_tab;
#ifdef  CGL_ECS
	if (dp == &ecstab || (dp->b_flags & B_TAPE)) {
#else   /* CGL_ECS */
	if (dp->b_flags & B_TAPE) {
#endif  /* CGL_ECS */
		bawrite(bp);
		return;
	}

#ifdef  CGL_ACCT
	u.u_cgl.cgl_oublk++;
#endif  /* CGL_ACCT */
	bp->b_flags |= B_DELWRI | B_DONE;
#ifdef  MONITORING
	sysmon.ndwrites++;
#endif  /* MONITORING */
	brelse(bp);
}

/*
 * Asynchronous write: flag the buffer B_ASYNC and hand it
 * to bwrite, which then starts the I/O without waiting for it.
 */
bawrite(bp)
register struct buf *bp;
{
	bp->b_flags = bp->b_flags | B_ASYNC;
	bwrite(bp);
}

/*
 * Release the buffer, with no I/O implied.
 *
 * Anyone sleeping on the buffer, or on the free list, is woken.
 * The buffer is then linked back onto the free list: at the head
 * when B_AGE is set (getblk and geteblk take buffers from the
 * head, so it is reused first), otherwise at the tail.  A buffer
 * with B_ERROR set loses its device association.  Finally
 * B_WANTED, B_BUSY, B_ASYNC and B_AGE are cleared.
 *
 * A buffer whose b_dev is NODEV (one handed out by geteblk, or
 * one just disassociated below after an error) has no entry in
 * bdevsw, so the table is only consulted when b_dev is not NODEV.
 */
brelse(bp)
register struct buf *bp;
{
	register struct buf **backp;
	register s;
#ifdef  CGL_ECS
	extern struct buf ecstab;
#endif  /* CGL_ECS */

	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
#ifdef  TWG_BD
	/*
	 * Buffers of BD_NOCACHE devices are never put on the
	 * free list; just drop the busy state.
	 */
	if (bp->b_dev != NODEV &&
	    (bdevsw[major(bp->b_dev)].d_flags & BD_NOCACHE)) {
		bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
		return;
	}
#endif  /* TWG_BD */
	if (bfreelist.b_flags&B_WANTED) {
		bfreelist.b_flags &= ~B_WANTED;
		wakeup((caddr_t)&bfreelist);
	}
	if (bp->b_flags&B_ERROR) {
#ifdef  UCB_BHASH
		/* must unhash before b_dev is lost: bunhash ignores NODEV */
		bunhash(bp);
#endif  /* UCB_BHASH */
		bp->b_dev = NODEV;  /* no assoc. on error */
#ifdef CGL_AGE
		bp->b_flags |= B_AGE;   /* abandon buffer immediately */
#endif /* CGL_AGE */
	}
#ifdef  CGL_ECS
	/* b_dev may have just become NODEV above; do not index with it */
	if (bp->b_dev != NODEV &&
	    bdevsw[major(bp->b_dev)].d_tab == &ecstab)
		bp->b_flags |= B_AGE;   /* keep ecs blocks out of buffer pool */
#endif  /* CGL_ECS */
	s = spl6();
	if(bp->b_flags & B_AGE) {
		/* insert at the head of the free list */
		backp = &bfreelist.av_forw;
		(*backp)->av_back = bp;
		bp->av_forw = *backp;
		*backp = bp;
		bp->av_back = &bfreelist;
	} else {
		/* append at the tail of the free list */
		backp = &bfreelist.av_back;
		(*backp)->av_forw = bp;
		bp->av_back = *backp;
		*backp = bp;
		bp->av_forw = &bfreelist;
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
#ifdef  MONITORING
	sysmon.nfbuf++;
#endif  /* MONITORING */
	splx(s);
}

/*
 * Report whether some buffer is currently associated with
 * block blkno of device dev: 1 if so, 0 if not.  Nothing is
 * marked busy and nothing sleeps (breada uses this to avoid
 * getting hung up on a wait).
 */
incore(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;
#ifndef UCB_BHASH
	register struct buf *dp;
#endif  /* UCB_BHASH */
	register daddr_t want;		/* value to match against b_blkno */

#ifdef  UCB_NKB
	want = fsbtodb(blkno);
#else   /* UCB_NKB */
	want = blkno;
#endif  /* UCB_NKB */

#ifdef  UCB_BHASH
	for (bp = bhash[BUFHASH(blkno)]; bp != NULL; bp = bp->b_link) {
#else   /* UCB_BHASH */
	dp = bdevsw[major(dev)].d_tab;
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
#endif  /* UCB_BHASH */
		if (bp->b_dev == dev && bp->b_blkno == want)
			return(1);
	}
	return(0);
}

/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 *
 * The buffer returned is marked B_BUSY and has been taken off
 * the free list.  A buffer found already associated keeps its
 * flags (so B_DONE may be set); a reassigned buffer has its
 * flags reset to just B_BUSY.
 * Panics with "blkdev" if the major number is out of range and
 * with "devtab" if the device has no d_tab.
 */
struct buf *
getblk(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;
	register struct buf *dp;
#ifdef  UCB_NKB
	register daddr_t dblkno = fsbtodb(blkno);
#endif  UCB_NKB
#ifdef  DISKMON
	register i;
#endif  DISKMON
#ifdef  UCB_BHASH
	register int j;
	register struct buf *ep;	/* NOTE(review): not referenced in this function */
#endif  UCB_BHASH

	if(major(dev) >= nblkdev)
		panic("blkdev");

#ifdef  TWG_BD
	/*
	 * BD_NOCACHE device: the device's own d_tab header is used as
	 * the buffer.  It is linked to itself so that notavail's
	 * unlinking is harmless, and the strategy routine is called
	 * right here.
	 */
	if(bdevsw[major(dev)].d_flags & BD_NOCACHE) {
		bp = bdevsw[major(dev)].d_tab;
		bp->av_forw = bp->av_back = bp;
		bp->b_flags = B_BUSY | B_READ;
		bp->b_dev = dev;
#ifdef  UCB_NKB
		bp->b_blkno = dblkno;
#else   UCB_NKB
		bp->b_blkno = blkno;
#endif  UCB_NKB
		notavail(bp);
		(*bdevsw[major(dev)].d_strategy)(bp);
		/*
		 * This is necessary so that a read will eventually call
		 * iowait(). Otherwise, errors are not reported.
		 */
		bp->b_flags &= ~B_DONE;
		return bp;
	}
#endif  TWG_BD
	/*
	 * Every retry (after a sleep, or after pushing out a delayed
	 * write) restarts the whole search from here.
	 */
    loop:
	spl0();
	dp = bdevsw[major(dev)].d_tab;
	if(dp == NULL)
		panic("devtab");
	/* Look for a buffer already associated with (dev, blkno). */
#ifdef  UCB_BHASH
	for(bp = bhash[BUFHASH(blkno)]; bp != NULL; bp = bp->b_link)
#else   UCB_BHASH
	for (bp=dp->b_forw; bp != dp; bp = bp->b_forw)
#endif  UCB_BHASH
	{
#ifdef  UCB_NKB
		if (bp->b_blkno!=dblkno || bp->b_dev!=dev)
#else   UCB_NKB
		if (bp->b_blkno!=blkno || bp->b_dev!=dev)
#endif  UCB_NKB
			continue;
		/* Found it.  If someone else holds it, wait and start over. */
		spl6();
		if (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			goto loop;
		}
		spl0();
#ifdef  DISKMON
		/*
		 * Statistics: count the buffers between this one and the
		 * end of the free list.  dp is reused as the cursor; it is
		 * not needed again before the return below.
		 */
		i = 0;
		dp = bp->av_forw;
		while (dp != &bfreelist) {
			i++;
			dp = dp->av_forw;
		}
		if (i<nbuf)
			io_info.bufcount[i]++;
#endif  DISKMON
#ifdef  MONITORING
		if(major(dev) == MSM || major(dev) == MSM1 || major(dev) == MSM2 || major(dev) == MSM3)
			sysmon.hpmon.caches[(minor(dev)>>3)]++;
		    else
			sysmon.nondkcaches++;
#endif  MONITORING
		notavail(bp);
#ifdef  UCB_BHASH
		bp->b_flags |= B_CACHE;
#endif  UCB_BHASH
		return(bp);
	}
	/*
	 * Not associated with any buffer: one must be taken from the
	 * free list.  If the list is empty, wait for a release.
	 */
	spl6();
	if (bfreelist.av_forw == &bfreelist) {
		bfreelist.b_flags |= B_WANTED;
		sleep((caddr_t)&bfreelist, PRIBIO+1);
		goto loop;
	}
	spl0();
	notavail(bp = bfreelist.av_forw);
	/*
	 * A delayed-write buffer must be written before it can be
	 * reused: start the write asynchronously and search again.
	 */
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
#ifdef  UCB_BHASH
	bunhash(bp);
#endif  UCB_BHASH
	bp->b_flags = B_BUSY;
	/* move the buffer from its old device chain to the head of dp's */
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = dev;
#ifdef  UCB_NKB
	bp->b_blkno = dblkno;
#else   UCB_NKB
	bp->b_blkno = blkno;
#endif  UCB_NKB
#ifdef  UCB_BHASH
	/* push onto the front of the hash chain for the new block */
	j = BUFHASH(blkno);
#ifdef  DEBUG
	printf("hashing bp=%o bhash[j=%d]=%o old=%o\n", bp, j, bhash[j], bp->b_link);
#endif  DEBUG
	bp->b_link = bhash[j];
	bhash[j] = bp;
#endif  UCB_BHASH
	return(bp);
}

/*
 * Get an empty buffer, not assigned to any particular device.
 * Sleeps while the free list is empty.  The first free buffer is
 * taken; a delayed-write buffer is written out asynchronously and
 * the search repeated.  The buffer returned is B_BUSY, has
 * b_dev == NODEV and is chained on bfreelist's b_forw/b_back list.
 */
struct buf *
geteblk()
{
	register struct buf *bp;
	register struct buf *dp;

	for (;;) {
		spl6();
		while (bfreelist.av_forw == &bfreelist) {
			bfreelist.b_flags |= B_WANTED;
			sleep((caddr_t)&bfreelist, PRIBIO+1);
		}
		spl0();
		bp = bfreelist.av_forw;
		notavail(bp);
		if ((bp->b_flags & B_DELWRI) == 0)
			break;
		/* dirty: push it out and look for another one */
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
	}
	dp = &bfreelist;
#ifdef  UCB_BHASH
	bunhash(bp);
#endif  /* UCB_BHASH */
	bp->b_flags = B_BUSY;
	/* unlink from the old device chain, relink at the head of dp's */
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = (dev_t)NODEV;
#ifdef  UCB_BHASH
	bp->b_link = NULL;
#endif  /* UCB_BHASH */
	return(bp);
}

#ifdef  UCB_BHASH
/*
 * Remove a buffer from the hash chain selected by its block
 * number.  A buffer with b_dev == NODEV is left alone.
 * Panics with "bunhash 1" if the chain is empty and with
 * "bunhash 2" if the buffer is not found on the chain.
 */
bunhash(bp)
register struct buf *bp;
{
	register struct buf **linkp;	/* link that might point at bp */
	register int chain;

#ifdef  DEBUG
	printf("unhash\n");
#endif  /* DEBUG */
	if (bp->b_dev == NODEV)
		return;
#ifdef  UCB_NKB
	chain = BUFHASH(dbtofsb(bp->b_blkno));
#else   /* UCB_NKB */
	chain = BUFHASH(bp->b_blkno);
#endif  /* UCB_NKB */
	linkp = &bhash[chain];
	if (*linkp == NULL)
		panic("bunhash 1");
#ifdef  DEBUG
	printf("blkno=%D bhash[i=%d]=%o\n", bp->b_blkno, chain, *linkp);
#endif  /* DEBUG */
	while (*linkp != NULL) {
		if (*linkp == bp) {
			*linkp = bp->b_link;
			return;
		}
		linkp = &(*linkp)->b_link;
	}
	panic("bunhash 2");
}
#endif  UCB_BHASH

/*
 * Sleep (at spl6, priority PRIBIO) until B_DONE is set on the
 * buffer, then drop to spl0 and hand any error to the user
 * through geterror.
 */
iowait(bp)
register struct buf *bp;
{
	spl6();
	for (;;) {
		if (bp->b_flags & B_DONE)
			break;
		sleep((caddr_t)bp, PRIBIO);
	}
	spl0();
	geterror(bp);
}

/*
 * Unlink a buffer from the available list and mark it busy.
 * (internal interface)
 *
 * The unlinking is done at spl6; the priority in force on entry
 * is restored before returning.
 */
notavail(bp)
register struct buf *bp;
{
	register s;

	s = spl6();
	bp->av_back->av_forw = bp->av_forw;
	bp->av_forw->av_back = bp->av_back;
	bp->b_flags |= B_BUSY;
#ifdef  MONITORING
	sysmon.nfbuf--;
	sysmon.nrbuf++;
	/*
	 * Accumulate with "+=" as the rest of the file does.  The
	 * obsolete "=+" spelling is read by current compilers as
	 * "= +x", which would overwrite the running total instead
	 * of adding to it.
	 */
	sysmon.cnfbuf += sysmon.nfbuf;
	if(sysmon.nfbuf < sysmon.bthres)
		sysmon.nbthres++;
#endif  /* MONITORING */
	splx(s);
}

/*
 * Mark I/O on a buffer as complete.  A B_MAP buffer is first
 * passed to mapfree.  An asynchronous buffer is then released
 * with brelse; for any other, B_WANTED is cleared and whoever
 * sleeps on the buffer is woken.
 */
iodone(bp)
register struct buf *bp;
{
	if (bp->b_flags & B_MAP)
		mapfree(bp);
	bp->b_flags |= B_DONE;
	if ((bp->b_flags & B_ASYNC) == 0) {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
		return;
	}
	brelse(bp);
}

/*
 * Fill the BSIZE bytes of core belonging to a buffer with
 * zeros, one int at a time, and reset its residual count.
 * With CGL_CLRBUF the loop is unrolled four ways.
 */
clrbuf(bp)
struct buf *bp;
{
	register int *wp;
	register int left;

	wp = bp->b_un.b_words;
#ifdef CGL_CLRBUF
	left = (BSIZE/sizeof(int)) >> 2;	/* groups of four ints */
	do {
		wp[0] = 0;
		wp[1] = 0;
		wp[2] = 0;
		wp[3] = 0;
		wp += 4;
	} while (--left != 0);
#else /* CGL_CLRBUF */
	left = BSIZE/sizeof(int);
	do {
		*wp = 0;
		wp++;
	} while (--left != 0);
#endif /* CGL_CLRBUF */
	bp->b_resid = 0;
}

/*
 * swap I/O
 *
 * Transfer count units of core starting at coreaddr to or from
 * the swap device, starting at block swplo+blkno, and wait for
 * the transfer to finish.
 *      blkno    block offset, relative to swplo
 *      coreaddr core address (converted to bytes with ctob, so
 *               presumably in clicks -- confirm against callers)
 *      count    amount to transfer, in the same units as coreaddr
 *      rdflg    OR'ed into b_flags; non-zero is counted as a
 *               swap-in by the UCB_METER statistics
 * One of the two swap headers is used; see the choice below.
 * Panics with "IO err in swap" if B_ERROR is set at the end.
 */
swap(blkno, coreaddr, count, rdflg)
register count;
{
	register struct buf *bp;
	register tcount;
/*      printf("blkno=%x core=%x count=%x\n",swplo+blkno,coreaddr,count); */

#ifdef  UCB_METER
	if (rdflg) {
		cnt.v_pswpin += count;
		cnt.v_swpin++;
	} else {
		cnt.v_pswpout += count;
		cnt.v_swpout++;
	}
#endif  UCB_METER
	/*
	 * Prefer swbuf1; fall back to swbuf2 only when swbuf1 is busy
	 * and nobody is already waiting for swbuf2.
	 */
	bp = &swbuf1;
	if(bp->b_flags & B_BUSY)
		if((swbuf2.b_flags&B_WANTED) == 0)
			bp = &swbuf2;
	/* wait until the chosen header is free */
	spl6();
	while (bp->b_flags&B_BUSY) {
		bp->b_flags |= B_WANTED;
		sleep((caddr_t)bp, PSWP+1);
	}
	/*
	 * Do the transfer in chunks of tcount (the whole remainder,
	 * except under DEC where a chunk is capped).
	 * NOTE(review): b_flags is assigned afresh for every chunk,
	 * which also drops a B_WANTED set by a waiter and any B_ERROR
	 * left by an earlier chunk; only the last chunk's error is
	 * seen by the check at the bottom -- confirm this is intended.
	 */
	while (count) {
		bp->b_flags = B_BUSY | B_PHYS | rdflg;
		bp->b_dev = swapdev;
		tcount = count;
#ifdef DEC
		if (tcount >= 01700)    /* prevent byte-count wrap */
			tcount = 01700;
#endif DEC
		bp->b_bcount = ctob(tcount);
		bp->b_blkno = swplo+blkno;
		bp->b_un.b_addr = (caddr_t)ctob(coreaddr);
#ifdef DEC
		bp->b_xmem = (coreaddr>>10) & 077;
#endif DEC
#ifdef  CGL_SWPMON
#define DK_N 1  /* formerly for rk disk */
		dk_busy |= 1<<DK_N;
		dk_numb[DK_N] += 1;
		dk_wds[DK_N] += bp->b_bcount>>6;
#endif  CGL_SWPMON
		(*bdevsw[major(swapdev)].d_strategy)(bp);
		/* wait for this chunk to complete */
		spl6();
		while((bp->b_flags&B_DONE)==0)
			sleep((caddr_t)bp, PSWP);
#ifdef  CGL_SWPMON
		dk_busy &= ~(1<<DK_N); /* may be cleared too soon */
#endif  CGL_SWPMON
		/* advance past the chunk just transferred */
		count -= tcount;
		coreaddr += tcount;
		blkno += ctod(tcount);
	}
	/* give the header up, waking anyone who wants it */
	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	spl0();
	bp->b_flags &= ~(B_BUSY|B_WANTED);
	if (bp->b_flags & B_ERROR)
		panic("IO err in swap");
}

/*
 * Start writes for all delayed-write buffers on the free list
 * that belong to dev, or for all of them when dev is NODEV.
 * Each such buffer is taken off the free list and written
 * asynchronously; the scan then restarts from the head of the
 * list.  (from umount and update)
 */
bflush(dev)
dev_t dev;
{
	register struct buf *bp;

rescan:
	spl6();
	bp = bfreelist.av_forw;
	while (bp != &bfreelist) {
		if ((bp->b_flags & B_DELWRI) == 0 ||
		    (dev != NODEV && dev != bp->b_dev)) {
			/* clean, or some other device's: skip it */
			bp = bp->av_forw;
			continue;
		}
		bp->b_flags |= B_ASYNC;
		notavail(bp);
		bwrite(bp);
		goto rescan;
	}
	spl0();
}

/*
 * Raw I/O. The arguments are
 *      The strategy routine for the device
 *      A buffer, which will always be a special buffer
 *        header owned exclusively by the device for this purpose
 *      The device number
 *      Read/write flag
 * Essentially all the work is computing physical addresses and
 * validating them.
 *
 * The transfer is described by u.u_base, u.u_count and u.u_offset.
 * On return u.u_count holds the buffer's residual count and any
 * device error has been passed on by geterror.  If the address
 * checks fail, u.u_error is set to EFAULT and no I/O is started.
 */
physio(strat, bp, dev, rw)
register struct buf *bp;
int (*strat)();
{
	register unsigned base;
	register int nb;
	int ts;

	base = (unsigned)u.u_base;
	/*
	 * Check odd base, odd count, and address wraparound
	 */
	if (base&01 || u.u_count&01 || base>=base+u.u_count)
		goto bad;
#ifdef DEC
	ts = (u.u_tsize+127) & ~0177;
	if (u.u_sep)
		ts = 0;
	nb = base>>6;
#else  DEC
	ts = (u.u_tsize+(SEGFULL-1)) & ~(SEGFULL-1);
	nb = base>>CSHIFT;
#endif DEC
	/*
	 * Check overlap with text. (ts and nb now
	 * in clicks)
	 */
	if (nb < ts)
		goto bad;
	/*
	 * Check that transfer is either entirely in the
	 * data or in the stack: that is, either
	 * the end is in the data or the start is in the stack
	 * (remember wraparound was already checked).
	 */
#ifdef DEC
	if ((base+u.u_count)>>6 >= ts+u.u_dsize
	    && nb < 1024-u.u_ssize)
		goto bad;
#else  DEC
	if ((base+u.u_count)>>CSHIFT >= ts+u.u_dsize
	    && nb < stoc(u.u_sseg))
		goto bad;
	/*
	 * Check for passing end of stack
	 */
	if ((base+u.u_count)>>CSHIFT >= stoc(u.u_sseg)+u.u_ssize)
		goto bad;
#endif DEC
	/*
	 * Wait for the device's buffer header to become free,
	 * then claim it for this transfer.
	 */
	spl6();
	while (bp->b_flags&B_BUSY) {
		bp->b_flags |= B_WANTED;
		sleep((caddr_t)bp, PRIBIO+1);
	}
	bp->b_flags = B_BUSY | B_PHYS | rw;
	bp->b_dev = dev;
	/*
	 * Compute physical address by simulating
	 * the segmentation hardware.
	 */
#ifdef DEC
	ts = (u.u_sep? UDSA: UISA)->r[nb>>7] + (nb&0177);
	bp->b_un.b_addr = (caddr_t)((ts<<6) + (base&077));
	bp->b_xmem = (ts>>10) & 077;
#else  DEC
	bp->b_un.b_addr = (caddr_t)base;
	lraddr(&bp->b_un.b_addr, uisa);
#endif DEC
#ifdef  UCB_NKB
	bp->b_blkno = u.u_offset >> PGSHIFT;
#else   UCB_NKB
	bp->b_blkno = u.u_offset >> BSHIFT;
#endif  UCB_NKB
	bp->b_bcount = u.u_count;
	bp->b_error = 0;
	/*
	 * SLOCK is held on the process for the duration of the
	 * transfer (presumably to keep it in core while the device
	 * uses its memory -- confirm against the swapper).
	 */
	u.u_procp->p_flag |= SLOCK;
	(*strat)(bp);
	/* wait for the device to finish */
	spl6();
	while ((bp->b_flags&B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	u.u_procp->p_flag &= ~SLOCK;
	/* release the header, waking anyone waiting to use it */
	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	spl0();
	bp->b_flags &= ~(B_BUSY|B_WANTED);
	u.u_count = bp->b_resid;
	geterror(bp);
	return;
    bad:
	u.u_error = EFAULT;
}

/*
 * If the buffer is flagged B_ERROR, copy the device's error
 * number into u.u_error; a zero error number is replaced by
 * the general code EIO.  (The original note here observes that
 * devices do not yet return specific errors, so EIO is what
 * the user actually sees.)
 */
geterror(bp)
register struct buf *bp;
{
	if ((bp->b_flags & B_ERROR) == 0)
		return;
	u.u_error = bp->b_error;
	if (u.u_error == 0)
		u.u_error = EIO;
}
