4,887,235
	453	454
;Invisible-pointer traps
;If transporting was needed it has happened already
;Time= 2 cycles trapping + cycles here
(defucode inviz-trap
  (parallel
    (trap-save)
    (assign vma memory-data)
    (assign b-trans-vma memory-data))
  (trap-restore
    (memory-map read) ;Gurkh! Sometimes needed to write here????? <---
    (nop)))

;Map-miss trap
;Hardware started memory reference to first level hash table in trapped
;cycle; so the data are available in the first cycle of the trap handler.
;since trapping inserted an extra clock which drove the memory pipeline.
;This is too early since we arent ready for it that fast.
;Time = 2 cycles trapping + 4 cycles here in most favorable case.
;Its 4 cycles rather than 3 because Abus is a bottleneck (VMA, MD).
(defucode map-miss-trap
  (parallel
    (trap-save)
    (assign b-map-temp (ldb vma 16. 8)))	;With address space ID?
  (parallel
    (increment-pma)
    (if (equal-fixnum memory-data b-map-temp)	;Match pht key?
	(trap-restore (write-map-from memory-data) ;Yes, and VMA still set up
		      (map-metering))		;Spare cycle for metering
      xxxxxx)))		;Well, go off and search second level
;Disk DMA task.







;The following control regstors are set up by the background
;service task, based on the command list in main memory set up
;by Lisp code. At the same time the hardware control registers
;are (all?) set up. The background service task also bashes the
;DMA task state to start it up at the right place for read or write.
;When the DMA task is done, it wakes up the background tack which
;can tell what happened by looking at the control registers.





(defareg a-disk-ma 3000)	;Address of next word to transfer
(defareg a-disk-wc 3001)	;Number of words to transfer (minus 3)
(defareg a-disk-header 3002)	;Header value being sought
(defareg a-disk-timeout 3003)	;Number of header tries before punting
				; (maybe heads are positioned wrong)

(defareg a-disk-search-cmd 3004);Tell hardware to search for header

;Search subroutine. Returns after reading the header of the desired sector.
;Eats shit and dies if header not found after timeout (does not return).
(defucode disk-search
  (assign b-temp (io-bus-data disk-data))	;---Read Lbus directly into DP??
						;---Or use extended B memory??
  (if (equal-fixnum a-disk-header b-temp)
      (return)			;Header found. Let caller dismiss.
    (drop-through))
  (parallel
   (dismiss)
   (assign (io-bus-data disk-control) a-disk-search-cmd))	;Try again
  (parallel
   (assign a-disk-timeout (1- a-disk-timeout))
   (if alu-carry		;Not yet counted to -1
       (goto disk-search)	;Wakeup back at disk-search
       (eat-shit-and-die))))	;On next wakeup, actually

;Read routine. Initially entered via gratuitous wakeup. Call search
;routine which will return with disk entering data area.
;Most wakeups are only for 2 cycles, except at the start of the wrong
;sector we remain active for 4 cycles, and at the start of the right
;sector we remain active for 5 cycles. These could each be decreased
;by 1 as noted above, and could be decreased more I guess by having
;separate search routines for read and write.
(defucode disk-read
  (dismiss)			;Until start of sector
  (call disk-search)
  (dismiss)
  (jump disk-read-loop))

;Here for each data word
4,887,235
	455	456
(defucode disk-read-loop
  (parallel
   (assign pma a-disk-ma)
   (dma-read disk)
   (assign a-disk-ma (1+ a-disk-ma))
   (dismiss))
  (parallel
   (assign a-disk-wc (1- a-disk-wc))
   (if alu-carry		;Not yet counted to -1
       (goto disk-read-loop)	;Wake up back there
       (goto disk-read-last-3))))

;Here for the third to last data word
(defucode disk-read-last-3
  (parallel
   (assign pma a-disk-ma)
   (dma-read disk)
   (assign a-disk-ma (1+ a-disk-ma))
   (io-bus-stop-signal)		;Tell disk to stop reading after next word
   (dismiss))
  (nop)
  (parallel			;Swallow last data word
    (assign pma a-disk-ma)
    (dma-read disk)
    (assign a-disk-ma (1+ a-disk-ma))
    (dismiss))
  (nop)
  ;Here we have the ECC word and the state machine has stopped
  ;Since we arent doing any double-buffered control registers hacks.
  ;we simply stop and let the background task look at the hardware
  ;reglsters and decide what to do.
  (parallel
   (awaken-task background-service-task)
   (dismiss))
  (nop)
;XXXbrad - something missing
)

;Kernel of blting from main memory to TV
;This involves no rotation or alu function, just straight copy
;used e.g. to update a screen image.

;TV epoch corresponds to 5 microcode cycles in this version
;If a TV epoch can correspond to 6 cycles (i.e.a we use all fast
; microinstructions) things arm much easier,





(defucode tv-copy-kernel
  (parallel (assign pma a-tv-pma)		;Uses Abus
	    (assign memory-data b-temp1)	;Uses B,X,O busses
	    (increment-pma))
  (parallel (assign memory-data b-temp2)	;Store 2nd word in TV
	    (memory-map read))			;Start next memory read
  (assign a-tv-pma (+ a-tv-pma (b-constant 2)))	;Memory active, inc pma
  (parallel (assign b-temp1 memory-data)	;Stash first word from meit
	    (increment-pma))
  (parallel (assign b-temp1 memory-data)	;Uses A, X busses
	    (increment-pma)))
	    ;Here we have to be able to increment the VMA by 2
	    ;Must happen entirely in the MC because there is no
	    ;cycle with abus free to use DP adder to increment it.
	    ;If incremont-pma carries into the page bits of the VMA,
	    ;this will work.

;There is some confusion about increment-pma here. Generally it
;is assumed to increment pma and vma both. But since we are
;leaving the read address in VMA, and switching PMA back and forth
;between a direct load from Abus and mapping from VMA, it's clear
;that this increment-pma really should split in several different
;memory-control functions.

;Cycle	Address Bus		Data Bus
;1	TV address		Write data 1
;2	Memory Address		Write data 2	-- address bus conflict? --
;3	nil			nil
;4	Memory Address+1	Read data 1
;5	nil			Read data 2

;In cycle 2 the address bus wants to be the memory address so that
;the read can get started, it also wants to be the TV address+1
;for writing into the TV (except the TV doesnt actually need to
;look at this anyway).
;Also Memory Address+1 needs to come out in cycle 3, not 4,
;since the memory is interleaved rather than page-mode,

;;; -*- Mode:Lisp; Package:Micro; Base:8; Lowercase:yes -*-
;;; (c) Copyright 1982, Symbolics. Inc.

; Microcode for branch instructions
4,887,235
	457	458

;Get defmicro and all h~s hosts

(declare (cond ((not (status feature lmucode))
		(load 'udcls))))

;These are branches the compiler knows about initially

(definst branch signed-pc-relative
  (set-pc (pc-add pc macro-signed-immediate)))

;This gets an offset from memory. Would it be better to get a PC?
(definst long-branch constant-pc-relative
  (assign vma (- frame-function macro-unsigned-immediate 1))
  (start-memory read)
  (assign b-temp pc)
  (parallel (check-data-type memory-data dtp-fix)
	    (machine-version-case
	     ((tmc tmc5)
	      (sequential (assign a-temp memory-data)
			  (set-pc (pc-add b-temp a-temp))))
	     (otherwise (set-pc (pc-add b-temp memory-data))))))

(definst branch-false signed-pc-relative
  (if (data-type? top-of-stack-a dtp-nil)
      (set-pc (pc-add pc macro-signed-immediate)
	      (for-effect (popval)))
    (parallel
     (for-effect (popval))
     (next-instruction))))

(definst branch-true signed-pc-relative
  (if (not (data-type? top-of-stack-a dtp-nil))
      (set-pc (pc-add pc macro-signed-immediate)
	      (for-effect (popval)))
    (parallel
     (for-effect (popval))
     (next-instruction))))

(definst branch-false-else-pop signed-pc-relative
  (if (data-type? top-of-stack-a dtp-nil)
      (goto branch)
    (parallel
     (for-effect (popval))
     (next-instruction))))

(definst branch-true-else-pop signed-pc-relative
  (if (not (data-type? top-of-stack-a dtp-nil))
      (goto branch)
    (parallel
     (for-effect (popval))
     (next-instruction))))

(definst branch-false-and-pop signed-pc-relative
  (if (data-type? top-of-stack-a dtp-nil)
      (set-pc (pc-add pc macro-signed-immediate)
	      (for-effect (popval)))
    (next-instruction)))

(definst branch-true-and-pop signed-pc-relative
  (if (not (data-type? top-of-stack-a dtp-nil))
      (set-pc (pc-add pc macro-signed-immediate)
	      (for-effect (popval)))
    (next-instruction)))

;This is a random selection of other branches


(comment ;The compiler doesnt want to use these yet

;Note: cant test zero simultaneous with popval due to xbus conflict
;Okay since instruction has to take two cycles even if it doesnt branch
(definst branch-zerop (signed-pc-relative needs-stack)
  (parallel
   (check-fixnum-1arg-b top-of-stack
     (otherwise (signal-error unimplemented-arithmetic))) ;---
   (if (zero-fixnum top-of-stack)
       (set-pc (pc-add pc macro-signed-immediate)
	       (for-effect (popval)))
     (parallel
      (for-effect (popval))
      (next-instruction)))))

(definst branch-not-zerop (signed-pc-relative needs-stack)
  (parallel
   (check-fixnum-1arg-b top-of-stack
     (otherwise (signal-error unimplemented-arithmetic))) :---
     (if (not-zero-fixnum top-of-stack)
	 (set-pc (pc-add pc macro-signed-immediate)
		 (for-effect (popval)))
       (parallel
	(for-effect (popval))
	(next-instruction)))))

4,887,235
	459	460
(definst branch-greater-or-equal (signed-pc-relative needs-stack)
  (parallel
   (check-fixnum-2args next-on-stack top-of-stack
     (otherwise (signal-error unimplemented-arithmetic))) ;---
   (decrement-stack-pointer)
   (if (greater-or-equal-fixnum next-on-stack top-of-stack)
       (set-pc (pc-add pc macro-signed-immediate)
	       (for-effect (popval)))
     (parallel
      (for-effect (popval))
      (next-instruction)))))

(definst branch-eq (signed-pc-relative needs-stack)
  (parallel
   (decrement-stack-pointer)
   (if (equal-typed-pointer next-on-stack top-of-stack)
       (set-pc (pc-add pc macro-signed-immediate)
	       (for-effect (popval)))
     (parallel
      (for-effect (popval))
      (next-instruction)))))

(definst branch-not-eq (signed-pc-relative needs-stack)
  (parallel
   (decrement-stack-pointer)
   (if (not-equal-typed-pointer next-on-stack top-of-stack)
       (set-pc (pc-add pc macro-signed-immediate)
	       (for-effect (popval)))
     (parallel
      (for-effect (popval))
      (next-instruction)))))

);end comment

F:>lmach>ucode>bitblt-block-mode.lisp.1

; -*- Mode:Lisp; Package:Micro; Base:8: Lowercase:yes -*-
;;;; BITBLT microcode for 3600




(defmicro waiting-for-memory ()			;documentation only, I guess.
  `(nop))

(defmicro abus-array-data (&body body)
  (parallel
   (check-data-type memory-data dtp-fix)	;this traps forwarding pointers, right?
   ,@body))

(defmicro assign-vma-offset (which &rest stuff)
  (selectq which
    (S `(assign vma (+ bb-s-row-addr bb-s-offset ,@stuff)))
    (D `(assign vma (+ bb-d-data-addr bb-d-offset ,@stuff)))
    (otherwise
     (ferror () "assign-vma-offset knows about only S and D, not ~d" which))))

(defmicro parallel-with-s-access (offset &body body)
  (make-memory-access 'bb-s-row-addr 'bb-s-offset offset body))
(defmicro parallel-with-d-access (offset &body body)
  (make-memory-access 'bb-d-data-addr 'bb-d-offset offset body))

(eval-when (eval compile load)
(defun make-memory-access (baseaddr offset-sym offset body)
  (if (or (eq offset offset-sym)
	  (equal offset `(1+ ,offset-sym)))
      ()
    (ferror () "~s is not a recognized offset for ~s" offset offset-sym))
  (let* ((body (reverse body))
	 (finally `(abus-array-data ,(car body))))
    (do ((ll (reverse
	      `((assign vma ,(if (atom offset)
				 `(+ ,baseaddr ,offset)
			         `(+ ,baseaddr ,(second offset) 1)))
		(start-memory read)
		(waiting-for-memory)))
	     (cdr ll))
	 (body (cdr body) (cdr body))
	 (l))
	((and (null ll) (null body))
	 `(sequential ,@l ,finally))
      (cond ((null ll) (push (car body) l))
	    ((null body) (push (car ll) l))
	    (T (push `(parallel ,(car ll) ,(car body)) l))))))
4,887,235
	461	462
);eval-when


;;---hair these up appropriately
(defmicro 32- (operand)
  `(- (b-constant 32.) ,operand))
(defmicro 31- (operand)
  `(- (b-constant 31.) ,operand))

(defmicro dispatch-after-this (operand this &body clauses)
  `(sequential
    (dispatch-after-next ,operand
       ,@clauses)
    (parallel
     (take-dispatch)
     ,this)))







(defmicro dispatch-after-gen (dispatching-on var-and-indices-and-bod &rest clauses)
  (let* ((var-and-indices (first var-and-indices-and-bod))
	 (bod (second var-and-indices-and-bod))
	 (var (first var-and-indices))
	 (indices (rest1 var-and-indices)))
    `(dispatch-after-next ,dispatching-on
	,@(loop for index in indices
		collect `((,index) ,(progv (list var) (list index) (eval bod))))
	,@clauses)))

(defmicro incr-d-offset ()
  `(assign bb-d-offset (1+ bb-d-offset)))

(defmicro decr-d-offset ()
  `(assign bb-d-offset (1- bb-d-offset)))

(defmicro incr-wrap-s-offset ()
  `(sequential
    (assign bb-s-offset (1+ bb-s-offset))
    (if (greater-or-equal-fixnum bb-s-offset bb-s-row-length)
	(parallel
	 (lisp (format	T "~&>>>Wrapping around on bb-a-offset from ~d."
			(low32 (tr 'bb-b-offset))))
	 (assign bb-s-offset (b-constant 0)))
      (drop-through))))

(defmicro decr-wrap-s-offset ()
  `(parallel
    (assign bb-s-offset (1- bb-s-offset))
    (if (minus-fixnum obus)
	(parallel
	 (lisp (cerror T () () ">>>Decr wrapping around on bb-s-offset"))
	 (assign bb-s-offset (1- bb-e-row-length)))
      (drop-through))))

(defmicro store-word (datum)
  `(store-contents (set-type ,datum dtp-fix) () T))





(defmicro parallel-with-return (&body stm)
  `(,(if (eq *machine-version* `sim) `sequential `parallel)
    ,@stm
    (return)))

;;This is incompatible with modularity
(defmacro reserve-bitblt-scratchpad-memory (a-start b-start &rest stuff)
  (loop with a-loc = a-start and b-loc = b-start
	for (name side) in stuff
	when (eq side 'a)
	  collect `(defareg-at-loc ,name ,a-loc 0) into forms
	  and do (incf a-loc)
	when (eq side 'b)
	  collect `(defbreg-at-loc ,name ,b-boc 0) into forms
	  and do (incf b-loc)
	finally (return
		 `(progn 'compile
		   (reserve-scratchpad-memory ,a-start ,(1- a-loc)
					      ,b-start ,(1- b-loc))
		   ,@forms))))

(defvar *dp-offset-names* ())

(defmacro def-fp-offsets (&rest names)
  (loop for i upfrom 0
4,887,235
	463	464
    for name in names
    append `((defatomicro ,name (amem (frame-pointer ,i)))
	     (remprop ',name 'defareg-at-loc)
	     (remprop ',name 'defbreg-at-loc)
	     (defprop ,name ,i fp-offset)
	     (or (memq ',name *fp-offset-names*)
		 (push ',name *fp-offset-names*)))
      into foo
    finally (return '(progn 'compile ,@foo))))

;;decode fp offset numbers into symbols. Debugging only.
(defun dfp (&rest numbers)
  (loop for number in numbers
	collect (loop for name in *fp-offset-names*
		      when (equal (get name 'fp-offset) number)
		      return name
		      finally (return number))))

(def-fp-offsets
  bb-arg-alu bb-arg-width bb-arg-height		;lisp arg
  bb-arg-from-array bb-arg-from-x bb-arg-from-y	;lisp arg
  bb-arg-to-array bb-arg-to-x bb-arg-to-y	;lisp arg
  bb-width-a					;ucode arg
  bb-s-data-addr				;ucode arg
  bb-s-offset-a					;ucode erg
  bb-s-row-offset				;ucode arg
  bb-s-bitpos					;ucode erg
  bb-s-row-length				;ucode erg
  bb-d-data-addr				;ucode erg
  bb-d-offset-a					;ucode erg
  bb-d-bitpos					;ucode erg
  bb-event-count				;ucode erg
  bb-alu-operation				;ucode arg
  )

;;; Some temporaries,

(reserve-bitblt-scratchpad-memory 2650 372
  (bb-width b)					;copied from arg on A side
  (bb-s-offset b)				;..
  (bb-d-offset b)				;..
  (bb-constant b)				;..
  (bb-s-word b)					;temp
  (a-temp-3 a)					;temp
  (bb-constant-a a)				;temp
  (bb-identity a)				;temp
  (bb-s-word2 a)				;temp
  (bb-s-row-addr a)				;temp
)

(defmicro read-bb-s-word ()
  `(parallel
    (assign a-temp (+ bb-width bb-s-bitpos))
    (call read-bb-s-word1)))

;;Assumptions about setup:
;;bb-constant has:
;; >> for constant operations (0,-1): the constant;
;; >> for operations dependent only on source or destination (x, ~x, y, ~y):
;;    a 0 for x,y or -1 for ~x,~y;
;; >> for operations dependent or; both s and d: 0 for those using source directly,
;;    and -1 for those that want the source compementcd.

(defucode read-bb-s-word1
  (assign-vma-offset s)
  (parallel
   (assign byte-r (32- bb-s-bitpos))
   (start-memory read))
  (parallel
   (waiting-for-memory)
   (if (lesser-or-equal-fixnum a-temp (b-constant 32.))
       ;; source is entirely within one word
       (parallel-with-return
	(abus-array-data
	 (assign bb-s-word (logxor bb-constant (rotate memory-data byte-r)))))
     ;;source is split across two words
     (sequential
      (abus-array-data
       (assign bb-s-word (rotate memory-data byte-r)))
      (assign-vma-offset s 1)
      (parallel
       (start-memory read)		;byte-r is already Ok
       )
      (parallel
       (waiting-for-memory)
       (assign byte-s (1- a-temp)))
      (abus-array-data
       (assign bb-s-word (dpb memory-data byte-s byte-r bb-s-word)))
      (parallel-with-return
       (assign bb-s-word (logxor bb-s-word bb-constant-a)))))))

4,887,235
	465	466
(defucode bb-copy-stuff-to-b-side
  (assign b-temp bb-s-row-offset)
  (assign bb-s-row-addr (+ bb-s-data-addr b-temp))
  (assign bb-s-offset bb-s-offset-a)
  (parallel
   (assign bb-d-offset bb-d-offset-a)
   (return)))

(defmacro defucode-bitblt (name source destination neither both)
  `(defucode ,name
     (parallel (assign bb-width bb-width-a)
	       (call bb-copy-stuff-to-b-side))
     (dispatch-after-this (ldb bb-alu-operation 4 0)
			  (parallel (assign bb-constant (a-constant 0)) ;assumption, for the
				    (assign bb-constant-a (a-constant 0)))	;common case
       ((0)			;0 
	(goto ,neither))
       ((1)			;x*y
	(parallel (assign bb-identity (a-constant -1))
		  (jump ,both)))
       ((2)			;~x*y
	(assign bb-identity (a-constant -1))
	(parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1))
		  (jump ,both)))
       ((3) (return))		;y
       ((4)			;x*~y
	(parallel (assign bb-identity (a-constant -1))
		  (jump ,both)))
       ((5) (goto ,source))	;x
       ((6)			;x xor y
	(parallel (assign bb-identity (a-constant 0))
		  (jump ,both)))
       ((7)			;x+y
	(parallel (assign bb-identity (a-constant 0))
		  (jump ,both)))
       ((8)			;~x*~y
	(assign bb-identity (a-constant -1))
	(parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1))
		  (jump ,both)))
       ((9)			;~x xor y
	(assign bb-identity (a-constant 0))
	(parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1))
		  (jump ,both)))
       ((10.)			;~x
	(parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1))
		  (jump ,both)))
       ((11.)			;~x+y
	(assign bb-identity (a-constant 0))
	(parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1))
		  (jump ,both)))
       ((12.)			;~y
	(parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1))
		  (jump ,destination)))
       ((13.)			;x+~y actually, ~(~x*y)
	(assign bb-identity (a-constant -1))
	(parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1))
		  (jump ,both)))
       ((14.)			;~x+~y actually, ~(x*y)
	(parallel (assign bb-identity (a-constant -1))
		  (jump ,both)))
       ((15.)			;-1
	(parallel (assign bb-constant (a-constant -1)) (assign bb-constant-a (a-constant -1))
		  (jump ,neither))))))

(defucode-bitblt ubitblt-short-row
  ubitblt-short-row-source
  ubitblt-short-row-destination
  ubitblt-short-row-neither
  ubitblt-short-row-both)
(defucode-bitblt ubitblt-long-row
  ubitblt-long-row-source
  ubitblt-long-row-destination
  ubitblt-long-row-neither
  ubitblt-long-row-both)
(defucode-bitblt ubitblt-long-row-backwards
  ubitblt-long-row-source-backwards
  ubitblt-long-row-destination
  ubitblt-long-row-neither			;direction immaterial
  ubitblt-long-row-both-backwards)

;;; These should eventually be folded back into defucode-bitblt
(definst %bitblt-short-row no-operand
  (jump ubitblt-short-row))

(definst %bitblt-long-row no-operand
  (jump ubitblt-long-row))

(definst %bitblt-long-row-backwards no-operand
  (jump ubitblt-long-row-backwards))

(definst %bitblt-decode-arrays no-operand
4,887,235
	467	468
  (jump ubitblt-decode-arrays))

(defucode ubitblt-short-row-source
  (read-bb-s-word)
  (assign a-temp (+ bb-width bb-d-bitpos))
  (if (lesser-or-equal-fixnum a-temp (b-constant 32.))
      ;;destination is entirely within one word
      (parallel-with-d-access
        bb-d-offset
	(assign byte-s (1- bb-width))
	(assign byte-r bb-d-bitpos)
	(parallel-with-return
	 (store-word (dpb bb-s-word byte-s byte-r memory-data))))
    ;;destination is split across two words
    (sequential
      ;;store the low byte
      (parallel-with-d-access
        bb-d-offset
	(assign byte-s (31- bb-d-bitpos))
	(assign byte-r bb-d-bitpos)
	(store-word (dpb bb-s-word byte-s byte-r memory-data)))
      ;; store the high byte, using do into md as background
      (parallel-with-d-access
        (1+ bb-d-offset)
	(assign byte-s (1- a-temp))
	(assign byte-r bb-d-bitpos)
	;;byte-r is ok
	(parallel-with-return
	  (store-word (ldb bb-s-word byte-s byte-r memory-data))
	  )))))

(defucode ubitblt-short-row-destination
  (assign a-temp (+ bb-width bb-d-bitpos))
  (if (lesser-or-equal-fixnum a-temp (b-constant 32.))
      ;;destination is entirely within one word
      (sequential
        (parallel-with-d-access
	  bb-d-offset
	  (assign byte-s (1- bb-width))
	  (assign byte-r bb-d-bitpos)
	  (assign a-temp-2 memory-data))
	(assign b-temp (dpb bb-constant byte-s byte-r (a-constant 0)))
	(parallel-with-return
;;XXXbrad b-temp?
	  (store-word (logxor b-temp a-temp-2))))
    ;;destination is split across two words
    (sequential
      ;;munge the low byte
      (parallel-with-d-access
        bb-d-offset
	(assign byte-s (31- bb-d-bitpos))
	(assign byte-r bb-d-bitpos)
	(assign a-temp-2 memory-data))
      (assign b-temp (dpb bb-constant byte-s byte-r (a-constant 0)))
      (store-word (logxor b-temp a-temp-2))
      ;;munge the hign byte
      (parallel-with-d-access
	(1+ bb-d-offset)
	(assign byte-s (1- a-temp))
	(assign byte-r (a-constant 0))
	(assign a-temp-2 memory-data))
      (assign b-temp (ldb bb-constant byte-s byte-r))
      (parallel-with-return
        (store-word (logxor b-temp a-temp-2))))))

;;:the alu operation is actually a constant
(defucode ubitblt-short-row-neither
  (assign a-temp (+ bb-width bb-d-bitpos))
  (parallel
   (if (lesser-or-equal-fixnum a-temp (b-constant 32.))
       ;; destination is entirely within one wore
       (parallel-with-d-access
	bb-d-offset
	(assign byte-s (1- bb-width))
	(assign byte-r bb-d-bitpos)
	(parallel
	  (store-word (dpb bb-constant byte-s byte-r memory-data))
	  (return)))
       ;;destination is split across two words
       (sequential
	 ;;store the low bute
	 (parallel-with-d-access
	   bb-d-offset
	   (assign byte-s (31- bb-d-bitpos))
	   (assign byte-r bb-d-bitpos)
	   (store-word (dpb bb-constant byte-s byte-r memory-data)))
	 ;;store the high byte
	 (parallel-with-d-access
	  (1+ bb-d-offset)
	  (assign byte-s (1- a-temp))
	  (assign byte-r (b-constant 0))
	  (parallel
	    (store-word (dpb bb-constant byte-s byte-r memory-data))
	    (return)))))))

4,887,235
	469	470
;;the alu operation depends upon both source and destination bits
(defucode ubitblt-short-row-both
  (read-bb-s-word)
  (assign a-temp (+ bb-width bb-d-bitpos))
  (assign-vma-offset d)
  (if (lesser-or-equal-fixnum a-temp (b-constant 32.))
      ;;destination is entirely within one word
      (sequential
        (assign byte-s (1- bb-width))
	(parallel
	 (assign byte-r bb-d-bitpos)
	 (jump bb-byte-alu-operation-dispatch)))	;jcall
    ;;destination is split across two words
    (sequential
     ;;store the low byte
     (assign byte-s (31- bb-d-bitpos))
     (parallel
       (assign byte-r bb-d-bitpos)
       (call bb-byte-alu-operation-dispatch))
     ;;store the high byte
     (assign bb-s-word (rotate bb-s-word byte-r))
     (assign byte-s (1- a-temp))
     (assign-vma-offset d 1)
     (parallel
       (assign byte-r (b-constant 0))
       (jump bb-byte-alu-operation-dispatch)))))	;jcall

;(boole fn x y ...)   if fn is "abcd" then
;          y    0       1       2       3        4      5     6       7
;       | 0  1  0       x*y     ~x*y    y        x*~y   x     x#y     x+y
;   ----------
;     0 | a  c  8	9       10      11       12     13    14      15
;   x   |       ~(x+y)	~(x#y)  ~x	~x+y     ~y     x+~y  ~x+~y   -1
;     1 | b  d

;;vma and byte regs have been set up already, for DPB.
;;trashes b-temp, a-temp-2, b-temp-2, but not a-temp.
(defucode bb-byte-alu-operation-dispatch
  (dispatch-after-this (parallel (start-memory read) (ldb bb-alu-operation 4 0))
		       (parallel
			(assign b-temp (dpb bb-s-word byte-s byte-r bb-identity))
			(waiting-for-memory))
     ((1 2) ;;1 x*y logand ;;2 ~x*y	logand
      (parallel-with-return
        (parallel
	  (declare-memory-timing data-cycle)
	  (abus-array-data
	    (store-word (logand memory-data b-temp))))))
     ((4 8.) ;;4 ~(~x+y) = x*~y andc2 	;;8 ~(x+y)	= ~x*~y andcb
      (parallel
       (declare-memory-timing data-cycle)
       (abus-array-data
	(assign a-temp-2 memory-data)))
      (assign b-temp-2 (dpb (b-constant -1) byte-s byte-r 0))	;cant merge this...
      (assign a-temp-2 (logxor a-temp-2 b-temp-2))		;...with this.
      (parallel-with-return
        (store-word (logand a-temp-2 b-temp))))
     ((6 9.) ;;6 x#y logxor ;;9 ~(x#y)=~x#y logxor
      (parallel-with-return
       (parallel
	(declare-memory-timing data-cycle)
	(abus-array-data
	 (store-word (logxor b-temp memory-data))))))
     ((7 11.) ;;7 x+y logior	;;11 ~x+y logior
      (parallel-with-return
       (parallel
	(declare-memory-timing data-cycle)
	(abus-array-data
	 (store-word (logior b-temp memory-data))))))
     ((13. 14.) ;;13 x+~y = ~(~x*y) lognand	;;14 ~x+~y=~(x*y)
      (parallel
       (declare-memory-timing data-cycle)
       (abus-array-data
	(assign a-temp-2 (logand b-temp memory-data))))
      (parallel-with-return
       (store-word (logxor (dpb (b-constant -1) byte-s byte-r 8) a-temp-2))))
     (otherwise (goto cant-happen))))

;;vma has been set up already
(defucode bb-word-alu-operation-dispatch	;commonly 3 cycles (plus 1 for the call)
  (dispatch-after-this (parallel (start-memory read) (ldb bb-alu-operation 4 0))
		       (waiting-for-memory)	;---want to use this somehow...
     ((1 2)	;;1 x*y logand	;;2 ~x*y logand
      (parallel
       (declare-memory-timing data-cycle)
       (abus-array-data (store-word (logand bb-s-word memory-data)))
       (return)))
     ((4 8.)	;;4 x*~y andcb	;;8 ~(x+y) ~-x*~y andcb
      (parallel
       (declare-memory-timing data-cycle)
       (abus-array-data (store-word (andc2 bb-s-word memory-data)))
       (return)))
4,887,235
	471	472
     ((6 9.)	;;6 x#y logxor		;;9 ~(x#y)=~x#y logxor
      (parallel
       (declare-memory-timing data-cycle)
       (abus-array-data (store-word (logxor bb-s-word memory-data)))
       (return)))
     ((7 11.)	;;7 x+y logior		;;11 ~x+y logior
      (parallel
       (declare-memory=timing data-cycle)
       (abus-array-data (store-word (logior bb-s-word memory-data)))
       (return)))
     ((13. 14.) ;;13 x+~y = ~(~x*y)	;;14 ~x+~y=~(x*y)
      (parallel
       (declare-memory-timing data-cycle)
       (abus-array-data (assign a-temp-2 (logand bb-s-word memory-data))))
      (parallel
       (store-word (logxor (b-constant -1) a-temp-2))
       (return)))
     (otherwise (goto cant-happen))))

;;alu depends only on source bits
(defucode ubitblt-long-row-source
  (parallel
   (assign b-temp bb-d-bitpos)
   (if (zero-fixnum bb-d-bitpos)
       (if (zero-fixnum bb-s-bitpos)
	   (parallel
	     (assign bb-s-offset (1- bb-s-offset))	;bb-aligned-row-source will increment first
	     (jump (trace-path #/a))
	     (jump ubitblt-aligned-row-source))
	   ;;             SSSSSSSSSSSSSSSSSSSSSSSSSsssssss
	   ;;      dddddddddddddddodddddddddddddddd
	   (parallel-with-s-access
	     bb-s-offset
	     (assign byte-r (32- bb-s-bitpos))
	     (parallel
	      (assign bb-s-word (logxor bb-constant (rotate memory-data byte-r)))
	      (lisp (trace-path #/c))
	      (jump ubitblt-d-aligned-row-source))))
     (if (equal-fixnum b-temp bb-s-bitpos)
	   ;;SSSSSSSSSSSSSSSSSSSSSSSSSsssssss
	   ;;DDDDDDDDDDDDDDDDDDDDDDDDDddddddd
	 (sequential
	  (parallel-with-s-access
	    bb-s-offset
	    (assign a-temp (32- bb-d-bitpos))
	    (assign byte-r a-temp)
	    (assign bb-s-word (logxor bb-constant (rotate memory-data byte-r))))
	  (parallel-with-d-access
	    bb-d-offset
	    (assign byte-r bb-d-bitpos)
	    (assign byte-s (1- a-temp))
	    (store-word (dpb bb-s-word byte-s byte-r memory-data)))
	  (incr-d-offset)
	  (parallel
	   (assign bb-width (- bb-width a-temp))
	   (lisp (trace-path #/b))
	   (jump ubitblt-aligned-row-source)))
       (if (lesser-fixnum bb-s-bitpos b-temp)
	   ;;sssssssssSSSSSSSSSSSSSSSS.......
	   ;;         DDOODDDDDDDDDODDdddddddddddddddd
	   ;;         <- 32-d.bitpos->
	   (sequential
	    (parallel-with-s-access
	      bb-s-offset
	      (assign byte-r (32- bb-s-bitpos))
	      (parallel
	        (assign b-temp (32- bb-d-bitpos))
		(assign a-temp obus))
	      (assign bb-s-word (logxor bb-constant (rotate memory-data byte-r))))
	      ;;.......sssssssssSSSSSSSSSSSSSSSS
	    (parallel-with-d-access
	      bb-d-offset
	      (assign byte-r bb-d-bitpos)
	      (assign byte-s (1- b-temp))
	      (store-word (dpb bb-s-word byte-s byte-r memory-data)))
	    (incr-d-offset)
	    ;;rotate s-word further to right by 32-d.bitpos
	    ;;SSSSSSSSSSSSSSSS.......sssssssss
	    (assign byte-r bb-d-bitpos)	;or left by -(32-d.bitpos)
	    (assign bb-s-word (rotate bb-s-word byte-r))
	    (assign bb-width (- bb-width a-temp))
	    (parallel
	      (assign bb-s-bitpos (+ bb-s-bitpos b-temp))
	      (lisp (trace-path #/d))
	      (jump ubitblt-d-aligned-row-source)))
	 (sequential
;;The high part of the first source word is not as long as the high part of the
;;first destination word. So extract the useful part of the first source word,
;;and deposit into it as much of the second source word as needed to fill out the rest
;;of the first destination word. Then position the rest of the second source word
;;appropriately for the inner loop.
