/* signed long multiply: c = a * b */
/* Code marked (WT) is speedup code. Note that this should apply (at least  */
/* in part) to all cases that are not going to overflow -Wendy Thrash- */

/*
 * lmul -- 32-bit signed multiply built from 16-bit unsigned mulu steps.
 *
 * Operands: a is read from sp@(16) and b from sp@(20), both offsets
 * being relative to sp after the 12-byte save of d2-d4 below.
 * Result:   d0 = a * b, truncated to 32 bits.
 * Registers: d2, d3, d4 are saved on entry and restored on exit;
 *            d0 and d1 are overwritten and not restored.
 *
 * Method: replace a and b by their magnitudes, split each into 16-bit
 * halves (ahi:alo, bhi:blo) and compute
 *	alo*blo + ((ahi*blo + alo*bhi) << 16)
 * The ahi*bhi term is never computed and the cross terms are shifted
 * with their upper 16 bits discarded, so only the low 32 bits of the
 * product survive.  d4 starts as a copy of a and is negated when b is
 * negative; its sign is tested at the end to decide whether d0 must be
 * negated.
 *
 * The (WT) shortcuts skip a cross-term multiply whenever the matching
 * high half (ahi or bhi) is zero.
 */
lmul:	.globl	lmul
	moveml	#0x3800,sp@-	/* save d2,d3,d4 (12 bytes pushed) */
	movl	sp@(16),d2	/* d2 = a */
	movl	d2,d4		/* d4 = sign tracker; this move leaves the flags set from a */
	jge	1f		/* a >= 0: d2 is already the magnitude */
	negl	d2		/* d2 = -a */
1:
	movl	sp@(20),d3	/* d3 = b (the load also sets the flags tested next) */
	jge	2f		/* b >= 0: leave d3 and d4 alone */
	negl	d3		/* d3 = -b */
	negl	d4		/* b negative: flip the sign recorded in d4 */
2:
	movw	d2,d0		/* d0 = alo, unsigned */
	mulu	d3,d0		/* d0 = blo*alo, unsigned */
	movw	d2,d1		/* d1 = alo, kept for the alo*bhi term */
	swap	d2		/* swap alo-ahi: low word of d2 is now ahi */

	movw	d2,d2		/* check for zero ahi: self-move only sets flags (WT) */
	jne	4f		/* ahi not zero; do the multiply (WT) */
	/* ahi == 0 here, so the ahi*blo term is zero and is skipped */
	swap	d3		/* swap blo-bhi: low word of d3 is now bhi (WT) */
	movw	d3,d3		/* check for zero bhi (WT) */
	jeq	6f		/* if bhi also zero, d0 = alo*blo is the answer (WT) */
	mulu	d3,d1		/* d1 = bhi*alo, unsigned (WT) */
	jmp	7f		/* go put result together (WT) */

4:	mulu	d3,d2		/* d2 = blo*ahi, unsigned */
	swap	d3		/* swap blo-bhi: low word of d3 is now bhi */

	movw	d3,d3		/* check for zero bhi (WT) */
	jne	5f		/* bhi not zero; do the multiply (WT) */
	movl	d2,d1		/* bhi == 0: cross term is just ahi*blo, move it to d1 (WT) */
	jmp	7f		/* skip multiply and add (WT) */

5:	mulu	d3,d1		/* d1 = bhi*alo, unsigned */
	addl	d2,d1		/* d1 = (ahi*blo + alo*bhi) */

	/* 7: d1 holds the cross-term sum; shift it up 16 bits and add it in */
7:	swap	d1		/* low word of the sum moves to the high word */
	clrw	d1		/* d1 = (ahi*blo + alo*bhi)*(2**16), upper bits dropped */
	addl	d1,d0		/* d0 = alo*blo + (ahi*blo + alo*bhi)*(2**16) */

	/* 6: d0 holds the product of the magnitudes; apply the sign */
6:	tstl	d4		/* sign of result */
	jge	3f		/* non-negative: d0 is already correct */
	negl	d0		/* negative result: negate the magnitude product */
3:
	moveml	sp@+,#0x001C	/* restore d2,d3,d4 */
	rts
