3b1 ulrem() bug
Gene H. Olson
gene at zeno.MN.ORG
Sun Jul 24 13:45:11 AEST 1988
In article <5552 at ihlpg.ATT.COM> bgb at ihlpg.ATT.COM (Beuning) writes:
>3b1 owners,
>
> I believe I have found a bug in unsigned long
>remainder arithmetic. If you compile and execute this
>program on your 3b1, the value of 'c' is incorrect.
(He gives a very good example)
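(His program is not reproduced here; a hypothetical test of the same
shape, with arbitrary constants of my own rather than the ones from
his posting, would look like this.  On a correct compiler the two
printed values agree, by the C definition of / and %, though the
check is only as good as the other long-arithmetic support routines
it goes through.)

	/* Hypothetical test in the spirit of the bug report above;
	 * the constants are arbitrary, not the original posting's. */
	#include <stdio.h>

	int main(void)
	{
		unsigned long a = 0x81234567;
		unsigned long b = 0x00010003;
		unsigned long c = a % b;	/* unsigned long remainder */

		printf("a %% b       = %lu\n", c);
		printf("a - (a/b)*b = %lu\n", a - (a / b) * b);
		return 0;
	}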
I have a fix for you.  The following long multiply/divide routines
do not have the bug, and are faster than the standard ATT routines
by a significant margin (enough to bump the dhrystone by 100).

They won't work directly with the shared library, but if you use
the `scc' script I just posted to unix-pc.sources you will have
no trouble using them with shared-library programs as well.

The only hitch is that shared library modules won't use them (I
think), just modules which are not shared.  So your program may
still get the wrong answer if it is the shared library routines
themselves that hit the bug.
I wrote these routines, and I hereby place them in the public domain.
I would be honored if ATT would incorporate them to fix the
bug reported above.
Gene H. Olson
Smartware Consulting
Minneapolis, MN
amdahl!bungia!zeno!gene
------------------- Cut here ----------------
#***************************************************
#* Long and ulong Multiply routines. *
#***************************************************
global ulmul
global lmul
ulmul:
lmul:
	mov.l 8(%sp),%d0 # Get one operand into d0
global lmul__
global ulmul__
lmul__:
ulmul__:
mov.l 4(%sp),%d1 # Get parameters
mov.l %d2,%a0 # Save d2, d3
mov.l %d3,%a1
mov.l %d0,%d2 # Copy & swap operands
mov.l %d1,%d3
swap.w %d2
swap.w %d3
mulu.w %d1,%d2 # Do the multiply
mulu.w %d0,%d3
mulu.w %d1,%d0
add.w %d3,%d2 # Add up components
swap.w %d2
clr.w %d2
add.l %d2,%d0
mov.l %a0,%d2
mov.l %a1,%d3
rts
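
(For readers who don't speak 68000 assembly: the routine above builds
a 32x32 -> 32 bit product out of three of the 68000's 16x16 -> 32 bit
mulu.w instructions.  A rough C model follows; the names and the
assumption of 32-bit unsigned longs are mine, not part of the posted
source.)

	/* Rough C model of lmul/ulmul above, assuming 32-bit unsigned
	 * long.  The a_hi*b_hi partial product is omitted because it
	 * only affects bits above bit 31 of the result. */
	unsigned long lmul_model(unsigned long a, unsigned long b)
	{
		unsigned long a_lo = a & 0xFFFF, a_hi = a >> 16;
		unsigned long b_lo = b & 0xFFFF, b_hi = b >> 16;

		return a_lo * b_lo			   /* low product  */
		    + ((a_lo * b_hi + a_hi * b_lo) << 16); /* cross terms */
	}

(One body serves both entry points because the low 32 bits of a
product do not depend on whether the operands are read as signed or
unsigned, which is why lmul and ulmul share code above.)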
#********************************************************
#* Signed Long Divide *
#********************************************************
global ldiv__
ldiv__:
mov.l 4(%sp),%d1 # Get operands
sdiv:
tst.l %d0 # Test dividend sign
bpl sdiv2 # - Positive
neg.l %d0 # Make positive
tst.l %d1 # Test divisor sign
bpl sdiv1 # - positive
neg.l %d1 # Make positive
	bsr udiv # Divide -x / -y
neg.l %d1 # Make remainder negative
rts
sdiv1:
	bsr udiv # Divide -x / +y
neg.l %d0 # Negate quotient
neg.l %d1 # Negate remainder
rts
sdiv2:
tst.l %d1 # Test divisor sign
bpl udiv # - Divide +x / +y
neg.l %d1 # Make it positive
bsr udiv # Divide +x / -y
neg.l %d0 # Negate quotient
rts
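
(A rough C model of the sign handling, assuming 32-bit longs; the
names are mine.  The magnitudes are divided by the unsigned routine,
the quotient is negated when exactly one operand was negative, and
the remainder takes the sign of the dividend, the usual C convention:
for example -7 / 2 gives quotient -3 and remainder -1.)

	/* Rough C model of sdiv above.  Division by zero is not
	 * checked, just as in the assembly. */
	void sdiv_model(long x, long y, long *quot, long *rem)
	{
		unsigned long ux = x < 0 ? -(unsigned long)x : (unsigned long)x;
		unsigned long uy = y < 0 ? -(unsigned long)y : (unsigned long)y;
		unsigned long uq = ux / uy;	/* magnitude quotient  */
		unsigned long ur = ux % uy;	/* magnitude remainder */

		*quot = (long)((x < 0) != (y < 0) ? -uq : uq);
		*rem  = (long)(x < 0 ? -ur : ur);
	}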
#*********************************************************
#* Unsigned Long Divide *
#*********************************************************
global uldiv__
uldiv__:
mov.l 4(%sp),%d1 # Get operands
udiv:
mov.l %d2,%a0 # Save d2
# Check if the upper 16 bits of the divisor are zero.  If so,
# we can do an easy 32/16 -> 32 bit divide.
swap.w %d1 # Is the divisor 16 bits?
mov.w %d1,%d2
bne udiv2 # - no, 32 bit divide needed
swap.w %d0 # Setup MSB divide
swap.w %d1
swap.w %d2
mov.w %d0,%d2
beq udiv1 # - msb divide unnecessary
divu.w %d1,%d2 # Do MSB divide
mov.w %d2,%d0
udiv1:
swap.w %d0 # Do LSB divide
mov.w %d0,%d2
divu.w %d1,%d2
mov.w %d2,%d0 # Put quotient in d0
swap.w %d2 # Put remainder in d1
mov.w %d2,%d1
mov.l %a0,%d2 # Restore d2
rts
# Upper bits were non-zero. We have to do a
# 32/32 -> 16 bit divide.
#
# For the divide to be efficient, we need to first
# normalize the divisor, and then shift the dividend
# left by the same amount.
udiv2:
mov.l %d3,%a1 # Save d3
mov.l &16,%d3 # Initialize shift count
cmp.w %d1,&0x0080 # Normalize 8 bits?
bcc udiv3 # - not needed
rol.l &8,%d1 # Do the shift
sub.w &8,%d3 # Adjust shift count
udiv3:
cmp.w %d1,&0x0800 # Normalize 4 bits?
bcc udiv4 # - not needed
rol.l &4,%d1 # Do the shift
sub.w &4,%d3 # Adjust shift count
udiv4:
cmp.w %d1,&0x2000 # Normalize 2 bits?
bcc udiv5 # - not needed
rol.l &2,%d1 # Do the shift
sub.w &2,%d3 # Adjust shift count
udiv5:
tst.w %d1 # Single bit normalization needed?
bmi udiv6 # - no
rol.l &1,%d1 # Do the shift
sub.w &1,%d3 # Adjust shift count
udiv6:
mov.w %d0,%d2 # Shift dividend left by same amount
lsr.l %d3,%d0 # d0 = x01
swap.w %d2 # d2 = x2
clr.w %d2
lsr.l %d3,%d2
swap.w %d3
# Divide the three-digit x012 by the two-digit y01
# (16-bit digits, most significant first):
#
#	q0 = x01 / y0
#	r01 = (x01 % y0) * 2^16 + x2 - q0 * y1
#	if (r01 < 0) {
#		r01 += y01 ;
#		q0 -= 1 ;
#	}
divu.w %d1,%d0 # q0 = x01 / y0
mov.w %d0,%d3 # r01 = (x01 % y0) * m1 - q0 * y1
mov.w %d2,%d0
mov.w %d3,%d2
swap.w %d1
mulu.w %d1,%d2
sub.l %d2,%d0
bcc udiv7 # - r01 >= 0
sub.w &1,%d3 # q0 -= 1
add.l %d1,%d0 # r01 += y01
# Restore remainder.
udiv7:
mov.l &0,%d1 # Get quotient
mov.w %d3,%d1
swap.w %d3 # Get remainder
swap.w %d0
rol.l %d3,%d0
exg %d0,%d1 # d0 = quotient, d1=remainder
mov.l %a0,%d2 # Restore d2, d3
mov.l %a1,%d3
rts
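
(The general case above is the textbook estimate-and-correct division:
normalize the divisor so its top bit is set, shift the dividend to
match, guess the single 16-bit quotient digit from the leading digits,
then multiply back and adjust.  Here is a rough C model; it is mine,
not part of the posted source, and it cheats by using a 64-bit
intermediate where the assembly stays within 16- and 32-bit registers.)

	#include <stdint.h>

	/* Rough C model of udiv above.  Division by zero is not
	 * checked, just as in the assembly. */
	uint32_t udiv_model(uint32_t x, uint32_t y, uint32_t *rem)
	{
		if (y < 0x10000) {
			/* Fast path: with a 16-bit divisor, two of the
			 * 68000's 32/16 -> 16 divides are enough. */
			uint32_t q1 = (x >> 16) / y;	/* high word */
			uint32_t r1 = (x >> 16) % y;
			uint32_t lo = (r1 << 16) | (x & 0xFFFF);

			*rem = lo % y;			/* low word  */
			return (q1 << 16) | (lo / y);
		}

		/* General case: normalize so the divisor's top bit is
		 * set; the quotient is then a single 16-bit digit. */
		int s = 0;
		while (!(y & 0x80000000UL)) {
			y <<= 1;
			s++;
		}
		uint64_t xs = (uint64_t)x << s;	/* shifted dividend   */
		uint32_t y0 = y >> 16;		/* leading digit of y */

		/* Estimate from the leading digits, then step down if
		 * the guess was high.  (The assembly makes a single
		 * correction; a loop keeps the model obviously right.) */
		uint32_t q = (uint32_t)(xs >> 16) / y0;
		while ((uint64_t)q * y > xs)
			q--;

		*rem = (uint32_t)((xs - (uint64_t)q * y) >> s);
		return q;
	}

(The fast path is what most programs hit, since divisors usually fit
in 16 bits; the general case only runs when the divisor uses its
upper half.)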
#*******************************************************
#* Signed remainder *
#*******************************************************
global lrem__
lrem__:
mov.l 4(%sp),%d1 # Get operands
bsr sdiv # Do divide/modulus
exg %d0,%d1 # Get remainder
rts
#********************************************************
#* Unsigned remainder *
#********************************************************
global ulrem__
ulrem__:
mov.l 4(%sp),%d1 # Get operands
bsr udiv # Do divide/modulus
exg %d0,%d1 # Get remainder
rts
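
(And the remainder entry points are just the divide with its two
results swapped; in terms of the udiv_model() sketch above:)

	/* ulrem__ keeps only the remainder of the unsigned divide;
	 * udiv_model() is the sketch given after the udiv routine. */
	uint32_t ulrem_model(uint32_t x, uint32_t y)
	{
		uint32_t r;

		(void)udiv_model(x, y, &r);	/* discard the quotient */
		return r;
	}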