crufty memcmp on 386
Doug Ingraham
dpi at loft386.uucp
Mon Nov 19 08:07:55 AEST 1990
In article <1990Nov15.163550.27015 at cbnewsh.att.com>, gls at cbnewsh.att.com (Col. G. L. Sicherman) writes:
> In <1990Nov13.182133.27748 at loft386.uucp>, dpi at loft386.uucp writes:
> >
> > I can't make the original fail. Could you provide a short code segment
> > that will display the claimed behavior? If you can make AT&T's fail,
> > I would like to know if mine fails also.
>
> Sure. Here are my test program and what came out of it:
[Test program deleted]
I coded my routine from a draft of the standard. The version provided
with SYS V 386 performs a signed test, which was acceptable to most in
pre-standard times. I have since updated my routine and it now does unsigned
comparisons. Col. Sicherman, I wrote my test routine to test for the
signed version and wrote my original to match the version supplied by
AT&T. This one passes your test and my more complete test. Feel free
to use it in any way you wish for any purpose. It is as bug-free as I
can make it. If anyone finds a problem or an improvement, please let me
know.
-------------------------------------------------------------------------
.file "memcmp.s"
/ The Intel 80386 implementation of the ANSI memcmp() function.
/ Written April 8, 1990 by Doug Ingraham. dpi at loft386.UUCP
/ Copyright 1990 by Douglas P. Ingraham. This code is freely
/ distributable so long as this notice remains intact.
/
/ Corrected November 18, 1990 to compare unsigned as per the standard.
/
.text
.align 4
.globl memcmp
/ ----------------------------------------------------------------------
/ int memcmp(const void *s1, const void *s2, size_t n)
/
/ Compares the n bytes at s1 with the n bytes at s2, byte values being
/ treated as unsigned.
/
/ In:    0x4(%esp) = s1, 0x8(%esp) = s2, 0xC(%esp) = n
/ Out:   %eax =  0  all n bytes equal (also when n == 0 or s1 == s2)
/        %eax = -1  first differing byte of s1 is below that of s2
/        %eax =  1  first differing byte of s1 is above that of s2
/ Uses:  %ecx as the counter; %edx and %eax hold the caller's %edi and
/        %esi while the string instructions run, and both are restored
/        before returning.  Flags are modified.
/ Note:  the n argument slot at 0xC(%esp) is overwritten on the
/        long-word path.  The code relies on the string instructions
/        incrementing %esi/%edi (direction flag clear).
/ ----------------------------------------------------------------------
memcmp:
/ Save edi and esi in edx and eax to save 4 clocks over popping them
	movl	%edi,%edx		/ 2 clocks 2 bytes
	movl	%esi,%eax		/ 2 clocks 2 bytes
/ esi is pointer to the first string
	movl	0x4(%esp),%esi		/ 2 clocks 4 bytes
/ edi is pointer to the second string
	movl	0x8(%esp),%edi		/ 2 clocks 4 bytes
/ This tests to see if the pointers are to the same string.
/ This test is probably a waste of time.
	cmpl	%esi,%edi		/ 2 clocks 2 bytes
	je	same			/ 7+m,3 clocks 2 bytes
/ The ecx gets the count. If Zero then by definition they are the same.
/ An explicit test of the full 32-bit register is used instead of jcxz:
/ some assemblers encode jcxz in 32-bit code so that only the low 16
/ bits (%cx) are examined, which would treat a count such as 0x10000
/ as zero.
	movl	0xC(%esp),%ecx		/ 2 clocks 4 bytes
	testl	%ecx,%ecx		/ 2 clocks 2 bytes
	jz	same			/ 7+m,3 clocks 2 bytes
/ Test the count to make sure there are enough bytes to bother with this
/ speed optimization. 12 was chosen as a ballpark figure. Must be greater
/ than 7 as a minimum. The count is unsigned, so use an unsigned branch.
	cmpl	$12,%ecx		/ 2 clocks 3 bytes
	jb	byte			/ 7+m,3 clocks 2 bytes
/ Test to see if either pointer is word aligned. Most is 3 cmpsb's to align
/ so the loop is unrolled. 75 clocks worst case.
	testl	$3,%esi			/ 2 clocks 6 bytes
	jz	is_aligned		/ 7+m,3 clocks 2 bytes
	testl	$3,%edi			/ 2 clocks 6 bytes
	jz	is_aligned		/ 7+m,3 clocks 2 bytes
	cmpsb				/ 10 clocks 1 byte
	jne	different		/ 7+m,3 clocks 2 bytes
	decl	%ecx			/ 2 clocks 1 byte
	testl	$3,%esi			/ 2 clocks 6 bytes
	jz	is_aligned		/ 7+m,3 clocks 2 bytes
	testl	$3,%edi			/ 2 clocks 6 bytes
	jz	is_aligned		/ 7+m,3 clocks 2 bytes
	cmpsb				/ 10 clocks 1 byte
	jne	different		/ 7+m,3 clocks 2 bytes
	decl	%ecx			/ 2 clocks 1 byte
	testl	$3,%esi			/ 2 clocks 6 bytes
	jz	is_aligned		/ 7+m,3 clocks 2 bytes
	testl	$3,%edi			/ 2 clocks 6 bytes
	jz	is_aligned		/ 7+m,3 clocks 2 bytes
	cmpsb				/ 10 clocks 1 byte
	jne	different		/ 7+m,3 clocks 2 bytes
	decl	%ecx			/ 2 clocks 1 byte
/ Save the modified count back on the stack for possible later use.
/ (byte_equal reloads it to find the 0..3 trailing bytes.)
is_aligned:
	movl	%ecx,0xC(%esp)		/ 2 clocks 4 bytes
/ Divide by 4 so that we can do word compares
	shrl	$2,%ecx			/ 3 clocks 3 bytes
/ Do the long level search.
	repz;	cmpsl			/ 5+9*n clocks 2 bytes
	je	byte_equal		/ 7+m,3 clocks 2 bytes
/ A long word differed: backup 4 bytes and leave ecx = 4 so that the byte
/ search below re-examines exactly that long word.
	movl	$4,%ecx			/ 2 clocks 5 bytes
	subl	%ecx,%esi		/ 2 clocks 2 bytes
	subl	%ecx,%edi		/ 2 clocks 2 bytes
/ Do the byte level search.
byte:
	repz;	cmpsb			/ 5+9*n clocks 2 bytes
	je	same			/ 7+m/3 clocks 2 bytes
/ Restore the esi so that eax is available for return value.
/ movl leaves the flags from the failing compare untouched.
different:
	movl	%eax,%esi		/ 2 clocks 2 byte
/ If the carry is set, return -1
	jc	less_than		/ 7+m/3 clocks 2 bytes
greater_than:
	movl	$1,%eax			/ 2 clocks 5 bytes
/ Restore the edi.
	movl	%edx,%edi		/ 2 clocks 2 bytes
	ret				/ 10+m clocks 1 byte
/ If the string compared out then come here.
same:	movl	%eax,%esi		/ 2 clocks 2 bytes
/ Entry point for paths that have already restored esi.
short_same:
	movl	%edx,%edi		/ 2 clocks 2 bytes
	xorl	%eax,%eax		/ 2 clocks 2 bytes
	ret				/ 10+m clocks 1 byte
/ come here if the high speed test was equal
/ Only the count modulo 4 remains to be compared. When that is zero the
/ andl sets the zero flag and clears the carry, and the repz cmpsb does
/ not execute, so the je below returns equal.
byte_equal:
	movl	0xC(%esp),%ecx		/ 2 clocks 4 bytes
	andl	$3,%ecx			/ 2 clocks 6 bytes
	repz;	cmpsb			/ 5+9*n clocks 2 bytes
/ Restore the esi so that eax is available for return value.
	movl	%eax,%esi		/ 2 clocks 2 byte
	je	short_same		/ 7+m/3 clocks 2 bytes
	jnc	greater_than		/ 7+m/3 clocks 2 bytes
less_than:
	movl	$-1,%eax		/ 2 clocks 5 bytes
/ Restore the edi.
	movl	%edx,%edi		/ 2 clocks 2 bytes
	ret				/ 10+m clocks 1 byte
------------------------------------------------------------------------
> --
> G. L. Sicherman
> gls at odyssey.att.COM
Thanks for finding this.
--
Doug Ingraham (SysAdmin)
Lofty Pursuits (Public Access for Rapid City SD USA)
bigtex!loft386!dpi
uunet!loft386!dpi
More information about the Comp.bugs.sys5
mailing list