diff -buHrN linux-1.3.15/arch/i386/config.in linux/arch/i386/config.in --- linux-1.3.15/arch/i386/config.in Wed Aug 2 13:27:01 1995 +++ linux/arch/i386/config.in Wed Aug 2 13:22:24 1995 @@ -38,6 +38,10 @@ bool 'Use -m486 flag for 486-specific optimizations' CONFIG_M486 y #fi +comment 'Using of assembler optimized library (experimental)' +comment 'combined with 486-specific optimizations only on 486 or 586' +bool 'Assembler optimized library' CONFIG_ASM_OPTIMIZE y + comment 'Loadable module support' bool 'Set version information on all symbols for modules' CONFIG_MODVERSIONS n diff -buHrN linux-1.3.15/include/asm-i386/byteorder.h linux/include/asm-i386/byteorder.h --- linux-1.3.15/include/asm-i386/byteorder.h Sat Apr 29 09:19:32 1995 +++ linux/include/asm-i386/byteorder.h Tue Jul 18 21:47:04 1995 @@ -27,9 +27,14 @@ extern __inline__ unsigned long int __ntohl(unsigned long int x) { +#if (defined(CONFIG_M486) || defined(CONFIG_M586)) && \ + defined(CONFIG_ASM_OPTIMIZE) + __asm__("bswap %0" +#else /* (CONFIG_M486 || CONFIG_M586) && CONFIG_ASM_OPTIMIZE */ __asm__("xchgb %b0,%h0\n\t" /* swap lower bytes */ "rorl $16,%0\n\t" /* swap words */ "xchgb %b0,%h0" /* swap higher bytes */ +#endif /* (CONFIG_M486 || CONFIG_M586) && CONFIG_ASM_OPTIMIZE */ :"=q" (x) : "0" (x)); return x; diff -buHrN linux-1.3.15/include/asm-i386/string.h linux/include/asm-i386/string.h --- linux-1.3.15/include/asm-i386/string.h Thu Jul 20 08:50:29 1995 +++ linux/include/asm-i386/string.h Tue Jul 25 20:12:07 1995 @@ -10,12 +10,30 @@ * set, making the functions fast and clean. 
String instructions have been * used through-out, making for "slightly" unclear code :-) * - * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 1991,1992,1993,1994 Linus Torvalds + * Revised and optimized for i486/pentium + * 1994-1995 a.vignani@crf.it, d.parodi@crf.it */ #define __HAVE_ARCH_STRCPY extern inline char * strcpy(char * dest,const char *src) { +#if defined(CONFIG_M486) || defined(CONFIG_M586) + register char *tmp= (char *)dest; + register char dummy; + __asm__ __volatile__ + ("\n1:\t" + "movb (%0),%2\n\t" + "incl %0\n\t" + "movb %2,(%1)\n\t" + "incl %1\n\t" + "testb %2,%2\n\t" + "jne 1b" + :"=r" (src), "=r" (tmp), "=q" (dummy) + :"0" (src), "1" (tmp) + :"memory"); + return dest; +#else /* CONFIG_M486 || CONFIG_M586 */ __asm__ __volatile__( "cld\n" "1:\tlodsb\n\t" @@ -25,11 +43,37 @@ : /* no output */ :"S" (src),"D" (dest):"si","di","ax","memory"); return dest; +#endif /* CONFIG_M486 || CONFIG_M586 */ } #define __HAVE_ARCH_STRNCPY extern inline char * strncpy(char * dest,const char *src,size_t count) { +#if defined(CONFIG_M486) || defined(CONFIG_M586) + register char *tmp= (char *)dest; + register char dummy; + if (count) { + __asm__ __volatile__ ( + "\n1:\t" + "movb (%0),%2\n\t" + "incl %0\n\t" + "movb %2,(%1)\n\t" + "incl %1\n\t" + "decl %3\n\t" + "je 3f\n\t" + "testb %2,%2\n\t" + "jne 1b\n\t" + "2:\tmovb %2,(%1)\n\t" + "incl %1\n\t" + "decl %3\n\t" + "jne 2b\n\t" + "3:" + :"=r" (src), "=r" (tmp), "=q" (dummy), "=r" (count) + :"0" (src), "1" (tmp), "3" (count) + :"memory"); + } + return dest; +#else /* CONFIG_M486 || CONFIG_M586 */ __asm__ __volatile__( "cld\n" "1:\tdecl %2\n\t" @@ -44,11 +88,30 @@ : /* no output */ :"S" (src),"D" (dest),"c" (count):"si","di","ax","cx","memory"); return dest; +#endif /* CONFIG_M486 || CONFIG_M586 */ } #define __HAVE_ARCH_STRCAT extern inline char * strcat(char * dest,const char * src) { +#if defined(CONFIG_M486) || defined(CONFIG_M586) + register char *tmp = (char *)(dest-1); + register char dummy; + __asm__ 
__volatile__ + ("\n1:\tincl %1\n\t" + "cmpb $0,(%1)\n\t" + "jne 1b\n" + "2:\tmovb (%2),%b0\n\t" + "incl %2\n\t" + "movb %b0,(%1)\n\t" + "incl %1\n\t" + "testb %b0,%b0\n\t" + "jne 2b\n" + :"=q" (dummy), "=r" (tmp), "=r" (src) + :"1" (tmp), "2" (src) + :"memory"); + return dest; +#else /* CONFIG_M486 || CONFIG_M586 */ __asm__ __volatile__( "cld\n\t" "repne\n\t" @@ -61,11 +124,34 @@ : /* no output */ :"S" (src),"D" (dest),"a" (0),"c" (0xffffffff):"si","di","ax","cx"); return dest; +#endif /* CONFIG_M486 || CONFIG_M586 */ } #define __HAVE_ARCH_STRNCAT extern inline char * strncat(char * dest,const char * src,size_t count) { +#if defined(CONFIG_M486) || defined(CONFIG_M586) + register char *tmp = (char *)(dest-1); + register char dummy; + __asm__ __volatile__ + ("\n1:\tincl %1\n\t" + "cmpb $0,(%1)\n\t" + "jne 1b\n" + "2:\tdecl %3\n\t" + "js 3f\n\t" + "movb (%2),%b0\n\t" + "incl %2\n\t" + "movb %b0,(%1)\n\t" + "incl %1\n\t" + "testb %b0,%b0\n\t" + "jne 2b\n\tjmp 4f\n" /* src NUL already stored: skip the store at 3, which would land one byte past it */ + "3:\txorl %0,%0\n\t" /* count exhausted before src ended: terminate dest here */ + "movb %b0,(%1)\n4:" + :"=q" (dummy), "=r" (tmp), "=r" (src), "=r" (count) + :"1" (tmp), "2" (src), "3" (count) + :"memory"); + return dest; +#else /* CONFIG_M486 || CONFIG_M586 */ __asm__ __volatile__( "cld\n\t" "repne\n\t" @@ -84,11 +170,33 @@ :"S" (src),"D" (dest),"a" (0),"c" (0xffffffff),"g" (count) :"si","di","ax","cx","memory"); return dest; +#endif /* CONFIG_M486 || CONFIG_M586 */ } #define __HAVE_ARCH_STRCMP extern inline int strcmp(const char * cs,const char * ct) { +#if defined(CONFIG_M486) || defined(CONFIG_M586) + register int __res; + __asm__ __volatile__ + ("\n1:\tmovb (%1),%b0\n\t" + "incl %1\n\t" + "cmpb %b0,(%2)\n\t" + "jne 2f\n\t" + "incl %2\n\t" + "testb %b0,%b0\n\t" + "jne 1b\n\t" + "xorl %0,%0\n\t" + "jmp 3f\n" + "2:\tmovl $1,%0\n\t" + "jb 3f\n\t" + "negl %0\n" + "3:" + :"=q" (__res), "=r" (cs), "=r" (ct) + :"1" (cs), "2" (ct) + : 
"memory" ); + return __res; +#else /* CONFIG_M486 || CONFIG_M586 */ register int __res; __asm__ __volatile__( "cld\n" @@ -104,11 +212,34 @@ "3:" 
:"=a" (__res):"S" (cs),"D" (ct):"si","di"); return __res; +#endif /* CONFIG_M486 || CONFIG_M586 */ } #define __HAVE_ARCH_STRNCMP extern inline int strncmp(const char * cs,const char * ct,size_t count) { +#if defined(CONFIG_M486) || defined(CONFIG_M586) + register int __res; + __asm__ __volatile__ + ("\n1:\tdecl %3\n\t" + "js 2f\n\t" + "movb (%1),%b0\n\t" + "incl %1\n\t" + "cmpb %b0,(%2)\n\t" + "jne 3f\n\t" + "incl %2\n\t" + "testb %b0,%b0\n\t" + "jne 1b\n" + "2:\txorl %0,%0\n\t" + "jmp 4f\n" + "3:\tmovl $1,%0\n\t" + "jb 4f\n\t" + "negl %0\n" + "4:" + :"=q" (__res), "=r" (cs), "=r" (ct), "=r" (count) + :"1" (cs), "2" (ct), "3" (count)); + return __res; +#else /* CONFIG_M486 || CONFIG_M586 */ register int __res; __asm__ __volatile__( "cld\n" @@ -126,11 +257,28 @@ "4:" :"=a" (__res):"S" (cs),"D" (ct),"c" (count):"si","di","cx"); return __res; +#endif /* CONFIG_M486 || CONFIG_M586 */ } #define __HAVE_ARCH_STRCHR extern inline char * strchr(const char * s, int c) { +#if defined(CONFIG_M486) || defined(CONFIG_M586) + register char * __res; + __asm__ __volatile__ + ("movb %%al,%%ah\n" + "1:\tmovb (%1),%%al\n\t" + "cmpb %%ah,%%al\n\t" + "je 2f\n\t" + "incl %1\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n\t" + "xorl %1,%1\n" + "2:\tmovl %1,%0\n\t" + :"=a" (__res), "=r" (s) + :"0" (c), "1" (s)); + return __res; +#else /* CONFIG_M486 || CONFIG_M586 */ register char * __res; __asm__ __volatile__( "cld\n\t" @@ -145,11 +293,27 @@ "decl %0" :"=a" (__res):"S" (s),"0" (c):"si"); return __res; +#endif /* CONFIG_M486 || CONFIG_M586 */ } #define __HAVE_ARCH_STRRCHR extern inline char * strrchr(const char * s, int c) { +#if defined(CONFIG_M486) || defined(CONFIG_M586) + register char * __res; + __asm__ __volatile__ + ("movb %b2,%h2\n" + "1:\tmovb (%1),%b2\n\t" + "cmpb %h2,%b2\n\t" + "jne 2f\n\t" + "movl %1,%0\n" + "2:\tincl %1\n\t" + "testb %b2,%b2\n\t" /* loop until the byte just read is NUL; %h2 holds c and must not take part in this test */ + "jne 1b" + :"=r" (__res), "=r" (s), "=q" (c) + 
:"0" (0), "1" (s), "2" (c)); + return __res; +#else /* CONFIG_M486 || CONFIG_M586 */ 
register char * __res; __asm__ __volatile__( "cld\n\t" @@ -162,6 +326,7 @@ "jne 1b" :"=d" (__res):"0" (0),"S" (s),"a" (c):"ax","si"); return __res; +#endif /* CONFIG_M486 || CONFIG_M586 */ } #define __HAVE_ARCH_STRSPN @@ -277,6 +442,19 @@ #define __HAVE_ARCH_STRLEN extern inline size_t strlen(const char * s) { +#if defined(CONFIG_M486) || defined(CONFIG_M586) + /* slightly slower on a 486, but with better chances of register allocation */ + register char dummy, *tmp= (char *)s; + __asm__ __volatile__("\n1:\t" + "movb\t(%0),%1\n\t" + "incl\t%0\n\t" + "testb\t%1,%1\n\t" + "jne\t1b" + :"=r" (tmp),"=q" (dummy) + :"0" (s) + : "memory" ); + return (tmp-s-1); +#else /* CONFIG_M486 || CONFIG_M586 */ register int __res; __asm__ __volatile__( "cld\n\t" @@ -286,6 +464,7 @@ "decl %0" :"=c" (__res):"D" (s),"a" (0),"0" (0xffffffff):"di"); return __res; +#endif /* CONFIG_M486 || CONFIG_M586 */ } extern char * ___strtok; @@ -351,6 +530,79 @@ return __res; } +#if defined(CONFIG_M486) || defined(CONFIG_M586) + +#define __memcpy_c(d,s,count) \ +(((count)==0) ? (void *)(d) : ((count)%4==0) ? /* 0 bytes: copy nothing; the __memcpy_by4 loop copies before it tests its counter */ \ + __memcpy_by4((d),(s),(count)) : \ + (((count)%2==0) ? \ + __memcpy_by2((d),(s),(count)) : \ + __memcpy_g((d),(s),(count)))) + +#define memcpy(d,s,count) \ +(__builtin_constant_p(count) ? 
\ + __memcpy_c((d),(s),(count)) : \ + __memcpy_g((d),(s),(count))) + +extern inline void * __memcpy_by4(void * to, const void * from, size_t n) +{ + register void *tmp = (void *)to; + register int dummy1,dummy2; + __asm__ __volatile__ + ("\n1:\tmovl (%2),%0\n\t" + "addl $4,%2\n\t" + "movl %0,(%1)\n\t" + "addl $4,%1\n\t" + "decl %3\n\t" + "jnz 1b" + :"=r" (dummy1), "=r" (tmp), "=r" (from), "=r" (dummy2) + :"1" (tmp), "2" (from), "3" (n/4) + :"memory"); + return (to); +} + +extern inline void * __memcpy_by2(void * to, const void * from, size_t n) +{ + register void *tmp = (void *)to; + register int dummy1,dummy2; + __asm__ __volatile__ + ("shrl $1,%3\n\t" + "jz 2f\n" /* only a word */ + "1:\tmovl (%2),%0\n\t" + "addl $4,%2\n\t" + "movl %0,(%1)\n\t" + "addl $4,%1\n\t" + "decl %3\n\t" + "jnz 1b\n" + "2:\tmovw (%2),%w0\n\t" + "movw %w0,(%1)" + :"=r" (dummy1), "=r" (tmp), "=r" (from), "=r" (dummy2) + :"1" (tmp), "2" (from), "3" (n/2) + :"memory"); + return (to); +} + +extern inline void * __memcpy_g(void * to, const void * from, size_t n) +{ + register void *tmp = (void *)to; + __asm__ __volatile__ + ("cld\n\t" + "shrl $1,%%ecx\n\t" + "jnc 1f\n\t" + "movsb\n" + "1:\tshrl $1,%%ecx\n\t" + "jnc 2f\n\t" + "movsw\n" + "2:\trep\n\t" + "movsl" + : /* no output */ + :"c" (n),"D" ((long) tmp),"S" ((long) from) + :"cx","di","si","memory"); + return (to); +} + +#else /* CONFIG_M486 || CONFIG_M586 */ + extern inline void * __memcpy(void * to, const void * from, size_t n) { __asm__ __volatile__( @@ -415,9 +667,31 @@ __constant_memcpy((t),(f),(n)) : \ __memcpy((t),(f),(n))) +#endif /* CONFIG_M486 || CONFIG_M586 */ + #define __HAVE_ARCH_MEMMOVE extern inline void * memmove(void * dest,const void * src, size_t n) { +#if defined(CONFIG_M486) || defined(CONFIG_M586) + register void *tmp = (void *)dest; + if (dest + +char * ___strtok = NULL; + +#else /* CONFIG_ASM_OPTIMIZE */ + #include #include @@ -297,3 +305,5 @@ return NULL; } #endif + +#endif /* CONFIG_ASM_OPTIMIZE */