[packages/kernel.git] / linux-2.4.21-sparc-gcc3.patch


Here's what I did to get past gcc-3.3's unnecessarily draconian
preprocessor.  This is against 2.4.21-rc2 + previous gcc patch + udiv
patch.  It includes the checksum-clobbered patch I submitted before.

UP boots and runs fine.. SMP gives me a "Unimplemented trap = 2b" right
after freeing kernel memory.. but that may have more to do with other
issues.  I'm working on it, but I don't have a serial console so oops
tracing is.. well.. giving me writer's cramp ;)

cheers.
john.c
-- 
John Clemens          http://www.deater.net/john
john@deater.net     ICQ: 7175925, IM: PianoManO8
      "I Hate Quotes" -- Samuel L. Clemens

Binary files linux-old/arch/sparc/boot/btfixupprep and linux-new/arch/sparc/boot/btfixupprep differ
diff -urN linux-old/arch/sparc/kernel/sun4d_smp.c linux-new/arch/sparc/kernel/sun4d_smp.c
--- linux-old/arch/sparc/kernel/sun4d_smp.c	2002-08-02 20:39:43.000000000 -0400
+++ linux-new/arch/sparc/kernel/sun4d_smp.c	2003-05-20 22:07:10.000000000 -0400
@@ -345,10 +345,10 @@
 			unsigned long a4 asm("i4") = arg4;
 			unsigned long a5 asm("i5") = arg5;
 					
-			__asm__ __volatile__("
-				std %0, [%6]
-				std %2, [%6 + 8]
-				std %4, [%6 + 16]" : : 
+			__asm__ __volatile__(
+				"std %0, [%6]\n\t"
+				"std %2, [%6 + 8]\n\t"
+				"std %4, [%6 + 16]\n\t" : :
 				"r"(f), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5),
 				"r" (&ccall_info.func));
 		}
diff -urN linux-old/arch/sparc/lib/Makefile linux-new/arch/sparc/lib/Makefile
--- linux-old/arch/sparc/lib/Makefile	2000-12-29 17:07:20.000000000 -0500
+++ linux-new/arch/sparc/lib/Makefile	2003-05-20 22:25:13.000000000 -0400
@@ -3,10 +3,10 @@
 #

 .S.s:
-	$(CPP) $(AFLAGS) -ansi -DST_DIV0=0x2 $< -o $*.s
+	$(CPP) $(AFLAGS) -DST_DIV0=0x2 $< -o $*.s

 .S.o:
-	$(CC) $(AFLAGS) -ansi -DST_DIV0=0x2 -c $< -o $*.o
+	$(CC) $(AFLAGS) -DST_DIV0=0x2 -c $< -o $*.o

 L_TARGET = lib.a

diff -urN linux-old/arch/sparc/math-emu/sfp-util.h linux-new/arch/sparc/math-emu/sfp-util.h
--- linux-old/arch/sparc/math-emu/sfp-util.h	1999-05-29 14:09:04.000000000 -0400
+++ linux-new/arch/sparc/math-emu/sfp-util.h	2003-05-20 22:12:21.000000000 -0400
@@ -4,8 +4,8 @@
 #include <asm/byteorder.h>

 #define add_ssaaaa(sh, sl, ah, al, bh, bl) 				\
-  __asm__ ("addcc %r4,%5,%1
-	addx %r2,%3,%0"							\
+  __asm__ ("addcc %r4,%5,%1\n\t"						\
+	   "addx %r2,%3,%0\n"						\
 	   : "=r" ((USItype)(sh)),					\
 	     "=&r" ((USItype)(sl))					\
 	   : "%rJ" ((USItype)(ah)),					\
@@ -14,8 +14,8 @@
 	     "rI" ((USItype)(bl))					\
 	   : "cc")
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) 				\
-  __asm__ ("subcc %r4,%5,%1
-	subx %r2,%3,%0"							\
+  __asm__ ("subcc %r4,%5,%1\n\t"						\
+	   "subx %r2,%3,%0\n"						\
 	   : "=r" ((USItype)(sh)),					\
 	     "=&r" ((USItype)(sl))					\
 	   : "rJ" ((USItype)(ah)),					\
@@ -25,46 +25,46 @@
 	   : "cc")

 #define umul_ppmm(w1, w0, u, v) \
-  __asm__ ("! Inlined umul_ppmm
-	wr	%%g0,%2,%%y	! SPARC has 0-3 delay insn after a wr
-	sra	%3,31,%%g2	! Don't move this insn
-	and	%2,%%g2,%%g2	! Don't move this insn
-	andcc	%%g0,0,%%g1	! Don't move this insn
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,0,%%g1
-	add	%%g1,%%g2,%0
-	rd	%%y,%1"							\
+  __asm__ ("! Inlined umul_ppmm\n\t"					\
+	"wr	%%g0,%2,%%y	! SPARC has 0-3 delay insn after a wr\n\t" \
+	"sra	%3,31,%%g2	! Don't move this insn\n\t"		\
+	"and	%2,%%g2,%%g2	! Don't move this insn\n\t"		\
+	"andcc	%%g0,0,%%g1	! Don't move this insn\n\t"		\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,0,%%g1\n\t" 					\
+	"add	%%g1,%%g2,%0\n\t" 					\
+	"rd	%%y,%1\n"						\
 	   : "=r" ((USItype)(w1)),					\
 	     "=r" ((USItype)(w0))					\
 	   : "%rI" ((USItype)(u)),					\
@@ -74,30 +74,30 @@
 /* It's quite necessary to add this much assembler for the sparc.
    The default udiv_qrnnd (in C) is more than 10 times slower!  */
 #define udiv_qrnnd(q, r, n1, n0, d) \
-  __asm__ ("! Inlined udiv_qrnnd
-	mov	32,%%g1
-	subcc	%1,%2,%%g0
-1:	bcs	5f
-	 addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb
-	sub	%1,%2,%1	! this kills msb of n
-	addx	%1,%1,%1	! so this can't give carry
-	subcc	%%g1,1,%%g1
-2:	bne	1b
-	 subcc	%1,%2,%%g0
-	bcs	3f
-	 addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb
-	b	3f
-	 sub	%1,%2,%1	! this kills msb of n
-4:	sub	%1,%2,%1
-5:	addxcc	%1,%1,%1
-	bcc	2b
-	 subcc	%%g1,1,%%g1
-! Got carry from n.  Subtract next step to cancel this carry.
-	bne	4b
-	 addcc	%0,%0,%0	! shift n1n0 and a 0-bit in lsb
-	sub	%1,%2,%1
-3:	xnor	%0,0,%0
-	! End of inline udiv_qrnnd"					\
+  __asm__ ("! Inlined udiv_qrnnd\n\t"					\
+	   "mov	32,%%g1\n\t"						\
+	   "subcc	%1,%2,%%g0\n\t"					\
+	   "1:	bcs	5f\n\t"						\
+	   "addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb\n\t"	\
+	   "sub	%1,%2,%1	! this kills msb of n\n\t"		\
+	   "addx	%1,%1,%1	! so this can't give carry\n\t"	\
+	   "subcc	%%g1,1,%%g1\n\t"				\
+	   "2:	bne	1b\n\t"						\
+	   "subcc	%1,%2,%%g0\n\t"					\
+	   "bcs	3f\n\t"							\
+	   "addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb\n\t"	\
+	   "b		3f\n\t"						\
+	   "sub	%1,%2,%1	! this kills msb of n\n\t"		\
+	   "4:	sub	%1,%2,%1\n\t"					\
+	   "5:	addxcc	%1,%1,%1\n\t"					\
+	   "bcc	2b\n\t"							\
+	   "subcc	%%g1,1,%%g1\n\t"				\
+	   "! Got carry from n.  Subtract next step to cancel this carry.\n\t" \
+	   "bne	4b\n\t"							\
+	   "addcc	%0,%0,%0	! shift n1n0 and a 0-bit in lsb\n\t" \
+	   "sub	%1,%2,%1\n\t"						\
+	   "3:	xnor	%0,0,%0\n\t"					\
+	   "! End of inline udiv_qrnnd\n"				\
 	   : "=&r" ((USItype)(q)),					\
 	     "=&r" ((USItype)(r))					\
 	   : "r" ((USItype)(d)),					\
diff -urN linux-old/include/asm-sparc/sfp-machine.h linux-new/include/asm-sparc/sfp-machine.h
--- linux-old/include/asm-sparc/sfp-machine.h	2000-05-09 01:00:01.000000000 -0400
+++ linux-new/include/asm-sparc/sfp-machine.h	2003-05-20 22:14:29.000000000 -0400
@@ -77,9 +77,9 @@

 /* Some assembly to speed things up. */
 #define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)			\
-  __asm__ ("addcc %r7,%8,%2
-	    addxcc %r5,%6,%1
-	    addx %r3,%4,%0"						\
+  __asm__ ("addcc %r7,%8,%2\n\t"					\
+	   "addxcc %r5,%6,%1\n\t"					\
+	   "addx %r3,%4,%0\n"						\
 	   : "=r" ((USItype)(r2)),					\
 	     "=&r" ((USItype)(r1)),					\
 	     "=&r" ((USItype)(r0))					\
@@ -92,9 +92,9 @@
 	   : "cc")

 #define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)			\
-  __asm__ ("subcc %r7,%8,%2
-	    subxcc %r5,%6,%1
-	    subx %r3,%4,%0"						\
+  __asm__ ("subcc %r7,%8,%2\n\t"					\
+	    "subxcc %r5,%6,%1\n\t"					\
+	    "subx %r3,%4,%0\n"						\
 	   : "=r" ((USItype)(r2)),					\
 	     "=&r" ((USItype)(r1)),					\
 	     "=&r" ((USItype)(r0))					\
@@ -111,11 +111,11 @@
     /* We need to fool gcc,  as we need to pass more than 10		\
        input/outputs.  */						\
     register USItype _t1 __asm__ ("g1"), _t2 __asm__ ("g2");		\
-    __asm__ __volatile__ ("
-	    addcc %r8,%9,%1
-	    addxcc %r6,%7,%0
-	    addxcc %r4,%5,%%g2
-	    addx %r2,%3,%%g1"						\
+    __asm__ __volatile__ (						\
+	    "addcc %r8,%9,%1\n\t"					\
+	    "addxcc %r6,%7,%0\n\t"					\
+	    "addxcc %r4,%5,%%g2\n\t"					\
+	    "addx %r2,%3,%%g1\n\t"					\
 	   : "=&r" ((USItype)(r1)),					\
 	     "=&r" ((USItype)(r0))					\
 	   : "%rJ" ((USItype)(x3)),					\
@@ -136,11 +136,11 @@
     /* We need to fool gcc,  as we need to pass more than 10		\
        input/outputs.  */						\
     register USItype _t1 __asm__ ("g1"), _t2 __asm__ ("g2");		\
-    __asm__ __volatile__ ("
-	    subcc %r8,%9,%1
-	    subxcc %r6,%7,%0
-	    subxcc %r4,%5,%%g2
-	    subx %r2,%3,%%g1"						\
+    __asm__ __volatile__ (						\
+	    "subcc %r8,%9,%1\n\t"					\
+	    "subxcc %r6,%7,%0\n\t"					\
+	    "subxcc %r4,%5,%%g2\n\t"					\
+	    "subx %r2,%3,%%g1\n\t"					\
 	   : "=&r" ((USItype)(r1)),					\
 	     "=&r" ((USItype)(r0))					\
 	   : "%rJ" ((USItype)(x3)),					\
@@ -161,10 +161,10 @@
 #define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0) __FP_FRAC_SUB_4(x3,x2,x1,x0,x3,x2,x1,x0,y3,y2,y1,y0)

 #define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i)					\
-  __asm__ ("addcc %3,%4,%3
-	    addxcc %2,%%g0,%2
-	    addxcc %1,%%g0,%1
-	    addx %0,%%g0,%0"						\
+  __asm__ ("addcc %3,%4,%3\n\t"						\
+	   "addxcc %2,%%g0,%2\n\t"					\
+	   "addxcc %1,%%g0,%1\n\t"					\
+	   "addx %0,%%g0,%0\n\t"					\
 	   : "=&r" ((USItype)(x3)),					\
 	     "=&r" ((USItype)(x2)),					\
 	     "=&r" ((USItype)(x1)),					\


Took me a while to find the time but here's a second crack at the
checksum.h cleanup to make gcc-3.3 happy w/ 2.4.  Can someone check this
to verify that I did it correctly this time?  I'm writing from the machine
now over the network, but that doesn't mean it's not dumb luck.

Please apply to 2.4 if it's acceptable.  If you want me to re-diff the
other gcc-3.3 fixes let me know.

patch below sig.

john.c


On Thu, 29 May 2003, John Clemens wrote:
> DaveM said:
> > Understood.
> >
> > But you have to fix this differently.  If you remove these things
> > from the clobber list, you must mark the variables passed in as
> > follows:
> >
> > 1) Either output-only, and therefore using "=&r" (which means
> >    "written before all inputs are consumed", it prevents gcc from
> >    using the same register for "ret" for other input values)
> >
> >    This applies to "ret" so merely change it from "=r" to "=&r"
> >
> > 2) or if as input, you must mention it in the outputs, also
> >    using "=&r" so that gcc knows the register is written by the
> >    asm statement.
> >
> >    This is what to do with "d", it means that all the %N numbers
> >    get changed so be careful.
> >
> > So probably this all amounts to:
> >
> >         __asm__ __volatile__ (
> >                 "call " C_LABEL_STR(__csum_partial_copy_sparc_generic)
> > "\n\t"
> >                 " mov %5, %%g7\n"
> >         : "=&r" (ret), "=&r" (d) : "0" (ret), "1" (d), "r" (l), "r" (sum) :
> >         "o2", "o3", "o4", "o5", "o7", "g2", "g3", "g4", "g5", "g7");
> >
> > Meanwhile, also would be a good idea to change this to use
> > "static inline" instead of "extern __inline__".

-- 
John Clemens          http://www.deater.net/john
john@deater.net     ICQ: 7175925, IM: PianoManO8
      "I Hate Quotes" -- Samuel L. Clemens


--- linux-old/include/asm-sparc/checksum.h	2002-08-02 20:39:45.000000000 -0400
+++ linux-new/include/asm-sparc/checksum.h	2003-06-11 00:31:08.000000000 -0400
@@ -48,7 +48,7 @@
   
 extern unsigned int __csum_partial_copy_sparc_generic (const char *, char *);
 
-extern __inline__ unsigned int 
+static inline unsigned int
 csum_partial_copy_nocheck (const char *src, char *dst, int len, 
 			   unsigned int sum)
 {
@@ -58,13 +58,14 @@
 	
 	__asm__ __volatile__ (
 		"call " C_LABEL_STR(__csum_partial_copy_sparc_generic) "\n\t"
-		" mov %4, %%g7\n"
-	: "=r" (ret) : "0" (ret), "r" (d), "r" (l), "r" (sum) :
-	"o1", "o2", "o3", "o4", "o5", "o7", "g1", "g2", "g3", "g4", "g5", "g7");
+		" mov %6, %%g7\n"
+	: "=&r" (ret), "=&r" (d), "=&r" (l)
+	: "0" (ret), "1" (d), "2" (l), "r" (sum)
+	: "o2", "o3", "o4", "o5", "o7", "g2", "g3", "g4", "g5", "g7");
 	return ret;
 }
 
-extern __inline__ unsigned int 
+static inline unsigned int
 csum_partial_copy_from_user(const char *src, char *dst, int len, 
 			    unsigned int sum, int *err)
   {
@@ -85,14 +86,15 @@
 		".previous\n"
 		"1:\n\t"
 		"call " C_LABEL_STR(__csum_partial_copy_sparc_generic) "\n\t"
-		" st %5, [%%sp + 64]\n"
-		: "=r" (ret) : "0" (ret), "r" (d), "r" (l), "r" (s), "r" (err) :
-		"o1", "o2", "o3", "o4", "o5", "o7", "g1", "g2", "g3", "g4", "g5", "g7");
+		" st %8, [%%sp + 64]\n"
+		: "=&r" (ret), "=&r" (d), "=&r" (l), "=&r" (s)
+		: "0" (ret), "1" (d), "2" (l), "3" (s), "r" (err)
+		: "o2", "o3", "o4", "o5", "o7", "g2", "g3", "g4", "g5");
 		return ret;
 	}
   }
   
-extern __inline__ unsigned int 
+static inline unsigned int
 csum_partial_copy_to_user(const char *src, char *dst, int len, 
 			  unsigned int sum, int *err)
 {
@@ -112,9 +114,10 @@
 		".previous\n"
 		"1:\n\t"
 		"call " C_LABEL_STR(__csum_partial_copy_sparc_generic) "\n\t"
-		" st %5, [%%sp + 64]\n"
-		: "=r" (ret) : "0" (ret), "r" (d), "r" (l), "r" (s), "r" (err) :
-		"o1", "o2", "o3", "o4", "o5", "o7", "g1", "g2", "g3", "g4", "g5", "g7");
+		" st %8, [%%sp + 64]\n"
+		: "=&r" (ret), "=&r" (d), "=&r" (l), "=&r" (s)
+		: "0" (ret), "1" (d), "2" (l), "3" (s), "r" (err)
+		: "o2", "o3", "o4", "o5", "o7", "g2", "g3", "g4", "g5");
 		return ret;
 	}
 }
@@ -125,7 +128,7 @@
 /* ihl is always 5 or greater, almost always is 5, and iph is word aligned
  * the majority of the time.
  */
-extern __inline__ unsigned short ip_fast_csum(__const__ unsigned char *iph,
+static inline unsigned short ip_fast_csum(__const__ unsigned char *iph,
 					      unsigned int ihl)
 {
 	unsigned short sum;
@@ -163,7 +166,7 @@
 }

 /* Fold a partial checksum without adding pseudo headers. */
-extern __inline__ unsigned int csum_fold(unsigned int sum)
+static inline unsigned int csum_fold(unsigned int sum)
 {
 	unsigned int tmp;

@@ -177,7 +180,7 @@
 	return sum;
 }

-extern __inline__ unsigned long csum_tcpudp_nofold(unsigned long saddr,
+static inline unsigned long csum_tcpudp_nofold(unsigned long saddr,
 						   unsigned long daddr,
 						   unsigned int len,
 						   unsigned short proto,
@@ -209,7 +212,7 @@

 #define _HAVE_ARCH_IPV6_CSUM

-static __inline__ unsigned short int csum_ipv6_magic(struct in6_addr *saddr,
+static inline unsigned short int csum_ipv6_magic(struct in6_addr *saddr,
 						     struct in6_addr *daddr,
 						     __u32 len,
 						     unsigned short proto,
@@ -244,7 +247,7 @@
 }

 /* this routine is used for miscellaneous IP-like checksums, mainly in icmp.c */
-extern __inline__ unsigned short ip_compute_csum(unsigned char * buff, int len)
+static inline unsigned short ip_compute_csum(unsigned char * buff, int len)
 {
 	return csum_fold(csum_partial(buff, len, 0));
 }

-
To unsubscribe from this list: send the line "unsubscribe sparclinux" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Commit	Line	Data
52e20830 JR	1
	2	Here's what I did to get past gcc-3.3's unnecessarily draconian
	3	preprocessor. This is against 2.4.21-rc2 + previous gcc patch + udiv
	4	patch. It includes the checksum-clobbered patch I submitted before.
	5
	6	UP boots and runs fine.. SMP gives me a "Unimplemented trap = 2b" right
	7	after freeing kernel memory.. but that may have more to do with other
	8	issues. I'm working on it, but I don't have a serial console so oops
	9	tracing is.. well.. giving me writer's cramp ;)
	10
	11	cheers.
	12	john.c
	13	--
	14	John Clemens http://www.deater.net/john
	15	john@deater.net ICQ: 7175925, IM: PianoManO8
	16	"I Hate Quotes" -- Samuel L. Clemens
	17
	18	Binary files linux-old/arch/sparc/boot/btfixupprep and linux-new/arch/sparc/boot/btfixupprep differ
	19	diff -urN linux-old/arch/sparc/kernel/sun4d_smp.c linux-new/arch/sparc/kernel/sun4d_smp.c
	20	--- linux-old/arch/sparc/kernel/sun4d_smp.c 2002-08-02 20:39:43.000000000 -0400
	21	+++ linux-new/arch/sparc/kernel/sun4d_smp.c 2003-05-20 22:07:10.000000000 -0400
	22	@@ -345,10 +345,10 @@
	23	unsigned long a4 asm("i4") = arg4;
	24	unsigned long a5 asm("i5") = arg5;
	25
	26	- __asm__ __volatile__("
	27	- std %0, [%6]
	28	- std %2, [%6 + 8]
	29	- std %4, [%6 + 16]" : :
	30	+ __asm__ __volatile__(
	31	+ "std %0, [%6]\n\t"
	32	+ "std %2, [%6 + 8]\n\t"
	33	+ "std %4, [%6 + 16]\n\t" : :
	34	"r"(f), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5),
	35	"r" (&ccall_info.func));
	36	}
	37	diff -urN linux-old/arch/sparc/lib/Makefile linux-new/arch/sparc/lib/Makefile
	38	--- linux-old/arch/sparc/lib/Makefile 2000-12-29 17:07:20.000000000 -0500
	39	+++ linux-new/arch/sparc/lib/Makefile 2003-05-20 22:25:13.000000000 -0400
	40	@@ -3,10 +3,10 @@
	41	#
	42
	43	.S.s:
	44	- $(CPP) $(AFLAGS) -ansi -DST_DIV0=0x2 $< -o $*.s
	45	+ $(CPP) $(AFLAGS) -DST_DIV0=0x2 $< -o $*.s
	46
	47	.S.o:
	48	- $(CC) $(AFLAGS) -ansi -DST_DIV0=0x2 -c $< -o $*.o
	49	+ $(CC) $(AFLAGS) -DST_DIV0=0x2 -c $< -o $*.o
	50
	51	L_TARGET = lib.a
	52
	53	diff -urN linux-old/arch/sparc/math-emu/sfp-util.h linux-new/arch/sparc/math-emu/sfp-util.h
	54	--- linux-old/arch/sparc/math-emu/sfp-util.h 1999-05-29 14:09:04.000000000 -0400
	55	+++ linux-new/arch/sparc/math-emu/sfp-util.h 2003-05-20 22:12:21.000000000 -0400
	56	@@ -4,8 +4,8 @@
	57	#include <asm/byteorder.h>
	58
	59	#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	60	- __asm__ ("addcc %r4,%5,%1
	61	- addx %r2,%3,%0" \
	62	+ __asm__ ("addcc %r4,%5,%1\n\t" \
	63	+ "addx %r2,%3,%0\n" \
	64	: "=r" ((USItype)(sh)), \
65	"=&r" ((USItype)(sl)) \
66	: "%rJ" ((USItype)(ah)), \
67	@@ -14,8 +14,8 @@
68	"rI" ((USItype)(bl)) \
69	: "cc")
70	#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
71	- __asm__ ("subcc %r4,%5,%1
72	- subx %r2,%3,%0" \
73	+ __asm__ ("subcc %r4,%5,%1\n\t" \
74	+ "subx %r2,%3,%0\n" \
75	: "=r" ((USItype)(sh)), \
76	"=&r" ((USItype)(sl)) \
77	: "rJ" ((USItype)(ah)), \
78	@@ -25,46 +25,46 @@
79	: "cc")
80
81	#define umul_ppmm(w1, w0, u, v) \
82	- __asm__ ("! Inlined umul_ppmm
83	- wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr
84	- sra %3,31,%%g2 ! Don't move this insn
85	- and %2,%%g2,%%g2 ! Don't move this insn
86	- andcc %%g0,0,%%g1 ! Don't move this insn
87	- mulscc %%g1,%3,%%g1
88	- mulscc %%g1,%3,%%g1
89	- mulscc %%g1,%3,%%g1
90	- mulscc %%g1,%3,%%g1
91	- mulscc %%g1,%3,%%g1
92	- mulscc %%g1,%3,%%g1
93	- mulscc %%g1,%3,%%g1
94	- mulscc %%g1,%3,%%g1
95	- mulscc %%g1,%3,%%g1
96	- mulscc %%g1,%3,%%g1
97	- mulscc %%g1,%3,%%g1
98	- mulscc %%g1,%3,%%g1
99	- mulscc %%g1,%3,%%g1
100	- mulscc %%g1,%3,%%g1
101	- mulscc %%g1,%3,%%g1
102	- mulscc %%g1,%3,%%g1
103	- mulscc %%g1,%3,%%g1
104	- mulscc %%g1,%3,%%g1
105	- mulscc %%g1,%3,%%g1
106	- mulscc %%g1,%3,%%g1
107	- mulscc %%g1,%3,%%g1
108	- mulscc %%g1,%3,%%g1
109	- mulscc %%g1,%3,%%g1
110	- mulscc %%g1,%3,%%g1
111	- mulscc %%g1,%3,%%g1
112	- mulscc %%g1,%3,%%g1
113	- mulscc %%g1,%3,%%g1
114	- mulscc %%g1,%3,%%g1
115	- mulscc %%g1,%3,%%g1
116	- mulscc %%g1,%3,%%g1
117	- mulscc %%g1,%3,%%g1
118	- mulscc %%g1,%3,%%g1
119	- mulscc %%g1,0,%%g1
120	- add %%g1,%%g2,%0
121	- rd %%y,%1" \
122	+ __asm__ ("! Inlined umul_ppmm\n\t" \
123	+ "wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n\t" \
124	+ "sra %3,31,%%g2 ! Don't move this insn\n\t" \
125	+ "and %2,%%g2,%%g2 ! Don't move this insn\n\t" \
126	+ "andcc %%g0,0,%%g1 ! Don't move this insn\n\t" \
127	+ "mulscc %%g1,%3,%%g1\n\t" \
128	+ "mulscc %%g1,%3,%%g1\n\t" \
129	+ "mulscc %%g1,%3,%%g1\n\t" \
130	+ "mulscc %%g1,%3,%%g1\n\t" \
131	+ "mulscc %%g1,%3,%%g1\n\t" \
132	+ "mulscc %%g1,%3,%%g1\n\t" \
133	+ "mulscc %%g1,%3,%%g1\n\t" \
134	+ "mulscc %%g1,%3,%%g1\n\t" \
135	+ "mulscc %%g1,%3,%%g1\n\t" \
136	+ "mulscc %%g1,%3,%%g1\n\t" \
137	+ "mulscc %%g1,%3,%%g1\n\t" \
138	+ "mulscc %%g1,%3,%%g1\n\t" \
139	+ "mulscc %%g1,%3,%%g1\n\t" \
140	+ "mulscc %%g1,%3,%%g1\n\t" \
141	+ "mulscc %%g1,%3,%%g1\n\t" \
142	+ "mulscc %%g1,%3,%%g1\n\t" \
143	+ "mulscc %%g1,%3,%%g1\n\t" \
144	+ "mulscc %%g1,%3,%%g1\n\t" \
145	+ "mulscc %%g1,%3,%%g1\n\t" \
146	+ "mulscc %%g1,%3,%%g1\n\t" \
147	+ "mulscc %%g1,%3,%%g1\n\t" \
148	+ "mulscc %%g1,%3,%%g1\n\t" \
149	+ "mulscc %%g1,%3,%%g1\n\t" \
150	+ "mulscc %%g1,%3,%%g1\n\t" \
151	+ "mulscc %%g1,%3,%%g1\n\t" \
152	+ "mulscc %%g1,%3,%%g1\n\t" \
153	+ "mulscc %%g1,%3,%%g1\n\t" \
154	+ "mulscc %%g1,%3,%%g1\n\t" \
155	+ "mulscc %%g1,%3,%%g1\n\t" \
156	+ "mulscc %%g1,%3,%%g1\n\t" \
157	+ "mulscc %%g1,%3,%%g1\n\t" \
158	+ "mulscc %%g1,%3,%%g1\n\t" \
159	+ "mulscc %%g1,0,%%g1\n\t" \
160	+ "add %%g1,%%g2,%0\n\t" \
161	+ "rd %%y,%1\n" \
162	: "=r" ((USItype)(w1)), \
163	"=r" ((USItype)(w0)) \
164	: "%rI" ((USItype)(u)), \
165	@@ -74,30 +74,30 @@
166	/* It's quite necessary to add this much assembler for the sparc.
167	The default udiv_qrnnd (in C) is more than 10 times slower! */
168	#define udiv_qrnnd(q, r, n1, n0, d) \
169	- __asm__ ("! Inlined udiv_qrnnd
170	- mov 32,%%g1
171	- subcc %1,%2,%%g0
172	-1: bcs 5f
173	- addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb
174	- sub %1,%2,%1 ! this kills msb of n
175	- addx %1,%1,%1 ! so this can't give carry
176	- subcc %%g1,1,%%g1
177	-2: bne 1b
178	- subcc %1,%2,%%g0
179	- bcs 3f
180	- addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb
181	- b 3f
182	- sub %1,%2,%1 ! this kills msb of n
183	-4: sub %1,%2,%1
184	-5: addxcc %1,%1,%1
185	- bcc 2b
186	- subcc %%g1,1,%%g1
187	-! Got carry from n. Subtract next step to cancel this carry.
188	- bne 4b
189	- addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb
190	- sub %1,%2,%1
191	-3: xnor %0,0,%0
192	- ! End of inline udiv_qrnnd" \
193	+ __asm__ ("! Inlined udiv_qrnnd\n\t" \
194	+ "mov 32,%%g1\n\t" \
195	+ "subcc %1,%2,%%g0\n\t" \
196	+ "1: bcs 5f\n\t" \
197	+ "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \
198	+ "sub %1,%2,%1 ! this kills msb of n\n\t" \
199	+ "addx %1,%1,%1 ! so this can't give carry\n\t" \
200	+ "subcc %%g1,1,%%g1\n\t" \
201	+ "2: bne 1b\n\t" \
202	+ "subcc %1,%2,%%g0\n\t" \
203	+ "bcs 3f\n\t" \
204	+ "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \
205	+ "b 3f\n\t" \
206	+ "sub %1,%2,%1 ! this kills msb of n\n\t" \
207	+ "4: sub %1,%2,%1\n\t" \
208	+ "5: addxcc %1,%1,%1\n\t" \
209	+ "bcc 2b\n\t" \
210	+ "subcc %%g1,1,%%g1\n\t" \
211	+ "! Got carry from n. Subtract next step to cancel this carry.\n\t" \
212	+ "bne 4b\n\t" \
213	+ "addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n\t" \
214	+ "sub %1,%2,%1\n\t" \
215	+ "3: xnor %0,0,%0\n\t" \
216	+ "! End of inline udiv_qrnnd\n" \
217	: "=&r" ((USItype)(q)), \
218	"=&r" ((USItype)(r)) \
219	: "r" ((USItype)(d)), \
220	diff -urN linux-old/include/asm-sparc/sfp-machine.h linux-new/include/asm-sparc/sfp-machine.h
221	--- linux-old/include/asm-sparc/sfp-machine.h 2000-05-09 01:00:01.000000000 -0400
222	+++ linux-new/include/asm-sparc/sfp-machine.h 2003-05-20 22:14:29.000000000 -0400
223	@@ -77,9 +77,9 @@
224
225	/* Some assembly to speed things up. */
226	#define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \
227	- __asm__ ("addcc %r7,%8,%2
228	- addxcc %r5,%6,%1
229	- addx %r3,%4,%0" \
230	+ __asm__ ("addcc %r7,%8,%2\n\t" \
231	+ "addxcc %r5,%6,%1\n\t" \
232	+ "addx %r3,%4,%0\n" \
233	: "=r" ((USItype)(r2)), \
234	"=&r" ((USItype)(r1)), \
235	"=&r" ((USItype)(r0)) \
236	@@ -92,9 +92,9 @@
237	: "cc")
238
239	#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \
240	- __asm__ ("subcc %r7,%8,%2
241	- subxcc %r5,%6,%1
242	- subx %r3,%4,%0" \
243	+ __asm__ ("subcc %r7,%8,%2\n\t" \
244	+ "subxcc %r5,%6,%1\n\t" \
245	+ "subx %r3,%4,%0\n" \
246	: "=r" ((USItype)(r2)), \
247	"=&r" ((USItype)(r1)), \
248	"=&r" ((USItype)(r0)) \
249	@@ -111,11 +111,11 @@
250	/* We need to fool gcc, as we need to pass more than 10 \
251	input/outputs. */ \
252	register USItype _t1 __asm__ ("g1"), _t2 __asm__ ("g2"); \
253	- __asm__ __volatile__ ("
254	- addcc %r8,%9,%1
255	- addxcc %r6,%7,%0
256	- addxcc %r4,%5,%%g2
257	- addx %r2,%3,%%g1" \
258	+ __asm__ __volatile__ ( \
259	+ "addcc %r8,%9,%1\n\t" \
260	+ "addxcc %r6,%7,%0\n\t" \
261	+ "addxcc %r4,%5,%%g2\n\t" \
262	+ "addx %r2,%3,%%g1\n\t" \
263	: "=&r" ((USItype)(r1)), \
264	"=&r" ((USItype)(r0)) \
265	: "%rJ" ((USItype)(x3)), \
266	@@ -136,11 +136,11 @@
267	/* We need to fool gcc, as we need to pass more than 10 \
268	input/outputs. */ \
269	register USItype _t1 __asm__ ("g1"), _t2 __asm__ ("g2"); \
270	- __asm__ __volatile__ ("
271	- subcc %r8,%9,%1
272	- subxcc %r6,%7,%0
273	- subxcc %r4,%5,%%g2
274	- subx %r2,%3,%%g1" \
275	+ __asm__ __volatile__ ( \
276	+ "subcc %r8,%9,%1\n\t" \
277	+ "subxcc %r6,%7,%0\n\t" \
278	+ "subxcc %r4,%5,%%g2\n\t" \
279	+ "subx %r2,%3,%%g1\n\t" \
280	: "=&r" ((USItype)(r1)), \
281	"=&r" ((USItype)(r0)) \
282	: "%rJ" ((USItype)(x3)), \
283	@@ -161,10 +161,10 @@
284	#define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0) __FP_FRAC_SUB_4(x3,x2,x1,x0,x3,x2,x1,x0,y3,y2,y1,y0)
285
286	#define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i) \
287	- __asm__ ("addcc %3,%4,%3
288	- addxcc %2,%%g0,%2
289	- addxcc %1,%%g0,%1
290	- addx %0,%%g0,%0" \
291	+ __asm__ ("addcc %3,%4,%3\n\t" \
292	+ "addxcc %2,%%g0,%2\n\t" \
293	+ "addxcc %1,%%g0,%1\n\t" \
294	+ "addx %0,%%g0,%0\n\t" \
295	: "=&r" ((USItype)(x3)), \
296	"=&r" ((USItype)(x2)), \
297	"=&r" ((USItype)(x1)), \
298
299
300	Took me a while to find the time but here's a second crack at the
301	checksum.h cleanup to make gcc-3.3 happy w/ 2.4. Can someone check this
302	to verify that I did it correctly this time? I'm writing from the machine
303	now over the network, but that doesn't mean it's not dumb luck.
304
305	Please apply to 2.4 if it's acceptable. If you want me to re-diff the
306	other gcc-3.3 fixes let me know.
307
308	patch below sig.
309
310	john.c
311
312
313	On Thu, 29 May 2003, John Clemens wrote:
314	> DaveM said:
315	> > Understood.
316	> >
317	> > But you have to fix this differently. If you remove these things
318	> > from the clobber list, you must mark the variables passed in as
319	> > follows:
320	> >
321	> > 1) Either output-only, and therefore using "=&r" (which means
322	> > "written before all inputs are consumed", it prevents gcc from
323	> > using the same register for "ret" for other input values)
324	> >
	325	> > This applies to "ret" so merely change it from "=r" to "=&r"
326	> >
327	> > 2) or if as input, you must mention it in the outputs, also
328	> > using "=&r" so that gcc knows the register is written by the
329	> > asm statement.
330	> >
331	> > This is what to do with "d", it means that all the %N numbers
332	> > get changed so be careful.
333	> >
334	> > So probably this all amounts to:
335	> >
336	> > __asm__ __volatile__ (
337	> > "call " C_LABEL_STR(__csum_partial_copy_sparc_generic)
338	> > "\n\t"
339	> > " mov %5, %%g7\n"
340	> > : "=&r" (ret), "=&r" (d) : "0" (ret), "1" (d), "r" (l), "r" (sum) :
341	> > "o2", "o3", "o4", "o5", "o7", "g2", "g3", "g4", "g5", "g7");
342	> >
343	> > Meanwhile, also would be a good idea to change this to use
344	> > "static inline" instead of "extern __inline__".
345
346	--
347	John Clemens http://www.deater.net/john
348	john@deater.net ICQ: 7175925, IM: PianoManO8
349	"I Hate Quotes" -- Samuel L. Clemens
350
351
352
353	--- linux-old/include/asm-sparc/checksum.h 2002-08-02 20:39:45.000000000 -0400
354	+++ linux-new/include/asm-sparc/checksum.h 2003-06-11 00:31:08.000000000 -0400
355	@@ -48,7 +48,7 @@
356
357	extern unsigned int __csum_partial_copy_sparc_generic (const char , char );
358
359	-extern __inline__ unsigned int
360	+static inline unsigned int
361	csum_partial_copy_nocheck (const char src, char dst, int len,
362	unsigned int sum)
363	{
364	@@ -58,13 +58,14 @@
365
366	__asm__ __volatile__ (
367	"call " C_LABEL_STR(__csum_partial_copy_sparc_generic) "\n\t"
368	- " mov %4, %%g7\n"
369	- : "=r" (ret) : "0" (ret), "r" (d), "r" (l), "r" (sum) :
370	- "o1", "o2", "o3", "o4", "o5", "o7", "g1", "g2", "g3", "g4", "g5", "g7");
371	+ " mov %6, %%g7\n"
372	+ : "=&r" (ret), "=&r" (d), "=&r" (l)
373	+ : "0" (ret), "1" (d), "2" (l), "r" (sum)
374	+ : "o2", "o3", "o4", "o5", "o7", "g2", "g3", "g4", "g5", "g7");
375	return ret;
376	}
377
378	-extern __inline__ unsigned int
379	+static inline unsigned int
380	csum_partial_copy_from_user(const char src, char dst, int len,
381	unsigned int sum, int *err)
382	{
383	@@ -85,14 +86,15 @@
384	".previous\n"
385	"1:\n\t"
386	"call " C_LABEL_STR(__csum_partial_copy_sparc_generic) "\n\t"
387	- " st %5, [%%sp + 64]\n"
388	- : "=r" (ret) : "0" (ret), "r" (d), "r" (l), "r" (s), "r" (err) :
389	- "o1", "o2", "o3", "o4", "o5", "o7", "g1", "g2", "g3", "g4", "g5", "g7");
390	+ " st %8, [%%sp + 64]\n"
391	+ : "=&r" (ret), "=&r" (d), "=&r" (l), "=&r" (s)
392	+ : "0" (ret), "1" (d), "2" (l), "3" (s), "r" (err)
393	+ : "o2", "o3", "o4", "o5", "o7", "g2", "g3", "g4", "g5");
394	return ret;
395	}
396	}
397
398	-extern __inline__ unsigned int
399	+static inline unsigned int
400	csum_partial_copy_to_user(const char src, char dst, int len,
401	unsigned int sum, int *err)
402	{
403	@@ -112,9 +114,10 @@
404	".previous\n"
405	"1:\n\t"
406	"call " C_LABEL_STR(__csum_partial_copy_sparc_generic) "\n\t"
407	- " st %5, [%%sp + 64]\n"
408	- : "=r" (ret) : "0" (ret), "r" (d), "r" (l), "r" (s), "r" (err) :
409	- "o1", "o2", "o3", "o4", "o5", "o7", "g1", "g2", "g3", "g4", "g5", "g7");
410	+ " st %8, [%%sp + 64]\n"
411	+ : "=&r" (ret), "=&r" (d), "=&r" (l), "=&r" (s)
412	+ : "0" (ret), "1" (d), "2" (l), "3" (s), "r" (err)
413	+ : "o2", "o3", "o4", "o5", "o7", "g2", "g3", "g4", "g5");
414	return ret;
415	}
416	}
417	@@ -125,7 +128,7 @@
418	/* ihl is always 5 or greater, almost always is 5, and iph is word aligned
419	* the majority of the time.
420	*/
421	-extern __inline__ unsigned short ip_fast_csum(__const__ unsigned char *iph,
422	+static inline unsigned short ip_fast_csum(__const__ unsigned char *iph,
423	unsigned int ihl)
424	{
425	unsigned short sum;
426	@@ -163,7 +166,7 @@
427	}
428
429	/* Fold a partial checksum without adding pseudo headers. */
430	-extern __inline__ unsigned int csum_fold(unsigned int sum)
431	+static inline unsigned int csum_fold(unsigned int sum)
432	{
433	unsigned int tmp;
434
435	@@ -177,7 +180,7 @@
436	return sum;
437	}
438
439	-extern __inline__ unsigned long csum_tcpudp_nofold(unsigned long saddr,
440	+static inline unsigned long csum_tcpudp_nofold(unsigned long saddr,
441	unsigned long daddr,
442	unsigned int len,
443	unsigned short proto,
444	@@ -209,7 +212,7 @@
445
446	#define _HAVE_ARCH_IPV6_CSUM
447
448	-static __inline__ unsigned short int csum_ipv6_magic(struct in6_addr *saddr,
449	+static inline unsigned short int csum_ipv6_magic(struct in6_addr *saddr,
450	struct in6_addr *daddr,
451	__u32 len,
452	unsigned short proto,
453	@@ -244,7 +247,7 @@
454	}
455
456	/* this routine is used for miscellaneous IP-like checksums, mainly in icmp.c */
457	-extern __inline__ unsigned short ip_compute_csum(unsigned char * buff, int len)
458	+static inline unsigned short ip_compute_csum(unsigned char * buff, int len)
459	{
460	return csum_fold(csum_partial(buff, len, 0));
461	}
462
463	-
464	To unsubscribe from this list: send the line "unsubscribe sparclinux" in
465	the body of a message to majordomo@vger.kernel.org
466	More majordomo info at http://vger.kernel.org/majordomo-info.html