Here's what I did to get past gcc-3.3's unnecessarily draconian
preprocessor.  This is against 2.4.21-rc2 + previous gcc patch + udiv
patch.  It includes the checksum-clobbered patch I submitted before.

UP boots and runs fine.  SMP gives me an "Unimplemented trap = 2b" right
after freeing kernel memory, but that may have more to do with other
issues.  I'm working on it, but I don't have a serial console, so oops
tracing is... well... giving me writer's cramp ;)

cheers.
john.c
-- 
John Clemens          http://www.deater.net/john
john@deater.net     ICQ: 7175925, IM: PianoManO8
      "I Hate Quotes" -- Samuel L. Clemens

Binary files linux-old/arch/sparc/boot/btfixupprep and linux-new/arch/sparc/boot/btfixupprep differ
diff -urN linux-old/arch/sparc/kernel/sun4d_smp.c linux-new/arch/sparc/kernel/sun4d_smp.c
--- linux-old/arch/sparc/kernel/sun4d_smp.c	2002-08-02 20:39:43.000000000 -0400
+++ linux-new/arch/sparc/kernel/sun4d_smp.c	2003-05-20 22:07:10.000000000 -0400
@@ -345,10 +345,10 @@
 			unsigned long a4 asm("i4") = arg4;
 			unsigned long a5 asm("i5") = arg5;
 					
-			__asm__ __volatile__("
-				std %0, [%6]
-				std %2, [%6 + 8]
-				std %4, [%6 + 16]" : : 
+			__asm__ __volatile__(
+				"std %0, [%6]\n\t"
+				"std %2, [%6 + 8]\n\t"
+				"std %4, [%6 + 16]\n\t" : :
 				"r"(f), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5),
 				"r" (&ccall_info.func));
 		}
diff -urN linux-old/arch/sparc/lib/Makefile linux-new/arch/sparc/lib/Makefile
--- linux-old/arch/sparc/lib/Makefile	2000-12-29 17:07:20.000000000 -0500
+++ linux-new/arch/sparc/lib/Makefile	2003-05-20 22:25:13.000000000 -0400
@@ -3,10 +3,10 @@
 #

 .S.s:
-	$(CPP) $(AFLAGS) -ansi -DST_DIV0=0x2 $< -o $*.s
+	$(CPP) $(AFLAGS) -DST_DIV0=0x2 $< -o $*.s

 .S.o:
-	$(CC) $(AFLAGS) -ansi -DST_DIV0=0x2 -c $< -o $*.o
+	$(CC) $(AFLAGS) -DST_DIV0=0x2 -c $< -o $*.o

 L_TARGET = lib.a

diff -urN linux-old/arch/sparc/math-emu/sfp-util.h linux-new/arch/sparc/math-emu/sfp-util.h
--- linux-old/arch/sparc/math-emu/sfp-util.h	1999-05-29 14:09:04.000000000 -0400
+++ linux-new/arch/sparc/math-emu/sfp-util.h	2003-05-20 22:12:21.000000000 -0400
@@ -4,8 +4,8 @@
 #include <asm/byteorder.h>

 #define add_ssaaaa(sh, sl, ah, al, bh, bl) 				\
-  __asm__ ("addcc %r4,%5,%1
-	addx %r2,%3,%0"							\
+  __asm__ ("addcc %r4,%5,%1\n\t"						\
+	   "addx %r2,%3,%0\n"						\
 	   : "=r" ((USItype)(sh)),					\
 	     "=&r" ((USItype)(sl))					\
 	   : "%rJ" ((USItype)(ah)),					\
@@ -14,8 +14,8 @@
 	     "rI" ((USItype)(bl))					\
 	   : "cc")
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) 				\
-  __asm__ ("subcc %r4,%5,%1
-	subx %r2,%3,%0"							\
+  __asm__ ("subcc %r4,%5,%1\n\t"						\
+	   "subx %r2,%3,%0\n"						\
 	   : "=r" ((USItype)(sh)),					\
 	     "=&r" ((USItype)(sl))					\
 	   : "rJ" ((USItype)(ah)),					\
@@ -25,46 +25,46 @@
 	   : "cc")

 #define umul_ppmm(w1, w0, u, v) \
-  __asm__ ("! Inlined umul_ppmm
-	wr	%%g0,%2,%%y	! SPARC has 0-3 delay insn after a wr
-	sra	%3,31,%%g2	! Don't move this insn
-	and	%2,%%g2,%%g2	! Don't move this insn
-	andcc	%%g0,0,%%g1	! Don't move this insn
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,%3,%%g1
-	mulscc	%%g1,0,%%g1
-	add	%%g1,%%g2,%0
-	rd	%%y,%1"							\
+  __asm__ ("! Inlined umul_ppmm\n\t"					\
+	"wr	%%g0,%2,%%y	! SPARC has 0-3 delay insn after a wr\n\t" \
+	"sra	%3,31,%%g2	! Don't move this insn\n\t"		\
+	"and	%2,%%g2,%%g2	! Don't move this insn\n\t"		\
+	"andcc	%%g0,0,%%g1	! Don't move this insn\n\t"		\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,0,%%g1\n\t" 					\
+	"add	%%g1,%%g2,%0\n\t" 					\
+	"rd	%%y,%1\n"						\
 	   : "=r" ((USItype)(w1)),					\
 	     "=r" ((USItype)(w0))					\
 	   : "%rI" ((USItype)(u)),					\
@@ -74,30 +74,30 @@
 /* It's quite necessary to add this much assembler for the sparc.
    The default udiv_qrnnd (in C) is more than 10 times slower!  */
 #define udiv_qrnnd(q, r, n1, n0, d) \
-  __asm__ ("! Inlined udiv_qrnnd
-	mov	32,%%g1
-	subcc	%1,%2,%%g0
-1:	bcs	5f
-	 addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb
-	sub	%1,%2,%1	! this kills msb of n
-	addx	%1,%1,%1	! so this can't give carry
-	subcc	%%g1,1,%%g1
-2:	bne	1b
-	 subcc	%1,%2,%%g0
-	bcs	3f
-	 addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb
-	b	3f
-	 sub	%1,%2,%1	! this kills msb of n
-4:	sub	%1,%2,%1
-5:	addxcc	%1,%1,%1
-	bcc	2b
-	 subcc	%%g1,1,%%g1
-! Got carry from n.  Subtract next step to cancel this carry.
-	bne	4b
-	 addcc	%0,%0,%0	! shift n1n0 and a 0-bit in lsb
-	sub	%1,%2,%1
-3:	xnor	%0,0,%0
-	! End of inline udiv_qrnnd"					\
+  __asm__ ("! Inlined udiv_qrnnd\n\t"					\
+	   "mov	32,%%g1\n\t"						\
+	   "subcc	%1,%2,%%g0\n\t"					\
+	   "1:	bcs	5f\n\t"						\
+	   "addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb\n\t"	\
+	   "sub	%1,%2,%1	! this kills msb of n\n\t"		\
+	   "addx	%1,%1,%1	! so this can't give carry\n\t"	\
+	   "subcc	%%g1,1,%%g1\n\t"				\
+	   "2:	bne	1b\n\t"						\
+	   "subcc	%1,%2,%%g0\n\t"					\
+	   "bcs	3f\n\t"							\
+	   "addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb\n\t"	\
+	   "b		3f\n\t"						\
+	   "sub	%1,%2,%1	! this kills msb of n\n\t"		\
+	   "4:	sub	%1,%2,%1\n\t"					\
+	   "5:	addxcc	%1,%1,%1\n\t"					\
+	   "bcc	2b\n\t"							\
+	   "subcc	%%g1,1,%%g1\n\t"				\
+	   "! Got carry from n.  Subtract next step to cancel this carry.\n\t" \
+	   "bne	4b\n\t"							\
+	   "addcc	%0,%0,%0	! shift n1n0 and a 0-bit in lsb\n\t" \
+	   "sub	%1,%2,%1\n\t"						\
+	   "3:	xnor	%0,0,%0\n\t"					\
+	   "! End of inline udiv_qrnnd\n"				\
 	   : "=&r" ((USItype)(q)),					\
 	     "=&r" ((USItype)(r))					\
 	   : "r" ((USItype)(d)),					\
diff -urN linux-old/include/asm-sparc/sfp-machine.h linux-new/include/asm-sparc/sfp-machine.h
--- linux-old/include/asm-sparc/sfp-machine.h	2000-05-09 01:00:01.000000000 -0400
+++ linux-new/include/asm-sparc/sfp-machine.h	2003-05-20 22:14:29.000000000 -0400
@@ -77,9 +77,9 @@

 /* Some assembly to speed things up. */
 #define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)			\
-  __asm__ ("addcc %r7,%8,%2
-	    addxcc %r5,%6,%1
-	    addx %r3,%4,%0"						\
+  __asm__ ("addcc %r7,%8,%2\n\t"					\
+	   "addxcc %r5,%6,%1\n\t"					\
+	   "addx %r3,%4,%0\n"						\
 	   : "=r" ((USItype)(r2)),					\
 	     "=&r" ((USItype)(r1)),					\
 	     "=&r" ((USItype)(r0))					\
@@ -92,9 +92,9 @@
 	   : "cc")

 #define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)			\
-  __asm__ ("subcc %r7,%8,%2
-	    subxcc %r5,%6,%1
-	    subx %r3,%4,%0"						\
+  __asm__ ("subcc %r7,%8,%2\n\t"					\
+	    "subxcc %r5,%6,%1\n\t"					\
+	    "subx %r3,%4,%0\n"						\
 	   : "=r" ((USItype)(r2)),					\
 	     "=&r" ((USItype)(r1)),					\
 	     "=&r" ((USItype)(r0))					\
@@ -111,11 +111,11 @@
     /* We need to fool gcc,  as we need to pass more than 10		\
        input/outputs.  */						\
     register USItype _t1 __asm__ ("g1"), _t2 __asm__ ("g2");		\
-    __asm__ __volatile__ ("
-	    addcc %r8,%9,%1
-	    addxcc %r6,%7,%0
-	    addxcc %r4,%5,%%g2
-	    addx %r2,%3,%%g1"						\
+    __asm__ __volatile__ (						\
+	    "addcc %r8,%9,%1\n\t"					\
+	    "addxcc %r6,%7,%0\n\t"					\
+	    "addxcc %r4,%5,%%g2\n\t"					\
+	    "addx %r2,%3,%%g1\n\t"					\
 	   : "=&r" ((USItype)(r1)),					\
 	     "=&r" ((USItype)(r0))					\
 	   : "%rJ" ((USItype)(x3)),					\
@@ -136,11 +136,11 @@
     /* We need to fool gcc,  as we need to pass more than 10		\
        input/outputs.  */						\
     register USItype _t1 __asm__ ("g1"), _t2 __asm__ ("g2");		\
-    __asm__ __volatile__ ("
-	    subcc %r8,%9,%1
-	    subxcc %r6,%7,%0
-	    subxcc %r4,%5,%%g2
-	    subx %r2,%3,%%g1"						\
+    __asm__ __volatile__ (						\
+	    "subcc %r8,%9,%1\n\t"					\
+	    "subxcc %r6,%7,%0\n\t"					\
+	    "subxcc %r4,%5,%%g2\n\t"					\
+	    "subx %r2,%3,%%g1\n\t"					\
 	   : "=&r" ((USItype)(r1)),					\
 	     "=&r" ((USItype)(r0))					\
 	   : "%rJ" ((USItype)(x3)),					\
@@ -161,10 +161,10 @@
 #define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0) __FP_FRAC_SUB_4(x3,x2,x1,x0,x3,x2,x1,x0,y3,y2,y1,y0)

 #define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i)					\
-  __asm__ ("addcc %3,%4,%3
-	    addxcc %2,%%g0,%2
-	    addxcc %1,%%g0,%1
-	    addx %0,%%g0,%0"						\
+  __asm__ ("addcc %3,%4,%3\n\t"						\
+	   "addxcc %2,%%g0,%2\n\t"					\
+	   "addxcc %1,%%g0,%1\n\t"					\
+	   "addx %0,%%g0,%0\n\t"					\
 	   : "=&r" ((USItype)(x3)),					\
 	     "=&r" ((USItype)(x2)),					\
 	     "=&r" ((USItype)(x1)),					\


It took me a while to find the time, but here's a second crack at the
checksum.h cleanup to make gcc-3.3 happy with 2.4.  Can someone check this
to verify that I did it correctly this time?  I'm writing from the machine
now over the network, but that doesn't mean it's not dumb luck.

Please apply to 2.4 if it's acceptable.  If you want me to re-diff the
other gcc-3.3 fixes let me know.

patch below sig.

john.c


On Thu, 29 May 2003, John Clemens wrote:
> DaveM said:
> > Understood.
> >
> > But you have to fix this differently.  If you remove these things
> > from the clobber list, you must mark the variables passed in as
> > follows:
> >
> > 1) Either output-only, and therefore using "=&r" (which means
> >    "written before all inputs are consumed", it prevents gcc from
> >    using the same register for "ret" for other input values)
> >
> >    This applies to "ret" so merely change it from "=r" to "=&r"
> >
> > 2) or if as input, you must mention it in the outputs, also
> >    using "=&r" so that gcc knows the register is written by the
> >    asm statement.
> >
> >    This is what to do with "d", it means that all the %N numbers
> >    get changed so be careful.
> >
> > So probably this all amounts to:
> >
> >         __asm__ __volatile__ (
> >                 "call " C_LABEL_STR(__csum_partial_copy_sparc_generic)
> > "\n\t"
> >                 " mov %5, %%g7\n"
> >         : "=&r" (ret), "=&r" (d) : "0" (ret), "1" (d), "r" (l), "r" (sum) :
> >         "o2", "o3", "o4", "o5", "o7", "g2", "g3", "g4", "g5", "g7");
> >
> > Meanwhile, it would also be a good idea to change this to use
> > "static inline" instead of "extern __inline__".

-- 
John Clemens          http://www.deater.net/john
john@deater.net     ICQ: 7175925, IM: PianoManO8
      "I Hate Quotes" -- Samuel L. Clemens


--- linux-old/include/asm-sparc/checksum.h	2002-08-02 20:39:45.000000000 -0400
+++ linux-new/include/asm-sparc/checksum.h	2003-06-11 00:31:08.000000000 -0400
@@ -48,7 +48,7 @@
   
 extern unsigned int __csum_partial_copy_sparc_generic (const char *, char *);
 
-extern __inline__ unsigned int 
+static inline unsigned int
 csum_partial_copy_nocheck (const char *src, char *dst, int len, 
 			   unsigned int sum)
 {
@@ -58,13 +58,14 @@
 	
 	__asm__ __volatile__ (
 		"call " C_LABEL_STR(__csum_partial_copy_sparc_generic) "\n\t"
-		" mov %4, %%g7\n"
-	: "=r" (ret) : "0" (ret), "r" (d), "r" (l), "r" (sum) :
-	"o1", "o2", "o3", "o4", "o5", "o7", "g1", "g2", "g3", "g4", "g5", "g7");
+		" mov %6, %%g7\n"
+	: "=&r" (ret), "=&r" (d), "=&r" (l)
+	: "0" (ret), "1" (d), "2" (l), "r" (sum)
+	: "o2", "o3", "o4", "o5", "o7", "g2", "g3", "g4", "g5", "g7");
 	return ret;
 }
 
-extern __inline__ unsigned int 
+static inline unsigned int
 csum_partial_copy_from_user(const char *src, char *dst, int len, 
 			    unsigned int sum, int *err)
   {
@@ -85,14 +86,15 @@
 		".previous\n"
 		"1:\n\t"
 		"call " C_LABEL_STR(__csum_partial_copy_sparc_generic) "\n\t"
-		" st %5, [%%sp + 64]\n"
-		: "=r" (ret) : "0" (ret), "r" (d), "r" (l), "r" (s), "r" (err) :
-		"o1", "o2", "o3", "o4", "o5", "o7", "g1", "g2", "g3", "g4", "g5", "g7");
+		" st %8, [%%sp + 64]\n"
+		: "=&r" (ret), "=&r" (d), "=&r" (l), "=&r" (s)
+		: "0" (ret), "1" (d), "2" (l), "3" (s), "r" (err)
+		: "o2", "o3", "o4", "o5", "o7", "g2", "g3", "g4", "g5");
 		return ret;
 	}
   }
   
-extern __inline__ unsigned int 
+static inline unsigned int
 csum_partial_copy_to_user(const char *src, char *dst, int len, 
 			  unsigned int sum, int *err)
 {
@@ -112,9 +114,10 @@
 		".previous\n"
 		"1:\n\t"
 		"call " C_LABEL_STR(__csum_partial_copy_sparc_generic) "\n\t"
-		" st %5, [%%sp + 64]\n"
-		: "=r" (ret) : "0" (ret), "r" (d), "r" (l), "r" (s), "r" (err) :
-		"o1", "o2", "o3", "o4", "o5", "o7", "g1", "g2", "g3", "g4", "g5", "g7");
+		" st %8, [%%sp + 64]\n"
+		: "=&r" (ret), "=&r" (d), "=&r" (l), "=&r" (s)
+		: "0" (ret), "1" (d), "2" (l), "3" (s), "r" (err)
+		: "o2", "o3", "o4", "o5", "o7", "g2", "g3", "g4", "g5");
 		return ret;
 	}
 }
@@ -125,7 +128,7 @@
 /* ihl is always 5 or greater, almost always is 5, and iph is word aligned
  * the majority of the time.
  */
-extern __inline__ unsigned short ip_fast_csum(__const__ unsigned char *iph,
+static inline unsigned short ip_fast_csum(__const__ unsigned char *iph,
 					      unsigned int ihl)
 {
 	unsigned short sum;
@@ -163,7 +166,7 @@
 }

 /* Fold a partial checksum without adding pseudo headers. */
-extern __inline__ unsigned int csum_fold(unsigned int sum)
+static inline unsigned int csum_fold(unsigned int sum)
 {
 	unsigned int tmp;

@@ -177,7 +180,7 @@
 	return sum;
 }

-extern __inline__ unsigned long csum_tcpudp_nofold(unsigned long saddr,
+static inline unsigned long csum_tcpudp_nofold(unsigned long saddr,
 						   unsigned long daddr,
 						   unsigned int len,
 						   unsigned short proto,
@@ -209,7 +212,7 @@

 #define _HAVE_ARCH_IPV6_CSUM

-static __inline__ unsigned short int csum_ipv6_magic(struct in6_addr *saddr,
+static inline unsigned short int csum_ipv6_magic(struct in6_addr *saddr,
 						     struct in6_addr *daddr,
 						     __u32 len,
 						     unsigned short proto,
@@ -244,7 +247,7 @@
 }

 /* this routine is used for miscellaneous IP-like checksums, mainly in icmp.c */
-extern __inline__ unsigned short ip_compute_csum(unsigned char * buff, int len)
+static inline unsigned short ip_compute_csum(unsigned char * buff, int len)
 {
 	return csum_fold(csum_partial(buff, len, 0));
 }

-
To unsubscribe from this list: send the line "unsubscribe sparclinux" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html