--- /dev/null
+
+Here's what I did to get past gcc-3.3's unnecessarily draconian
+preprocessor. This is against 2.4.21-rc2 + previous gcc patch + udiv
+patch. It includes the checksum-clobbered patch i submitted before.
+
+UP boots and runs fine.. SMP gives me a "Unimplemented trap = 2b" right
+after freeing kernel memory.. but that may have more to do with other
+issues. I'm working on it, but I don't have a serial console so oops
+tracing is.. well.. giving me writer's cramp ;)
+
+cheers.
+john.c
+--
+John Clemens http://www.deater.net/john
+john@deater.net ICQ: 7175925, IM: PianoManO8
+ "I Hate Quotes" -- Samuel L. Clemens
+
+Binary files linux-old/arch/sparc/boot/btfixupprep and linux-new/arch/sparc/boot/btfixupprep differ
+diff -urN linux-old/arch/sparc/kernel/sun4d_smp.c linux-new/arch/sparc/kernel/sun4d_smp.c
+--- linux-old/arch/sparc/kernel/sun4d_smp.c 2002-08-02 20:39:43.000000000 -0400
++++ linux-new/arch/sparc/kernel/sun4d_smp.c 2003-05-20 22:07:10.000000000 -0400
+@@ -345,10 +345,10 @@
+ unsigned long a4 asm("i4") = arg4;
+ unsigned long a5 asm("i5") = arg5;
+
+- __asm__ __volatile__("
+- std %0, [%6]
+- std %2, [%6 + 8]
+- std %4, [%6 + 16]" : :
++ __asm__ __volatile__(
++ "std %0, [%6]\n\t"
++ "std %2, [%6 + 8]\n\t"
++ "std %4, [%6 + 16]\n\t" : :
+ "r"(f), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5),
+ "r" (&ccall_info.func));
+ }
+diff -urN linux-old/arch/sparc/lib/Makefile linux-new/arch/sparc/lib/Makefile
+--- linux-old/arch/sparc/lib/Makefile 2000-12-29 17:07:20.000000000 -0500
++++ linux-new/arch/sparc/lib/Makefile 2003-05-20 22:25:13.000000000 -0400
+@@ -3,10 +3,10 @@
+ #
+
+ .S.s:
+- $(CPP) $(AFLAGS) -ansi -DST_DIV0=0x2 $< -o $*.s
++ $(CPP) $(AFLAGS) -DST_DIV0=0x2 $< -o $*.s
+
+ .S.o:
+- $(CC) $(AFLAGS) -ansi -DST_DIV0=0x2 -c $< -o $*.o
++ $(CC) $(AFLAGS) -DST_DIV0=0x2 -c $< -o $*.o
+
+ L_TARGET = lib.a
+
+diff -urN linux-old/arch/sparc/math-emu/sfp-util.h linux-new/arch/sparc/math-emu/sfp-util.h
+--- linux-old/arch/sparc/math-emu/sfp-util.h 1999-05-29 14:09:04.000000000 -0400
++++ linux-new/arch/sparc/math-emu/sfp-util.h 2003-05-20 22:12:21.000000000 -0400
+@@ -4,8 +4,8 @@
+ #include <asm/byteorder.h>
+
+ #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+- __asm__ ("addcc %r4,%5,%1
+- addx %r2,%3,%0" \
++ __asm__ ("addcc %r4,%5,%1\n\t" \
++ "addx %r2,%3,%0\n" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%rJ" ((USItype)(ah)), \
+@@ -14,8 +14,8 @@
+ "rI" ((USItype)(bl)) \
+ : "cc")
+ #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+- __asm__ ("subcc %r4,%5,%1
+- subx %r2,%3,%0" \
++ __asm__ ("subcc %r4,%5,%1\n\t" \
++ "subx %r2,%3,%0\n" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "rJ" ((USItype)(ah)), \
+@@ -25,46 +25,46 @@
+ : "cc")
+
+ #define umul_ppmm(w1, w0, u, v) \
+- __asm__ ("! Inlined umul_ppmm
+- wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr
+- sra %3,31,%%g2 ! Don't move this insn
+- and %2,%%g2,%%g2 ! Don't move this insn
+- andcc %%g0,0,%%g1 ! Don't move this insn
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,%3,%%g1
+- mulscc %%g1,0,%%g1
+- add %%g1,%%g2,%0
+- rd %%y,%1" \
++ __asm__ ("! Inlined umul_ppmm\n\t" \
++ "wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n\t" \
++ "sra %3,31,%%g2 ! Don't move this insn\n\t" \
++ "and %2,%%g2,%%g2 ! Don't move this insn\n\t" \
++ "andcc %%g0,0,%%g1 ! Don't move this insn\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,%3,%%g1\n\t" \
++ "mulscc %%g1,0,%%g1\n\t" \
++ "add %%g1,%%g2,%0\n\t" \
++ "rd %%y,%1\n" \
+ : "=r" ((USItype)(w1)), \
+ "=r" ((USItype)(w0)) \
+ : "%rI" ((USItype)(u)), \
+@@ -74,30 +74,30 @@
+ /* It's quite necessary to add this much assembler for the sparc.
+ The default udiv_qrnnd (in C) is more than 10 times slower! */
+ #define udiv_qrnnd(q, r, n1, n0, d) \
+- __asm__ ("! Inlined udiv_qrnnd
+- mov 32,%%g1
+- subcc %1,%2,%%g0
+-1: bcs 5f
+- addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb
+- sub %1,%2,%1 ! this kills msb of n
+- addx %1,%1,%1 ! so this can't give carry
+- subcc %%g1,1,%%g1
+-2: bne 1b
+- subcc %1,%2,%%g0
+- bcs 3f
+- addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb
+- b 3f
+- sub %1,%2,%1 ! this kills msb of n
+-4: sub %1,%2,%1
+-5: addxcc %1,%1,%1
+- bcc 2b
+- subcc %%g1,1,%%g1
+-! Got carry from n. Subtract next step to cancel this carry.
+- bne 4b
+- addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb
+- sub %1,%2,%1
+-3: xnor %0,0,%0
+- ! End of inline udiv_qrnnd" \
++ __asm__ ("! Inlined udiv_qrnnd\n\t" \
++ "mov 32,%%g1\n\t" \
++ "subcc %1,%2,%%g0\n\t" \
++ "1: bcs 5f\n\t" \
++ "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \
++ "sub %1,%2,%1 ! this kills msb of n\n\t" \
++ "addx %1,%1,%1 ! so this can't give carry\n\t" \
++ "subcc %%g1,1,%%g1\n\t" \
++ "2: bne 1b\n\t" \
++ "subcc %1,%2,%%g0\n\t" \
++ "bcs 3f\n\t" \
++ "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \
++ "b 3f\n\t" \
++ "sub %1,%2,%1 ! this kills msb of n\n\t" \
++ "4: sub %1,%2,%1\n\t" \
++ "5: addxcc %1,%1,%1\n\t" \
++ "bcc 2b\n\t" \
++ "subcc %%g1,1,%%g1\n\t" \
++ "! Got carry from n. Subtract next step to cancel this carry.\n\t" \
++ "bne 4b\n\t" \
++ "addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n\t" \
++ "sub %1,%2,%1\n\t" \
++ "3: xnor %0,0,%0\n\t" \
++ "! End of inline udiv_qrnnd\n" \
+ : "=&r" ((USItype)(q)), \
+ "=&r" ((USItype)(r)) \
+ : "r" ((USItype)(d)), \
+diff -urN linux-old/include/asm-sparc/sfp-machine.h linux-new/include/asm-sparc/sfp-machine.h
+--- linux-old/include/asm-sparc/sfp-machine.h 2000-05-09 01:00:01.000000000 -0400
++++ linux-new/include/asm-sparc/sfp-machine.h 2003-05-20 22:14:29.000000000 -0400
+@@ -77,9 +77,9 @@
+
+ /* Some assembly to speed things up. */
+ #define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \
+- __asm__ ("addcc %r7,%8,%2
+- addxcc %r5,%6,%1
+- addx %r3,%4,%0" \
++ __asm__ ("addcc %r7,%8,%2\n\t" \
++ "addxcc %r5,%6,%1\n\t" \
++ "addx %r3,%4,%0\n" \
+ : "=r" ((USItype)(r2)), \
+ "=&r" ((USItype)(r1)), \
+ "=&r" ((USItype)(r0)) \
+@@ -92,9 +92,9 @@
+ : "cc")
+
+ #define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \
+- __asm__ ("subcc %r7,%8,%2
+- subxcc %r5,%6,%1
+- subx %r3,%4,%0" \
++ __asm__ ("subcc %r7,%8,%2\n\t" \
++ "subxcc %r5,%6,%1\n\t" \
++ "subx %r3,%4,%0\n" \
+ : "=r" ((USItype)(r2)), \
+ "=&r" ((USItype)(r1)), \
+ "=&r" ((USItype)(r0)) \
+@@ -111,11 +111,11 @@
+ /* We need to fool gcc, as we need to pass more than 10 \
+ input/outputs. */ \
+ register USItype _t1 __asm__ ("g1"), _t2 __asm__ ("g2"); \
+- __asm__ __volatile__ ("
+- addcc %r8,%9,%1
+- addxcc %r6,%7,%0
+- addxcc %r4,%5,%%g2
+- addx %r2,%3,%%g1" \
++ __asm__ __volatile__ ( \
++ "addcc %r8,%9,%1\n\t" \
++ "addxcc %r6,%7,%0\n\t" \
++ "addxcc %r4,%5,%%g2\n\t" \
++ "addx %r2,%3,%%g1\n\t" \
+ : "=&r" ((USItype)(r1)), \
+ "=&r" ((USItype)(r0)) \
+ : "%rJ" ((USItype)(x3)), \
+@@ -136,11 +136,11 @@
+ /* We need to fool gcc, as we need to pass more than 10 \
+ input/outputs. */ \
+ register USItype _t1 __asm__ ("g1"), _t2 __asm__ ("g2"); \
+- __asm__ __volatile__ ("
+- subcc %r8,%9,%1
+- subxcc %r6,%7,%0
+- subxcc %r4,%5,%%g2
+- subx %r2,%3,%%g1" \
++ __asm__ __volatile__ ( \
++ "subcc %r8,%9,%1\n\t" \
++ "subxcc %r6,%7,%0\n\t" \
++ "subxcc %r4,%5,%%g2\n\t" \
++ "subx %r2,%3,%%g1\n\t" \
+ : "=&r" ((USItype)(r1)), \
+ "=&r" ((USItype)(r0)) \
+ : "%rJ" ((USItype)(x3)), \
+@@ -161,10 +161,10 @@
+ #define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0) __FP_FRAC_SUB_4(x3,x2,x1,x0,x3,x2,x1,x0,y3,y2,y1,y0)
+
+ #define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i) \
+- __asm__ ("addcc %3,%4,%3
+- addxcc %2,%%g0,%2
+- addxcc %1,%%g0,%1
+- addx %0,%%g0,%0" \
++ __asm__ ("addcc %3,%4,%3\n\t" \
++ "addxcc %2,%%g0,%2\n\t" \
++ "addxcc %1,%%g0,%1\n\t" \
++ "addx %0,%%g0,%0\n\t" \
+ : "=&r" ((USItype)(x3)), \
+ "=&r" ((USItype)(x2)), \
+ "=&r" ((USItype)(x1)), \
+
+
+Took me a while to find the time but here's a second crack at the
+checksum.h cleanup to make gcc-3.3 happy w/ 2.4. Can someone check this
+to verify that I did it correctly this time? I'm writing from the machine
+now over the network, but that doesn't mean it's not dumb luck.
+
+Please apply to 2.4 if it's acceptable. If you want me to re-diff the
+other gcc-3.3 fixes let me know.
+
+patch below sig.
+
+john.c
+
+
+On Thu, 29 May 2003, John Clemens wrote:
+> DaveM said:
+> > Understood.
+> >
+> > But you have to fix this differently. If you remove these things
+> > from the clobber list, you must mark the variables passed in as
+> > follows:
+> >
+> > 1) Either output-only, and therefore using "=&r" (which means
+> > "written before all inputs are consumed", it prevents gcc from
+> > using the same register for "ret" for other input values)
+> >
+> >       This applies to "ret" so merely change it from "=r" to "=&r"
+> >
+> > 2) or if as input, you must mention it in the outputs, also
+> > using "=&r" so that gcc knows the register is written by the
+> > asm statement.
+> >
+> > This is what to do with "d", it means that all the %N numbers
+> > get changed so be careful.
+> >
+> > So probably this all amounts to:
+> >
+> > __asm__ __volatile__ (
+> > "call " C_LABEL_STR(__csum_partial_copy_sparc_generic)
+> > "\n\t"
+> > " mov %5, %%g7\n"
+> > : "=&r" (ret), "=&r" (d) : "0" (ret), "1" (d), "r" (l), "r" (sum) :
+> > "o2", "o3", "o4", "o5", "o7", "g2", "g3", "g4", "g5", "g7");
+> >
+> > Meanwhile, also would be a good idea to change this to use
+> > "static inline" instead of "extern __inline__".
+
+--
+John Clemens http://www.deater.net/john
+john@deater.net ICQ: 7175925, IM: PianoManO8
+ "I Hate Quotes" -- Samuel L. Clemens
+
+
+
+--- linux-old/include/asm-sparc/checksum.h 2002-08-02 20:39:45.000000000 -0400
++++ linux-new/include/asm-sparc/checksum.h 2003-06-11 00:31:08.000000000 -0400
+@@ -48,7 +48,7 @@
+
+ extern unsigned int __csum_partial_copy_sparc_generic (const char *, char *);
+
+-extern __inline__ unsigned int
++static inline unsigned int
+ csum_partial_copy_nocheck (const char *src, char *dst, int len,
+ unsigned int sum)
+ {
+@@ -58,13 +58,14 @@
+
+ __asm__ __volatile__ (
+ "call " C_LABEL_STR(__csum_partial_copy_sparc_generic) "\n\t"
+- " mov %4, %%g7\n"
+- : "=r" (ret) : "0" (ret), "r" (d), "r" (l), "r" (sum) :
+- "o1", "o2", "o3", "o4", "o5", "o7", "g1", "g2", "g3", "g4", "g5", "g7");
++ " mov %6, %%g7\n"
++ : "=&r" (ret), "=&r" (d), "=&r" (l)
++ : "0" (ret), "1" (d), "2" (l), "r" (sum)
++ : "o2", "o3", "o4", "o5", "o7", "g2", "g3", "g4", "g5", "g7");
+ return ret;
+ }
+
+-extern __inline__ unsigned int
++static inline unsigned int
+ csum_partial_copy_from_user(const char *src, char *dst, int len,
+ unsigned int sum, int *err)
+ {
+@@ -85,14 +86,15 @@
+ ".previous\n"
+ "1:\n\t"
+ "call " C_LABEL_STR(__csum_partial_copy_sparc_generic) "\n\t"
+- " st %5, [%%sp + 64]\n"
+- : "=r" (ret) : "0" (ret), "r" (d), "r" (l), "r" (s), "r" (err) :
+- "o1", "o2", "o3", "o4", "o5", "o7", "g1", "g2", "g3", "g4", "g5", "g7");
++ " st %8, [%%sp + 64]\n"
++ : "=&r" (ret), "=&r" (d), "=&r" (l), "=&r" (s)
++ : "0" (ret), "1" (d), "2" (l), "3" (s), "r" (err)
++ : "o2", "o3", "o4", "o5", "o7", "g2", "g3", "g4", "g5");
+ return ret;
+ }
+ }
+
+-extern __inline__ unsigned int
++static inline unsigned int
+ csum_partial_copy_to_user(const char *src, char *dst, int len,
+ unsigned int sum, int *err)
+ {
+@@ -112,9 +114,10 @@
+ ".previous\n"
+ "1:\n\t"
+ "call " C_LABEL_STR(__csum_partial_copy_sparc_generic) "\n\t"
+- " st %5, [%%sp + 64]\n"
+- : "=r" (ret) : "0" (ret), "r" (d), "r" (l), "r" (s), "r" (err) :
+- "o1", "o2", "o3", "o4", "o5", "o7", "g1", "g2", "g3", "g4", "g5", "g7");
++ " st %8, [%%sp + 64]\n"
++ : "=&r" (ret), "=&r" (d), "=&r" (l), "=&r" (s)
++ : "0" (ret), "1" (d), "2" (l), "3" (s), "r" (err)
++ : "o2", "o3", "o4", "o5", "o7", "g2", "g3", "g4", "g5");
+ return ret;
+ }
+ }
+@@ -125,7 +128,7 @@
+ /* ihl is always 5 or greater, almost always is 5, and iph is word aligned
+ * the majority of the time.
+ */
+-extern __inline__ unsigned short ip_fast_csum(__const__ unsigned char *iph,
++static inline unsigned short ip_fast_csum(__const__ unsigned char *iph,
+ unsigned int ihl)
+ {
+ unsigned short sum;
+@@ -163,7 +166,7 @@
+ }
+
+ /* Fold a partial checksum without adding pseudo headers. */
+-extern __inline__ unsigned int csum_fold(unsigned int sum)
++static inline unsigned int csum_fold(unsigned int sum)
+ {
+ unsigned int tmp;
+
+@@ -177,7 +180,7 @@
+ return sum;
+ }
+
+-extern __inline__ unsigned long csum_tcpudp_nofold(unsigned long saddr,
++static inline unsigned long csum_tcpudp_nofold(unsigned long saddr,
+ unsigned long daddr,
+ unsigned int len,
+ unsigned short proto,
+@@ -209,7 +212,7 @@
+
+ #define _HAVE_ARCH_IPV6_CSUM
+
+-static __inline__ unsigned short int csum_ipv6_magic(struct in6_addr *saddr,
++static inline unsigned short int csum_ipv6_magic(struct in6_addr *saddr,
+ struct in6_addr *daddr,
+ __u32 len,
+ unsigned short proto,
+@@ -244,7 +247,7 @@
+ }
+
+ /* this routine is used for miscellaneous IP-like checksums, mainly in icmp.c */
+-extern __inline__ unsigned short ip_compute_csum(unsigned char * buff, int len)
++static inline unsigned short ip_compute_csum(unsigned char * buff, int len)
+ {
+ return csum_fold(csum_partial(buff, len, 0));
+ }
+
+-
+To unsubscribe from this list: send the line "unsubscribe sparclinux" in
+the body of a message to majordomo@vger.kernel.org
+More majordomo info at http://vger.kernel.org/majordomo-info.html