IRQ balancing for i386 IO-APIC interrupts (patch against linux-2.4.20).

What the patch does
  * arch/i386/kernel/io_apic.c: moves set_ioapic_affinity() towards the top
    of the file and adds balance_irq(), which is called from
    ack_edge_ioapic_irq() and from the handler patched by the hunk at old
    line 1253.  At most once per jiffy and per IRQ, balance_irq() asks
    move() for a new target CPU and programs it into the IO-APIC.
  * arch/i386/kernel/irq.c: makes irq_affinity[] non-static so io_apic.c
    can read it.
  * include/asm-i386/hardirq.h: adds idle_timestamp to irq_cpustat_t.
  * include/linux/sched.h, kernel/sched.c: add and declare idle_cpu().

Changes made while restoring this file from its collapsed form
  * One record per line again; hunk line counts were re-checked.
  * irq_balance[] now starts every IRQ on CPU 0 instead of CPU 1.  move()
    only gives up its idle-only search when the walk revisits the starting
    CPU; with a single CPU up, CPU 1 is never visited and the walk would
    spin until CPU 0 happened to be idle.
  * IRQ_ALLOWED() parenthesises its cpu argument and shifts 1UL, matching
    the unsigned long mask it is tested against.
  * balance_irq() passes 1UL << cpu to set_ioapic_affinity(), whose mask
    parameter is unsigned long.
  * Comments were added to the new code; the new-side line numbers of the
    io_apic.c hunks were adjusted for the 19 added comment lines.

Notes for the reviewer
  * NOTE(review): the header names of the #include lines in the first
    io_apic.c hunk were already missing in the text this file was restored
    from; they are left bare, and the position of the blank context line in
    that hunk is an assumption.  Restore them from the 2.4.20 tree before
    applying.
  * NOTE(review): original indentation was lost; tabs below are a
    best-effort reconstruction.  Applying with "patch -p1 -l" (ignore
    whitespace) is advisable.
  * NOTE(review): nothing in this patch writes irq_stat[].idle_timestamp;
    confirm whether a companion change to the idle loop is missing.

diff -ruN linux-2.4.20.orig/arch/i386/kernel/io_apic.c linux-2.4.20/arch/i386/kernel/io_apic.c
--- linux-2.4.20.orig/arch/i386/kernel/io_apic.c	Thu Nov 28 16:53:09 2002
+++ linux-2.4.20/arch/i386/kernel/io_apic.c	Tue Dec 17 10:37:58 2002
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include
 #include
 
 #include
@@ -188,6 +189,105 @@
 			clear_IO_APIC_pin(apic, pin);
 }
 
+static void set_ioapic_affinity (unsigned int irq, unsigned long mask)
+{
+	unsigned long flags;
+
+	/*
+	 * Only the first 8 bits are valid.
+	 */
+	mask = mask << 24;
+	spin_lock_irqsave(&ioapic_lock, flags);
+	__DO_ACTION(1, = mask, )
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+#if CONFIG_SMP
+
+typedef struct {
+	unsigned int cpu;
+	unsigned long timestamp;
+} ____cacheline_aligned irq_balance_t;
+
+/*
+ * Every IRQ starts on CPU 0: move() only drops its idle-only restriction
+ * when the walk revisits the starting CPU, so the start has to be a CPU
+ * number that exists even when only one CPU is up.
+ */
+static irq_balance_t irq_balance[NR_IRQS] __cacheline_aligned
+			= { [ 0 ... NR_IRQS-1 ] = { 0, 0 } };
+
+extern unsigned long irq_affinity [NR_IRQS];
+
+#endif
+
+#define IDLE_ENOUGH(cpu,now) \
+		(idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1))
+
+#define IRQ_ALLOWED(cpu,allowed_mask) \
+		((1UL << (cpu)) & (allowed_mask))
+
+/*
+ * Pick the next CPU for an IRQ by walking from curr_cpu with wrap-around,
+ * upwards if direction is 1 and downwards otherwise.  Until the walk
+ * comes back to curr_cpu only CPUs that are both allowed and idle enough
+ * are accepted; after that the first allowed CPU is taken.
+ *
+ * allowed_mask must contain at least one CPU below smp_num_cpus,
+ * otherwise the walk never terminates.
+ */
+static unsigned long move(int curr_cpu, unsigned long allowed_mask, unsigned long now, int direction)
+{
+	int search_idle = 1;
+	int cpu = curr_cpu;
+
+	goto inside;
+
+	do {
+		if (unlikely(cpu == curr_cpu))
+			search_idle = 0;
+inside:
+		if (direction == 1) {
+			cpu++;
+			if (cpu >= smp_num_cpus)
+				cpu = 0;
+		} else {
+			cpu--;
+			if (cpu == -1)
+				cpu = smp_num_cpus-1;
+		}
+	} while (!IRQ_ALLOWED(cpu,allowed_mask) ||
+			(search_idle && !IDLE_ENOUGH(cpu,now)));
+
+	return cpu;
+}
+
+/*
+ * Re-target irq at most once per jiffy: choose a CPU with move(), using
+ * the low bit of the TSC as the walk direction, and program it into the
+ * IO-APIC.  Does nothing unless CONFIG_SMP is set.
+ */
+static inline void balance_irq(int irq)
+{
+#if CONFIG_SMP
+	irq_balance_t *entry = irq_balance + irq;
+	unsigned long now = jiffies;
+
+	if (unlikely(entry->timestamp != now)) {
+		unsigned long allowed_mask;
+		int random_number;
+
+		rdtscl(random_number);
+		random_number &= 1;
+
+		allowed_mask = cpu_online_map & irq_affinity[irq];
+		entry->timestamp = now;
+		entry->cpu = move(entry->cpu, allowed_mask, now, random_number);
+		set_ioapic_affinity(irq, 1UL << entry->cpu);
+	}
+#endif
+}
+
 /*
  * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
  * specific CPU-side IRQs.
@@ -693,8 +793,7 @@
 }
 
 /*
- * Set up the 8259A-master output pin as broadcast to all
- * CPUs.
+ * Set up the 8259A-master output pin:
  */
 void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
 {
@@ -1214,6 +1313,7 @@
  */
 static void ack_edge_ioapic_irq(unsigned int irq)
 {
+	balance_irq(irq);
 	if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
 					== (IRQ_PENDING | IRQ_DISABLED))
 		mask_IO_APIC_irq(irq);
@@ -1253,6 +1353,7 @@
 	unsigned long v;
 	int i;
 
+	balance_irq(irq);
 /*
  * It appears there is an erratum which affects at least version 0x11
  * of I/O APIC (that's the 82093AA and cores integrated into various
@@ -1309,19 +1410,6 @@
 
 static void mask_and_ack_level_ioapic_irq (unsigned int irq) { /* nothing */ }
 
-static void set_ioapic_affinity (unsigned int irq, unsigned long mask)
-{
-	unsigned long flags;
-	/*
-	 * Only the first 8 bits are valid.
-	 */
-	mask = mask << 24;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	__DO_ACTION(1, = mask, )
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
 /*
  * Level and edge triggered IO-APIC interrupts need different handling,
  * so we use two separate IRQ descriptors. Edge triggered IRQs can be
diff -ruN linux-2.4.20.orig/arch/i386/kernel/irq.c linux-2.4.20/arch/i386/kernel/irq.c
--- linux-2.4.20.orig/arch/i386/kernel/irq.c	Thu Nov 28 16:53:09 2002
+++ linux-2.4.20/arch/i386/kernel/irq.c	Tue Dec 17 10:37:58 2002
@@ -1090,7 +1090,7 @@
 
 static struct proc_dir_entry * smp_affinity_entry [NR_IRQS];
 
-static unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL };
+unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL };
 static int irq_affinity_read_proc (char *page, char **start, off_t off,
 			int count, int *eof, void *data)
 {
diff -ruN linux-2.4.20.orig/include/asm-i386/hardirq.h linux-2.4.20/include/asm-i386/hardirq.h
--- linux-2.4.20.orig/include/asm-i386/hardirq.h	Thu Nov 22 12:46:19 2001
+++ linux-2.4.20/include/asm-i386/hardirq.h	Tue Dec 17 10:37:58 2002
@@ -12,6 +12,7 @@
 	unsigned int __local_bh_count;
 	unsigned int __syscall_count;
 	struct task_struct * __ksoftirqd_task; /* waitqueue is too large */
+	unsigned long idle_timestamp;
 	unsigned int __nmi_count;	/* arch dependent */
 } ____cacheline_aligned irq_cpustat_t;
 
diff -ruN linux-2.4.20.orig/include/linux/sched.h linux-2.4.20/include/linux/sched.h
--- linux-2.4.20.orig/include/linux/sched.h	Thu Nov 28 16:53:15 2002
+++ linux-2.4.20/include/linux/sched.h	Tue Dec 17 10:37:58 2002
@@ -147,6 +147,7 @@
 
 extern void sched_init(void);
 extern void init_idle(void);
+extern int idle_cpu(int cpu);
 extern void show_state(void);
 extern void cpu_init (void);
 extern void trap_init(void);
diff -ruN linux-2.4.20.orig/kernel/sched.c linux-2.4.20/kernel/sched.c
--- linux-2.4.20.orig/kernel/sched.c	Thu Nov 28 16:53:15 2002
+++ linux-2.4.20/kernel/sched.c	Tue Dec 17 10:37:58 2002
@@ -118,6 +118,11 @@
 #define can_schedule(p,cpu) \
 	((p)->cpus_runnable & (p)->cpus_allowed & (1 << cpu))
 
+int idle_cpu(int cpu)
+{
+	return cpu_curr(cpu) == idle_task(cpu);
+}
+
 #else
 
 #define idle_task(cpu) (&init_task)