1diff -urN linux-2.4.22.org/arch/alpha/kernel/entry.S linux-2.4.22/arch/alpha/kernel/entry.S
2--- linux-2.4.22.org/arch/alpha/kernel/entry.S 2003-11-24 18:29:46.000000000 +0100
3+++ linux-2.4.22/arch/alpha/kernel/entry.S 2003-11-24 18:39:02.000000000 +0100
4@@ -695,7 +695,9 @@
5 ret_from_fork:
6 lda $26,ret_from_sys_call
7 mov $17,$16
8+#if CONFIG_SMP
9 jsr $31,schedule_tail
10+#endif
11 .end ret_from_fork
12
13 .align 3
14diff -urN linux-2.4.22.org/arch/alpha/kernel/process.c linux-2.4.22/arch/alpha/kernel/process.c
15--- linux-2.4.22.org/arch/alpha/kernel/process.c 2003-11-24 18:29:46.000000000 +0100
16+++ linux-2.4.22/arch/alpha/kernel/process.c 2003-11-24 18:39:02.000000000 +0100
17@@ -74,9 +74,6 @@
18 cpu_idle(void)
19 {
20 /* An endless idle loop with no priority at all. */
21- current->nice = 20;
22- current->counter = -100;
23-
24 while (1) {
25 /* FIXME -- EV6 and LCA45 know how to power down
26 the CPU. */
27diff -urN linux-2.4.22.org/arch/alpha/kernel/smp.c linux-2.4.22/arch/alpha/kernel/smp.c
28--- linux-2.4.22.org/arch/alpha/kernel/smp.c 2003-11-24 18:29:46.000000000 +0100
29+++ linux-2.4.22/arch/alpha/kernel/smp.c 2003-11-24 18:39:02.000000000 +0100
30@@ -81,6 +81,8 @@
31 int smp_num_probed; /* Internal processor count */
32 int smp_num_cpus = 1; /* Number that came online. */
33 int smp_threads_ready; /* True once the per process idle is forked. */
34+cycles_t cacheflush_time;
35+unsigned long cache_decay_ticks;
36
37 int __cpu_number_map[NR_CPUS];
38 int __cpu_logical_map[NR_CPUS];
39@@ -155,11 +157,6 @@
40 {
41 int cpuid = hard_smp_processor_id();
42
43- if (current != init_tasks[cpu_number_map(cpuid)]) {
44- printk("BUG: smp_calling: cpu %d current %p init_tasks[cpu_number_map(cpuid)] %p\n",
45- cpuid, current, init_tasks[cpu_number_map(cpuid)]);
46- }
47-
48 DBGS(("CALLIN %d state 0x%lx\n", cpuid, current->state));
49
50 /* Turn on machine checks. */
51@@ -217,9 +214,6 @@
52 DBGS(("smp_callin: commencing CPU %d current %p\n",
53 cpuid, current));
54
55- /* Setup the scheduler for this processor. */
56- init_idle();
57-
58 /* ??? This should be in init_idle. */
59 atomic_inc(&init_mm.mm_count);
60 current->active_mm = &init_mm;
61@@ -227,6 +221,57 @@
62 cpu_idle();
63 }
64
65+
66+/*
67+ * Rough estimation for SMP scheduling, this is the number of cycles it
68+ * takes for a fully memory-limited process to flush the SMP-local cache.
69+ *
70+ * We are not told how much cache there is, so we have to guess.
71+ */
72+static void __init
73+smp_tune_scheduling (int cpuid)
74+{
75+ struct percpu_struct *cpu;
76+ unsigned long on_chip_cache; /* kB */
77+ unsigned long freq; /* Hz */
78+ unsigned long bandwidth = 350; /* MB/s */
79+
80+ cpu = (struct percpu_struct*)((char*)hwrpb + hwrpb->processor_offset
81+ + cpuid * hwrpb->processor_size);
82+ switch (cpu->type)
83+ {
84+ case EV45_CPU:
85+ on_chip_cache = 16 + 16;
86+ break;
87+
88+ case EV5_CPU:
89+ case EV56_CPU:
90+ on_chip_cache = 8 + 8 + 96;
91+ break;
92+
93+ case PCA56_CPU:
94+ on_chip_cache = 16 + 8;
95+ break;
96+
97+ case EV6_CPU:
98+ case EV67_CPU:
99+ default:
100+ on_chip_cache = 64 + 64;
101+ break;
102+ }
103+
104+ freq = hwrpb->cycle_freq ? : est_cycle_freq;
105+
106+ cacheflush_time = (freq / 1000000) * (on_chip_cache << 10) / bandwidth;
107+ cache_decay_ticks = cacheflush_time / (freq / 1000) * HZ / 1000;
108+
109+ printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
110+ cacheflush_time/(freq/1000000),
111+ (cacheflush_time*100/(freq/1000000)) % 100);
112+ printk("task migration cache decay timeout: %ld msecs.\n",
113+ (cache_decay_ticks + 1) * 1000 / HZ);
114+}
115+
116 /*
117 * Send a message to a secondary's console. "START" is one such
118 * interesting message. ;-)
119@@ -449,14 +494,11 @@
120 if (idle == &init_task)
121 panic("idle process is init_task for CPU %d", cpuid);
122
123- idle->processor = cpuid;
124- idle->cpus_runnable = 1 << cpuid; /* we schedule the first task manually */
125+ init_idle(idle, cpuid);
126+ unhash_process(idle);
127+
128 __cpu_logical_map[cpunum] = cpuid;
129 __cpu_number_map[cpuid] = cpunum;
130-
131- del_from_runqueue(idle);
132- unhash_process(idle);
133- init_tasks[cpunum] = idle;
134
135 DBGS(("smp_boot_one_cpu: CPU %d state 0x%lx flags 0x%lx\n",
136 cpuid, idle->state, idle->flags));
137@@ -563,13 +605,11 @@
138
139 __cpu_number_map[boot_cpuid] = 0;
140 __cpu_logical_map[0] = boot_cpuid;
141- current->processor = boot_cpuid;
142
143 smp_store_cpu_info(boot_cpuid);
144+ smp_tune_scheduling(boot_cpuid);
145 smp_setup_percpu_timer(boot_cpuid);
146
147- init_idle();
148-
149 /* ??? This should be in init_idle. */
150 atomic_inc(&init_mm.mm_count);
151 current->active_mm = &init_mm;
152diff -urN linux-2.4.22.org/arch/arm/kernel/process.c linux-2.4.22/arch/arm/kernel/process.c
153--- linux-2.4.22.org/arch/arm/kernel/process.c 2003-11-24 18:30:05.000000000 +0100
154+++ linux-2.4.22/arch/arm/kernel/process.c 2003-11-24 18:39:02.000000000 +0100
155@@ -87,8 +87,6 @@
156 {
157 /* endless idle loop with no priority at all */
158 init_idle();
159- current->nice = 20;
160- current->counter = -100;
161
162 while (1) {
163 void (*idle)(void) = pm_idle;
164diff -urN linux-2.4.22.org/arch/i386/kernel/entry.S linux-2.4.22/arch/i386/kernel/entry.S
165--- linux-2.4.22.org/arch/i386/kernel/entry.S 2003-11-24 18:29:45.000000000 +0100
166+++ linux-2.4.22/arch/i386/kernel/entry.S 2003-11-24 18:39:02.000000000 +0100
167@@ -79,7 +79,7 @@
168 exec_domain = 16
169 need_resched = 20
170 tsk_ptrace = 24
171-processor = 52
172+cpu = 32
173
174 ENOSYS = 38
175
176@@ -184,9 +184,11 @@
177
178
179 ENTRY(ret_from_fork)
180+#if CONFIG_SMP
181 pushl %ebx
182 call SYMBOL_NAME(schedule_tail)
183 addl $4, %esp
184+#endif
185 GET_CURRENT(%ebx)
186 testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS
187 jne tracesys_exit
188diff -urN linux-2.4.22.org/arch/i386/kernel/process.c linux-2.4.22/arch/i386/kernel/process.c
189--- linux-2.4.22.org/arch/i386/kernel/process.c 2003-11-24 18:29:45.000000000 +0100
190+++ linux-2.4.22/arch/i386/kernel/process.c 2003-11-24 18:39:02.000000000 +0100
191@@ -84,7 +84,7 @@
192 {
193 if (current_cpu_data.hlt_works_ok && !hlt_counter) {
194 __cli();
195- if (!current->need_resched)
196+ if (!need_resched())
197 safe_halt();
198 else
199 __sti();
200@@ -126,9 +126,6 @@
201 void cpu_idle (void)
202 {
203 /* endless idle loop with no priority at all */
204- init_idle();
205- current->nice = 20;
206- current->counter = -100;
207
208 while (1) {
209 void (*idle)(void) = pm_idle;
210@@ -708,15 +705,17 @@
211 asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs));
212
213 /*
214- * Restore %fs and %gs.
215+ * Restore %fs and %gs if needed.
216 */
217- loadsegment(fs, next->fs);
218- loadsegment(gs, next->gs);
219+ if (unlikely(prev->fs | prev->gs | next->fs | next->gs)) {
220+ loadsegment(fs, next->fs);
221+ loadsegment(gs, next->gs);
222+ }
223
224 /*
225 * Now maybe reload the debug registers
226 */
227- if (next->debugreg[7]){
228+ if (unlikely(next->debugreg[7])) {
229 loaddebug(next, 0);
230 loaddebug(next, 1);
231 loaddebug(next, 2);
232@@ -726,7 +725,7 @@
233 loaddebug(next, 7);
234 }
235
236- if (prev->ioperm || next->ioperm) {
237+ if (unlikely(prev->ioperm || next->ioperm)) {
238 if (next->ioperm) {
239 /*
240 * 4 cachelines copy ... not good, but not that
241diff -urN linux-2.4.22.org/arch/i386/kernel/setup.c linux-2.4.22/arch/i386/kernel/setup.c
242--- linux-2.4.22.org/arch/i386/kernel/setup.c 2003-11-24 18:29:45.000000000 +0100
243+++ linux-2.4.22/arch/i386/kernel/setup.c 2003-11-24 18:39:02.000000000 +0100
244@@ -3190,9 +3190,10 @@
245 load_TR(nr);
246 load_LDT(&init_mm);
247
248- /*
249- * Clear all 6 debug registers:
250- */
251+ /* Clear %fs and %gs. */
252+ asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
253+
254+ /* Clear all 6 debug registers: */
255
256 #define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) );
257
258diff -urN linux-2.4.22.org/arch/i386/kernel/smpboot.c linux-2.4.22/arch/i386/kernel/smpboot.c
259--- linux-2.4.22.org/arch/i386/kernel/smpboot.c 2003-11-24 18:29:45.000000000 +0100
260+++ linux-2.4.22/arch/i386/kernel/smpboot.c 2003-11-24 18:39:02.000000000 +0100
261@@ -308,14 +308,14 @@
262 if (tsc_values[i] < avg)
263 realdelta = -realdelta;
264
265- printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
266- i, realdelta);
267+ printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n", i, realdelta);
268 }
269
270 sum += delta;
271 }
272 if (!buggy)
273 printk("passed.\n");
274+ ;
275 }
276
277 static void __init synchronize_tsc_ap (void)
278@@ -365,7 +365,7 @@
279 * (This works even if the APIC is not enabled.)
280 */
281 phys_id = GET_APIC_ID(apic_read(APIC_ID));
282- cpuid = current->processor;
283+ cpuid = cpu();
284 if (test_and_set_bit(cpuid, &cpu_online_map)) {
285 printk("huh, phys CPU#%d, CPU#%d already present??\n",
286 phys_id, cpuid);
287@@ -435,6 +435,7 @@
288 */
289 smp_store_cpu_info(cpuid);
290
291+ disable_APIC_timer();
292 /*
293 * Allow the master to continue.
294 */
295@@ -465,6 +466,7 @@
296 smp_callin();
297 while (!atomic_read(&smp_commenced))
298 rep_nop();
299+ enable_APIC_timer();
300 /*
301 * low-memory mappings have been cleared, flush them from
302 * the local TLBs too.
303@@ -803,16 +805,13 @@
304 if (!idle)
305 panic("No idle process for CPU %d", cpu);
306
307- idle->processor = cpu;
308- idle->cpus_runnable = 1 << cpu; /* we schedule the first task manually */
309+ init_idle(idle, cpu);
310
311 map_cpu_to_boot_apicid(cpu, apicid);
312
313 idle->thread.eip = (unsigned long) start_secondary;
314
315- del_from_runqueue(idle);
316 unhash_process(idle);
317- init_tasks[cpu] = idle;
318
319 /* start_eip had better be page-aligned! */
320 start_eip = setup_trampoline();
321@@ -925,6 +924,7 @@
322 }
323
324 cycles_t cacheflush_time;
325+unsigned long cache_decay_ticks;
326
327 static void smp_tune_scheduling (void)
328 {
329@@ -958,9 +958,13 @@
330 cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
331 }
332
333+ cache_decay_ticks = (long)cacheflush_time/cpu_khz * HZ / 1000;
334+
335 printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
336 (long)cacheflush_time/(cpu_khz/1000),
337 ((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
338+ printk("task migration cache decay timeout: %ld msecs.\n",
339+ (cache_decay_ticks + 1) * 1000 / HZ);
340 }
341
342 /*
343@@ -1026,8 +1030,7 @@
344 map_cpu_to_boot_apicid(0, boot_cpu_apicid);
345
346 global_irq_holder = 0;
347- current->processor = 0;
348- init_idle();
349+ current->cpu = 0;
350 smp_tune_scheduling();
351
352 /*
353diff -urN linux-2.4.22.org/arch/i386/kernel/smp.c linux-2.4.22/arch/i386/kernel/smp.c
354--- linux-2.4.22.org/arch/i386/kernel/smp.c 2003-11-24 18:29:45.000000000 +0100
355+++ linux-2.4.22/arch/i386/kernel/smp.c 2003-11-24 18:39:02.000000000 +0100
356@@ -496,13 +496,23 @@
357 * it goes straight through and wastes no time serializing
358 * anything. Worst case is that we lose a reschedule ...
359 */
360-
361 void smp_send_reschedule(int cpu)
362 {
363 send_IPI_mask(1 << cpu, RESCHEDULE_VECTOR);
364 }
365
366 /*
367+ * this function sends a reschedule IPI to all (other) CPUs.
368+ * This should only be used if some 'global' task became runnable,
369+ * such as a RT task, that must be handled now. The first CPU
370+ * that manages to grab the task will run it.
371+ */
372+void smp_send_reschedule_all(void)
373+{
374+ send_IPI_allbutself(RESCHEDULE_VECTOR);
375+}
376+
377+/*
378 * Structure and data for smp_call_function(). This is designed to minimise
379 * static memory requirements. It also looks cleaner.
380 */
381diff -urN linux-2.4.22.org/arch/mips64/kernel/process.c linux-2.4.22/arch/mips64/kernel/process.c
382--- linux-2.4.22.org/arch/mips64/kernel/process.c 2003-11-24 18:30:12.000000000 +0100
383+++ linux-2.4.22/arch/mips64/kernel/process.c 2003-11-24 18:39:02.000000000 +0100
384@@ -39,8 +39,7 @@
385 {
386 /* endless idle loop with no priority at all */
387 init_idle();
388- current->nice = 20;
389- current->counter = -100;
390+
391 while (1) {
392 while (!current->need_resched)
393 if (cpu_wait)
394diff -urN linux-2.4.22.org/arch/parisc/kernel/process.c linux-2.4.22/arch/parisc/kernel/process.c
395--- linux-2.4.22.org/arch/parisc/kernel/process.c 2003-11-24 18:30:13.000000000 +0100
396+++ linux-2.4.22/arch/parisc/kernel/process.c 2003-11-24 18:39:02.000000000 +0100
397@@ -65,8 +65,6 @@
398 {
399 /* endless idle loop with no priority at all */
400 init_idle();
401- current->nice = 20;
402- current->counter = -100;
403
404 while (1) {
405 while (!current->need_resched) {
406diff -urN linux-2.4.22.org/arch/ppc/8260_io/uart.c linux-2.4.22/arch/ppc/8260_io/uart.c
407--- linux-2.4.22.org/arch/ppc/8260_io/uart.c 2003-11-24 18:30:02.000000000 +0100
408+++ linux-2.4.22/arch/ppc/8260_io/uart.c 2003-11-24 18:39:02.000000000 +0100
409@@ -1801,7 +1801,6 @@
410 printk("lsr = %d (jiff=%lu)...", lsr, jiffies);
411 #endif
412 current->state = TASK_INTERRUPTIBLE;
413-/* current->counter = 0; make us low-priority */
414 schedule_timeout(char_time);
415 if (signal_pending(current))
416 break;
417diff -urN linux-2.4.22.org/arch/ppc/8xx_io/uart.c linux-2.4.22/arch/ppc/8xx_io/uart.c
418--- linux-2.4.22.org/arch/ppc/8xx_io/uart.c 2003-11-24 18:30:01.000000000 +0100
419+++ linux-2.4.22/arch/ppc/8xx_io/uart.c 2003-11-24 18:39:02.000000000 +0100
420@@ -1856,7 +1856,6 @@
421 printk("lsr = %d (jiff=%lu)...", lsr, jiffies);
422 #endif
423 current->state = TASK_INTERRUPTIBLE;
424-/* current->counter = 0; make us low-priority */
425 schedule_timeout(char_time);
426 if (signal_pending(current))
427 break;
428diff -urN linux-2.4.22.org/arch/ppc/kernel/entry.S linux-2.4.22/arch/ppc/kernel/entry.S
429--- linux-2.4.22.org/arch/ppc/kernel/entry.S 2003-11-24 18:29:55.000000000 +0100
430+++ linux-2.4.22/arch/ppc/kernel/entry.S 2003-11-24 18:39:02.000000000 +0100
431@@ -269,7 +269,9 @@
432
433 .globl ret_from_fork
434 ret_from_fork:
435+#if CONFIG_SMP
436 bl schedule_tail
437+#endif
438 lwz r0,TASK_PTRACE(r2)
439 andi. r0,r0,PT_TRACESYS
440 bnel- syscall_trace
441diff -urN linux-2.4.22.org/arch/ppc/kernel/idle.c linux-2.4.22/arch/ppc/kernel/idle.c
442--- linux-2.4.22.org/arch/ppc/kernel/idle.c 2003-11-24 18:29:55.000000000 +0100
443+++ linux-2.4.22/arch/ppc/kernel/idle.c 2003-11-24 18:39:02.000000000 +0100
444@@ -46,9 +46,7 @@
445 do_power_save = 1;
446
447 /* endless loop with no priority at all */
448- current->nice = 20;
449- current->counter = -100;
450- init_idle();
451+
452 for (;;) {
453 #ifdef CONFIG_SMP
454 if (!do_power_save) {
455diff -urN linux-2.4.22.org/arch/ppc/kernel/mk_defs.c linux-2.4.22/arch/ppc/kernel/mk_defs.c
456--- linux-2.4.22.org/arch/ppc/kernel/mk_defs.c 2003-11-24 18:29:55.000000000 +0100
457+++ linux-2.4.22/arch/ppc/kernel/mk_defs.c 2003-11-24 18:39:02.000000000 +0100
458@@ -34,8 +34,8 @@
459 /*DEFINE(KERNELBASE, KERNELBASE);*/
460 DEFINE(STATE, offsetof(struct task_struct, state));
461 DEFINE(NEXT_TASK, offsetof(struct task_struct, next_task));
462- DEFINE(COUNTER, offsetof(struct task_struct, counter));
463- DEFINE(PROCESSOR, offsetof(struct task_struct, processor));
464+ DEFINE(COUNTER, offsetof(struct task_struct, time_slice));
465+ DEFINE(PROCESSOR, offsetof(struct task_struct, cpu));
466 DEFINE(SIGPENDING, offsetof(struct task_struct, sigpending));
467 DEFINE(THREAD, offsetof(struct task_struct, thread));
468 DEFINE(MM, offsetof(struct task_struct, mm));
469diff -urN linux-2.4.22.org/arch/ppc/kernel/process.c linux-2.4.22/arch/ppc/kernel/process.c
470--- linux-2.4.22.org/arch/ppc/kernel/process.c 2003-11-24 18:29:54.000000000 +0100
471+++ linux-2.4.22/arch/ppc/kernel/process.c 2003-11-24 18:39:02.000000000 +0100
472@@ -281,7 +281,7 @@
473 #endif
474
475 #ifdef CONFIG_SMP
476- printk(" CPU: %d", current->processor);
477+ printk(" CPU: %d", current->cpu);
478 #endif /* CONFIG_SMP */
479
480 printk("\n");
481diff -urN linux-2.4.22.org/arch/ppc/kernel/smp.c linux-2.4.22/arch/ppc/kernel/smp.c
482--- linux-2.4.22.org/arch/ppc/kernel/smp.c 2003-11-24 18:29:54.000000000 +0100
483+++ linux-2.4.22/arch/ppc/kernel/smp.c 2003-11-24 18:39:02.000000000 +0100
484@@ -51,6 +51,7 @@
485 unsigned long cpu_online_map;
486 int smp_hw_index[NR_CPUS];
487 static struct smp_ops_t *smp_ops;
488+unsigned long cache_decay_ticks = HZ/100;
489
490 /* all cpu mappings are 1-1 -- Cort */
491 volatile unsigned long cpu_callin_map[NR_CPUS];
492@@ -292,9 +293,7 @@
493 * cpu 0, the master -- Cort
494 */
495 cpu_callin_map[0] = 1;
496- current->processor = 0;
497-
498- init_idle();
499+ current->cpu = 0;
500
501 for (i = 0; i < NR_CPUS; i++) {
502 prof_counter[i] = 1;
503@@ -351,12 +350,9 @@
504 p = init_task.prev_task;
505 if (!p)
506 panic("No idle task for CPU %d", i);
507- del_from_runqueue(p);
508+ init_idle(p, i);
509 unhash_process(p);
510- init_tasks[i] = p;
511
512- p->processor = i;
513- p->cpus_runnable = 1 << i; /* we schedule the first task manually */
514 current_set[i] = p;
515
516 /*
517@@ -505,7 +501,7 @@
518
519 void __init smp_callin(void)
520 {
521- int cpu = current->processor;
522+ int cpu = current->cpu;
523
524 smp_store_cpu_info(cpu);
525 smp_ops->setup_cpu(cpu);
526diff -urN linux-2.4.22.org/arch/ppc/lib/dec_and_lock.c linux-2.4.22/arch/ppc/lib/dec_and_lock.c
527--- linux-2.4.22.org/arch/ppc/lib/dec_and_lock.c 2003-11-24 18:30:01.000000000 +0100
528+++ linux-2.4.22/arch/ppc/lib/dec_and_lock.c 2003-11-24 18:39:02.000000000 +0100
529@@ -1,4 +1,5 @@
530 #include <linux/module.h>
531+#include <linux/sched.h>
532 #include <linux/spinlock.h>
533 #include <asm/atomic.h>
534 #include <asm/system.h>
535diff -urN linux-2.4.22.org/arch/ppc/mm/init.c linux-2.4.22/arch/ppc/mm/init.c
536--- linux-2.4.22.org/arch/ppc/mm/init.c 2003-11-24 18:29:54.000000000 +0100
537+++ linux-2.4.22/arch/ppc/mm/init.c 2003-11-24 18:39:02.000000000 +0100
538@@ -192,9 +192,9 @@
539 {
540 int iscur = 0;
541 #ifdef CONFIG_SMP
542- printk("%3d ", p->processor);
543- if ( (p->processor != NO_PROC_ID) &&
544- (p == current_set[p->processor]) )
545+ printk("%3d ", p->cpu);
546+ if ( (p->cpu != NO_PROC_ID) &&
547+ (p == current_set[p->cpu]) )
548 {
549 iscur = 1;
550 printk("current");
551diff -urN linux-2.4.22.org/arch/ppc64/kernel/entry.S linux-2.4.22/arch/ppc64/kernel/entry.S
552--- linux-2.4.22.org/arch/ppc64/kernel/entry.S 2003-11-24 18:29:44.000000000 +0100
553+++ linux-2.4.22/arch/ppc64/kernel/entry.S 2003-11-24 18:39:02.000000000 +0100
554@@ -291,7 +291,9 @@
555 blr
556
557 _GLOBAL(ret_from_fork)
558+#if CONFIG_SMP
559 bl .schedule_tail
560+#endif
561 ld r0,TASK_PTRACE(r13)
562 andi. r0,r0,PT_TRACESYS
563 beq+ .ret_from_except
564diff -urN linux-2.4.22.org/arch/ppc64/kernel/idle.c linux-2.4.22/arch/ppc64/kernel/idle.c
565--- linux-2.4.22.org/arch/ppc64/kernel/idle.c 2003-11-24 18:29:44.000000000 +0100
566+++ linux-2.4.22/arch/ppc64/kernel/idle.c 2003-11-24 18:39:02.000000000 +0100
567@@ -68,9 +68,6 @@
568 unsigned long CTRL;
569 #endif
570
571- /* endless loop with no priority at all */
572- current->nice = 20;
573- current->counter = -100;
574 #ifdef CONFIG_PPC_ISERIES
575 /* ensure iSeries run light will be out when idle */
576 current->thread.flags &= ~PPC_FLAG_RUN_LIGHT;
577@@ -78,7 +75,7 @@
578 CTRL &= ~RUNLATCH;
579 mtspr(CTRLT, CTRL);
580 #endif
581- init_idle();
582+ /* endless loop with no priority at all */
583
584 lpaca = get_paca();
585
586diff -urN linux-2.4.22.org/arch/ppc64/kernel/process.c linux-2.4.22/arch/ppc64/kernel/process.c
587--- linux-2.4.22.org/arch/ppc64/kernel/process.c 2003-11-24 18:29:43.000000000 +0100
588+++ linux-2.4.22/arch/ppc64/kernel/process.c 2003-11-24 18:39:02.000000000 +0100
589@@ -106,7 +106,7 @@
590 #ifdef SHOW_TASK_SWITCHES
591 printk("%s/%d -> %s/%d NIP %08lx cpu %d root %x/%x\n",
592 prev->comm,prev->pid,
593- new->comm,new->pid,new->thread.regs->nip,new->processor,
594+ new->comm,new->pid,new->thread.regs->nip,new->cpu,
595 new->fs->root,prev->fs->root);
596 #endif
597 #ifdef CONFIG_SMP
598diff -urN linux-2.4.22.org/arch/ppc64/kernel/smp.c linux-2.4.22/arch/ppc64/kernel/smp.c
599--- linux-2.4.22.org/arch/ppc64/kernel/smp.c 2003-11-24 18:29:44.000000000 +0100
600+++ linux-2.4.22/arch/ppc64/kernel/smp.c 2003-11-24 18:39:02.000000000 +0100
601@@ -69,6 +69,7 @@
602 extern atomic_t ipi_sent;
603 spinlock_t kernel_flag __cacheline_aligned = SPIN_LOCK_UNLOCKED;
604 cycles_t cacheflush_time;
605+unsigned long cache_decay_ticks = HZ/100;
606 static int max_cpus __initdata = NR_CPUS;
607
608 unsigned long cpu_online_map;
609@@ -611,9 +612,7 @@
610 * cpu 0, the master -- Cort
611 */
612 cpu_callin_map[0] = 1;
613- current->processor = 0;
614-
615- init_idle();
616+ current->cpu = 0;
617
618 for (i = 0; i < NR_CPUS; i++) {
619 paca[i].prof_counter = 1;
620@@ -684,12 +683,9 @@
621
622 PPCDBG(PPCDBG_SMP,"\tProcessor %d, task = 0x%lx\n", i, p);
623
624- del_from_runqueue(p);
625+ init_idle(p, i);
626 unhash_process(p);
627- init_tasks[i] = p;
628
629- p->processor = i;
630- p->cpus_runnable = 1 << i; /* we schedule the first task manually */
631 current_set[i].task = p;
632 sp = ((unsigned long)p) + sizeof(union task_union)
633 - STACK_FRAME_OVERHEAD;
634@@ -740,7 +736,7 @@
635
636 void __init smp_callin(void)
637 {
638- int cpu = current->processor;
639+ int cpu = current->cpu;
640
641 smp_store_cpu_info(cpu);
642 set_dec(paca[cpu].default_decr);
643@@ -748,8 +744,6 @@
644
645 ppc_md.smp_setup_cpu(cpu);
646
647- init_idle();
648-
649 set_bit(smp_processor_id(), &cpu_online_map);
650
651 while(!smp_commenced) {
652@@ -768,7 +762,7 @@
653 {
654 int cpu;
655
656- cpu = current->processor;
657+ cpu = current->cpu;
658 atomic_inc(&init_mm.mm_count);
659 current->active_mm = &init_mm;
660 smp_callin();
661diff -urN linux-2.4.22.org/arch/s390/kernel/process.c linux-2.4.22/arch/s390/kernel/process.c
662--- linux-2.4.22.org/arch/s390/kernel/process.c 2003-11-24 18:30:13.000000000 +0100
663+++ linux-2.4.22/arch/s390/kernel/process.c 2003-11-24 18:39:02.000000000 +0100
664@@ -57,8 +57,7 @@
665
666 /* endless idle loop with no priority at all */
667 init_idle();
668- current->nice = 20;
669- current->counter = -100;
670+
671 while (1) {
672 if (current->need_resched) {
673 schedule();
674diff -urN linux-2.4.22.org/arch/s390x/kernel/process.c linux-2.4.22/arch/s390x/kernel/process.c
675--- linux-2.4.22.org/arch/s390x/kernel/process.c 2003-11-24 18:30:19.000000000 +0100
676+++ linux-2.4.22/arch/s390x/kernel/process.c 2003-11-24 18:39:02.000000000 +0100
677@@ -57,8 +57,7 @@
678
679 /* endless idle loop with no priority at all */
680 init_idle();
681- current->nice = 20;
682- current->counter = -100;
683+
684 while (1) {
685 if (current->need_resched) {
686 schedule();
687diff -urN linux-2.4.22.org/arch/sh/kernel/process.c linux-2.4.22/arch/sh/kernel/process.c
688--- linux-2.4.22.org/arch/sh/kernel/process.c 2003-11-24 18:30:10.000000000 +0100
689+++ linux-2.4.22/arch/sh/kernel/process.c 2003-11-24 18:39:02.000000000 +0100
690@@ -42,8 +42,6 @@
691 {
692 /* endless idle loop with no priority at all */
693 init_idle();
694- current->nice = 20;
695- current->counter = -100;
696
697 while (1) {
698 if (hlt_counter) {
699diff -urN linux-2.4.22.org/arch/sparc/kernel/entry.S linux-2.4.22/arch/sparc/kernel/entry.S
700--- linux-2.4.22.org/arch/sparc/kernel/entry.S 2003-11-24 18:29:50.000000000 +0100
701+++ linux-2.4.22/arch/sparc/kernel/entry.S 2003-11-24 18:39:02.000000000 +0100
702@@ -1471,7 +1471,9 @@
703
704 .globl C_LABEL(ret_from_fork)
705 C_LABEL(ret_from_fork):
706+#if CONFIG_SMP
707 call schedule_tail
708+#endif
709 mov %g3, %o0
710 b C_LABEL(ret_sys_call)
711 ld [%sp + STACKFRAME_SZ + PT_I0], %o0
712diff -urN linux-2.4.22.org/arch/sparc/kernel/process.c linux-2.4.22/arch/sparc/kernel/process.c
713--- linux-2.4.22.org/arch/sparc/kernel/process.c 2003-11-24 18:29:50.000000000 +0100
714+++ linux-2.4.22/arch/sparc/kernel/process.c 2003-11-24 18:39:02.000000000 +0100
715@@ -74,9 +74,6 @@
716 goto out;
717
718 /* endless idle loop with no priority at all */
719- current->nice = 20;
720- current->counter = -100;
721- init_idle();
722
723 for (;;) {
724 if (ARCH_SUN4C_SUN4) {
725@@ -128,9 +125,6 @@
726 int cpu_idle(void)
727 {
728 /* endless idle loop with no priority at all */
729- current->nice = 20;
730- current->counter = -100;
731- init_idle();
732
733 while(1) {
734 if(current->need_resched) {
735diff -urN linux-2.4.22.org/arch/sparc/kernel/smp.c linux-2.4.22/arch/sparc/kernel/smp.c
736--- linux-2.4.22.org/arch/sparc/kernel/smp.c 2003-11-24 18:29:50.000000000 +0100
737+++ linux-2.4.22/arch/sparc/kernel/smp.c 2003-11-24 18:39:02.000000000 +0100
738@@ -57,6 +57,7 @@
739 volatile int __cpu_number_map[NR_CPUS];
740 volatile int __cpu_logical_map[NR_CPUS];
741 cycles_t cacheflush_time = 0; /* XXX */
742+unsigned long cache_decay_ticks = HZ/100; /* XXX */
743
744 /* The only guaranteed locking primitive available on all Sparc
745 * processors is 'ldstub [%reg + immediate], %dest_reg' which atomically
746diff -urN linux-2.4.22.org/arch/sparc/kernel/sun4d_smp.c linux-2.4.22/arch/sparc/kernel/sun4d_smp.c
747--- linux-2.4.22.org/arch/sparc/kernel/sun4d_smp.c 2003-11-24 18:29:50.000000000 +0100
748+++ linux-2.4.22/arch/sparc/kernel/sun4d_smp.c 2003-11-24 18:39:02.000000000 +0100
749@@ -107,7 +107,6 @@
750 * the SMP initialization the master will be just allowed
751 * to call the scheduler code.
752 */
753- init_idle();
754
755 /* Get our local ticker going. */
756 smp_setup_percpu_timer();
757@@ -127,7 +126,7 @@
758 while((unsigned long)current_set[cpuid] < PAGE_OFFSET)
759 barrier();
760
761- while(current_set[cpuid]->processor != cpuid)
762+ while(current_set[cpuid]->cpu != cpuid)
763 barrier();
764
765 /* Fix idle thread fields. */
766@@ -197,10 +196,8 @@
767 mid_xlate[i] = i;
768 __cpu_number_map[boot_cpu_id] = 0;
769 __cpu_logical_map[0] = boot_cpu_id;
770- current->processor = boot_cpu_id;
771 smp_store_cpu_info(boot_cpu_id);
772 smp_setup_percpu_timer();
773- init_idle();
774 local_flush_cache_all();
775 if(linux_num_cpus == 1)
776 return; /* Not an MP box. */
777@@ -222,14 +219,10 @@
778 cpucount++;
779
780 p = init_task.prev_task;
781- init_tasks[i] = p;
782-
783- p->processor = i;
784- p->cpus_runnable = 1 << i; /* we schedule the first task manually */
785
786 current_set[i] = p;
787
788- del_from_runqueue(p);
789+ init_idle(p, i);
790 unhash_process(p);
791
792 for (no = 0; no < linux_num_cpus; no++)
793diff -urN linux-2.4.22.org/arch/sparc/kernel/sun4m_smp.c linux-2.4.22/arch/sparc/kernel/sun4m_smp.c
794--- linux-2.4.22.org/arch/sparc/kernel/sun4m_smp.c 2003-11-24 18:29:50.000000000 +0100
795+++ linux-2.4.22/arch/sparc/kernel/sun4m_smp.c 2003-11-24 18:39:02.000000000 +0100
796@@ -104,7 +104,6 @@
797 * the SMP initialization the master will be just allowed
798 * to call the scheduler code.
799 */
800- init_idle();
801
802 /* Allow master to continue. */
803 swap((unsigned long *)&cpu_callin_map[cpuid], 1);
804@@ -170,12 +169,10 @@
805 mid_xlate[boot_cpu_id] = (linux_cpus[boot_cpu_id].mid & ~8);
806 __cpu_number_map[boot_cpu_id] = 0;
807 __cpu_logical_map[0] = boot_cpu_id;
808- current->processor = boot_cpu_id;
809
810 smp_store_cpu_info(boot_cpu_id);
811 set_irq_udt(mid_xlate[boot_cpu_id]);
812 smp_setup_percpu_timer();
813- init_idle();
814 local_flush_cache_all();
815 if(linux_num_cpus == 1)
816 return; /* Not an MP box. */
817@@ -195,14 +192,10 @@
818 cpucount++;
819
820 p = init_task.prev_task;
821- init_tasks[i] = p;
822-
823- p->processor = i;
824- p->cpus_runnable = 1 << i; /* we schedule the first task manually */
825
826 current_set[i] = p;
827
828- del_from_runqueue(p);
829+ init_idle(p, i);
830 unhash_process(p);
831
832 /* See trampoline.S for details... */
833diff -urN linux-2.4.22.org/arch/sparc64/kernel/entry.S linux-2.4.22/arch/sparc64/kernel/entry.S
834--- linux-2.4.22.org/arch/sparc64/kernel/entry.S 2003-11-24 18:30:04.000000000 +0100
835+++ linux-2.4.22/arch/sparc64/kernel/entry.S 2003-11-24 18:39:02.000000000 +0100
836@@ -1627,7 +1627,9 @@
837 */
838 andn %o7, SPARC_FLAG_NEWCHILD, %l0
839 mov %g5, %o0 /* 'prev' */
840+#if CONFIG_SMP
841 call schedule_tail
842+#endif
843 stb %l0, [%g6 + AOFF_task_thread + AOFF_thread_flags]
844 andcc %l0, SPARC_FLAG_PERFCTR, %g0
845 be,pt %icc, 1f
846diff -urN linux-2.4.22.org/arch/sparc64/kernel/irq.c linux-2.4.22/arch/sparc64/kernel/irq.c
847--- linux-2.4.22.org/arch/sparc64/kernel/irq.c 2003-11-24 18:30:04.000000000 +0100
848+++ linux-2.4.22/arch/sparc64/kernel/irq.c 2003-11-24 18:39:02.000000000 +0100
849@@ -174,7 +174,7 @@
850 tid = ((tid & UPA_CONFIG_MID) << 9);
851 tid &= IMAP_TID_UPA;
852 } else {
853- tid = (starfire_translate(imap, current->processor) << 26);
854+ tid = (starfire_translate(imap, current->cpu) << 26);
855 tid &= IMAP_TID_UPA;
856 }
857
858diff -urN linux-2.4.22.org/arch/sparc64/kernel/process.c linux-2.4.22/arch/sparc64/kernel/process.c
859--- linux-2.4.22.org/arch/sparc64/kernel/process.c 2003-11-24 18:30:04.000000000 +0100
860+++ linux-2.4.22/arch/sparc64/kernel/process.c 2003-11-24 18:39:02.000000000 +0100
861@@ -54,9 +54,6 @@
862 return -EPERM;
863
864 /* endless idle loop with no priority at all */
865- current->nice = 20;
866- current->counter = -100;
867- init_idle();
868
869 for (;;) {
870 /* If current->need_resched is zero we should really
871@@ -80,14 +77,10 @@
872 /*
873 * the idle loop on a UltraMultiPenguin...
874 */
875-#define idle_me_harder() (cpu_data[current->processor].idle_volume += 1)
876-#define unidle_me() (cpu_data[current->processor].idle_volume = 0)
877+#define idle_me_harder() (cpu_data[current->cpu].idle_volume += 1)
878+#define unidle_me() (cpu_data[current->cpu].idle_volume = 0)
879 int cpu_idle(void)
880 {
881- current->nice = 20;
882- current->counter = -100;
883- init_idle();
884-
885 while(1) {
886 if (current->need_resched != 0) {
887 unidle_me();
888diff -urN linux-2.4.22.org/arch/sparc64/kernel/rtrap.S linux-2.4.22/arch/sparc64/kernel/rtrap.S
889--- linux-2.4.22.org/arch/sparc64/kernel/rtrap.S 2003-11-24 18:30:04.000000000 +0100
890+++ linux-2.4.22/arch/sparc64/kernel/rtrap.S 2003-11-24 18:39:02.000000000 +0100
891@@ -140,7 +140,7 @@
892 .align 64
893 .globl rtrap_clr_l6, rtrap, irqsz_patchme, rtrap_xcall
894 rtrap_clr_l6: clr %l6
895-rtrap: lduw [%g6 + AOFF_task_processor], %l0
896+rtrap: lduw [%g6 + AOFF_task_cpu], %l0
897 sethi %hi(irq_stat), %l2 ! &softirq_active
898 or %l2, %lo(irq_stat), %l2 ! &softirq_active
899 irqsz_patchme: sllx %l0, 0, %l0
900diff -urN linux-2.4.22.org/arch/sparc64/kernel/smp.c linux-2.4.22/arch/sparc64/kernel/smp.c
901--- linux-2.4.22.org/arch/sparc64/kernel/smp.c 2003-11-24 18:30:04.000000000 +0100
902+++ linux-2.4.22/arch/sparc64/kernel/smp.c 2003-11-24 18:39:02.000000000 +0100
903@@ -360,7 +360,7 @@
904 printk("Entering UltraSMPenguin Mode...\n");
905 __sti();
906 smp_store_cpu_info(boot_cpu_id);
907- init_idle();
908+ smp_tune_scheduling();
909
910 if (linux_num_cpus == 1)
911 return;
912@@ -383,12 +383,8 @@
913 cpucount++;
914
915 p = init_task.prev_task;
916- init_tasks[cpucount] = p;
917
918- p->processor = i;
919- p->cpus_runnable = 1UL << i; /* we schedule the first task manually */
920-
921- del_from_runqueue(p);
922+ init_idle(p, i);
923 unhash_process(p);
924
925 callin_flag = 0;
926@@ -1214,10 +1210,96 @@
927 __cpu_number_map[boot_cpu_id] = 0;
928 prom_cpu_nodes[boot_cpu_id] = linux_cpus[0].prom_node;
929 __cpu_logical_map[0] = boot_cpu_id;
930- current->processor = boot_cpu_id;
931 prof_counter(boot_cpu_id) = prof_multiplier(boot_cpu_id) = 1;
932 }
933
934+cycles_t cacheflush_time;
935+unsigned long cache_decay_ticks;
936+
937+extern unsigned long cheetah_tune_scheduling(void);
938+
939+static void __init smp_tune_scheduling(void)
940+{
941+ unsigned long orig_flush_base, flush_base, flags, *p;
942+ unsigned int ecache_size, order;
943+ cycles_t tick1, tick2, raw;
944+
945+ /* Approximate heuristic for SMP scheduling. It is an
946+ * estimation of the time it takes to flush the L2 cache
947+ * on the local processor.
948+ *
949+ * The ia32 chooses to use the L1 cache flush time instead,
950+ * and I consider this complete nonsense. The Ultra can service
951+ * a miss to the L1 with a hit to the L2 in 7 or 8 cycles, and
952+ * L2 misses are what create extra bus traffic (ie. the "cost"
953+ * of moving a process from one cpu to another).
954+ */
955+ printk("SMP: Calibrating ecache flush... ");
956+ if (tlb_type == cheetah || tlb_type == cheetah_plus) {
957+ cacheflush_time = cheetah_tune_scheduling();
958+ goto report;
959+ }
960+
961+ ecache_size = prom_getintdefault(linux_cpus[0].prom_node,
962+ "ecache-size", (512 * 1024));
963+ if (ecache_size > (4 * 1024 * 1024))
964+ ecache_size = (4 * 1024 * 1024);
965+ orig_flush_base = flush_base =
966+ __get_free_pages(GFP_KERNEL, order = get_order(ecache_size));
967+
968+ if (flush_base != 0UL) {
969+ local_irq_save(flags);
970+
971+ /* Scan twice the size once just to get the TLB entries
972+ * loaded and make sure the second scan measures pure misses.
973+ */
974+ for (p = (unsigned long *)flush_base;
975+ ((unsigned long)p) < (flush_base + (ecache_size<<1));
976+ p += (64 / sizeof(unsigned long)))
977+ *((volatile unsigned long *)p);
978+
979+ tick1 = tick_ops->get_tick();
980+
981+ __asm__ __volatile__("1:\n\t"
982+ "ldx [%0 + 0x000], %%g1\n\t"
983+ "ldx [%0 + 0x040], %%g2\n\t"
984+ "ldx [%0 + 0x080], %%g3\n\t"
985+ "ldx [%0 + 0x0c0], %%g5\n\t"
986+ "add %0, 0x100, %0\n\t"
987+ "cmp %0, %2\n\t"
988+ "bne,pt %%xcc, 1b\n\t"
989+ " nop"
990+ : "=&r" (flush_base)
991+ : "0" (flush_base),
992+ "r" (flush_base + ecache_size)
993+ : "g1", "g2", "g3", "g5");
994+
995+ tick2 = tick_ops->get_tick();
996+
997+ local_irq_restore(flags);
998+
999+ raw = (tick2 - tick1);
1000+
1001+ /* Dampen it a little, considering two processes
1002+ * sharing the cache and fitting.
1003+ */
1004+ cacheflush_time = (raw - (raw >> 2));
1005+
1006+ free_pages(orig_flush_base, order);
1007+ } else {
1008+ cacheflush_time = ((ecache_size << 2) +
1009+ (ecache_size << 1));
1010+ }
1011+report:
1012+ /* Convert ticks/sticks to jiffies. */
1013+ cache_decay_ticks = cacheflush_time / timer_tick_offset;
1014+ if (cache_decay_ticks < 1)
1015+ cache_decay_ticks = 1;
1016+
1017+ printk("Using heuristic of %ld cycles, %ld ticks.\n",
1018+ cacheflush_time, cache_decay_ticks);
1019+}
1020+
1021 static inline unsigned long find_flush_base(unsigned long size)
1022 {
1023 struct page *p = mem_map;
1024diff -urN linux-2.4.22.org/arch/sparc64/kernel/trampoline.S linux-2.4.22/arch/sparc64/kernel/trampoline.S
1025--- linux-2.4.22.org/arch/sparc64/kernel/trampoline.S 2003-11-24 18:30:04.000000000 +0100
1026+++ linux-2.4.22/arch/sparc64/kernel/trampoline.S 2003-11-24 18:39:02.000000000 +0100
1027@@ -250,7 +250,7 @@
1028 wrpr %o1, PSTATE_IG, %pstate
1029
1030 /* Get our UPA MID. */
1031- lduw [%o2 + AOFF_task_processor], %g1
1032+ lduw [%o2 + AOFF_task_cpu], %g1
1033 sethi %hi(cpu_data), %g5
1034 or %g5, %lo(cpu_data), %g5
1035
1036diff -urN linux-2.4.22.org/arch/sparc64/kernel/traps.c linux-2.4.22/arch/sparc64/kernel/traps.c
1037--- linux-2.4.22.org/arch/sparc64/kernel/traps.c 2003-11-24 18:30:04.000000000 +0100
1038+++ linux-2.4.22/arch/sparc64/kernel/traps.c 2003-11-24 18:39:02.000000000 +0100
1039@@ -16,6 +16,7 @@
1040 #include <linux/smp.h>
1041 #include <linux/smp_lock.h>
1042 #include <linux/mm.h>
1043+#include <linux/init.h>
1044
1045 #include <asm/delay.h>
1046 #include <asm/system.h>
1047@@ -755,6 +756,48 @@
1048 "i" (ASI_PHYS_USE_EC));
1049 }
1050
1051+#ifdef CONFIG_SMP
1052+unsigned long __init cheetah_tune_scheduling(void)
1053+{
1054+ unsigned long tick1, tick2, raw;
1055+ unsigned long flush_base = ecache_flush_physbase;
1056+ unsigned long flush_linesize = ecache_flush_linesize;
1057+ unsigned long flush_size = ecache_flush_size;
1058+
1059+ /* Run through the whole cache to guarantee the timed loop
1060+ * is really displacing cache lines.
1061+ */
1062+ __asm__ __volatile__("1: subcc %0, %4, %0\n\t"
1063+ " bne,pt %%xcc, 1b\n\t"
1064+ " ldxa [%2 + %0] %3, %%g0\n\t"
1065+ : "=&r" (flush_size)
1066+ : "0" (flush_size), "r" (flush_base),
1067+ "i" (ASI_PHYS_USE_EC), "r" (flush_linesize));
1068+
1069+ /* The flush area is 2 X Ecache-size, so cut this in half for
1070+ * the timed loop.
1071+ */
1072+ flush_base = ecache_flush_physbase;
1073+ flush_linesize = ecache_flush_linesize;
1074+ flush_size = ecache_flush_size >> 1;
1075+
1076+ __asm__ __volatile__("rd %%tick, %0" : "=r" (tick1));
1077+
1078+ __asm__ __volatile__("1: subcc %0, %4, %0\n\t"
1079+ " bne,pt %%xcc, 1b\n\t"
1080+ " ldxa [%2 + %0] %3, %%g0\n\t"
1081+ : "=&r" (flush_size)
1082+ : "0" (flush_size), "r" (flush_base),
1083+ "i" (ASI_PHYS_USE_EC), "r" (flush_linesize));
1084+
1085+ __asm__ __volatile__("rd %%tick, %0" : "=r" (tick2));
1086+
1087+ raw = (tick2 - tick1);
1088+
1089+ return (raw - (raw >> 2));
1090+}
1091+#endif
1092+
1093 /* Unfortunately, the diagnostic access to the I-cache tags we need to
1094 * use to clear the thing interferes with I-cache coherency transactions.
1095 *
1096diff -urN linux-2.4.22.org/Documentation/sched-coding.txt linux-2.4.22/Documentation/sched-coding.txt
1097--- linux-2.4.22.org/Documentation/sched-coding.txt 1970-01-01 01:00:00.000000000 +0100
1098+++ linux-2.4.22/Documentation/sched-coding.txt 2003-11-24 18:39:02.000000000 +0100
1099@@ -0,0 +1,126 @@
1100+ Reference for various scheduler-related methods in the O(1) scheduler
1101+ Robert Love <rml@tech9.net>, MontaVista Software
1102+
1103+
1104+Note most of these methods are local to kernel/sched.c - this is by design.
1105+The scheduler is meant to be self-contained and abstracted away. This document
1106+is primarily for understanding the scheduler, not interfacing to it. Some of
1107+the discussed interfaces, however, are general process/scheduling methods.
1108+They are typically defined in include/linux/sched.h.
1109+
1110+
1111+Main Scheduling Methods
1112+-----------------------
1113+
1114+void load_balance(runqueue_t *this_rq, int idle)
1115+ Attempts to pull tasks from one cpu to another to balance cpu usage,
1116+ if needed. This method is called explicitly if the runqueues are
1117+ imbalanced or periodically by the timer tick. Prior to calling,
1118+ the current runqueue must be locked and interrupts disabled.
1119+
1120+void schedule()
1121+ The main scheduling function. Upon return, the highest priority
1122+ process will be active.
1123+
1124+
1125+Locking
1126+-------
1127+
1128+Each runqueue has its own lock, rq->lock. When multiple runqueues need
1129+to be locked, lock acquires must be ordered by ascending &runqueue value.
1130+
1131+A specific runqueue is locked via
1132+
1133+ task_rq_lock(task_t pid, unsigned long *flags)
1134+
1135+which disables preemption, disables interrupts, and locks the runqueue pid is
1136+running on. Likewise,
1137+
1138+ task_rq_unlock(task_t pid, unsigned long *flags)
1139+
1140+unlocks the runqueue pid is running on, restores interrupts to their previous
1141+state, and reenables preemption.
1142+
1143+The routines
1144+
1145+ double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
1146+
1147+and
1148+
1149+ double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2)
1150+
1151+safely lock and unlock, respectively, the two specified runqueues. They do
1152+not, however, disable and restore interrupts. Users are required to do so
1153+manually before and after calls.
1154+
1155+
1156+Values
1157+------
1158+
1159+MAX_PRIO
1160+ The maximum priority of the system, stored in the task as task->prio.
1161+ Lower priorities are higher. Normal (non-RT) priorities range from
1162+ MAX_RT_PRIO to (MAX_PRIO - 1).
1163+MAX_RT_PRIO
1164+ The maximum real-time priority of the system. Valid RT priorities
1165+ range from 0 to (MAX_RT_PRIO - 1).
1166+MAX_USER_RT_PRIO
1167+ The maximum real-time priority that is exported to user-space. Should
1168+ always be equal to or less than MAX_RT_PRIO. Setting it less allows
1169+ kernel threads to have higher priorities than any user-space task.
1170+MIN_TIMESLICE
1171+MAX_TIMESLICE
1172+ Respectively, the minimum and maximum timeslices (quanta) of a process.
1173+
1174+Data
1175+----
1176+
1177+struct runqueue
1178+ The main per-CPU runqueue data structure.
1179+struct task_struct
1180+ The main per-process data structure.
1181+
1182+
1183+General Methods
1184+---------------
1185+
1186+cpu_rq(cpu)
1187+ Returns the runqueue of the specified cpu.
1188+this_rq()
1189+ Returns the runqueue of the current cpu.
1190+task_rq(pid)
1191+ Returns the runqueue which holds the specified pid.
1192+cpu_curr(cpu)
1193+ Returns the task currently running on the given cpu.
1194+rt_task(pid)
1195+ Returns true if pid is real-time, false if not.
1196+
1197+
1198+Process Control Methods
1199+-----------------------
1200+
1201+void set_user_nice(task_t *p, long nice)
1202+ Sets the "nice" value of task p to the given value.
1203+int setscheduler(pid_t pid, int policy, struct sched_param *param)
1204+ Sets the scheduling policy and parameters for the given pid.
1205+void set_cpus_allowed(task_t *p, unsigned long new_mask)
1206+ Sets a given task's CPU affinity and migrates it to a proper cpu.
1207+ Callers must have a valid reference to the task and ensure that
1208+ the task does not exit prematurely. No locks can be held during the call.
1209+set_task_state(tsk, state_value)
1210+ Sets the given task's state to the given value.
1211+set_current_state(state_value)
1212+ Sets the current task's state to the given value.
1213+void set_tsk_need_resched(struct task_struct *tsk)
1214+ Sets need_resched in the given task.
1215+void clear_tsk_need_resched(struct task_struct *tsk)
1216+ Clears need_resched in the given task.
1217+void set_need_resched()
1218+ Sets need_resched in the current task.
1219+void clear_need_resched()
1220+ Clears need_resched in the current task.
1221+int need_resched()
1222+ Returns true if need_resched is set in the current task, false
1223+ otherwise.
1224+yield()
1225+ Place the current process at the end of the runqueue and call schedule.
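A minimal sketch of the locking discipline documented above (illustrative only, not part of the patch; it assumes the task_rq_lock()/task_rq_unlock() helpers and the prio field as implemented by this patch's kernel/sched.c):

	/* Update a scheduling field of task p under its runqueue lock. */
	static void example_set_prio(task_t *p, int new_prio)
	{
		unsigned long flags;
		runqueue_t *rq;

		rq = task_rq_lock(p, &flags);	/* locks p's runqueue, disables irqs */
		p->prio = new_prio;		/* runqueue-protected state */
		task_rq_unlock(rq, &flags);	/* unlocks and restores irq state */
	}

Because both helpers are local to kernel/sched.c, code of this shape lives inside the scheduler itself; external callers use the wrappers listed under "Process Control Methods" instead.
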
1226diff -urN linux-2.4.22.org/Documentation/sched-design.txt linux-2.4.22/Documentation/sched-design.txt
1227--- linux-2.4.22.org/Documentation/sched-design.txt 1970-01-01 01:00:00.000000000 +0100
1228+++ linux-2.4.22/Documentation/sched-design.txt 2003-11-24 18:39:02.000000000 +0100
1229@@ -0,0 +1,165 @@
1230+ Goals, Design and Implementation of the
1231+ new ultra-scalable O(1) scheduler
1232+
1233+
1234+ This is an edited version of an email Ingo Molnar sent to
1235+ lkml on 4 Jan 2002. It describes the goals, design, and
1236+ implementation of Ingo's new ultra-scalable O(1) scheduler.
1237+ Last Updated: 18 April 2002.
1238+
1239+
1240+Goal
1241+====
1242+
1243+The main goal of the new scheduler is to keep all the good things we know
1244+and love about the current Linux scheduler:
1245+
1246+ - good interactive performance even during high load: if the user
1247+ types or clicks then the system must react instantly and must execute
1248+ the user tasks smoothly, even during considerable background load.
1249+
1250+ - good scheduling/wakeup performance with 1-2 runnable processes.
1251+
1252+ - fairness: no process should stay without any timeslice for any
1253+ unreasonable amount of time. No process should get an unjustly high
1254+ amount of CPU time.
1255+
1256+ - priorities: less important tasks can be started with lower priority,
1257+ more important tasks with higher priority.
1258+
1259+ - SMP efficiency: no CPU should stay idle if there is work to do.
1260+
1261+ - SMP affinity: processes which run on one CPU should stay affine to
1262+ that CPU. Processes should not bounce between CPUs too frequently.
1263+
1264+ - plus additional scheduler features: RT scheduling, CPU binding.
1265+
1266+and the goal is also to add a few new things:
1267+
1268+ - fully O(1) scheduling. Are you tired of the recalculation loop
1269+ blowing the L1 cache away every now and then? Do you think the goodness
1270+ loop is taking a bit too long to finish if there are lots of runnable
1271+ processes? This new scheduler takes no prisoners: wakeup(), schedule(),
1272+ the timer interrupt are all O(1) algorithms. There is no recalculation
1273+ loop. There is no goodness loop either.
1274+
1275+ - 'perfect' SMP scalability. With the new scheduler there is no 'big'
1276+ runqueue_lock anymore - it's all per-CPU runqueues and locks - two
1277+ tasks on two separate CPUs can wake up, schedule and context-switch
1278+ completely in parallel, without any interlocking. All
1279+ scheduling-relevant data is structured for maximum scalability.
1280+
1281+ - better SMP affinity. The old scheduler has a particular weakness that
1282+ causes the random bouncing of tasks between CPUs when higher
1283+ priority/interactive tasks are present; this was observed and reported by many
1284+ people. The reason is that the timeslice recalculation loop first needs
1285+ every currently running task to consume its timeslice. But when this
1286+ happens on eg. an 8-way system, then this property starves an
1287+ increasing number of CPUs from executing any process. Once the last
1288+ task that has a timeslice left has finished using up that timeslice,
1289+ the recalculation loop is triggered and other CPUs can start executing
1290+ tasks again - after having idled around for a number of timer ticks.
1291+ The more CPUs, the worse this effect.
1292+
1293+ Furthermore, this same effect causes the bouncing effect as well:
1294+ whenever there is such a 'timeslice squeeze' of the global runqueue,
1295+ idle processors start executing tasks which are not affine to that CPU.
1296+ (because the affine tasks have finished off their timeslices already.)
1297+
1298+ The new scheduler solves this problem by distributing timeslices on a
1299+ per-CPU basis, without having any global synchronization or
1300+ recalculation.
1301+
1302+ - batch scheduling. A significant proportion of computing-intensive tasks
1303+ benefit from batch-scheduling, where timeslices are long and processes
1304+ are roundrobin scheduled. The new scheduler does such batch-scheduling
1305+ of the lowest priority tasks - so nice +19 jobs will get
1306+ 'batch-scheduled' automatically. With this scheduler, nice +19 jobs are
1307+ in essence SCHED_IDLE, from an interactiveness point of view.
1308+
1309+ - handle extreme loads more smoothly, without breakdown and scheduling
1310+ storms.
1311+
1312+ - O(1) RT scheduling. For those RT folks who are paranoid about the
1313+ O(nr_running) property of the goodness loop and the recalculation loop.
1314+
1315+ - run fork()ed children before the parent. Andrea has pointed out the
1316+ advantages of this a few months ago, but patches for this feature
1317+ do not work with the old scheduler as well as they should,
1318+ because idle processes often steal the new child before the fork()ing
1319+ CPU gets to execute it.
1320+
1321+
1322+Design
1323+======
1324+
1325+the core of the new scheduler consists of the following mechanisms:
1326+
1327+ - *two*, priority-ordered 'priority arrays' per CPU. There is an 'active'
1328+ array and an 'expired' array. The active array contains all tasks that
1329+ are affine to this CPU and have timeslices left. The expired array
1330+ contains all tasks which have used up their timeslices - but this array
1331+ is kept sorted as well. The active and expired arrays are not accessed
1332+ directly; they are accessed through two pointers in the per-CPU runqueue
1333+ structure. If all active tasks are used up then we 'switch' the two
1334+ pointers and from now on the ready-to-go (former-) expired array is the
1335+ active array - and the empty active array serves as the new collector
1336+ for expired tasks.
1337+
1338+ - there is a 64-bit bitmap cache for array indices. Finding the highest
1339+ priority task is thus a matter of two x86 BSFL bit-search instructions.
1340+
1341+the split-array solution enables us to have an arbitrary number of active
1342+and expired tasks, and the recalculation of timeslices can be done
1343+immediately when the timeslice expires. Because the arrays are always
1344+accessed through the pointers in the runqueue, switching the two arrays can
1345+be done very quickly.
1346+
1347+this is a hybrid priority-list approach coupled with roundrobin
1348+scheduling and the array-switch method of distributing timeslices.
1349+
1350+ - there is a per-task 'load estimator'.
1351+
1352+one of the toughest things to get right is good interactive feel during
1353+heavy system load. While playing with various scheduler variants i found
1354+that the best interactive feel is achieved not by 'boosting' interactive
1355+tasks, but by 'punishing' tasks that want to use more CPU time than there
1356+is available. This method is also much easier to do in an O(1) fashion.
1357+
1358+to establish the actual 'load' the task contributes to the system, a
1359+complex-looking but pretty accurate method is used: there is a 4-entry
1360+'history' ringbuffer of the task's activities during the last 4 seconds.
1361+This ringbuffer is operated without much overhead. The entries tell the
1362+scheduler a pretty accurate load-history of the task: has it used up more
1363+CPU time or less during the past N seconds. [the size '4' and the interval
1364+of 4x 1 seconds was found by lots of experimentation - this part is
1365+flexible and can be changed in both directions.]
1366+
1367+the penalty a task gets for generating more load than the CPU can handle
1368+is a priority decrease - there is a maximum amount to this penalty
1369+relative to their static priority, so even fully CPU-bound tasks will
1370+observe each other's priorities, and will share the CPU accordingly.
1371+
1372+the SMP load-balancer can be extended/switched with additional parallel
1373+computing and cache hierarchy concepts: NUMA scheduling, multi-core CPUs
1374+can be supported easily by changing the load-balancer. Right now it's
1375+tuned for my SMP systems.
1376+
1377+i skipped the prev->mm == next->mm advantage - no workload i know of shows
1378+any sensitivity to this. It can be added back by sacrificing O(1)
1379+schedule() [the current and one-lower priority list can be searched for a
1380+that->mm == current->mm condition], but costs a fair number of cycles
1381+during a number of important workloads, so i wanted to avoid this as much
1382+as possible.
1383+
1384+- the SMP idle-task startup code was still racy and the new scheduler
1385+triggered this. So i streamlined the idle-setup code a bit. We do not call
1386+into schedule() before all processors have started up fully and all idle
1387+threads are in place.
1388+
1389+- the patch also cleans up a number of aspects of sched.c - moves code
1390+into other areas of the kernel where it's appropriate, and simplifies
1391+certain code paths and data constructs. As a result, the new scheduler's
1392+code is smaller than the old one.
1393+
1394+ Ingo
1395diff -urN linux-2.4.22.org/drivers/char/drm-4.0/tdfx_drv.c linux-2.4.22/drivers/char/drm-4.0/tdfx_drv.c
1396--- linux-2.4.22.org/drivers/char/drm-4.0/tdfx_drv.c 2003-11-24 18:29:04.000000000 +0100
1397+++ linux-2.4.22/drivers/char/drm-4.0/tdfx_drv.c 2003-11-24 18:39:02.000000000 +0100
1398@@ -554,7 +554,6 @@
1399 lock.context, current->pid, j,
1400 dev->lock.lock_time, jiffies);
1401 current->state = TASK_INTERRUPTIBLE;
1402- current->policy |= SCHED_YIELD;
1403 schedule_timeout(DRM_LOCK_SLICE-j);
1404 DRM_DEBUG("jiffies=%d\n", jiffies);
1405 }
1406diff -urN linux-2.4.22.org/drivers/char/mwave/mwavedd.c linux-2.4.22/drivers/char/mwave/mwavedd.c
1407--- linux-2.4.22.org/drivers/char/mwave/mwavedd.c 2003-11-24 18:29:03.000000000 +0100
1408+++ linux-2.4.22/drivers/char/mwave/mwavedd.c 2003-11-24 18:39:02.000000000 +0100
1409@@ -279,7 +279,6 @@
1410 pDrvData->IPCs[ipcnum].bIsHere = FALSE;
1411 pDrvData->IPCs[ipcnum].bIsEnabled = TRUE;
1412 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0)
1413- current->nice = -20; /* boost to provide priority timing */
1414 #else
1415 current->priority = 0x28; /* boost to provide priority timing */
1416 #endif
1417diff -urN linux-2.4.22.org/drivers/char/serial_txx927.c linux-2.4.22/drivers/char/serial_txx927.c
1418--- linux-2.4.22.org/drivers/char/serial_txx927.c 2003-11-24 18:29:01.000000000 +0100
1419+++ linux-2.4.22/drivers/char/serial_txx927.c 2003-11-24 18:39:02.000000000 +0100
1420@@ -1533,7 +1533,6 @@
1421 printk("cisr = %d (jiff=%lu)...", cisr, jiffies);
1422 #endif
1423 current->state = TASK_INTERRUPTIBLE;
1424- current->counter = 0; /* make us low-priority */
1425 schedule_timeout(char_time);
1426 if (signal_pending(current))
1427 break;
1428diff -urN linux-2.4.22.org/drivers/md/md.c linux-2.4.22/drivers/md/md.c
1429--- linux-2.4.22.org/drivers/md/md.c 2003-11-24 18:29:41.000000000 +0100
1430+++ linux-2.4.22/drivers/md/md.c 2003-11-24 18:39:02.000000000 +0100
1431@@ -2939,8 +2939,6 @@
1432 * bdflush, otherwise bdflush will deadlock if there are too
1433 * many dirty RAID5 blocks.
1434 */
1435- current->policy = SCHED_OTHER;
1436- current->nice = -20;
1437 md_unlock_kernel();
1438
1439 complete(thread->event);
1440@@ -3464,11 +3462,6 @@
1441 "(but not more than %d KB/sec) for reconstruction.\n",
1442 sysctl_speed_limit_max);
1443
1444- /*
1445- * Resync has low priority.
1446- */
1447- current->nice = 19;
1448-
1449 is_mddev_idle(mddev); /* this also initializes IO event counters */
1450 for (m = 0; m < SYNC_MARKS; m++) {
1451 mark[m] = jiffies;
1452@@ -3546,16 +3539,13 @@
1453 currspeed = (j-mddev->resync_mark_cnt)/2/((jiffies-mddev->resync_mark)/HZ +1) +1;
1454
1455 if (currspeed > sysctl_speed_limit_min) {
1456- current->nice = 19;
1457-
1458 if ((currspeed > sysctl_speed_limit_max) ||
1459 !is_mddev_idle(mddev)) {
1460 current->state = TASK_INTERRUPTIBLE;
1461 md_schedule_timeout(HZ/4);
1462 goto repeat;
1463 }
1464- } else
1465- current->nice = -20;
1466+ }
1467 }
1468 printk(KERN_INFO "md: md%d: sync done.\n",mdidx(mddev));
1469 err = 0;
1470diff -urN linux-2.4.22.org/fs/binfmt_elf.c linux-2.4.22/fs/binfmt_elf.c
1471--- linux-2.4.22.org/fs/binfmt_elf.c 2003-11-24 18:28:10.000000000 +0100
1472+++ linux-2.4.22/fs/binfmt_elf.c 2003-11-24 18:39:02.000000000 +0100
1473@@ -1173,7 +1173,7 @@
1474 psinfo.pr_state = i;
1475 psinfo.pr_sname = (i < 0 || i > 5) ? '.' : "RSDZTD"[i];
1476 psinfo.pr_zomb = psinfo.pr_sname == 'Z';
1477- psinfo.pr_nice = current->nice;
1478+ psinfo.pr_nice = task_nice(current);
1479 psinfo.pr_flag = current->flags;
1480 psinfo.pr_uid = NEW_TO_OLD_UID(current->uid);
1481 psinfo.pr_gid = NEW_TO_OLD_GID(current->gid);
1482diff -urN linux-2.4.22.org/fs/jffs2/background.c linux-2.4.22/fs/jffs2/background.c
1483--- linux-2.4.22.org/fs/jffs2/background.c 2003-11-24 18:28:15.000000000 +0100
1484+++ linux-2.4.22/fs/jffs2/background.c 2003-11-24 18:39:02.000000000 +0100
1485@@ -106,9 +106,6 @@
1486
1487 sprintf(current->comm, "jffs2_gcd_mtd%d", c->mtd->index);
1488
1489- /* FIXME in the 2.2 backport */
1490- current->nice = 10;
1491-
1492 for (;;) {
1493 spin_lock_irq(&current->sigmask_lock);
1494 siginitsetinv (&current->blocked, sigmask(SIGHUP) | sigmask(SIGKILL) | sigmask(SIGSTOP) | sigmask(SIGCONT));
1495diff -urN linux-2.4.22.org/fs/proc/array.c linux-2.4.22/fs/proc/array.c
1496--- linux-2.4.22.org/fs/proc/array.c 2003-11-24 18:28:11.000000000 +0100
1497+++ linux-2.4.22/fs/proc/array.c 2003-11-24 18:39:02.000000000 +0100
1498@@ -339,9 +339,8 @@
1499
1500 /* scale priority and nice values from timeslices to -20..20 */
1501 /* to make it look like a "normal" Unix priority/nice value */
1502- priority = task->counter;
1503- priority = 20 - (priority * 10 + DEF_COUNTER / 2) / DEF_COUNTER;
1504- nice = task->nice;
1505+ priority = task_prio(task);
1506+ nice = task_nice(task);
1507
1508 read_lock(&tasklist_lock);
1509 ppid = task->pid ? task->p_opptr->pid : 0;
1510@@ -391,7 +390,7 @@
1511 task->nswap,
1512 task->cnswap,
1513 task->exit_signal,
1514- task->processor);
1515+ task->cpu);
1516 if(mm)
1517 mmput(mm);
1518 return res;
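
task_prio() and task_nice() replace the open-coded conversion from the old counter/nice fields. For reference, the scaling that the removed lines performed is equivalent to this stand-alone helper (DEF_COUNTER was the default timeslice in ticks):

/* Sketch of the pre-O(1) scaling that /proc/<pid>/stat used: map the
 * remaining timeslice onto roughly +20..-20 so that a full default
 * slice shows up as priority 10. */
static int old_proc_priority(long counter, long def_counter)
{
	return 20 - (counter * 10 + def_counter / 2) / def_counter;
}
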
1519diff -urN linux-2.4.22.org/fs/proc/proc_misc.c linux-2.4.22/fs/proc/proc_misc.c
1520--- linux-2.4.22.org/fs/proc/proc_misc.c 2003-11-24 18:28:11.000000000 +0100
1521+++ linux-2.4.22/fs/proc/proc_misc.c 2003-11-24 18:39:02.000000000 +0100
1522@@ -109,11 +109,11 @@
1523 a = avenrun[0] + (FIXED_1/200);
1524 b = avenrun[1] + (FIXED_1/200);
1525 c = avenrun[2] + (FIXED_1/200);
1526- len = sprintf(page,"%d.%02d %d.%02d %d.%02d %d/%d %d\n",
1527+ len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
1528 LOAD_INT(a), LOAD_FRAC(a),
1529 LOAD_INT(b), LOAD_FRAC(b),
1530 LOAD_INT(c), LOAD_FRAC(c),
1531- nr_running, nr_threads, last_pid);
1532+ nr_running(), nr_threads, last_pid);
1533 return proc_calc_metrics(page, start, off, count, eof, len);
1534 }
1535
1536@@ -125,7 +125,7 @@
1537 int len;
1538
1539 uptime = jiffies;
1540- idle = init_tasks[0]->times.tms_utime + init_tasks[0]->times.tms_stime;
1541+ idle = init_task.times.tms_utime + init_task.times.tms_stime;
1542
1543 /* The formula for the fraction parts really is ((t * 100) / HZ) % 100, but
1544 that would overflow about every five days at HZ == 100.
1545@@ -374,10 +374,10 @@
1546 }
1547
1548 proc_sprintf(page, &off, &len,
1549- "\nctxt %u\n"
1550+ "\nctxt %lu\n"
1551 "btime %lu\n"
1552 "processes %lu\n",
1553- kstat.context_swtch,
1554+ nr_context_switches(),
1555 xtime.tv_sec - jif / HZ,
1556 total_forks);
1557
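
nr_running and kstat.context_swtch stop being single global counters: the O(1) scheduler keeps them per runqueue, so /proc/loadavg and /proc/stat call nr_running() and nr_context_switches(), which sum the per-CPU values. A sketch of the kind of loop those helpers perform; the runqueue field name is an assumption here, and the real definitions live in the kernel/sched.c part of the patch:

/* Illustrative only: sum a per-runqueue counter over all online CPUs.
 * cpu_rq() and the nr_running field belong to the new scheduler core. */
static unsigned long sketch_nr_running(void)
{
	unsigned long sum = 0;
	int i;

	for (i = 0; i < smp_num_cpus; i++)
		sum += cpu_rq(cpu_logical_map(i))->nr_running;
	return sum;
}
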
1558diff -urN linux-2.4.22.org/fs/reiserfs/buffer2.c linux-2.4.22/fs/reiserfs/buffer2.c
1559--- linux-2.4.22.org/fs/reiserfs/buffer2.c 2003-11-24 18:28:15.000000000 +0100
1560+++ linux-2.4.22/fs/reiserfs/buffer2.c 2003-11-24 18:39:02.000000000 +0100
1561@@ -51,11 +51,11 @@
1562 struct buffer_head * reiserfs_bread (struct super_block *super, int n_block, int n_size)
1563 {
1564 struct buffer_head *result;
1565- PROC_EXP( unsigned int ctx_switches = kstat.context_swtch );
1566+ PROC_EXP( unsigned int ctx_switches = nr_context_switches(); );
1567
1568 result = bread (super -> s_dev, n_block, n_size);
1569 PROC_INFO_INC( super, breads );
1570- PROC_EXP( if( kstat.context_swtch != ctx_switches )
1571+ PROC_EXP( if( nr_context_switches() != ctx_switches )
1572 PROC_INFO_INC( super, bread_miss ) );
1573 return result;
1574 }
1575diff -urN linux-2.4.22.org/include/asm-alpha/bitops.h linux-2.4.22/include/asm-alpha/bitops.h
1576--- linux-2.4.22.org/include/asm-alpha/bitops.h 2003-11-24 18:28:26.000000000 +0100
1577+++ linux-2.4.22/include/asm-alpha/bitops.h 2003-11-24 18:39:02.000000000 +0100
1578@@ -3,6 +3,7 @@
1579
1580 #include <linux/config.h>
1581 #include <linux/kernel.h>
1582+#include <asm/compiler.h>
1583
1584 /*
1585 * Copyright 1994, Linus Torvalds.
1586@@ -60,25 +61,25 @@
1587
1588 __asm__ __volatile__(
1589 "1: ldl_l %0,%3\n"
1590- " and %0,%2,%0\n"
1591+ " bic %0,%2,%0\n"
1592 " stl_c %0,%1\n"
1593 " beq %0,2f\n"
1594 ".subsection 2\n"
1595 "2: br 1b\n"
1596 ".previous"
1597 :"=&r" (temp), "=m" (*m)
1598- :"Ir" (~(1UL << (nr & 31))), "m" (*m));
1599+ :"Ir" (1UL << (nr & 31)), "m" (*m));
1600 }
1601
1602 /*
1603 * WARNING: non atomic version.
1604 */
1605 static __inline__ void
1606-__change_bit(unsigned long nr, volatile void * addr)
1607+__clear_bit(unsigned long nr, volatile void * addr)
1608 {
1609 int *m = ((int *) addr) + (nr >> 5);
1610
1611- *m ^= 1 << (nr & 31);
1612+ *m &= ~(1 << (nr & 31));
1613 }
1614
1615 static inline void
1616@@ -99,6 +100,17 @@
1617 :"Ir" (1UL << (nr & 31)), "m" (*m));
1618 }
1619
1620+/*
1621+ * WARNING: non atomic version.
1622+ */
1623+static __inline__ void
1624+__change_bit(unsigned long nr, volatile void * addr)
1625+{
1626+ int *m = ((int *) addr) + (nr >> 5);
1627+
1628+ *m ^= 1 << (nr & 31);
1629+}
1630+
1631 static inline int
1632 test_and_set_bit(unsigned long nr, volatile void *addr)
1633 {
1634@@ -181,20 +193,6 @@
1635 return (old & mask) != 0;
1636 }
1637
1638-/*
1639- * WARNING: non atomic version.
1640- */
1641-static __inline__ int
1642-__test_and_change_bit(unsigned long nr, volatile void * addr)
1643-{
1644- unsigned long mask = 1 << (nr & 0x1f);
1645- int *m = ((int *) addr) + (nr >> 5);
1646- int old = *m;
1647-
1648- *m = old ^ mask;
1649- return (old & mask) != 0;
1650-}
1651-
1652 static inline int
1653 test_and_change_bit(unsigned long nr, volatile void * addr)
1654 {
1655@@ -220,6 +218,20 @@
1656 return oldbit != 0;
1657 }
1658
1659+/*
1660+ * WARNING: non atomic version.
1661+ */
1662+static __inline__ int
1663+__test_and_change_bit(unsigned long nr, volatile void * addr)
1664+{
1665+ unsigned long mask = 1 << (nr & 0x1f);
1666+ int *m = ((int *) addr) + (nr >> 5);
1667+ int old = *m;
1668+
1669+ *m = old ^ mask;
1670+ return (old & mask) != 0;
1671+}
1672+
1673 static inline int
1674 test_bit(int nr, volatile void * addr)
1675 {
1676@@ -235,12 +247,15 @@
1677 */
1678 static inline unsigned long ffz_b(unsigned long x)
1679 {
1680- unsigned long sum = 0;
1681+ unsigned long sum, x1, x2, x4;
1682
1683 x = ~x & -~x; /* set first 0 bit, clear others */
1684- if (x & 0xF0) sum += 4;
1685- if (x & 0xCC) sum += 2;
1686- if (x & 0xAA) sum += 1;
1687+ x1 = x & 0xAA;
1688+ x2 = x & 0xCC;
1689+ x4 = x & 0xF0;
1690+ sum = x2 ? 2 : 0;
1691+ sum += (x4 != 0) * 4;
1692+ sum += (x1 != 0);
1693
1694 return sum;
1695 }
1696@@ -257,24 +272,46 @@
1697
1698 __asm__("cmpbge %1,%2,%0" : "=r"(bits) : "r"(word), "r"(~0UL));
1699 qofs = ffz_b(bits);
1700- __asm__("extbl %1,%2,%0" : "=r"(bits) : "r"(word), "r"(qofs));
1701+ bits = __kernel_extbl(word, qofs);
1702 bofs = ffz_b(bits);
1703
1704 return qofs*8 + bofs;
1705 #endif
1706 }
1707
1708+/*
1709+ * __ffs = Find First set bit in word. Undefined if no set bit exists.
1710+ */
1711+static inline unsigned long __ffs(unsigned long word)
1712+{
1713+#if defined(__alpha_cix__) && defined(__alpha_fix__)
1714+ /* Whee. EV67 can calculate it directly. */
1715+ unsigned long result;
1716+ __asm__("cttz %1,%0" : "=r"(result) : "r"(word));
1717+ return result;
1718+#else
1719+ unsigned long bits, qofs, bofs;
1720+
1721+ __asm__("cmpbge $31,%1,%0" : "=r"(bits) : "r"(word));
1722+ qofs = ffz_b(bits);
1723+ bits = __kernel_extbl(word, qofs);
1724+ bofs = ffz_b(~bits);
1725+
1726+ return qofs*8 + bofs;
1727+#endif
1728+}
1729+
1730 #ifdef __KERNEL__
1731
1732 /*
1733 * ffs: find first bit set. This is defined the same way as
1734 * the libc and compiler builtin ffs routines, therefore
1735- * differs in spirit from the above ffz (man ffs).
1736+ * differs in spirit from the above __ffs.
1737 */
1738
1739 static inline int ffs(int word)
1740 {
1741- int result = ffz(~word);
1742+ int result = __ffs(word);
1743 return word ? result+1 : 0;
1744 }
1745
1746@@ -316,6 +353,14 @@
1747 #define hweight16(x) hweight64((x) & 0xfffful)
1748 #define hweight8(x) hweight64((x) & 0xfful)
1749 #else
1750+static inline unsigned long hweight64(unsigned long w)
1751+{
1752+ unsigned long result;
1753+ for (result = 0; w ; w >>= 1)
1754+ result += (w & 1);
1755+ return result;
1756+}
1757+
1758 #define hweight32(x) generic_hweight32(x)
1759 #define hweight16(x) generic_hweight16(x)
1760 #define hweight8(x) generic_hweight8(x)
1761@@ -365,13 +410,77 @@
1762 }
1763
1764 /*
1765- * The optimizer actually does good code for this case..
1766+ * Find next one bit in a bitmap reasonably efficiently.
1767+ */
1768+static inline unsigned long
1769+find_next_bit(void * addr, unsigned long size, unsigned long offset)
1770+{
1771+ unsigned long * p = ((unsigned long *) addr) + (offset >> 6);
1772+ unsigned long result = offset & ~63UL;
1773+ unsigned long tmp;
1774+
1775+ if (offset >= size)
1776+ return size;
1777+ size -= result;
1778+ offset &= 63UL;
1779+ if (offset) {
1780+ tmp = *(p++);
1781+ tmp &= ~0UL << offset;
1782+ if (size < 64)
1783+ goto found_first;
1784+ if (tmp)
1785+ goto found_middle;
1786+ size -= 64;
1787+ result += 64;
1788+ }
1789+ while (size & ~63UL) {
1790+ if ((tmp = *(p++)))
1791+ goto found_middle;
1792+ result += 64;
1793+ size -= 64;
1794+ }
1795+ if (!size)
1796+ return result;
1797+ tmp = *p;
1798+found_first:
1799+ tmp &= ~0UL >> (64 - size);
1800+ if (!tmp)
1801+ return result + size;
1802+found_middle:
1803+ return result + __ffs(tmp);
1804+}
1805+
1806+/*
1807+ * The optimizer actually does good code for this case.
1808 */
1809 #define find_first_zero_bit(addr, size) \
1810 find_next_zero_bit((addr), (size), 0)
1811+#define find_first_bit(addr, size) \
1812+ find_next_bit((addr), (size), 0)
1813
1814 #ifdef __KERNEL__
1815
1816+/*
1817+ * Every architecture must define this function. It's the fastest
1818+ * way of searching a 140-bit bitmap where the first 100 bits are
1819+ * unlikely to be set. It's guaranteed that at least one of the 140
1820+ * bits is set.
1821+ */
1822+static inline unsigned long
1823+sched_find_first_bit(unsigned long b[3])
1824+{
1825+ unsigned long b0 = b[0], b1 = b[1], b2 = b[2];
1826+ unsigned long ofs;
1827+
1828+ ofs = (b1 ? 64 : 128);
1829+ b1 = (b1 ? b1 : b2);
1830+ ofs = (b0 ? 0 : ofs);
1831+ b0 = (b0 ? b0 : b1);
1832+
1833+ return __ffs(b0) + ofs;
1834+}
1835+
1836+
1837 #define ext2_set_bit __test_and_set_bit
1838 #define ext2_clear_bit __test_and_clear_bit
1839 #define ext2_test_bit test_bit
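
sched_find_first_bit() takes three 64-bit words (192 bits, of which the scheduler uses 140) and reduces them branch-free before a single __ffs(). A worked example with a made-up bitmap:

/* Sketch: only priority 101 is queued, i.e. bit 37 of word 1 is set.
 * The reduction picks ofs = 64 and b0 = b[1], so the result is
 * __ffs(b[1]) + 64 == 101. */
static unsigned long sketch_next_prio(void)
{
	unsigned long prio_bitmap[3] = { 0, 1UL << 37, 0 };

	return sched_find_first_bit(prio_bitmap);	/* == 101 */
}
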
1840diff -urN linux-2.4.22.org/include/asm-alpha/smp.h linux-2.4.22/include/asm-alpha/smp.h
1841--- linux-2.4.22.org/include/asm-alpha/smp.h 2003-11-24 18:28:26.000000000 +0100
1842+++ linux-2.4.22/include/asm-alpha/smp.h 2003-11-24 18:39:02.000000000 +0100
1843@@ -55,7 +55,7 @@
1844 #define cpu_logical_map(cpu) __cpu_logical_map[cpu]
1845
1846 #define hard_smp_processor_id() __hard_smp_processor_id()
1847-#define smp_processor_id() (current->processor)
1848+#define smp_processor_id() (current->cpu)
1849
1850 extern unsigned long cpu_present_mask;
1851 #define cpu_online_map cpu_present_mask
1852diff -urN linux-2.4.22.org/include/asm-alpha/system.h linux-2.4.22/include/asm-alpha/system.h
1853--- linux-2.4.22.org/include/asm-alpha/system.h 2003-11-24 18:28:26.000000000 +0100
1854+++ linux-2.4.22/include/asm-alpha/system.h 2003-11-24 18:39:02.000000000 +0100
1855@@ -131,7 +131,6 @@
1856 extern void halt(void) __attribute__((noreturn));
1857 #define __halt() __asm__ __volatile__ ("call_pal %0 #halt" : : "i" (PAL_halt))
1858
1859-#define prepare_to_switch() do { } while(0)
1860 #define switch_to(prev,next,last) \
1861 do { \
1862 unsigned long pcbb; \
1863diff -urN linux-2.4.22.org/include/asm-arm/bitops.h linux-2.4.22/include/asm-arm/bitops.h
1864--- linux-2.4.22.org/include/asm-arm/bitops.h 2003-11-24 18:28:30.000000000 +0100
1865+++ linux-2.4.22/include/asm-arm/bitops.h 2003-11-24 18:39:02.000000000 +0100
1866@@ -2,6 +2,8 @@
1867 * Copyright 1995, Russell King.
1868 * Various bits and pieces copyrights include:
1869 * Linus Torvalds (test_bit).
1870+ * Big endian support: Copyright 2001, Nicolas Pitre
1871+ * reworked by rmk.
1872 *
1873 * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
1874 *
1875@@ -17,81 +19,271 @@
1876
1877 #ifdef __KERNEL__
1878
1879+#include <asm/system.h>
1880+
1881 #define smp_mb__before_clear_bit() do { } while (0)
1882 #define smp_mb__after_clear_bit() do { } while (0)
1883
1884 /*
1885- * Function prototypes to keep gcc -Wall happy.
1886+ * These functions are the basis of our bit ops.
1887+ * First, the atomic bitops.
1888+ *
1889+ * The endian issue for these functions is handled by the macros below.
1890 */
1891-extern void set_bit(int nr, volatile void * addr);
1892+static inline void
1893+____atomic_set_bit_mask(unsigned int mask, volatile unsigned char *p)
1894+{
1895+ unsigned long flags;
1896+
1897+ local_irq_save(flags);
1898+ *p |= mask;
1899+ local_irq_restore(flags);
1900+}
1901+
1902+static inline void
1903+____atomic_clear_bit_mask(unsigned int mask, volatile unsigned char *p)
1904+{
1905+ unsigned long flags;
1906+
1907+ local_irq_save(flags);
1908+ *p &= ~mask;
1909+ local_irq_restore(flags);
1910+}
1911+
1912+static inline void
1913+____atomic_change_bit_mask(unsigned int mask, volatile unsigned char *p)
1914+{
1915+ unsigned long flags;
1916+
1917+ local_irq_save(flags);
1918+ *p ^= mask;
1919+ local_irq_restore(flags);
1920+}
1921
1922-static inline void __set_bit(int nr, volatile void *addr)
1923+static inline int
1924+____atomic_test_and_set_bit_mask(unsigned int mask, volatile unsigned char *p)
1925 {
1926- ((unsigned char *) addr)[nr >> 3] |= (1U << (nr & 7));
1927+ unsigned long flags;
1928+ unsigned int res;
1929+
1930+ local_irq_save(flags);
1931+ res = *p;
1932+ *p = res | mask;
1933+ local_irq_restore(flags);
1934+
1935+ return res & mask;
1936 }
1937
1938-extern void clear_bit(int nr, volatile void * addr);
1939+static inline int
1940+____atomic_test_and_clear_bit_mask(unsigned int mask, volatile unsigned char *p)
1941+{
1942+ unsigned long flags;
1943+ unsigned int res;
1944+
1945+ local_irq_save(flags);
1946+ res = *p;
1947+ *p = res & ~mask;
1948+ local_irq_restore(flags);
1949+
1950+ return res & mask;
1951+}
1952
1953-static inline void __clear_bit(int nr, volatile void *addr)
1954+static inline int
1955+____atomic_test_and_change_bit_mask(unsigned int mask, volatile unsigned char *p)
1956 {
1957- ((unsigned char *) addr)[nr >> 3] &= ~(1U << (nr & 7));
1958+ unsigned long flags;
1959+ unsigned int res;
1960+
1961+ local_irq_save(flags);
1962+ res = *p;
1963+ *p = res ^ mask;
1964+ local_irq_restore(flags);
1965+
1966+ return res & mask;
1967 }
1968
1969-extern void change_bit(int nr, volatile void * addr);
1970+/*
1971+ * Now the non-atomic variants. We let the compiler handle all optimisations
1972+ * for these.
1973+ */
1974+static inline void ____nonatomic_set_bit(int nr, volatile void *p)
1975+{
1976+ ((unsigned char *) p)[nr >> 3] |= (1U << (nr & 7));
1977+}
1978
1979-static inline void __change_bit(int nr, volatile void *addr)
1980+static inline void ____nonatomic_clear_bit(int nr, volatile void *p)
1981 {
1982- ((unsigned char *) addr)[nr >> 3] ^= (1U << (nr & 7));
1983+ ((unsigned char *) p)[nr >> 3] &= ~(1U << (nr & 7));
1984 }
1985
1986-extern int test_and_set_bit(int nr, volatile void * addr);
1987+static inline void ____nonatomic_change_bit(int nr, volatile void *p)
1988+{
1989+ ((unsigned char *) p)[nr >> 3] ^= (1U << (nr & 7));
1990+}
1991
1992-static inline int __test_and_set_bit(int nr, volatile void *addr)
1993+static inline int ____nonatomic_test_and_set_bit(int nr, volatile void *p)
1994 {
1995 unsigned int mask = 1 << (nr & 7);
1996 unsigned int oldval;
1997
1998- oldval = ((unsigned char *) addr)[nr >> 3];
1999- ((unsigned char *) addr)[nr >> 3] = oldval | mask;
2000+ oldval = ((unsigned char *) p)[nr >> 3];
2001+ ((unsigned char *) p)[nr >> 3] = oldval | mask;
2002 return oldval & mask;
2003 }
2004
2005-extern int test_and_clear_bit(int nr, volatile void * addr);
2006-
2007-static inline int __test_and_clear_bit(int nr, volatile void *addr)
2008+static inline int ____nonatomic_test_and_clear_bit(int nr, volatile void *p)
2009 {
2010 unsigned int mask = 1 << (nr & 7);
2011 unsigned int oldval;
2012
2013- oldval = ((unsigned char *) addr)[nr >> 3];
2014- ((unsigned char *) addr)[nr >> 3] = oldval & ~mask;
2015+ oldval = ((unsigned char *) p)[nr >> 3];
2016+ ((unsigned char *) p)[nr >> 3] = oldval & ~mask;
2017 return oldval & mask;
2018 }
2019
2020-extern int test_and_change_bit(int nr, volatile void * addr);
2021-
2022-static inline int __test_and_change_bit(int nr, volatile void *addr)
2023+static inline int ____nonatomic_test_and_change_bit(int nr, volatile void *p)
2024 {
2025 unsigned int mask = 1 << (nr & 7);
2026 unsigned int oldval;
2027
2028- oldval = ((unsigned char *) addr)[nr >> 3];
2029- ((unsigned char *) addr)[nr >> 3] = oldval ^ mask;
2030+ oldval = ((unsigned char *) p)[nr >> 3];
2031+ ((unsigned char *) p)[nr >> 3] = oldval ^ mask;
2032 return oldval & mask;
2033 }
2034
2035-extern int find_first_zero_bit(void * addr, unsigned size);
2036-extern int find_next_zero_bit(void * addr, int size, int offset);
2037-
2038 /*
2039 * This routine doesn't need to be atomic.
2040 */
2041-static inline int test_bit(int nr, const void * addr)
2042+static inline int ____test_bit(int nr, const void * p)
2043 {
2044- return (((unsigned char *) addr)[nr >> 3] >> (nr & 7)) & 1;
2045+ return (((volatile unsigned char *) p)[nr >> 3] >> (nr & 7)) & 1;
2046 }
2047
2048 /*
2049+ * A note about Endian-ness.
2050+ * -------------------------
2051+ *
2052+ * When the ARM is put into big endian mode via CR15, the processor
2053+ * merely swaps the order of bytes within words, thus:
2054+ *
2055+ * ------------ physical data bus bits -----------
2056+ * D31 ... D24 D23 ... D16 D15 ... D8 D7 ... D0
2057+ * little byte 3 byte 2 byte 1 byte 0
2058+ * big byte 0 byte 1 byte 2 byte 3
2059+ *
2060+ * This means that reading a 32-bit word at address 0 returns the same
2061+ * value irrespective of the endian mode bit.
2062+ *
2063+ * Peripheral devices should be connected with the data bus reversed in
2064+ * "Big Endian" mode. ARM Application Note 61 is applicable, and is
2065+ * available from http://www.arm.com/.
2066+ *
2067+ * The following assumes that the data bus connectivity for big endian
2068+ * mode has been followed.
2069+ *
2070+ * Note that bit 0 is defined to be 32-bit word bit 0, not byte 0 bit 0.
2071+ */
2072+
2073+/*
2074+ * Little endian assembly bitops. nr = 0 -> byte 0 bit 0.
2075+ */
2076+extern void _set_bit_le(int nr, volatile void * p);
2077+extern void _clear_bit_le(int nr, volatile void * p);
2078+extern void _change_bit_le(int nr, volatile void * p);
2079+extern int _test_and_set_bit_le(int nr, volatile void * p);
2080+extern int _test_and_clear_bit_le(int nr, volatile void * p);
2081+extern int _test_and_change_bit_le(int nr, volatile void * p);
2082+extern int _find_first_zero_bit_le(void * p, unsigned size);
2083+extern int _find_next_zero_bit_le(void * p, int size, int offset);
2084+
2085+/*
2086+ * Big endian assembly bitops. nr = 0 -> byte 3 bit 0.
2087+ */
2088+extern void _set_bit_be(int nr, volatile void * p);
2089+extern void _clear_bit_be(int nr, volatile void * p);
2090+extern void _change_bit_be(int nr, volatile void * p);
2091+extern int _test_and_set_bit_be(int nr, volatile void * p);
2092+extern int _test_and_clear_bit_be(int nr, volatile void * p);
2093+extern int _test_and_change_bit_be(int nr, volatile void * p);
2094+extern int _find_first_zero_bit_be(void * p, unsigned size);
2095+extern int _find_next_zero_bit_be(void * p, int size, int offset);
2096+
2097+
2098+/*
2099+ * The __* form of bitops are non-atomic and may be reordered.
2100+ */
2101+#define ATOMIC_BITOP_LE(name,nr,p) \
2102+ (__builtin_constant_p(nr) ? \
2103+ ____atomic_##name##_mask(1 << ((nr) & 7), \
2104+ ((unsigned char *)(p)) + ((nr) >> 3)) : \
2105+ _##name##_le(nr,p))
2106+
2107+#define ATOMIC_BITOP_BE(name,nr,p) \
2108+ (__builtin_constant_p(nr) ? \
2109+ ____atomic_##name##_mask(1 << ((nr) & 7), \
2110+ ((unsigned char *)(p)) + (((nr) >> 3) ^ 3)) : \
2111+ _##name##_be(nr,p))
2112+
2113+#define NONATOMIC_BITOP_LE(name,nr,p) \
2114+ (____nonatomic_##name(nr, p))
2115+
2116+#define NONATOMIC_BITOP_BE(name,nr,p) \
2117+ (____nonatomic_##name(nr ^ 0x18, p))
2118+
2119+#ifndef __ARMEB__
2120+/*
2121+ * These are the little endian, atomic definitions.
2122+ */
2123+#define set_bit(nr,p) ATOMIC_BITOP_LE(set_bit,nr,p)
2124+#define clear_bit(nr,p) ATOMIC_BITOP_LE(clear_bit,nr,p)
2125+#define change_bit(nr,p) ATOMIC_BITOP_LE(change_bit,nr,p)
2126+#define test_and_set_bit(nr,p) ATOMIC_BITOP_LE(test_and_set_bit,nr,p)
2127+#define test_and_clear_bit(nr,p) ATOMIC_BITOP_LE(test_and_clear_bit,nr,p)
2128+#define test_and_change_bit(nr,p) ATOMIC_BITOP_LE(test_and_change_bit,nr,p)
2129+#define test_bit(nr,p) ____test_bit(nr,p)
2130+#define find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz)
2131+#define find_next_zero_bit(p,sz,off) _find_next_zero_bit_le(p,sz,off)
2132+
2133+/*
2134+ * These are the little endian, non-atomic definitions.
2135+ */
2136+#define __set_bit(nr,p) NONATOMIC_BITOP_LE(set_bit,nr,p)
2137+#define __clear_bit(nr,p) NONATOMIC_BITOP_LE(clear_bit,nr,p)
2138+#define __change_bit(nr,p) NONATOMIC_BITOP_LE(change_bit,nr,p)
2139+#define __test_and_set_bit(nr,p) NONATOMIC_BITOP_LE(test_and_set_bit,nr,p)
2140+#define __test_and_clear_bit(nr,p) NONATOMIC_BITOP_LE(test_and_clear_bit,nr,p)
2141+#define __test_and_change_bit(nr,p) NONATOMIC_BITOP_LE(test_and_change_bit,nr,p)
2142+#define __test_bit(nr,p) ____test_bit(nr,p)
2143+
2144+#else
2145+
2146+/*
2147+ * These are the big endian, atomic definitions.
2148+ */
2149+#define set_bit(nr,p) ATOMIC_BITOP_BE(set_bit,nr,p)
2150+#define clear_bit(nr,p) ATOMIC_BITOP_BE(clear_bit,nr,p)
2151+#define change_bit(nr,p) ATOMIC_BITOP_BE(change_bit,nr,p)
2152+#define test_and_set_bit(nr,p) ATOMIC_BITOP_BE(test_and_set_bit,nr,p)
2153+#define test_and_clear_bit(nr,p) ATOMIC_BITOP_BE(test_and_clear_bit,nr,p)
2154+#define test_and_change_bit(nr,p) ATOMIC_BITOP_BE(test_and_change_bit,nr,p)
2155+#define test_bit(nr,p) ____test_bit((nr) ^ 0x18, p)
2156+#define find_first_zero_bit(p,sz) _find_first_zero_bit_be(p,sz)
2157+#define find_next_zero_bit(p,sz,off) _find_next_zero_bit_be(p,sz,off)
2158+
2159+/*
2160+ * These are the big endian, non-atomic definitions.
2161+ */
2162+#define __set_bit(nr,p) NONATOMIC_BITOP_BE(set_bit,nr,p)
2163+#define __clear_bit(nr,p) NONATOMIC_BITOP_BE(clear_bit,nr,p)
2164+#define __change_bit(nr,p) NONATOMIC_BITOP_BE(change_bit,nr,p)
2165+#define __test_and_set_bit(nr,p) NONATOMIC_BITOP_BE(test_and_set_bit,nr,p)
2166+#define __test_and_clear_bit(nr,p) NONATOMIC_BITOP_BE(test_and_clear_bit,nr,p)
2167+#define __test_and_change_bit(nr,p) NONATOMIC_BITOP_BE(test_and_change_bit,nr,p)
2168+#define __test_bit(nr,p) ____test_bit((nr) ^ 0x18, p)
2169+
2170+#endif
2171+
2172+/*
2173 * ffz = Find First Zero in word. Undefined if no zero exists,
2174 * so code should check against ~0UL first..
2175 */
2176@@ -110,6 +302,29 @@
2177 }
2178
2179 /*
2180+ * __ffs = Find First set bit in word. Undefined if no set bit exists,
2181+ * so code should check against 0UL first..
2182+ */
2183+static inline unsigned long __ffs(unsigned long word)
2184+{
2185+ int k;
2186+
2187+ k = 31;
2188+ if (word & 0x0000ffff) { k -= 16; word <<= 16; }
2189+ if (word & 0x00ff0000) { k -= 8; word <<= 8; }
2190+ if (word & 0x0f000000) { k -= 4; word <<= 4; }
2191+ if (word & 0x30000000) { k -= 2; word <<= 2; }
2192+ if (word & 0x40000000) { k -= 1; }
2193+ return k;
2194+}
2195+
2196+/*
2197+ * fls: find last bit set.
2198+ */
2199+
2200+#define fls(x) generic_fls(x)
2201+
2202+/*
2203 * ffs: find first bit set. This is defined the same way as
2204 * the libc and compiler builtin ffs routines, therefore
2205 * differs in spirit from the above ffz (man ffs).
2206@@ -118,6 +333,22 @@
2207 #define ffs(x) generic_ffs(x)
2208
2209 /*
2210+ * Find first bit set in a 168-bit bitmap, where the first
2211+ * 128 bits are unlikely to be set.
2212+ */
2213+static inline int sched_find_first_bit(unsigned long *b)
2214+{
2215+ unsigned long v;
2216+ unsigned int off;
2217+
2218+ for (off = 0; v = b[off], off < 4; off++) {
2219+ if (unlikely(v))
2220+ break;
2221+ }
2222+ return __ffs(v) + off * 32;
2223+}
2224+
2225+/*
2226 * hweightN: returns the hamming weight (i.e. the number
2227 * of bits set) of a N-bit word
2228 */
2229@@ -126,18 +357,25 @@
2230 #define hweight16(x) generic_hweight16(x)
2231 #define hweight8(x) generic_hweight8(x)
2232
2233-#define ext2_set_bit test_and_set_bit
2234-#define ext2_clear_bit test_and_clear_bit
2235-#define ext2_test_bit test_bit
2236-#define ext2_find_first_zero_bit find_first_zero_bit
2237-#define ext2_find_next_zero_bit find_next_zero_bit
2238-
2239-/* Bitmap functions for the minix filesystem. */
2240-#define minix_test_and_set_bit(nr,addr) test_and_set_bit(nr,addr)
2241-#define minix_set_bit(nr,addr) set_bit(nr,addr)
2242-#define minix_test_and_clear_bit(nr,addr) test_and_clear_bit(nr,addr)
2243-#define minix_test_bit(nr,addr) test_bit(nr,addr)
2244-#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
2245+/*
2246+ * Ext2 is defined to use little-endian byte ordering.
2247+ * These do not need to be atomic.
2248+ */
2249+#define ext2_set_bit(nr,p) NONATOMIC_BITOP_LE(test_and_set_bit,nr,p)
2250+#define ext2_clear_bit(nr,p) NONATOMIC_BITOP_LE(test_and_clear_bit,nr,p)
2251+#define ext2_test_bit(nr,p) __test_bit(nr,p)
2252+#define ext2_find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz)
2253+#define ext2_find_next_zero_bit(p,sz,off) _find_next_zero_bit_le(p,sz,off)
2254+
2255+/*
2256+ * Minix is defined to use little-endian byte ordering.
2257+ * These do not need to be atomic.
2258+ */
2259+#define minix_set_bit(nr,p) NONATOMIC_BITOP_LE(set_bit,nr,p)
2260+#define minix_test_bit(nr,p) __test_bit(nr,p)
2261+#define minix_test_and_set_bit(nr,p) NONATOMIC_BITOP_LE(test_and_set_bit,nr,p)
2262+#define minix_test_and_clear_bit(nr,p) NONATOMIC_BITOP_LE(test_and_clear_bit,nr,p)
2263+#define minix_find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz)
2264
2265 #endif /* __KERNEL__ */
2266
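
The endian handling above boils down to byte-address arithmetic: in big-endian mode bit nr lives in byte ((nr >> 3) ^ 3) of its 32-bit word, which is why the BE macros XOR the byte offset with 3 and the non-atomic BE ops XOR nr with 0x18. A small illustration of that address calculation (not part of the patch):

/* Sketch: byte that holds bit nr under the BE convention above.
 * For nr = 0 this returns p + 3, i.e. "byte 3 bit 0". */
static unsigned char *sketch_be_byte(void *p, int nr)
{
	return (unsigned char *)p + ((nr >> 3) ^ 3);
}
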
2267diff -urN linux-2.4.22.org/include/asm-cris/bitops.h linux-2.4.22/include/asm-cris/bitops.h
2268--- linux-2.4.22.org/include/asm-cris/bitops.h 2003-11-24 18:28:36.000000000 +0100
2269+++ linux-2.4.22/include/asm-cris/bitops.h 2003-11-24 18:39:02.000000000 +0100
2270@@ -22,6 +22,7 @@
2271 /* We use generic_ffs so get it; include guards resolve the possible
2272 mutually inclusion. */
2273 #include <linux/bitops.h>
2274+#include <linux/compiler.h>
2275
2276 /*
2277 * Some hacks to defeat gcc over-optimizations..
2278@@ -44,6 +45,8 @@
2279 #define set_bit(nr, addr) (void)test_and_set_bit(nr, addr)
2280 #define __set_bit(nr, addr) (void)__test_and_set_bit(nr, addr)
2281
2282+#define __set_bit(nr, addr) (void)__test_and_set_bit(nr, addr)
2283+
2284 /*
2285 * clear_bit - Clears a bit in memory
2286 * @nr: Bit to clear
2287@@ -58,6 +61,8 @@
2288 #define clear_bit(nr, addr) (void)test_and_clear_bit(nr, addr)
2289 #define __clear_bit(nr, addr) (void)__test_and_clear_bit(nr, addr)
2290
2291+#define __clear_bit(nr, addr) (void)__test_and_clear_bit(nr, addr)
2292+
2293 /*
2294 * change_bit - Toggle a bit in memory
2295 * @nr: Bit to clear
2296@@ -91,7 +96,7 @@
2297 * It also implies a memory barrier.
2298 */
2299
2300-extern __inline__ int test_and_set_bit(int nr, void *addr)
2301+extern inline int test_and_set_bit(int nr, void *addr)
2302 {
2303 unsigned int mask, retval;
2304 unsigned long flags;
2305@@ -119,6 +124,18 @@
2306 return retval;
2307 }
2308
2309+extern inline int __test_and_set_bit(int nr, void *addr)
2310+{
2311+ unsigned int mask, retval;
2312+ unsigned int *adr = (unsigned int *)addr;
2313+
2314+ adr += nr >> 5;
2315+ mask = 1 << (nr & 0x1f);
2316+ retval = (mask & *adr) != 0;
2317+ *adr |= mask;
2318+ return retval;
2319+}
2320+
2321 /*
2322 * clear_bit() doesn't provide any barrier for the compiler.
2323 */
2324@@ -134,7 +151,7 @@
2325 * It also implies a memory barrier.
2326 */
2327
2328-extern __inline__ int test_and_clear_bit(int nr, void *addr)
2329+extern inline int test_and_clear_bit(int nr, void *addr)
2330 {
2331 unsigned int mask, retval;
2332 unsigned long flags;
2333@@ -160,7 +177,7 @@
2334 * but actually fail. You must protect multiple accesses with a lock.
2335 */
2336
2337-extern __inline__ int __test_and_clear_bit(int nr, void *addr)
2338+extern inline int __test_and_clear_bit(int nr, void *addr)
2339 {
2340 unsigned int mask, retval;
2341 unsigned int *adr = (unsigned int *)addr;
2342@@ -180,7 +197,7 @@
2343 * It also implies a memory barrier.
2344 */
2345
2346-extern __inline__ int test_and_change_bit(int nr, void *addr)
2347+extern inline int test_and_change_bit(int nr, void *addr)
2348 {
2349 unsigned int mask, retval;
2350 unsigned long flags;
2351@@ -197,7 +214,7 @@
2352
2353 /* WARNING: non atomic and it can be reordered! */
2354
2355-extern __inline__ int __test_and_change_bit(int nr, void *addr)
2356+extern inline int __test_and_change_bit(int nr, void *addr)
2357 {
2358 unsigned int mask, retval;
2359 unsigned int *adr = (unsigned int *)addr;
2360@@ -218,7 +235,7 @@
2361 * This routine doesn't need to be atomic.
2362 */
2363
2364-extern __inline__ int test_bit(int nr, const void *addr)
2365+extern inline int test_bit(int nr, const void *addr)
2366 {
2367 unsigned int mask;
2368 unsigned int *adr = (unsigned int *)addr;
2369@@ -239,7 +256,7 @@
2370 * number. They differ in that the first function also inverts all bits
2371 * in the input.
2372 */
2373-extern __inline__ unsigned long cris_swapnwbrlz(unsigned long w)
2374+extern inline unsigned long cris_swapnwbrlz(unsigned long w)
2375 {
2376 /* Let's just say we return the result in the same register as the
2377 input. Saying we clobber the input but can return the result
2378@@ -255,7 +272,7 @@
2379 return res;
2380 }
2381
2382-extern __inline__ unsigned long cris_swapwbrlz(unsigned long w)
2383+extern inline unsigned long cris_swapwbrlz(unsigned long w)
2384 {
2385 unsigned res;
2386 __asm__ ("swapwbr %0 \n\t"
2387@@ -269,7 +286,7 @@
2388 * ffz = Find First Zero in word. Undefined if no zero exists,
2389 * so code should check against ~0UL first..
2390 */
2391-extern __inline__ unsigned long ffz(unsigned long w)
2392+extern inline unsigned long ffz(unsigned long w)
2393 {
2394 /* The generic_ffs function is used to avoid the asm when the
2395 argument is a constant. */
2396@@ -282,7 +299,7 @@
2397 * Somewhat like ffz but the equivalent of generic_ffs: in contrast to
2398 * ffz we return the first one-bit *plus one*.
2399 */
2400-extern __inline__ unsigned long kernel_ffs(unsigned long w)
2401+extern inline unsigned long kernel_ffs(unsigned long w)
2402 {
2403 /* The generic_ffs function is used to avoid the asm when the
2404 argument is a constant. */
2405@@ -304,7 +321,7 @@
2406 * @offset: The bitnumber to start searching at
2407 * @size: The maximum size to search
2408 */
2409-extern __inline__ int find_next_zero_bit (void * addr, int size, int offset)
2410+extern inline int find_next_zero_bit (void * addr, int size, int offset)
2411 {
2412 unsigned long *p = ((unsigned long *) addr) + (offset >> 5);
2413 unsigned long result = offset & ~31UL;
2414@@ -375,7 +392,45 @@
2415 #define minix_test_bit(nr,addr) test_bit(nr,addr)
2416 #define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
2417
2418-#endif /* __KERNEL__ */
2419+#if 0
2420+/* TODO: see below */
2421+#define sched_find_first_zero_bit(addr) find_first_zero_bit(addr, 168)
2422+
2423+#else
2424+/* TODO: left out pending where to put it.. (there are .h dependencies) */
2425+
2426+ /*
2427+ * Every architecture must define this function. It's the fastest
2428+ * way of searching a 168-bit bitmap where the first 128 bits are
2429+ * unlikely to be set. It's guaranteed that at least one of the 168
2430+ * bits is cleared.
2431+ */
2432+#if 0
2433+#if MAX_RT_PRIO != 128 || MAX_PRIO != 168
2434+# error update this function.
2435+#endif
2436+#else
2437+#define MAX_RT_PRIO 128
2438+#define MAX_PRIO 168
2439+#endif
2440+
2441+static inline int sched_find_first_zero_bit(char *bitmap)
2442+{
2443+ unsigned int *b = (unsigned int *)bitmap;
2444+ unsigned int rt;
2445+
2446+ rt = b[0] & b[1] & b[2] & b[3];
2447+ if (unlikely(rt != 0xffffffff))
2448+ return find_first_zero_bit(bitmap, MAX_RT_PRIO);
2449+
2450+ if (b[4] != ~0)
2451+ return ffz(b[4]) + MAX_RT_PRIO;
2452+ return ffz(b[5]) + 32 + MAX_RT_PRIO;
2453+}
2454+#undef MAX_PRIO
2455+#undef MAX_RT_PRIO
2456+#endif
2457
2458+#endif /* __KERNEL__ */
2459
2460 #endif /* _CRIS_BITOPS_H */
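
Note that the CRIS variant searches for the first zero bit, i.e. it assumes an inverted priority bitmap: the AND of the first four words is all-ones only while every RT priority is still marked empty. A tiny worked check with invented values:

/* Sketch: under the inverted convention "priority 130 runnable" means
 * bit 130 is cleared.  b[0..3] stay all-ones, bit 2 of b[4] is clear,
 * so the helper returns ffz(b[4]) + 128 == 130. */
static int sketch_cris_pick(void)
{
	unsigned int b[6] = { ~0u, ~0u, ~0u, ~0u, ~0u & ~(1u << 2), ~0u };

	return sched_find_first_zero_bit((char *)b);	/* == 130 */
}
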
2461diff -urN linux-2.4.22.org/include/asm-generic/bitops.h linux-2.4.22/include/asm-generic/bitops.h
2462--- linux-2.4.22.org/include/asm-generic/bitops.h 2003-11-24 18:28:24.000000000 +0100
2463+++ linux-2.4.22/include/asm-generic/bitops.h 2003-11-24 18:39:02.000000000 +0100
2464@@ -51,6 +51,12 @@
2465 return ((mask & *addr) != 0);
2466 }
2467
2468+/*
2469+ * fls: find last bit set.
2470+ */
2471+
2472+#define fls(x) generic_fls(x)
2473+
2474 #ifdef __KERNEL__
2475
2476 /*
2477diff -urN linux-2.4.22.org/include/asm-i386/bitops.h linux-2.4.22/include/asm-i386/bitops.h
2478--- linux-2.4.22.org/include/asm-i386/bitops.h 2003-11-24 18:28:24.000000000 +0100
2479+++ linux-2.4.22/include/asm-i386/bitops.h 2003-11-24 18:39:02.000000000 +0100
2480@@ -6,6 +6,7 @@
2481 */
2482
2483 #include <linux/config.h>
2484+#include <linux/compiler.h>
2485
2486 /*
2487 * These have to be done with inline assembly: that way the bit-setting
2488@@ -75,6 +76,14 @@
2489 :"=m" (ADDR)
2490 :"Ir" (nr));
2491 }
2492+
2493+static __inline__ void __clear_bit(int nr, volatile void * addr)
2494+{
2495+ __asm__ __volatile__(
2496+ "btrl %1,%0"
2497+ :"=m" (ADDR)
2498+ :"Ir" (nr));
2499+}
2500 #define smp_mb__before_clear_bit() barrier()
2501 #define smp_mb__after_clear_bit() barrier()
2502
2503@@ -284,6 +293,34 @@
2504 }
2505
2506 /**
2507+ * find_first_bit - find the first set bit in a memory region
2508+ * @addr: The address to start the search at
2509+ * @size: The maximum size to search
2510+ *
2511+ * Returns the bit-number of the first set bit, not the number of the byte
2512+ * containing a bit.
2513+ */
2514+static __inline__ int find_first_bit(void * addr, unsigned size)
2515+{
2516+ int d0, d1;
2517+ int res;
2518+
2519+ /* This looks at memory. Mark it volatile to tell gcc not to move it around */
2520+ __asm__ __volatile__(
2521+ "xorl %%eax,%%eax\n\t"
2522+ "repe; scasl\n\t"
2523+ "jz 1f\n\t"
2524+ "leal -4(%%edi),%%edi\n\t"
2525+ "bsfl (%%edi),%%eax\n"
2526+ "1:\tsubl %%ebx,%%edi\n\t"
2527+ "shll $3,%%edi\n\t"
2528+ "addl %%edi,%%eax"
2529+ :"=a" (res), "=&c" (d0), "=&D" (d1)
2530+ :"1" ((size + 31) >> 5), "2" (addr), "b" (addr));
2531+ return res;
2532+}
2533+
2534+/**
2535 * find_next_zero_bit - find the first zero bit in a memory region
2536 * @addr: The address to base the search on
2537 * @offset: The bitnumber to start searching at
2538@@ -296,7 +333,7 @@
2539
2540 if (bit) {
2541 /*
2542- * Look for zero in first byte
2543+ * Look for zero in the first 32 bits.
2544 */
2545 __asm__("bsfl %1,%0\n\t"
2546 "jne 1f\n\t"
2547@@ -317,6 +354,39 @@
2548 }
2549
2550 /**
2551+ * find_next_bit - find the first set bit in a memory region
2552+ * @addr: The address to base the search on
2553+ * @offset: The bitnumber to start searching at
2554+ * @size: The maximum size to search
2555+ */
2556+static __inline__ int find_next_bit (void * addr, int size, int offset)
2557+{
2558+ unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
2559+ int set = 0, bit = offset & 31, res;
2560+
2561+ if (bit) {
2562+ /*
2563+ * Look for nonzero in the first 32 bits:
2564+ */
2565+ __asm__("bsfl %1,%0\n\t"
2566+ "jne 1f\n\t"
2567+ "movl $32, %0\n"
2568+ "1:"
2569+ : "=r" (set)
2570+ : "r" (*p >> bit));
2571+ if (set < (32 - bit))
2572+ return set + offset;
2573+ set = 32 - bit;
2574+ p++;
2575+ }
2576+ /*
2577+ * No set bit yet, search remaining full words for a bit
2578+ */
2579+ res = find_first_bit (p, size - 32 * (p - (unsigned long *) addr));
2580+ return (offset + set + res);
2581+}
2582+
2583+/**
2584 * ffz - find first zero in word.
2585 * @word: The word to search
2586 *
2587@@ -330,8 +400,41 @@
2588 return word;
2589 }
2590
2591+/**
2592+ * __ffs - find first bit in word.
2593+ * @word: The word to search
2594+ * Undefined if no bit exists, so code should check against 0 first.
2595+ */
2596+static __inline__ unsigned long __ffs(unsigned long word)
2597+{
2598+ __asm__("bsfl %1,%0"
2599+ :"=r" (word)
2600+ :"rm" (word));
2601+ return word;
2602+}
2603+#define fls(x) generic_fls(x)
2604+
2605 #ifdef __KERNEL__
2606
2607+/*
2608+ * Every architecture must define this function. It's the fastest
2609+ * way of searching a 140-bit bitmap where the first 100 bits are
2610+ * unlikely to be set. It's guaranteed that at least one of the 140
2611+ * bits is cleared.
2612+ */
2613+static inline int sched_find_first_bit(unsigned long *b)
2614+{
2615+ if (unlikely(b[0]))
2616+ return __ffs(b[0]);
2617+ if (unlikely(b[1]))
2618+ return __ffs(b[1]) + 32;
2619+ if (unlikely(b[2]))
2620+ return __ffs(b[2]) + 64;
2621+ if (b[3])
2622+ return __ffs(b[3]) + 96;
2623+ return __ffs(b[4]) + 128;
2624+}
2625+
2626 /**
2627 * ffs - find first bit set
2628 * @x: the word to search
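
This is the helper the scheduler's pick-next path is built around: one bit per priority level means selecting the next runnable task costs a handful of instructions no matter how many tasks are queued. A sketch of how the runqueue side consumes it; the prio_array layout shown here only approximates the one defined in the kernel/sched.c part of the patch:

/* Illustrative prio_array: 140 priority levels, one list and one
 * bitmap bit per level.  Field names are assumptions. */
struct sketch_prio_array {
	unsigned long bitmap[5];
	struct list_head queue[140];
};

static struct list_head *sketch_pick_queue(struct sketch_prio_array *array)
{
	int idx = sched_find_first_bit(array->bitmap);

	return array->queue + idx;
}
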
2629diff -urN linux-2.4.22.org/include/asm-i386/mmu_context.h linux-2.4.22/include/asm-i386/mmu_context.h
2630--- linux-2.4.22.org/include/asm-i386/mmu_context.h 2003-11-24 18:28:24.000000000 +0100
2631+++ linux-2.4.22/include/asm-i386/mmu_context.h 2003-11-24 18:39:02.000000000 +0100
2632@@ -27,13 +27,13 @@
2633
2634 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu)
2635 {
2636- if (prev != next) {
2637+ if (likely(prev != next)) {
2638 /* stop flush ipis for the previous mm */
2639 clear_bit(cpu, &prev->cpu_vm_mask);
2640 /*
2641 * Re-load LDT if necessary
2642 */
2643- if (prev->context.segments != next->context.segments)
2644+ if (unlikely(prev->context.segments != next->context.segments))
2645 load_LDT(next);
2646 #ifdef CONFIG_SMP
2647 cpu_tlbstate[cpu].state = TLBSTATE_OK;
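
The likely()/unlikely() annotations come from <linux/compiler.h> and expand to gcc's __builtin_expect() hints, so the common case (a real mm switch with no LDT reload) is laid out as the straight-line path. For reference, the conventional definitions look like this (the exact kernel macros may differ in detail):

#define sketch_likely(x)	__builtin_expect((x), 1)
#define sketch_unlikely(x)	__builtin_expect((x), 0)
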
2648diff -urN linux-2.4.22.org/include/asm-i386/processor.h linux-2.4.22/include/asm-i386/processor.h
2649--- linux-2.4.22.org/include/asm-i386/processor.h 2003-11-24 18:28:24.000000000 +0100
2650+++ linux-2.4.22/include/asm-i386/processor.h 2003-11-24 18:39:02.000000000 +0100
2651@@ -473,6 +473,8 @@
2652
2653 #define cpu_relax() rep_nop()
2654
2655+#define ARCH_HAS_SMP_BALANCE
2656+
2657 /* Prefetch instructions for Pentium III and AMD Athlon */
2658 #if defined(CONFIG_MPENTIUMIII) || defined (CONFIG_MPENTIUM4)
2659
2660diff -urN linux-2.4.22.org/include/asm-i386/smp_balance.h linux-2.4.22/include/asm-i386/smp_balance.h
2661--- linux-2.4.22.org/include/asm-i386/smp_balance.h 1970-01-01 01:00:00.000000000 +0100
2662+++ linux-2.4.22/include/asm-i386/smp_balance.h	2003-11-24 18:39:02.000000000 +0100
2663@@ -0,0 +1,66 @@
2664+#ifndef _ASM_SMP_BALANCE_H
2665+#define _ASM_SMP_BALANCE_H
2666+
2667+/*
2668+ * We have an architecture-specific SMP load balancer to improve
2669+ * scheduling behavior on hyperthreaded CPUs. Since only P4s have
2670+ * HT, maybe this should be conditional on CONFIG_MPENTIUM4...
2671+ *
2672+ */
2673+
2674+/*
2675+ * Find any idle processor package (i.e. both virtual processors are idle)
2676+ */
2677+static inline int find_idle_package(int this_cpu)
2678+{
2679+ int i;
2680+
2681+ this_cpu = cpu_number_map(this_cpu);
2682+
2683+ for (i = (this_cpu + 1) % smp_num_cpus;
2684+ i != this_cpu;
2685+ i = (i + 1) % smp_num_cpus) {
2686+ int physical = cpu_logical_map(i);
2687+ int sibling = cpu_sibling_map[physical];
2688+
2689+ if (idle_cpu(physical) && idle_cpu(sibling))
2690+ return physical;
2691+ }
2692+ return -1; /* not found */
2693+}
2694+
2695+static inline int arch_reschedule_idle_override(task_t * p, int idle)
2696+{
2697+ if (unlikely(smp_num_siblings > 1) && !idle_cpu(cpu_sibling_map[idle])) {
2698+ int true_idle = find_idle_package(idle);
2699+ if (true_idle >= 0) {
2700+ if (likely(p->cpus_allowed & (1UL << true_idle)))
2701+ idle = true_idle;
2702+ else {
2703+ true_idle = cpu_sibling_map[true_idle];
2704+ if (p->cpus_allowed & (1UL << true_idle))
2705+ idle = true_idle;
2706+ }
2707+ }
2708+ }
2709+
2710+ return idle;
2711+}
2712+
2713+static inline int arch_load_balance(int this_cpu, int idle)
2714+{
2715+ /* Special hack for hyperthreading */
2716+ if (unlikely(smp_num_siblings > 1 && idle == 2 && !idle_cpu(cpu_sibling_map[this_cpu]))) {
2717+ int found;
2718+ struct runqueue *rq_target;
2719+
2720+ if ((found = find_idle_package(this_cpu)) >= 0 ) {
2721+ rq_target = cpu_rq(found);
2722+ resched_task(rq_target->idle);
2723+ return 1;
2724+ }
2725+ }
2726+ return 0;
2727+}
2728+
2729+#endif /* _ASM_SMP_BALANCE_H */
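
ARCH_HAS_SMP_BALANCE (defined in processor.h above) tells the new scheduler to include this file; architectures without the hook presumably get no-op fallbacks from the generic half of the patch. A sketch of what such fallbacks would look like (the names mirror the functions above; the actual generic header is not shown here):

/* Illustrative no-op fallbacks for architectures without an SMP
 * balance hook: keep the chosen idle CPU and do no extra balancing. */
static inline int arch_reschedule_idle_override(task_t *p, int idle)
{
	return idle;
}

static inline int arch_load_balance(int this_cpu, int idle)
{
	return 0;
}
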
2730diff -urN linux-2.4.22.org/include/asm-i386/smp.h linux-2.4.22/include/asm-i386/smp.h
2731--- linux-2.4.22.org/include/asm-i386/smp.h 2003-11-24 18:28:24.000000000 +0100
2732+++ linux-2.4.22/include/asm-i386/smp.h 2003-11-24 18:39:02.000000000 +0100
2733@@ -40,6 +40,7 @@
2734 extern void smp_flush_tlb(void);
2735 extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
2736 extern void smp_send_reschedule(int cpu);
2737+extern void smp_send_reschedule_all(void);
2738 extern void smp_invalidate_rcv(void); /* Process an NMI */
2739 extern void (*mtrr_hook) (void);
2740 extern void zap_low_mappings (void);
2741@@ -81,7 +82,7 @@
2742 * so this is correct in the x86 case.
2743 */
2744
2745-#define smp_processor_id() (current->processor)
2746+#define smp_processor_id() (current->cpu)
2747
2748 static __inline int hard_smp_processor_id(void)
2749 {
2750@@ -99,17 +100,5 @@
2751
2752 #define NO_PROC_ID 0xFF /* No processor magic marker */
2753
2754-/*
2755- * This magic constant controls our willingness to transfer
2756- * a process across CPUs. Such a transfer incurs misses on the L1
2757- * cache, and on a P6 or P5 with multiple L2 caches L2 hits. My
2758- * gut feeling is this will vary by board in value. For a board
2759- * with separate L2 cache it probably depends also on the RSS, and
2760- * for a board with shared L2 cache it ought to decay fast as other
2761- * processes are run.
2762- */
2763-
2764-#define PROC_CHANGE_PENALTY 15 /* Schedule penalty */
2765-
2766 #endif
2767 #endif
2768diff -urN linux-2.4.22.org/include/asm-i386/system.h linux-2.4.22/include/asm-i386/system.h
2769--- linux-2.4.22.org/include/asm-i386/system.h 2003-11-24 18:28:24.000000000 +0100
2770+++ linux-2.4.22/include/asm-i386/system.h 2003-11-24 18:39:02.000000000 +0100
2771@@ -12,25 +12,22 @@
2772 struct task_struct; /* one of the stranger aspects of C forward declarations.. */
2773 extern void FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next));
2774
2775-#define prepare_to_switch() do { } while(0)
2776 #define switch_to(prev,next,last) do { \
2777 asm volatile("pushl %%esi\n\t" \
2778 "pushl %%edi\n\t" \
2779 "pushl %%ebp\n\t" \
2780 "movl %%esp,%0\n\t" /* save ESP */ \
2781- "movl %3,%%esp\n\t" /* restore ESP */ \
2782+ "movl %2,%%esp\n\t" /* restore ESP */ \
2783 "movl $1f,%1\n\t" /* save EIP */ \
2784- "pushl %4\n\t" /* restore EIP */ \
2785+ "pushl %3\n\t" /* restore EIP */ \
2786 "jmp __switch_to\n" \
2787 "1:\t" \
2788 "popl %%ebp\n\t" \
2789 "popl %%edi\n\t" \
2790 "popl %%esi\n\t" \
2791- :"=m" (prev->thread.esp),"=m" (prev->thread.eip), \
2792- "=b" (last) \
2793+ :"=m" (prev->thread.esp),"=m" (prev->thread.eip) \
2794 :"m" (next->thread.esp),"m" (next->thread.eip), \
2795- "a" (prev), "d" (next), \
2796- "b" (prev)); \
2797+ "a" (prev), "d" (next)); \
2798 } while (0)
2799
2800 #define _set_base(addr,base) do { unsigned long __pr; \
2801diff -urN linux-2.4.22.org/include/asm-ia64/bitops.h linux-2.4.22/include/asm-ia64/bitops.h
2802--- linux-2.4.22.org/include/asm-ia64/bitops.h 2003-11-24 18:28:32.000000000 +0100
2803+++ linux-2.4.22/include/asm-ia64/bitops.h 2003-11-24 18:39:02.000000000 +0100
2804@@ -4,6 +4,9 @@
2805 /*
2806 * Copyright (C) 1998-2003 Hewlett-Packard Co
2807 * David Mosberger-Tang <davidm@hpl.hp.com>
2808+ *
2809+ * 02/06/02 find_next_bit() and find_first_bit() added from Erich Focht's ia64 O(1)
2810+ * scheduler patch
2811 */
2812
2813 #include <linux/types.h>
2814@@ -91,6 +94,17 @@
2815 }
2816
2817 /**
2818+ * __clear_bit - Clears a bit in memory (non-atomic version)
2819+ */
2820+static __inline__ void
2821+__clear_bit (int nr, volatile void *addr)
2822+{
2823+ volatile __u32 *p = (__u32 *) addr + (nr >> 5);
2824+ __u32 m = 1 << (nr & 31);
2825+ *p &= ~m;
2826+}
2827+
2828+/**
2829 * change_bit - Toggle a bit in memory
2830 * @nr: Bit to clear
2831 * @addr: Address to start counting from
2832@@ -266,12 +280,11 @@
2833 }
2834
2835 /**
2836- * ffz - find the first zero bit in a memory region
2837- * @x: The address to start the search at
2838+ * ffz - find the first zero bit in a long word
2839+ * @x: The long word to find the bit in
2840 *
2841- * Returns the bit-number (0..63) of the first (least significant) zero bit, not
2842- * the number of the byte containing a bit. Undefined if no zero exists, so
2843- * code should check against ~0UL first...
2844+ * Returns the bit-number (0..63) of the first (least significant) zero bit. Undefined if
2845+ * no zero exists, so code should check against ~0UL first...
2846 */
2847 static inline unsigned long
2848 ffz (unsigned long x)
2849@@ -297,6 +310,21 @@
2850 return result;
2851 }
2852
2853+/**
2854+ * __ffs - find first bit in word.
2855+ * @x: The word to search
2856+ *
2857+ * Undefined if no bit exists, so code should check against 0 first.
2858+ */
2859+static __inline__ unsigned long
2860+__ffs (unsigned long x)
2861+{
2862+ unsigned long result;
2863+
2864+ __asm__ ("popcnt %0=%1" : "=r" (result) : "r" ((x - 1) & ~x));
2865+ return result;
2866+}
2867+
2868 #ifdef __KERNEL__
2869
2870 /*
2871@@ -313,6 +341,12 @@
2872 return exp - 0xffff;
2873 }
2874
2875+static int
2876+fls (int x)
2877+{
2878+ return ia64_fls((unsigned int) x);
2879+}
2880+
2881 /*
2882 * ffs: find first bit set. This is defined the same way as the libc and compiler builtin
2883 * ffs routines, therefore differs in spirit from the above ffz (man ffs): it operates on
2884@@ -385,8 +419,53 @@
2885 */
2886 #define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
2887
2888+/*
2889+ * Find next bit in a bitmap reasonably efficiently..
2890+ */
2891+static inline int
2892+find_next_bit (void *addr, unsigned long size, unsigned long offset)
2893+{
2894+ unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
2895+ unsigned long result = offset & ~63UL;
2896+ unsigned long tmp;
2897+
2898+ if (offset >= size)
2899+ return size;
2900+ size -= result;
2901+ offset &= 63UL;
2902+ if (offset) {
2903+ tmp = *(p++);
2904+ tmp &= ~0UL << offset;
2905+ if (size < 64)
2906+ goto found_first;
2907+ if (tmp)
2908+ goto found_middle;
2909+ size -= 64;
2910+ result += 64;
2911+ }
2912+ while (size & ~63UL) {
2913+ if ((tmp = *(p++)))
2914+ goto found_middle;
2915+ result += 64;
2916+ size -= 64;
2917+ }
2918+ if (!size)
2919+ return result;
2920+ tmp = *p;
2921+ found_first:
2922+ tmp &= ~0UL >> (64-size);
2923+ if (tmp == 0UL) /* Are any bits set? */
2924+ return result + size; /* Nope. */
2925+ found_middle:
2926+ return result + __ffs(tmp);
2927+}
2928+
2929+#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
2930+
2931 #ifdef __KERNEL__
2932
2933+#define __clear_bit(nr, addr) clear_bit(nr, addr)
2934+
2935 #define ext2_set_bit test_and_set_bit
2936 #define ext2_clear_bit test_and_clear_bit
2937 #define ext2_test_bit test_bit
2938@@ -400,6 +479,16 @@
2939 #define minix_test_bit(nr,addr) test_bit(nr,addr)
2940 #define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
2941
2942+static inline int
2943+sched_find_first_bit (unsigned long *b)
2944+{
2945+ if (unlikely(b[0]))
2946+ return __ffs(b[0]);
2947+ if (unlikely(b[1]))
2948+ return 64 + __ffs(b[1]);
2949+ return __ffs(b[2]) + 128;
2950+}
2951+
2952 #endif /* __KERNEL__ */
2953
2954 #endif /* _ASM_IA64_BITOPS_H */
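
The ia64 __ffs() relies on the identity that (x - 1) & ~x turns on exactly the bits below the lowest set bit of x, so a population count of that value is the index of that bit. A portable check of the same identity (illustrative, not ia64 code):

/* Sketch: x = 0b101000 -> (x-1) & ~x = 0b000111 -> popcount 3, and
 * indeed the lowest set bit of x is bit 3. */
static unsigned long sketch_ffs_identity(unsigned long x)
{
	unsigned long below = (x - 1) & ~x;
	unsigned long count = 0;

	while (below) {
		below &= below - 1;	/* clear lowest set bit */
		count++;
	}
	return count;			/* equals __ffs(x) for x != 0 */
}
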
2955diff -urN linux-2.4.22.org/include/asm-m68k/bitops.h linux-2.4.22/include/asm-m68k/bitops.h
2956--- linux-2.4.22.org/include/asm-m68k/bitops.h 2003-11-24 18:28:27.000000000 +0100
2957+++ linux-2.4.22/include/asm-m68k/bitops.h 2003-11-24 18:39:02.000000000 +0100
2958@@ -97,6 +97,7 @@
2959 (__builtin_constant_p(nr) ? \
2960 __constant_clear_bit(nr, vaddr) : \
2961 __generic_clear_bit(nr, vaddr))
2962+#define __clear_bit(nr,vaddr) clear_bit(nr,vaddr)
2963
2964 extern __inline__ void __constant_clear_bit(int nr, volatile void * vaddr)
2965 {
2966@@ -239,6 +240,28 @@
2967
2968 return 32 - cnt;
2969 }
2970+#define __ffs(x) (ffs(x) - 1)
2971+
2972+
2973+/*
2974+ * Every architecture must define this function. It's the fastest
2975+ * way of searching a 140-bit bitmap where the first 100 bits are
2976+ * unlikely to be set. It's guaranteed that at least one of the 140
2977+ * bits is cleared.
2978+ */
2979+static inline int sched_find_first_bit(unsigned long *b)
2980+{
2981+ if (unlikely(b[0]))
2982+ return __ffs(b[0]);
2983+ if (unlikely(b[1]))
2984+ return __ffs(b[1]) + 32;
2985+ if (unlikely(b[2]))
2986+ return __ffs(b[2]) + 64;
2987+ if (b[3])
2988+ return __ffs(b[3]) + 96;
2989+ return __ffs(b[4]) + 128;
2990+}
2991+
2992
2993 /*
2994 * hweightN: returns the hamming weight (i.e. the number
2995diff -urN linux-2.4.22.org/include/asm-mips/bitops.h linux-2.4.22/include/asm-mips/bitops.h
2996--- linux-2.4.22.org/include/asm-mips/bitops.h 2003-11-24 18:28:25.000000000 +0100
2997+++ linux-2.4.22/include/asm-mips/bitops.h 2003-11-24 18:39:02.000000000 +0100
2998@@ -51,6 +51,8 @@
2999
3000 #ifdef CONFIG_CPU_HAS_LLSC
3001
3002+#include <asm/mipsregs.h>
3003+
3004 /*
3005 * These functions for MIPS ISA > 1 are interrupt and SMP proof and
3006 * interrupt friendly
3007@@ -593,21 +595,30 @@
3008 *
3009 * Undefined if no zero exists, so code should check against ~0UL first.
3010 */
3011-static __inline__ unsigned long ffz(unsigned long word)
3012+extern __inline__ unsigned long ffz(unsigned long word)
3013 {
3014- int b = 0, s;
3015+ unsigned int __res;
3016+ unsigned int mask = 1;
3017
3018- word = ~word;
3019- s = 16; if (word << 16 != 0) s = 0; b += s; word >>= s;
3020- s = 8; if (word << 24 != 0) s = 0; b += s; word >>= s;
3021- s = 4; if (word << 28 != 0) s = 0; b += s; word >>= s;
3022- s = 2; if (word << 30 != 0) s = 0; b += s; word >>= s;
3023- s = 1; if (word << 31 != 0) s = 0; b += s;
3024+ __asm__ (
3025+ ".set\tnoreorder\n\t"
3026+ ".set\tnoat\n\t"
3027+ "move\t%0,$0\n"
3028+ "1:\tand\t$1,%2,%1\n\t"
3029+ "beqz\t$1,2f\n\t"
3030+ "sll\t%1,1\n\t"
3031+ "bnez\t%1,1b\n\t"
3032+ "addiu\t%0,1\n\t"
3033+ ".set\tat\n\t"
3034+ ".set\treorder\n"
3035+ "2:\n\t"
3036+ : "=&r" (__res), "=r" (mask)
3037+ : "r" (word), "1" (mask)
3038+ : "$1");
3039
3040- return b;
3041+ return __res;
3042 }
3043
3044-
3045 #ifdef __KERNEL__
3046
3047 /*
3048diff -urN linux-2.4.22.org/include/asm-mips64/bitops.h linux-2.4.22/include/asm-mips64/bitops.h
3049--- linux-2.4.22.org/include/asm-mips64/bitops.h 2003-11-24 18:28:33.000000000 +0100
3050+++ linux-2.4.22/include/asm-mips64/bitops.h 2003-11-24 18:39:03.000000000 +0100
3051@@ -27,6 +27,7 @@
3052
3053 #include <asm/system.h>
3054 #include <asm/sgidefs.h>
3055+#include <asm/mipsregs.h>
3056
3057 /*
3058 * set_bit - Atomically set a bit in memory
3059@@ -38,7 +39,8 @@
3060 * Note that @nr may be almost arbitrarily large; this function is not
3061 * restricted to acting on a single-word quantity.
3062 */
3063-static inline void set_bit(unsigned long nr, volatile void *addr)
3064+extern __inline__ void
3065+set_bit(unsigned long nr, volatile void *addr)
3066 {
3067 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
3068 unsigned long temp;
3069@@ -62,7 +64,7 @@
3070 * If it's called on the same region of memory simultaneously, the effect
3071 * may be that only one operation succeeds.
3072 */
3073-static inline void __set_bit(int nr, volatile void * addr)
3074+extern __inline__ void __set_bit(int nr, volatile void * addr)
3075 {
3076 unsigned long * m = ((unsigned long *) addr) + (nr >> 6);
3077
3078@@ -79,7 +81,8 @@
3079 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
3080 * in order to ensure changes are visible on other processors.
3081 */
3082-static inline void clear_bit(unsigned long nr, volatile void *addr)
3083+extern __inline__ void
3084+clear_bit(unsigned long nr, volatile void *addr)
3085 {
3086 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
3087 unsigned long temp;
3088@@ -105,7 +108,8 @@
3089 * Note that @nr may be almost arbitrarily large; this function is not
3090 * restricted to acting on a single-word quantity.
3091 */
3092-static inline void change_bit(unsigned long nr, volatile void *addr)
3093+extern __inline__ void
3094+change_bit(unsigned long nr, volatile void *addr)
3095 {
3096 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
3097 unsigned long temp;
3098@@ -128,7 +132,7 @@
3099 * If it's called on the same region of memory simultaneously, the effect
3100 * may be that only one operation succeeds.
3101 */
3102-static inline void __change_bit(int nr, volatile void * addr)
3103+extern __inline__ void __change_bit(int nr, volatile void * addr)
3104 {
3105 unsigned long * m = ((unsigned long *) addr) + (nr >> 6);
3106
3107@@ -143,8 +147,8 @@
3108 * This operation is atomic and cannot be reordered.
3109 * It also implies a memory barrier.
3110 */
3111-static inline unsigned long test_and_set_bit(unsigned long nr,
3112- volatile void *addr)
3113+extern __inline__ unsigned long
3114+test_and_set_bit(unsigned long nr, volatile void *addr)
3115 {
3116 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
3117 unsigned long temp, res;
3118@@ -176,7 +180,8 @@
3119 * If two examples of this operation race, one can appear to succeed
3120 * but actually fail. You must protect multiple accesses with a lock.
3121 */
3122-static inline int __test_and_set_bit(int nr, volatile void *addr)
3123+extern __inline__ int
3124+__test_and_set_bit(int nr, volatile void * addr)
3125 {
3126 unsigned long mask, retval;
3127 long *a = (unsigned long *) addr;
3128@@ -197,8 +202,8 @@
3129 * This operation is atomic and cannot be reordered.
3130 * It also implies a memory barrier.
3131 */
3132-static inline unsigned long test_and_clear_bit(unsigned long nr,
3133- volatile void *addr)
3134+extern __inline__ unsigned long
3135+test_and_clear_bit(unsigned long nr, volatile void *addr)
3136 {
3137 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
3138 unsigned long temp, res;
3139@@ -231,7 +236,8 @@
3140 * If two examples of this operation race, one can appear to succeed
3141 * but actually fail. You must protect multiple accesses with a lock.
3142 */
3143-static inline int __test_and_clear_bit(int nr, volatile void * addr)
3144+extern __inline__ int
3145+__test_and_clear_bit(int nr, volatile void * addr)
3146 {
3147 unsigned long mask, retval;
3148 unsigned long *a = (unsigned long *) addr;
3149@@ -252,8 +258,8 @@
3150 * This operation is atomic and cannot be reordered.
3151 * It also implies a memory barrier.
3152 */
3153-static inline unsigned long test_and_change_bit(unsigned long nr,
3154- volatile void *addr)
3155+extern __inline__ unsigned long
3156+test_and_change_bit(unsigned long nr, volatile void *addr)
3157 {
3158 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
3159 unsigned long temp, res;
3160@@ -285,7 +291,8 @@
3161 * If two examples of this operation race, one can appear to succeed
3162 * but actually fail. You must protect multiple accesses with a lock.
3163 */
3164-static inline int __test_and_change_bit(int nr, volatile void *addr)
3165+extern __inline__ int
3166+__test_and_change_bit(int nr, volatile void * addr)
3167 {
3168 unsigned long mask, retval;
3169 unsigned long *a = (unsigned long *) addr;
3170@@ -302,7 +309,8 @@
3171 * @nr: bit number to test
3172 * @addr: Address to start counting from
3173 */
3174-static inline int test_bit(int nr, volatile void * addr)
3175+extern __inline__ unsigned long
3176+test_bit(int nr, volatile void * addr)
3177 {
3178 return 1UL & (((const volatile unsigned long *) addr)[nr >> SZLONG_LOG] >> (nr & SZLONG_MASK));
3179 }
3180@@ -313,19 +321,20 @@
3181 *
3182 * Undefined if no zero exists, so code should check against ~0UL first.
3183 */
3184-static __inline__ unsigned long ffz(unsigned long word)
3185+extern __inline__ unsigned long ffz(unsigned long word)
3186 {
3187- int b = 0, s;
3188+ unsigned long k;
3189
3190 word = ~word;
3191- s = 32; if (word << 32 != 0) s = 0; b += s; word >>= s;
3192- s = 16; if (word << 48 != 0) s = 0; b += s; word >>= s;
3193- s = 8; if (word << 56 != 0) s = 0; b += s; word >>= s;
3194- s = 4; if (word << 60 != 0) s = 0; b += s; word >>= s;
3195- s = 2; if (word << 62 != 0) s = 0; b += s; word >>= s;
3196- s = 1; if (word << 63 != 0) s = 0; b += s;
3197+ k = 63;
3198+ if (word & 0x00000000ffffffffUL) { k -= 32; word <<= 32; }
3199+ if (word & 0x0000ffff00000000UL) { k -= 16; word <<= 16; }
3200+ if (word & 0x00ff000000000000UL) { k -= 8; word <<= 8; }
3201+ if (word & 0x0f00000000000000UL) { k -= 4; word <<= 4; }
3202+ if (word & 0x3000000000000000UL) { k -= 2; word <<= 2; }
3203+ if (word & 0x4000000000000000UL) { k -= 1; }
3204
3205- return b;
3206+ return k;
3207 }
3208
3209 /*
3210@@ -334,8 +343,8 @@
3211 * @offset: The bitnumber to start searching at
3212 * @size: The maximum size to search
3213 */
3214-static inline unsigned long find_next_zero_bit(void *addr, unsigned long size,
3215- unsigned long offset)
3216+extern __inline__ unsigned long
3217+find_next_zero_bit(void *addr, unsigned long size, unsigned long offset)
3218 {
3219 unsigned long *p = ((unsigned long *) addr) + (offset >> SZLONG_LOG);
3220 unsigned long result = offset & ~SZLONG_MASK;
3221@@ -400,7 +409,8 @@
3222 #define hweight16(x) generic_hweight16(x)
3223 #define hweight8(x) generic_hweight8(x)
3224
3225-static inline int __test_and_set_le_bit(unsigned long nr, void * addr)
3226+extern inline int
3227+__test_and_set_le_bit(unsigned long nr, void * addr)
3228 {
3229 unsigned char *ADDR = (unsigned char *) addr;
3230 int mask, retval;
3231@@ -413,7 +423,8 @@
3232 return retval;
3233 }
3234
3235-static inline int __test_and_clear_le_bit(unsigned long nr, void * addr)
3236+extern inline int
3237+__test_and_clear_le_bit(unsigned long nr, void * addr)
3238 {
3239 unsigned char *ADDR = (unsigned char *) addr;
3240 int mask, retval;
3241@@ -426,7 +437,8 @@
3242 return retval;
3243 }
3244
3245-static inline int test_le_bit(unsigned long nr, const void * addr)
3246+extern inline int
3247+test_le_bit(unsigned long nr, const void * addr)
3248 {
3249 const unsigned char *ADDR = (const unsigned char *) addr;
3250 int mask;
3251@@ -451,7 +463,7 @@
3252 return b;
3253 }
3254
3255-static inline unsigned long find_next_zero_le_bit(void *addr,
3256+extern inline unsigned long find_next_zero_le_bit(void *addr,
3257 unsigned long size, unsigned long offset)
3258 {
3259 unsigned int *p = ((unsigned int *) addr) + (offset >> 5);
3260diff -urN linux-2.4.22.org/include/asm-ppc/bitops.h linux-2.4.22/include/asm-ppc/bitops.h
3261--- linux-2.4.22.org/include/asm-ppc/bitops.h 2003-11-24 18:28:28.000000000 +0100
3262+++ linux-2.4.22/include/asm-ppc/bitops.h 2003-11-24 18:39:03.000000000 +0100
3263@@ -7,6 +7,7 @@
3264 #define _PPC_BITOPS_H
3265
3266 #include <linux/config.h>
3267+#include <linux/compiler.h>
3268 #include <asm/byteorder.h>
3269 #include <asm/atomic.h>
3270
3271@@ -26,7 +27,7 @@
3272 * These used to be if'd out here because using : "cc" as a constraint
3273 * resulted in errors from egcs. Things appear to be OK with gcc-2.95.
3274 */
3275-static __inline__ void set_bit(int nr, volatile void * addr)
3276+static __inline__ void set_bit(int nr, volatile unsigned long * addr)
3277 {
3278 unsigned long old;
3279 unsigned long mask = 1 << (nr & 0x1f);
3280@@ -46,7 +47,7 @@
3281 /*
3282 * non-atomic version
3283 */
3284-static __inline__ void __set_bit(int nr, volatile void *addr)
3285+static __inline__ void __set_bit(int nr, volatile unsigned long *addr)
3286 {
3287 unsigned long mask = 1 << (nr & 0x1f);
3288 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
3289@@ -60,7 +61,7 @@
3290 #define smp_mb__before_clear_bit() smp_mb()
3291 #define smp_mb__after_clear_bit() smp_mb()
3292
3293-static __inline__ void clear_bit(int nr, volatile void *addr)
3294+static __inline__ void clear_bit(int nr, volatile unsigned long *addr)
3295 {
3296 unsigned long old;
3297 unsigned long mask = 1 << (nr & 0x1f);
3298@@ -80,7 +81,7 @@
3299 /*
3300 * non-atomic version
3301 */
3302-static __inline__ void __clear_bit(int nr, volatile void *addr)
3303+static __inline__ void __clear_bit(int nr, volatile unsigned long *addr)
3304 {
3305 unsigned long mask = 1 << (nr & 0x1f);
3306 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
3307@@ -88,7 +89,7 @@
3308 *p &= ~mask;
3309 }
3310
3311-static __inline__ void change_bit(int nr, volatile void *addr)
3312+static __inline__ void change_bit(int nr, volatile unsigned long *addr)
3313 {
3314 unsigned long old;
3315 unsigned long mask = 1 << (nr & 0x1f);
3316@@ -108,7 +109,7 @@
3317 /*
3318 * non-atomic version
3319 */
3320-static __inline__ void __change_bit(int nr, volatile void *addr)
3321+static __inline__ void __change_bit(int nr, volatile unsigned long *addr)
3322 {
3323 unsigned long mask = 1 << (nr & 0x1f);
3324 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
3325@@ -119,7 +120,7 @@
3326 /*
3327 * test_and_*_bit do imply a memory barrier (?)
3328 */
3329-static __inline__ int test_and_set_bit(int nr, volatile void *addr)
3330+static __inline__ int test_and_set_bit(int nr, volatile unsigned long *addr)
3331 {
3332 unsigned int old, t;
3333 unsigned int mask = 1 << (nr & 0x1f);
3334@@ -142,7 +143,7 @@
3335 /*
3336 * non-atomic version
3337 */
3338-static __inline__ int __test_and_set_bit(int nr, volatile void *addr)
3339+static __inline__ int __test_and_set_bit(int nr, volatile unsigned long *addr)
3340 {
3341 unsigned long mask = 1 << (nr & 0x1f);
3342 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
3343@@ -152,7 +153,7 @@
3344 return (old & mask) != 0;
3345 }
3346
3347-static __inline__ int test_and_clear_bit(int nr, volatile void *addr)
3348+static __inline__ int test_and_clear_bit(int nr, volatile unsigned long *addr)
3349 {
3350 unsigned int old, t;
3351 unsigned int mask = 1 << (nr & 0x1f);
3352@@ -175,7 +176,7 @@
3353 /*
3354 * non-atomic version
3355 */
3356-static __inline__ int __test_and_clear_bit(int nr, volatile void *addr)
3357+static __inline__ int __test_and_clear_bit(int nr, volatile unsigned long *addr)
3358 {
3359 unsigned long mask = 1 << (nr & 0x1f);
3360 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
3361@@ -185,7 +186,7 @@
3362 return (old & mask) != 0;
3363 }
3364
3365-static __inline__ int test_and_change_bit(int nr, volatile void *addr)
3366+static __inline__ int test_and_change_bit(int nr, volatile unsigned long *addr)
3367 {
3368 unsigned int old, t;
3369 unsigned int mask = 1 << (nr & 0x1f);
3370@@ -208,7 +209,7 @@
3371 /*
3372 * non-atomic version
3373 */
3374-static __inline__ int __test_and_change_bit(int nr, volatile void *addr)
3375+static __inline__ int __test_and_change_bit(int nr, volatile unsigned long *addr)
3376 {
3377 unsigned long mask = 1 << (nr & 0x1f);
3378 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
3379@@ -218,7 +219,7 @@
3380 return (old & mask) != 0;
3381 }
3382
3383-static __inline__ int test_bit(int nr, __const__ volatile void *addr)
3384+static __inline__ int test_bit(int nr, __const__ volatile unsigned long *addr)
3385 {
3386 __const__ unsigned int *p = (__const__ unsigned int *) addr;
3387
3388@@ -226,7 +227,7 @@
3389 }
3390
3391 /* Return the bit position of the most significant 1 bit in a word */
3392-static __inline__ int __ilog2(unsigned int x)
3393+static __inline__ int __ilog2(unsigned long x)
3394 {
3395 int lz;
3396
3397@@ -234,13 +235,18 @@
3398 return 31 - lz;
3399 }
3400
3401-static __inline__ int ffz(unsigned int x)
3402+static __inline__ int ffz(unsigned long x)
3403 {
3404 if ((x = ~x) == 0)
3405 return 32;
3406 return __ilog2(x & -x);
3407 }
3408
3409+static inline int __ffs(unsigned long x)
3410+{
3411+ return __ilog2(x & -x);
3412+}
3413+
3414 /*
3415 * ffs: find first bit set. This is defined the same way as
3416 * the libc and compiler builtin ffs routines, therefore
3417@@ -252,6 +258,18 @@
3418 }
3419
3420 /*
3421+ * fls: find last (most-significant) bit set.
3422+ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
3423+ */
3424+static __inline__ int fls(unsigned int x)
3425+{
3426+ int lz;
3427+
3428+ asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x));
3429+ return 32 - lz;
3430+}
3431+
3432+/*
3433 * hweightN: returns the hamming weight (i.e. the number
3434 * of bits set) of a N-bit word
3435 */
3436@@ -261,13 +279,86 @@
3437 #define hweight8(x) generic_hweight8(x)
3438
3439 /*
3440+ * Find the first bit set in a 140-bit bitmap.
3441+ * The first 100 bits are unlikely to be set.
3442+ */
3443+static inline int sched_find_first_bit(unsigned long *b)
3444+{
3445+ if (unlikely(b[0]))
3446+ return __ffs(b[0]);
3447+ if (unlikely(b[1]))
3448+ return __ffs(b[1]) + 32;
3449+ if (unlikely(b[2]))
3450+ return __ffs(b[2]) + 64;
3451+ if (b[3])
3452+ return __ffs(b[3]) + 96;
3453+ return __ffs(b[4]) + 128;
3454+}
3455+
3456+/**
3457+ * find_next_bit - find the next set bit in a memory region
3458+ * @addr: The address to base the search on
3459+ * @offset: The bitnumber to start searching at
3460+ * @size: The maximum size to search
3461+ */
3462+static __inline__ unsigned long find_next_bit(unsigned long *addr,
3463+ unsigned long size, unsigned long offset)
3464+{
3465+ unsigned int *p = ((unsigned int *) addr) + (offset >> 5);
3466+ unsigned int result = offset & ~31UL;
3467+ unsigned int tmp;
3468+
3469+ if (offset >= size)
3470+ return size;
3471+ size -= result;
3472+ offset &= 31UL;
3473+ if (offset) {
3474+ tmp = *p++;
3475+ tmp &= ~0UL << offset;
3476+ if (size < 32)
3477+ goto found_first;
3478+ if (tmp)
3479+ goto found_middle;
3480+ size -= 32;
3481+ result += 32;
3482+ }
3483+ while (size >= 32) {
3484+ if ((tmp = *p++) != 0)
3485+ goto found_middle;
3486+ result += 32;
3487+ size -= 32;
3488+ }
3489+ if (!size)
3490+ return result;
3491+ tmp = *p;
3492+
3493+found_first:
3494+ tmp &= ~0UL >> (32 - size);
3495+ if (tmp == 0UL) /* Are any bits set? */
3496+ return result + size; /* Nope. */
3497+found_middle:
3498+ return result + __ffs(tmp);
3499+}
3500+
3501+/**
3502+ * find_first_bit - find the first set bit in a memory region
3503+ * @addr: The address to start the search at
3504+ * @size: The maximum size to search
3505+ *
3506+ * Returns the bit-number of the first set bit, not the number of the byte
3507+ * containing a bit.
3508+ */
3509+#define find_first_bit(addr, size) \
3510+ find_next_bit((addr), (size), 0)
3511+
3512+/*
3513 * This implementation of find_{first,next}_zero_bit was stolen from
3514 * Linus' asm-alpha/bitops.h.
3515 */
3516 #define find_first_zero_bit(addr, size) \
3517 find_next_zero_bit((addr), (size), 0)
3518
3519-static __inline__ unsigned long find_next_zero_bit(void * addr,
3520+static __inline__ unsigned long find_next_zero_bit(unsigned long * addr,
3521 unsigned long size, unsigned long offset)
3522 {
3523 unsigned int * p = ((unsigned int *) addr) + (offset >> 5);
3524@@ -306,8 +397,8 @@
3525 }
3526
3527
3528-#define ext2_set_bit(nr, addr) __test_and_set_bit((nr) ^ 0x18, addr)
3529-#define ext2_clear_bit(nr, addr) __test_and_clear_bit((nr) ^ 0x18, addr)
3530+#define ext2_set_bit(nr, addr) __test_and_set_bit((nr) ^ 0x18, (unsigned long *)(addr))
3531+#define ext2_clear_bit(nr, addr) __test_and_clear_bit((nr) ^ 0x18, (unsigned long *)(addr))
3532
3533 static __inline__ int ext2_test_bit(int nr, __const__ void * addr)
3534 {
3535diff -urN linux-2.4.22.org/include/asm-ppc/smp.h linux-2.4.22/include/asm-ppc/smp.h
3536--- linux-2.4.22.org/include/asm-ppc/smp.h 2003-11-24 18:28:28.000000000 +0100
3537+++ linux-2.4.22/include/asm-ppc/smp.h 2003-11-24 18:39:03.000000000 +0100
3538@@ -45,7 +45,7 @@
3539 #define cpu_logical_map(cpu) (cpu)
3540 #define cpu_number_map(x) (x)
3541
3542-#define smp_processor_id() (current->processor)
3543+#define smp_processor_id() (current->cpu)
3544
3545 extern int smp_hw_index[NR_CPUS];
3546 #define hard_smp_processor_id() (smp_hw_index[smp_processor_id()])
3547diff -urN linux-2.4.22.org/include/asm-ppc64/bitops.h linux-2.4.22/include/asm-ppc64/bitops.h
3548--- linux-2.4.22.org/include/asm-ppc64/bitops.h 2003-11-24 18:28:17.000000000 +0100
3549+++ linux-2.4.22/include/asm-ppc64/bitops.h 2003-11-24 18:39:03.000000000 +0100
3550@@ -41,12 +41,12 @@
3551 #define smp_mb__before_clear_bit() smp_mb()
3552 #define smp_mb__after_clear_bit() smp_mb()
3553
3554-static __inline__ int test_bit(unsigned long nr, __const__ volatile void *addr)
3555+static __inline__ int test_bit(unsigned long nr, __const__ volatile unsigned long *addr)
3556 {
3557 return (1UL & (((__const__ long *) addr)[nr >> 6] >> (nr & 63)));
3558 }
3559
3560-static __inline__ void set_bit(unsigned long nr, volatile void *addr)
3561+static __inline__ void set_bit(unsigned long nr, volatile unsigned long *addr)
3562 {
3563 unsigned long old;
3564 unsigned long mask = 1UL << (nr & 0x3f);
3565@@ -62,7 +62,7 @@
3566 : "cc");
3567 }
3568
3569-static __inline__ void clear_bit(unsigned long nr, volatile void *addr)
3570+static __inline__ void clear_bit(unsigned long nr, volatile unsigned long *addr)
3571 {
3572 unsigned long old;
3573 unsigned long mask = 1UL << (nr & 0x3f);
3574@@ -78,7 +78,7 @@
3575 : "cc");
3576 }
3577
3578-static __inline__ void change_bit(unsigned long nr, volatile void *addr)
3579+static __inline__ void change_bit(unsigned long nr, volatile unsigned long *addr)
3580 {
3581 unsigned long old;
3582 unsigned long mask = 1UL << (nr & 0x3f);
3583@@ -94,7 +94,7 @@
3584 : "cc");
3585 }
3586
3587-static __inline__ int test_and_set_bit(unsigned long nr, volatile void *addr)
3588+static __inline__ int test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
3589 {
3590 unsigned long old, t;
3591 unsigned long mask = 1UL << (nr & 0x3f);
3592@@ -114,7 +114,7 @@
3593 return (old & mask) != 0;
3594 }
3595
3596-static __inline__ int test_and_clear_bit(unsigned long nr, volatile void *addr)
3597+static __inline__ int test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
3598 {
3599 unsigned long old, t;
3600 unsigned long mask = 1UL << (nr & 0x3f);
3601@@ -134,7 +134,7 @@
3602 return (old & mask) != 0;
3603 }
3604
3605-static __inline__ int test_and_change_bit(unsigned long nr, volatile void *addr)
3606+static __inline__ int test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
3607 {
3608 unsigned long old, t;
3609 unsigned long mask = 1UL << (nr & 0x3f);
3610@@ -157,7 +157,7 @@
3611 /*
3612 * non-atomic versions
3613 */
3614-static __inline__ void __set_bit(unsigned long nr, volatile void *addr)
3615+static __inline__ void __set_bit(unsigned long nr, volatile unsigned long *addr)
3616 {
3617 unsigned long mask = 1UL << (nr & 0x3f);
3618 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
3619@@ -165,7 +165,7 @@
3620 *p |= mask;
3621 }
3622
3623-static __inline__ void __clear_bit(unsigned long nr, volatile void *addr)
3624+static __inline__ void __clear_bit(unsigned long nr, volatile unsigned long *addr)
3625 {
3626 unsigned long mask = 1UL << (nr & 0x3f);
3627 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
3628@@ -173,7 +173,7 @@
3629 *p &= ~mask;
3630 }
3631
3632-static __inline__ void __change_bit(unsigned long nr, volatile void *addr)
3633+static __inline__ void __change_bit(unsigned long nr, volatile unsigned long *addr)
3634 {
3635 unsigned long mask = 1UL << (nr & 0x3f);
3636 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
3637@@ -181,7 +181,7 @@
3638 *p ^= mask;
3639 }
3640
3641-static __inline__ int __test_and_set_bit(unsigned long nr, volatile void *addr)
3642+static __inline__ int __test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
3643 {
3644 unsigned long mask = 1UL << (nr & 0x3f);
3645 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
3646@@ -191,7 +191,7 @@
3647 return (old & mask) != 0;
3648 }
3649
3650-static __inline__ int __test_and_clear_bit(unsigned long nr, volatile void *addr)
3651+static __inline__ int __test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
3652 {
3653 unsigned long mask = 1UL << (nr & 0x3f);
3654 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
3655@@ -201,7 +201,7 @@
3656 return (old & mask) != 0;
3657 }
3658
3659-static __inline__ int __test_and_change_bit(unsigned long nr, volatile void *addr)
3660+static __inline__ int __test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
3661 {
3662 unsigned long mask = 1UL << (nr & 0x3f);
3663 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
3664diff -urN linux-2.4.22.org/include/asm-s390/bitops.h linux-2.4.22/include/asm-s390/bitops.h
3665--- linux-2.4.22.org/include/asm-s390/bitops.h 2003-11-24 18:28:35.000000000 +0100
3666+++ linux-2.4.22/include/asm-s390/bitops.h 2003-11-24 18:39:03.000000000 +0100
3667@@ -47,272 +47,217 @@
3668 extern const char _oi_bitmap[];
3669 extern const char _ni_bitmap[];
3670 extern const char _zb_findmap[];
3671+extern const char _sb_findmap[];
3672
3673 #ifdef CONFIG_SMP
3674 /*
3675 * SMP save set_bit routine based on compare and swap (CS)
3676 */
3677-static __inline__ void set_bit_cs(int nr, volatile void * addr)
3678+static inline void set_bit_cs(int nr, volatile void *ptr)
3679 {
3680- unsigned long bits, mask;
3681- __asm__ __volatile__(
3682+ unsigned long addr, old, new, mask;
3683+
3684+ addr = (unsigned long) ptr;
3685 #if ALIGN_CS == 1
3686- " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
3687- " nr %2,%1\n" /* isolate last 2 bits of address */
3688- " xr %1,%2\n" /* make addr % 4 == 0 */
3689- " sll %2,3\n"
3690- " ar %0,%2\n" /* add alignement to bitnr */
3691+ addr ^= addr & 3; /* align address to 4 */
3692+ nr += (addr & 3) << 3; /* add alignment to bit number */
3693 #endif
3694- " lhi %2,31\n"
3695- " nr %2,%0\n" /* make shift value */
3696- " xr %0,%2\n"
3697- " srl %0,3\n"
3698- " lhi %3,1\n"
3699- " la %1,0(%0,%1)\n" /* calc. address for CS */
3700- " sll %3,0(%2)\n" /* make OR mask */
3701- " l %0,0(%1)\n"
3702- "0: lr %2,%0\n" /* CS loop starts here */
3703- " or %2,%3\n" /* set bit */
3704- " cs %0,%2,0(%1)\n"
3705- " jl 0b"
3706- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
3707- : "cc", "memory" );
3708+ addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
3709+ mask = 1UL << (nr & 31); /* make OR mask */
3710+ asm volatile(
3711+ " l %0,0(%4)\n"
3712+ "0: lr %1,%0\n"
3713+ " or %1,%3\n"
3714+ " cs %0,%1,0(%4)\n"
3715+ " jl 0b"
3716+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
3717+ : "d" (mask), "a" (addr)
3718+ : "cc" );
3719 }
3720
3721 /*
3722 * SMP save clear_bit routine based on compare and swap (CS)
3723 */
3724-static __inline__ void clear_bit_cs(int nr, volatile void * addr)
3725+static inline void clear_bit_cs(int nr, volatile void *ptr)
3726 {
3727- static const int minusone = -1;
3728- unsigned long bits, mask;
3729- __asm__ __volatile__(
3730+ unsigned long addr, old, new, mask;
3731+
3732+ addr = (unsigned long) ptr;
3733 #if ALIGN_CS == 1
3734- " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
3735- " nr %2,%1\n" /* isolate last 2 bits of address */
3736- " xr %1,%2\n" /* make addr % 4 == 0 */
3737- " sll %2,3\n"
3738- " ar %0,%2\n" /* add alignement to bitnr */
3739+ addr ^= addr & 3; /* align address to 4 */
3740+ nr += (addr & 3) << 3; /* add alignment to bit number */
3741 #endif
3742- " lhi %2,31\n"
3743- " nr %2,%0\n" /* make shift value */
3744- " xr %0,%2\n"
3745- " srl %0,3\n"
3746- " lhi %3,1\n"
3747- " la %1,0(%0,%1)\n" /* calc. address for CS */
3748- " sll %3,0(%2)\n"
3749- " x %3,%4\n" /* make AND mask */
3750- " l %0,0(%1)\n"
3751- "0: lr %2,%0\n" /* CS loop starts here */
3752- " nr %2,%3\n" /* clear bit */
3753- " cs %0,%2,0(%1)\n"
3754- " jl 0b"
3755- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask)
3756- : "m" (minusone) : "cc", "memory" );
3757+ addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
3758+ mask = ~(1UL << (nr & 31)); /* make AND mask */
3759+ asm volatile(
3760+ " l %0,0(%4)\n"
3761+ "0: lr %1,%0\n"
3762+ " nr %1,%3\n"
3763+ " cs %0,%1,0(%4)\n"
3764+ " jl 0b"
3765+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
3766+ : "d" (mask), "a" (addr)
3767+ : "cc" );
3768 }
3769
3770 /*
3771 * SMP save change_bit routine based on compare and swap (CS)
3772 */
3773-static __inline__ void change_bit_cs(int nr, volatile void * addr)
3774+static inline void change_bit_cs(int nr, volatile void *ptr)
3775 {
3776- unsigned long bits, mask;
3777- __asm__ __volatile__(
3778+ unsigned long addr, old, new, mask;
3779+
3780+ addr = (unsigned long) ptr;
3781 #if ALIGN_CS == 1
3782- " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
3783- " nr %2,%1\n" /* isolate last 2 bits of address */
3784- " xr %1,%2\n" /* make addr % 4 == 0 */
3785- " sll %2,3\n"
3786- " ar %0,%2\n" /* add alignement to bitnr */
3787+ addr ^= addr & 3; /* align address to 4 */
3788+ nr += (addr & 3) << 3; /* add alignment to bit number */
3789 #endif
3790- " lhi %2,31\n"
3791- " nr %2,%0\n" /* make shift value */
3792- " xr %0,%2\n"
3793- " srl %0,3\n"
3794- " lhi %3,1\n"
3795- " la %1,0(%0,%1)\n" /* calc. address for CS */
3796- " sll %3,0(%2)\n" /* make XR mask */
3797- " l %0,0(%1)\n"
3798- "0: lr %2,%0\n" /* CS loop starts here */
3799- " xr %2,%3\n" /* change bit */
3800- " cs %0,%2,0(%1)\n"
3801- " jl 0b"
3802- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
3803- : "cc", "memory" );
3804+ addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
3805+ mask = 1UL << (nr & 31); /* make XOR mask */
3806+ asm volatile(
3807+ " l %0,0(%4)\n"
3808+ "0: lr %1,%0\n"
3809+ " xr %1,%3\n"
3810+ " cs %0,%1,0(%4)\n"
3811+ " jl 0b"
3812+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
3813+ : "d" (mask), "a" (addr)
3814+ : "cc" );
3815 }
3816
3817 /*
3818 * SMP save test_and_set_bit routine based on compare and swap (CS)
3819 */
3820-static __inline__ int test_and_set_bit_cs(int nr, volatile void * addr)
3821+static inline int test_and_set_bit_cs(int nr, volatile void *ptr)
3822 {
3823- unsigned long bits, mask;
3824- __asm__ __volatile__(
3825+ unsigned long addr, old, new, mask;
3826+
3827+ addr = (unsigned long) ptr;
3828 #if ALIGN_CS == 1
3829- " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
3830- " nr %2,%1\n" /* isolate last 2 bits of address */
3831- " xr %1,%2\n" /* make addr % 4 == 0 */
3832- " sll %2,3\n"
3833- " ar %0,%2\n" /* add alignement to bitnr */
3834+ addr ^= addr & 3; /* align address to 4 */
3835+ nr += (addr & 3) << 3; /* add alignment to bit number */
3836 #endif
3837- " lhi %2,31\n"
3838- " nr %2,%0\n" /* make shift value */
3839- " xr %0,%2\n"
3840- " srl %0,3\n"
3841- " lhi %3,1\n"
3842- " la %1,0(%0,%1)\n" /* calc. address for CS */
3843- " sll %3,0(%2)\n" /* make OR mask */
3844- " l %0,0(%1)\n"
3845- "0: lr %2,%0\n" /* CS loop starts here */
3846- " or %2,%3\n" /* set bit */
3847- " cs %0,%2,0(%1)\n"
3848- " jl 0b\n"
3849- " nr %0,%3\n" /* isolate old bit */
3850- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
3851- : "cc", "memory" );
3852- return nr != 0;
3853+ addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
3854+ mask = 1UL << (nr & 31); /* make OR/test mask */
3855+ asm volatile(
3856+ " l %0,0(%4)\n"
3857+ "0: lr %1,%0\n"
3858+ " or %1,%3\n"
3859+ " cs %0,%1,0(%4)\n"
3860+ " jl 0b"
3861+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
3862+ : "d" (mask), "a" (addr)
3863+ : "cc" );
3864+ return (old & mask) != 0;
3865 }
3866
3867 /*
3868 * SMP save test_and_clear_bit routine based on compare and swap (CS)
3869 */
3870-static __inline__ int test_and_clear_bit_cs(int nr, volatile void * addr)
3871+static inline int test_and_clear_bit_cs(int nr, volatile void *ptr)
3872 {
3873- static const int minusone = -1;
3874- unsigned long bits, mask;
3875- __asm__ __volatile__(
3876+ unsigned long addr, old, new, mask;
3877+
3878+ addr = (unsigned long) ptr;
3879 #if ALIGN_CS == 1
3880- " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
3881- " nr %2,%1\n" /* isolate last 2 bits of address */
3882- " xr %1,%2\n" /* make addr % 4 == 0 */
3883- " sll %2,3\n"
3884- " ar %0,%2\n" /* add alignement to bitnr */
3885+ addr ^= addr & 3; /* align address to 4 */
3886+ nr += (addr & 3) << 3; /* add alignment to bit number */
3887 #endif
3888- " lhi %2,31\n"
3889- " nr %2,%0\n" /* make shift value */
3890- " xr %0,%2\n"
3891- " srl %0,3\n"
3892- " lhi %3,1\n"
3893- " la %1,0(%0,%1)\n" /* calc. address for CS */
3894- " sll %3,0(%2)\n"
3895- " l %0,0(%1)\n"
3896- " x %3,%4\n" /* make AND mask */
3897- "0: lr %2,%0\n" /* CS loop starts here */
3898- " nr %2,%3\n" /* clear bit */
3899- " cs %0,%2,0(%1)\n"
3900- " jl 0b\n"
3901- " x %3,%4\n"
3902- " nr %0,%3\n" /* isolate old bit */
3903- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask)
3904- : "m" (minusone) : "cc", "memory" );
3905- return nr;
3906+ addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
3907+ mask = ~(1UL << (nr & 31)); /* make AND mask */
3908+ asm volatile(
3909+ " l %0,0(%4)\n"
3910+ "0: lr %1,%0\n"
3911+ " nr %1,%3\n"
3912+ " cs %0,%1,0(%4)\n"
3913+ " jl 0b"
3914+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
3915+ : "d" (mask), "a" (addr)
3916+ : "cc" );
3917+ return (old ^ new) != 0;
3918 }
3919
3920 /*
3921 * SMP save test_and_change_bit routine based on compare and swap (CS)
3922 */
3923-static __inline__ int test_and_change_bit_cs(int nr, volatile void * addr)
3924+static inline int test_and_change_bit_cs(int nr, volatile void *ptr)
3925 {
3926- unsigned long bits, mask;
3927- __asm__ __volatile__(
3928+ unsigned long addr, old, new, mask;
3929+
3930+ addr = (unsigned long) ptr;
3931 #if ALIGN_CS == 1
3932- " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
3933- " nr %2,%1\n" /* isolate last 2 bits of address */
3934- " xr %1,%2\n" /* make addr % 4 == 0 */
3935- " sll %2,3\n"
3936- " ar %0,%2\n" /* add alignement to bitnr */
3937+ addr ^= addr & 3; /* align address to 4 */
3938+ nr += (addr & 3) << 3; /* add alignment to bit number */
3939 #endif
3940- " lhi %2,31\n"
3941- " nr %2,%0\n" /* make shift value */
3942- " xr %0,%2\n"
3943- " srl %0,3\n"
3944- " lhi %3,1\n"
3945- " la %1,0(%0,%1)\n" /* calc. address for CS */
3946- " sll %3,0(%2)\n" /* make OR mask */
3947- " l %0,0(%1)\n"
3948- "0: lr %2,%0\n" /* CS loop starts here */
3949- " xr %2,%3\n" /* change bit */
3950- " cs %0,%2,0(%1)\n"
3951- " jl 0b\n"
3952- " nr %0,%3\n" /* isolate old bit */
3953- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
3954- : "cc", "memory" );
3955- return nr != 0;
3956+ addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
3957+ mask = 1UL << (nr & 31); /* make XOR mask */
3958+ asm volatile(
3959+ " l %0,0(%4)\n"
3960+ "0: lr %1,%0\n"
3961+ " xr %1,%3\n"
3962+ " cs %0,%1,0(%4)\n"
3963+ " jl 0b"
3964+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
3965+ : "d" (mask), "a" (addr)
3966+ : "cc" );
3967+ return (old & mask) != 0;
3968 }
3969 #endif /* CONFIG_SMP */
3970
3971 /*
3972 * fast, non-SMP set_bit routine
3973 */
3974-static __inline__ void __set_bit(int nr, volatile void * addr)
3975+static inline void __set_bit(int nr, volatile void *ptr)
3976 {
3977- unsigned long reg1, reg2;
3978- __asm__ __volatile__(
3979- " lhi %1,24\n"
3980- " lhi %0,7\n"
3981- " xr %1,%2\n"
3982- " nr %0,%2\n"
3983- " srl %1,3\n"
3984- " la %1,0(%1,%3)\n"
3985- " la %0,0(%0,%4)\n"
3986- " oc 0(1,%1),0(%0)"
3987- : "=&a" (reg1), "=&a" (reg2)
3988- : "r" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
3989-}
3990-
3991-static __inline__ void
3992-__constant_set_bit(const int nr, volatile void * addr)
3993-{
3994- switch (nr&7) {
3995- case 0:
3996- __asm__ __volatile__ ("la 1,%0\n\t"
3997- "oi 0(1),0x01"
3998- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
3999- : : "1", "cc", "memory");
4000- break;
4001- case 1:
4002- __asm__ __volatile__ ("la 1,%0\n\t"
4003- "oi 0(1),0x02"
4004- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4005- : : "1", "cc", "memory" );
4006- break;
4007- case 2:
4008- __asm__ __volatile__ ("la 1,%0\n\t"
4009- "oi 0(1),0x04"
4010- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4011- : : "1", "cc", "memory" );
4012- break;
4013- case 3:
4014- __asm__ __volatile__ ("la 1,%0\n\t"
4015- "oi 0(1),0x08"
4016- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4017- : : "1", "cc", "memory" );
4018- break;
4019- case 4:
4020- __asm__ __volatile__ ("la 1,%0\n\t"
4021- "oi 0(1),0x10"
4022- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4023- : : "1", "cc", "memory" );
4024- break;
4025- case 5:
4026- __asm__ __volatile__ ("la 1,%0\n\t"
4027- "oi 0(1),0x20"
4028- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4029- : : "1", "cc", "memory" );
4030- break;
4031- case 6:
4032- __asm__ __volatile__ ("la 1,%0\n\t"
4033- "oi 0(1),0x40"
4034- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4035- : : "1", "cc", "memory" );
4036- break;
4037- case 7:
4038- __asm__ __volatile__ ("la 1,%0\n\t"
4039- "oi 0(1),0x80"
4040- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4041- : : "1", "cc", "memory" );
4042- break;
4043- }
4044+ unsigned long addr;
4045+
4046+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
4047+ asm volatile("oc 0(1,%1),0(%2)"
4048+ : "+m" (*(char *) addr)
4049+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
4050+ : "cc" );
4051+}
4052+
4053+static inline void
4054+__constant_set_bit(const int nr, volatile void *ptr)
4055+{
4056+ unsigned long addr;
4057+
4058+ addr = ((unsigned long) ptr) + ((nr >> 3) ^ 3);
4059+ switch (nr&7) {
4060+ case 0:
4061+ asm volatile ("oi 0(%1),0x01"
4062+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4063+ break;
4064+ case 1:
4065+ asm volatile ("oi 0(%1),0x02"
4066+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4067+ break;
4068+ case 2:
4069+ asm volatile ("oi 0(%1),0x04"
4070+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4071+ break;
4072+ case 3:
4073+ asm volatile ("oi 0(%1),0x08"
4074+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4075+ break;
4076+ case 4:
4077+ asm volatile ("oi 0(%1),0x10"
4078+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4079+ break;
4080+ case 5:
4081+ asm volatile ("oi 0(%1),0x20"
4082+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4083+ break;
4084+ case 6:
4085+ asm volatile ("oi 0(%1),0x40"
4086+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4087+ break;
4088+ case 7:
4089+ asm volatile ("oi 0(%1),0x80"
4090+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4091+ break;
4092+ }
4093 }
4094
4095 #define set_bit_simple(nr,addr) \
4096@@ -323,76 +268,58 @@
4097 /*
4098 * fast, non-SMP clear_bit routine
4099 */
4100-static __inline__ void
4101-__clear_bit(int nr, volatile void * addr)
4102+static inline void
4103+__clear_bit(int nr, volatile void *ptr)
4104 {
4105- unsigned long reg1, reg2;
4106- __asm__ __volatile__(
4107- " lhi %1,24\n"
4108- " lhi %0,7\n"
4109- " xr %1,%2\n"
4110- " nr %0,%2\n"
4111- " srl %1,3\n"
4112- " la %1,0(%1,%3)\n"
4113- " la %0,0(%0,%4)\n"
4114- " nc 0(1,%1),0(%0)"
4115- : "=&a" (reg1), "=&a" (reg2)
4116- : "r" (nr), "a" (addr), "a" (&_ni_bitmap) : "cc", "memory" );
4117-}
4118-
4119-static __inline__ void
4120-__constant_clear_bit(const int nr, volatile void * addr)
4121-{
4122- switch (nr&7) {
4123- case 0:
4124- __asm__ __volatile__ ("la 1,%0\n\t"
4125- "ni 0(1),0xFE"
4126- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4127- : : "1", "cc", "memory" );
4128- break;
4129- case 1:
4130- __asm__ __volatile__ ("la 1,%0\n\t"
4131- "ni 0(1),0xFD"
4132- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4133- : : "1", "cc", "memory" );
4134- break;
4135- case 2:
4136- __asm__ __volatile__ ("la 1,%0\n\t"
4137- "ni 0(1),0xFB"
4138- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4139- : : "1", "cc", "memory" );
4140- break;
4141- case 3:
4142- __asm__ __volatile__ ("la 1,%0\n\t"
4143- "ni 0(1),0xF7"
4144- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4145- : : "1", "cc", "memory" );
4146- break;
4147- case 4:
4148- __asm__ __volatile__ ("la 1,%0\n\t"
4149- "ni 0(1),0xEF"
4150- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4151- : : "cc", "memory" );
4152- break;
4153- case 5:
4154- __asm__ __volatile__ ("la 1,%0\n\t"
4155- "ni 0(1),0xDF"
4156- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4157- : : "1", "cc", "memory" );
4158- break;
4159- case 6:
4160- __asm__ __volatile__ ("la 1,%0\n\t"
4161- "ni 0(1),0xBF"
4162- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4163- : : "1", "cc", "memory" );
4164- break;
4165- case 7:
4166- __asm__ __volatile__ ("la 1,%0\n\t"
4167- "ni 0(1),0x7F"
4168- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4169- : : "1", "cc", "memory" );
4170- break;
4171- }
4172+ unsigned long addr;
4173+
4174+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
4175+ asm volatile("nc 0(1,%1),0(%2)"
4176+ : "+m" (*(char *) addr)
4177+ : "a" (addr), "a" (_ni_bitmap + (nr & 7))
4178+ : "cc" );
4179+}
4180+
4181+static inline void
4182+__constant_clear_bit(const int nr, volatile void *ptr)
4183+{
4184+ unsigned long addr;
4185+
4186+ addr = ((unsigned long) ptr) + ((nr >> 3) ^ 3);
4187+ switch (nr&7) {
4188+ case 0:
4189+ asm volatile ("ni 0(%1),0xFE"
4190+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4191+ break;
4192+ case 1:
4193+ asm volatile ("ni 0(%1),0xFD"
4194+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4195+ break;
4196+ case 2:
4197+ asm volatile ("ni 0(%1),0xFB"
4198+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4199+ break;
4200+ case 3:
4201+ asm volatile ("ni 0(%1),0xF7"
4202+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4203+ break;
4204+ case 4:
4205+ asm volatile ("ni 0(%1),0xEF"
4206+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4207+ break;
4208+ case 5:
4209+ asm volatile ("ni 0(%1),0xDF"
4210+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4211+ break;
4212+ case 6:
4213+ asm volatile ("ni 0(%1),0xBF"
4214+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4215+ break;
4216+ case 7:
4217+ asm volatile ("ni 0(%1),0x7F"
4218+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4219+ break;
4220+ }
4221 }
4222
4223 #define clear_bit_simple(nr,addr) \
4224@@ -403,75 +330,57 @@
4225 /*
4226 * fast, non-SMP change_bit routine
4227 */
4228-static __inline__ void __change_bit(int nr, volatile void * addr)
4229+static inline void __change_bit(int nr, volatile void *ptr)
4230 {
4231- unsigned long reg1, reg2;
4232- __asm__ __volatile__(
4233- " lhi %1,24\n"
4234- " lhi %0,7\n"
4235- " xr %1,%2\n"
4236- " nr %0,%2\n"
4237- " srl %1,3\n"
4238- " la %1,0(%1,%3)\n"
4239- " la %0,0(%0,%4)\n"
4240- " xc 0(1,%1),0(%0)"
4241- : "=&a" (reg1), "=&a" (reg2)
4242- : "r" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
4243-}
4244-
4245-static __inline__ void
4246-__constant_change_bit(const int nr, volatile void * addr)
4247-{
4248- switch (nr&7) {
4249- case 0:
4250- __asm__ __volatile__ ("la 1,%0\n\t"
4251- "xi 0(1),0x01"
4252- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4253- : : "cc", "memory" );
4254- break;
4255- case 1:
4256- __asm__ __volatile__ ("la 1,%0\n\t"
4257- "xi 0(1),0x02"
4258- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4259- : : "cc", "memory" );
4260- break;
4261- case 2:
4262- __asm__ __volatile__ ("la 1,%0\n\t"
4263- "xi 0(1),0x04"
4264- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4265- : : "cc", "memory" );
4266- break;
4267- case 3:
4268- __asm__ __volatile__ ("la 1,%0\n\t"
4269- "xi 0(1),0x08"
4270- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4271- : : "cc", "memory" );
4272- break;
4273- case 4:
4274- __asm__ __volatile__ ("la 1,%0\n\t"
4275- "xi 0(1),0x10"
4276- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4277- : : "cc", "memory" );
4278- break;
4279- case 5:
4280- __asm__ __volatile__ ("la 1,%0\n\t"
4281- "xi 0(1),0x20"
4282- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4283- : : "1", "cc", "memory" );
4284- break;
4285- case 6:
4286- __asm__ __volatile__ ("la 1,%0\n\t"
4287- "xi 0(1),0x40"
4288- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4289- : : "1", "cc", "memory" );
4290- break;
4291- case 7:
4292- __asm__ __volatile__ ("la 1,%0\n\t"
4293- "xi 0(1),0x80"
4294- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4295- : : "1", "cc", "memory" );
4296- break;
4297- }
4298+ unsigned long addr;
4299+
4300+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
4301+ asm volatile("xc 0(1,%1),0(%2)"
4302+ : "+m" (*(char *) addr)
4303+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
4304+ : "cc" );
4305+}
4306+
4307+static inline void
4308+__constant_change_bit(const int nr, volatile void *ptr)
4309+{
4310+ unsigned long addr;
4311+
4312+ addr = ((unsigned long) ptr) + ((nr >> 3) ^ 3);
4313+ switch (nr&7) {
4314+ case 0:
4315+ asm volatile ("xi 0(%1),0x01"
4316+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4317+ break;
4318+ case 1:
4319+ asm volatile ("xi 0(%1),0x02"
4320+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4321+ break;
4322+ case 2:
4323+ asm volatile ("xi 0(%1),0x04"
4324+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4325+ break;
4326+ case 3:
4327+ asm volatile ("xi 0(%1),0x08"
4328+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4329+ break;
4330+ case 4:
4331+ asm volatile ("xi 0(%1),0x10"
4332+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4333+ break;
4334+ case 5:
4335+ asm volatile ("xi 0(%1),0x20"
4336+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4337+ break;
4338+ case 6:
4339+ asm volatile ("xi 0(%1),0x40"
4340+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4341+ break;
4342+ case 7:
4343+ asm volatile ("xi 0(%1),0x80"
4344+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4345+ break;
4346+ }
4347 }
4348
4349 #define change_bit_simple(nr,addr) \
4350@@ -482,74 +391,54 @@
4351 /*
4352 * fast, non-SMP test_and_set_bit routine
4353 */
4354-static __inline__ int test_and_set_bit_simple(int nr, volatile void * addr)
4355+static inline int test_and_set_bit_simple(int nr, volatile void *ptr)
4356 {
4357- unsigned long reg1, reg2;
4358- int oldbit;
4359- __asm__ __volatile__(
4360- " lhi %1,24\n"
4361- " lhi %2,7\n"
4362- " xr %1,%3\n"
4363- " nr %2,%3\n"
4364- " srl %1,3\n"
4365- " la %1,0(%1,%4)\n"
4366- " ic %0,0(%1)\n"
4367- " srl %0,0(%2)\n"
4368- " la %2,0(%2,%5)\n"
4369- " oc 0(1,%1),0(%2)"
4370- : "=d&" (oldbit), "=&a" (reg1), "=&a" (reg2)
4371- : "r" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
4372- return oldbit & 1;
4373+ unsigned long addr;
4374+ unsigned char ch;
4375+
4376+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
4377+ ch = *(unsigned char *) addr;
4378+ asm volatile("oc 0(1,%1),0(%2)"
4379+ : "+m" (*(char *) addr)
4380+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
4381+ : "cc" );
4382+ return (ch >> (nr & 7)) & 1;
4383 }
4384 #define __test_and_set_bit(X,Y) test_and_set_bit_simple(X,Y)
4385
4386 /*
4387 * fast, non-SMP test_and_clear_bit routine
4388 */
4389-static __inline__ int test_and_clear_bit_simple(int nr, volatile void * addr)
4390+static inline int test_and_clear_bit_simple(int nr, volatile void *ptr)
4391 {
4392- unsigned long reg1, reg2;
4393- int oldbit;
4394+ unsigned long addr;
4395+ unsigned char ch;
4396
4397- __asm__ __volatile__(
4398- " lhi %1,24\n"
4399- " lhi %2,7\n"
4400- " xr %1,%3\n"
4401- " nr %2,%3\n"
4402- " srl %1,3\n"
4403- " la %1,0(%1,%4)\n"
4404- " ic %0,0(%1)\n"
4405- " srl %0,0(%2)\n"
4406- " la %2,0(%2,%5)\n"
4407- " nc 0(1,%1),0(%2)"
4408- : "=d&" (oldbit), "=&a" (reg1), "=&a" (reg2)
4409- : "r" (nr), "a" (addr), "a" (&_ni_bitmap) : "cc", "memory" );
4410- return oldbit & 1;
4411+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
4412+ ch = *(unsigned char *) addr;
4413+ asm volatile("nc 0(1,%1),0(%2)"
4414+ : "+m" (*(char *) addr)
4415+ : "a" (addr), "a" (_ni_bitmap + (nr & 7))
4416+ : "cc" );
4417+ return (ch >> (nr & 7)) & 1;
4418 }
4419 #define __test_and_clear_bit(X,Y) test_and_clear_bit_simple(X,Y)
4420
4421 /*
4422 * fast, non-SMP test_and_change_bit routine
4423 */
4424-static __inline__ int test_and_change_bit_simple(int nr, volatile void * addr)
4425+static inline int test_and_change_bit_simple(int nr, volatile void *ptr)
4426 {
4427- unsigned long reg1, reg2;
4428- int oldbit;
4429+ unsigned long addr;
4430+ unsigned char ch;
4431
4432- __asm__ __volatile__(
4433- " lhi %1,24\n"
4434- " lhi %2,7\n"
4435- " xr %1,%3\n"
4436- " nr %2,%1\n"
4437- " srl %1,3\n"
4438- " la %1,0(%1,%4)\n"
4439- " ic %0,0(%1)\n"
4440- " srl %0,0(%2)\n"
4441- " la %2,0(%2,%5)\n"
4442- " xc 0(1,%1),0(%2)"
4443- : "=d&" (oldbit), "=&a" (reg1), "=&a" (reg2)
4444- : "r" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
4445- return oldbit & 1;
4446+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
4447+ ch = *(unsigned char *) addr;
4448+ asm volatile("xc 0(1,%1),0(%2)"
4449+ : "+m" (*(char *) addr)
4450+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
4451+ : "cc" );
4452+ return (ch >> (nr & 7)) & 1;
4453 }
4454 #define __test_and_change_bit(X,Y) test_and_change_bit_simple(X,Y)
4455
4456@@ -574,25 +463,17 @@
4457 * This routine doesn't need to be atomic.
4458 */
4459
4460-static __inline__ int __test_bit(int nr, volatile void * addr)
4461+static inline int __test_bit(int nr, volatile void *ptr)
4462 {
4463- unsigned long reg1, reg2;
4464- int oldbit;
4465+ unsigned long addr;
4466+ unsigned char ch;
4467
4468- __asm__ __volatile__(
4469- " lhi %2,24\n"
4470- " lhi %1,7\n"
4471- " xr %2,%3\n"
4472- " nr %1,%3\n"
4473- " srl %2,3\n"
4474- " ic %0,0(%2,%4)\n"
4475- " srl %0,0(%1)"
4476- : "=d&" (oldbit), "=&a" (reg1), "=&a" (reg2)
4477- : "r" (nr), "a" (addr) : "cc" );
4478- return oldbit & 1;
4479+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
4480+ ch = *(unsigned char *) addr;
4481+ return (ch >> (nr & 7)) & 1;
4482 }
4483
4484-static __inline__ int __constant_test_bit(int nr, volatile void * addr) {
4485+static inline int __constant_test_bit(int nr, volatile void * addr) {
4486 return (((volatile char *) addr)[(nr>>3)^3] & (1<<(nr&7))) != 0;
4487 }
4488
4489@@ -604,7 +485,7 @@
4490 /*
4491 * Find-bit routines..
4492 */
4493-static __inline__ int find_first_zero_bit(void * addr, unsigned size)
4494+static inline int find_first_zero_bit(void * addr, unsigned size)
4495 {
4496 unsigned long cmp, count;
4497 int res;
4498@@ -642,7 +523,45 @@
4499 return (res < size) ? res : size;
4500 }
4501
4502-static __inline__ int find_next_zero_bit (void * addr, int size, int offset)
4503+static inline int find_first_bit(void * addr, unsigned size)
4504+{
4505+ unsigned long cmp, count;
4506+ int res;
4507+
4508+ if (!size)
4509+ return 0;
4510+ __asm__(" slr %1,%1\n"
4511+ " lr %2,%3\n"
4512+ " slr %0,%0\n"
4513+ " ahi %2,31\n"
4514+ " srl %2,5\n"
4515+ "0: c %1,0(%0,%4)\n"
4516+ " jne 1f\n"
4517+ " ahi %0,4\n"
4518+ " brct %2,0b\n"
4519+ " lr %0,%3\n"
4520+ " j 4f\n"
4521+ "1: l %2,0(%0,%4)\n"
4522+ " sll %0,3\n"
4523+ " lhi %1,0xff\n"
4524+ " tml %2,0xffff\n"
4525+ " jnz 2f\n"
4526+ " ahi %0,16\n"
4527+ " srl %2,16\n"
4528+ "2: tml %2,0x00ff\n"
4529+ " jnz 3f\n"
4530+ " ahi %0,8\n"
4531+ " srl %2,8\n"
4532+ "3: nr %2,%1\n"
4533+ " ic %2,0(%2,%5)\n"
4534+ " alr %0,%2\n"
4535+ "4:"
4536+ : "=&a" (res), "=&d" (cmp), "=&a" (count)
4537+ : "a" (size), "a" (addr), "a" (&_sb_findmap) : "cc" );
4538+ return (res < size) ? res : size;
4539+}
4540+
4541+static inline int find_next_zero_bit (void * addr, int size, int offset)
4542 {
4543 unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
4544 unsigned long bitvec, reg;
4545@@ -680,11 +599,49 @@
4546 return (offset + res);
4547 }
4548
4549+static inline int find_next_bit (void * addr, int size, int offset)
4550+{
4551+ unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
4552+ unsigned long bitvec, reg;
4553+ int set, bit = offset & 31, res;
4554+
4555+ if (bit) {
4556+ /*
4557+ * Look for set bit in first word
4558+ */
4559+ bitvec = (*p) >> bit;
4560+ __asm__(" slr %0,%0\n"
4561+ " lhi %2,0xff\n"
4562+ " tml %1,0xffff\n"
4563+ " jnz 0f\n"
4564+ " ahi %0,16\n"
4565+ " srl %1,16\n"
4566+ "0: tml %1,0x00ff\n"
4567+ " jnz 1f\n"
4568+ " ahi %0,8\n"
4569+ " srl %1,8\n"
4570+ "1: nr %1,%2\n"
4571+ " ic %1,0(%1,%3)\n"
4572+ " alr %0,%1"
4573+ : "=&d" (set), "+a" (bitvec), "=&d" (reg)
4574+ : "a" (&_sb_findmap) : "cc" );
4575+ if (set < (32 - bit))
4576+ return set + offset;
4577+ offset += 32 - bit;
4578+ p++;
4579+ }
4580+ /*
4581+ * No set bit yet, search remaining full words for a bit
4582+ */
4583+ res = find_first_bit (p, size - 32 * (p - (unsigned long *) addr));
4584+ return (offset + res);
4585+}
4586+
4587 /*
4588 * ffz = Find First Zero in word. Undefined if no zero exists,
4589 * so code should check against ~0UL first..
4590 */
4591-static __inline__ unsigned long ffz(unsigned long word)
4592+static inline unsigned long ffz(unsigned long word)
4593 {
4594 unsigned long reg;
4595 int result;
4596@@ -708,40 +665,109 @@
4597 }
4598
4599 /*
4600+ * __ffs = find first bit in word. Undefined if no bit exists,
4601+ * so code should check against 0UL first..
4602+ */
4603+static inline unsigned long __ffs(unsigned long word)
4604+{
4605+ unsigned long reg, result;
4606+
4607+ __asm__(" slr %0,%0\n"
4608+ " lhi %2,0xff\n"
4609+ " tml %1,0xffff\n"
4610+ " jnz 0f\n"
4611+ " ahi %0,16\n"
4612+ " srl %1,16\n"
4613+ "0: tml %1,0x00ff\n"
4614+ " jnz 1f\n"
4615+ " ahi %0,8\n"
4616+ " srl %1,8\n"
4617+ "1: nr %1,%2\n"
4618+ " ic %1,0(%1,%3)\n"
4619+ " alr %0,%1"
4620+ : "=&d" (result), "+a" (word), "=&d" (reg)
4621+ : "a" (&_sb_findmap) : "cc" );
4622+ return result;
4623+}
4624+
4625+/*
4626+ * Every architecture must define this function. It's the fastest
4627+ * way of searching a 140-bit bitmap where the first 100 bits are
4628+ * unlikely to be set. It's guaranteed that at least one of the 140
4629+ * bits is cleared.
4630+ */
4631+static inline int sched_find_first_bit(unsigned long *b)
4632+{
4633+ return find_first_bit(b, 140);
4634+}
4635+
4636+/*
4637 * ffs: find first bit set. This is defined the same way as
4638 * the libc and compiler builtin ffs routines, therefore
4639 * differs in spirit from the above ffz (man ffs).
4640 */
4641
4642-extern int __inline__ ffs (int x)
4643+extern int inline ffs (int x)
4644 {
4645- int r;
4646+ int r = 1;
4647
4648 if (x == 0)
4649- return 0;
4650- __asm__(" slr %0,%0\n"
4651- " tml %1,0xffff\n"
4652+ return 0;
4653+ __asm__(" tml %1,0xffff\n"
4654 " jnz 0f\n"
4655- " ahi %0,16\n"
4656 " srl %1,16\n"
4657+ " ahi %0,16\n"
4658 "0: tml %1,0x00ff\n"
4659 " jnz 1f\n"
4660- " ahi %0,8\n"
4661 " srl %1,8\n"
4662+ " ahi %0,8\n"
4663 "1: tml %1,0x000f\n"
4664 " jnz 2f\n"
4665- " ahi %0,4\n"
4666 " srl %1,4\n"
4667+ " ahi %0,4\n"
4668 "2: tml %1,0x0003\n"
4669 " jnz 3f\n"
4670- " ahi %0,2\n"
4671 " srl %1,2\n"
4672+ " ahi %0,2\n"
4673 "3: tml %1,0x0001\n"
4674 " jnz 4f\n"
4675 " ahi %0,1\n"
4676 "4:"
4677 : "=&d" (r), "+d" (x) : : "cc" );
4678- return r+1;
4679+ return r;
4680+}
4681+
4682+/*
4683+ * fls: find last bit set.
4684+ */
4685+extern __inline__ int fls(int x)
4686+{
4687+ int r = 32;
4688+
4689+ if (x == 0)
4690+ return 0;
4691+ __asm__(" tmh %1,0xffff\n"
4692+ " jz 0f\n"
4693+ " sll %1,16\n"
4694+ " ahi %0,-16\n"
4695+ "0: tmh %1,0xff00\n"
4696+ " jz 1f\n"
4697+ " sll %1,8\n"
4698+ " ahi %0,-8\n"
4699+ "1: tmh %1,0xf000\n"
4700+ " jz 2f\n"
4701+ " sll %1,4\n"
4702+ " ahi %0,-4\n"
4703+ "2: tmh %1,0xc000\n"
4704+ " jz 3f\n"
4705+ " sll %1,2\n"
4706+ " ahi %0,-2\n"
4707+ "3: tmh %1,0x8000\n"
4708+ " jz 4f\n"
4709+ " ahi %0,-1\n"
4710+ "4:"
4711+ : "+d" (r), "+d" (x) : : "cc" );
4712+ return r;
4713 }
4714
4715 /*
4716@@ -769,7 +795,7 @@
4717 #define ext2_set_bit(nr, addr) test_and_set_bit((nr)^24, addr)
4718 #define ext2_clear_bit(nr, addr) test_and_clear_bit((nr)^24, addr)
4719 #define ext2_test_bit(nr, addr) test_bit((nr)^24, addr)
4720-static __inline__ int ext2_find_first_zero_bit(void *vaddr, unsigned size)
4721+static inline int ext2_find_first_zero_bit(void *vaddr, unsigned size)
4722 {
4723 unsigned long cmp, count;
4724 int res;
4725@@ -808,7 +834,7 @@
4726 return (res < size) ? res : size;
4727 }
4728
4729-static __inline__ int
4730+static inline int
4731 ext2_find_next_zero_bit(void *vaddr, unsigned size, unsigned offset)
4732 {
4733 unsigned long *addr = vaddr;
4734diff -urN linux-2.4.22.org/include/asm-s390x/bitops.h linux-2.4.22/include/asm-s390x/bitops.h
4735--- linux-2.4.22.org/include/asm-s390x/bitops.h 2003-11-24 18:28:36.000000000 +0100
4736+++ linux-2.4.22/include/asm-s390x/bitops.h 2003-11-24 18:39:03.000000000 +0100
4737@@ -51,271 +51,220 @@
4738 extern const char _oi_bitmap[];
4739 extern const char _ni_bitmap[];
4740 extern const char _zb_findmap[];
4741+extern const char _sb_findmap[];
4742
4743 #ifdef CONFIG_SMP
4744 /*
4745 * SMP save set_bit routine based on compare and swap (CS)
4746 */
4747-static __inline__ void set_bit_cs(unsigned long nr, volatile void * addr)
4748+static inline void set_bit_cs(unsigned long nr, volatile void *ptr)
4749 {
4750- unsigned long bits, mask;
4751- __asm__ __volatile__(
4752+ unsigned long addr, old, new, mask;
4753+
4754+ addr = (unsigned long) ptr;
4755 #if ALIGN_CS == 1
4756- " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
4757- " ngr %2,%1\n" /* isolate last 2 bits of address */
4758- " xgr %1,%2\n" /* make addr % 4 == 0 */
4759- " sllg %2,%2,3\n"
4760- " agr %0,%2\n" /* add alignement to bitnr */
4761+ addr ^= addr & 7; /* align address to 8 */
4762+ nr += (addr & 7) << 3; /* add alignment to bit number */
4763 #endif
4764- " lghi %2,63\n"
4765- " nr %2,%0\n" /* make shift value */
4766- " xr %0,%2\n"
4767- " srlg %0,%0,3\n"
4768- " lghi %3,1\n"
4769- " la %1,0(%0,%1)\n" /* calc. address for CS */
4770- " sllg %3,%3,0(%2)\n" /* make OR mask */
4771- " lg %0,0(%1)\n"
4772- "0: lgr %2,%0\n" /* CS loop starts here */
4773- " ogr %2,%3\n" /* set bit */
4774- " csg %0,%2,0(%1)\n"
4775- " jl 0b"
4776- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
4777- : "cc", "memory" );
4778+ addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
4779+ mask = 1UL << (nr & 63); /* make OR mask */
4780+ asm volatile(
4781+ " lg %0,0(%4)\n"
4782+ "0: lgr %1,%0\n"
4783+ " ogr %1,%3\n"
4784+ " csg %0,%1,0(%4)\n"
4785+ " jl 0b"
4786+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
4787+ : "d" (mask), "a" (addr)
4788+ : "cc" );
4789 }
4790
4791 /*
4792 * SMP save clear_bit routine based on compare and swap (CS)
4793 */
4794-static __inline__ void clear_bit_cs(unsigned long nr, volatile void * addr)
4795+static inline void clear_bit_cs(unsigned long nr, volatile void *ptr)
4796 {
4797- unsigned long bits, mask;
4798- __asm__ __volatile__(
4799+ unsigned long addr, old, new, mask;
4800+
4801+ addr = (unsigned long) ptr;
4802 #if ALIGN_CS == 1
4803- " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
4804- " ngr %2,%1\n" /* isolate last 2 bits of address */
4805- " xgr %1,%2\n" /* make addr % 4 == 0 */
4806- " sllg %2,%2,3\n"
4807- " agr %0,%2\n" /* add alignement to bitnr */
4808+ addr ^= addr & 7; /* align address to 8 */
4809+ nr += (addr & 7) << 3; /* add alignment to bit number */
4810 #endif
4811- " lghi %2,63\n"
4812- " nr %2,%0\n" /* make shift value */
4813- " xr %0,%2\n"
4814- " srlg %0,%0,3\n"
4815- " lghi %3,-2\n"
4816- " la %1,0(%0,%1)\n" /* calc. address for CS */
4817- " lghi %3,-2\n"
4818- " rllg %3,%3,0(%2)\n" /* make AND mask */
4819- " lg %0,0(%1)\n"
4820- "0: lgr %2,%0\n" /* CS loop starts here */
4821- " ngr %2,%3\n" /* clear bit */
4822- " csg %0,%2,0(%1)\n"
4823- " jl 0b"
4824- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
4825- : "cc", "memory" );
4826+ addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
4827+ mask = ~(1UL << (nr & 63)); /* make AND mask */
4828+ asm volatile(
4829+ " lg %0,0(%4)\n"
4830+ "0: lgr %1,%0\n"
4831+ " ngr %1,%3\n"
4832+ " csg %0,%1,0(%4)\n"
4833+ " jl 0b"
4834+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
4835+ : "d" (mask), "a" (addr)
4836+ : "cc" );
4837 }
4838
4839 /*
4840 * SMP save change_bit routine based on compare and swap (CS)
4841 */
4842-static __inline__ void change_bit_cs(unsigned long nr, volatile void * addr)
4843+static inline void change_bit_cs(unsigned long nr, volatile void *ptr)
4844 {
4845- unsigned long bits, mask;
4846- __asm__ __volatile__(
4847+ unsigned long addr, old, new, mask;
4848+
4849+ addr = (unsigned long) ptr;
4850 #if ALIGN_CS == 1
4851- " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
4852- " ngr %2,%1\n" /* isolate last 2 bits of address */
4853- " xgr %1,%2\n" /* make addr % 4 == 0 */
4854- " sllg %2,%2,3\n"
4855- " agr %0,%2\n" /* add alignement to bitnr */
4856+ addr ^= addr & 7; /* align address to 8 */
4857+ nr += (addr & 7) << 3; /* add alignment to bit number */
4858 #endif
4859- " lghi %2,63\n"
4860- " nr %2,%0\n" /* make shift value */
4861- " xr %0,%2\n"
4862- " srlg %0,%0,3\n"
4863- " lghi %3,1\n"
4864- " la %1,0(%0,%1)\n" /* calc. address for CS */
4865- " sllg %3,%3,0(%2)\n" /* make XR mask */
4866- " lg %0,0(%1)\n"
4867- "0: lgr %2,%0\n" /* CS loop starts here */
4868- " xgr %2,%3\n" /* change bit */
4869- " csg %0,%2,0(%1)\n"
4870- " jl 0b"
4871- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
4872- : "cc", "memory" );
4873+ addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
4874+ mask = 1UL << (nr & 63); /* make XOR mask */
4875+ asm volatile(
4876+ " lg %0,0(%4)\n"
4877+ "0: lgr %1,%0\n"
4878+ " xgr %1,%3\n"
4879+ " csg %0,%1,0(%4)\n"
4880+ " jl 0b"
4881+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
4882+ : "d" (mask), "a" (addr)
4883+ : "cc" );
4884 }
4885
4886 /*
4887 * SMP save test_and_set_bit routine based on compare and swap (CS)
4888 */
4889-static __inline__ int
4890-test_and_set_bit_cs(unsigned long nr, volatile void * addr)
4891+static inline int
4892+test_and_set_bit_cs(unsigned long nr, volatile void *ptr)
4893 {
4894- unsigned long bits, mask;
4895- __asm__ __volatile__(
4896+ unsigned long addr, old, new, mask;
4897+
4898+ addr = (unsigned long) ptr;
4899 #if ALIGN_CS == 1
4900- " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
4901- " ngr %2,%1\n" /* isolate last 2 bits of address */
4902- " xgr %1,%2\n" /* make addr % 4 == 0 */
4903- " sllg %2,%2,3\n"
4904- " agr %0,%2\n" /* add alignement to bitnr */
4905+ addr ^= addr & 7; /* align address to 8 */
4906+ nr += (addr & 7) << 3; /* add alignment to bit number */
4907 #endif
4908- " lghi %2,63\n"
4909- " nr %2,%0\n" /* make shift value */
4910- " xr %0,%2\n"
4911- " srlg %0,%0,3\n"
4912- " lghi %3,1\n"
4913- " la %1,0(%0,%1)\n" /* calc. address for CS */
4914- " sllg %3,%3,0(%2)\n" /* make OR mask */
4915- " lg %0,0(%1)\n"
4916- "0: lgr %2,%0\n" /* CS loop starts here */
4917- " ogr %2,%3\n" /* set bit */
4918- " csg %0,%2,0(%1)\n"
4919- " jl 0b\n"
4920- " ngr %0,%3\n" /* isolate old bit */
4921- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
4922- : "cc", "memory" );
4923- return nr != 0;
4924+ addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
4925+ mask = 1UL << (nr & 63); /* make OR/test mask */
4926+ asm volatile(
4927+ " lg %0,0(%4)\n"
4928+ "0: lgr %1,%0\n"
4929+ " ogr %1,%3\n"
4930+ " csg %0,%1,0(%4)\n"
4931+ " jl 0b"
4932+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
4933+ : "d" (mask), "a" (addr)
4934+ : "cc" );
4935+ return (old & mask) != 0;
4936 }
4937
4938 /*
4939 * SMP save test_and_clear_bit routine based on compare and swap (CS)
4940 */
4941-static __inline__ int
4942-test_and_clear_bit_cs(unsigned long nr, volatile void * addr)
4943+static inline int
4944+test_and_clear_bit_cs(unsigned long nr, volatile void *ptr)
4945 {
4946- unsigned long bits, mask;
4947- __asm__ __volatile__(
4948+ unsigned long addr, old, new, mask;
4949+
4950+ addr = (unsigned long) ptr;
4951 #if ALIGN_CS == 1
4952- " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
4953- " ngr %2,%1\n" /* isolate last 2 bits of address */
4954- " xgr %1,%2\n" /* make addr % 4 == 0 */
4955- " sllg %2,%2,3\n"
4956- " agr %0,%2\n" /* add alignement to bitnr */
4957+ addr ^= addr & 7; /* align address to 8 */
4958+ nr += (addr & 7) << 3; /* add alignment to bit number */
4959 #endif
4960- " lghi %2,63\n"
4961- " nr %2,%0\n" /* make shift value */
4962- " xr %0,%2\n"
4963- " srlg %0,%0,3\n"
4964- " lghi %3,-2\n"
4965- " la %1,0(%0,%1)\n" /* calc. address for CS */
4966- " rllg %3,%3,0(%2)\n" /* make AND mask */
4967- " lg %0,0(%1)\n"
4968- "0: lgr %2,%0\n" /* CS loop starts here */
4969- " ngr %2,%3\n" /* clear bit */
4970- " csg %0,%2,0(%1)\n"
4971- " jl 0b\n"
4972- " xgr %0,%2\n" /* isolate old bit */
4973- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
4974- : "cc", "memory" );
4975- return nr != 0;
4976+ addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
4977+ mask = ~(1UL << (nr & 63)); /* make AND mask */
4978+ asm volatile(
4979+ " lg %0,0(%4)\n"
4980+ "0: lgr %1,%0\n"
4981+ " ngr %1,%3\n"
4982+ " csg %0,%1,0(%4)\n"
4983+ " jl 0b"
4984+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
4985+ : "d" (mask), "a" (addr)
4986+ : "cc" );
4987+ return (old ^ new) != 0;
4988 }
4989
4990 /*
4991 * SMP save test_and_change_bit routine based on compare and swap (CS)
4992 */
4993-static __inline__ int
4994-test_and_change_bit_cs(unsigned long nr, volatile void * addr)
4995+static inline int
4996+test_and_change_bit_cs(unsigned long nr, volatile void *ptr)
4997 {
4998- unsigned long bits, mask;
4999- __asm__ __volatile__(
5000+ unsigned long addr, old, new, mask;
5001+
5002+ addr = (unsigned long) ptr;
5003 #if ALIGN_CS == 1
5004- " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
5005- " ngr %2,%1\n" /* isolate last 2 bits of address */
5006- " xgr %1,%2\n" /* make addr % 4 == 0 */
5007- " sllg %2,%2,3\n"
5008- " agr %0,%2\n" /* add alignement to bitnr */
5009+ addr ^= addr & 7; /* align address to 8 */
5010+ nr += (addr & 7) << 3; /* add alignment to bit number */
5011 #endif
5012- " lghi %2,63\n"
5013- " nr %2,%0\n" /* make shift value */
5014- " xr %0,%2\n"
5015- " srlg %0,%0,3\n"
5016- " lghi %3,1\n"
5017- " la %1,0(%0,%1)\n" /* calc. address for CS */
5018- " sllg %3,%3,0(%2)\n" /* make OR mask */
5019- " lg %0,0(%1)\n"
5020- "0: lgr %2,%0\n" /* CS loop starts here */
5021- " xgr %2,%3\n" /* change bit */
5022- " csg %0,%2,0(%1)\n"
5023- " jl 0b\n"
5024- " ngr %0,%3\n" /* isolate old bit */
5025- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
5026- : "cc", "memory" );
5027- return nr != 0;
5028+ addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
5029+ mask = 1UL << (nr & 63); /* make XOR mask */
5030+ asm volatile(
5031+ " lg %0,0(%4)\n"
5032+ "0: lgr %1,%0\n"
5033+ " xgr %1,%3\n"
5034+ " csg %0,%1,0(%4)\n"
5035+ " jl 0b"
5036+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
5037+ : "d" (mask), "a" (addr)
5038+ : "cc" );
5039+ return (old & mask) != 0;
5040 }
5041 #endif /* CONFIG_SMP */
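All three *_cs() helpers above share one compare-and-swap retry pattern: load the 64-bit word, compute the new value, and let CSG store it only if no other CPU modified the word in between, otherwise reload and retry. A minimal portable sketch of the same pattern, using the GCC __atomic builtins instead of s390x assembler and ignoring the architecture's big-endian bit numbering (an illustration under those assumptions, not code from the patch):

#include <assert.h>
#include <stdbool.h>

/* Hypothetical portable counterpart of test_and_set_bit_cs(). */
static int test_and_set_bit_cas(unsigned long nr, unsigned long *bitmap)
{
        unsigned long *word = bitmap + (nr >> 6);
        unsigned long mask = 1UL << (nr & 63);
        unsigned long old = __atomic_load_n(word, __ATOMIC_RELAXED);

        /* On failure 'old' is refreshed, just as the CS loop reloads %0. */
        while (!__atomic_compare_exchange_n(word, &old, old | mask, false,
                                            __ATOMIC_SEQ_CST, __ATOMIC_RELAXED))
                ;
        return (old & mask) != 0;
}

int main(void)
{
        unsigned long map[2] = { 0, 0 };

        assert(test_and_set_bit_cas(70, map) == 0);     /* bit was clear */
        assert(test_and_set_bit_cas(70, map) == 1);     /* now it is set */
        return 0;
}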
5042
5043 /*
5044 * fast, non-SMP set_bit routine
5045 */
5046-static __inline__ void __set_bit(unsigned long nr, volatile void * addr)
5047+static inline void __set_bit(unsigned long nr, volatile void *ptr)
5048 {
5049- unsigned long reg1, reg2;
5050- __asm__ __volatile__(
5051- " lghi %1,56\n"
5052- " lghi %0,7\n"
5053- " xgr %1,%2\n"
5054- " nr %0,%2\n"
5055- " srlg %1,%1,3\n"
5056- " la %1,0(%1,%3)\n"
5057- " la %0,0(%0,%4)\n"
5058- " oc 0(1,%1),0(%0)"
5059- : "=&a" (reg1), "=&a" (reg2)
5060- : "a" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
5061-}
5062-
5063-static __inline__ void
5064-__constant_set_bit(const unsigned long nr, volatile void * addr)
5065-{
5066- switch (nr&7) {
5067- case 0:
5068- __asm__ __volatile__ ("la 1,%0\n\t"
5069- "oi 0(1),0x01"
5070- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5071- : : "1", "cc", "memory");
5072- break;
5073- case 1:
5074- __asm__ __volatile__ ("la 1,%0\n\t"
5075- "oi 0(1),0x02"
5076- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5077- : : "1", "cc", "memory" );
5078- break;
5079- case 2:
5080- __asm__ __volatile__ ("la 1,%0\n\t"
5081- "oi 0(1),0x04"
5082- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5083- : : "1", "cc", "memory" );
5084- break;
5085- case 3:
5086- __asm__ __volatile__ ("la 1,%0\n\t"
5087- "oi 0(1),0x08"
5088- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5089- : : "1", "cc", "memory" );
5090- break;
5091- case 4:
5092- __asm__ __volatile__ ("la 1,%0\n\t"
5093- "oi 0(1),0x10"
5094- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5095- : : "1", "cc", "memory" );
5096- break;
5097- case 5:
5098- __asm__ __volatile__ ("la 1,%0\n\t"
5099- "oi 0(1),0x20"
5100- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5101- : : "1", "cc", "memory" );
5102- break;
5103- case 6:
5104- __asm__ __volatile__ ("la 1,%0\n\t"
5105- "oi 0(1),0x40"
5106- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5107- : : "1", "cc", "memory" );
5108- break;
5109- case 7:
5110- __asm__ __volatile__ ("la 1,%0\n\t"
5111- "oi 0(1),0x80"
5112- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5113- : : "1", "cc", "memory" );
5114- break;
5115- }
5116+ unsigned long addr;
5117+
5118+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
5119+ asm volatile("oc 0(1,%1),0(%2)"
5120+ : "+m" (*(char *) addr)
5121+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
5122+ : "cc" );
5123+}
5124+
5125+static inline void
5126+__constant_set_bit(const unsigned long nr, volatile void *ptr)
5127+{
5128+ unsigned long addr;
5129+
5130+ addr = ((unsigned long) ptr) + ((nr >> 3) ^ 7);
5131+ switch (nr&7) {
5132+ case 0:
5133+ asm volatile ("oi 0(%1),0x01"
5134+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5135+ break;
5136+ case 1:
5137+ asm volatile ("oi 0(%1),0x02"
5138+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5139+ break;
5140+ case 2:
5141+ asm volatile ("oi 0(%1),0x04"
5142+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5143+ break;
5144+ case 3:
5145+ asm volatile ("oi 0(%1),0x08"
5146+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5147+ break;
5148+ case 4:
5149+ asm volatile ("oi 0(%1),0x10"
5150+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5151+ break;
5152+ case 5:
5153+ asm volatile ("oi 0(%1),0x20"
5154+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5155+ break;
5156+ case 6:
5157+ asm volatile ("oi 0(%1),0x40"
5158+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5159+ break;
5160+ case 7:
5161+ asm volatile ("oi 0(%1),0x80"
5162+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5163+ break;
5164+ }
5165 }
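The byte arithmetic in the rewritten non-SMP helpers leans on s390x's big-endian bit numbering: __set_bit() locates the byte holding bit nr as (nr ^ 56) >> 3, while __constant_set_bit() uses (nr >> 3) ^ 7. The two forms are equivalent because 56 == 7 << 3, so the XOR only touches bits that survive the shift. A stand-alone check of that identity (illustration only):

#include <assert.h>

int main(void)
{
        unsigned long nr;

        /* Both forms pick the same byte inside a big-endian 8-byte word. */
        for (nr = 0; nr < 512; nr++)
                assert(((nr ^ 56) >> 3) == ((nr >> 3) ^ 7));
        return 0;
}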
5166
5167 #define set_bit_simple(nr,addr) \
5168@@ -326,76 +275,58 @@
5169 /*
5170 * fast, non-SMP clear_bit routine
5171 */
5172-static __inline__ void
5173-__clear_bit(unsigned long nr, volatile void * addr)
5174+static inline void
5175+__clear_bit(unsigned long nr, volatile void *ptr)
5176 {
5177- unsigned long reg1, reg2;
5178- __asm__ __volatile__(
5179- " lghi %1,56\n"
5180- " lghi %0,7\n"
5181- " xgr %1,%2\n"
5182- " nr %0,%2\n"
5183- " srlg %1,%1,3\n"
5184- " la %1,0(%1,%3)\n"
5185- " la %0,0(%0,%4)\n"
5186- " nc 0(1,%1),0(%0)"
5187- : "=&a" (reg1), "=&a" (reg2)
5188- : "d" (nr), "a" (addr), "a" (&_ni_bitmap) : "cc", "memory" );
5189-}
5190-
5191-static __inline__ void
5192-__constant_clear_bit(const unsigned long nr, volatile void * addr)
5193-{
5194- switch (nr&7) {
5195- case 0:
5196- __asm__ __volatile__ ("la 1,%0\n\t"
5197- "ni 0(1),0xFE"
5198- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5199- : : "1", "cc", "memory" );
5200- break;
5201- case 1:
5202- __asm__ __volatile__ ("la 1,%0\n\t"
5203- "ni 0(1),0xFD"
5204- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5205- : : "1", "cc", "memory" );
5206- break;
5207- case 2:
5208- __asm__ __volatile__ ("la 1,%0\n\t"
5209- "ni 0(1),0xFB"
5210- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5211- : : "1", "cc", "memory" );
5212- break;
5213- case 3:
5214- __asm__ __volatile__ ("la 1,%0\n\t"
5215- "ni 0(1),0xF7"
5216- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5217- : : "1", "cc", "memory" );
5218- break;
5219- case 4:
5220- __asm__ __volatile__ ("la 1,%0\n\t"
5221- "ni 0(1),0xEF"
5222- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5223- : : "cc", "memory" );
5224- break;
5225- case 5:
5226- __asm__ __volatile__ ("la 1,%0\n\t"
5227- "ni 0(1),0xDF"
5228- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5229- : : "1", "cc", "memory" );
5230- break;
5231- case 6:
5232- __asm__ __volatile__ ("la 1,%0\n\t"
5233- "ni 0(1),0xBF"
5234- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5235- : : "1", "cc", "memory" );
5236- break;
5237- case 7:
5238- __asm__ __volatile__ ("la 1,%0\n\t"
5239- "ni 0(1),0x7F"
5240- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5241- : : "1", "cc", "memory" );
5242- break;
5243- }
5244+ unsigned long addr;
5245+
5246+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
5247+ asm volatile("nc 0(1,%1),0(%2)"
5248+ : "+m" (*(char *) addr)
5249+ : "a" (addr), "a" (_ni_bitmap + (nr & 7))
5250+ : "cc" );
5251+}
5252+
5253+static inline void
5254+__constant_clear_bit(const unsigned long nr, volatile void *ptr)
5255+{
5256+ unsigned long addr;
5257+
5258+ addr = ((unsigned long) ptr) + ((nr >> 3) ^ 7);
5259+ switch (nr&7) {
5260+ case 0:
5261+ asm volatile ("ni 0(%1),0xFE"
5262+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5263+ break;
5264+ case 1:
5265+ asm volatile ("ni 0(%1),0xFD"
5266+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5267+ break;
5268+ case 2:
5269+ asm volatile ("ni 0(%1),0xFB"
5270+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5271+ break;
5272+ case 3:
5273+ asm volatile ("ni 0(%1),0xF7"
5274+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5275+ break;
5276+ case 4:
5277+ asm volatile ("ni 0(%1),0xEF"
5278+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5279+ break;
5280+ case 5:
5281+ asm volatile ("ni 0(%1),0xDF"
5282+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5283+ break;
5284+ case 6:
5285+ asm volatile ("ni 0(%1),0xBF"
5286+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5287+ break;
5288+ case 7:
5289+ asm volatile ("ni 0(%1),0x7F"
5290+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5291+ break;
5292+ }
5293 }
5294
5295 #define clear_bit_simple(nr,addr) \
5296@@ -406,75 +337,57 @@
5297 /*
5298 * fast, non-SMP change_bit routine
5299 */
5300-static __inline__ void __change_bit(unsigned long nr, volatile void * addr)
5301+static inline void __change_bit(unsigned long nr, volatile void *ptr)
5302 {
5303- unsigned long reg1, reg2;
5304- __asm__ __volatile__(
5305- " lghi %1,56\n"
5306- " lghi %0,7\n"
5307- " xgr %1,%2\n"
5308- " nr %0,%2\n"
5309- " srlg %1,%1,3\n"
5310- " la %1,0(%1,%3)\n"
5311- " la %0,0(%0,%4)\n"
5312- " xc 0(1,%1),0(%0)"
5313- : "=&a" (reg1), "=&a" (reg2)
5314- : "d" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
5315-}
5316-
5317-static __inline__ void
5318-__constant_change_bit(const unsigned long nr, volatile void * addr)
5319-{
5320- switch (nr&7) {
5321- case 0:
5322- __asm__ __volatile__ ("la 1,%0\n\t"
5323- "xi 0(1),0x01"
5324- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5325- : : "cc", "memory" );
5326- break;
5327- case 1:
5328- __asm__ __volatile__ ("la 1,%0\n\t"
5329- "xi 0(1),0x02"
5330- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5331- : : "cc", "memory" );
5332- break;
5333- case 2:
5334- __asm__ __volatile__ ("la 1,%0\n\t"
5335- "xi 0(1),0x04"
5336- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5337- : : "cc", "memory" );
5338- break;
5339- case 3:
5340- __asm__ __volatile__ ("la 1,%0\n\t"
5341- "xi 0(1),0x08"
5342- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5343- : : "cc", "memory" );
5344- break;
5345- case 4:
5346- __asm__ __volatile__ ("la 1,%0\n\t"
5347- "xi 0(1),0x10"
5348- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5349- : : "cc", "memory" );
5350- break;
5351- case 5:
5352- __asm__ __volatile__ ("la 1,%0\n\t"
5353- "xi 0(1),0x20"
5354- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5355- : : "1", "cc", "memory" );
5356- break;
5357- case 6:
5358- __asm__ __volatile__ ("la 1,%0\n\t"
5359- "xi 0(1),0x40"
5360- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5361- : : "1", "cc", "memory" );
5362- break;
5363- case 7:
5364- __asm__ __volatile__ ("la 1,%0\n\t"
5365- "xi 0(1),0x80"
5366- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5367- : : "1", "cc", "memory" );
5368- break;
5369- }
5370+ unsigned long addr;
5371+
5372+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
5373+ asm volatile("xc 0(1,%1),0(%2)"
5374+ : "+m" (*(char *) addr)
5375+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
5376+ : "cc" );
5377+}
5378+
5379+static inline void
5380+__constant_change_bit(const unsigned long nr, volatile void *ptr)
5381+{
5382+ unsigned long addr;
5383+
5384+ addr = ((unsigned long) ptr) + ((nr >> 3) ^ 7);
5385+ switch (nr&7) {
5386+ case 0:
5387+ asm volatile ("xi 0(%1),0x01"
5388+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5389+ break;
5390+ case 1:
5391+ asm volatile ("xi 0(%1),0x02"
5392+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5393+ break;
5394+ case 2:
5395+ asm volatile ("xi 0(%1),0x04"
5396+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5397+ break;
5398+ case 3:
5399+ asm volatile ("xi 0(%1),0x08"
5400+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5401+ break;
5402+ case 4:
5403+ asm volatile ("xi 0(%1),0x10"
5404+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5405+ break;
5406+ case 5:
5407+ asm volatile ("xi 0(%1),0x20"
5408+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5409+ break;
5410+ case 6:
5411+ asm volatile ("xi 0(%1),0x40"
5412+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5413+ break;
5414+ case 7:
5415+ asm volatile ("xi 0(%1),0x80"
5416+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5417+ break;
5418+ }
5419 }
5420
5421 #define change_bit_simple(nr,addr) \
5422@@ -485,77 +398,57 @@
5423 /*
5424 * fast, non-SMP test_and_set_bit routine
5425 */
5426-static __inline__ int
5427-test_and_set_bit_simple(unsigned long nr, volatile void * addr)
5428+static inline int
5429+test_and_set_bit_simple(unsigned long nr, volatile void *ptr)
5430 {
5431- unsigned long reg1, reg2;
5432- int oldbit;
5433- __asm__ __volatile__(
5434- " lghi %1,56\n"
5435- " lghi %2,7\n"
5436- " xgr %1,%3\n"
5437- " nr %2,%3\n"
5438- " srlg %1,%1,3\n"
5439- " la %1,0(%1,%4)\n"
5440- " ic %0,0(%1)\n"
5441- " srl %0,0(%2)\n"
5442- " la %2,0(%2,%5)\n"
5443- " oc 0(1,%1),0(%2)"
5444- : "=&d" (oldbit), "=&a" (reg1), "=&a" (reg2)
5445- : "d" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
5446- return oldbit & 1;
5447+ unsigned long addr;
5448+ unsigned char ch;
5449+
5450+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
5451+ ch = *(unsigned char *) addr;
5452+ asm volatile("oc 0(1,%1),0(%2)"
5453+ : "+m" (*(char *) addr)
5454+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
5455+ : "cc" );
5456+ return (ch >> (nr & 7)) & 1;
5457 }
5458 #define __test_and_set_bit(X,Y) test_and_set_bit_simple(X,Y)
5459
5460 /*
5461 * fast, non-SMP test_and_clear_bit routine
5462 */
5463-static __inline__ int
5464-test_and_clear_bit_simple(unsigned long nr, volatile void * addr)
5465+static inline int
5466+test_and_clear_bit_simple(unsigned long nr, volatile void *ptr)
5467 {
5468- unsigned long reg1, reg2;
5469- int oldbit;
5470+ unsigned long addr;
5471+ unsigned char ch;
5472
5473- __asm__ __volatile__(
5474- " lghi %1,56\n"
5475- " lghi %2,7\n"
5476- " xgr %1,%3\n"
5477- " nr %2,%3\n"
5478- " srlg %1,%1,3\n"
5479- " la %1,0(%1,%4)\n"
5480- " ic %0,0(%1)\n"
5481- " srl %0,0(%2)\n"
5482- " la %2,0(%2,%5)\n"
5483- " nc 0(1,%1),0(%2)"
5484- : "=&d" (oldbit), "=&a" (reg1), "=&a" (reg2)
5485- : "d" (nr), "a" (addr), "a" (&_ni_bitmap) : "cc", "memory" );
5486- return oldbit & 1;
5487+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
5488+ ch = *(unsigned char *) addr;
5489+ asm volatile("nc 0(1,%1),0(%2)"
5490+ : "+m" (*(char *) addr)
5491+ : "a" (addr), "a" (_ni_bitmap + (nr & 7))
5492+ : "cc" );
5493+ return (ch >> (nr & 7)) & 1;
5494 }
5495 #define __test_and_clear_bit(X,Y) test_and_clear_bit_simple(X,Y)
5496
5497 /*
5498 * fast, non-SMP test_and_change_bit routine
5499 */
5500-static __inline__ int
5501-test_and_change_bit_simple(unsigned long nr, volatile void * addr)
5502+static inline int
5503+test_and_change_bit_simple(unsigned long nr, volatile void *ptr)
5504 {
5505- unsigned long reg1, reg2;
5506- int oldbit;
5507+ unsigned long addr;
5508+ unsigned char ch;
5509
5510- __asm__ __volatile__(
5511- " lghi %1,56\n"
5512- " lghi %2,7\n"
5513- " xgr %1,%3\n"
5514- " nr %2,%3\n"
5515- " srlg %1,%1,3\n"
5516- " la %1,0(%1,%4)\n"
5517- " ic %0,0(%1)\n"
5518- " srl %0,0(%2)\n"
5519- " la %2,0(%2,%5)\n"
5520- " xc 0(1,%1),0(%2)"
5521- : "=&d" (oldbit), "=&a" (reg1), "=&a" (reg2)
5522- : "d" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
5523- return oldbit & 1;
5524+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
5525+ ch = *(unsigned char *) addr;
5526+ asm volatile("xc 0(1,%1),0(%2)"
5527+ : "+m" (*(char *) addr)
5528+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
5529+ : "cc" );
5530+ return (ch >> (nr & 7)) & 1;
5531 }
5532 #define __test_and_change_bit(X,Y) test_and_change_bit_simple(X,Y)
5533
5534@@ -580,26 +473,18 @@
5535 * This routine doesn't need to be atomic.
5536 */
5537
5538-static __inline__ int __test_bit(unsigned long nr, volatile void * addr)
5539+static inline int __test_bit(unsigned long nr, volatile void *ptr)
5540 {
5541- unsigned long reg1, reg2;
5542- int oldbit;
5543+ unsigned long addr;
5544+ unsigned char ch;
5545
5546- __asm__ __volatile__(
5547- " lghi %2,56\n"
5548- " lghi %1,7\n"
5549- " xgr %2,%3\n"
5550- " nr %1,%3\n"
5551- " srlg %2,%2,3\n"
5552- " ic %0,0(%2,%4)\n"
5553- " srl %0,0(%1)\n"
5554- : "=&d" (oldbit), "=&a" (reg1), "=&a" (reg2)
5555- : "d" (nr), "a" (addr) : "cc" );
5556- return oldbit & 1;
5557+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
5558+ ch = *(unsigned char *) addr;
5559+ return (ch >> (nr & 7)) & 1;
5560 }
5561
5562-static __inline__ int
5563-__constant_test_bit(unsigned long nr, volatile void * addr) {
5564+static inline int
5565+__constant_test_bit(unsigned long nr, volatile void *addr) {
5566 return (((volatile char *) addr)[(nr>>3)^7] & (1<<(nr&7))) != 0;
5567 }
5568
5569@@ -611,7 +496,7 @@
5570 /*
5571 * Find-bit routines..
5572 */
5573-static __inline__ unsigned long
5574+static inline unsigned long
5575 find_first_zero_bit(void * addr, unsigned long size)
5576 {
5577 unsigned long res, cmp, count;
5578@@ -653,7 +538,49 @@
5579 return (res < size) ? res : size;
5580 }
5581
5582-static __inline__ unsigned long
5583+static inline unsigned long
5584+find_first_bit(void * addr, unsigned long size)
5585+{
5586+ unsigned long res, cmp, count;
5587+
5588+ if (!size)
5589+ return 0;
5590+ __asm__(" slgr %1,%1\n"
5591+ " lgr %2,%3\n"
5592+ " slgr %0,%0\n"
5593+ " aghi %2,63\n"
5594+ " srlg %2,%2,6\n"
5595+ "0: cg %1,0(%0,%4)\n"
5596+ " jne 1f\n"
5597+ " aghi %0,8\n"
5598+ " brct %2,0b\n"
5599+ " lgr %0,%3\n"
5600+ " j 5f\n"
5601+ "1: lg %2,0(%0,%4)\n"
5602+ " sllg %0,%0,3\n"
5603+ " clr %2,%1\n"
5604+ " jne 2f\n"
5605+ " aghi %0,32\n"
5606+ " srlg %2,%2,32\n"
5607+ "2: lghi %1,0xff\n"
5608+ " tmll %2,0xffff\n"
5609+ " jnz 3f\n"
5610+ " aghi %0,16\n"
5611+ " srl %2,16\n"
5612+ "3: tmll %2,0x00ff\n"
5613+ " jnz 4f\n"
5614+ " aghi %0,8\n"
5615+ " srl %2,8\n"
5616+ "4: ngr %2,%1\n"
5617+ " ic %2,0(%2,%5)\n"
5618+ " algr %0,%2\n"
5619+ "5:"
5620+ : "=&a" (res), "=&d" (cmp), "=&a" (count)
5621+ : "a" (size), "a" (addr), "a" (&_sb_findmap) : "cc" );
5622+ return (res < size) ? res : size;
5623+}
5624+
5625+static inline unsigned long
5626 find_next_zero_bit (void * addr, unsigned long size, unsigned long offset)
5627 {
5628 unsigned long * p = ((unsigned long *) addr) + (offset >> 6);
5629@@ -697,14 +624,56 @@
5630 return (offset + res);
5631 }
5632
5633+static inline unsigned long
5634+find_next_bit (void * addr, unsigned long size, unsigned long offset)
5635+{
5636+ unsigned long * p = ((unsigned long *) addr) + (offset >> 6);
5637+ unsigned long bitvec, reg;
5638+ unsigned long set, bit = offset & 63, res;
5639+
5640+ if (bit) {
5641+ /*
5642+ * Look for zero in first word
5643+ */
5644+ bitvec = (*p) >> bit;
5645+ __asm__(" slgr %0,%0\n"
5646+ " ltr %1,%1\n"
5647+ " jnz 0f\n"
5648+ " aghi %0,32\n"
5649+ " srlg %1,%1,32\n"
5650+ "0: lghi %2,0xff\n"
5651+ " tmll %1,0xffff\n"
5652+ " jnz 1f\n"
5653+ " aghi %0,16\n"
5654+ " srlg %1,%1,16\n"
5655+ "1: tmll %1,0x00ff\n"
5656+ " jnz 2f\n"
5657+ " aghi %0,8\n"
5658+ " srlg %1,%1,8\n"
5659+ "2: ngr %1,%2\n"
5660+ " ic %1,0(%1,%3)\n"
5661+ " algr %0,%1"
5662+ : "=&d" (set), "+a" (bitvec), "=&d" (reg)
5663+ : "a" (&_sb_findmap) : "cc" );
5664+ if (set < (64 - bit))
5665+ return set + offset;
5666+ offset += 64 - bit;
5667+ p++;
5668+ }
5669+ /*
5670+ * No set bit yet, search remaining full words for a bit
5671+ */
5672+ res = find_first_bit (p, size - 64 * (p - (unsigned long *) addr));
5673+ return (offset + res);
5674+}
5675+
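find_first_bit() and find_next_bit() above use the usual two-level scan: compare whole 64-bit words against zero, then locate the first set bit inside the first interesting word, here via the _sb_findmap lookup table. A simplified portable sketch of the same idea, assuming 64-bit unsigned long and substituting GCC's __builtin_ctzl for both the table and the assembler (illustration only, not the patch's code):

#include <assert.h>

static unsigned long generic_find_next_bit(const unsigned long *addr,
                                           unsigned long size,
                                           unsigned long offset)
{
        while (offset < size) {
                unsigned long word = addr[offset / 64] >> (offset % 64);

                if (word) {
                        offset += __builtin_ctzl(word);
                        return offset < size ? offset : size;
                }
                offset = (offset | 63) + 1;     /* advance to the next word */
        }
        return size;
}

int main(void)
{
        unsigned long map[2] = { 1UL << 40, 1UL << 3 };

        assert(generic_find_next_bit(map, 128, 0) == 40);
        assert(generic_find_next_bit(map, 128, 41) == 67);
        assert(generic_find_next_bit(map, 128, 68) == 128);     /* none left */
        return 0;
}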
5676 /*
5677 * ffz = Find First Zero in word. Undefined if no zero exists,
5678 * so code should check against ~0UL first..
5679 */
5680-static __inline__ unsigned long ffz(unsigned long word)
5681+static inline unsigned long ffz(unsigned long word)
5682 {
5683- unsigned long reg;
5684- int result;
5685+ unsigned long reg, result;
5686
5687 __asm__(" lhi %2,-1\n"
5688 " slgr %0,%0\n"
5689@@ -730,40 +699,112 @@
5690 }
5691
5692 /*
5693+ * __ffs = find first bit in word. Undefined if no bit exists,
5694+ * so code should check against 0UL first..
5695+ */
5696+static inline unsigned long __ffs (unsigned long word)
5697+{
5698+ unsigned long reg, result;
5699+
5700+ __asm__(" slgr %0,%0\n"
5701+ " ltr %1,%1\n"
5702+ " jnz 0f\n"
5703+ " aghi %0,32\n"
5704+ " srlg %1,%1,32\n"
5705+ "0: lghi %2,0xff\n"
5706+ " tmll %1,0xffff\n"
5707+ " jnz 1f\n"
5708+ " aghi %0,16\n"
5709+ " srlg %1,%1,16\n"
5710+ "1: tmll %1,0x00ff\n"
5711+ " jnz 2f\n"
5712+ " aghi %0,8\n"
5713+ " srlg %1,%1,8\n"
5714+ "2: ngr %1,%2\n"
5715+ " ic %1,0(%1,%3)\n"
5716+ " algr %0,%1"
5717+ : "=&d" (result), "+a" (word), "=&d" (reg)
5718+ : "a" (&_sb_findmap) : "cc" );
5719+ return result;
5720+}
5721+
5722+/*
5723+ * Every architecture must define this function. It's the fastest
5724+ * way of searching a 140-bit bitmap where the first 100 bits are
5725+ * unlikely to be set. It's guaranteed that at least one of the 140
5726+ * bits is cleared.
5727+ */
5728+static inline int sched_find_first_bit(unsigned long *b)
5729+{
5730+ return find_first_bit(b, 140);
5731+}
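The 140-bit bitmap is the O(1) scheduler's per-runqueue priority bitmap: MAX_PRIO is 140 (see the sched.h hunk later in this patch), bit p is set while priority queue p is non-empty, and picking the next task reduces to one sched_find_first_bit() plus taking the head of that queue. A generic stand-in sketch of the 64-bit word layout; the names below are illustrative, not the patch's:

#include <assert.h>

#define MAX_PRIO        140
#define PRIO_WORDS      ((MAX_PRIO + 63) / 64)          /* 3 x 64-bit words */

/* Portable stand-in for the arch-specific sched_find_first_bit() above. */
static int sched_find_first_bit_generic(const unsigned long b[PRIO_WORDS])
{
        if (b[0])
                return __builtin_ctzl(b[0]);
        if (b[1])
                return 64 + __builtin_ctzl(b[1]);
        return 128 + __builtin_ctzl(b[2]);
}

int main(void)
{
        unsigned long bitmap[PRIO_WORDS] = { 0, 0, 0 };

        bitmap[1] |= 1UL << (120 - 64);         /* a nice-0 task at prio 120 */
        bitmap[0] |= 1UL << 50;                 /* an RT task at prio 50     */
        assert(sched_find_first_bit_generic(bitmap) == 50);     /* RT wins */
        return 0;
}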
5732+
5733+/*
5734 * ffs: find first bit set. This is defined the same way as
5735 * the libc and compiler builtin ffs routines, therefore
5736 * differs in spirit from the above ffz (man ffs).
5737 */
5738-
5739-extern int __inline__ ffs (int x)
5740+extern int inline ffs (int x)
5741 {
5742- int r;
5743+ int r = 1;
5744
5745 if (x == 0)
5746- return 0;
5747- __asm__(" slr %0,%0\n"
5748- " tml %1,0xffff\n"
5749+ return 0;
5750+ __asm__(" tml %1,0xffff\n"
5751 " jnz 0f\n"
5752- " ahi %0,16\n"
5753 " srl %1,16\n"
5754+ " ahi %0,16\n"
5755 "0: tml %1,0x00ff\n"
5756 " jnz 1f\n"
5757- " ahi %0,8\n"
5758 " srl %1,8\n"
5759+ " ahi %0,8\n"
5760 "1: tml %1,0x000f\n"
5761 " jnz 2f\n"
5762- " ahi %0,4\n"
5763 " srl %1,4\n"
5764+ " ahi %0,4\n"
5765 "2: tml %1,0x0003\n"
5766 " jnz 3f\n"
5767- " ahi %0,2\n"
5768 " srl %1,2\n"
5769+ " ahi %0,2\n"
5770 "3: tml %1,0x0001\n"
5771 " jnz 4f\n"
5772 " ahi %0,1\n"
5773 "4:"
5774 : "=&d" (r), "+d" (x) : : "cc" );
5775- return r+1;
5776+ return r;
5777+}
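The header now carries three closely related primitives with different conventions: __ffs() is zero-based and undefined for 0, ffs() keeps the libc convention (one-based, with ffs(0) == 0), and ffz() reports the position of the first zero bit. A small user-space reminder of those conventions, using libc ffs() and a compiler builtin as reference (illustration only):

#include <assert.h>
#include <strings.h>            /* libc ffs() */

int main(void)
{
        unsigned long x = 0x90;                 /* bits 4 and 7 set */

        assert(__builtin_ctzl(x) == 4);         /* __ffs(x): zero-based     */
        assert(ffs((int)x) == 5);               /* ffs(x): one-based        */
        assert(ffs(0) == 0);                    /* ...and defined for zero  */
        assert(__builtin_ctzl(~x) == 0);        /* ffz(x): first clear bit  */
        return 0;
}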
5778+
5779+/*
5780+ * fls: find last bit set.
5781+ */
5782+extern __inline__ int fls(int x)
5783+{
5784+ int r = 32;
5785+
5786+ if (x == 0)
5787+ return 0;
5788+ __asm__(" tmh %1,0xffff\n"
5789+ " jz 0f\n"
5790+ " sll %1,16\n"
5791+ " ahi %0,-16\n"
5792+ "0: tmh %1,0xff00\n"
5793+ " jz 1f\n"
5794+ " sll %1,8\n"
5795+ " ahi %0,-8\n"
5796+ "1: tmh %1,0xf000\n"
5797+ " jz 2f\n"
5798+ " sll %1,4\n"
5799+ " ahi %0,-4\n"
5800+ "2: tmh %1,0xc000\n"
5801+ " jz 3f\n"
5802+ " sll %1,2\n"
5803+ " ahi %0,-2\n"
5804+ "3: tmh %1,0x8000\n"
5805+ " jz 4f\n"
5806+ " ahi %0,-1\n"
5807+ "4:"
5808+ : "+d" (r), "+d" (x) : : "cc" );
5809+ return r;
5810 }
5811
5812 /*
5813@@ -791,7 +832,7 @@
5814 #define ext2_set_bit(nr, addr) test_and_set_bit((nr)^56, addr)
5815 #define ext2_clear_bit(nr, addr) test_and_clear_bit((nr)^56, addr)
5816 #define ext2_test_bit(nr, addr) test_bit((nr)^56, addr)
5817-static __inline__ unsigned long
5818+static inline unsigned long
5819 ext2_find_first_zero_bit(void *vaddr, unsigned long size)
5820 {
5821 unsigned long res, cmp, count;
5822@@ -833,7 +874,7 @@
5823 return (res < size) ? res : size;
5824 }
5825
5826-static __inline__ unsigned long
5827+static inline unsigned long
5828 ext2_find_next_zero_bit(void *vaddr, unsigned long size, unsigned long offset)
5829 {
5830 unsigned long *addr = vaddr;
5831diff -urN linux-2.4.22.org/include/asm-sparc/bitops.h linux-2.4.22/include/asm-sparc/bitops.h
5832--- linux-2.4.22.org/include/asm-sparc/bitops.h 2003-11-24 18:28:27.000000000 +0100
5833+++ linux-2.4.22/include/asm-sparc/bitops.h 2003-11-24 18:39:03.000000000 +0100
5834@@ -231,6 +231,57 @@
5835 return result;
5836 }
5837
5838+/**
5839+ * __ffs - find first bit in word.
5840+ * @word: The word to search
5841+ *
5842+ * Undefined if no bit exists, so code should check against 0 first.
5843+ */
5844+static __inline__ int __ffs(unsigned long word)
5845+{
5846+ int num = 0;
5847+
5848+ if ((word & 0xffff) == 0) {
5849+ num += 16;
5850+ word >>= 16;
5851+ }
5852+ if ((word & 0xff) == 0) {
5853+ num += 8;
5854+ word >>= 8;
5855+ }
5856+ if ((word & 0xf) == 0) {
5857+ num += 4;
5858+ word >>= 4;
5859+ }
5860+ if ((word & 0x3) == 0) {
5861+ num += 2;
5862+ word >>= 2;
5863+ }
5864+ if ((word & 0x1) == 0)
5865+ num += 1;
5866+ return num;
5867+}
5868+
5869+/*
5870+ * Every architecture must define this function. It's the fastest
5871+ * way of searching a 140-bit bitmap where the first 100 bits are
5872+ * unlikely to be set. It's guaranteed that at least one of the 140
5873+ * bits is cleared.
5874+ */
5875+static __inline__ int sched_find_first_bit(unsigned long *b)
5876+{
5877+
5878+ if (unlikely(b[0]))
5879+ return __ffs(b[0]);
5880+ if (unlikely(b[1]))
5881+ return __ffs(b[1]) + 32;
5882+ if (unlikely(b[2]))
5883+ return __ffs(b[2]) + 64;
5884+ if (b[3])
5885+ return __ffs(b[3]) + 96;
5886+ return __ffs(b[4]) + 128;
5887+}
5888+
5889 /*
5890 * ffs: find first bit set. This is defined the same way as
5891 * the libc and compiler builtin ffs routines, therefore
5892@@ -296,6 +347,32 @@
5893 #define find_first_zero_bit(addr, size) \
5894 find_next_zero_bit((addr), (size), 0)
5895
5896+/**
5897+ * find_next_bit - find the first set bit in a memory region
5898+ * @addr: The address to base the search on
5899+ * @offset: The bitnumber to start searching at
5900+ * @size: The maximum size to search
5901+ *
5902+ * Scheduler induced bitop, do not use.
5903+ */
5904+static inline int find_next_bit(unsigned long *addr, int size, int offset)
5905+{
5906+ unsigned long *p = addr + (offset >> 5);
5907+ int num = offset & ~0x1f;
5908+ unsigned long word;
5909+
5910+ word = *p++;
5911+ word &= ~((1 << (offset & 0x1f)) - 1);
5912+ while (num < size) {
5913+ if (word != 0) {
5914+ return __ffs(word) + num;
5915+ }
5916+ word = *p++;
5917+ num += 0x20;
5918+ }
5919+ return num;
5920+}
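A routine like this is normally consumed by a "for each set bit" loop, for instance when walking a CPU mask. A self-contained illustration with a naive 32-bit stand-in for find_next_bit() so it runs in user space (the stub is an assumption, not the patch's code):

#include <stdio.h>

/* Naive stand-in, operating on 32-bit words like the sparc version. */
static int find_next_bit_stub(const unsigned int *addr, int size, int offset)
{
        while (offset < size) {
                if (addr[offset / 32] & (1U << (offset % 32)))
                        return offset;
                offset++;
        }
        return size;
}

int main(void)
{
        unsigned int mask[2] = { 0x29, 0x2 };   /* bits 0, 3, 5 and 33 set */
        int bit;

        for (bit = find_next_bit_stub(mask, 64, 0); bit < 64;
             bit = find_next_bit_stub(mask, 64, bit + 1))
                printf("bit %d is set\n", bit); /* prints 0, 3, 5, 33 */
        return 0;
}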
5921+
5922 static inline int test_le_bit(int nr, __const__ void * addr)
5923 {
5924 __const__ unsigned char *ADDR = (__const__ unsigned char *) addr;
5925diff -urN linux-2.4.22.org/include/asm-sparc/system.h linux-2.4.22/include/asm-sparc/system.h
5926--- linux-2.4.22.org/include/asm-sparc/system.h 2003-11-24 18:28:27.000000000 +0100
5927+++ linux-2.4.22/include/asm-sparc/system.h 2003-11-24 18:39:03.000000000 +0100
5928@@ -88,7 +88,7 @@
5929 *
5930 * SWITCH_ENTER and SWITH_DO_LAZY_FPU do not work yet (e.g. SMP does not work)
5931 */
5932-#define prepare_to_switch() do { \
5933+#define prepare_arch_switch(rq, next) do { \
5934 __asm__ __volatile__( \
5935 ".globl\tflush_patch_switch\nflush_patch_switch:\n\t" \
5936 "save %sp, -0x40, %sp; save %sp, -0x40, %sp; save %sp, -0x40, %sp\n\t" \
5937@@ -96,6 +96,8 @@
5938 "save %sp, -0x40, %sp\n\t" \
5939 "restore; restore; restore; restore; restore; restore; restore"); \
5940 } while(0)
5941+#define finish_arch_switch(rq, next) do{ }while(0)
5942+#define task_running(rq, p) ((rq)->curr == (p))
5943
5944 /* Much care has gone into this code, do not touch it.
5945 *
5946diff -urN linux-2.4.22.org/include/asm-sparc64/bitops.h linux-2.4.22/include/asm-sparc64/bitops.h
5947--- linux-2.4.22.org/include/asm-sparc64/bitops.h 2003-11-24 18:28:29.000000000 +0100
5948+++ linux-2.4.22/include/asm-sparc64/bitops.h 2003-11-24 18:39:03.000000000 +0100
5949@@ -1,4 +1,4 @@
5950-/* $Id$
5951+/* $Id$
5952 * bitops.h: Bit string operations on the V9.
5953 *
5954 * Copyright 1996, 1997 David S. Miller (davem@caip.rutgers.edu)
5955@@ -7,11 +7,12 @@
5956 #ifndef _SPARC64_BITOPS_H
5957 #define _SPARC64_BITOPS_H
5958
5959+#include <linux/compiler.h>
5960 #include <asm/byteorder.h>
5961
5962-extern long ___test_and_set_bit(unsigned long nr, volatile void *addr);
5963-extern long ___test_and_clear_bit(unsigned long nr, volatile void *addr);
5964-extern long ___test_and_change_bit(unsigned long nr, volatile void *addr);
5965+extern long ___test_and_set_bit(unsigned long nr, volatile unsigned long *addr);
5966+extern long ___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr);
5967+extern long ___test_and_change_bit(unsigned long nr, volatile unsigned long *addr);
5968
5969 #define test_and_set_bit(nr,addr) ({___test_and_set_bit(nr,addr)!=0;})
5970 #define test_and_clear_bit(nr,addr) ({___test_and_clear_bit(nr,addr)!=0;})
5971@@ -21,109 +22,132 @@
5972 #define change_bit(nr,addr) ((void)___test_and_change_bit(nr,addr))
5973
5974 /* "non-atomic" versions... */
5975-#define __set_bit(X,Y) \
5976-do { unsigned long __nr = (X); \
5977- long *__m = ((long *) (Y)) + (__nr >> 6); \
5978- *__m |= (1UL << (__nr & 63)); \
5979-} while (0)
5980-#define __clear_bit(X,Y) \
5981-do { unsigned long __nr = (X); \
5982- long *__m = ((long *) (Y)) + (__nr >> 6); \
5983- *__m &= ~(1UL << (__nr & 63)); \
5984-} while (0)
5985-#define __change_bit(X,Y) \
5986-do { unsigned long __nr = (X); \
5987- long *__m = ((long *) (Y)) + (__nr >> 6); \
5988- *__m ^= (1UL << (__nr & 63)); \
5989-} while (0)
5990-#define __test_and_set_bit(X,Y) \
5991-({ unsigned long __nr = (X); \
5992- long *__m = ((long *) (Y)) + (__nr >> 6); \
5993- long __old = *__m; \
5994- long __mask = (1UL << (__nr & 63)); \
5995- *__m = (__old | __mask); \
5996- ((__old & __mask) != 0); \
5997-})
5998-#define __test_and_clear_bit(X,Y) \
5999-({ unsigned long __nr = (X); \
6000- long *__m = ((long *) (Y)) + (__nr >> 6); \
6001- long __old = *__m; \
6002- long __mask = (1UL << (__nr & 63)); \
6003- *__m = (__old & ~__mask); \
6004- ((__old & __mask) != 0); \
6005-})
6006-#define __test_and_change_bit(X,Y) \
6007-({ unsigned long __nr = (X); \
6008- long *__m = ((long *) (Y)) + (__nr >> 6); \
6009- long __old = *__m; \
6010- long __mask = (1UL << (__nr & 63)); \
6011- *__m = (__old ^ __mask); \
6012- ((__old & __mask) != 0); \
6013-})
6014+
6015+static __inline__ void __set_bit(int nr, volatile unsigned long *addr)
6016+{
6017+ volatile unsigned long *m = addr + (nr >> 6);
6018+
6019+ *m |= (1UL << (nr & 63));
6020+}
6021+
6022+static __inline__ void __clear_bit(int nr, volatile unsigned long *addr)
6023+{
6024+ volatile unsigned long *m = addr + (nr >> 6);
6025+
6026+ *m &= ~(1UL << (nr & 63));
6027+}
6028+
6029+static __inline__ void __change_bit(int nr, volatile unsigned long *addr)
6030+{
6031+ volatile unsigned long *m = addr + (nr >> 6);
6032+
6033+ *m ^= (1UL << (nr & 63));
6034+}
6035+
6036+static __inline__ int __test_and_set_bit(int nr, volatile unsigned long *addr)
6037+{
6038+ volatile unsigned long *m = addr + (nr >> 6);
6039+ long old = *m;
6040+ long mask = (1UL << (nr & 63));
6041+
6042+ *m = (old | mask);
6043+ return ((old & mask) != 0);
6044+}
6045+
6046+static __inline__ int __test_and_clear_bit(int nr, volatile unsigned long *addr)
6047+{
6048+ volatile unsigned long *m = addr + (nr >> 6);
6049+ long old = *m;
6050+ long mask = (1UL << (nr & 63));
6051+
6052+ *m = (old & ~mask);
6053+ return ((old & mask) != 0);
6054+}
6055+
6056+static __inline__ int __test_and_change_bit(int nr, volatile unsigned long *addr)
6057+{
6058+ volatile unsigned long *m = addr + (nr >> 6);
6059+ long old = *m;
6060+ long mask = (1UL << (nr & 63));
6061+
6062+ *m = (old ^ mask);
6063+ return ((old & mask) != 0);
6064+}
6065
6066 #define smp_mb__before_clear_bit() do { } while(0)
6067 #define smp_mb__after_clear_bit() do { } while(0)
6068
6069-extern __inline__ int test_bit(int nr, __const__ void *addr)
6070+static __inline__ int test_bit(int nr, __const__ volatile unsigned long *addr)
6071 {
6072- return (1UL & (((__const__ long *) addr)[nr >> 6] >> (nr & 63))) != 0UL;
6073+ return (1UL & ((addr)[nr >> 6] >> (nr & 63))) != 0UL;
6074 }
6075
6076 /* The easy/cheese version for now. */
6077-extern __inline__ unsigned long ffz(unsigned long word)
6078+static __inline__ unsigned long ffz(unsigned long word)
6079 {
6080 unsigned long result;
6081
6082-#ifdef ULTRA_HAS_POPULATION_COUNT /* Thanks for nothing Sun... */
6083- __asm__ __volatile__(
6084-" brz,pn %0, 1f\n"
6085-" neg %0, %%g1\n"
6086-" xnor %0, %%g1, %%g2\n"
6087-" popc %%g2, %0\n"
6088-"1: " : "=&r" (result)
6089- : "0" (word)
6090- : "g1", "g2");
6091-#else
6092-#if 1 /* def EASY_CHEESE_VERSION */
6093 result = 0;
6094 while(word & 1) {
6095 result++;
6096 word >>= 1;
6097 }
6098-#else
6099- unsigned long tmp;
6100+ return result;
6101+}
6102
6103- result = 0;
6104- tmp = ~word & -~word;
6105- if (!(unsigned)tmp) {
6106- tmp >>= 32;
6107- result = 32;
6108- }
6109- if (!(unsigned short)tmp) {
6110- tmp >>= 16;
6111- result += 16;
6112- }
6113- if (!(unsigned char)tmp) {
6114- tmp >>= 8;
6115- result += 8;
6116+/**
6117+ * __ffs - find first bit in word.
6118+ * @word: The word to search
6119+ *
6120+ * Undefined if no bit exists, so code should check against 0 first.
6121+ */
6122+static __inline__ unsigned long __ffs(unsigned long word)
6123+{
6124+ unsigned long result = 0;
6125+
6126+ while (!(word & 1UL)) {
6127+ result++;
6128+ word >>= 1;
6129 }
6130- if (tmp & 0xf0) result += 4;
6131- if (tmp & 0xcc) result += 2;
6132- if (tmp & 0xaa) result ++;
6133-#endif
6134-#endif
6135 return result;
6136 }
6137
6138+/*
6139+ * fls: find last bit set.
6140+ */
6141+
6142+#define fls(x) generic_fls(x)
6143+
6144 #ifdef __KERNEL__
6145
6146 /*
6147+ * Every architecture must define this function. It's the fastest
6148+ * way of searching a 140-bit bitmap where the first 100 bits are
6149+ * unlikely to be set. It's guaranteed that at least one of the 140
6150+ * bits is cleared.
6151+ */
6152+static inline int sched_find_first_bit(unsigned long *b)
6153+{
6154+ if (unlikely(b[0]))
6155+ return __ffs(b[0]);
6156+ if (unlikely(((unsigned int)b[1])))
6157+ return __ffs(b[1]) + 64;
6158+ if (b[1] >> 32)
6159+ return __ffs(b[1] >> 32) + 96;
6160+ return __ffs(b[2]) + 128;
6161+}
6162+
6163+/*
6164 * ffs: find first bit set. This is defined the same way as
6165 * the libc and compiler builtin ffs routines, therefore
6166 * differs in spirit from the above ffz (man ffs).
6167 */
6168-
6169-#define ffs(x) generic_ffs(x)
6170+static __inline__ int ffs(int x)
6171+{
6172+ if (!x)
6173+ return 0;
6174+ return __ffs((unsigned long)x);
6175+}
6176
6177 /*
6178 * hweightN: returns the hamming weight (i.e. the number
6179@@ -132,7 +156,7 @@
6180
6181 #ifdef ULTRA_HAS_POPULATION_COUNT
6182
6183-extern __inline__ unsigned int hweight32(unsigned int w)
6184+static __inline__ unsigned int hweight32(unsigned int w)
6185 {
6186 unsigned int res;
6187
6188@@ -140,7 +164,7 @@
6189 return res;
6190 }
6191
6192-extern __inline__ unsigned int hweight16(unsigned int w)
6193+static __inline__ unsigned int hweight16(unsigned int w)
6194 {
6195 unsigned int res;
6196
6197@@ -148,7 +172,7 @@
6198 return res;
6199 }
6200
6201-extern __inline__ unsigned int hweight8(unsigned int w)
6202+static __inline__ unsigned int hweight8(unsigned int w)
6203 {
6204 unsigned int res;
6205
6206@@ -165,14 +189,69 @@
6207 #endif
6208 #endif /* __KERNEL__ */
6209
6210+/**
6211+ * find_next_bit - find the next set bit in a memory region
6212+ * @addr: The address to base the search on
6213+ * @offset: The bitnumber to start searching at
6214+ * @size: The maximum size to search
6215+ */
6216+static __inline__ unsigned long find_next_bit(unsigned long *addr, unsigned long size, unsigned long offset)
6217+{
6218+ unsigned long *p = addr + (offset >> 6);
6219+ unsigned long result = offset & ~63UL;
6220+ unsigned long tmp;
6221+
6222+ if (offset >= size)
6223+ return size;
6224+ size -= result;
6225+ offset &= 63UL;
6226+ if (offset) {
6227+ tmp = *(p++);
6228+ tmp &= (~0UL << offset);
6229+ if (size < 64)
6230+ goto found_first;
6231+ if (tmp)
6232+ goto found_middle;
6233+ size -= 64;
6234+ result += 64;
6235+ }
6236+ while (size & ~63UL) {
6237+ if ((tmp = *(p++)))
6238+ goto found_middle;
6239+ result += 64;
6240+ size -= 64;
6241+ }
6242+ if (!size)
6243+ return result;
6244+ tmp = *p;
6245+
6246+found_first:
6247+ tmp &= (~0UL >> (64 - size));
6248+ if (tmp == 0UL) /* Are any bits set? */
6249+ return result + size; /* Nope. */
6250+found_middle:
6251+ return result + __ffs(tmp);
6252+}
6253+
6254+/**
6255+ * find_first_bit - find the first set bit in a memory region
6256+ * @addr: The address to start the search at
6257+ * @size: The maximum size to search
6258+ *
6259+ * Returns the bit-number of the first set bit, not the number of the byte
6260+ * containing a bit.
6261+ */
6262+#define find_first_bit(addr, size) \
6263+ find_next_bit((addr), (size), 0)
6264+
6265 /* find_next_zero_bit() finds the first zero bit in a bit string of length
6266 * 'size' bits, starting the search at bit 'offset'. This is largely based
6267 * on Linus's ALPHA routines, which are pretty portable BTW.
6268 */
6269
6270-extern __inline__ unsigned long find_next_zero_bit(void *addr, unsigned long size, unsigned long offset)
6271+static __inline__ unsigned long find_next_zero_bit(unsigned long *addr, unsigned long size, unsigned long offset)
6272 {
6273- unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
6274+ unsigned long *p = addr + (offset >> 6);
6275 unsigned long result = offset & ~63UL;
6276 unsigned long tmp;
6277
6278@@ -211,15 +290,15 @@
6279 #define find_first_zero_bit(addr, size) \
6280 find_next_zero_bit((addr), (size), 0)
6281
6282-extern long ___test_and_set_le_bit(int nr, volatile void *addr);
6283-extern long ___test_and_clear_le_bit(int nr, volatile void *addr);
6284+extern long ___test_and_set_le_bit(int nr, volatile unsigned long *addr);
6285+extern long ___test_and_clear_le_bit(int nr, volatile unsigned long *addr);
6286
6287 #define test_and_set_le_bit(nr,addr) ({___test_and_set_le_bit(nr,addr)!=0;})
6288 #define test_and_clear_le_bit(nr,addr) ({___test_and_clear_le_bit(nr,addr)!=0;})
6289 #define set_le_bit(nr,addr) ((void)___test_and_set_le_bit(nr,addr))
6290 #define clear_le_bit(nr,addr) ((void)___test_and_clear_le_bit(nr,addr))
6291
6292-extern __inline__ int test_le_bit(int nr, __const__ void * addr)
6293+static __inline__ int test_le_bit(int nr, __const__ unsigned long * addr)
6294 {
6295 int mask;
6296 __const__ unsigned char *ADDR = (__const__ unsigned char *) addr;
6297@@ -232,9 +311,9 @@
6298 #define find_first_zero_le_bit(addr, size) \
6299 find_next_zero_le_bit((addr), (size), 0)
6300
6301-extern __inline__ unsigned long find_next_zero_le_bit(void *addr, unsigned long size, unsigned long offset)
6302+static __inline__ unsigned long find_next_zero_le_bit(unsigned long *addr, unsigned long size, unsigned long offset)
6303 {
6304- unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
6305+ unsigned long *p = addr + (offset >> 6);
6306 unsigned long result = offset & ~63UL;
6307 unsigned long tmp;
6308
6309@@ -271,18 +350,22 @@
6310
6311 #ifdef __KERNEL__
6312
6313-#define ext2_set_bit test_and_set_le_bit
6314-#define ext2_clear_bit test_and_clear_le_bit
6315-#define ext2_test_bit test_le_bit
6316-#define ext2_find_first_zero_bit find_first_zero_le_bit
6317-#define ext2_find_next_zero_bit find_next_zero_le_bit
6318+#define ext2_set_bit(nr,addr) test_and_set_le_bit((nr),(unsigned long *)(addr))
6319+#define ext2_clear_bit(nr,addr) test_and_clear_le_bit((nr),(unsigned long *)(addr))
6320+#define ext2_test_bit(nr,addr) test_le_bit((nr),(unsigned long *)(addr))
6321+#define ext2_find_first_zero_bit(addr, size) \
6322+ find_first_zero_le_bit((unsigned long *)(addr), (size))
6323+#define ext2_find_next_zero_bit(addr, size, off) \
6324+ find_next_zero_le_bit((unsigned long *)(addr), (size), (off))
6325
6326 /* Bitmap functions for the minix filesystem. */
6327-#define minix_test_and_set_bit(nr,addr) test_and_set_bit(nr,addr)
6328-#define minix_set_bit(nr,addr) set_bit(nr,addr)
6329-#define minix_test_and_clear_bit(nr,addr) test_and_clear_bit(nr,addr)
6330-#define minix_test_bit(nr,addr) test_bit(nr,addr)
6331-#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
6332+#define minix_test_and_set_bit(nr,addr) test_and_set_bit((nr),(unsigned long *)(addr))
6333+#define minix_set_bit(nr,addr) set_bit((nr),(unsigned long *)(addr))
6334+#define minix_test_and_clear_bit(nr,addr) \
6335+ test_and_clear_bit((nr),(unsigned long *)(addr))
6336+#define minix_test_bit(nr,addr) test_bit((nr),(unsigned long *)(addr))
6337+#define minix_find_first_zero_bit(addr,size) \
6338+ find_first_zero_bit((unsigned long *)(addr),(size))
6339
6340 #endif /* __KERNEL__ */
6341
6342diff -urN linux-2.4.22.org/include/asm-sparc64/smp.h linux-2.4.22/include/asm-sparc64/smp.h
6343--- linux-2.4.22.org/include/asm-sparc64/smp.h 2003-11-24 18:28:29.000000000 +0100
6344+++ linux-2.4.22/include/asm-sparc64/smp.h 2003-11-24 18:39:03.000000000 +0100
6345@@ -111,7 +111,7 @@
6346 }
6347 }
6348
6349-#define smp_processor_id() (current->processor)
6350+#define smp_processor_id() (current->cpu)
6351
6352 /* This needn't do anything as we do not sleep the cpu
6353 * inside of the idler task, so an interrupt is not needed
6354diff -urN linux-2.4.22.org/include/asm-sparc64/system.h linux-2.4.22/include/asm-sparc64/system.h
6355--- linux-2.4.22.org/include/asm-sparc64/system.h 2003-11-24 18:28:29.000000000 +0100
6356+++ linux-2.4.22/include/asm-sparc64/system.h 2003-11-24 18:39:03.000000000 +0100
6357@@ -154,7 +154,18 @@
6358
6359 #define flush_user_windows flushw_user
6360 #define flush_register_windows flushw_all
6361-#define prepare_to_switch flushw_all
6362+
6363+#define prepare_arch_schedule(prev) task_lock(prev)
6364+#define finish_arch_schedule(prev) task_unlock(prev)
6365+#define prepare_arch_switch(rq, next) \
6366+do { spin_lock(&(next)->switch_lock); \
6367+ spin_unlock(&(rq)->lock); \
6368+ flushw_all(); \
6369+} while (0)
6370+
6371+#define finish_arch_switch(rq, prev) \
6372+do { spin_unlock_irq(&(prev)->switch_lock); \
6373+} while (0)
6374
6375 #ifndef CONFIG_DEBUG_SPINLOCK
6376 #define CHECK_LOCKS(PREV) do { } while(0)
6377diff -urN linux-2.4.22.org/include/linux/bitops.h linux-2.4.22/include/linux/bitops.h
6378--- linux-2.4.22.org/include/linux/bitops.h 2003-11-24 18:28:22.000000000 +0100
6379+++ linux-2.4.22/include/linux/bitops.h 2003-11-24 18:40:50.000000000 +0100
6380@@ -1,6 +1,38 @@
6381 #ifndef _LINUX_BITOPS_H
6382 #define _LINUX_BITOPS_H
6383
6384+/*
6385+ * fls: find last bit set.
6386+ */
6387+
6388+extern __inline__ int generic_fls(int x)
6389+{
6390+ int r = 32;
6391+
6392+ if (!x)
6393+ return 0;
6394+ if (!(x & 0xffff0000u)) {
6395+ x <<= 16;
6396+ r -= 16;
6397+ }
6398+ if (!(x & 0xff000000u)) {
6399+ x <<= 8;
6400+ r -= 8;
6401+ }
6402+ if (!(x & 0xf0000000u)) {
6403+ x <<= 4;
6404+ r -= 4;
6405+ }
6406+ if (!(x & 0xc0000000u)) {
6407+ x <<= 2;
6408+ r -= 2;
6409+ }
6410+ if (!(x & 0x80000000u)) {
6411+ x <<= 1;
6412+ r -= 1;
6413+ }
6414+ return r;
6415+}
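generic_fls() halves the search at every step: if the upper half of the remaining bits is empty, shift the value up and subtract that half's width from r. A stand-alone check against an obvious loop; the body below is a condensed copy of the hunk above, with an unsigned parameter so the shifts stay well defined in a user-space test (illustration only):

#include <assert.h>

static int generic_fls_copy(unsigned int x)
{
        int r = 32;

        if (!x)
                return 0;
        if (!(x & 0xffff0000u)) { x <<= 16; r -= 16; }
        if (!(x & 0xff000000u)) { x <<= 8;  r -= 8;  }
        if (!(x & 0xf0000000u)) { x <<= 4;  r -= 4;  }
        if (!(x & 0xc0000000u)) { x <<= 2;  r -= 2;  }
        if (!(x & 0x80000000u)) { x <<= 1;  r -= 1;  }
        return r;
}

static int naive_fls(unsigned int x)
{
        int r = 0;

        while (x) {                     /* one-based index of the top bit */
                r++;
                x >>= 1;
        }
        return r;
}

int main(void)
{
        unsigned int x;

        assert(generic_fls_copy(0) == 0 && generic_fls_copy(1) == 1);
        for (x = 1; x < (1u << 20); x += 7)
                assert(generic_fls_copy(x) == naive_fls(x));
        assert(generic_fls_copy(0x80000000u) == 32);
        return 0;
}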
6416
6417 /*
6418 * ffs: find first bit set. This is defined the same way as
6419diff -urN linux-2.4.22.org/include/linux/kernel_stat.h linux-2.4.22/include/linux/kernel_stat.h
6420--- linux-2.4.22.org/include/linux/kernel_stat.h 2003-11-24 18:28:20.000000000 +0100
6421+++ linux-2.4.22/include/linux/kernel_stat.h 2003-11-24 18:39:03.000000000 +0100
6422@@ -31,7 +31,6 @@
6423 #elif !defined(CONFIG_ARCH_S390)
6424 unsigned int irqs[NR_CPUS][NR_IRQS];
6425 #endif
6426- unsigned int context_swtch;
6427 };
6428
6429 extern struct kernel_stat kstat;
6430diff -urN linux-2.4.22.org/include/linux/sched.h linux-2.4.22/include/linux/sched.h
6431--- linux-2.4.22.org/include/linux/sched.h 2003-11-24 18:28:20.000000000 +0100
6432+++ linux-2.4.22/include/linux/sched.h 2003-11-24 18:39:03.000000000 +0100
6433@@ -6,6 +6,7 @@
6434 extern unsigned long event;
6435
6436 #include <linux/config.h>
6437+#include <linux/compiler.h>
6438 #include <linux/binfmts.h>
6439 #include <linux/threads.h>
6440 #include <linux/kernel.h>
6441@@ -21,7 +22,7 @@
6442 #include <asm/mmu.h>
6443
6444 #include <linux/smp.h>
6445-#include <linux/tty.h>
6446+//#include <linux/tty.h>
6447 #include <linux/sem.h>
6448 #include <linux/signal.h>
6449 #include <linux/securebits.h>
6450@@ -73,10 +74,12 @@
6451 #define CT_TO_SECS(x) ((x) / HZ)
6452 #define CT_TO_USECS(x) (((x) % HZ) * 1000000/HZ)
6453
6454-extern int nr_running, nr_threads;
6455+extern int nr_threads;
6456 extern int last_pid;
6457+extern unsigned long nr_running(void);
6458+extern unsigned long nr_uninterruptible(void);
6459
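nr_running turns from a global counter into a function because the single global runqueue is gone: every CPU gets its own runqueue and the total has to be summed on demand. A sketch of the accessor's shape with stand-in names, since the real runqueue structure lives in kernel/sched.c and is not visible here (illustration only, not the patch's code):

#define NR_CPUS_STUB 4                          /* stand-in for NR_CPUS */

struct runqueue_stub {                          /* stand-in for the per-CPU runqueue */
        unsigned long nr_running;
};

static struct runqueue_stub runqueues_stub[NR_CPUS_STUB];

static unsigned long nr_running_stub(void)
{
        unsigned long sum = 0;
        int cpu;

        for (cpu = 0; cpu < NR_CPUS_STUB; cpu++)
                sum += runqueues_stub[cpu].nr_running;
        return sum;
}

int main(void)
{
        runqueues_stub[0].nr_running = 2;
        runqueues_stub[3].nr_running = 1;
        return nr_running_stub() == 3 ? 0 : 1;
}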
6460-#include <linux/fs.h>
6461+//#include <linux/fs.h>
6462 #include <linux/time.h>
6463 #include <linux/param.h>
6464 #include <linux/resource.h>
6465@@ -109,12 +112,6 @@
6466 #define SCHED_FIFO 1
6467 #define SCHED_RR 2
6468
6469-/*
6470- * This is an additional bit set when we want to
6471- * yield the CPU for one re-schedule..
6472- */
6473-#define SCHED_YIELD 0x10
6474-
6475 struct sched_param {
6476 int sched_priority;
6477 };
6478@@ -132,17 +129,21 @@
6479 * a separate lock).
6480 */
6481 extern rwlock_t tasklist_lock;
6482-extern spinlock_t runqueue_lock;
6483 extern spinlock_t mmlist_lock;
6484
6485+typedef struct task_struct task_t;
6486+
6487 extern void sched_init(void);
6488-extern void init_idle(void);
6489+extern void init_idle(task_t *idle, int cpu);
6490 extern void show_state(void);
6491 extern void cpu_init (void);
6492 extern void trap_init(void);
6493 extern void update_process_times(int user);
6494-extern void update_one_process(struct task_struct *p, unsigned long user,
6495+extern void update_one_process(task_t *p, unsigned long user,
6496 unsigned long system, int cpu);
6497+extern void scheduler_tick(int user_tick, int system);
6498+extern void migration_init(void);
6499+extern unsigned long cache_decay_ticks;
6500
6501 #define MAX_SCHEDULE_TIMEOUT LONG_MAX
6502 extern signed long FASTCALL(schedule_timeout(signed long timeout));
6503@@ -152,6 +153,28 @@
6504 extern void flush_scheduled_tasks(void);
6505 extern int start_context_thread(void);
6506 extern int current_is_keventd(void);
6507+extern void FASTCALL(sched_exit(task_t * p));
6508+extern int FASTCALL(idle_cpu(int cpu));
6509+
6510+/*
6511+ * Priority of a process goes from 0..MAX_PRIO-1, valid RT
6512+ * priority is 0..MAX_RT_PRIO-1, and SCHED_OTHER tasks are
6513+ * in the range MAX_RT_PRIO..MAX_PRIO-1. Priority values
6514+ * are inverted: lower p->prio value means higher priority.
6515+ *
6516+ * The MAX_RT_USER_PRIO value allows the actual maximum
6517+ * RT priority to be separate from the value exported to
6518+ * user-space. This allows kernel threads to set their
6519+ * priority to a value higher than any user task. Note:
6520+ * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
6521+ *
6522+ * Both values are configurable at compile-time.
6523+ */
6524+
6525+#define MAX_USER_RT_PRIO 100
6526+#define MAX_RT_PRIO MAX_USER_RT_PRIO
6527+
6528+#define MAX_PRIO (MAX_RT_PRIO + 40)
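With MAX_USER_RT_PRIO = MAX_RT_PRIO = 100 and MAX_PRIO = 140, priorities 0..99 are real-time and 100..139 carry the forty nice levels -20..+19, which is also why the INIT_TASK template below sets prio and static_prio to MAX_PRIO-20, i.e. nice 0. A small illustration of that mapping; the NICE_TO_PRIO/PRIO_TO_NICE names are the O(1) scheduler's conversion macros from kernel/sched.c, reproduced here as an assumption rather than quoted from this hunk:

#include <assert.h>

#define MAX_USER_RT_PRIO        100
#define MAX_RT_PRIO             MAX_USER_RT_PRIO
#define MAX_PRIO                (MAX_RT_PRIO + 40)

/* nice -20..+19 maps onto the SCHED_OTHER priority range 100..139. */
#define NICE_TO_PRIO(nice)      (MAX_RT_PRIO + (nice) + 20)
#define PRIO_TO_NICE(prio)      ((prio) - MAX_RT_PRIO - 20)

int main(void)
{
        assert(NICE_TO_PRIO(-20) == MAX_RT_PRIO);       /* best SCHED_OTHER  */
        assert(NICE_TO_PRIO(0)   == MAX_PRIO - 20);     /* INIT_TASK default */
        assert(NICE_TO_PRIO(19)  == MAX_PRIO - 1);      /* worst priority    */
        assert(PRIO_TO_NICE(120) == 0);
        return 0;
}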
6529
6530 #if CONFIG_SMP
6531 extern void set_cpus_allowed(struct task_struct *p, unsigned long new_mask);
6532@@ -280,6 +303,8 @@
6533 extern struct user_struct root_user;
6534 #define INIT_USER (&root_user)
6535
6536+typedef struct prio_array prio_array_t;
6537+
6538 struct task_struct {
6539 /*
6540 * offsets of these are hardcoded elsewhere - touch with care
6541@@ -297,35 +322,26 @@
6542
6543 int lock_depth; /* Lock depth */
6544
6545-/*
6546- * offset 32 begins here on 32-bit platforms. We keep
6547- * all fields in a single cacheline that are needed for
6548- * the goodness() loop in schedule().
6549- */
6550- long counter;
6551- long nice;
6552- unsigned long policy;
6553- struct mm_struct *mm;
6554- int processor;
6555 /*
6556- * cpus_runnable is ~0 if the process is not running on any
6557- * CPU. It's (1 << cpu) if it's running on a CPU. This mask
6558- * is updated under the runqueue lock.
6559- *
6560- * To determine whether a process might run on a CPU, this
6561- * mask is AND-ed with cpus_allowed.
6562- */
6563- unsigned long cpus_runnable, cpus_allowed;
6564- /*
6565- * (only the 'next' pointer fits into the cacheline, but
6566- * that's just fine.)
6567+ * offset 32 begins here on 32-bit platforms.
6568 */
6569+ unsigned int cpu;
6570+ int prio, static_prio;
6571 struct list_head run_list;
6572- unsigned long sleep_time;
6573+ prio_array_t *array;
6574
6575- struct task_struct *next_task, *prev_task;
6576- struct mm_struct *active_mm;
6577+ unsigned long sleep_avg;
6578+ unsigned long sleep_timestamp;
6579+
6580+ unsigned long policy;
6581+ unsigned long cpus_allowed;
6582+ unsigned int time_slice, first_time_slice;
6583+
6584+ task_t *next_task, *prev_task;
6585+
6586+ struct mm_struct *mm, *active_mm;
6587 struct list_head local_pages;
6588+
6589 unsigned int allocation_order, nr_local_pages;
6590
6591 /* task state */
6592@@ -348,12 +364,12 @@
6593 * older sibling, respectively. (p->father can be replaced with
6594 * p->p_pptr->pid)
6595 */
6596- struct task_struct *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr;
6597+ task_t *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr;
6598 struct list_head thread_group;
6599
6600 /* PID hash table linkage. */
6601- struct task_struct *pidhash_next;
6602- struct task_struct **pidhash_pprev;
6603+ task_t *pidhash_next;
6604+ task_t **pidhash_pprev;
6605
6606 wait_queue_head_t wait_chldexit; /* for wait4() */
6607 struct completion *vfork_done; /* for vfork() */
6608@@ -412,6 +428,8 @@
6609 u32 self_exec_id;
6610 /* Protection of (de-)allocation: mm, files, fs, tty */
6611 spinlock_t alloc_lock;
6612+/* context-switch lock */
6613+ spinlock_t switch_lock;
6614
6615 /* journalling filesystem info */
6616 void *journal_info;
6617@@ -452,9 +470,15 @@
6618 */
6619 #define _STK_LIM (8*1024*1024)
6620
6621-#define DEF_COUNTER (10*HZ/100) /* 100 ms time slice */
6622-#define MAX_COUNTER (20*HZ/100)
6623-#define DEF_NICE (0)
6624+#if CONFIG_SMP
6625+extern void set_cpus_allowed(task_t *p, unsigned long new_mask);
6626+#else
6627+#define set_cpus_allowed(p, new_mask) do { } while (0)
6628+#endif
6629+
6630+extern void set_user_nice(task_t *p, long nice);
6631+extern int task_prio(task_t *p);
6632+extern int task_nice(task_t *p);
6633
6634 extern void yield(void);
6635
6636@@ -475,14 +499,14 @@
6637 addr_limit: KERNEL_DS, \
6638 exec_domain: &default_exec_domain, \
6639 lock_depth: -1, \
6640- counter: DEF_COUNTER, \
6641- nice: DEF_NICE, \
6642+ prio: MAX_PRIO-20, \
6643+ static_prio: MAX_PRIO-20, \
6644 policy: SCHED_OTHER, \
6645+ cpus_allowed: ~0UL, \
6646 mm: NULL, \
6647 active_mm: &init_mm, \
6648- cpus_runnable: ~0UL, \
6649- cpus_allowed: ~0UL, \
6650 run_list: LIST_HEAD_INIT(tsk.run_list), \
6651+ time_slice: HZ, \
6652 next_task: &tsk, \
6653 prev_task: &tsk, \
6654 p_opptr: &tsk, \
6655@@ -507,6 +531,7 @@
6656 pending: { NULL, &tsk.pending.head, {{0}}}, \
6657 blocked: {{0}}, \
6658 alloc_lock: SPIN_LOCK_UNLOCKED, \
6659+ switch_lock: SPIN_LOCK_UNLOCKED, \
6660 journal_info: NULL, \
6661 }
6662
6663@@ -516,24 +541,23 @@
6664 #endif
6665
6666 union task_union {
6667- struct task_struct task;
6668+ task_t task;
6669 unsigned long stack[INIT_TASK_SIZE/sizeof(long)];
6670 };
6671
6672 extern union task_union init_task_union;
6673
6674 extern struct mm_struct init_mm;
6675-extern struct task_struct *init_tasks[NR_CPUS];
6676
6677 /* PID hashing. (shouldn't this be dynamic?) */
6678 #define PIDHASH_SZ (4096 >> 2)
6679-extern struct task_struct *pidhash[PIDHASH_SZ];
6680+extern task_t *pidhash[PIDHASH_SZ];
6681
6682 #define pid_hashfn(x) ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1))
6683
6684-static inline void hash_pid(struct task_struct *p)
6685+static inline void hash_pid(task_t *p)
6686 {
6687- struct task_struct **htable = &pidhash[pid_hashfn(p->pid)];
6688+ task_t **htable = &pidhash[pid_hashfn(p->pid)];
6689
6690 if((p->pidhash_next = *htable) != NULL)
6691 (*htable)->pidhash_pprev = &p->pidhash_next;
6692@@ -541,16 +565,16 @@
6693 p->pidhash_pprev = htable;
6694 }
6695
6696-static inline void unhash_pid(struct task_struct *p)
6697+static inline void unhash_pid(task_t *p)
6698 {
6699 if(p->pidhash_next)
6700 p->pidhash_next->pidhash_pprev = p->pidhash_pprev;
6701 *p->pidhash_pprev = p->pidhash_next;
6702 }
6703
6704-static inline struct task_struct *find_task_by_pid(int pid)
6705+static inline task_t *find_task_by_pid(int pid)
6706 {
6707- struct task_struct *p, **htable = &pidhash[pid_hashfn(pid)];
6708+ task_t *p, **htable = &pidhash[pid_hashfn(pid)];
6709
6710 for(p = *htable; p && p->pid != pid; p = p->pidhash_next)
6711 ;
6712@@ -558,19 +582,6 @@
6713 return p;
6714 }
6715
6716-#define task_has_cpu(tsk) ((tsk)->cpus_runnable != ~0UL)
6717-
6718-static inline void task_set_cpu(struct task_struct *tsk, unsigned int cpu)
6719-{
6720- tsk->processor = cpu;
6721- tsk->cpus_runnable = 1UL << cpu;
6722-}
6723-
6724-static inline void task_release_cpu(struct task_struct *tsk)
6725-{
6726- tsk->cpus_runnable = ~0UL;
6727-}
6728-
6729 /* per-UID process charging. */
6730 extern struct user_struct * alloc_uid(uid_t);
6731 extern void free_uid(struct user_struct *);
6732@@ -598,47 +609,50 @@
6733 extern void FASTCALL(interruptible_sleep_on(wait_queue_head_t *q));
6734 extern long FASTCALL(interruptible_sleep_on_timeout(wait_queue_head_t *q,
6735 signed long timeout));
6736-extern int FASTCALL(wake_up_process(struct task_struct * tsk));
6737+extern int FASTCALL(wake_up_process(task_t * p));
6738+extern void FASTCALL(wake_up_forked_process(task_t * p));
6739
6740 #define wake_up(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1)
6741 #define wake_up_nr(x, nr) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr)
6742 #define wake_up_all(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0)
6743-#define wake_up_sync(x) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1)
6744-#define wake_up_sync_nr(x, nr) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr)
6745 #define wake_up_interruptible(x) __wake_up((x),TASK_INTERRUPTIBLE, 1)
6746 #define wake_up_interruptible_nr(x, nr) __wake_up((x),TASK_INTERRUPTIBLE, nr)
6747 #define wake_up_interruptible_all(x) __wake_up((x),TASK_INTERRUPTIBLE, 0)
6748-#define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, 1)
6749-#define wake_up_interruptible_sync_nr(x, nr) __wake_up_sync((x),TASK_INTERRUPTIBLE, nr)
6750+#ifdef CONFIG_SMP
6751+#define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, 1)
6752+#else
6753+#define wake_up_interruptible_sync(x) __wake_up((x),TASK_INTERRUPTIBLE, 1)
6754+#endif
6755+
6756 asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru);
6757
6758 extern int in_group_p(gid_t);
6759 extern int in_egroup_p(gid_t);
6760
6761 extern void proc_caches_init(void);
6762-extern void flush_signals(struct task_struct *);
6763-extern void flush_signal_handlers(struct task_struct *);
6764+extern void flush_signals(task_t *);
6765+extern void flush_signal_handlers(task_t *);
6766 extern void sig_exit(int, int, struct siginfo *);
6767 extern int dequeue_signal(sigset_t *, siginfo_t *);
6768 extern void block_all_signals(int (*notifier)(void *priv), void *priv,
6769 sigset_t *mask);
6770 extern void unblock_all_signals(void);
6771-extern int send_sig_info(int, struct siginfo *, struct task_struct *);
6772-extern int force_sig_info(int, struct siginfo *, struct task_struct *);
6773+extern int send_sig_info(int, struct siginfo *, task_t *);
6774+extern int force_sig_info(int, struct siginfo *, task_t *);
6775 extern int kill_pg_info(int, struct siginfo *, pid_t);
6776 extern int kill_sl_info(int, struct siginfo *, pid_t);
6777 extern int kill_proc_info(int, struct siginfo *, pid_t);
6778-extern void notify_parent(struct task_struct *, int);
6779-extern void do_notify_parent(struct task_struct *, int);
6780-extern void force_sig(int, struct task_struct *);
6781-extern int send_sig(int, struct task_struct *, int);
6782+extern void notify_parent(task_t *, int);
6783+extern void do_notify_parent(task_t *, int);
6784+extern void force_sig(int, task_t *);
6785+extern int send_sig(int, task_t *, int);
6786 extern int kill_pg(pid_t, int, int);
6787 extern int kill_sl(pid_t, int, int);
6788 extern int kill_proc(pid_t, int, int);
6789 extern int do_sigaction(int, const struct k_sigaction *, struct k_sigaction *);
6790 extern int do_sigaltstack(const stack_t *, stack_t *, unsigned long);
6791
6792-static inline int signal_pending(struct task_struct *p)
6793+static inline int signal_pending(task_t *p)
6794 {
6795 return (p->sigpending != 0);
6796 }
6797@@ -677,7 +691,7 @@
6798 This is required every time the blocked sigset_t changes.
6799 All callers should have t->sigmask_lock. */
6800
6801-static inline void recalc_sigpending(struct task_struct *t)
6802+static inline void recalc_sigpending(task_t *t)
6803 {
6804 t->sigpending = has_pending_signals(&t->pending.signal, &t->blocked);
6805 }
6806@@ -784,16 +798,17 @@
6807 extern int expand_fdset(struct files_struct *, int nr);
6808 extern void free_fdset(fd_set *, int);
6809
6810-extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
6811+extern int copy_thread(int, unsigned long, unsigned long, unsigned long, task_t *, struct pt_regs *);
6812 extern void flush_thread(void);
6813 extern void exit_thread(void);
6814
6815-extern void exit_mm(struct task_struct *);
6816-extern void exit_files(struct task_struct *);
6817-extern void exit_sighand(struct task_struct *);
6818+extern void exit_mm(task_t *);
6819+extern void exit_files(task_t *);
6820+extern void exit_sighand(task_t *);
6821
6822 extern void reparent_to_init(void);
6823 extern void daemonize(void);
6824+extern task_t *child_reaper;
6825
6826 extern int do_execve(char *, char **, char **, struct pt_regs *);
6827 extern int do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long);
6828@@ -804,6 +819,9 @@
6829
6830 extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
6831
6832+extern void wait_task_inactive(task_t * p);
6833+extern void kick_if_running(task_t * p);
6834+
6835 #define __wait_event(wq, condition) \
6836 do { \
6837 wait_queue_t __wait; \
6838@@ -885,27 +903,12 @@
6839 for (task = next_thread(current) ; task != current ; task = next_thread(task))
6840
6841 #define next_thread(p) \
6842- list_entry((p)->thread_group.next, struct task_struct, thread_group)
6843+ list_entry((p)->thread_group.next, task_t, thread_group)
6844
6845 #define thread_group_leader(p) (p->pid == p->tgid)
6846
6847-static inline void del_from_runqueue(struct task_struct * p)
6848+static inline void unhash_process(task_t *p)
6849 {
6850- nr_running--;
6851- p->sleep_time = jiffies;
6852- list_del(&p->run_list);
6853- p->run_list.next = NULL;
6854-}
6855-
6856-static inline int task_on_runqueue(struct task_struct *p)
6857-{
6858- return (p->run_list.next != NULL);
6859-}
6860-
6861-static inline void unhash_process(struct task_struct *p)
6862-{
6863- if (task_on_runqueue(p))
6864- out_of_line_bug();
6865 write_lock_irq(&tasklist_lock);
6866 nr_threads--;
6867 unhash_pid(p);
6868@@ -915,12 +918,12 @@
6869 }
6870
6871 /* Protects ->fs, ->files, ->mm, and synchronises with wait4(). Nests inside tasklist_lock */
6872-static inline void task_lock(struct task_struct *p)
6873+static inline void task_lock(task_t *p)
6874 {
6875 spin_lock(&p->alloc_lock);
6876 }
6877
6878-static inline void task_unlock(struct task_struct *p)
6879+static inline void task_unlock(task_t *p)
6880 {
6881 spin_unlock(&p->alloc_lock);
6882 }
6883@@ -944,6 +947,26 @@
6884 return res;
6885 }
6886
6887+static inline void set_need_resched(void)
6888+{
6889+ current->need_resched = 1;
6890+}
6891+
6892+static inline void clear_need_resched(void)
6893+{
6894+ current->need_resched = 0;
6895+}
6896+
6897+static inline void set_tsk_need_resched(task_t *tsk)
6898+{
6899+ tsk->need_resched = 1;
6900+}
6901+
6902+static inline void clear_tsk_need_resched(task_t *tsk)
6903+{
6904+ tsk->need_resched = 0;
6905+}
6906+
6907 static inline int need_resched(void)
6908 {
6909 return (unlikely(current->need_resched));
6910@@ -957,4 +980,5 @@
6911 }
6912
6913 #endif /* __KERNEL__ */
6914+
6915 #endif
6916diff -urN linux-2.4.22.org/include/linux/smp_balance.h linux-2.4.22/include/linux/smp_balance.h
6917--- linux-2.4.22.org/include/linux/smp_balance.h 1970-01-01 01:00:00.000000000 +0100
6918+++ linux-2.4.22/include/linux/smp_balance.h	2003-11-24 18:39:03.000000000 +0100
6919@@ -0,0 +1,15 @@
6920+#ifndef _LINUX_SMP_BALANCE_H
6921+#define _LINUX_SMP_BALANCE_H
6922+
6923+/*
6924+ * per-architecture load balancing logic, e.g. for hyperthreading
6925+ */
6926+
6927+#ifdef ARCH_HAS_SMP_BALANCE
6928+#include <asm/smp_balance.h>
6929+#else
6930+#define arch_load_balance(x, y) (0)
6931+#define arch_reschedule_idle_override(x, idle) (idle)
6932+#endif
6933+
6934+#endif /* _LINUX_SMP_BALANCE_H */
6935diff -urN linux-2.4.22.org/include/linux/smp.h linux-2.4.22/include/linux/smp.h
6936--- linux-2.4.22.org/include/linux/smp.h 2003-11-24 18:28:22.000000000 +0100
6937+++ linux-2.4.22/include/linux/smp.h 2003-11-24 18:39:03.000000000 +0100
6938@@ -86,6 +86,14 @@
6939 #define cpu_number_map(cpu) 0
6940 #define smp_call_function(func,info,retry,wait) ({ 0; })
6941 #define cpu_online_map 1
6942+static inline void smp_send_reschedule(int cpu) { }
6943+static inline void smp_send_reschedule_all(void) { }
6944
6945 #endif
6946+
6947+/*
6948+ * Common definitions:
6949+ */
6950+#define cpu() smp_processor_id()
6951+
6952 #endif
6953diff -urN linux-2.4.22.org/include/linux/wait.h linux-2.4.22/include/linux/wait.h
6954--- linux-2.4.22.org/include/linux/wait.h 2003-11-24 18:28:20.000000000 +0100
6955+++ linux-2.4.22/include/linux/wait.h 2003-11-24 18:39:03.000000000 +0100
6956@@ -59,6 +59,7 @@
6957 # define wq_write_lock_irq write_lock_irq
6958 # define wq_write_lock_irqsave write_lock_irqsave
6959 # define wq_write_unlock_irqrestore write_unlock_irqrestore
6960+# define wq_write_unlock_irq write_unlock_irq
6961 # define wq_write_unlock write_unlock
6962 #else
6963 # define wq_lock_t spinlock_t
6964@@ -71,6 +72,7 @@
6965 # define wq_write_lock_irq spin_lock_irq
6966 # define wq_write_lock_irqsave spin_lock_irqsave
6967 # define wq_write_unlock_irqrestore spin_unlock_irqrestore
6968+# define wq_write_unlock_irq spin_unlock_irq
6969 # define wq_write_unlock spin_unlock
6970 #endif
6971
6972diff -urN linux-2.4.22.org/init/main.c linux-2.4.22/init/main.c
6973--- linux-2.4.22.org/init/main.c 2003-11-24 18:28:15.000000000 +0100
6974+++ linux-2.4.22/init/main.c 2003-11-24 18:39:03.000000000 +0100
6975@@ -293,8 +293,6 @@
6976 extern void setup_arch(char **);
6977 extern void cpu_idle(void);
6978
6979-unsigned long wait_init_idle;
6980-
6981 #ifndef CONFIG_SMP
6982
6983 #ifdef CONFIG_X86_LOCAL_APIC
6984@@ -303,34 +301,24 @@
6985 APIC_init_uniprocessor();
6986 }
6987 #else
6988-#define smp_init() do { } while (0)
6989+#define smp_init() do { } while (0)
6990 #endif
6991
6992 #else
6993
6994-
6995 /* Called by boot processor to activate the rest. */
6996 static void __init smp_init(void)
6997 {
6998 /* Get other processors into their bootup holding patterns. */
6999 smp_boot_cpus();
7000- wait_init_idle = cpu_online_map;
7001- clear_bit(current->processor, &wait_init_idle); /* Don't wait on me! */
7002
7003 smp_threads_ready=1;
7004 smp_commence();
7005-
7006- /* Wait for the other cpus to set up their idle processes */
7007- printk("Waiting on wait_init_idle (map = 0x%lx)\n", wait_init_idle);
7008- while (wait_init_idle) {
7009- cpu_relax();
7010- barrier();
7011- }
7012- printk("All processors have done init_idle\n");
7013 }
7014
7015 #endif
7016
7017+
7018 /*
7019 * We need to finalize in a non-__init function or else race conditions
7020 * between the root thread and the init thread may cause start_kernel to
7021@@ -342,9 +330,8 @@
7022 {
7023 kernel_thread(init, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
7024 unlock_kernel();
7025- current->need_resched = 1;
7026- cpu_idle();
7027-}
7028+ cpu_idle();
7029+}
7030
7031 /*
7032 * Activate the first processor.
7033@@ -428,6 +415,7 @@
7034 check_bugs();
7035 printk("POSIX conformance testing by UNIFIX\n");
7036
7037+ init_idle(current, smp_processor_id());
7038 /*
7039 * We count on the initial thread going ok
7040 * Like idlers init is an unlocked kernel thread, which will
7041@@ -465,6 +453,10 @@
7042 */
7043 static void __init do_basic_setup(void)
7044 {
7045+ /* Start the per-CPU migration threads */
7046+#if CONFIG_SMP
7047+ migration_init();
7048+#endif
7049
7050 /*
7051 * Tell the world that we're going to be the grim
7052diff -urN linux-2.4.22.org/kernel/capability.c linux-2.4.22/kernel/capability.c
7053--- linux-2.4.22.org/kernel/capability.c 2003-11-24 18:28:16.000000000 +0100
7054+++ linux-2.4.22/kernel/capability.c 2003-11-24 18:39:03.000000000 +0100
7055@@ -8,6 +8,8 @@
7056 #include <linux/mm.h>
7057 #include <asm/uaccess.h>
7058
7059+unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
7060+
7061 kernel_cap_t cap_bset = CAP_INIT_EFF_SET;
7062
7063 /* Note: never hold tasklist_lock while spinning for this one */
7064diff -urN linux-2.4.22.org/kernel/exit.c linux-2.4.22/kernel/exit.c
7065--- linux-2.4.22.org/kernel/exit.c 2003-11-24 18:28:15.000000000 +0100
7066+++ linux-2.4.22/kernel/exit.c 2003-11-24 18:39:03.000000000 +0100
7067@@ -28,49 +28,22 @@
7068
7069 static void release_task(struct task_struct * p)
7070 {
7071- if (p != current) {
7072+ if (p == current)
7073+ BUG();
7074 #ifdef CONFIG_SMP
7075- /*
7076- * Wait to make sure the process isn't on the
7077- * runqueue (active on some other CPU still)
7078- */
7079- for (;;) {
7080- task_lock(p);
7081- if (!task_has_cpu(p))
7082- break;
7083- task_unlock(p);
7084- do {
7085- cpu_relax();
7086- barrier();
7087- } while (task_has_cpu(p));
7088- }
7089- task_unlock(p);
7090+ wait_task_inactive(p);
7091 #endif
7092- atomic_dec(&p->user->processes);
7093- free_uid(p->user);
7094- unhash_process(p);
7095-
7096- release_thread(p);
7097- current->cmin_flt += p->min_flt + p->cmin_flt;
7098- current->cmaj_flt += p->maj_flt + p->cmaj_flt;
7099- current->cnswap += p->nswap + p->cnswap;
7100- /*
7101- * Potentially available timeslices are retrieved
7102- * here - this way the parent does not get penalized
7103- * for creating too many processes.
7104- *
7105- * (this cannot be used to artificially 'generate'
7106- * timeslices, because any timeslice recovered here
7107- * was given away by the parent in the first place.)
7108- */
7109- current->counter += p->counter;
7110- if (current->counter >= MAX_COUNTER)
7111- current->counter = MAX_COUNTER;
7112- p->pid = 0;
7113- free_task_struct(p);
7114- } else {
7115- printk("task releasing itself\n");
7116- }
7117+ atomic_dec(&p->user->processes);
7118+ free_uid(p->user);
7119+ unhash_process(p);
7120+
7121+ release_thread(p);
7122+ current->cmin_flt += p->min_flt + p->cmin_flt;
7123+ current->cmaj_flt += p->maj_flt + p->cmaj_flt;
7124+ current->cnswap += p->nswap + p->cnswap;
7125+ sched_exit(p);
7126+ p->pid = 0;
7127+ free_task_struct(p);
7128 }
7129
7130 /*
7131@@ -150,6 +123,79 @@
7132 return retval;
7133 }
7134
7135+/**
7136+ * reparent_to_init() - Reparent the calling kernel thread to the init task.
7137+ *
7138+ * If a kernel thread is launched as a result of a system call, or if
7139+ * it ever exits, it should generally reparent itself to init so that
7140+ * it is correctly cleaned up on exit.
7141+ *
7142+ * The various task state such as scheduling policy and priority may have
7143+ * been inherited from a user process, so we reset them to sane values here.
7144+ *
7145+ * NOTE that reparent_to_init() gives the caller full capabilities.
7146+ */
7147+void reparent_to_init(void)
7148+{
7149+ write_lock_irq(&tasklist_lock);
7150+
7151+ /* Reparent to init */
7152+ REMOVE_LINKS(current);
7153+ current->p_pptr = child_reaper;
7154+ current->p_opptr = child_reaper;
7155+ SET_LINKS(current);
7156+
7157+ /* Set the exit signal to SIGCHLD so we signal init on exit */
7158+ current->exit_signal = SIGCHLD;
7159+
7160+ current->ptrace = 0;
7161+ if ((current->policy == SCHED_OTHER) && (task_nice(current) < 0))
7162+ set_user_nice(current, 0);
7163+ /* cpus_allowed? */
7164+ /* rt_priority? */
7165+ /* signals? */
7166+ current->cap_effective = CAP_INIT_EFF_SET;
7167+ current->cap_inheritable = CAP_INIT_INH_SET;
7168+ current->cap_permitted = CAP_FULL_SET;
7169+ current->keep_capabilities = 0;
7170+ memcpy(current->rlim, init_task.rlim, sizeof(*(current->rlim)));
7171+ current->user = INIT_USER;
7172+
7173+ write_unlock_irq(&tasklist_lock);
7174+}
7175+
7176+/*
7177+ * Put all the gunge required to become a kernel thread without
7178+ * attached user resources in one place where it belongs.
7179+ */
7180+
7181+void daemonize(void)
7182+{
7183+ struct fs_struct *fs;
7184+
7185+
7186+ /*
7187+ * If we were started as result of loading a module, close all of the
7188+ * user space pages. We don't need them, and if we didn't close them
7189+ * they would be locked into memory.
7190+ */
7191+ exit_mm(current);
7192+
7193+ current->session = 1;
7194+ current->pgrp = 1;
7195+ current->tty = NULL;
7196+
7197+ /* Become as one with the init task */
7198+
7199+ exit_fs(current); /* current->fs->count--; */
7200+ fs = init_task.fs;
7201+ current->fs = fs;
7202+ atomic_inc(&fs->count);
7203+ exit_files(current);
7204+ current->files = init_task.files;
7205+ atomic_inc(&current->files->count);
7206+}
7207+
7208 /*
7209 * When we die, we re-parent all our children.
7210 * Try to give them to another thread in our thread
7211@@ -171,6 +217,7 @@
7212 /* Make sure we're not reparenting to ourselves */
7213 p->p_opptr = child_reaper;
7214
7215+ p->first_time_slice = 0;
7216 if (p->pdeath_signal) send_sig(p->pdeath_signal, p, 0);
7217 }
7218 }
7219diff -urN linux-2.4.22.org/kernel/fork.c linux-2.4.22/kernel/fork.c
7220--- linux-2.4.22.org/kernel/fork.c 2003-11-24 18:28:15.000000000 +0100
7221+++ linux-2.4.22/kernel/fork.c 2003-11-24 18:39:03.000000000 +0100
7222@@ -31,7 +31,6 @@
7223
7224 /* The idle threads do not count.. */
7225 int nr_threads;
7226-int nr_running;
7227
7228 int max_threads;
7229 unsigned long total_forks; /* Handle normal Linux uptimes. */
7230@@ -39,6 +38,8 @@
7231
7232 struct task_struct *pidhash[PIDHASH_SZ];
7233
7234+rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */
7235+
7236 void add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
7237 {
7238 unsigned long flags;
7239@@ -697,9 +698,6 @@
7240 if (p->pid == 0 && current->pid != 0)
7241 goto bad_fork_cleanup;
7242
7243- p->run_list.next = NULL;
7244- p->run_list.prev = NULL;
7245-
7246 p->p_cptr = NULL;
7247 init_waitqueue_head(&p->wait_chldexit);
7248 p->vfork_done = NULL;
7249@@ -708,6 +706,7 @@
7250 init_completion(&vfork);
7251 }
7252 spin_lock_init(&p->alloc_lock);
7253+ spin_lock_init(&p->switch_lock);
7254
7255 p->sigpending = 0;
7256 init_sigpending(&p->pending);
7257@@ -724,11 +723,11 @@
7258 #ifdef CONFIG_SMP
7259 {
7260 int i;
7261- p->cpus_runnable = ~0UL;
7262- p->processor = current->processor;
7263+
7264 /* ?? should we just memset this ?? */
7265 for(i = 0; i < smp_num_cpus; i++)
7266- p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0;
7267+ p->per_cpu_utime[cpu_logical_map(i)] =
7268+ p->per_cpu_stime[cpu_logical_map(i)] = 0;
7269 spin_lock_init(&p->sigmask_lock);
7270 }
7271 #endif
7272@@ -766,15 +765,27 @@
7273 p->pdeath_signal = 0;
7274
7275 /*
7276- * "share" dynamic priority between parent and child, thus the
7277- * total amount of dynamic priorities in the system doesn't change,
7278- * more scheduling fairness. This is only important in the first
7279- * timeslice, on the long run the scheduling behaviour is unchanged.
7280- */
7281- p->counter = (current->counter + 1) >> 1;
7282- current->counter >>= 1;
7283- if (!current->counter)
7284- current->need_resched = 1;
7285+ * Share the timeslice between parent and child, thus the
7286+	 * total amount of pending timeslices in the system doesn't change,
7287+ * resulting in more scheduling fairness.
7288+ */
7289+ __cli();
7290+ if (!current->time_slice)
7291+ BUG();
7292+ p->time_slice = (current->time_slice + 1) >> 1;
7293+ current->time_slice >>= 1;
7294+ p->first_time_slice = 1;
7295+ if (!current->time_slice) {
7296+ /*
7297+ * This case is rare, it happens when the parent has only
7298+ * a single jiffy left from its timeslice. Taking the
7299+ * runqueue lock is not a problem.
7300+ */
7301+ current->time_slice = 1;
7302+ scheduler_tick(0,0);
7303+ }
7304+ p->sleep_timestamp = jiffies;
7305+ __sti();
7306
7307 /*
7308 * Ok, add it to the run-queues and make it
7309@@ -810,11 +821,16 @@
7310
7311 if (p->ptrace & PT_PTRACED)
7312 send_sig(SIGSTOP, p, 1);
7313-
7314- wake_up_process(p); /* do this last */
7315+ wake_up_forked_process(p); /* do this last */
7316 ++total_forks;
7317 if (clone_flags & CLONE_VFORK)
7318 wait_for_completion(&vfork);
7319+ else
7320+ /*
7321+ * Let the child process run first, to avoid most of the
7322+ * COW overhead when the child exec()s afterwards.
7323+ */
7324+ current->need_resched = 1;
7325
7326 fork_out:
7327 return retval;
7328diff -urN linux-2.4.22.org/kernel/ksyms.c linux-2.4.22/kernel/ksyms.c
7329--- linux-2.4.22.org/kernel/ksyms.c 2003-11-24 18:28:15.000000000 +0100
7330+++ linux-2.4.22/kernel/ksyms.c 2003-11-24 18:39:03.000000000 +0100
7331@@ -451,7 +451,6 @@
7332 /* process management */
7333 EXPORT_SYMBOL(complete_and_exit);
7334 EXPORT_SYMBOL(__wake_up);
7335-EXPORT_SYMBOL(__wake_up_sync);
7336 EXPORT_SYMBOL(wake_up_process);
7337 EXPORT_SYMBOL(sleep_on);
7338 EXPORT_SYMBOL(sleep_on_timeout);
7339@@ -464,6 +463,8 @@
7340 #endif
7341 EXPORT_SYMBOL(yield);
7342 EXPORT_SYMBOL(__cond_resched);
7343+EXPORT_SYMBOL(set_user_nice);
7344+EXPORT_SYMBOL(nr_context_switches);
7345 EXPORT_SYMBOL(jiffies);
7346 EXPORT_SYMBOL(xtime);
7347 EXPORT_SYMBOL(do_gettimeofday);
7348@@ -474,7 +475,6 @@
7349 #endif
7350
7351 EXPORT_SYMBOL(kstat);
7352-EXPORT_SYMBOL(nr_running);
7353
7354 /* misc */
7355 EXPORT_SYMBOL(panic);
7356diff -urN linux-2.4.22.org/kernel/printk.c linux-2.4.22/kernel/printk.c
7357--- linux-2.4.22.org/kernel/printk.c 2003-11-24 18:28:15.000000000 +0100
7358+++ linux-2.4.22/kernel/printk.c 2003-11-24 18:39:03.000000000 +0100
7359@@ -26,6 +26,7 @@
7360 #include <linux/module.h>
7361 #include <linux/interrupt.h> /* For in_interrupt() */
7362 #include <linux/config.h>
7363+#include <linux/delay.h>
7364
7365 #include <asm/uaccess.h>
7366
7367diff -urN linux-2.4.22.org/kernel/ptrace.c linux-2.4.22/kernel/ptrace.c
7368--- linux-2.4.22.org/kernel/ptrace.c 2003-11-24 18:28:15.000000000 +0100
7369+++ linux-2.4.22/kernel/ptrace.c 2003-11-24 18:39:03.000000000 +0100
7370@@ -32,20 +32,7 @@
7371 if (child->state != TASK_STOPPED)
7372 return -ESRCH;
7373 #ifdef CONFIG_SMP
7374- /* Make sure the child gets off its CPU.. */
7375- for (;;) {
7376- task_lock(child);
7377- if (!task_has_cpu(child))
7378- break;
7379- task_unlock(child);
7380- do {
7381- if (child->state != TASK_STOPPED)
7382- return -ESRCH;
7383- barrier();
7384- cpu_relax();
7385- } while (task_has_cpu(child));
7386- }
7387- task_unlock(child);
7388+ wait_task_inactive(child);
7389 #endif
7390 }
7391
7392diff -urN linux-2.4.22.org/kernel/sched.c linux-2.4.22/kernel/sched.c
7393--- linux-2.4.22.org/kernel/sched.c 2003-11-24 18:28:15.000000000 +0100
7394+++ linux-2.4.22/kernel/sched.c 2003-11-24 18:39:03.000000000 +0100
7395@@ -3,340 +3,333 @@
7396 *
7397 * Kernel scheduler and related syscalls
7398 *
7399- * Copyright (C) 1991, 1992 Linus Torvalds
7400+ * Copyright (C) 1991-2002 Linus Torvalds
7401 *
7402 * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and
7403 * make semaphores SMP safe
7404 * 1998-11-19 Implemented schedule_timeout() and related stuff
7405 * by Andrea Arcangeli
7406- * 1998-12-28 Implemented better SMP scheduling by Ingo Molnar
7407+ * 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar:
7408+ * hybrid priority-list and round-robin design with
7409+ * an array-switch method of distributing timeslices
7410+ * and per-CPU runqueues. Additional code by Davide
7411+ * Libenzi, Robert Love, and Rusty Russell.
7412 */
7413
7414-/*
7415- * 'sched.c' is the main kernel file. It contains scheduling primitives
7416- * (sleep_on, wakeup, schedule etc) as well as a number of simple system
7417- * call functions (type getpid()), which just extract a field from
7418- * current-task
7419- */
7420-
7421-#include <linux/config.h>
7422 #include <linux/mm.h>
7423-#include <linux/init.h>
7424-#include <linux/smp_lock.h>
7425 #include <linux/nmi.h>
7426 #include <linux/interrupt.h>
7427-#include <linux/kernel_stat.h>
7428-#include <linux/completion.h>
7429-#include <linux/prefetch.h>
7430-#include <linux/compiler.h>
7431-
7432+#include <linux/init.h>
7433 #include <asm/uaccess.h>
7434+#include <linux/smp_lock.h>
7435 #include <asm/mmu_context.h>
7436-
7437-extern void timer_bh(void);
7438-extern void tqueue_bh(void);
7439-extern void immediate_bh(void);
7440+#include <linux/kernel_stat.h>
7441+#include <linux/completion.h>
7442
7443 /*
7444- * scheduler variables
7445- */
7446+ * Convert user-nice values [ -20 ... 0 ... 19 ]
7447+ * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
7448+ * and back.
7449+ */
7450+#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20)
7451+#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20)
7452+#define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio)
7453
7454-unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
7455-
7456-extern void mem_use(void);
7457+/*
7458+ * 'User priority' is the nice value converted to something we
7459+ * can work with better when scaling various scheduler parameters,
7460+ * it's a [ 0 ... 39 ] range.
7461+ */
7462+#define USER_PRIO(p) ((p)-MAX_RT_PRIO)
7463+#define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio)
7464+#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO))
7465
7466 /*
7467- * Scheduling quanta.
7468+ * These are the 'tuning knobs' of the scheduler:
7469 *
7470- * NOTE! The unix "nice" value influences how long a process
7471- * gets. The nice value ranges from -20 to +19, where a -20
7472- * is a "high-priority" task, and a "+10" is a low-priority
7473- * task.
7474- *
7475- * We want the time-slice to be around 50ms or so, so this
7476- * calculation depends on the value of HZ.
7477- */
7478-#if HZ < 200
7479-#define TICK_SCALE(x) ((x) >> 2)
7480-#elif HZ < 400
7481-#define TICK_SCALE(x) ((x) >> 1)
7482-#elif HZ < 800
7483-#define TICK_SCALE(x) (x)
7484-#elif HZ < 1600
7485-#define TICK_SCALE(x) ((x) << 1)
7486-#else
7487-#define TICK_SCALE(x) ((x) << 2)
7488-#endif
7489-
7490-#define NICE_TO_TICKS(nice) (TICK_SCALE(20-(nice))+1)
7491-
7492+ * Minimum timeslice is 10 msecs, default timeslice is 150 msecs,
7493+ * maximum timeslice is 300 msecs. Timeslices get refilled after
7494+ * they expire.
7495+ */
7496+#define MIN_TIMESLICE ( 10 * HZ / 1000)
7497+#define MAX_TIMESLICE (300 * HZ / 1000)
7498+#define CHILD_PENALTY 50
7499+#define PARENT_PENALTY 100
7500+#define PRIO_BONUS_RATIO 25
7501+#define INTERACTIVE_DELTA 2
7502+#define MAX_SLEEP_AVG (2*HZ)
7503+#define STARVATION_LIMIT (2*HZ)
7504
7505 /*
7506- * Init task must be ok at boot for the ix86 as we will check its signals
7507- * via the SMP irq return path.
7508- */
7509-
7510-struct task_struct * init_tasks[NR_CPUS] = {&init_task, };
7511+ * If a task is 'interactive' then we reinsert it in the active
7512+ * array after it has expired its current timeslice. (it will not
7513+ * continue to run immediately, it will still round-robin with
7514+ * other interactive tasks.)
7515+ *
7516+ * This part scales the interactivity limit depending on niceness.
7517+ *
7518+ * We scale it linearly, offset by INTERACTIVE_DELTA.
7519+ * Here are a few examples of different nice levels:
7520+ *
7521+ * TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0]
7522+ * TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0]
7523+ * TASK_INTERACTIVE( 0): [1,1,1,1,0,0,0,0,0,0,0]
7524+ * TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0]
7525+ * TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0]
7526+ *
7527+ * (the X axis represents the possible -5 ... 0 ... +5 dynamic
7528+ * priority range a task can explore, a value of '1' means the
7529+ * task is rated interactive.)
7530+ *
7531+ * I.e. nice +19 tasks can never get 'interactive' enough to be
7532+ * reinserted into the active array, and only heavily CPU-hogging
7533+ * nice -20 tasks will be expired. Default nice 0 tasks are in between:
7534+ * it takes some effort for them to get interactive, but it's not
7535+ * too hard.
7536+ */
7537+
7538+#define SCALE(v1,v1_max,v2_max) \
7539+ (v1) * (v2_max) / (v1_max)
7540+
7541+#define DELTA(p) \
7542+ (SCALE(TASK_NICE(p), 40, MAX_USER_PRIO*PRIO_BONUS_RATIO/100) + \
7543+ INTERACTIVE_DELTA)
7544+
7545+#define TASK_INTERACTIVE(p) \
7546+ ((p)->prio <= (p)->static_prio - DELTA(p))
7547
7548 /*
7549- * The tasklist_lock protects the linked list of processes.
7550- *
7551- * The runqueue_lock locks the parts that actually access
7552- * and change the run-queues, and have to be interrupt-safe.
7553- *
7554- * If both locks are to be concurrently held, the runqueue_lock
7555- * nests inside the tasklist_lock.
7556+ * TASK_TIMESLICE scales user-nice values [ -20 ... 19 ]
7557+ * to time slice values.
7558 *
7559- * task->alloc_lock nests inside tasklist_lock.
7560+ * The higher a process's priority, the bigger timeslices
7561+ * it gets during one round of execution. But even the lowest
7562+ * priority process gets MIN_TIMESLICE worth of execution time.
7563 */
7564-spinlock_t runqueue_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED; /* inner */
7565-rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */
7566
7567-static LIST_HEAD(runqueue_head);
7568+#define TASK_TIMESLICE(p) (MIN_TIMESLICE + \
7569+ ((MAX_TIMESLICE - MIN_TIMESLICE) * (MAX_PRIO-1-(p)->static_prio)/39))
7570
7571 /*
7572- * We align per-CPU scheduling data on cacheline boundaries,
7573- * to prevent cacheline ping-pong.
7574+ * These are the runqueue data structures:
7575 */
7576-static union {
7577- struct schedule_data {
7578- struct task_struct * curr;
7579- cycles_t last_schedule;
7580- } schedule_data;
7581- char __pad [SMP_CACHE_BYTES];
7582-} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}};
7583
7584-#define cpu_curr(cpu) aligned_data[(cpu)].schedule_data.curr
7585-#define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule
7586+#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long))
7587
7588-struct kernel_stat kstat;
7589-extern struct task_struct *child_reaper;
7590+typedef struct runqueue runqueue_t;
7591
7592-#ifdef CONFIG_SMP
7593+struct prio_array {
7594+ int nr_active;
7595+ unsigned long bitmap[BITMAP_SIZE];
7596+	struct list_head queue[MAX_PRIO];
7597+};
7598
7599-#define idle_task(cpu) (init_tasks[cpu_number_map(cpu)])
7600-#define can_schedule(p,cpu) \
7601- ((p)->cpus_runnable & (p)->cpus_allowed & (1UL << cpu))
7602+/*
7603+ * This is the main, per-CPU runqueue data structure.
7604+ *
7605+ * Locking rule: in places that lock multiple runqueues (such as
7606+ * the load-balancing or the process-migration code), lock-acquire
7607+ * operations must be ordered by ascending runqueue address.
7608+ */
7609+struct runqueue {
7610+ spinlock_t lock;
7611+ unsigned long nr_running, nr_switches, expired_timestamp;
7612+ task_t *curr, *idle;
7613+ prio_array_t *active, *expired, arrays[2];
7614+ long nr_uninterruptible;
7615+#ifdef CONFIG_SMP
7616+ long last_jiffy;
7617+ int prev_nr_running[NR_CPUS];
7618+ task_t *migration_thread;
7619+	struct list_head migration_queue;
7620+#endif
7621+} ____cacheline_aligned;
7622
7623-#else
7624+static struct runqueue runqueues[NR_CPUS] __cacheline_aligned;
7625
7626-#define idle_task(cpu) (&init_task)
7627-#define can_schedule(p,cpu) (1)
7628+#define cpu_rq(cpu) (runqueues + (cpu))
7629+#define this_rq() cpu_rq(smp_processor_id())
7630+#define task_rq(p) cpu_rq((p)->cpu)
7631+#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
7632+#define rt_task(p) ((p)->prio < MAX_RT_PRIO)
7633
7634+/*
7635+ * Default context-switch locking:
7636+ */
7637+#ifndef prepare_arch_switch
7638+# define prepare_arch_switch(rq, next) do { } while(0)
7639+# define finish_arch_switch(rq, prev) spin_unlock_irq(&(rq)->lock)
7640 #endif
7641
7642-void scheduling_functions_start_here(void) { }
7643-
7644 /*
7645- * This is the function that decides how desirable a process is..
7646- * You can weigh different processes against each other depending
7647- * on what CPU they've run on lately etc to try to handle cache
7648- * and TLB miss penalties.
7649- *
7650- * Return values:
7651- * -1000: never select this
7652- * 0: out of time, recalculate counters (but it might still be
7653- * selected)
7654- * +ve: "goodness" value (the larger, the better)
7655- * +1000: realtime process, select this.
7656+ * task_rq_lock - lock the runqueue a given task resides on and disable
7657+ * interrupts. Note the ordering: we can safely lookup the task_rq without
7658+ * explicitly disabling preemption.
7659 */
7660-
7661-static inline int goodness(struct task_struct * p, int this_cpu, struct mm_struct *this_mm)
7662+static inline runqueue_t *task_rq_lock(task_t *p, unsigned long *flags)
7663 {
7664- int weight;
7665-
7666- /*
7667- * select the current process after every other
7668- * runnable process, but before the idle thread.
7669- * Also, dont trigger a counter recalculation.
7670- */
7671- weight = -1;
7672- if (p->policy & SCHED_YIELD)
7673- goto out;
7674+ struct runqueue *rq;
7675
7676- /*
7677- * Non-RT process - normal case first.
7678- */
7679- if (p->policy == SCHED_OTHER) {
7680- /*
7681- * Give the process a first-approximation goodness value
7682- * according to the number of clock-ticks it has left.
7683- *
7684- * Don't do any other calculations if the time slice is
7685- * over..
7686- */
7687- weight = p->counter;
7688- if (!weight)
7689- goto out;
7690-
7691-#ifdef CONFIG_SMP
7692- /* Give a largish advantage to the same processor... */
7693- /* (this is equivalent to penalizing other processors) */
7694- if (p->processor == this_cpu)
7695- weight += PROC_CHANGE_PENALTY;
7696-#endif
7697-
7698- /* .. and a slight advantage to the current MM */
7699- if (p->mm == this_mm || !p->mm)
7700- weight += 1;
7701- weight += 20 - p->nice;
7702- goto out;
7703+repeat_lock_task:
7704+ rq = task_rq(p);
7705+ spin_lock_irqsave(&rq->lock, *flags);
7706+ if (unlikely(rq != task_rq(p))) {
7707+ spin_unlock_irqrestore(&rq->lock, *flags);
7708+ goto repeat_lock_task;
7709 }
7710+ return rq;
7711+}
7712
7713- /*
7714- * Realtime process, select the first one on the
7715- * runqueue (taking priorities within processes
7716- * into account).
7717- */
7718- weight = 1000 + p->rt_priority;
7719-out:
7720- return weight;
7721+static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags)
7722+{
7723+ spin_unlock_irqrestore(&rq->lock, *flags);
7724 }
7725
7726 /*
7727- * the 'goodness value' of replacing a process on a given CPU.
7728- * positive value means 'replace', zero or negative means 'dont'.
7729+ * Adding/removing a task to/from a priority array:
7730 */
7731-static inline int preemption_goodness(struct task_struct * prev, struct task_struct * p, int cpu)
7732+static inline void dequeue_task(struct task_struct *p, prio_array_t *array)
7733 {
7734- return goodness(p, cpu, prev->active_mm) - goodness(prev, cpu, prev->active_mm);
7735+ array->nr_active--;
7736+ list_del(&p->run_list);
7737+ if (list_empty(array->queue + p->prio))
7738+ __clear_bit(p->prio, array->bitmap);
7739 }
7740
7741-/*
7742- * This is ugly, but reschedule_idle() is very timing-critical.
7743- * We are called with the runqueue spinlock held and we must
7744- * not claim the tasklist_lock.
7745- */
7746-static FASTCALL(void reschedule_idle(struct task_struct * p));
7747+#define enqueue_task(p, array) __enqueue_task(p, array, NULL)
7748+static inline void __enqueue_task(struct task_struct *p, prio_array_t *array, task_t * parent)
7749+{
7750+ if (!parent) {
7751+ list_add_tail(&p->run_list, array->queue + p->prio);
7752+ __set_bit(p->prio, array->bitmap);
7753+ p->array = array;
7754+ } else {
7755+ list_add_tail(&p->run_list, &parent->run_list);
7756+ array = p->array = parent->array;
7757+ }
7758+ array->nr_active++;
7759+}
7760
7761-static void reschedule_idle(struct task_struct * p)
7762+static inline int effective_prio(task_t *p)
7763 {
7764-#ifdef CONFIG_SMP
7765- int this_cpu = smp_processor_id();
7766- struct task_struct *tsk, *target_tsk;
7767- int cpu, best_cpu, i, max_prio;
7768- cycles_t oldest_idle;
7769+ int bonus, prio;
7770
7771 /*
7772- * shortcut if the woken up task's last CPU is
7773- * idle now.
7774+ * Here we scale the actual sleep average [0 .... MAX_SLEEP_AVG]
7775+ * into the -5 ... 0 ... +5 bonus/penalty range.
7776+ *
7777+ * We use 25% of the full 0...39 priority range so that:
7778+ *
7779+ * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
7780+ * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
7781+ *
7782+ * Both properties are important to certain workloads.
7783 */
7784- best_cpu = p->processor;
7785- if (can_schedule(p, best_cpu)) {
7786- tsk = idle_task(best_cpu);
7787- if (cpu_curr(best_cpu) == tsk) {
7788- int need_resched;
7789-send_now_idle:
7790- /*
7791- * If need_resched == -1 then we can skip sending
7792- * the IPI altogether, tsk->need_resched is
7793- * actively watched by the idle thread.
7794- */
7795- need_resched = tsk->need_resched;
7796- tsk->need_resched = 1;
7797- if ((best_cpu != this_cpu) && !need_resched)
7798- smp_send_reschedule(best_cpu);
7799- return;
7800- }
7801- }
7802+ bonus = MAX_USER_PRIO*PRIO_BONUS_RATIO*p->sleep_avg/MAX_SLEEP_AVG/100 -
7803+ MAX_USER_PRIO*PRIO_BONUS_RATIO/100/2;
7804
7805- /*
7806- * We know that the preferred CPU has a cache-affine current
7807- * process, lets try to find a new idle CPU for the woken-up
7808- * process. Select the least recently active idle CPU. (that
7809- * one will have the least active cache context.) Also find
7810- * the executing process which has the least priority.
7811- */
7812- oldest_idle = (cycles_t) -1;
7813- target_tsk = NULL;
7814- max_prio = 0;
7815+ prio = p->static_prio - bonus;
7816+ if (prio < MAX_RT_PRIO)
7817+ prio = MAX_RT_PRIO;
7818+ if (prio > MAX_PRIO-1)
7819+ prio = MAX_PRIO-1;
7820+ return prio;
7821+}
7822
7823- for (i = 0; i < smp_num_cpus; i++) {
7824- cpu = cpu_logical_map(i);
7825- if (!can_schedule(p, cpu))
7826- continue;
7827- tsk = cpu_curr(cpu);
7828+#define activate_task(p, rq) __activate_task(p, rq, NULL)
7829+static inline void __activate_task(task_t *p, runqueue_t *rq, task_t * parent)
7830+{
7831+ unsigned long sleep_time = jiffies - p->sleep_timestamp;
7832+ prio_array_t *array = rq->active;
7833+
7834+ if (!parent && !rt_task(p) && sleep_time) {
7835 /*
7836- * We use the first available idle CPU. This creates
7837- * a priority list between idle CPUs, but this is not
7838- * a problem.
7839+ * This code gives a bonus to interactive tasks. We update
7840+ * an 'average sleep time' value here, based on
7841+ * sleep_timestamp. The more time a task spends sleeping,
7842+ * the higher the average gets - and the higher the priority
7843+ * boost gets as well.
7844 */
7845- if (tsk == idle_task(cpu)) {
7846-#if defined(__i386__) && defined(CONFIG_SMP)
7847- /*
7848- * Check if two siblings are idle in the same
7849- * physical package. Use them if found.
7850- */
7851- if (smp_num_siblings == 2) {
7852- if (cpu_curr(cpu_sibling_map[cpu]) ==
7853- idle_task(cpu_sibling_map[cpu])) {
7854- oldest_idle = last_schedule(cpu);
7855- target_tsk = tsk;
7856- break;
7857- }
7858-
7859- }
7860-#endif
7861- if (last_schedule(cpu) < oldest_idle) {
7862- oldest_idle = last_schedule(cpu);
7863- target_tsk = tsk;
7864- }
7865- } else {
7866- if (oldest_idle == (cycles_t)-1) {
7867- int prio = preemption_goodness(tsk, p, cpu);
7868-
7869- if (prio > max_prio) {
7870- max_prio = prio;
7871- target_tsk = tsk;
7872- }
7873- }
7874- }
7875- }
7876- tsk = target_tsk;
7877- if (tsk) {
7878- if (oldest_idle != (cycles_t)-1) {
7879- best_cpu = tsk->processor;
7880- goto send_now_idle;
7881- }
7882- tsk->need_resched = 1;
7883- if (tsk->processor != this_cpu)
7884- smp_send_reschedule(tsk->processor);
7885+ p->sleep_timestamp = jiffies;
7886+ p->sleep_avg += sleep_time;
7887+ if (p->sleep_avg > MAX_SLEEP_AVG)
7888+ p->sleep_avg = MAX_SLEEP_AVG;
7889+ p->prio = effective_prio(p);
7890 }
7891- return;
7892-
7893+ __enqueue_task(p, array, parent);
7894+ rq->nr_running++;
7895+}
7896
7897-#else /* UP */
7898- int this_cpu = smp_processor_id();
7899- struct task_struct *tsk;
7900+static inline void deactivate_task(struct task_struct *p, runqueue_t *rq)
7901+{
7902+ rq->nr_running--;
7903+ if (p->state == TASK_UNINTERRUPTIBLE)
7904+ rq->nr_uninterruptible++;
7905+ dequeue_task(p, p->array);
7906+ p->array = NULL;
7907+}
7908+
7909+static inline void resched_task(task_t *p)
7910+{
7911+#ifdef CONFIG_SMP
7912+ int need_resched;
7913
7914- tsk = cpu_curr(this_cpu);
7915- if (preemption_goodness(tsk, p, this_cpu) > 0)
7916- tsk->need_resched = 1;
7917+ need_resched = p->need_resched;
7918+ set_tsk_need_resched(p);
7919+ if (!need_resched && (p->cpu != smp_processor_id()))
7920+ smp_send_reschedule(p->cpu);
7921+#else
7922+ set_tsk_need_resched(p);
7923 #endif
7924 }
7925
7926+#ifdef CONFIG_SMP
7927+
7928 /*
7929- * Careful!
7930- *
7931- * This has to add the process to the _end_ of the
7932- * run-queue, not the beginning. The goodness value will
7933- * determine whether this process will run next. This is
7934- * important to get SCHED_FIFO and SCHED_RR right, where
7935- * a process that is either pre-empted or its time slice
7936- * has expired, should be moved to the tail of the run
7937- * queue for its priority - Bhavesh Davda
7938+ * Wait for a process to unschedule. This is used by the exit() and
7939+ * ptrace() code.
7940 */
7941-static inline void add_to_runqueue(struct task_struct * p)
7942+void wait_task_inactive(task_t * p)
7943 {
7944- list_add_tail(&p->run_list, &runqueue_head);
7945- nr_running++;
7946+ unsigned long flags;
7947+ runqueue_t *rq;
7948+
7949+repeat:
7950+ rq = task_rq(p);
7951+ if (unlikely(rq->curr == p)) {
7952+ cpu_relax();
7953+ barrier();
7954+ goto repeat;
7955+ }
7956+ rq = task_rq_lock(p, &flags);
7957+ if (unlikely(rq->curr == p)) {
7958+ task_rq_unlock(rq, &flags);
7959+ goto repeat;
7960+ }
7961+ task_rq_unlock(rq, &flags);
7962 }
7963
7964-static inline void move_last_runqueue(struct task_struct * p)
7965+/*
7966+ * Kick the remote CPU if the task is running currently,
7967+ * this code is used by the signal code to signal tasks
7968+ * which are in user-mode as quickly as possible.
7969+ *
7970+ * (Note that we do this lockless - if the task does anything
7971+ * while the message is in flight then it will notice the
7972+ * sigpending condition anyway.)
7973+ */
7974+void kick_if_running(task_t * p)
7975 {
7976- list_del(&p->run_list);
7977- list_add_tail(&p->run_list, &runqueue_head);
7978+ if (p == task_rq(p)->curr && p->cpu != smp_processor_id())
7979+ resched_task(p);
7980 }
7981+#endif
7982+
7983+#ifdef CONFIG_SMP
7984+static int FASTCALL(reschedule_idle(task_t * p));
7985+static void FASTCALL(load_balance(runqueue_t *this_rq, int idle));
7986+#endif
7987+
7988
7989 /*
7990 * Wake up a process. Put it on the run-queue if it's not
7991@@ -345,429 +338,721 @@
7992 * progress), and as such you're allowed to do the simpler
7993 * "current->state = TASK_RUNNING" to mark yourself runnable
7994 * without the overhead of this.
7995+ *
7996+ * returns failure only if the task is already active.
7997 */
7998-static inline int try_to_wake_up(struct task_struct * p, int synchronous)
7999+static int try_to_wake_up(task_t * p, int sync)
8000 {
8001 unsigned long flags;
8002 int success = 0;
8003+ long old_state;
8004+ runqueue_t *rq;
8005+#ifdef CONFIG_SMP
8006+ int migrated_to_idle = 0;
8007+#endif
8008+
8009+#ifdef CONFIG_SMP
8010+repeat_lock_task:
8011+#endif
8012+ rq = task_rq_lock(p, &flags);
8013+ old_state = p->state;
8014+ if (!p->array) {
8015+#ifdef CONFIG_SMP
8016+ if (likely(rq->curr != p)) {
8017+ /* can migrate */
8018+ if (unlikely(sync)) {
8019+ if (p->cpu != smp_processor_id() &&
8020+ p->cpus_allowed & (1UL << smp_processor_id())) {
8021+ p->cpu = smp_processor_id();
8022+ goto migrated_task;
8023+ }
8024+ } else {
8025+ if (reschedule_idle(p))
8026+ goto migrated_task;
8027+ }
8028+ }
8029+#endif
8030+ if (old_state == TASK_UNINTERRUPTIBLE)
8031+ rq->nr_uninterruptible--;
8032+ activate_task(p, rq);
8033+ if (p->prio < rq->curr->prio)
8034+ resched_task(rq->curr);
8035+ success = 1;
8036+ }
8037+ p->state = TASK_RUNNING;
8038
8039+#ifdef CONFIG_SMP
8040 /*
8041- * We want the common case fall through straight, thus the goto.
8042+	 * Subtle: we can call load_balance() only here (before the
8043+	 * unlock) because it can internally drop the lock. We claim
8044+	 * that the cpu is running, so it will be a light rebalance;
8045+	 * if this cpu is about to go idle, schedule() will trigger
8046+	 * the idle rebalancing by itself.
8047 */
8048- spin_lock_irqsave(&runqueue_lock, flags);
8049- p->state = TASK_RUNNING;
8050- if (task_on_runqueue(p))
8051- goto out;
8052- add_to_runqueue(p);
8053- if (!synchronous || !(p->cpus_allowed & (1UL << smp_processor_id())))
8054- reschedule_idle(p);
8055- success = 1;
8056-out:
8057- spin_unlock_irqrestore(&runqueue_lock, flags);
8058+ if (success && migrated_to_idle)
8059+ load_balance(rq, 0);
8060+#endif
8061+
8062+ task_rq_unlock(rq, &flags);
8063+
8064 return success;
8065+
8066+#ifdef CONFIG_SMP
8067+ migrated_task:
8068+ task_rq_unlock(rq, &flags);
8069+ migrated_to_idle = 1;
8070+ goto repeat_lock_task;
8071+#endif
8072 }
8073
8074-inline int wake_up_process(struct task_struct * p)
8075+int wake_up_process(task_t * p)
8076 {
8077 return try_to_wake_up(p, 0);
8078 }
8079
8080-static void process_timeout(unsigned long __data)
8081+void wake_up_forked_process(task_t * p)
8082 {
8083- struct task_struct * p = (struct task_struct *) __data;
8084+ runqueue_t *rq;
8085+ task_t * parent = current;
8086
8087- wake_up_process(p);
8088-}
8089+ rq = this_rq();
8090+ spin_lock_irq(&rq->lock);
8091
8092-/**
8093- * schedule_timeout - sleep until timeout
8094- * @timeout: timeout value in jiffies
8095- *
8096- * Make the current task sleep until @timeout jiffies have
8097- * elapsed. The routine will return immediately unless
8098- * the current task state has been set (see set_current_state()).
8099- *
8100- * You can set the task state as follows -
8101- *
8102- * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
8103- * pass before the routine returns. The routine will return 0
8104- *
8105- * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
8106- * delivered to the current task. In this case the remaining time
8107- * in jiffies will be returned, or 0 if the timer expired in time
8108- *
8109- * The current task state is guaranteed to be TASK_RUNNING when this
8110- * routine returns.
8111- *
8112- * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
8113- * the CPU away without a bound on the timeout. In this case the return
8114- * value will be %MAX_SCHEDULE_TIMEOUT.
8115- *
8116- * In all cases the return value is guaranteed to be non-negative.
8117- */
8118-signed long schedule_timeout(signed long timeout)
8119-{
8120- struct timer_list timer;
8121- unsigned long expire;
8122+ p->state = TASK_RUNNING;
8123+ if (likely(!rt_task(p) && parent->array)) {
8124+ /*
8125+ * We decrease the sleep average of forked
8126+ * children, to keep max-interactive tasks
8127+ * from forking tasks that are max-interactive.
8128+ * CHILD_PENALTY is set to 50% since we have
8129+ * no clue if this is still an interactive
8130+ * task like the parent or if this will be a
8131+		 * CPU-bound task. The parent isn't touched
8132+		 * as we don't make assumptions about the parent
8133+ * changing behaviour after the child is forked.
8134+ */
8135+ parent->sleep_avg = parent->sleep_avg * PARENT_PENALTY / 100;
8136+ p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100;
8137
8138- switch (timeout)
8139- {
8140- case MAX_SCHEDULE_TIMEOUT:
8141 /*
8142- * These two special cases are useful to be comfortable
8143- * in the caller. Nothing more. We could take
8144- * MAX_SCHEDULE_TIMEOUT from one of the negative value
8145- * but I' d like to return a valid offset (>=0) to allow
8146- * the caller to do everything it want with the retval.
8147+ * For its first schedule keep the child at the same
8148+		 * priority (i.e. in the same list) as the parent,
8149+ * activate_forked_task() will take care to put the
8150+ * child in front of the parent (lifo) to guarantee a
8151+ * schedule-child-first behaviour after fork.
8152 */
8153- schedule();
8154- goto out;
8155- default:
8156+ p->prio = parent->prio;
8157+ } else {
8158 /*
8159- * Another bit of PARANOID. Note that the retval will be
8160- * 0 since no piece of kernel is supposed to do a check
8161- * for a negative retval of schedule_timeout() (since it
8162- * should never happens anyway). You just have the printk()
8163- * that will tell you if something is gone wrong and where.
8164+ * Take the usual wakeup path if it's RT or if
8165+ * it's a child of the first idle task (during boot
8166+ * only).
8167 */
8168- if (timeout < 0)
8169- {
8170- printk(KERN_ERR "schedule_timeout: wrong timeout "
8171- "value %lx from %p\n", timeout,
8172- __builtin_return_address(0));
8173- current->state = TASK_RUNNING;
8174- goto out;
8175- }
8176+ p->prio = effective_prio(p);
8177+ parent = NULL;
8178 }
8179
8180- expire = timeout + jiffies;
8181+ p->cpu = smp_processor_id();
8182+ __activate_task(p, rq, parent);
8183+ spin_unlock_irq(&rq->lock);
8184+}
8185
8186- init_timer(&timer);
8187- timer.expires = expire;
8188- timer.data = (unsigned long) current;
8189- timer.function = process_timeout;
8190+/*
8191+ * Potentially available exiting-child timeslices are
8192+ * retrieved here - this way the parent does not get
8193+ * penalized for creating too many processes.
8194+ *
8195+ * (this cannot be used to 'generate' timeslices
8196+ * artificially, because any timeslice recovered here
8197+ * was given away by the parent in the first place.)
8198+ */
8199+void sched_exit(task_t * p)
8200+{
8201+ __cli();
8202+ if (p->first_time_slice) {
8203+ current->time_slice += p->time_slice;
8204+ if (unlikely(current->time_slice > MAX_TIMESLICE))
8205+ current->time_slice = MAX_TIMESLICE;
8206+ }
8207+ __sti();
8208+}
8209
8210- add_timer(&timer);
8211- schedule();
8212- del_timer_sync(&timer);
8213+#if CONFIG_SMP
8214+asmlinkage void schedule_tail(task_t *prev)
8215+{
8216+ finish_arch_switch(this_rq(), prev);
8217+}
8218+#endif
8219+
8220+static inline task_t * context_switch(task_t *prev, task_t *next)
8221+{
8222+ struct mm_struct *mm = next->mm;
8223+ struct mm_struct *oldmm = prev->active_mm;
8224
8225- timeout = expire - jiffies;
8226+ if (unlikely(!mm)) {
8227+ next->active_mm = oldmm;
8228+ atomic_inc(&oldmm->mm_count);
8229+ enter_lazy_tlb(oldmm, next, smp_processor_id());
8230+ } else
8231+ switch_mm(oldmm, mm, next, smp_processor_id());
8232
8233- out:
8234- return timeout < 0 ? 0 : timeout;
8235+ if (unlikely(!prev->mm)) {
8236+ prev->active_mm = NULL;
8237+ mmdrop(oldmm);
8238+ }
8239+
8240+ /* Here we just switch the register state and the stack. */
8241+ switch_to(prev, next, prev);
8242+
8243+ return prev;
8244 }
8245
8246-/*
8247- * schedule_tail() is getting called from the fork return path. This
8248- * cleans up all remaining scheduler things, without impacting the
8249- * common case.
8250- */
8251-static inline void __schedule_tail(struct task_struct *prev)
8252+unsigned long nr_running(void)
8253 {
8254-#ifdef CONFIG_SMP
8255- int policy;
8256+ unsigned long i, sum = 0;
8257
8258- /*
8259- * prev->policy can be written from here only before `prev'
8260- * can be scheduled (before setting prev->cpus_runnable to ~0UL).
8261- * Of course it must also be read before allowing prev
8262- * to be rescheduled, but since the write depends on the read
8263- * to complete, wmb() is enough. (the spin_lock() acquired
8264- * before setting cpus_runnable is not enough because the spin_lock()
8265- * common code semantics allows code outside the critical section
8266- * to enter inside the critical section)
8267- */
8268- policy = prev->policy;
8269- prev->policy = policy & ~SCHED_YIELD;
8270- wmb();
8271+ for (i = 0; i < smp_num_cpus; i++)
8272+ sum += cpu_rq(cpu_logical_map(i))->nr_running;
8273
8274- /*
8275- * fast path falls through. We have to clear cpus_runnable before
8276- * checking prev->state to avoid a wakeup race. Protect against
8277- * the task exiting early.
8278- */
8279- task_lock(prev);
8280- task_release_cpu(prev);
8281- mb();
8282- if (prev->state == TASK_RUNNING)
8283- goto needs_resched;
8284+ return sum;
8285+}
8286
8287-out_unlock:
8288- task_unlock(prev); /* Synchronise here with release_task() if prev is TASK_ZOMBIE */
8289- return;
8290+/* Note: the per-cpu information is useful only to get the cumulative result */
8291+unsigned long nr_uninterruptible(void)
8292+{
8293+ unsigned long i, sum = 0;
8294
8295- /*
8296- * Slow path - we 'push' the previous process and
8297- * reschedule_idle() will attempt to find a new
8298- * processor for it. (but it might preempt the
8299- * current process as well.) We must take the runqueue
8300- * lock and re-check prev->state to be correct. It might
8301- * still happen that this process has a preemption
8302- * 'in progress' already - but this is not a problem and
8303- * might happen in other circumstances as well.
8304- */
8305-needs_resched:
8306- {
8307- unsigned long flags;
8308+ for (i = 0; i < smp_num_cpus; i++)
8309+ sum += cpu_rq(cpu_logical_map(i))->nr_uninterruptible;
8310
8311- /*
8312- * Avoid taking the runqueue lock in cases where
8313- * no preemption-check is necessery:
8314- */
8315- if ((prev == idle_task(smp_processor_id())) ||
8316- (policy & SCHED_YIELD))
8317- goto out_unlock;
8318+ return sum;
8319+}
8320
8321- spin_lock_irqsave(&runqueue_lock, flags);
8322- if ((prev->state == TASK_RUNNING) && !task_has_cpu(prev))
8323- reschedule_idle(prev);
8324- spin_unlock_irqrestore(&runqueue_lock, flags);
8325- goto out_unlock;
8326- }
8327-#else
8328- prev->policy &= ~SCHED_YIELD;
8329-#endif /* CONFIG_SMP */
8330+unsigned long nr_context_switches(void)
8331+{
8332+ unsigned long i, sum = 0;
8333+
8334+ for (i = 0; i < smp_num_cpus; i++)
8335+ sum += cpu_rq(cpu_logical_map(i))->nr_switches;
8336+
8337+ return sum;
8338 }
8339
8340-asmlinkage void schedule_tail(struct task_struct *prev)
8341+inline int idle_cpu(int cpu)
8342 {
8343- __schedule_tail(prev);
8344+ return cpu_curr(cpu) == cpu_rq(cpu)->idle;
8345 }
8346
8347+#if CONFIG_SMP
8348 /*
8349- * 'schedule()' is the scheduler function. It's a very simple and nice
8350- * scheduler: it's not perfect, but certainly works for most things.
8351- *
8352- * The goto is "interesting".
8353- *
8354- * NOTE!! Task 0 is the 'idle' task, which gets called when no other
8355- * tasks can run. It can not be killed, and it cannot sleep. The 'state'
8356- * information in task[0] is never used.
8357+ * Lock the busiest runqueue as well, this_rq is locked already.
8358+ * Recalculate nr_running if we have to drop the runqueue lock.
8359 */
8360-asmlinkage void schedule(void)
8361+static inline unsigned int double_lock_balance(runqueue_t *this_rq,
8362+ runqueue_t *busiest, int this_cpu, int idle, unsigned int nr_running)
8363 {
8364- struct schedule_data * sched_data;
8365- struct task_struct *prev, *next, *p;
8366- struct list_head *tmp;
8367- int this_cpu, c;
8368+ if (unlikely(!spin_trylock(&busiest->lock))) {
8369+ if (busiest < this_rq) {
8370+ spin_unlock(&this_rq->lock);
8371+ spin_lock(&busiest->lock);
8372+ spin_lock(&this_rq->lock);
8373+ /* Need to recalculate nr_running */
8374+ if (idle || (this_rq->nr_running > this_rq->prev_nr_running[this_cpu]))
8375+ nr_running = this_rq->nr_running;
8376+ else
8377+ nr_running = this_rq->prev_nr_running[this_cpu];
8378+ } else
8379+ spin_lock(&busiest->lock);
8380+ }
8381+ return nr_running;
8382+}
8383
8384+/*
8385+ * Move a task from a remote runqueue to the local runqueue.
8386+ * Both runqueues must be locked.
8387+ */
8388+static inline int pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, runqueue_t *this_rq, int this_cpu)
8389+{
8390+ int resched = 0;
8391
8392- spin_lock_prefetch(&runqueue_lock);
8393+ dequeue_task(p, src_array);
8394+ src_rq->nr_running--;
8395+ p->cpu = this_cpu;
8396+ this_rq->nr_running++;
8397+ enqueue_task(p, this_rq->active);
8398+ /*
8399+	 * Note that idle threads have a prio of MAX_PRIO, so this test
8400+	 * is always true for them.
8401+ */
8402+ if (p->prio < this_rq->curr->prio)
8403+ resched = 1;
8404
8405- BUG_ON(!current->active_mm);
8406-need_resched_back:
8407- prev = current;
8408- this_cpu = prev->processor;
8409+ return resched;
8410+}
8411
8412- if (unlikely(in_interrupt())) {
8413- printk("Scheduling in interrupt\n");
8414- BUG();
8415+static inline int idle_cpu_reschedule(task_t * p, int cpu)
8416+{
8417+ if (unlikely(!(p->cpus_allowed & (1UL << cpu))))
8418+ return 0;
8419+ return idle_cpu(cpu);
8420+}
8421+
8422+#include <linux/smp_balance.h>
8423+
8424+static int reschedule_idle(task_t * p)
8425+{
8426+ int p_cpu = p->cpu, i;
8427+
8428+ if (idle_cpu(p_cpu))
8429+ return 0;
8430+
8431+ p_cpu = cpu_number_map(p_cpu);
8432+
8433+ for (i = (p_cpu + 1) % smp_num_cpus;
8434+ i != p_cpu;
8435+ i = (i + 1) % smp_num_cpus) {
8436+ int physical = cpu_logical_map(i);
8437+
8438+ if (idle_cpu_reschedule(p, physical)) {
8439+ physical = arch_reschedule_idle_override(p, physical);
8440+ p->cpu = physical;
8441+ return 1;
8442+ }
8443 }
8444
8445- release_kernel_lock(prev, this_cpu);
8446+ return 0;
8447+}
8448+
8449+/*
8450+ * Current runqueue is empty, or rebalance tick: if there is an
8451+ * imbalance (current runqueue is too short) then pull from
8452+ * busiest runqueue(s).
8453+ *
8454+ * We call this with the current runqueue locked,
8455+ * irqs disabled.
8456+ */
8457+static void load_balance(runqueue_t *this_rq, int idle)
8458+{
8459+ int imbalance, nr_running, load, max_load,
8460+ idx, i, this_cpu = this_rq - runqueues;
8461+ task_t *tmp;
8462+ runqueue_t *busiest, *rq_src;
8463+ prio_array_t *array;
8464+	struct list_head *head, *curr;
8465+ int resched;
8466
8467 /*
8468- * 'sched_data' is protected by the fact that we can run
8469- * only one process per CPU.
8470+ * Handle architecture-specific balancing, such as hyperthreading.
8471 */
8472- sched_data = & aligned_data[this_cpu].schedule_data;
8473+ if (arch_load_balance(this_cpu, idle))
8474+ return;
8475
8476- spin_lock_irq(&runqueue_lock);
8477+ retry:
8478+ /*
8479+ * We search all runqueues to find the most busy one.
8480+	 * We do this lockless to reduce cache-bouncing overhead;
8481+	 * we re-check the 'best' source CPU later on, with
8482+ * the lock held.
8483+ *
8484+ * We fend off statistical fluctuations in runqueue lengths by
8485+ * saving the runqueue length during the previous load-balancing
8486+	 * operation and using the smaller of the current and saved lengths.
8487+	 * If a runqueue stays long enough for a sustained period of time then
8488+	 * we recognize it and pull tasks from it.
8489+ *
8490+ * The 'current runqueue length' is a statistical maximum variable,
8491+ * for that one we take the longer one - to avoid fluctuations in
8492+	 * the other direction. So for a load-balance to happen there needs
8493+	 * to be a stably long runqueue on the target CPU and a stably short
8494+	 * runqueue on the local CPU.
8495+ *
8496+ * We make an exception if this CPU is about to become idle - in
8497+ * that case we are less picky about moving a task across CPUs and
8498+ * take what can be taken.
8499+ */
8500+ if (idle || (this_rq->nr_running > this_rq->prev_nr_running[this_cpu]))
8501+ nr_running = this_rq->nr_running;
8502+ else
8503+ nr_running = this_rq->prev_nr_running[this_cpu];
8504
8505- /* move an exhausted RR process to be last.. */
8506- if (unlikely(prev->policy == SCHED_RR))
8507- if (!prev->counter) {
8508- prev->counter = NICE_TO_TICKS(prev->nice);
8509- move_last_runqueue(prev);
8510- }
8511+ busiest = NULL;
8512+ max_load = 1;
8513+ for (i = 0; i < smp_num_cpus; i++) {
8514+ int logical = cpu_logical_map(i);
8515
8516- switch (prev->state) {
8517- case TASK_INTERRUPTIBLE:
8518- if (signal_pending(prev)) {
8519- prev->state = TASK_RUNNING;
8520- break;
8521- }
8522- default:
8523- del_from_runqueue(prev);
8524- case TASK_RUNNING:;
8525+ rq_src = cpu_rq(logical);
8526+ if (idle || (rq_src->nr_running < this_rq->prev_nr_running[logical]))
8527+ load = rq_src->nr_running;
8528+ else
8529+ load = this_rq->prev_nr_running[logical];
8530+ this_rq->prev_nr_running[logical] = rq_src->nr_running;
8531+
8532+ if ((load > max_load) && (rq_src != this_rq)) {
8533+ busiest = rq_src;
8534+ max_load = load;
8535+ }
8536 }
8537- prev->need_resched = 0;
8538+
8539+ if (likely(!busiest))
8540+ return;
8541+
8542+ imbalance = (max_load - nr_running) / 2;
8543+
8544+	/* It takes at least a ~25% imbalance to trigger balancing. */
8545+ if (!idle && (imbalance < (max_load + 3)/4))
8546+ return;
8547
8548 /*
8549- * this is the scheduler proper:
8550+ * Make sure nothing significant changed since we checked the
8551+ * runqueue length.
8552 */
8553+ if (double_lock_balance(this_rq, busiest, this_cpu, idle, nr_running) > nr_running ||
8554+ busiest->nr_running < max_load)
8555+ goto out_unlock_retry;
8556
8557-repeat_schedule:
8558 /*
8559- * Default process to select..
8560+ * We first consider expired tasks. Those will likely not be
8561+ * executed in the near future, and they are most likely to
8562+ * be cache-cold, thus switching CPUs has the least effect
8563+ * on them.
8564 */
8565- next = idle_task(this_cpu);
8566- c = -1000;
8567- list_for_each(tmp, &runqueue_head) {
8568- p = list_entry(tmp, struct task_struct, run_list);
8569- if (can_schedule(p, this_cpu)) {
8570- int weight = goodness(p, this_cpu, prev->active_mm);
8571- if (weight > c)
8572- c = weight, next = p;
8573+ if (busiest->expired->nr_active)
8574+ array = busiest->expired;
8575+ else
8576+ array = busiest->active;
8577+
8578+ resched = 0;
8579+new_array:
8580+ /* Start searching at priority 0: */
8581+ idx = 0;
8582+skip_bitmap:
8583+ if (!idx)
8584+ idx = sched_find_first_bit(array->bitmap);
8585+ else
8586+ idx = find_next_bit(array->bitmap, MAX_PRIO, idx);
8587+ if (idx == MAX_PRIO) {
8588+ if (array == busiest->expired) {
8589+ array = busiest->active;
8590+ goto new_array;
8591 }
8592+ goto out_unlock;
8593 }
8594
8595- /* Do we need to re-calculate counters? */
8596- if (unlikely(!c)) {
8597- struct task_struct *p;
8598-
8599- spin_unlock_irq(&runqueue_lock);
8600- read_lock(&tasklist_lock);
8601- for_each_task(p)
8602- p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);
8603- read_unlock(&tasklist_lock);
8604- spin_lock_irq(&runqueue_lock);
8605- goto repeat_schedule;
8606+ head = array->queue + idx;
8607+ curr = head->prev;
8608+skip_queue:
8609+ tmp = list_entry(curr, task_t, run_list);
8610+
8611+ /*
8612+ * We do not migrate tasks that are:
8613+ * 1) running (obviously), or
8614+ * 2) cannot be migrated to this CPU due to cpus_allowed, or
8615+ * 3) are cache-hot on their current CPU.
8616+ */
8617+
8618+#define CAN_MIGRATE_TASK(p,rq,this_cpu) \
8619+ ((jiffies - (p)->sleep_timestamp > cache_decay_ticks) && \
8620+ ((p) != (rq)->curr) && \
8621+ ((p)->cpus_allowed & (1UL << (this_cpu))))
8622+
8623+ curr = curr->prev;
8624+
8625+ if (!CAN_MIGRATE_TASK(tmp, busiest, this_cpu)) {
8626+ if (curr != head)
8627+ goto skip_queue;
8628+ idx++;
8629+ goto skip_bitmap;
8630+ }
8631+ resched |= pull_task(busiest, array, tmp, this_rq, this_cpu);
8632+ if (--imbalance > 0) {
8633+ if (curr != head)
8634+ goto skip_queue;
8635+ idx++;
8636+ goto skip_bitmap;
8637 }
8638+out_unlock:
8639+ spin_unlock(&busiest->lock);
8640+ if (resched)
8641+ resched_task(this_rq->curr);
8642+ return;
8643+out_unlock_retry:
8644+ spin_unlock(&busiest->lock);
8645+ goto retry;
8646+}
8647
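
The function above is the heart of the O(1) balancer: scan all runqueues locklessly, pick the busiest one, and pull roughly half of the difference once the imbalance exceeds about 25% (unless this CPU is going idle). A minimal userspace sketch of that selection heuristic follows; the fixed CPU count, the plain arrays and the omission of the prev_nr_running smoothing are simplifications, not the patch's actual data structures.

#include <stdio.h>

#define NCPU 4

/* Pick the CPU to steal from, mirroring load_balance()'s heuristic:
 * the busiest runqueue must be at least ~25% longer than ours unless
 * we are idle, and we aim to move half of the difference. */
static int pick_busiest(const int nr_running[NCPU], int this_cpu, int idle,
                        int *imbalance)
{
    int i, busiest = -1, max_load = 1;

    for (i = 0; i < NCPU; i++) {
        if (i != this_cpu && nr_running[i] > max_load) {
            max_load = nr_running[i];
            busiest = i;
        }
    }
    if (busiest < 0)
        return -1;

    *imbalance = (max_load - nr_running[this_cpu]) / 2;
    if (!idle && *imbalance < (max_load + 3) / 4)
        return -1;              /* not enough imbalance to bother */
    return busiest;
}

int main(void)
{
    int load[NCPU] = { 1, 6, 2, 2 };
    int imbalance, victim = pick_busiest(load, 0, 0, &imbalance);

    if (victim >= 0)
        printf("pull %d task(s) from CPU %d\n", imbalance, victim);
    return 0;
}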
8648- /*
8649- * from this point on nothing can prevent us from
8650- * switching to the next task, save this fact in
8651- * sched_data.
8652- */
8653- sched_data->curr = next;
8654- task_set_cpu(next, this_cpu);
8655- spin_unlock_irq(&runqueue_lock);
8656-
8657- if (unlikely(prev == next)) {
8658- /* We won't go through the normal tail, so do this by hand */
8659- prev->policy &= ~SCHED_YIELD;
8660- goto same_process;
8661+/*
8662+ * One of the idle_cpu_tick() or the busy_cpu_tick() functions
8663+ * gets called every timer tick, on every CPU. Our balancing action
8664+ * frequency and balancing aggressiveness depend on whether the CPU is
8665+ * idle or not.
8666+ *
8667+ * busy-rebalance every 250 msecs. idle-rebalance every 100 msec.
8668+ */
8669+#define BUSY_REBALANCE_TICK (HZ/4 ?: 1)
8670+#define IDLE_REBALANCE_TICK (HZ/10 ?: 1)
8671+
8672+static inline void idle_tick(void)
8673+{
8674+ if (unlikely(time_before_eq(this_rq()->last_jiffy + IDLE_REBALANCE_TICK, jiffies))) {
8675+ spin_lock(&this_rq()->lock);
8676+ load_balance(this_rq(), 1);
8677+ spin_unlock(&this_rq()->lock);
8678+ this_rq()->last_jiffy = jiffies;
8679 }
8680+}
8681
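
idle_tick() above only rebalances once at least IDLE_REBALANCE_TICK jiffies have passed since rq->last_jiffy, using the kernel's wrap-safe time_before_eq() comparison. Below is a small standalone restatement of that comparison, with an illustrative interval standing in for the real IDLE_REBALANCE_TICK.

#include <stdio.h>

typedef unsigned long jiffies_t;

/* Wrap-safe "a <= b" on jiffy counters, in the spirit of the kernel's
 * time_before_eq(): the difference is evaluated as a signed quantity,
 * so the comparison stays correct across counter wraparound. */
static int before_eq(jiffies_t a, jiffies_t b)
{
    return (long)(a - b) <= 0;
}

int main(void)
{
    jiffies_t last = (jiffies_t)-3;   /* just before wraparound */
    jiffies_t now  = 7;               /* just after wraparound  */
    unsigned long interval = 10;      /* stand-in for IDLE_REBALANCE_TICK */

    if (before_eq(last + interval, now))
        printf("time to rebalance\n");
    else
        printf("too early\n");
    return 0;
}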
8682-#ifdef CONFIG_SMP
8683- /*
8684- * maintain the per-process 'last schedule' value.
8685- * (this has to be recalculated even if we reschedule to
8686- * the same process) Currently this is only used on SMP,
8687- * and it's approximate, so we do not have to maintain
8688- * it while holding the runqueue spinlock.
8689- */
8690- sched_data->last_schedule = get_cycles();
8691+#endif
8692
8693- /*
8694- * We drop the scheduler lock early (it's a global spinlock),
8695- * thus we have to lock the previous process from getting
8696- * rescheduled during switch_to().
8697- */
8698+/*
8699+ * We place interactive tasks back into the active array, if possible.
8700+ *
8701+ * To guarantee that this does not starve expired tasks we ignore the
8702+ * interactivity of a task if the first expired task had to wait more
8703+ * than a 'reasonable' amount of time. This deadline timeout is
8704+ * load-dependent, as the frequency of array switches decreases with
8705+ * increasing number of running tasks:
8706+ */
8707+#define EXPIRED_STARVING(rq) \
8708+ ((rq)->expired_timestamp && \
8709+ (jiffies - (rq)->expired_timestamp >= \
8710+ STARVATION_LIMIT * ((rq)->nr_running) + 1))
8711
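
The EXPIRED_STARVING() macro above scales the deadline with the number of runnable tasks. Here is a compilable restatement; the STARVATION_LIMIT value is defined elsewhere in the patch, so the 10*HZ used below is only an illustrative assumption.

#include <stdio.h>

/* Userspace restatement of EXPIRED_STARVING(): the expired array is
 * considered starving once the oldest expired task has waited longer
 * than a load-dependent deadline. */
#define HZ               100
#define STARVATION_LIMIT (10 * HZ)      /* assumption, not the patch's value */

static int expired_starving(unsigned long jiffies,
                            unsigned long expired_timestamp,
                            unsigned long nr_running)
{
    return expired_timestamp &&
           jiffies - expired_timestamp >= STARVATION_LIMIT * nr_running + 1;
}

int main(void)
{
    /* With 4 runnable tasks the deadline grows to about 40 seconds. */
    printf("%d\n", expired_starving(5000, 1000, 4));   /* 0: not yet   */
    printf("%d\n", expired_starving(6000, 1000, 4));   /* 1: starving  */
    return 0;
}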
8712-#endif /* CONFIG_SMP */
8713+/*
8714+ * This function gets called by the timer code, with HZ frequency.
8715+ * We call it with interrupts disabled.
8716+ */
8717+void scheduler_tick(int user_tick, int system)
8718+{
8719+ int cpu = smp_processor_id();
8720+ runqueue_t *rq = this_rq();
8721+ task_t *p = current;
8722
8723- kstat.context_swtch++;
8724- /*
8725- * there are 3 processes which are affected by a context switch:
8726- *
8727- * prev == .... ==> (last => next)
8728- *
8729- * It's the 'much more previous' 'prev' that is on next's stack,
8730- * but prev is set to (the just run) 'last' process by switch_to().
8731- * This might sound slightly confusing but makes tons of sense.
8732- */
8733- prepare_to_switch();
8734- {
8735- struct mm_struct *mm = next->mm;
8736- struct mm_struct *oldmm = prev->active_mm;
8737- if (!mm) {
8738- BUG_ON(next->active_mm);
8739- next->active_mm = oldmm;
8740- atomic_inc(&oldmm->mm_count);
8741- enter_lazy_tlb(oldmm, next, this_cpu);
8742- } else {
8743- BUG_ON(next->active_mm != mm);
8744- switch_mm(oldmm, mm, next, this_cpu);
8745+ if (p == rq->idle) {
8746+ if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
8747+ kstat.per_cpu_system[cpu] += system;
8748+#if CONFIG_SMP
8749+ idle_tick();
8750+#endif
8751+ return;
8752+ }
8753+ if (TASK_NICE(p) > 0)
8754+ kstat.per_cpu_nice[cpu] += user_tick;
8755+ else
8756+ kstat.per_cpu_user[cpu] += user_tick;
8757+ kstat.per_cpu_system[cpu] += system;
8758+
8759+ /* Task might have expired already, but not scheduled off yet */
8760+ if (p->array != rq->active) {
8761+ set_tsk_need_resched(p);
8762+ return;
8763+ }
8764+ spin_lock(&rq->lock);
8765+ if (unlikely(rt_task(p))) {
8766+ /*
8767+ * RR tasks need a special form of timeslice management.
8768+ * FIFO tasks have no timeslices.
8769+ */
8770+ if ((p->policy == SCHED_RR) && !--p->time_slice) {
8771+ p->time_slice = TASK_TIMESLICE(p);
8772+ p->first_time_slice = 0;
8773+ set_tsk_need_resched(p);
8774+
8775+ /* put it at the end of the queue: */
8776+ dequeue_task(p, rq->active);
8777+ enqueue_task(p, rq->active);
8778 }
8779+ goto out;
8780+ }
8781+ /*
8782+ * The task was running during this tick - update the
8783+ * time slice counter and the sleep average. Note: we
8784+ * do not update a process's priority until it either
8785+ * goes to sleep or uses up its timeslice. This makes
8786+ * it possible for interactive tasks to use up their
8787+ * timeslices at their highest priority levels.
8788+ */
8789+ if (p->sleep_avg)
8790+ p->sleep_avg--;
8791+ if (!--p->time_slice) {
8792+ dequeue_task(p, rq->active);
8793+ set_tsk_need_resched(p);
8794+ p->prio = effective_prio(p);
8795+ p->time_slice = TASK_TIMESLICE(p);
8796+ p->first_time_slice = 0;
8797+
8798+ if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
8799+ if (!rq->expired_timestamp)
8800+ rq->expired_timestamp = jiffies;
8801+ enqueue_task(p, rq->expired);
8802+ } else
8803+ enqueue_task(p, rq->active);
8804+ }
8805+out:
8806+#if CONFIG_SMP
8807+ if (unlikely(time_before_eq(this_rq()->last_jiffy + BUSY_REBALANCE_TICK, jiffies))) {
8808+ load_balance(rq, 0);
8809+ rq->last_jiffy = jiffies;
8810+ }
8811+#endif
8812+ spin_unlock(&rq->lock);
8813+}
8814+
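
scheduler_tick() above refills an exhausted SCHED_RR slice and requeues the task at the tail of its priority level, while an expired SCHED_OTHER task is routed to the expired array unless it still counts as interactive. A compressed sketch of that per-tick decision; the interactivity and starvation tests are reduced to boolean parameters because TASK_INTERACTIVE() and TASK_TIMESLICE() are defined elsewhere in the patch.

#include <stdio.h>

enum where { KEEP_RUNNING, TAIL_OF_ACTIVE, EXPIRED_ARRAY };

/* Per-tick time-slice bookkeeping, compressed: decrement the slice and,
 * when it runs out, refill it and decide where the task is requeued. */
static enum where tick(int is_rr, int *time_slice, int fresh_slice,
                       int interactive, int expired_starving)
{
    if (--*time_slice > 0)
        return KEEP_RUNNING;

    *time_slice = fresh_slice;             /* refill, as TASK_TIMESLICE() would */
    if (is_rr)
        return TAIL_OF_ACTIVE;             /* SCHED_RR: round-robin at same prio */
    if (interactive && !expired_starving)
        return TAIL_OF_ACTIVE;             /* interactive tasks stay active */
    return EXPIRED_ARRAY;                  /* wait for the next array switch */
}

int main(void)
{
    int slice = 1;

    printf("%d\n", tick(0, &slice, 6, 0, 0));   /* 2: slice used up, expired */
    printf("%d\n", tick(1, &slice, 6, 0, 0));   /* 0: fresh slice, keeps running */
    return 0;
}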
8815+void scheduling_functions_start_here(void) { }
8816+
8817+/*
8818+ * 'schedule()' is the main scheduler function.
8819+ */
8820+asmlinkage void schedule(void)
8821+{
8822+ task_t *prev, *next;
8823+ runqueue_t *rq;
8824+ prio_array_t *array;
8825+	struct list_head *queue;
8826+ int idx;
8827+
8828+ if (unlikely(in_interrupt()))
8829+ BUG();
8830
8831- if (!prev->mm) {
8832- prev->active_mm = NULL;
8833- mmdrop(oldmm);
8834+need_resched:
8835+ prev = current;
8836+ rq = this_rq();
8837+
8838+ release_kernel_lock(prev, smp_processor_id());
8839+ prev->sleep_timestamp = jiffies;
8840+ spin_lock_irq(&rq->lock);
8841+
8842+ switch (prev->state) {
8843+ case TASK_INTERRUPTIBLE:
8844+ if (unlikely(signal_pending(prev))) {
8845+ prev->state = TASK_RUNNING;
8846+ break;
8847 }
8848+ default:
8849+ deactivate_task(prev, rq);
8850+ case TASK_RUNNING:
8851+ ;
8852+ }
8853+#if CONFIG_SMP
8854+pick_next_task:
8855+#endif
8856+ if (unlikely(!rq->nr_running)) {
8857+#if CONFIG_SMP
8858+ load_balance(rq, 2);
8859+ rq->last_jiffy = jiffies;
8860+ if (rq->nr_running)
8861+ goto pick_next_task;
8862+#endif
8863+ next = rq->idle;
8864+ rq->expired_timestamp = 0;
8865+ goto switch_tasks;
8866 }
8867
8868- /*
8869- * This just switches the register state and the
8870- * stack.
8871- */
8872- switch_to(prev, next, prev);
8873- __schedule_tail(prev);
8874+ array = rq->active;
8875+ if (unlikely(!array->nr_active)) {
8876+ /*
8877+ * Switch the active and expired arrays.
8878+ */
8879+ rq->active = rq->expired;
8880+ rq->expired = array;
8881+ array = rq->active;
8882+ rq->expired_timestamp = 0;
8883+ }
8884+
8885+ idx = sched_find_first_bit(array->bitmap);
8886+ queue = array->queue + idx;
8887+ next = list_entry(queue->next, task_t, run_list);
8888+
8889+switch_tasks:
8890+ prefetch(next);
8891+ clear_tsk_need_resched(prev);
8892+
8893+ if (likely(prev != next)) {
8894+ rq->nr_switches++;
8895+ rq->curr = next;
8896+
8897+ prepare_arch_switch(rq, next);
8898+ prev = context_switch(prev, next);
8899+ barrier();
8900+ rq = this_rq();
8901+ finish_arch_switch(rq, prev);
8902+ } else
8903+ spin_unlock_irq(&rq->lock);
8904
8905-same_process:
8906 reacquire_kernel_lock(current);
8907- if (current->need_resched)
8908- goto need_resched_back;
8909- return;
8910+ if (need_resched())
8911+ goto need_resched;
8912 }
8913
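
The new schedule() above never scans a global runqueue: it swaps the active and expired arrays when the active one drains, then picks the first set bit in the priority bitmap. A userspace sketch of that O(1) pick, assuming the usual MAX_PRIO of 140 and using a compiler builtin in place of the patch's hand-tuned sched_find_first_bit().

#include <stdio.h>
#include <string.h>

#define MAX_PRIO 140          /* assumed: 100 RT levels + 40 nice levels */
#define BITS     (sizeof(unsigned long) * 8)
#define WORDS    ((MAX_PRIO + BITS) / BITS)

struct prio_array {
    int nr_active;
    unsigned long bitmap[WORDS];
};

/* Lowest set bit == highest priority with runnable tasks.  The delimiter
 * bit set at MAX_PRIO guarantees the search terminates. */
static int find_first(const struct prio_array *a)
{
    int w;
    for (w = 0; w < (int)WORDS; w++)
        if (a->bitmap[w])
            return w * (int)BITS + __builtin_ctzl(a->bitmap[w]);
    return MAX_PRIO;
}

int main(void)
{
    struct prio_array arrays[2], *active = &arrays[0], *expired = &arrays[1];

    memset(arrays, 0, sizeof(arrays));
    active->bitmap[MAX_PRIO / BITS]  |= 1UL << (MAX_PRIO % BITS);  /* delimiter */
    expired->bitmap[MAX_PRIO / BITS] |= 1UL << (MAX_PRIO % BITS);

    expired->bitmap[120 / BITS] |= 1UL << (120 % BITS);  /* one expired task */
    expired->nr_active = 1;

    if (!active->nr_active) {            /* the array switch in schedule() */
        struct prio_array *tmp = active;
        active = expired;
        expired = tmp;
    }
    printf("next priority: %d\n", find_first(active));   /* prints 120 */
    return 0;
}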
8914 /*
8915- * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just wake everything
8916- * up. If it's an exclusive wakeup (nr_exclusive == small +ve number) then we wake all the
8917- * non-exclusive tasks and one exclusive task.
8918+ * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
8919+ * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
8920+ * number) then we wake all the non-exclusive tasks and one exclusive task.
8921 *
8922 * There are circumstances in which we can try to wake a task which has already
8923- * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns zero
8924- * in this (rare) case, and we handle it by contonuing to scan the queue.
8925+ * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
8926+ * zero in this (rare) case, and we handle it by continuing to scan the queue.
8927 */
8928-static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode,
8929- int nr_exclusive, const int sync)
8930+static inline void __wake_up_common(wait_queue_head_t *q, unsigned int mode, int nr_exclusive, int sync)
8931 {
8932 struct list_head *tmp;
8933- struct task_struct *p;
8934-
8935- CHECK_MAGIC_WQHEAD(q);
8936- WQ_CHECK_LIST_HEAD(&q->task_list);
8937-
8938- list_for_each(tmp,&q->task_list) {
8939- unsigned int state;
8940- wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
8941+ unsigned int state;
8942+ wait_queue_t *curr;
8943+ task_t *p;
8944
8945- CHECK_MAGIC(curr->__magic);
8946+ list_for_each(tmp, &q->task_list) {
8947+ curr = list_entry(tmp, wait_queue_t, task_list);
8948 p = curr->task;
8949 state = p->state;
8950- if (state & mode) {
8951- WQ_NOTE_WAKER(curr);
8952- if (try_to_wake_up(p, sync) && (curr->flags&WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
8953+ if ((state & mode) && try_to_wake_up(p, sync) &&
8954+ ((curr->flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive))
8955 break;
8956- }
8957 }
8958 }
8959
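
__wake_up_common() above wakes every non-exclusive waiter but only nr_exclusive exclusive ones (and, in the real code, only counts a waiter once try_to_wake_up() succeeds). A small model of just the counting rule:

#include <stdio.h>

#define WQ_FLAG_EXCLUSIVE 0x01

struct waiter { int flags; const char *name; };

/* Every non-exclusive waiter is woken, but only nr_exclusive exclusive
 * ones; the scan stops once the exclusive quota is used up. */
static void wake_all(const struct waiter *w, int n, int nr_exclusive)
{
    int i;
    for (i = 0; i < n; i++) {
        printf("waking %s\n", w[i].name);
        if ((w[i].flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
            break;
    }
}

int main(void)
{
    struct waiter q[] = {
        { 0,                 "reader A" },
        { WQ_FLAG_EXCLUSIVE, "writer 1" },
        { WQ_FLAG_EXCLUSIVE, "writer 2" },   /* not woken: quota used up */
    };
    wake_all(q, 3, 1);
    return 0;
}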
8960-void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr)
8961+void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
8962 {
8963- if (q) {
8964- unsigned long flags;
8965- wq_read_lock_irqsave(&q->lock, flags);
8966- __wake_up_common(q, mode, nr, 0);
8967- wq_read_unlock_irqrestore(&q->lock, flags);
8968- }
8969+ unsigned long flags;
8970+
8971+ if (unlikely(!q))
8972+ return;
8973+
8974+ wq_read_lock_irqsave(&q->lock, flags);
8975+ __wake_up_common(q, mode, nr_exclusive, 0);
8976+ wq_read_unlock_irqrestore(&q->lock, flags);
8977 }
8978
8979-void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr)
8980+#if CONFIG_SMP
8981+
8982+void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
8983 {
8984- if (q) {
8985- unsigned long flags;
8986- wq_read_lock_irqsave(&q->lock, flags);
8987- __wake_up_common(q, mode, nr, 1);
8988- wq_read_unlock_irqrestore(&q->lock, flags);
8989- }
8990+ unsigned long flags;
8991+
8992+ if (unlikely(!q))
8993+ return;
8994+
8995+ wq_read_lock_irqsave(&q->lock, flags);
8996+ if (likely(nr_exclusive))
8997+ __wake_up_common(q, mode, nr_exclusive, 1);
8998+ else
8999+ __wake_up_common(q, mode, nr_exclusive, 0);
9000+ wq_read_unlock_irqrestore(&q->lock, flags);
9001 }
9002
9003+#endif
9004+
9005 void complete(struct completion *x)
9006 {
9007 unsigned long flags;
9008
9009- spin_lock_irqsave(&x->wait.lock, flags);
9010+ wq_write_lock_irqsave(&x->wait.lock, flags);
9011 x->done++;
9012 __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, 0);
9013- spin_unlock_irqrestore(&x->wait.lock, flags);
9014+ wq_write_unlock_irqrestore(&x->wait.lock, flags);
9015 }
9016
9017 void wait_for_completion(struct completion *x)
9018 {
9019- spin_lock_irq(&x->wait.lock);
9020+ wq_write_lock_irq(&x->wait.lock);
9021 if (!x->done) {
9022 DECLARE_WAITQUEUE(wait, current);
9023
9024@@ -775,14 +1060,14 @@
9025 __add_wait_queue_tail(&x->wait, &wait);
9026 do {
9027 __set_current_state(TASK_UNINTERRUPTIBLE);
9028- spin_unlock_irq(&x->wait.lock);
9029+ wq_write_unlock_irq(&x->wait.lock);
9030 schedule();
9031- spin_lock_irq(&x->wait.lock);
9032+ wq_write_lock_irq(&x->wait.lock);
9033 } while (!x->done);
9034 __remove_wait_queue(&x->wait, &wait);
9035 }
9036 x->done--;
9037- spin_unlock_irq(&x->wait.lock);
9038+ wq_write_unlock_irq(&x->wait.lock);
9039 }
9040
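
complete() and wait_for_completion() above implement a counted hand-shake on top of a wait queue; the migration code later in this patch uses exactly this pair on req.done. Below is a userspace pthread analogue of the same contract, not the kernel implementation.

#include <pthread.h>
#include <stdio.h>

/* Userspace analogue of struct completion: complete() bumps a counter
 * under a lock, wait_for_completion() sleeps until the counter is
 * non-zero and then consumes one unit. */
struct completion {
    pthread_mutex_t lock;
    pthread_cond_t  wait;
    unsigned int    done;
};

static void init_completion(struct completion *x)
{
    pthread_mutex_init(&x->lock, NULL);
    pthread_cond_init(&x->wait, NULL);
    x->done = 0;
}

static void complete(struct completion *x)
{
    pthread_mutex_lock(&x->lock);
    x->done++;
    pthread_cond_signal(&x->wait);
    pthread_mutex_unlock(&x->lock);
}

static void wait_for_completion(struct completion *x)
{
    pthread_mutex_lock(&x->lock);
    while (!x->done)
        pthread_cond_wait(&x->wait, &x->lock);
    x->done--;
    pthread_mutex_unlock(&x->lock);
}

static void *worker(void *arg)
{
    complete(arg);                    /* tell the waiter we are done */
    return NULL;
}

int main(void)
{
    struct completion done;
    pthread_t t;

    init_completion(&done);
    pthread_create(&t, NULL, worker, &done);
    wait_for_completion(&done);       /* blocks until worker() ran */
    pthread_join(t, NULL);
    puts("completed");
    return 0;
}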
9041 #define SLEEP_ON_VAR \
9042@@ -850,43 +1135,40 @@
9043
9044 void scheduling_functions_end_here(void) { }
9045
9046-#if CONFIG_SMP
9047-/**
9048- * set_cpus_allowed() - change a given task's processor affinity
9049- * @p: task to bind
9050- * @new_mask: bitmask of allowed processors
9051- *
9052- * Upon return, the task is running on a legal processor. Note the caller
9053- * must have a valid reference to the task: it must not exit() prematurely.
9054- * This call can sleep; do not hold locks on call.
9055- */
9056-void set_cpus_allowed(struct task_struct *p, unsigned long new_mask)
9057+void set_user_nice(task_t *p, long nice)
9058 {
9059- new_mask &= cpu_online_map;
9060- BUG_ON(!new_mask);
9061-
9062- p->cpus_allowed = new_mask;
9063+ unsigned long flags;
9064+ prio_array_t *array;
9065+ runqueue_t *rq;
9066
9067+ if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
9068+ return;
9069 /*
9070- * If the task is on a no-longer-allowed processor, we need to move
9071- * it. If the task is not current, then set need_resched and send
9072- * its processor an IPI to reschedule.
9073+ * We have to be careful, if called from sys_setpriority(),
9074+ * the task might be in the middle of scheduling on another CPU.
9075 */
9076- if (!(p->cpus_runnable & p->cpus_allowed)) {
9077- if (p != current) {
9078- p->need_resched = 1;
9079- smp_send_reschedule(p->processor);
9080- }
9081+ rq = task_rq_lock(p, &flags);
9082+ if (rt_task(p)) {
9083+ p->static_prio = NICE_TO_PRIO(nice);
9084+ goto out_unlock;
9085+ }
9086+ array = p->array;
9087+ if (array)
9088+ dequeue_task(p, array);
9089+ p->static_prio = NICE_TO_PRIO(nice);
9090+ p->prio = NICE_TO_PRIO(nice);
9091+ if (array) {
9092+ enqueue_task(p, array);
9093 /*
9094- * Wait until we are on a legal processor. If the task is
9095- * current, then we should be on a legal processor the next
9096- * time we reschedule. Otherwise, we need to wait for the IPI.
9097+ * If the task is running and lowered its priority,
9098+ * or increased its priority then reschedule its CPU:
9099 */
9100- while (!(p->cpus_runnable & p->cpus_allowed))
9101- schedule();
9102+ if (p == rq->curr)
9103+ resched_task(rq->curr);
9104 }
9105+out_unlock:
9106+ task_rq_unlock(rq, &flags);
9107 }
9108-#endif /* CONFIG_SMP */
9109
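
set_user_nice() above dequeues the task, recomputes static_prio and prio from the nice value, re-enqueues it, and reschedules the CPU if the task is currently running. The NICE_TO_PRIO()/PRIO_TO_NICE() macros are defined in a part of the patch not shown in this hunk; the sketch below assumes their usual O(1)-scheduler form with MAX_RT_PRIO of 100.

#include <stdio.h>

/* Assumed nice <-> static priority mapping (defined elsewhere in the
 * patch): nice -20..19 maps onto static_prio 100..139. */
#define MAX_RT_PRIO        100
#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20)
#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20)

int main(void)
{
    int nice;

    for (nice = -20; nice <= 19; nice += 13)
        printf("nice %3d -> static_prio %3d -> nice %3d\n",
               nice, NICE_TO_PRIO(nice), PRIO_TO_NICE(NICE_TO_PRIO(nice)));
    return 0;
}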
9110 #ifndef __alpha__
9111
9112@@ -898,7 +1180,7 @@
9113
9114 asmlinkage long sys_nice(int increment)
9115 {
9116- long newprio;
9117+ long nice;
9118
9119 /*
9120 * Setpriority might change our priority at the same moment.
9121@@ -914,32 +1196,46 @@
9122 if (increment > 40)
9123 increment = 40;
9124
9125- newprio = current->nice + increment;
9126- if (newprio < -20)
9127- newprio = -20;
9128- if (newprio > 19)
9129- newprio = 19;
9130- current->nice = newprio;
9131+ nice = PRIO_TO_NICE(current->static_prio) + increment;
9132+ if (nice < -20)
9133+ nice = -20;
9134+ if (nice > 19)
9135+ nice = 19;
9136+ set_user_nice(current, nice);
9137 return 0;
9138 }
9139
9140 #endif
9141
9142-static inline struct task_struct *find_process_by_pid(pid_t pid)
9143+/*
9144+ * This is the priority value as seen by users in /proc
9145+ *
9146+ * RT tasks are offset by -200. Normal tasks are centered
9147+ * around 0, value goes from -16 to +15.
9148+ */
9149+int task_prio(task_t *p)
9150 {
9151- struct task_struct *tsk = current;
9152+ return p->prio - MAX_USER_RT_PRIO;
9153+}
9154
9155- if (pid)
9156- tsk = find_task_by_pid(pid);
9157- return tsk;
9158+int task_nice(task_t *p)
9159+{
9160+ return TASK_NICE(p);
9161+}
9162+
9163+static inline task_t *find_process_by_pid(pid_t pid)
9164+{
9165+ return pid ? find_task_by_pid(pid) : current;
9166 }
9167
9168-static int setscheduler(pid_t pid, int policy,
9169- struct sched_param *param)
9170+static int setscheduler(pid_t pid, int policy, struct sched_param *param)
9171 {
9172 struct sched_param lp;
9173- struct task_struct *p;
9174+ prio_array_t *array;
9175+ unsigned long flags;
9176+ runqueue_t *rq;
9177 int retval;
9178+ task_t *p;
9179
9180 retval = -EINVAL;
9181 if (!param || pid < 0)
9182@@ -953,14 +1249,19 @@
9183 * We play safe to avoid deadlocks.
9184 */
9185 read_lock_irq(&tasklist_lock);
9186- spin_lock(&runqueue_lock);
9187
9188 p = find_process_by_pid(pid);
9189
9190 retval = -ESRCH;
9191 if (!p)
9192- goto out_unlock;
9193-
9194+ goto out_unlock_tasklist;
9195+
9196+ /*
9197+ * To be able to change p->policy safely, the appropriate
9198+ * runqueue lock must be held.
9199+ */
9200+ rq = task_rq_lock(p, &flags);
9201+
9202 if (policy < 0)
9203 policy = p->policy;
9204 else {
9205@@ -969,40 +1270,48 @@
9206 policy != SCHED_OTHER)
9207 goto out_unlock;
9208 }
9209-
9210+
9211 /*
9212- * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid
9213- * priority for SCHED_OTHER is 0.
9214+ * Valid priorities for SCHED_FIFO and SCHED_RR are
9215+ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_OTHER is 0.
9216 */
9217 retval = -EINVAL;
9218- if (lp.sched_priority < 0 || lp.sched_priority > 99)
9219+ if (lp.sched_priority < 0 || lp.sched_priority > MAX_USER_RT_PRIO-1)
9220 goto out_unlock;
9221 if ((policy == SCHED_OTHER) != (lp.sched_priority == 0))
9222 goto out_unlock;
9223
9224 retval = -EPERM;
9225- if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
9226+ if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
9227 !capable(CAP_SYS_NICE))
9228 goto out_unlock;
9229 if ((current->euid != p->euid) && (current->euid != p->uid) &&
9230 !capable(CAP_SYS_NICE))
9231 goto out_unlock;
9232
9233+ array = p->array;
9234+ if (array)
9235+ deactivate_task(p, task_rq(p));
9236 retval = 0;
9237 p->policy = policy;
9238 p->rt_priority = lp.sched_priority;
9239-
9240- current->need_resched = 1;
9241+ if (policy != SCHED_OTHER)
9242+ p->prio = MAX_USER_RT_PRIO-1 - p->rt_priority;
9243+ else
9244+ p->prio = p->static_prio;
9245+ if (array)
9246+ activate_task(p, task_rq(p));
9247
9248 out_unlock:
9249- spin_unlock(&runqueue_lock);
9250+ task_rq_unlock(rq, &flags);
9251+out_unlock_tasklist:
9252 read_unlock_irq(&tasklist_lock);
9253
9254 out_nounlock:
9255 return retval;
9256 }
9257
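
setscheduler() above maps a user-supplied sched_priority of 1..MAX_USER_RT_PRIO-1 onto an internal prio and requires CAP_SYS_NICE for the real-time classes. From user space the same path is reached through the standard POSIX call; a short usage example follows (it will typically take the error branch when run unprivileged).

#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>

/* SCHED_FIFO/SCHED_RR take a priority in 1..sched_get_priority_max(),
 * SCHED_OTHER takes 0; anything else is rejected with EINVAL, and
 * real-time classes need CAP_SYS_NICE (otherwise EPERM). */
int main(void)
{
    struct sched_param sp;

    memset(&sp, 0, sizeof(sp));
    sp.sched_priority = 1;

    if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1)
        printf("SCHED_FIFO refused: %s\n", strerror(errno));
    else
        printf("now SCHED_FIFO, priority %d (max %d)\n",
               sp.sched_priority, sched_get_priority_max(SCHED_FIFO));
    return 0;
}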
9258-asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,
9259+asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,
9260 struct sched_param *param)
9261 {
9262 return setscheduler(pid, policy, param);
9263@@ -1015,7 +1324,7 @@
9264
9265 asmlinkage long sys_sched_getscheduler(pid_t pid)
9266 {
9267- struct task_struct *p;
9268+ task_t *p;
9269 int retval;
9270
9271 retval = -EINVAL;
9272@@ -1026,7 +1335,7 @@
9273 read_lock(&tasklist_lock);
9274 p = find_process_by_pid(pid);
9275 if (p)
9276- retval = p->policy & ~SCHED_YIELD;
9277+ retval = p->policy;
9278 read_unlock(&tasklist_lock);
9279
9280 out_nounlock:
9281@@ -1035,7 +1344,7 @@
9282
9283 asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param *param)
9284 {
9285- struct task_struct *p;
9286+ task_t *p;
9287 struct sched_param lp;
9288 int retval;
9289
9290@@ -1066,42 +1375,64 @@
9291
9292 asmlinkage long sys_sched_yield(void)
9293 {
9294- /*
9295- * Trick. sched_yield() first counts the number of truly
9296- * 'pending' runnable processes, then returns if it's
9297- * only the current processes. (This test does not have
9298- * to be atomic.) In threaded applications this optimization
9299- * gets triggered quite often.
9300- */
9301+ runqueue_t *rq = this_rq();
9302+ prio_array_t *array;
9303+ int i;
9304
9305- int nr_pending = nr_running;
9306+ spin_lock_irq(&rq->lock);
9307+
9308+ if (unlikely(rq->nr_running == 1)) {
9309+ spin_unlock_irq(&rq->lock);
9310+ return 0;
9311+ }
9312
9313-#if CONFIG_SMP
9314- int i;
9315+ array = current->array;
9316+ if (unlikely(rt_task(current))) {
9317+ list_del(&current->run_list);
9318+ list_add_tail(&current->run_list, array->queue + current->prio);
9319+ goto out_unlock;
9320+ }
9321
9322- // Subtract non-idle processes running on other CPUs.
9323- for (i = 0; i < smp_num_cpus; i++) {
9324- int cpu = cpu_logical_map(i);
9325- if (aligned_data[cpu].schedule_data.curr != idle_task(cpu))
9326- nr_pending--;
9327+ if (unlikely(array == rq->expired) && rq->active->nr_active)
9328+ goto out_unlock;
9329+
9330+ list_del(&current->run_list);
9331+ if (!list_empty(array->queue + current->prio)) {
9332+ list_add(&current->run_list, array->queue[current->prio].next);
9333+ goto out_unlock;
9334 }
9335-#else
9336- // on UP this process is on the runqueue as well
9337- nr_pending--;
9338-#endif
9339- if (nr_pending) {
9340+
9341+ __clear_bit(current->prio, array->bitmap);
9342+ if (likely(array == rq->active) && array->nr_active == 1) {
9343 /*
9344- * This process can only be rescheduled by us,
9345- * so this is safe without any locking.
9346+ * We're the last task in the active queue so
9347+ * we must move ourselves to the expired array
9348+ * to avoid running again immediately.
9349 */
9350- if (current->policy == SCHED_OTHER)
9351- current->policy |= SCHED_YIELD;
9352- current->need_resched = 1;
9353-
9354- spin_lock_irq(&runqueue_lock);
9355- move_last_runqueue(current);
9356- spin_unlock_irq(&runqueue_lock);
9357+ array->nr_active--;
9358+ array = rq->expired;
9359+ array->nr_active++;
9360 }
9361+
9362+ i = sched_find_first_bit(array->bitmap);
9363+
9364+ BUG_ON(i == MAX_PRIO);
9365+ BUG_ON(i == current->prio && array == current->array);
9366+
9367+ if (array == current->array && i < current->prio)
9368+ i = current->prio;
9369+ else {
9370+ current->array = array;
9371+ current->prio = i;
9372+ }
9373+ list_add(&current->run_list, array->queue[i].next);
9374+ __set_bit(i, array->bitmap);
9375+
9376+out_unlock:
9377+ spin_unlock_irq(&rq->lock);
9378+
9379+ schedule();
9380+
9381 return 0;
9382 }
9383
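
sys_sched_yield() above re-inserts the caller behind its peers, or moves it to the expired array when it is the last active task, so that it does not run again immediately. A compressed restatement of the main cases; the exact re-queue position and the demotion to the next busy priority level are simplified.

#include <stdio.h>

enum requeue { TAIL_SAME_PRIO, BEHIND_PEERS, INTO_EXPIRED, DEMOTED };

/* Where does a yielding task go?  A simplified decision table for the
 * sys_sched_yield() cases above. */
static enum requeue yield_target(int rt, int peers_at_same_prio,
                                 int last_in_active)
{
    if (rt)
        return TAIL_SAME_PRIO;        /* RT: round-robin within its prio */
    if (peers_at_same_prio)
        return BEHIND_PEERS;          /* let an equal-priority task run first */
    if (last_in_active)
        return INTO_EXPIRED;          /* avoid running again immediately */
    return DEMOTED;                   /* re-queued at the next busy priority */
}

int main(void)
{
    printf("%d\n", yield_target(0, 0, 1));   /* 2: moved to the expired array */
    printf("%d\n", yield_target(1, 1, 0));   /* 0: RT stays in its own queue  */
    return 0;
}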
9384@@ -1113,14 +1444,13 @@
9385 */
9386 void yield(void)
9387 {
9388- set_current_state(TASK_RUNNING);
9389+ __set_current_state(TASK_RUNNING);
9390 sys_sched_yield();
9391- schedule();
9392 }
9393
9394 void __cond_resched(void)
9395 {
9396- set_current_state(TASK_RUNNING);
9397+ __set_current_state(TASK_RUNNING);
9398 schedule();
9399 }
9400
9401@@ -1131,7 +1461,7 @@
9402 switch (policy) {
9403 case SCHED_FIFO:
9404 case SCHED_RR:
9405- ret = 99;
9406+ ret = MAX_USER_RT_PRIO-1;
9407 break;
9408 case SCHED_OTHER:
9409 ret = 0;
9410@@ -1158,7 +1488,7 @@
9411 asmlinkage long sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
9412 {
9413 struct timespec t;
9414- struct task_struct *p;
9415+ task_t *p;
9416 int retval = -EINVAL;
9417
9418 if (pid < 0)
9419@@ -1168,8 +1498,8 @@
9420 read_lock(&tasklist_lock);
9421 p = find_process_by_pid(pid);
9422 if (p)
9423- jiffies_to_timespec(p->policy & SCHED_FIFO ? 0 : NICE_TO_TICKS(p->nice),
9424- &t);
9425+ jiffies_to_timespec(p->policy & SCHED_FIFO ?
9426+ 0 : TASK_TIMESLICE(p), &t);
9427 read_unlock(&tasklist_lock);
9428 if (p)
9429 retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
9430@@ -1177,14 +1507,14 @@
9431 return retval;
9432 }
9433
9434-static void show_task(struct task_struct * p)
9435+static void show_task(task_t * p)
9436 {
9437 unsigned long free = 0;
9438 int state;
9439 static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };
9440
9441 printk("%-13.13s ", p->comm);
9442- state = p->state ? ffz(~p->state) + 1 : 0;
9443+ state = p->state ? __ffs(p->state) + 1 : 0;
9444 if (((unsigned) state) < sizeof(stat_nam)/sizeof(char *))
9445 printk(stat_nam[state]);
9446 else
9447@@ -1225,7 +1555,7 @@
9448 printk(" (NOTLB)\n");
9449
9450 {
9451- extern void show_trace_task(struct task_struct *tsk);
9452+ extern void show_trace_task(task_t *tsk);
9453 show_trace_task(p);
9454 }
9455 }
9456@@ -1247,7 +1577,7 @@
9457
9458 void show_state(void)
9459 {
9460- struct task_struct *p;
9461+ task_t *p;
9462
9463 #if (BITS_PER_LONG == 32)
9464 printk("\n"
9465@@ -1270,128 +1600,280 @@
9466 read_unlock(&tasklist_lock);
9467 }
9468
9469-/**
9470- * reparent_to_init() - Reparent the calling kernel thread to the init task.
9471- *
9472- * If a kernel thread is launched as a result of a system call, or if
9473- * it ever exits, it should generally reparent itself to init so that
9474- * it is correctly cleaned up on exit.
9475+/*
9476+ * double_rq_lock - safely lock two runqueues
9477 *
9478- * The various task state such as scheduling policy and priority may have
9479- * been inherited fro a user process, so we reset them to sane values here.
9480+ * Note this does not disable interrupts like task_rq_lock,
9481+ * you need to do so manually before calling.
9482+ */
9483+static inline void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
9484+{
9485+ if (rq1 == rq2)
9486+ spin_lock(&rq1->lock);
9487+ else {
9488+ if (rq1 < rq2) {
9489+ spin_lock(&rq1->lock);
9490+ spin_lock(&rq2->lock);
9491+ } else {
9492+ spin_lock(&rq2->lock);
9493+ spin_lock(&rq1->lock);
9494+ }
9495+ }
9496+}
9497+
9498+/*
9499+ * double_rq_unlock - safely unlock two runqueues
9500 *
9501- * NOTE that reparent_to_init() gives the caller full capabilities.
9502+ * Note this does not restore interrupts like task_rq_unlock,
9503+ * you need to do so manually after calling.
9504 */
9505-void reparent_to_init(void)
9506+static inline void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2)
9507 {
9508- struct task_struct *this_task = current;
9509+ spin_unlock(&rq1->lock);
9510+ if (rq1 != rq2)
9511+ spin_unlock(&rq2->lock);
9512+}
9513
9514- write_lock_irq(&tasklist_lock);
9515+void __init init_idle(task_t *idle, int cpu)
9516+{
9517+ runqueue_t *idle_rq = cpu_rq(cpu), *rq = cpu_rq(idle->cpu);
9518+ unsigned long flags;
9519
9520- /* Reparent to init */
9521- REMOVE_LINKS(this_task);
9522- this_task->p_pptr = child_reaper;
9523- this_task->p_opptr = child_reaper;
9524- SET_LINKS(this_task);
9525+ __save_flags(flags);
9526+ __cli();
9527+ double_rq_lock(idle_rq, rq);
9528+
9529+ idle_rq->curr = idle_rq->idle = idle;
9530+ deactivate_task(idle, rq);
9531+ idle->array = NULL;
9532+ idle->prio = MAX_PRIO;
9533+ idle->state = TASK_RUNNING;
9534+ idle->cpu = cpu;
9535+ double_rq_unlock(idle_rq, rq);
9536+ set_tsk_need_resched(idle);
9537+ __restore_flags(flags);
9538+}
9539+
9540+extern void init_timervecs(void);
9541+extern void timer_bh(void);
9542+extern void tqueue_bh(void);
9543+extern void immediate_bh(void);
9544+
9545+void __init sched_init(void)
9546+{
9547+ runqueue_t *rq;
9548+ int i, j, k;
9549+
9550+ for (i = 0; i < NR_CPUS; i++) {
9551+ prio_array_t *array;
9552
9553- /* Set the exit signal to SIGCHLD so we signal init on exit */
9554- this_task->exit_signal = SIGCHLD;
9555+ rq = cpu_rq(i);
9556+ rq->active = rq->arrays;
9557+ rq->expired = rq->arrays + 1;
9558+ spin_lock_init(&rq->lock);
9559+#ifdef CONFIG_SMP
9560+ INIT_LIST_HEAD(&rq->migration_queue);
9561+#endif
9562
9563- /* We also take the runqueue_lock while altering task fields
9564- * which affect scheduling decisions */
9565- spin_lock(&runqueue_lock);
9566+ for (j = 0; j < 2; j++) {
9567+ array = rq->arrays + j;
9568+ for (k = 0; k < MAX_PRIO; k++) {
9569+ INIT_LIST_HEAD(array->queue + k);
9570+ __clear_bit(k, array->bitmap);
9571+ }
9572+ // delimiter for bitsearch
9573+ __set_bit(MAX_PRIO, array->bitmap);
9574+ }
9575+ }
9576+ /*
9577+ * We have to do a little magic to get the first
9578+ * process right in SMP mode.
9579+ */
9580+ rq = this_rq();
9581+ rq->curr = current;
9582+ rq->idle = current;
9583+ current->cpu = smp_processor_id();
9584+ wake_up_process(current);
9585
9586- this_task->ptrace = 0;
9587- this_task->nice = DEF_NICE;
9588- this_task->policy = SCHED_OTHER;
9589- /* cpus_allowed? */
9590- /* rt_priority? */
9591- /* signals? */
9592- this_task->cap_effective = CAP_INIT_EFF_SET;
9593- this_task->cap_inheritable = CAP_INIT_INH_SET;
9594- this_task->cap_permitted = CAP_FULL_SET;
9595- this_task->keep_capabilities = 0;
9596- memcpy(this_task->rlim, init_task.rlim, sizeof(*(this_task->rlim)));
9597- switch_uid(INIT_USER);
9598+ init_timervecs();
9599+ init_bh(TIMER_BH, timer_bh);
9600+ init_bh(TQUEUE_BH, tqueue_bh);
9601+ init_bh(IMMEDIATE_BH, immediate_bh);
9602
9603- spin_unlock(&runqueue_lock);
9604- write_unlock_irq(&tasklist_lock);
9605+ /*
9606+ * The boot idle thread does lazy MMU switching as well:
9607+ */
9608+ atomic_inc(&init_mm.mm_count);
9609+ enter_lazy_tlb(&init_mm, current, smp_processor_id());
9610 }
9611
9612+#if CONFIG_SMP
9613+
9614 /*
9615- * Put all the gunge required to become a kernel thread without
9616- * attached user resources in one place where it belongs.
9617- */
9618+ * This is how migration works:
9619+ *
9620+ * 1) we queue a migration_req_t structure in the source CPU's
9621+ * runqueue and wake up that CPU's migration thread.
9622+ * 2) we down() the locked semaphore => thread blocks.
9623+ * 3) migration thread wakes up (implicitly it forces the migrated
9624+ * thread off the CPU)
9625+ * 4) it gets the migration request and checks whether the migrated
9626+ * task is still in the wrong runqueue.
9627+ * 5) if it's in the wrong runqueue then the migration thread removes
9628+ * it and puts it into the right queue.
9629+ * 6) migration thread up()s the semaphore.
9630+ * 7) we wake up and the migration is done.
9631+ */
9632+
9633+typedef struct {
9634+	struct list_head list;
9635+ task_t *task;
9636+ struct completion done;
9637+} migration_req_t;
9638
9639-void daemonize(void)
9640+/*
9641+ * Change a given task's CPU affinity. Migrate the process to a
9642+ * proper CPU and schedule it away if the CPU it's executing on
9643+ * is removed from the allowed bitmask.
9644+ *
9645+ * NOTE: the caller must have a valid reference to the task, the
9646+ * task must not exit() & deallocate itself prematurely. The
9647+ * call is not atomic; no spinlocks may be held.
9648+ */
9649+void set_cpus_allowed(task_t *p, unsigned long new_mask)
9650 {
9651- struct fs_struct *fs;
9652+ unsigned long flags;
9653+ migration_req_t req;
9654+ runqueue_t *rq;
9655
9656+ new_mask &= cpu_online_map;
9657+ if (!new_mask)
9658+ BUG();
9659
9660+ rq = task_rq_lock(p, &flags);
9661+ p->cpus_allowed = new_mask;
9662 /*
9663- * If we were started as result of loading a module, close all of the
9664- * user space pages. We don't need them, and if we didn't close them
9665- * they would be locked into memory.
9666+ * Can the task run on the task's current CPU? If not then
9667+ * migrate the process off to a proper CPU.
9668 */
9669- exit_mm(current);
9670+ if (new_mask & (1UL << p->cpu)) {
9671+ task_rq_unlock(rq, &flags);
9672+ return;
9673+ }
9674
9675- current->session = 1;
9676- current->pgrp = 1;
9677- current->tty = NULL;
9678+ /*
9679+ * If the task is not on a runqueue, then it is safe to
9680+ * simply update the task's cpu field.
9681+ */
9682+ if (!p->array && (p != rq->curr)) {
9683+ p->cpu = __ffs(p->cpus_allowed);
9684+ task_rq_unlock(rq, &flags);
9685+ return;
9686+ }
9687
9688- /* Become as one with the init task */
9689+ init_completion(&req.done);
9690+ req.task = p;
9691+ list_add(&req.list, &rq->migration_queue);
9692+ task_rq_unlock(rq, &flags);
9693+ wake_up_process(rq->migration_thread);
9694
9695- exit_fs(current); /* current->fs->count--; */
9696- fs = init_task.fs;
9697- current->fs = fs;
9698- atomic_inc(&fs->count);
9699- exit_files(current);
9700- current->files = init_task.files;
9701- atomic_inc(&current->files->count);
9702+ wait_for_completion(&req.done);
9703 }
9704
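
set_cpus_allowed() above only queues a migration request when the task's current CPU has dropped out of the new mask; otherwise it just updates p->cpus_allowed, and for a task that is not running it picks the first allowed CPU with __ffs(). A standalone sketch of those mask checks, with a GCC builtin standing in for __ffs().

#include <stdio.h>

/* Is the current CPU still in the affinity mask, and if not, which CPU
 * should the task move to? */
static int first_allowed_cpu(unsigned long mask)
{
    return __builtin_ctzl(mask);       /* lowest set bit, mask must be != 0 */
}

int main(void)
{
    unsigned long new_mask = 0xcUL;    /* CPUs 2 and 3 */
    int cur_cpu = 1;

    if (new_mask & (1UL << cur_cpu))
        printf("CPU %d still allowed, nothing to do\n", cur_cpu);
    else
        printf("migrate from CPU %d to CPU %d\n",
               cur_cpu, first_allowed_cpu(new_mask));
    return 0;
}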
9705-extern unsigned long wait_init_idle;
9706+static __initdata int master_migration_thread;
9707
9708-void __init init_idle(void)
9709+static int migration_thread(void * bind_cpu)
9710 {
9711- struct schedule_data * sched_data;
9712- sched_data = &aligned_data[smp_processor_id()].schedule_data;
9713+ int cpu = cpu_logical_map((int) (long) bind_cpu);
9714+ struct sched_param param = { sched_priority: MAX_RT_PRIO-1 };
9715+ runqueue_t *rq;
9716+ int ret;
9717
9718- if (current != &init_task && task_on_runqueue(current)) {
9719- printk("UGH! (%d:%d) was on the runqueue, removing.\n",
9720- smp_processor_id(), current->pid);
9721- del_from_runqueue(current);
9722+ daemonize();
9723+ sigfillset(&current->blocked);
9724+ set_fs(KERNEL_DS);
9725+ /*
9726+ * The first migration thread is started on the boot CPU; it
9727+ * migrates the other migration threads to their destination CPUs.
9728+ */
9729+ if (cpu != master_migration_thread) {
9730+ while (!cpu_rq(master_migration_thread)->migration_thread)
9731+ yield();
9732+ set_cpus_allowed(current, 1UL << cpu);
9733 }
9734- sched_data->curr = current;
9735- sched_data->last_schedule = get_cycles();
9736- clear_bit(current->processor, &wait_init_idle);
9737-}
9738+ printk("migration_task %d on cpu=%d\n", cpu, smp_processor_id());
9739+ ret = setscheduler(0, SCHED_FIFO, &param);
9740
9741-extern void init_timervecs (void);
9742+ rq = this_rq();
9743+ rq->migration_thread = current;
9744
9745-void __init sched_init(void)
9746-{
9747- /*
9748- * We have to do a little magic to get the first
9749- * process right in SMP mode.
9750- */
9751- int cpu = smp_processor_id();
9752- int nr;
9753+ sprintf(current->comm, "migration_CPU%d", smp_processor_id());
9754
9755- init_task.processor = cpu;
9756+ for (;;) {
9757+ runqueue_t *rq_src, *rq_dest;
9758+ struct list_head *head;
9759+ int cpu_src, cpu_dest;
9760+ migration_req_t *req;
9761+ unsigned long flags;
9762+ task_t *p;
9763
9764- for(nr = 0; nr < PIDHASH_SZ; nr++)
9765- pidhash[nr] = NULL;
9766+ spin_lock_irqsave(&rq->lock, flags);
9767+ head = &rq->migration_queue;
9768+ current->state = TASK_INTERRUPTIBLE;
9769+ if (list_empty(head)) {
9770+ spin_unlock_irqrestore(&rq->lock, flags);
9771+ schedule();
9772+ continue;
9773+ }
9774+ req = list_entry(head->next, migration_req_t, list);
9775+ list_del_init(head->next);
9776+ spin_unlock_irqrestore(&rq->lock, flags);
9777+
9778+ p = req->task;
9779+ cpu_dest = __ffs(p->cpus_allowed);
9780+ rq_dest = cpu_rq(cpu_dest);
9781+repeat:
9782+ cpu_src = p->cpu;
9783+ rq_src = cpu_rq(cpu_src);
9784+
9785+ local_irq_save(flags);
9786+ double_rq_lock(rq_src, rq_dest);
9787+ if (p->cpu != cpu_src) {
9788+ double_rq_unlock(rq_src, rq_dest);
9789+ local_irq_restore(flags);
9790+ goto repeat;
9791+ }
9792+ if (rq_src == rq) {
9793+ p->cpu = cpu_dest;
9794+ if (p->array) {
9795+ deactivate_task(p, rq_src);
9796+ activate_task(p, rq_dest);
9797+ }
9798+ }
9799+ double_rq_unlock(rq_src, rq_dest);
9800+ local_irq_restore(flags);
9801
9802- init_timervecs();
9803+ complete(&req->done);
9804+ }
9805+}
9806
9807- init_bh(TIMER_BH, timer_bh);
9808- init_bh(TQUEUE_BH, tqueue_bh);
9809- init_bh(IMMEDIATE_BH, immediate_bh);
9810+void __init migration_init(void)
9811+{
9812+ int cpu;
9813
9814- /*
9815- * The boot idle thread does lazy MMU switching as well:
9816- */
9817- atomic_inc(&init_mm.mm_count);
9818- enter_lazy_tlb(&init_mm, current, cpu);
9819+ master_migration_thread = smp_processor_id();
9820+ current->cpus_allowed = 1UL << master_migration_thread;
9821+
9822+ for (cpu = 0; cpu < smp_num_cpus; cpu++) {
9823+ if (kernel_thread(migration_thread, (void *) (long) cpu,
9824+ CLONE_FS | CLONE_FILES | CLONE_SIGNAL) < 0)
9825+ BUG();
9826+ }
9827+ current->cpus_allowed = -1L;
9828+
9829+ for (cpu = 0; cpu < smp_num_cpus; cpu++)
9830+ while (!cpu_rq(cpu_logical_map(cpu))->migration_thread)
9831+ schedule_timeout(2);
9832 }
9833+
9834+#endif /* CONFIG_SMP */
9835diff -urN linux-2.4.22.org/kernel/signal.c linux-2.4.22/kernel/signal.c
9836--- linux-2.4.22.org/kernel/signal.c 2003-11-24 18:28:15.000000000 +0100
9837+++ linux-2.4.22/kernel/signal.c 2003-11-24 18:39:03.000000000 +0100
9838@@ -507,12 +507,9 @@
9839 * process of changing - but no harm is done by that
9840 * other than doing an extra (lightweight) IPI interrupt.
9841 */
9842- spin_lock(&runqueue_lock);
9843- if (task_has_cpu(t) && t->processor != smp_processor_id())
9844- smp_send_reschedule(t->processor);
9845- spin_unlock(&runqueue_lock);
9846-#endif /* CONFIG_SMP */
9847-
9848+ if ((t->state == TASK_RUNNING) && (t->cpu != cpu()))
9849+ kick_if_running(t);
9850+#endif
9851 if (t->state & TASK_INTERRUPTIBLE) {
9852 wake_up_process(t);
9853 return;
9854diff -urN linux-2.4.22.org/kernel/softirq.c linux-2.4.22/kernel/softirq.c
9855--- linux-2.4.22.org/kernel/softirq.c 2003-11-24 18:28:15.000000000 +0100
9856+++ linux-2.4.22/kernel/softirq.c 2003-11-24 18:39:03.000000000 +0100
9857@@ -364,13 +364,13 @@
9858 int cpu = cpu_logical_map(bind_cpu);
9859
9860 daemonize();
9861- current->nice = 19;
9862+ set_user_nice(current, 19);
9863 sigfillset(&current->blocked);
9864
9865 /* Migrate to the right CPU */
9866- current->cpus_allowed = 1UL << cpu;
9867- while (smp_processor_id() != cpu)
9868- schedule();
9869+ set_cpus_allowed(current, 1UL << cpu);
9870+ if (cpu() != cpu)
9871+ BUG();
9872
9873 sprintf(current->comm, "ksoftirqd_CPU%d", bind_cpu);
9874
9875@@ -395,7 +395,7 @@
9876 }
9877 }
9878
9879-static __init int spawn_ksoftirqd(void)
9880+__init int spawn_ksoftirqd(void)
9881 {
9882 int cpu;
9883
9884diff -urN linux-2.4.22.org/kernel/sys.c linux-2.4.22/kernel/sys.c
9885--- linux-2.4.22.org/kernel/sys.c 2003-11-24 18:28:15.000000000 +0100
9886+++ linux-2.4.22/kernel/sys.c 2003-11-24 18:39:03.000000000 +0100
9887@@ -239,10 +239,10 @@
9888 }
9889 if (error == -ESRCH)
9890 error = 0;
9891- if (niceval < p->nice && !capable(CAP_SYS_NICE))
9892+ if (niceval < task_nice(p) && !capable(CAP_SYS_NICE))
9893 error = -EACCES;
9894 else
9895- p->nice = niceval;
9896+ set_user_nice(p, niceval);
9897 }
9898 read_unlock(&tasklist_lock);
9899
9900@@ -268,7 +268,7 @@
9901 long niceval;
9902 if (!proc_sel(p, which, who))
9903 continue;
9904- niceval = 20 - p->nice;
9905+ niceval = 20 - task_nice(p);
9906 if (niceval > retval)
9907 retval = niceval;
9908 }
9909diff -urN linux-2.4.22.org/kernel/timer.c linux-2.4.22/kernel/timer.c
9910--- linux-2.4.22.org/kernel/timer.c 2003-11-24 18:28:15.000000000 +0100
9911+++ linux-2.4.22/kernel/timer.c 2003-11-24 18:39:03.000000000 +0100
9912@@ -25,6 +25,8 @@
9913
9914 #include <asm/uaccess.h>
9915
9916+struct kernel_stat kstat;
9917+
9918 /*
9919 * Timekeeping variables
9920 */
9921@@ -598,25 +600,7 @@
9922 int cpu = smp_processor_id(), system = user_tick ^ 1;
9923
9924 update_one_process(p, user_tick, system, cpu);
9925- if (p->pid) {
9926- if (--p->counter <= 0) {
9927- p->counter = 0;
9928- /*
9929- * SCHED_FIFO is priority preemption, so this is
9930- * not the place to decide whether to reschedule a
9931- * SCHED_FIFO task or not - Bhavesh Davda
9932- */
9933- if (p->policy != SCHED_FIFO) {
9934- p->need_resched = 1;
9935- }
9936- }
9937- if (p->nice > 0)
9938- kstat.per_cpu_nice[cpu] += user_tick;
9939- else
9940- kstat.per_cpu_user[cpu] += user_tick;
9941- kstat.per_cpu_system[cpu] += system;
9942- } else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
9943- kstat.per_cpu_system[cpu] += system;
9944+ scheduler_tick(user_tick, system);
9945 }
9946
9947 /*
9948@@ -624,17 +608,7 @@
9949 */
9950 static unsigned long count_active_tasks(void)
9951 {
9952- struct task_struct *p;
9953- unsigned long nr = 0;
9954-
9955- read_lock(&tasklist_lock);
9956- for_each_task(p) {
9957- if ((p->state == TASK_RUNNING ||
9958- (p->state & TASK_UNINTERRUPTIBLE)))
9959- nr += FIXED_1;
9960- }
9961- read_unlock(&tasklist_lock);
9962- return nr;
9963+ return (nr_running() + nr_uninterruptible()) * FIXED_1;
9964 }
9965
9966 /*
9967@@ -827,6 +801,89 @@
9968
9969 #endif
9970
9971+static void process_timeout(unsigned long __data)
9972+{
9973+ wake_up_process((task_t *)__data);
9974+}
9975+
9976+/**
9977+ * schedule_timeout - sleep until timeout
9978+ * @timeout: timeout value in jiffies
9979+ *
9980+ * Make the current task sleep until @timeout jiffies have
9981+ * elapsed. The routine will return immediately unless
9982+ * the current task state has been set (see set_current_state()).
9983+ *
9984+ * You can set the task state as follows -
9985+ *
9986+ * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
9987+ * pass before the routine returns. The routine will return 0
9988+ *
9989+ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
9990+ * delivered to the current task. In this case the remaining time
9991+ * in jiffies will be returned, or 0 if the timer expired in time
9992+ *
9993+ * The current task state is guaranteed to be TASK_RUNNING when this
9994+ * routine returns.
9995+ *
9996+ * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
9997+ * the CPU away without a bound on the timeout. In this case the return
9998+ * value will be %MAX_SCHEDULE_TIMEOUT.
9999+ *
10000+ * In all cases the return value is guaranteed to be non-negative.
10001+ */
10002+signed long schedule_timeout(signed long timeout)
10003+{
10004+ struct timer_list timer;
10005+ unsigned long expire;
10006+
10007+ switch (timeout)
10008+ {
10009+ case MAX_SCHEDULE_TIMEOUT:
10010+ /*
10011+ * These two special cases are useful to be comfortable
10012+ * in the caller. Nothing more. We could take
10013+ * MAX_SCHEDULE_TIMEOUT from one of the negative value
10014+ * but I'd like to return a valid offset (>=0) to allow
10015+ * the caller to do everything it wants with the retval.
10016+ */
10017+ schedule();
10018+ goto out;
10019+ default:
10020+ /*
10021+ * Another bit of PARANOID. Note that the retval will be
10022+ * 0 since no piece of kernel is supposed to do a check
10023+ * for a negative retval of schedule_timeout() (since it
10024+ * should never happen anyway). You just have the printk()
10025+ * that will tell you if something has gone wrong and where.
10026+ */
10027+ if (timeout < 0)
10028+ {
10029+ printk(KERN_ERR "schedule_timeout: wrong timeout "
10030+ "value %lx from %p\n", timeout,
10031+ __builtin_return_address(0));
10032+ current->state = TASK_RUNNING;
10033+ goto out;
10034+ }
10035+ }
10036+
10037+ expire = timeout + jiffies;
10038+
10039+ init_timer(&timer);
10040+ timer.expires = expire;
10041+ timer.data = (unsigned long) current;
10042+ timer.function = process_timeout;
10043+
10044+ add_timer(&timer);
10045+ schedule();
10046+ del_timer_sync(&timer);
10047+
10048+ timeout = expire - jiffies;
10049+
10050+ out:
10051+ return timeout < 0 ? 0 : timeout;
10052+}
10053+
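
The kerneldoc above defines the schedule_timeout() contract: 0 when the full timeout elapsed, the remaining jiffies when a signal cut the sleep short. Below is a userspace analogue built on nanosleep(), which exposes the same "remaining time on interruption" behaviour; the HZ value is only illustrative.

#include <stdio.h>
#include <time.h>
#include <signal.h>
#include <unistd.h>
#include <errno.h>

#define HZ 100                          /* illustrative tick rate */

static void on_alarm(int sig) { (void)sig; }

/* Sleep for a number of "ticks"; return how many were left if the sleep
 * was interrupted by a signal, or 0 if it ran to completion. */
static long sleep_ticks(long ticks)
{
    struct timespec req = { ticks / HZ, (ticks % HZ) * (1000000000L / HZ) };
    struct timespec rem = { 0, 0 };

    if (nanosleep(&req, &rem) == -1 && errno == EINTR)
        return rem.tv_sec * HZ + rem.tv_nsec / (1000000000L / HZ);
    return 0;
}

int main(void)
{
    signal(SIGALRM, on_alarm);          /* deliver a signal mid-sleep */
    alarm(1);
    printf("ticks left: %ld\n", sleep_ticks(3 * HZ));   /* roughly 200 */
    return 0;
}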
10054 /* Thread ID - the internal kernel "pid" */
10055 asmlinkage long sys_gettid(void)
10056 {
10057@@ -873,4 +930,3 @@
10058 }
10059 return 0;
10060 }
10061-
10062diff -urN linux-2.4.22.org/mm/oom_kill.c linux-2.4.22/mm/oom_kill.c
10063--- linux-2.4.22.org/mm/oom_kill.c 2003-11-24 18:28:16.000000000 +0100
10064+++ linux-2.4.22/mm/oom_kill.c 2003-11-24 18:39:03.000000000 +0100
10065@@ -88,7 +88,7 @@
10066 * Niced processes are most likely less important, so double
10067 * their badness points.
10068 */
10069- if (p->nice > 0)
10070+ if (task_nice(p) > 0)
10071 points *= 2;
10072
10073 /*
10074@@ -152,7 +152,7 @@
10075 * all the memory it needs. That way it should be able to
10076 * exit() and clear out its resources quickly...
10077 */
10078- p->counter = 5 * HZ;
10079+ p->time_slice = HZ;
10080
10081 /* This process has hardware access, be more careful. */
10082 if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) {
10083diff -urN linux-2.4.22.org/net/bluetooth/bnep/core.c linux-2.4.22/net/bluetooth/bnep/core.c
10084--- linux-2.4.22.org/net/bluetooth/bnep/core.c 2003-11-24 18:28:39.000000000 +0100
10085+++ linux-2.4.22/net/bluetooth/bnep/core.c 2003-11-24 18:39:03.000000000 +0100
10086@@ -460,7 +460,7 @@
10087 sigfillset(&current->blocked);
10088 flush_signals(current);
10089
10090- current->nice = -15;
10091+ set_user_nice(current, -15);
10092
10093 set_fs(KERNEL_DS);
10094
10095diff -urN linux-2.4.22.org/net/bluetooth/cmtp/core.c linux-2.4.22/net/bluetooth/cmtp/core.c
10096--- linux-2.4.22.org/net/bluetooth/cmtp/core.c 2003-11-24 18:28:38.000000000 +0100
10097+++ linux-2.4.22/net/bluetooth/cmtp/core.c 2003-11-24 18:39:03.000000000 +0100
10098@@ -298,7 +298,7 @@
10099 sigfillset(&current->blocked);
10100 flush_signals(current);
10101
10102- current->nice = -15;
10103+ set_user_nice(current, -15);
10104
10105 set_fs(KERNEL_DS);
10106