1diff -urN linux-2.4.24.org/arch/alpha/kernel/entry.S linux-2.4.24/arch/alpha/kernel/entry.S
2--- linux-2.4.24.org/arch/alpha/kernel/entry.S 2004-02-04 20:50:50.273627588 +0100
3+++ linux-2.4.24/arch/alpha/kernel/entry.S 2004-02-04 20:52:52.801142450 +0100
4@@ -695,7 +695,9 @@
5 ret_from_fork:
6 lda $26,ret_from_sys_call
7 mov $17,$16
8+#if CONFIG_SMP
9 jsr $31,schedule_tail
10+#endif
11 .end ret_from_fork
12
13 .align 3
14diff -urN linux-2.4.24.org/arch/alpha/kernel/process.c linux-2.4.24/arch/alpha/kernel/process.c
15--- linux-2.4.24.org/arch/alpha/kernel/process.c 2004-02-04 20:50:48.800933904 +0100
16+++ linux-2.4.24/arch/alpha/kernel/process.c 2004-02-04 20:52:52.805141619 +0100
17@@ -74,9 +74,6 @@
18 cpu_idle(void)
19 {
20 /* An endless idle loop with no priority at all. */
21- current->nice = 20;
22- current->counter = -100;
23-
24 while (1) {
25 /* FIXME -- EV6 and LCA45 know how to power down
26 the CPU. */
27diff -urN linux-2.4.24.org/arch/alpha/kernel/smp.c linux-2.4.24/arch/alpha/kernel/smp.c
28--- linux-2.4.24.org/arch/alpha/kernel/smp.c 2004-02-04 20:50:49.083875053 +0100
29+++ linux-2.4.24/arch/alpha/kernel/smp.c 2004-02-04 20:52:52.820138499 +0100
30@@ -81,6 +81,8 @@
31 int smp_num_probed; /* Internal processor count */
32 int smp_num_cpus = 1; /* Number that came online. */
33 int smp_threads_ready; /* True once the per process idle is forked. */
34+cycles_t cacheflush_time;
35+unsigned long cache_decay_ticks;
36
37 int __cpu_number_map[NR_CPUS];
38 int __cpu_logical_map[NR_CPUS];
39@@ -155,11 +157,6 @@
40 {
41 int cpuid = hard_smp_processor_id();
42
43- if (current != init_tasks[cpu_number_map(cpuid)]) {
44- printk("BUG: smp_calling: cpu %d current %p init_tasks[cpu_number_map(cpuid)] %p\n",
45- cpuid, current, init_tasks[cpu_number_map(cpuid)]);
46- }
47-
48 DBGS(("CALLIN %d state 0x%lx\n", cpuid, current->state));
49
50 /* Turn on machine checks. */
51@@ -217,9 +214,6 @@
52 DBGS(("smp_callin: commencing CPU %d current %p\n",
53 cpuid, current));
54
55- /* Setup the scheduler for this processor. */
56- init_idle();
57-
58 /* ??? This should be in init_idle. */
59 atomic_inc(&init_mm.mm_count);
60 current->active_mm = &init_mm;
61@@ -227,6 +221,57 @@
62 cpu_idle();
63 }
64
65+
66+/*
67+ * Rough estimation for SMP scheduling, this is the number of cycles it
68+ * takes for a fully memory-limited process to flush the SMP-local cache.
69+ *
70+ * We are not told how much cache there is, so we have to guess.
71+ */
72+static void __init
73+smp_tune_scheduling (int cpuid)
74+{
75+ struct percpu_struct *cpu;
76+ unsigned long on_chip_cache; /* kB */
77+ unsigned long freq; /* Hz */
78+ unsigned long bandwidth = 350; /* MB/s */
79+
80+ cpu = (struct percpu_struct*)((char*)hwrpb + hwrpb->processor_offset
81+ + cpuid * hwrpb->processor_size);
82+ switch (cpu->type)
83+ {
84+ case EV45_CPU:
85+ on_chip_cache = 16 + 16;
86+ break;
87+
88+ case EV5_CPU:
89+ case EV56_CPU:
90+ on_chip_cache = 8 + 8 + 96;
91+ break;
92+
93+ case PCA56_CPU:
94+ on_chip_cache = 16 + 8;
95+ break;
96+
97+ case EV6_CPU:
98+ case EV67_CPU:
99+ default:
100+ on_chip_cache = 64 + 64;
101+ break;
102+ }
103+
104+ freq = hwrpb->cycle_freq ? : est_cycle_freq;
105+
106+ cacheflush_time = (freq / 1000000) * (on_chip_cache << 10) / bandwidth;
107+ cache_decay_ticks = cacheflush_time / (freq / 1000) * HZ / 1000;
108+
109+ printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
110+ cacheflush_time/(freq/1000000),
111+ (cacheflush_time*100/(freq/1000000)) % 100);
112+ printk("task migration cache decay timeout: %ld msecs.\n",
113+ (cache_decay_ticks + 1) * 1000 / HZ);
114+}
115+
116 /*
117 * Send a message to a secondary's console. "START" is one such
118 * interesting message. ;-)
119@@ -449,14 +494,11 @@
120 if (idle == &init_task)
121 panic("idle process is init_task for CPU %d", cpuid);
122
123- idle->processor = cpuid;
124- idle->cpus_runnable = 1 << cpuid; /* we schedule the first task manually */
125+ init_idle(idle, cpuid);
126+ unhash_process(idle);
127+
128 __cpu_logical_map[cpunum] = cpuid;
129 __cpu_number_map[cpuid] = cpunum;
130-
131- del_from_runqueue(idle);
132- unhash_process(idle);
133- init_tasks[cpunum] = idle;
134
135 DBGS(("smp_boot_one_cpu: CPU %d state 0x%lx flags 0x%lx\n",
136 cpuid, idle->state, idle->flags));
137@@ -563,13 +605,11 @@
138
139 __cpu_number_map[boot_cpuid] = 0;
140 __cpu_logical_map[0] = boot_cpuid;
141- current->processor = boot_cpuid;
142
143 smp_store_cpu_info(boot_cpuid);
144+ smp_tune_scheduling(boot_cpuid);
145 smp_setup_percpu_timer(boot_cpuid);
146
147- init_idle();
148-
149 /* ??? This should be in init_idle. */
150 atomic_inc(&init_mm.mm_count);
151 current->active_mm = &init_mm;
152diff -urN linux-2.4.24.org/arch/arm/kernel/process.c linux-2.4.24/arch/arm/kernel/process.c
153--- linux-2.4.24.org/arch/arm/kernel/process.c 2004-02-04 20:51:34.213488266 +0100
154+++ linux-2.4.24/arch/arm/kernel/process.c 2004-02-04 20:52:52.824137668 +0100
155@@ -87,8 +87,6 @@
156 {
157 /* endless idle loop with no priority at all */
158 init_idle();
159- current->nice = 20;
160- current->counter = -100;
161
162 while (1) {
163 void (*idle)(void) = pm_idle;
164diff -urN linux-2.4.24.org/arch/i386/kernel/entry.S linux-2.4.24/arch/i386/kernel/entry.S
165--- linux-2.4.24.org/arch/i386/kernel/entry.S 2004-02-04 20:50:47.376230238 +0100
166+++ linux-2.4.24/arch/i386/kernel/entry.S 2004-02-04 20:52:52.828136836 +0100
167@@ -79,7 +79,7 @@
168 exec_domain = 16
169 need_resched = 20
170 tsk_ptrace = 24
171-processor = 52
172+cpu = 32
173
174 ENOSYS = 38
175
176@@ -184,9 +184,11 @@
177
178
179 ENTRY(ret_from_fork)
180+#if CONFIG_SMP
181 pushl %ebx
182 call SYMBOL_NAME(schedule_tail)
183 addl $4, %esp
184+#endif
185 GET_CURRENT(%ebx)
186 testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS
187 jne tracesys_exit
188diff -urN linux-2.4.24.org/arch/i386/kernel/process.c linux-2.4.24/arch/i386/kernel/process.c
189--- linux-2.4.24.org/arch/i386/kernel/process.c 2004-02-04 20:50:46.799350227 +0100
190+++ linux-2.4.24/arch/i386/kernel/process.c 2004-02-04 20:52:52.833135796 +0100
191@@ -84,7 +84,7 @@
192 {
193 if (current_cpu_data.hlt_works_ok && !hlt_counter) {
194 __cli();
195- if (!current->need_resched)
196+ if (!need_resched())
197 safe_halt();
198 else
199 __sti();
200@@ -126,9 +126,6 @@
201 void cpu_idle (void)
202 {
203 /* endless idle loop with no priority at all */
204- init_idle();
205- current->nice = 20;
206- current->counter = -100;
207
208 while (1) {
209 void (*idle)(void) = pm_idle;
210@@ -665,15 +662,17 @@
211 asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs));
212
213 /*
214- * Restore %fs and %gs.
215+ * Restore %fs and %gs if needed.
216 */
217- loadsegment(fs, next->fs);
218- loadsegment(gs, next->gs);
219+ if (unlikely(prev->fs | prev->gs | next->fs | next->gs)) {
220+ loadsegment(fs, next->fs);
221+ loadsegment(gs, next->gs);
222+ }
223
224 /*
225 * Now maybe reload the debug registers
226 */
227- if (next->debugreg[7]){
228+ if (unlikely(next->debugreg[7])) {
229 loaddebug(next, 0);
230 loaddebug(next, 1);
231 loaddebug(next, 2);
232@@ -683,7 +682,7 @@
233 loaddebug(next, 7);
234 }
235
236- if (prev->ioperm || next->ioperm) {
237+ if (unlikely(prev->ioperm || next->ioperm)) {
238 if (next->ioperm) {
239 /*
240 * 4 cachelines copy ... not good, but not that
241diff -urN linux-2.4.24.org/arch/i386/kernel/setup.c linux-2.4.24/arch/i386/kernel/setup.c
242--- linux-2.4.24.org/arch/i386/kernel/setup.c 2004-02-04 20:50:46.790352099 +0100
243+++ linux-2.4.24/arch/i386/kernel/setup.c 2004-02-04 20:52:52.840134340 +0100
244@@ -3193,9 +3193,10 @@
245 load_TR(nr);
246 load_LDT(&init_mm.context);
247
248- /*
249- * Clear all 6 debug registers:
250- */
251+ /* Clear %fs and %gs. */
252+ asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
253+
254+ /* Clear all 6 debug registers: */
255
256 #define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) );
257
258diff -urN linux-2.4.24.org/arch/i386/kernel/smpboot.c linux-2.4.24/arch/i386/kernel/smpboot.c
259--- linux-2.4.24.org/arch/i386/kernel/smpboot.c 2004-02-04 20:50:46.762357921 +0100
260+++ linux-2.4.24/arch/i386/kernel/smpboot.c 2004-02-04 20:52:52.864129350 +0100
261@@ -308,14 +308,14 @@
262 if (tsc_values[i] < avg)
263 realdelta = -realdelta;
264
265- printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
266- i, realdelta);
267+ printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n", i, realdelta);
268 }
269
270 sum += delta;
271 }
272 if (!buggy)
273 printk("passed.\n");
274+ ;
275 }
276
277 static void __init synchronize_tsc_ap (void)
278@@ -365,7 +365,7 @@
279 * (This works even if the APIC is not enabled.)
280 */
281 phys_id = GET_APIC_ID(apic_read(APIC_ID));
282- cpuid = current->processor;
283+ cpuid = cpu();
284 if (test_and_set_bit(cpuid, &cpu_online_map)) {
285 printk("huh, phys CPU#%d, CPU#%d already present??\n",
286 phys_id, cpuid);
287@@ -435,6 +435,7 @@
288 */
289 smp_store_cpu_info(cpuid);
290
291+ disable_APIC_timer();
292 /*
293 * Allow the master to continue.
294 */
295@@ -465,6 +466,7 @@
296 smp_callin();
297 while (!atomic_read(&smp_commenced))
298 rep_nop();
299+ enable_APIC_timer();
300 /*
301 * low-memory mappings have been cleared, flush them from
302 * the local TLBs too.
303@@ -803,16 +805,13 @@
304 if (!idle)
305 panic("No idle process for CPU %d", cpu);
306
307- idle->processor = cpu;
308- idle->cpus_runnable = 1 << cpu; /* we schedule the first task manually */
309+ init_idle(idle, cpu);
310
311 map_cpu_to_boot_apicid(cpu, apicid);
312
313 idle->thread.eip = (unsigned long) start_secondary;
314
315- del_from_runqueue(idle);
316 unhash_process(idle);
317- init_tasks[cpu] = idle;
318
319 /* start_eip had better be page-aligned! */
320 start_eip = setup_trampoline();
321@@ -925,6 +924,7 @@
322 }
323
324 cycles_t cacheflush_time;
325+unsigned long cache_decay_ticks;
326
327 static void smp_tune_scheduling (void)
328 {
329@@ -958,9 +958,13 @@
330 cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
331 }
332
333+ cache_decay_ticks = (long)cacheflush_time/cpu_khz * HZ / 1000;
334+
335 printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
336 (long)cacheflush_time/(cpu_khz/1000),
337 ((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
338+ printk("task migration cache decay timeout: %ld msecs.\n",
339+ (cache_decay_ticks + 1) * 1000 / HZ);
340 }
341
342 /*
343@@ -1026,8 +1030,7 @@
344 map_cpu_to_boot_apicid(0, boot_cpu_apicid);
345
346 global_irq_holder = 0;
347- current->processor = 0;
348- init_idle();
349+ current->cpu = 0;
350 smp_tune_scheduling();
351
352 /*
353diff -urN linux-2.4.24.org/arch/i386/kernel/smp.c linux-2.4.24/arch/i386/kernel/smp.c
354--- linux-2.4.24.org/arch/i386/kernel/smp.c 2004-02-04 20:50:47.312243547 +0100
355+++ linux-2.4.24/arch/i386/kernel/smp.c 2004-02-04 20:52:52.868128518 +0100
356@@ -503,6 +503,17 @@
357 }
358
359 /*
360+ * this function sends a reschedule IPI to all (other) CPUs.
361+ * This should only be used if some 'global' task became runnable,
362+ * such as a RT task, that must be handled now. The first CPU
363+ * that manages to grab the task will run it.
364+ */
365+void smp_send_reschedule_all(void)
366+{
367+ send_IPI_allbutself(RESCHEDULE_VECTOR);
368+}
369+
370+/*
371 * Structure and data for smp_call_function(). This is designed to minimise
372 * static memory requirements. It also looks cleaner.
373 */
374diff -urN linux-2.4.24.org/arch/mips64/kernel/process.c linux-2.4.24/arch/mips64/kernel/process.c
375--- linux-2.4.24.org/arch/mips64/kernel/process.c 2004-02-04 20:51:53.268524907 +0100
376+++ linux-2.4.24/arch/mips64/kernel/process.c 2004-02-04 20:52:52.872127686 +0100
377@@ -39,8 +39,7 @@
378 {
379 /* endless idle loop with no priority at all */
380 init_idle();
381- current->nice = 20;
382- current->counter = -100;
383+
384 while (1) {
385 while (!current->need_resched)
386 if (cpu_wait)
387diff -urN linux-2.4.24.org/arch/parisc/kernel/process.c linux-2.4.24/arch/parisc/kernel/process.c
388--- linux-2.4.24.org/arch/parisc/kernel/process.c 2004-02-04 20:51:58.602415484 +0100
389+++ linux-2.4.24/arch/parisc/kernel/process.c 2004-02-04 20:52:52.876126854 +0100
390@@ -65,8 +65,6 @@
391 {
392 /* endless idle loop with no priority at all */
393 init_idle();
394- current->nice = 20;
395- current->counter = -100;
396
397 while (1) {
398 while (!current->need_resched) {
399diff -urN linux-2.4.24.org/arch/ppc/kernel/entry.S linux-2.4.24/arch/ppc/kernel/entry.S
400--- linux-2.4.24.org/arch/ppc/kernel/entry.S 2004-02-04 20:51:15.913294629 +0100
401+++ linux-2.4.24/arch/ppc/kernel/entry.S 2004-02-04 20:52:52.903121239 +0100
402@@ -269,7 +269,9 @@
403
404 .globl ret_from_fork
405 ret_from_fork:
406+#if CONFIG_SMP
407 bl schedule_tail
408+#endif
409 lwz r0,TASK_PTRACE(r2)
410 andi. r0,r0,PT_TRACESYS
411 bnel- syscall_trace
412diff -urN linux-2.4.24.org/arch/ppc/kernel/idle.c linux-2.4.24/arch/ppc/kernel/idle.c
413--- linux-2.4.24.org/arch/ppc/kernel/idle.c 2004-02-04 20:51:16.300214151 +0100
414+++ linux-2.4.24/arch/ppc/kernel/idle.c 2004-02-04 20:52:52.908120200 +0100
415@@ -46,9 +46,7 @@
416 do_power_save = 1;
417
418 /* endless loop with no priority at all */
419- current->nice = 20;
420- current->counter = -100;
421- init_idle();
422+
423 for (;;) {
424 #ifdef CONFIG_SMP
425 if (!do_power_save) {
426diff -urN linux-2.4.24.org/arch/ppc/kernel/mk_defs.c linux-2.4.24/arch/ppc/kernel/mk_defs.c
427--- linux-2.4.24.org/arch/ppc/kernel/mk_defs.c 2004-02-04 20:51:14.150661249 +0100
428+++ linux-2.4.24/arch/ppc/kernel/mk_defs.c 2004-02-04 20:52:52.913119160 +0100
429@@ -34,8 +34,8 @@
430 /*DEFINE(KERNELBASE, KERNELBASE);*/
431 DEFINE(STATE, offsetof(struct task_struct, state));
432 DEFINE(NEXT_TASK, offsetof(struct task_struct, next_task));
433- DEFINE(COUNTER, offsetof(struct task_struct, counter));
434- DEFINE(PROCESSOR, offsetof(struct task_struct, processor));
435+ DEFINE(COUNTER, offsetof(struct task_struct, time_slice));
436+ DEFINE(PROCESSOR, offsetof(struct task_struct, cpu));
437 DEFINE(SIGPENDING, offsetof(struct task_struct, sigpending));
438 DEFINE(THREAD, offsetof(struct task_struct, thread));
439 DEFINE(MM, offsetof(struct task_struct, mm));
440diff -urN linux-2.4.24.org/arch/ppc/kernel/process.c linux-2.4.24/arch/ppc/kernel/process.c
441--- linux-2.4.24.org/arch/ppc/kernel/process.c 2004-02-04 20:51:14.062679549 +0100
442+++ linux-2.4.24/arch/ppc/kernel/process.c 2004-02-04 20:52:52.917118328 +0100
443@@ -281,7 +281,7 @@
444 #endif
445
446 #ifdef CONFIG_SMP
447- printk(" CPU: %d", current->processor);
448+ printk(" CPU: %d", current->cpu);
449 #endif /* CONFIG_SMP */
450
451 printk("\n");
452diff -urN linux-2.4.24.org/arch/ppc/kernel/smp.c linux-2.4.24/arch/ppc/kernel/smp.c
453--- linux-2.4.24.org/arch/ppc/kernel/smp.c 2004-02-04 20:51:15.993277992 +0100
454+++ linux-2.4.24/arch/ppc/kernel/smp.c 2004-02-04 20:52:52.923117080 +0100
455@@ -51,6 +51,7 @@
456 unsigned long cpu_online_map;
457 int smp_hw_index[NR_CPUS];
458 static struct smp_ops_t *smp_ops;
459+unsigned long cache_decay_ticks = HZ/100;
460
461 /* all cpu mappings are 1-1 -- Cort */
462 volatile unsigned long cpu_callin_map[NR_CPUS];
463@@ -292,9 +293,7 @@
464 * cpu 0, the master -- Cort
465 */
466 cpu_callin_map[0] = 1;
467- current->processor = 0;
468-
469- init_idle();
470+ current->cpu = 0;
471
472 for (i = 0; i < NR_CPUS; i++) {
473 prof_counter[i] = 1;
474@@ -351,12 +350,9 @@
475 p = init_task.prev_task;
476 if (!p)
477 panic("No idle task for CPU %d", i);
478- del_from_runqueue(p);
479+ init_idle(p, i);
480 unhash_process(p);
481- init_tasks[i] = p;
482
483- p->processor = i;
484- p->cpus_runnable = 1 << i; /* we schedule the first task manually */
485 current_set[i] = p;
486
487 /*
488@@ -505,7 +501,7 @@
489
490 void __init smp_callin(void)
491 {
492- int cpu = current->processor;
493+ int cpu = current->cpu;
494
495 smp_store_cpu_info(cpu);
496 smp_ops->setup_cpu(cpu);
497diff -urN linux-2.4.24.org/arch/ppc/lib/dec_and_lock.c linux-2.4.24/arch/ppc/lib/dec_and_lock.c
498--- linux-2.4.24.org/arch/ppc/lib/dec_and_lock.c 2004-02-04 20:51:18.406775995 +0100
499+++ linux-2.4.24/arch/ppc/lib/dec_and_lock.c 2004-02-04 20:52:52.927116249 +0100
500@@ -1,4 +1,5 @@
501 #include <linux/module.h>
502+#include <linux/sched.h>
503 #include <linux/spinlock.h>
504 #include <asm/atomic.h>
505 #include <asm/system.h>
506diff -urN linux-2.4.24.org/arch/ppc/mm/init.c linux-2.4.24/arch/ppc/mm/init.c
507--- linux-2.4.24.org/arch/ppc/mm/init.c 2004-02-04 20:51:13.814731121 +0100
508+++ linux-2.4.24/arch/ppc/mm/init.c 2004-02-04 20:52:52.931115417 +0100
509@@ -192,9 +192,9 @@
510 {
511 int iscur = 0;
512 #ifdef CONFIG_SMP
513- printk("%3d ", p->processor);
514- if ( (p->processor != NO_PROC_ID) &&
515- (p == current_set[p->processor]) )
516+ printk("%3d ", p->cpu);
517+ if ( (p->cpu != NO_PROC_ID) &&
518+ (p == current_set[p->cpu]) )
519 {
520 iscur = 1;
521 printk("current");
522diff -urN linux-2.4.24.org/arch/ppc64/kernel/entry.S linux-2.4.24/arch/ppc64/kernel/entry.S
523--- linux-2.4.24.org/arch/ppc64/kernel/entry.S 2004-02-04 20:50:43.056128805 +0100
524+++ linux-2.4.24/arch/ppc64/kernel/entry.S 2004-02-04 20:53:40.136297052 +0100
525@@ -299,7 +299,9 @@
526 blr
527
528 _GLOBAL(ret_from_fork)
529+#if CONFIG_SMP
530 bl .schedule_tail
531+#endif
532 ld r4,PACACURRENT(r13)
533 ld r0,TASK_PTRACE(r4)
534 andi. r0,r0,PT_TRACESYS
535diff -urN linux-2.4.24.org/arch/ppc64/kernel/idle.c linux-2.4.24/arch/ppc64/kernel/idle.c
536--- linux-2.4.24.org/arch/ppc64/kernel/idle.c 2004-02-04 20:50:43.329072034 +0100
537+++ linux-2.4.24/arch/ppc64/kernel/idle.c 2004-02-04 20:55:09.907625341 +0100
538@@ -88,15 +88,12 @@
539 unsigned long CTRL;
540
541 /* endless loop with no priority at all */
542- current->nice = 20;
543- current->counter = -100;
544-
545+
546 /* ensure iSeries run light will be out when idle */
547 current->thread.flags &= ~PPC_FLAG_RUN_LIGHT;
548 CTRL = mfspr(CTRLF);
549 CTRL &= ~RUNLATCH;
550 mtspr(CTRLT, CTRL);
551- init_idle();
552
553 lpaca = get_paca();
554
555diff -urN linux-2.4.24.org/arch/ppc64/kernel/process.c linux-2.4.24/arch/ppc64/kernel/process.c
556--- linux-2.4.24.org/arch/ppc64/kernel/process.c 2004-02-04 20:50:42.774187448 +0100
557+++ linux-2.4.24/arch/ppc64/kernel/process.c 2004-02-04 20:52:52.986103980 +0100
558@@ -138,7 +138,7 @@
559 #ifdef SHOW_TASK_SWITCHES
560 printk("%s/%d -> %s/%d NIP %08lx cpu %d root %x/%x\n",
561 prev->comm,prev->pid,
562- new->comm,new->pid,new->thread.regs->nip,new->processor,
563+ new->comm,new->pid,new->thread.regs->nip,new->cpu,
564 new->fs->root,prev->fs->root);
565 #endif
566 #ifdef CONFIG_SMP
567diff -urN linux-2.4.24.org/arch/ppc64/kernel/smp.c linux-2.4.24/arch/ppc64/kernel/smp.c
568--- linux-2.4.24.org/arch/ppc64/kernel/smp.c 2004-02-04 20:50:43.176103851 +0100
569+++ linux-2.4.24/arch/ppc64/kernel/smp.c 2004-02-04 20:52:52.990103148 +0100
570@@ -70,6 +70,7 @@
571 extern atomic_t ipi_sent;
572 spinlock_t kernel_flag __cacheline_aligned = SPIN_LOCK_UNLOCKED;
573 cycles_t cacheflush_time;
574+unsigned long cache_decay_ticks = HZ/100;
575 static int max_cpus __initdata = NR_CPUS;
576
577 unsigned long cpu_online_map;
578@@ -636,9 +637,7 @@
579 * cpu 0, the master -- Cort
580 */
581 cpu_callin_map[0] = 1;
582- current->processor = 0;
583-
584- init_idle();
585+ current->cpu = 0;
586
587 for (i = 0; i < NR_CPUS; i++) {
588 paca[i].prof_counter = 1;
589@@ -709,12 +708,9 @@
590
591 PPCDBG(PPCDBG_SMP,"\tProcessor %d, task = 0x%lx\n", i, p);
592
593- del_from_runqueue(p);
594+ init_idle(p, i);
595 unhash_process(p);
596- init_tasks[i] = p;
597
598- p->processor = i;
599- p->cpus_runnable = 1 << i; /* we schedule the first task manually */
600 current_set[i].task = p;
601 sp = ((unsigned long)p) + sizeof(union task_union)
602 - STACK_FRAME_OVERHEAD;
603@@ -765,7 +761,7 @@
604
605 void __init smp_callin(void)
606 {
607- int cpu = current->processor;
608+ int cpu = current->cpu;
609
610 smp_store_cpu_info(cpu);
611 set_dec(paca[cpu].default_decr);
612@@ -773,8 +769,6 @@
613
614 ppc_md.smp_setup_cpu(cpu);
615
616- init_idle();
617-
618 set_bit(smp_processor_id(), &cpu_online_map);
619
620 while(!smp_commenced) {
621@@ -793,7 +787,7 @@
622 {
623 int cpu;
624
625- cpu = current->processor;
626+ cpu = current->cpu;
627 atomic_inc(&init_mm.mm_count);
628 current->active_mm = &init_mm;
629 smp_callin();
630diff -urN linux-2.4.24.org/arch/s390/kernel/process.c linux-2.4.24/arch/s390/kernel/process.c
631--- linux-2.4.24.org/arch/s390/kernel/process.c 2004-02-04 20:51:56.088938275 +0100
632+++ linux-2.4.24/arch/s390/kernel/process.c 2004-02-04 20:52:52.994102316 +0100
633@@ -57,8 +57,7 @@
634
635 /* endless idle loop with no priority at all */
636 init_idle();
637- current->nice = 20;
638- current->counter = -100;
639+
640 while (1) {
641 __cli();
642 if (current->need_resched) {
643diff -urN linux-2.4.24.org/arch/s390x/kernel/process.c linux-2.4.24/arch/s390x/kernel/process.c
644--- linux-2.4.24.org/arch/s390x/kernel/process.c 2004-02-04 20:52:03.781338295 +0100
645+++ linux-2.4.24/arch/s390x/kernel/process.c 2004-02-04 20:52:52.997101692 +0100
646@@ -57,8 +57,7 @@
647
648 /* endless idle loop with no priority at all */
649 init_idle();
650- current->nice = 20;
651- current->counter = -100;
652+
653 while (1) {
654 __cli();
655 if (current->need_resched) {
656diff -urN linux-2.4.24.org/arch/sh/kernel/process.c linux-2.4.24/arch/sh/kernel/process.c
657--- linux-2.4.24.org/arch/sh/kernel/process.c 2004-02-04 20:51:43.820490054 +0100
658+++ linux-2.4.24/arch/sh/kernel/process.c 2004-02-04 20:52:53.000101068 +0100
659@@ -42,8 +42,6 @@
660 {
661 /* endless idle loop with no priority at all */
662 init_idle();
663- current->nice = 20;
664- current->counter = -100;
665
666 while (1) {
667 if (hlt_counter) {
668diff -urN linux-2.4.24.org/arch/sparc/kernel/entry.S linux-2.4.24/arch/sparc/kernel/entry.S
669--- linux-2.4.24.org/arch/sparc/kernel/entry.S 2004-02-04 20:50:51.877294031 +0100
670+++ linux-2.4.24/arch/sparc/kernel/entry.S 2004-02-04 20:52:53.005100028 +0100
671@@ -1471,7 +1471,9 @@
672
673 .globl C_LABEL(ret_from_fork)
674 C_LABEL(ret_from_fork):
675+#if CONFIG_SMP
676 call schedule_tail
677+#endif
678 mov %g3, %o0
679 b C_LABEL(ret_sys_call)
680 ld [%sp + STACKFRAME_SZ + PT_I0], %o0
681diff -urN linux-2.4.24.org/arch/sparc/kernel/process.c linux-2.4.24/arch/sparc/kernel/process.c
682--- linux-2.4.24.org/arch/sparc/kernel/process.c 2004-02-04 20:50:51.550362032 +0100
683+++ linux-2.4.24/arch/sparc/kernel/process.c 2004-02-04 20:52:53.009099197 +0100
684@@ -74,9 +74,6 @@
685 goto out;
686
687 /* endless idle loop with no priority at all */
688- current->nice = 20;
689- current->counter = -100;
690- init_idle();
691
692 for (;;) {
693 if (ARCH_SUN4C_SUN4) {
694@@ -128,9 +125,6 @@
695 int cpu_idle(void)
696 {
697 /* endless idle loop with no priority at all */
698- current->nice = 20;
699- current->counter = -100;
700- init_idle();
701
702 while(1) {
703 if(current->need_resched) {
704diff -urN linux-2.4.24.org/arch/sparc/kernel/smp.c linux-2.4.24/arch/sparc/kernel/smp.c
705--- linux-2.4.24.org/arch/sparc/kernel/smp.c 2004-02-04 20:50:51.522367854 +0100
706+++ linux-2.4.24/arch/sparc/kernel/smp.c 2004-02-04 20:52:53.013098365 +0100
707@@ -57,6 +57,7 @@
708 volatile int __cpu_number_map[NR_CPUS];
709 volatile int __cpu_logical_map[NR_CPUS];
710 cycles_t cacheflush_time = 0; /* XXX */
711+unsigned long cache_decay_ticks = HZ/100; /* XXX */
712
713 /* The only guaranteed locking primitive available on all Sparc
714 * processors is 'ldstub [%reg + immediate], %dest_reg' which atomically
715diff -urN linux-2.4.24.org/arch/sparc/kernel/sun4d_smp.c linux-2.4.24/arch/sparc/kernel/sun4d_smp.c
716--- linux-2.4.24.org/arch/sparc/kernel/sun4d_smp.c 2004-02-04 20:50:51.254423586 +0100
717+++ linux-2.4.24/arch/sparc/kernel/sun4d_smp.c 2004-02-04 20:52:53.027095454 +0100
718@@ -107,7 +107,6 @@
719 * the SMP initialization the master will be just allowed
720 * to call the scheduler code.
721 */
722- init_idle();
723
724 /* Get our local ticker going. */
725 smp_setup_percpu_timer();
726@@ -127,7 +126,7 @@
727 while((unsigned long)current_set[cpuid] < PAGE_OFFSET)
728 barrier();
729
730- while(current_set[cpuid]->processor != cpuid)
731+ while(current_set[cpuid]->cpu != cpuid)
732 barrier();
733
734 /* Fix idle thread fields. */
735@@ -197,10 +196,8 @@
736 mid_xlate[i] = i;
737 __cpu_number_map[boot_cpu_id] = 0;
738 __cpu_logical_map[0] = boot_cpu_id;
739- current->processor = boot_cpu_id;
740 smp_store_cpu_info(boot_cpu_id);
741 smp_setup_percpu_timer();
742- init_idle();
743 local_flush_cache_all();
744 if(linux_num_cpus == 1)
745 return; /* Not an MP box. */
746@@ -222,14 +219,10 @@
747 cpucount++;
748
749 p = init_task.prev_task;
750- init_tasks[i] = p;
751-
752- p->processor = i;
753- p->cpus_runnable = 1 << i; /* we schedule the first task manually */
754
755 current_set[i] = p;
756
757- del_from_runqueue(p);
758+ init_idle(p, i);
759 unhash_process(p);
760
761 for (no = 0; no < linux_num_cpus; no++)
762diff -urN linux-2.4.24.org/arch/sparc/kernel/sun4m_smp.c linux-2.4.24/arch/sparc/kernel/sun4m_smp.c
763--- linux-2.4.24.org/arch/sparc/kernel/sun4m_smp.c 2004-02-04 20:50:52.194228110 +0100
764+++ linux-2.4.24/arch/sparc/kernel/sun4m_smp.c 2004-02-04 20:52:53.030094830 +0100
765@@ -104,7 +104,6 @@
766 * the SMP initialization the master will be just allowed
767 * to call the scheduler code.
768 */
769- init_idle();
770
771 /* Allow master to continue. */
772 swap((unsigned long *)&cpu_callin_map[cpuid], 1);
773@@ -170,12 +169,10 @@
774 mid_xlate[boot_cpu_id] = (linux_cpus[boot_cpu_id].mid & ~8);
775 __cpu_number_map[boot_cpu_id] = 0;
776 __cpu_logical_map[0] = boot_cpu_id;
777- current->processor = boot_cpu_id;
778
779 smp_store_cpu_info(boot_cpu_id);
780 set_irq_udt(mid_xlate[boot_cpu_id]);
781 smp_setup_percpu_timer();
782- init_idle();
783 local_flush_cache_all();
784 if(linux_num_cpus == 1)
785 return; /* Not an MP box. */
786@@ -195,14 +192,10 @@
787 cpucount++;
788
789 p = init_task.prev_task;
790- init_tasks[i] = p;
791-
792- p->processor = i;
793- p->cpus_runnable = 1 << i; /* we schedule the first task manually */
794
795 current_set[i] = p;
796
797- del_from_runqueue(p);
798+ init_idle(p, i);
799 unhash_process(p);
800
801 /* See trampoline.S for details... */
802diff -urN linux-2.4.24.org/arch/sparc64/kernel/entry.S linux-2.4.24/arch/sparc64/kernel/entry.S
803--- linux-2.4.24.org/arch/sparc64/kernel/entry.S 2004-02-04 20:51:29.076556726 +0100
804+++ linux-2.4.24/arch/sparc64/kernel/entry.S 2004-02-04 20:52:53.039092958 +0100
805@@ -1627,7 +1627,9 @@
806 */
807 andn %o7, SPARC_FLAG_NEWCHILD, %l0
808 mov %g5, %o0 /* 'prev' */
809+#if CONFIG_SMP
810 call schedule_tail
811+#endif
812 stb %l0, [%g6 + AOFF_task_thread + AOFF_thread_flags]
813 andcc %l0, SPARC_FLAG_PERFCTR, %g0
814 be,pt %icc, 1f
815diff -urN linux-2.4.24.org/arch/sparc64/kernel/irq.c linux-2.4.24/arch/sparc64/kernel/irq.c
816--- linux-2.4.24.org/arch/sparc64/kernel/irq.c 2004-02-04 20:51:28.993573986 +0100
817+++ linux-2.4.24/arch/sparc64/kernel/irq.c 2004-02-04 20:52:53.044091918 +0100
818@@ -174,7 +174,7 @@
819 tid = ((tid & UPA_CONFIG_MID) << 9);
820 tid &= IMAP_TID_UPA;
821 } else {
822- tid = (starfire_translate(imap, current->processor) << 26);
823+ tid = (starfire_translate(imap, current->cpu) << 26);
824 tid &= IMAP_TID_UPA;
825 }
826
827diff -urN linux-2.4.24.org/arch/sparc64/kernel/process.c linux-2.4.24/arch/sparc64/kernel/process.c
828--- linux-2.4.24.org/arch/sparc64/kernel/process.c 2004-02-04 20:51:29.998364993 +0100
829+++ linux-2.4.24/arch/sparc64/kernel/process.c 2004-02-04 20:52:53.049090879 +0100
830@@ -54,9 +54,6 @@
831 return -EPERM;
832
833 /* endless idle loop with no priority at all */
834- current->nice = 20;
835- current->counter = -100;
836- init_idle();
837
838 for (;;) {
839 /* If current->need_resched is zero we should really
840@@ -80,14 +77,10 @@
841 /*
842 * the idle loop on a UltraMultiPenguin...
843 */
844-#define idle_me_harder() (cpu_data[current->processor].idle_volume += 1)
845-#define unidle_me() (cpu_data[current->processor].idle_volume = 0)
846+#define idle_me_harder() (cpu_data[current->cpu].idle_volume += 1)
847+#define unidle_me() (cpu_data[current->cpu].idle_volume = 0)
848 int cpu_idle(void)
849 {
850- current->nice = 20;
851- current->counter = -100;
852- init_idle();
853-
854 while(1) {
855 if (current->need_resched != 0) {
856 unidle_me();
857diff -urN linux-2.4.24.org/arch/sparc64/kernel/rtrap.S linux-2.4.24/arch/sparc64/kernel/rtrap.S
858--- linux-2.4.24.org/arch/sparc64/kernel/rtrap.S 2004-02-04 20:51:29.910383293 +0100
859+++ linux-2.4.24/arch/sparc64/kernel/rtrap.S 2004-02-04 20:52:53.053090047 +0100
860@@ -140,7 +140,7 @@
861 .align 64
862 .globl rtrap_clr_l6, rtrap, irqsz_patchme, rtrap_xcall
863 rtrap_clr_l6: clr %l6
864-rtrap: lduw [%g6 + AOFF_task_processor], %l0
865+rtrap: lduw [%g6 + AOFF_task_cpu], %l0
866 sethi %hi(irq_stat), %l2 ! &softirq_active
867 or %l2, %lo(irq_stat), %l2 ! &softirq_active
868 irqsz_patchme: sllx %l0, 0, %l0
869diff -urN linux-2.4.24.org/arch/sparc64/kernel/smp.c linux-2.4.24/arch/sparc64/kernel/smp.c
870--- linux-2.4.24.org/arch/sparc64/kernel/smp.c 2004-02-04 20:51:28.749624726 +0100
871+++ linux-2.4.24/arch/sparc64/kernel/smp.c 2004-02-04 20:52:53.068086928 +0100
872@@ -347,6 +347,8 @@
873
874 extern unsigned long sparc64_cpu_startup;
875
876+static void __init smp_tune_scheduling(void);
877+
878 /* The OBP cpu startup callback truncates the 3rd arg cookie to
879 * 32-bits (I think) so to be safe we have it read the pointer
880 * contained here so we work on >4GB machines. -DaveM
881@@ -360,7 +360,7 @@
882 printk("Entering UltraSMPenguin Mode...\n");
883 __sti();
884 smp_store_cpu_info(boot_cpu_id);
885- init_idle();
886+ smp_tune_scheduling();
887
888 if (linux_num_cpus == 1)
889 return;
890@@ -383,12 +383,8 @@
891 cpucount++;
892
893 p = init_task.prev_task;
894- init_tasks[cpucount] = p;
895
896- p->processor = i;
897- p->cpus_runnable = 1UL << i; /* we schedule the first task manually */
898-
899- del_from_runqueue(p);
900+ init_idle(p, i);
901 unhash_process(p);
902
903 callin_flag = 0;
904@@ -1214,10 +1210,96 @@
905 __cpu_number_map[boot_cpu_id] = 0;
906 prom_cpu_nodes[boot_cpu_id] = linux_cpus[0].prom_node;
907 __cpu_logical_map[0] = boot_cpu_id;
908- current->processor = boot_cpu_id;
909 prof_counter(boot_cpu_id) = prof_multiplier(boot_cpu_id) = 1;
910 }
911
912+cycles_t cacheflush_time;
913+unsigned long cache_decay_ticks;
914+
915+extern unsigned long cheetah_tune_scheduling(void);
916+
917+static void __init smp_tune_scheduling(void)
918+{
919+ unsigned long orig_flush_base, flush_base, flags, *p;
920+ unsigned int ecache_size, order;
921+ cycles_t tick1, tick2, raw;
922+
923+ /* Approximate heuristic for SMP scheduling. It is an
924+ * estimation of the time it takes to flush the L2 cache
925+ * on the local processor.
926+ *
927+ * The ia32 chooses to use the L1 cache flush time instead,
928+ * and I consider this complete nonsense. The Ultra can service
929+ * a miss to the L1 with a hit to the L2 in 7 or 8 cycles, and
930+ * L2 misses are what create extra bus traffic (ie. the "cost"
931+ * of moving a process from one cpu to another).
932+ */
933+ printk("SMP: Calibrating ecache flush... ");
934+ if (tlb_type == cheetah || tlb_type == cheetah_plus) {
935+ cacheflush_time = cheetah_tune_scheduling();
936+ goto report;
937+ }
938+
939+ ecache_size = prom_getintdefault(linux_cpus[0].prom_node,
940+ "ecache-size", (512 * 1024));
941+ if (ecache_size > (4 * 1024 * 1024))
942+ ecache_size = (4 * 1024 * 1024);
943+ orig_flush_base = flush_base =
944+ __get_free_pages(GFP_KERNEL, order = get_order(ecache_size));
945+
946+ if (flush_base != 0UL) {
947+ local_irq_save(flags);
948+
949+ /* Scan twice the size once just to get the TLB entries
950+ * loaded and make sure the second scan measures pure misses.
951+ */
952+ for (p = (unsigned long *)flush_base;
953+ ((unsigned long)p) < (flush_base + (ecache_size<<1));
954+ p += (64 / sizeof(unsigned long)))
955+ *((volatile unsigned long *)p);
956+
957+ tick1 = tick_ops->get_tick();
958+
959+ __asm__ __volatile__("1:\n\t"
960+ "ldx [%0 + 0x000], %%g1\n\t"
961+ "ldx [%0 + 0x040], %%g2\n\t"
962+ "ldx [%0 + 0x080], %%g3\n\t"
963+ "ldx [%0 + 0x0c0], %%g5\n\t"
964+ "add %0, 0x100, %0\n\t"
965+ "cmp %0, %2\n\t"
966+ "bne,pt %%xcc, 1b\n\t"
967+ " nop"
968+ : "=&r" (flush_base)
969+ : "0" (flush_base),
970+ "r" (flush_base + ecache_size)
971+ : "g1", "g2", "g3", "g5");
972+
973+ tick2 = tick_ops->get_tick();
974+
975+ local_irq_restore(flags);
976+
977+ raw = (tick2 - tick1);
978+
979+ /* Dampen it a little, considering two processes
980+ * sharing the cache and fitting.
981+ */
982+ cacheflush_time = (raw - (raw >> 2));
983+
984+ free_pages(orig_flush_base, order);
985+ } else {
986+ cacheflush_time = ((ecache_size << 2) +
987+ (ecache_size << 1));
988+ }
989+report:
990+ /* Convert ticks/sticks to jiffies. */
991+ cache_decay_ticks = cacheflush_time / timer_tick_offset;
992+ if (cache_decay_ticks < 1)
993+ cache_decay_ticks = 1;
994+
995+ printk("Using heuristic of %ld cycles, %ld ticks.\n",
996+ cacheflush_time, cache_decay_ticks);
997+}
998+
999 static inline unsigned long find_flush_base(unsigned long size)
1000 {
1001 struct page *p = mem_map;
1002diff -urN linux-2.4.24.org/arch/sparc64/kernel/trampoline.S linux-2.4.24/arch/sparc64/kernel/trampoline.S
1003--- linux-2.4.24.org/arch/sparc64/kernel/trampoline.S 2004-02-04 20:51:29.425484150 +0100
1004+++ linux-2.4.24/arch/sparc64/kernel/trampoline.S 2004-02-04 20:52:53.073085888 +0100
1005@@ -250,7 +250,7 @@
1006 wrpr %o1, PSTATE_IG, %pstate
1007
1008 /* Get our UPA MID. */
1009- lduw [%o2 + AOFF_task_processor], %g1
1010+ lduw [%o2 + AOFF_task_cpu], %g1
1011 sethi %hi(cpu_data), %g5
1012 or %g5, %lo(cpu_data), %g5
1013
1014diff -urN linux-2.4.24.org/arch/sparc64/kernel/traps.c linux-2.4.24/arch/sparc64/kernel/traps.c
1015--- linux-2.4.24.org/arch/sparc64/kernel/traps.c 2004-02-04 20:51:28.672640738 +0100
1016+++ linux-2.4.24/arch/sparc64/kernel/traps.c 2004-02-04 20:52:53.078084848 +0100
1017@@ -16,6 +16,7 @@
1018 #include <linux/smp.h>
1019 #include <linux/smp_lock.h>
1020 #include <linux/mm.h>
1021+#include <linux/init.h>
1022
1023 #include <asm/delay.h>
1024 #include <asm/system.h>
1025@@ -755,6 +756,48 @@
1026 "i" (ASI_PHYS_USE_EC));
1027 }
1028
1029+#ifdef CONFIG_SMP
1030+unsigned long __init cheetah_tune_scheduling(void)
1031+{
1032+ unsigned long tick1, tick2, raw;
1033+ unsigned long flush_base = ecache_flush_physbase;
1034+ unsigned long flush_linesize = ecache_flush_linesize;
1035+ unsigned long flush_size = ecache_flush_size;
1036+
1037+ /* Run through the whole cache to guarantee the timed loop
1038+ * is really displacing cache lines.
1039+ */
1040+ __asm__ __volatile__("1: subcc %0, %4, %0\n\t"
1041+ " bne,pt %%xcc, 1b\n\t"
1042+ " ldxa [%2 + %0] %3, %%g0\n\t"
1043+ : "=&r" (flush_size)
1044+ : "0" (flush_size), "r" (flush_base),
1045+ "i" (ASI_PHYS_USE_EC), "r" (flush_linesize));
1046+
1047+ /* The flush area is 2 X Ecache-size, so cut this in half for
1048+ * the timed loop.
1049+ */
1050+ flush_base = ecache_flush_physbase;
1051+ flush_linesize = ecache_flush_linesize;
1052+ flush_size = ecache_flush_size >> 1;
1053+
1054+ __asm__ __volatile__("rd %%tick, %0" : "=r" (tick1));
1055+
1056+ __asm__ __volatile__("1: subcc %0, %4, %0\n\t"
1057+ " bne,pt %%xcc, 1b\n\t"
1058+ " ldxa [%2 + %0] %3, %%g0\n\t"
1059+ : "=&r" (flush_size)
1060+ : "0" (flush_size), "r" (flush_base),
1061+ "i" (ASI_PHYS_USE_EC), "r" (flush_linesize));
1062+
1063+ __asm__ __volatile__("rd %%tick, %0" : "=r" (tick2));
1064+
1065+ raw = (tick2 - tick1);
1066+
1067+ return (raw - (raw >> 2));
1068+}
1069+#endif
1070+
1071 /* Unfortunately, the diagnostic access to the I-cache tags we need to
1072 * use to clear the thing interferes with I-cache coherency transactions.
1073 *
1074diff -urN linux-2.4.24.org/Documentation/sched-coding.txt linux-2.4.24/Documentation/sched-coding.txt
1075--- linux-2.4.24.org/Documentation/sched-coding.txt 1970-01-01 01:00:00.000000000 +0100
1076+++ linux-2.4.24/Documentation/sched-coding.txt 2004-02-04 20:52:53.082084016 +0100
1077@@ -0,0 +1,126 @@
1078+ Reference for various scheduler-related methods in the O(1) scheduler
1079+ Robert Love <rml@tech9.net>, MontaVista Software
1080+
1081+
1082+Note most of these methods are local to kernel/sched.c - this is by design.
1083+The scheduler is meant to be self-contained and abstracted away. This document
1084+is primarily for understanding the scheduler, not interfacing to it. Some of
1085+the discussed interfaces, however, are general process/scheduling methods.
1086+They are typically defined in include/linux/sched.h.
1087+
1088+
1089+Main Scheduling Methods
1090+-----------------------
1091+
1092+void load_balance(runqueue_t *this_rq, int idle)
1093+ Attempts to pull tasks from one cpu to another to balance cpu usage,
1094+ if needed. This method is called explicitly if the runqueues are
1095+ imbalanced or periodically by the timer tick. Prior to calling,
1096+ the current runqueue must be locked and interrupts disabled.
1097+
1098+void schedule()
1099+ The main scheduling function. Upon return, the highest priority
1100+ process will be active.
1101+
1102+
1103+Locking
1104+-------
1105+
1106+Each runqueue has its own lock, rq->lock. When multiple runqueues need
1107+to be locked, lock acquires must be ordered by ascending &runqueue value.
1108+
1109+A specific runqueue is locked via
1110+
1111+ task_rq_lock(task_t pid, unsigned long *flags)
1112+
1113+which disables preemption, disables interrupts, and locks the runqueue pid is
1114+running on. Likewise,
1115+
1116+ task_rq_unlock(task_t pid, unsigned long *flags)
1117+
1118+unlocks the runqueue pid is running on, restores interrupts to their previous
1119+state, and reenables preemption.
1120+
1121+The routines
1122+
1123+ double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
1124+
1125+and
1126+
1127+ double_rq_unlock(runqueue_t *rq1, runqueue_t rq2)
1128+
1129+safely lock and unlock, respectively, the two specified runqueues. They do
1130+not, however, disable and restore interrupts. Users are required to do so
1131+manually before and after calls.
1132+
1133+
1134+Values
1135+------
1136+
1137+MAX_PRIO
1138+ The maximum priority of the system, stored in the task as task->prio.
1139+ Lower priorities are higher. Normal (non-RT) priorities range from
1140+ MAX_RT_PRIO to (MAX_PRIO - 1).
1141+MAX_RT_PRIO
1142+ The maximum real-time priority of the system. Valid RT priorities
1143+ range from 0 to (MAX_RT_PRIO - 1).
1144+MAX_USER_RT_PRIO
1145+ The maximum real-time priority that is exported to user-space. Should
1146+ always be equal to or less than MAX_RT_PRIO. Setting it less allows
1147+ kernel threads to have higher priorities than any user-space task.
1148+MIN_TIMESLICE
1149+MAX_TIMESLICE
1150+ Respectively, the minimum and maximum timeslices (quanta) of a process.
1151+
1152+Data
1153+----
1154+
1155+struct runqueue
1156+ The main per-CPU runqueue data structure.
1157+struct task_struct
1158+ The main per-process data structure.
1159+
1160+
1161+General Methods
1162+---------------
1163+
1164+cpu_rq(cpu)
1165+ Returns the runqueue of the specified cpu.
1166+this_rq()
1167+ Returns the runqueue of the current cpu.
1168+task_rq(pid)
1169+ Returns the runqueue which holds the specified pid.
1170+cpu_curr(cpu)
1171+ Returns the task currently running on the given cpu.
1172+rt_task(pid)
1173+ Returns true if pid is real-time, false if not.
1174+
1175+
1176+Process Control Methods
1177+-----------------------
1178+
1179+void set_user_nice(task_t *p, long nice)
1180+ Sets the "nice" value of task p to the given value.
1181+int setscheduler(pid_t pid, int policy, struct sched_param *param)
1182+ Sets the scheduling policy and parameters for the given pid.
1183+void set_cpus_allowed(task_t *p, unsigned long new_mask)
1184+ Sets a given task's CPU affinity and migrates it to a proper cpu.
1185+ Callers must have a valid reference to the task and ensure the
1186+ task does not exit prematurely. No locks can be held during the call.
1187+set_task_state(tsk, state_value)
1188+ Sets the given task's state to the given value.
1189+set_current_state(state_value)
1190+ Sets the current task's state to the given value.
1191+void set_tsk_need_resched(struct task_struct *tsk)
1192+ Sets need_resched in the given task.
1193+void clear_tsk_need_resched(struct task_struct *tsk)
1194+ Clears need_resched in the given task.
1195+void set_need_resched()
1196+ Sets need_resched in the current task.
1197+void clear_need_resched()
1198+ Clears need_resched in the current task.
1199+int need_resched()
1200+ Returns true if need_resched is set in the current task, false
1201+ otherwise.
1202+yield()
1203+ Place the current process at the end of the runqueue and call schedule.
1204diff -urN linux-2.4.24.org/Documentation/sched-design.txt linux-2.4.24/Documentation/sched-design.txt
1205--- linux-2.4.24.org/Documentation/sched-design.txt 1970-01-01 01:00:00.000000000 +0100
1206+++ linux-2.4.24/Documentation/sched-design.txt 2004-02-04 20:52:53.088082769 +0100
1207@@ -0,0 +1,165 @@
1208+ Goals, Design and Implementation of the
1209+ new ultra-scalable O(1) scheduler
1210+
1211+
1212+ This is an edited version of an email Ingo Molnar sent to
1213+ lkml on 4 Jan 2002. It describes the goals, design, and
1214+ implementation of Ingo's new ultra-scalable O(1) scheduler.
1215+ Last Updated: 18 April 2002.
1216+
1217+
1218+Goal
1219+====
1220+
1221+The main goal of the new scheduler is to keep all the good things we know
1222+and love about the current Linux scheduler:
1223+
1224+ - good interactive performance even during high load: if the user
1225+ types or clicks then the system must react instantly and must execute
1226+ the user tasks smoothly, even during considerable background load.
1227+
1228+ - good scheduling/wakeup performance with 1-2 runnable processes.
1229+
1230+ - fairness: no process should stay without any timeslice for any
1231+ unreasonable amount of time. No process should get an unjustly high
1232+ amount of CPU time.
1233+
1234+ - priorities: less important tasks can be started with lower priority,
1235+ more important tasks with higher priority.
1236+
1237+ - SMP efficiency: no CPU should stay idle if there is work to do.
1238+
1239+ - SMP affinity: processes which run on one CPU should stay affine to
1240+ that CPU. Processes should not bounce between CPUs too frequently.
1241+
1242+ - plus additional scheduler features: RT scheduling, CPU binding.
1243+
1244+and the goal is also to add a few new things:
1245+
1246+ - fully O(1) scheduling. Are you tired of the recalculation loop
1247+ blowing the L1 cache away every now and then? Do you think the goodness
1248+ loop is taking a bit too long to finish if there are lots of runnable
1249+ processes? This new scheduler takes no prisoners: wakeup(), schedule(),
1250+ the timer interrupt are all O(1) algorithms. There is no recalculation
1251+ loop. There is no goodness loop either.
1252+
1253+ - 'perfect' SMP scalability. With the new scheduler there is no 'big'
1254+ runqueue_lock anymore - it's all per-CPU runqueues and locks - two
1255+ tasks on two separate CPUs can wake up, schedule and context-switch
1256+ completely in parallel, without any interlocking. All
1257+ scheduling-relevant data is structured for maximum scalability.
1258+
1259+ - better SMP affinity. The old scheduler has a particular weakness that
1260+ causes the random bouncing of tasks between CPUs if/when there are
1261+ higher priority/interactive tasks; this was observed and reported by many
1262+ people. The reason is that the timeslice recalculation loop first needs
1263+ every currently running task to consume its timeslice. But when this
1264+ happens on eg. an 8-way system, then this property starves an
1265+ increasing number of CPUs from executing any process. Once the last
1266+ task that has a timeslice left has finished using up that timeslice,
1267+ the recalculation loop is triggered and other CPUs can start executing
1268+ tasks again - after having idled around for a number of timer ticks.
1269+ The more CPUs, the worse this effect.
1270+
1271+ Furthermore, this same effect causes the bouncing effect as well:
1272+ whenever there is such a 'timeslice squeeze' of the global runqueue,
1273+ idle processors start executing tasks which are not affine to that CPU.
1274+ (because the affine tasks have finished off their timeslices already.)
1275+
1276+ The new scheduler solves this problem by distributing timeslices on a
1277+ per-CPU basis, without having any global synchronization or
1278+ recalculation.
1279+
1280+ - batch scheduling. A significant proportion of computing-intensive tasks
1281+ benefit from batch-scheduling, where timeslices are long and processes
1282+ are roundrobin scheduled. The new scheduler does such batch-scheduling
1283+ of the lowest priority tasks - so nice +19 jobs will get
1284+ 'batch-scheduled' automatically. With this scheduler, nice +19 jobs are
1285+ in essence SCHED_IDLE, from an interactiveness point of view.
1286+
1287+ - handle extreme loads more smoothly, without breakdown and scheduling
1288+ storms.
1289+
1290+ - O(1) RT scheduling. For those RT folks who are paranoid about the
1291+ O(nr_running) property of the goodness loop and the recalculation loop.
1292+
1293+ - run fork()ed children before the parent. Andrea has pointed out the
1294+ advantages of this a few months ago, but patches for this feature
1295+ do not work with the old scheduler as well as they should,
1296+ because idle processes often steal the new child before the fork()ing
1297+ CPU gets to execute it.
1298+
1299+
1300+Design
1301+======
1302+
1303+the core of the new scheduler consists of the following mechanisms:
1304+
1305+ - *two*, priority-ordered 'priority arrays' per CPU. There is an 'active'
1306+ array and an 'expired' array. The active array contains all tasks that
1307+ are affine to this CPU and have timeslices left. The expired array
1308+ contains all tasks which have used up their timeslices - but this array
1309+ is kept sorted as well. The active and expired array is not accessed
1310+ directly, it's accessed through two pointers in the per-CPU runqueue
1311+ structure. If all active tasks are used up then we 'switch' the two
1312+ pointers and from now on the ready-to-go (former-) expired array is the
1313+ active array - and the empty active array serves as the new collector
1314+ for expired tasks.
1315+
1316+ - there is a 64-bit bitmap cache for array indices. Finding the highest
1317+ priority task is thus a matter of two x86 BSFL bit-search instructions.
1318+
1319+the split-array solution enables us to have an arbitrary number of active
1320+and expired tasks, and the recalculation of timeslices can be done
1321+immediately when the timeslice expires. Because the arrays are always
1322+accessed through the pointers in the runqueue, switching the two arrays can
1323+be done very quickly.
1324+
1325+this is a hybrid priority-list approach coupled with roundrobin
1326+scheduling and the array-switch method of distributing timeslices.
1327+
1328+ - there is a per-task 'load estimator'.
1329+
1330+one of the toughest things to get right is good interactive feel during
1331+heavy system load. While playing with various scheduler variants i found
1332+that the best interactive feel is achieved not by 'boosting' interactive
1333+tasks, but by 'punishing' tasks that want to use more CPU time than there
1334+is available. This method is also much easier to do in an O(1) fashion.
1335+
1336+to establish the actual 'load' the task contributes to the system, a
1337+complex-looking but pretty accurate method is used: there is a 4-entry
1338+'history' ringbuffer of the task's activities during the last 4 seconds.
1339+This ringbuffer is operated without much overhead. The entries tell the
1340+scheduler a pretty accurate load-history of the task: has it used up more
1341+CPU time or less during the past N seconds. [the size '4' and the interval
1342+of 4x 1 seconds was found by lots of experimentation - this part is
1343+flexible and can be changed in both directions.]
1344+
1345+the penalty a task gets for generating more load than the CPU can handle
1346+is a priority decrease - there is a maximum amount to this penalty
1347+relative to their static priority, so even fully CPU-bound tasks will
1348+observe each other's priorities, and will share the CPU accordingly.
1349+
1350+the SMP load-balancer can be extended/switched with additional parallel
1351+computing and cache hierarchy concepts: NUMA scheduling, multi-core CPUs
1352+can be supported easily by changing the load-balancer. Right now it's
1353+tuned for my SMP systems.
1354+
1355+i skipped the prev->mm == next->mm advantage - no workload i know of shows
1356+any sensitivity to this. It can be added back by sacrificing O(1)
1357+schedule() [the current and one-lower priority list can be searched for a
1358+that->mm == current->mm condition], but costs a fair number of cycles
1359+during a number of important workloads, so i wanted to avoid this as much
1360+as possible.
1361+
1362+- the SMP idle-task startup code was still racy and the new scheduler
1363+triggered this. So i streamlined the idle-setup code a bit. We do not call
1364+into schedule() before all processors have started up fully and all idle
1365+threads are in place.
1366+
1367+- the patch also cleans up a number of aspects of sched.c - moves code
1368+into other areas of the kernel where it's appropriate, and simplifies
1369+certain code paths and data constructs. As a result, the new scheduler's
1370+code is smaller than the old one.
1371+
1372+ Ingo
1373diff -urN linux-2.4.24.org/drivers/char/drm-4.0/tdfx_drv.c linux-2.4.24/drivers/char/drm-4.0/tdfx_drv.c
1374--- linux-2.4.24.org/drivers/char/drm-4.0/tdfx_drv.c 2004-02-04 20:49:21.677055474 +0100
1375+++ linux-2.4.24/drivers/char/drm-4.0/tdfx_drv.c 2004-02-04 20:52:53.236051992 +0100
1376@@ -554,7 +554,6 @@
1377 lock.context, current->pid, j,
1378 dev->lock.lock_time, jiffies);
1379 current->state = TASK_INTERRUPTIBLE;
1380- current->policy |= SCHED_YIELD;
1381 schedule_timeout(DRM_LOCK_SLICE-j);
1382 DRM_DEBUG("jiffies=%d\n", jiffies);
1383 }
1384diff -urN linux-2.4.24.org/drivers/char/mwave/mwavedd.c linux-2.4.24/drivers/char/mwave/mwavedd.c
1385--- linux-2.4.24.org/drivers/char/mwave/mwavedd.c 2004-02-04 20:49:18.334750669 +0100
1386+++ linux-2.4.24/drivers/char/mwave/mwavedd.c 2004-02-04 20:52:53.321034316 +0100
1387@@ -279,7 +279,6 @@
1388 pDrvData->IPCs[ipcnum].bIsHere = FALSE;
1389 pDrvData->IPCs[ipcnum].bIsEnabled = TRUE;
1390 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0)
1391- current->nice = -20; /* boost to provide priority timing */
1392 #else
1393 current->priority = 0x28; /* boost to provide priority timing */
1394 #endif
1395diff -urN linux-2.4.24.org/drivers/char/serial_txx927.c linux-2.4.24/drivers/char/serial_txx927.c
1396--- linux-2.4.24.org/drivers/char/serial_txx927.c 2004-02-04 20:49:11.902088655 +0100
1397+++ linux-2.4.24/drivers/char/serial_txx927.c 2004-02-04 20:52:53.361025998 +0100
1398@@ -1533,7 +1533,6 @@
1399 printk("cisr = %d (jiff=%lu)...", cisr, jiffies);
1400 #endif
1401 current->state = TASK_INTERRUPTIBLE;
1402- current->counter = 0; /* make us low-priority */
1403 schedule_timeout(char_time);
1404 if (signal_pending(current))
1405 break;
1406diff -urN linux-2.4.24.org/drivers/md/md.c linux-2.4.24/drivers/md/md.c
1407--- linux-2.4.24.org/drivers/md/md.c 2004-02-04 20:50:32.930234961 +0100
1408+++ linux-2.4.24/drivers/md/md.c 2004-02-04 20:52:53.369024334 +0100
1409@@ -2939,8 +2939,6 @@
1410 * bdflush, otherwise bdflush will deadlock if there are too
1411 * many dirty RAID5 blocks.
1412 */
1413- current->policy = SCHED_OTHER;
1414- current->nice = -20;
1415 md_unlock_kernel();
1416
1417 complete(thread->event);
1418@@ -3464,11 +3462,6 @@
1419 "(but not more than %d KB/sec) for reconstruction.\n",
1420 sysctl_speed_limit_max);
1421
1422- /*
1423- * Resync has low priority.
1424- */
1425- current->nice = 19;
1426-
1427 is_mddev_idle(mddev); /* this also initializes IO event counters */
1428 for (m = 0; m < SYNC_MARKS; m++) {
1429 mark[m] = jiffies;
1430@@ -3546,16 +3539,13 @@
1431 currspeed = (j-mddev->resync_mark_cnt)/2/((jiffies-mddev->resync_mark)/HZ +1) +1;
1432
1433 if (currspeed > sysctl_speed_limit_min) {
1434- current->nice = 19;
1435-
1436 if ((currspeed > sysctl_speed_limit_max) ||
1437 !is_mddev_idle(mddev)) {
1438 current->state = TASK_INTERRUPTIBLE;
1439 md_schedule_timeout(HZ/4);
1440 goto repeat;
1441 }
1442- } else
1443- current->nice = -20;
1444+ }
1445 }
1446 printk(KERN_INFO "md: md%d: sync done.\n",mdidx(mddev));
1447 err = 0;
1448diff -urN linux-2.4.24.org/fs/binfmt_elf.c linux-2.4.24/fs/binfmt_elf.c
1449--- linux-2.4.24.org/fs/binfmt_elf.c 2004-02-04 20:47:14.464515701 +0100
1450+++ linux-2.4.24/fs/binfmt_elf.c 2004-02-04 20:52:53.390019967 +0100
1451@@ -1173,7 +1173,7 @@
1452 psinfo.pr_state = i;
1453 psinfo.pr_sname = (i < 0 || i > 5) ? '.' : "RSDZTD"[i];
1454 psinfo.pr_zomb = psinfo.pr_sname == 'Z';
1455- psinfo.pr_nice = current->nice;
1456+ psinfo.pr_nice = task_nice(current);
1457 psinfo.pr_flag = current->flags;
1458 psinfo.pr_uid = NEW_TO_OLD_UID(current->uid);
1459 psinfo.pr_gid = NEW_TO_OLD_GID(current->gid);
1460diff -urN linux-2.4.24.org/fs/jffs2/background.c linux-2.4.24/fs/jffs2/background.c
1461--- linux-2.4.24.org/fs/jffs2/background.c 2004-02-04 20:47:24.029526165 +0100
1462+++ linux-2.4.24/fs/jffs2/background.c 2004-02-04 20:52:53.418014145 +0100
1463@@ -106,9 +106,6 @@
1464
1465 sprintf(current->comm, "jffs2_gcd_mtd%d", c->mtd->index);
1466
1467- /* FIXME in the 2.2 backport */
1468- current->nice = 10;
1469-
1470 for (;;) {
1471 spin_lock_irq(&current->sigmask_lock);
1472 siginitsetinv (&current->blocked, sigmask(SIGHUP) | sigmask(SIGKILL) | sigmask(SIGSTOP) | sigmask(SIGCONT));
1473diff -urN linux-2.4.24.org/fs/proc/array.c linux-2.4.24/fs/proc/array.c
1474--- linux-2.4.24.org/fs/proc/array.c 2004-02-04 20:47:14.980408395 +0100
1475+++ linux-2.4.24/fs/proc/array.c 2004-02-04 20:52:53.447008114 +0100
1476@@ -339,9 +339,8 @@
1477
1478 /* scale priority and nice values from timeslices to -20..20 */
1479 /* to make it look like a "normal" Unix priority/nice value */
1480- priority = task->counter;
1481- priority = 20 - (priority * 10 + DEF_COUNTER / 2) / DEF_COUNTER;
1482- nice = task->nice;
1483+ priority = task_prio(task);
1484+ nice = task_nice(task);
1485
1486 read_lock(&tasklist_lock);
1487 ppid = task->pid ? task->p_opptr->pid : 0;
1488@@ -391,7 +390,7 @@
1489 task->nswap,
1490 task->cnswap,
1491 task->exit_signal,
1492- task->processor);
1493+ task->cpu);
1494 if(mm)
1495 mmput(mm);
1496 return res;
1497diff -urN linux-2.4.24.org/fs/proc/proc_misc.c linux-2.4.24/fs/proc/proc_misc.c
1498--- linux-2.4.24.org/fs/proc/proc_misc.c 2004-02-04 20:47:14.897425655 +0100
1499+++ linux-2.4.24/fs/proc/proc_misc.c 2004-02-04 20:52:53.485000212 +0100
1500@@ -109,11 +109,11 @@
1501 a = avenrun[0] + (FIXED_1/200);
1502 b = avenrun[1] + (FIXED_1/200);
1503 c = avenrun[2] + (FIXED_1/200);
1504- len = sprintf(page,"%d.%02d %d.%02d %d.%02d %d/%d %d\n",
1505+ len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
1506 LOAD_INT(a), LOAD_FRAC(a),
1507 LOAD_INT(b), LOAD_FRAC(b),
1508 LOAD_INT(c), LOAD_FRAC(c),
1509- nr_running, nr_threads, last_pid);
1510+ nr_running(), nr_threads, last_pid);
1511 return proc_calc_metrics(page, start, off, count, eof, len);
1512 }
1513
1514@@ -125,7 +125,7 @@
1515 int len;
1516
1517 uptime = jiffies;
1518- idle = init_tasks[0]->times.tms_utime + init_tasks[0]->times.tms_stime;
1519+ idle = init_task.times.tms_utime + init_task.times.tms_stime;
1520
1521 /* The formula for the fraction parts really is ((t * 100) / HZ) % 100, but
1522 that would overflow about every five days at HZ == 100.
1523@@ -374,10 +374,10 @@
1524 }
1525
1526 proc_sprintf(page, &off, &len,
1527- "\nctxt %u\n"
1528+ "\nctxt %lu\n"
1529 "btime %lu\n"
1530 "processes %lu\n",
1531- kstat.context_swtch,
1532+ nr_context_switches(),
1533 xtime.tv_sec - jif / HZ,
1534 total_forks);
1535
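The /proc/loadavg hunk above only changes how the running-task count is obtained and printed; the load figures themselves are fixed-point values. A small sketch of how such a value becomes the familiar "X.YY" string; the FSHIFT/FIXED_1/LOAD_INT/LOAD_FRAC definitions mirror the usual 2.4 <linux/sched.h> macros and the sample avenrun value is invented:

/* Fixed-point load-average formatting, as used by /proc/loadavg. */
#include <stdio.h>

#define FSHIFT   11                    /* bits of fractional precision */
#define FIXED_1  (1 << FSHIFT)         /* 1.0 in fixed point */
#define LOAD_INT(x)  ((x) >> FSHIFT)
#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100)

int main(void)
{
	unsigned long avenrun0 = 3 * FIXED_1 + FIXED_1 / 4;  /* a load of 3.25 */
	unsigned long a = avenrun0 + FIXED_1 / 200;          /* same rounding as the kernel */

	printf("%lu.%02lu\n", LOAD_INT(a), LOAD_FRAC(a));    /* prints 3.25 */
	return 0;
}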
1536diff -urN linux-2.4.24.org/fs/reiserfs/buffer2.c linux-2.4.24/fs/reiserfs/buffer2.c
1537--- linux-2.4.24.org/fs/reiserfs/buffer2.c 2004-02-04 20:47:23.322673191 +0100
1538+++ linux-2.4.24/fs/reiserfs/buffer2.c 2004-02-04 20:52:53.511994597 +0100
1539@@ -51,11 +51,11 @@
1540 struct buffer_head * reiserfs_bread (struct super_block *super, int n_block, int n_size)
1541 {
1542 struct buffer_head *result;
1543- PROC_EXP( unsigned int ctx_switches = kstat.context_swtch );
1544+ PROC_EXP( unsigned int ctx_switches = nr_context_switches(); );
1545
1546 result = bread (super -> s_dev, n_block, n_size);
1547 PROC_INFO_INC( super, breads );
1548- PROC_EXP( if( kstat.context_swtch != ctx_switches )
1549+ PROC_EXP( if( nr_context_switches() != ctx_switches )
1550 PROC_INFO_INC( super, bread_miss ) );
1551 return result;
1552 }
1553diff -urN linux-2.4.24.org/include/asm-alpha/bitops.h linux-2.4.24/include/asm-alpha/bitops.h
1554--- linux-2.4.24.org/include/asm-alpha/bitops.h 2004-02-04 20:47:46.527846489 +0100
1555+++ linux-2.4.24/include/asm-alpha/bitops.h 2004-02-04 20:52:53.537989191 +0100
1556@@ -3,6 +3,7 @@
1557
1558 #include <linux/config.h>
1559 #include <linux/kernel.h>
1560+#include <asm/compiler.h>
1561
1562 /*
1563 * Copyright 1994, Linus Torvalds.
1564@@ -60,25 +61,25 @@
1565
1566 __asm__ __volatile__(
1567 "1: ldl_l %0,%3\n"
1568- " and %0,%2,%0\n"
1569+ " bic %0,%2,%0\n"
1570 " stl_c %0,%1\n"
1571 " beq %0,2f\n"
1572 ".subsection 2\n"
1573 "2: br 1b\n"
1574 ".previous"
1575 :"=&r" (temp), "=m" (*m)
1576- :"Ir" (~(1UL << (nr & 31))), "m" (*m));
1577+ :"Ir" (1UL << (nr & 31)), "m" (*m));
1578 }
1579
1580 /*
1581 * WARNING: non atomic version.
1582 */
1583 static __inline__ void
1584-__change_bit(unsigned long nr, volatile void * addr)
1585+__clear_bit(unsigned long nr, volatile void * addr)
1586 {
1587 int *m = ((int *) addr) + (nr >> 5);
1588
1589- *m ^= 1 << (nr & 31);
1590+ *m &= ~(1 << (nr & 31));
1591 }
1592
1593 static inline void
1594@@ -99,6 +100,17 @@
1595 :"Ir" (1UL << (nr & 31)), "m" (*m));
1596 }
1597
1598+/*
1599+ * WARNING: non atomic version.
1600+ */
1601+static __inline__ void
1602+__change_bit(unsigned long nr, volatile void * addr)
1603+{
1604+ int *m = ((int *) addr) + (nr >> 5);
1605+
1606+ *m ^= 1 << (nr & 31);
1607+}
1608+
1609 static inline int
1610 test_and_set_bit(unsigned long nr, volatile void *addr)
1611 {
1612@@ -181,20 +193,6 @@
1613 return (old & mask) != 0;
1614 }
1615
1616-/*
1617- * WARNING: non atomic version.
1618- */
1619-static __inline__ int
1620-__test_and_change_bit(unsigned long nr, volatile void * addr)
1621-{
1622- unsigned long mask = 1 << (nr & 0x1f);
1623- int *m = ((int *) addr) + (nr >> 5);
1624- int old = *m;
1625-
1626- *m = old ^ mask;
1627- return (old & mask) != 0;
1628-}
1629-
1630 static inline int
1631 test_and_change_bit(unsigned long nr, volatile void * addr)
1632 {
1633@@ -220,6 +218,20 @@
1634 return oldbit != 0;
1635 }
1636
1637+/*
1638+ * WARNING: non atomic version.
1639+ */
1640+static __inline__ int
1641+__test_and_change_bit(unsigned long nr, volatile void * addr)
1642+{
1643+ unsigned long mask = 1 << (nr & 0x1f);
1644+ int *m = ((int *) addr) + (nr >> 5);
1645+ int old = *m;
1646+
1647+ *m = old ^ mask;
1648+ return (old & mask) != 0;
1649+}
1650+
1651 static inline int
1652 test_bit(int nr, volatile void * addr)
1653 {
1654@@ -235,12 +247,15 @@
1655 */
1656 static inline unsigned long ffz_b(unsigned long x)
1657 {
1658- unsigned long sum = 0;
1659+ unsigned long sum, x1, x2, x4;
1660
1661 x = ~x & -~x; /* set first 0 bit, clear others */
1662- if (x & 0xF0) sum += 4;
1663- if (x & 0xCC) sum += 2;
1664- if (x & 0xAA) sum += 1;
1665+ x1 = x & 0xAA;
1666+ x2 = x & 0xCC;
1667+ x4 = x & 0xF0;
1668+ sum = x2 ? 2 : 0;
1669+ sum += (x4 != 0) * 4;
1670+ sum += (x1 != 0);
1671
1672 return sum;
1673 }
1674@@ -257,24 +272,46 @@
1675
1676 __asm__("cmpbge %1,%2,%0" : "=r"(bits) : "r"(word), "r"(~0UL));
1677 qofs = ffz_b(bits);
1678- __asm__("extbl %1,%2,%0" : "=r"(bits) : "r"(word), "r"(qofs));
1679+ bits = __kernel_extbl(word, qofs);
1680 bofs = ffz_b(bits);
1681
1682 return qofs*8 + bofs;
1683 #endif
1684 }
1685
1686+/*
1687+ * __ffs = Find First set bit in word. Undefined if no set bit exists.
1688+ */
1689+static inline unsigned long __ffs(unsigned long word)
1690+{
1691+#if defined(__alpha_cix__) && defined(__alpha_fix__)
1692+ /* Whee. EV67 can calculate it directly. */
1693+ unsigned long result;
1694+ __asm__("cttz %1,%0" : "=r"(result) : "r"(word));
1695+ return result;
1696+#else
1697+ unsigned long bits, qofs, bofs;
1698+
1699+ __asm__("cmpbge $31,%1,%0" : "=r"(bits) : "r"(word));
1700+ qofs = ffz_b(bits);
1701+ bits = __kernel_extbl(word, qofs);
1702+ bofs = ffz_b(~bits);
1703+
1704+ return qofs*8 + bofs;
1705+#endif
1706+}
1707+
1708 #ifdef __KERNEL__
1709
1710 /*
1711 * ffs: find first bit set. This is defined the same way as
1712 * the libc and compiler builtin ffs routines, therefore
1713- * differs in spirit from the above ffz (man ffs).
1714+ * differs in spirit from the above __ffs.
1715 */
1716
1717 static inline int ffs(int word)
1718 {
1719- int result = ffz(~word);
1720+ int result = __ffs(word);
1721 return word ? result+1 : 0;
1722 }
1723
1724@@ -316,6 +353,14 @@
1725 #define hweight16(x) hweight64((x) & 0xfffful)
1726 #define hweight8(x) hweight64((x) & 0xfful)
1727 #else
1728+static inline unsigned long hweight64(unsigned long w)
1729+{
1730+ unsigned long result;
1731+ for (result = 0; w ; w >>= 1)
1732+ result += (w & 1);
1733+ return result;
1734+}
1735+
1736 #define hweight32(x) generic_hweight32(x)
1737 #define hweight16(x) generic_hweight16(x)
1738 #define hweight8(x) generic_hweight8(x)
1739@@ -365,13 +410,77 @@
1740 }
1741
1742 /*
1743- * The optimizer actually does good code for this case..
1744+ * Find next one bit in a bitmap reasonably efficiently.
1745+ */
1746+static inline unsigned long
1747+find_next_bit(void * addr, unsigned long size, unsigned long offset)
1748+{
1749+ unsigned long * p = ((unsigned long *) addr) + (offset >> 6);
1750+ unsigned long result = offset & ~63UL;
1751+ unsigned long tmp;
1752+
1753+ if (offset >= size)
1754+ return size;
1755+ size -= result;
1756+ offset &= 63UL;
1757+ if (offset) {
1758+ tmp = *(p++);
1759+ tmp &= ~0UL << offset;
1760+ if (size < 64)
1761+ goto found_first;
1762+ if (tmp)
1763+ goto found_middle;
1764+ size -= 64;
1765+ result += 64;
1766+ }
1767+ while (size & ~63UL) {
1768+ if ((tmp = *(p++)))
1769+ goto found_middle;
1770+ result += 64;
1771+ size -= 64;
1772+ }
1773+ if (!size)
1774+ return result;
1775+ tmp = *p;
1776+found_first:
1777+ tmp &= ~0UL >> (64 - size);
1778+ if (!tmp)
1779+ return result + size;
1780+found_middle:
1781+ return result + __ffs(tmp);
1782+}
1783+
1784+/*
1785+ * The optimizer actually does good code for this case.
1786 */
1787 #define find_first_zero_bit(addr, size) \
1788 find_next_zero_bit((addr), (size), 0)
1789+#define find_first_bit(addr, size) \
1790+ find_next_bit((addr), (size), 0)
1791
1792 #ifdef __KERNEL__
1793
1794+/*
1795+ * Every architecture must define this function. It's the fastest
1796+ * way of searching a 140-bit bitmap where the first 100 bits are
1797+ * unlikely to be set. It's guaranteed that at least one of the 140
1798+ * bits is set.
1799+ */
1800+static inline unsigned long
1801+sched_find_first_bit(unsigned long b[3])
1802+{
1803+ unsigned long b0 = b[0], b1 = b[1], b2 = b[2];
1804+ unsigned long ofs;
1805+
1806+ ofs = (b1 ? 64 : 128);
1807+ b1 = (b1 ? b1 : b2);
1808+ ofs = (b0 ? 0 : ofs);
1809+ b0 = (b0 ? b0 : b1);
1810+
1811+ return __ffs(b0) + ofs;
1812+}
1813+
1814+
1815 #define ext2_set_bit __test_and_set_bit
1816 #define ext2_clear_bit __test_and_clear_bit
1817 #define ext2_test_bit test_bit
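A portable sketch of the sched_find_first_bit() added above, with __builtin_ctzl standing in for the cttz/cmpbge-based __ffs; it assumes 64-bit unsigned long, as on Alpha:

#include <stdio.h>

/* Three 64-bit words cover the 140-slot priority bitmap. */
static unsigned long sched_find_first_bit(const unsigned long b[3])
{
	unsigned long b0 = b[0], b1 = b[1], b2 = b[2];
	unsigned long ofs;

	/* pick the first non-zero word and its bit offset without branching */
	ofs = (b1 ? 64 : 128);
	b1  = (b1 ? b1 : b2);
	ofs = (b0 ? 0 : ofs);
	b0  = (b0 ? b0 : b1);

	return (unsigned long)__builtin_ctzl(b0) + ofs;
}

int main(void)
{
	unsigned long bitmap[3] = { 0, 0, 1UL << 11 };   /* only bit 139 (128 + 11) set */

	printf("first set bit: %lu\n", sched_find_first_bit(bitmap));  /* 139 */
	return 0;
}

Selecting the word and offset with conditional expressions rather than branches mirrors the structure of the Alpha version, where the leading (real-time) part of the bitmap is usually empty.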
1818diff -urN linux-2.4.24.org/include/asm-alpha/smp.h linux-2.4.24/include/asm-alpha/smp.h
1819--- linux-2.4.24.org/include/asm-alpha/smp.h 2004-02-04 20:47:46.648821326 +0100
1820+++ linux-2.4.24/include/asm-alpha/smp.h 2004-02-04 20:52:53.540988567 +0100
1821@@ -55,7 +55,7 @@
1822 #define cpu_logical_map(cpu) __cpu_logical_map[cpu]
1823
1824 #define hard_smp_processor_id() __hard_smp_processor_id()
1825-#define smp_processor_id() (current->processor)
1826+#define smp_processor_id() (current->cpu)
1827
1828 extern unsigned long cpu_present_mask;
1829 #define cpu_online_map cpu_present_mask
1830diff -urN linux-2.4.24.org/include/asm-alpha/system.h linux-2.4.24/include/asm-alpha/system.h
1831--- linux-2.4.24.org/include/asm-alpha/system.h 2004-02-04 20:47:45.924971887 +0100
1832+++ linux-2.4.24/include/asm-alpha/system.h 2004-02-04 20:52:53.545987527 +0100
1833@@ -131,7 +131,6 @@
1834 extern void halt(void) __attribute__((noreturn));
1835 #define __halt() __asm__ __volatile__ ("call_pal %0 #halt" : : "i" (PAL_halt))
1836
1837-#define prepare_to_switch() do { } while(0)
1838 #define switch_to(prev,next,last) \
1839 do { \
1840 unsigned long pcbb; \
1841diff -urN linux-2.4.24.org/include/asm-arm/bitops.h linux-2.4.24/include/asm-arm/bitops.h
1842--- linux-2.4.24.org/include/asm-arm/bitops.h 2004-02-04 20:48:05.614876374 +0100
1843+++ linux-2.4.24/include/asm-arm/bitops.h 2004-02-04 20:52:53.589978377 +0100
1844@@ -2,6 +2,8 @@
1845 * Copyright 1995, Russell King.
1846 * Various bits and pieces copyrights include:
1847 * Linus Torvalds (test_bit).
1848+ * Big endian support: Copyright 2001, Nicolas Pitre
1849+ * reworked by rmk.
1850 *
1851 * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
1852 *
1853@@ -17,81 +19,271 @@
1854
1855 #ifdef __KERNEL__
1856
1857+#include <asm/system.h>
1858+
1859 #define smp_mb__before_clear_bit() do { } while (0)
1860 #define smp_mb__after_clear_bit() do { } while (0)
1861
1862 /*
1863- * Function prototypes to keep gcc -Wall happy.
1864+ * These functions are the basis of our bit ops.
1865+ * First, the atomic bitops.
1866+ *
1867+ * The endian issue for these functions is handled by the macros below.
1868 */
1869-extern void set_bit(int nr, volatile void * addr);
1870+static inline void
1871+____atomic_set_bit_mask(unsigned int mask, volatile unsigned char *p)
1872+{
1873+ unsigned long flags;
1874+
1875+ local_irq_save(flags);
1876+ *p |= mask;
1877+ local_irq_restore(flags);
1878+}
1879+
1880+static inline void
1881+____atomic_clear_bit_mask(unsigned int mask, volatile unsigned char *p)
1882+{
1883+ unsigned long flags;
1884+
1885+ local_irq_save(flags);
1886+ *p &= ~mask;
1887+ local_irq_restore(flags);
1888+}
1889+
1890+static inline void
1891+____atomic_change_bit_mask(unsigned int mask, volatile unsigned char *p)
1892+{
1893+ unsigned long flags;
1894+
1895+ local_irq_save(flags);
1896+ *p ^= mask;
1897+ local_irq_restore(flags);
1898+}
1899
1900-static inline void __set_bit(int nr, volatile void *addr)
1901+static inline int
1902+____atomic_test_and_set_bit_mask(unsigned int mask, volatile unsigned char *p)
1903 {
1904- ((unsigned char *) addr)[nr >> 3] |= (1U << (nr & 7));
1905+ unsigned long flags;
1906+ unsigned int res;
1907+
1908+ local_irq_save(flags);
1909+ res = *p;
1910+ *p = res | mask;
1911+ local_irq_restore(flags);
1912+
1913+ return res & mask;
1914 }
1915
1916-extern void clear_bit(int nr, volatile void * addr);
1917+static inline int
1918+____atomic_test_and_clear_bit_mask(unsigned int mask, volatile unsigned char *p)
1919+{
1920+ unsigned long flags;
1921+ unsigned int res;
1922+
1923+ local_irq_save(flags);
1924+ res = *p;
1925+ *p = res & ~mask;
1926+ local_irq_restore(flags);
1927+
1928+ return res & mask;
1929+}
1930
1931-static inline void __clear_bit(int nr, volatile void *addr)
1932+static inline int
1933+____atomic_test_and_change_bit_mask(unsigned int mask, volatile unsigned char *p)
1934 {
1935- ((unsigned char *) addr)[nr >> 3] &= ~(1U << (nr & 7));
1936+ unsigned long flags;
1937+ unsigned int res;
1938+
1939+ local_irq_save(flags);
1940+ res = *p;
1941+ *p = res ^ mask;
1942+ local_irq_restore(flags);
1943+
1944+ return res & mask;
1945 }
1946
1947-extern void change_bit(int nr, volatile void * addr);
1948+/*
1949+ * Now the non-atomic variants. We let the compiler handle all optimisations
1950+ * for these.
1951+ */
1952+static inline void ____nonatomic_set_bit(int nr, volatile void *p)
1953+{
1954+ ((unsigned char *) p)[nr >> 3] |= (1U << (nr & 7));
1955+}
1956
1957-static inline void __change_bit(int nr, volatile void *addr)
1958+static inline void ____nonatomic_clear_bit(int nr, volatile void *p)
1959 {
1960- ((unsigned char *) addr)[nr >> 3] ^= (1U << (nr & 7));
1961+ ((unsigned char *) p)[nr >> 3] &= ~(1U << (nr & 7));
1962 }
1963
1964-extern int test_and_set_bit(int nr, volatile void * addr);
1965+static inline void ____nonatomic_change_bit(int nr, volatile void *p)
1966+{
1967+ ((unsigned char *) p)[nr >> 3] ^= (1U << (nr & 7));
1968+}
1969
1970-static inline int __test_and_set_bit(int nr, volatile void *addr)
1971+static inline int ____nonatomic_test_and_set_bit(int nr, volatile void *p)
1972 {
1973 unsigned int mask = 1 << (nr & 7);
1974 unsigned int oldval;
1975
1976- oldval = ((unsigned char *) addr)[nr >> 3];
1977- ((unsigned char *) addr)[nr >> 3] = oldval | mask;
1978+ oldval = ((unsigned char *) p)[nr >> 3];
1979+ ((unsigned char *) p)[nr >> 3] = oldval | mask;
1980 return oldval & mask;
1981 }
1982
1983-extern int test_and_clear_bit(int nr, volatile void * addr);
1984-
1985-static inline int __test_and_clear_bit(int nr, volatile void *addr)
1986+static inline int ____nonatomic_test_and_clear_bit(int nr, volatile void *p)
1987 {
1988 unsigned int mask = 1 << (nr & 7);
1989 unsigned int oldval;
1990
1991- oldval = ((unsigned char *) addr)[nr >> 3];
1992- ((unsigned char *) addr)[nr >> 3] = oldval & ~mask;
1993+ oldval = ((unsigned char *) p)[nr >> 3];
1994+ ((unsigned char *) p)[nr >> 3] = oldval & ~mask;
1995 return oldval & mask;
1996 }
1997
1998-extern int test_and_change_bit(int nr, volatile void * addr);
1999-
2000-static inline int __test_and_change_bit(int nr, volatile void *addr)
2001+static inline int ____nonatomic_test_and_change_bit(int nr, volatile void *p)
2002 {
2003 unsigned int mask = 1 << (nr & 7);
2004 unsigned int oldval;
2005
2006- oldval = ((unsigned char *) addr)[nr >> 3];
2007- ((unsigned char *) addr)[nr >> 3] = oldval ^ mask;
2008+ oldval = ((unsigned char *) p)[nr >> 3];
2009+ ((unsigned char *) p)[nr >> 3] = oldval ^ mask;
2010 return oldval & mask;
2011 }
2012
2013-extern int find_first_zero_bit(void * addr, unsigned size);
2014-extern int find_next_zero_bit(void * addr, int size, int offset);
2015-
2016 /*
2017 * This routine doesn't need to be atomic.
2018 */
2019-static inline int test_bit(int nr, const void * addr)
2020+static inline int ____test_bit(int nr, const void * p)
2021 {
2022- return (((unsigned char *) addr)[nr >> 3] >> (nr & 7)) & 1;
2023+ return (((volatile unsigned char *) p)[nr >> 3] >> (nr & 7)) & 1;
2024 }
2025
2026 /*
2027+ * A note about Endian-ness.
2028+ * -------------------------
2029+ *
2030+ * When the ARM is put into big endian mode via CR15, the processor
2031+ * merely swaps the order of bytes within words, thus:
2032+ *
2033+ * ------------ physical data bus bits -----------
2034+ * D31 ... D24 D23 ... D16 D15 ... D8 D7 ... D0
2035+ * little byte 3 byte 2 byte 1 byte 0
2036+ * big byte 0 byte 1 byte 2 byte 3
2037+ *
2038+ * This means that reading a 32-bit word at address 0 returns the same
2039+ * value irrespective of the endian mode bit.
2040+ *
2041+ * Peripheral devices should be connected with the data bus reversed in
2042+ * "Big Endian" mode. ARM Application Note 61 is applicable, and is
2043+ * available from http://www.arm.com/.
2044+ *
2045+ * The following assumes that the data bus connectivity for big endian
2046+ * mode has been followed.
2047+ *
2048+ * Note that bit 0 is defined to be 32-bit word bit 0, not byte 0 bit 0.
2049+ */
2050+
2051+/*
2052+ * Little endian assembly bitops. nr = 0 -> byte 0 bit 0.
2053+ */
2054+extern void _set_bit_le(int nr, volatile void * p);
2055+extern void _clear_bit_le(int nr, volatile void * p);
2056+extern void _change_bit_le(int nr, volatile void * p);
2057+extern int _test_and_set_bit_le(int nr, volatile void * p);
2058+extern int _test_and_clear_bit_le(int nr, volatile void * p);
2059+extern int _test_and_change_bit_le(int nr, volatile void * p);
2060+extern int _find_first_zero_bit_le(void * p, unsigned size);
2061+extern int _find_next_zero_bit_le(void * p, int size, int offset);
2062+
2063+/*
2064+ * Big endian assembly bitops. nr = 0 -> byte 3 bit 0.
2065+ */
2066+extern void _set_bit_be(int nr, volatile void * p);
2067+extern void _clear_bit_be(int nr, volatile void * p);
2068+extern void _change_bit_be(int nr, volatile void * p);
2069+extern int _test_and_set_bit_be(int nr, volatile void * p);
2070+extern int _test_and_clear_bit_be(int nr, volatile void * p);
2071+extern int _test_and_change_bit_be(int nr, volatile void * p);
2072+extern int _find_first_zero_bit_be(void * p, unsigned size);
2073+extern int _find_next_zero_bit_be(void * p, int size, int offset);
2074+
2075+
2076+/*
2077+ * The __* form of bitops are non-atomic and may be reordered.
2078+ */
2079+#define ATOMIC_BITOP_LE(name,nr,p) \
2080+ (__builtin_constant_p(nr) ? \
2081+ ____atomic_##name##_mask(1 << ((nr) & 7), \
2082+ ((unsigned char *)(p)) + ((nr) >> 3)) : \
2083+ _##name##_le(nr,p))
2084+
2085+#define ATOMIC_BITOP_BE(name,nr,p) \
2086+ (__builtin_constant_p(nr) ? \
2087+ ____atomic_##name##_mask(1 << ((nr) & 7), \
2088+ ((unsigned char *)(p)) + (((nr) >> 3) ^ 3)) : \
2089+ _##name##_be(nr,p))
2090+
2091+#define NONATOMIC_BITOP_LE(name,nr,p) \
2092+ (____nonatomic_##name(nr, p))
2093+
2094+#define NONATOMIC_BITOP_BE(name,nr,p) \
2095+ (____nonatomic_##name(nr ^ 0x18, p))
2096+
2097+#ifndef __ARMEB__
2098+/*
2099+ * These are the little endian, atomic definitions.
2100+ */
2101+#define set_bit(nr,p) ATOMIC_BITOP_LE(set_bit,nr,p)
2102+#define clear_bit(nr,p) ATOMIC_BITOP_LE(clear_bit,nr,p)
2103+#define change_bit(nr,p) ATOMIC_BITOP_LE(change_bit,nr,p)
2104+#define test_and_set_bit(nr,p) ATOMIC_BITOP_LE(test_and_set_bit,nr,p)
2105+#define test_and_clear_bit(nr,p) ATOMIC_BITOP_LE(test_and_clear_bit,nr,p)
2106+#define test_and_change_bit(nr,p) ATOMIC_BITOP_LE(test_and_change_bit,nr,p)
2107+#define test_bit(nr,p) ____test_bit(nr,p)
2108+#define find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz)
2109+#define find_next_zero_bit(p,sz,off) _find_next_zero_bit_le(p,sz,off)
2110+
2111+/*
2112+ * These are the little endian, non-atomic definitions.
2113+ */
2114+#define __set_bit(nr,p) NONATOMIC_BITOP_LE(set_bit,nr,p)
2115+#define __clear_bit(nr,p) NONATOMIC_BITOP_LE(clear_bit,nr,p)
2116+#define __change_bit(nr,p) NONATOMIC_BITOP_LE(change_bit,nr,p)
2117+#define __test_and_set_bit(nr,p) NONATOMIC_BITOP_LE(test_and_set_bit,nr,p)
2118+#define __test_and_clear_bit(nr,p) NONATOMIC_BITOP_LE(test_and_clear_bit,nr,p)
2119+#define __test_and_change_bit(nr,p) NONATOMIC_BITOP_LE(test_and_change_bit,nr,p)
2120+#define __test_bit(nr,p) ____test_bit(nr,p)
2121+
2122+#else
2123+
2124+/*
2125+ * These are the big endian, atomic definitions.
2126+ */
2127+#define set_bit(nr,p) ATOMIC_BITOP_BE(set_bit,nr,p)
2128+#define clear_bit(nr,p) ATOMIC_BITOP_BE(clear_bit,nr,p)
2129+#define change_bit(nr,p) ATOMIC_BITOP_BE(change_bit,nr,p)
2130+#define test_and_set_bit(nr,p) ATOMIC_BITOP_BE(test_and_set_bit,nr,p)
2131+#define test_and_clear_bit(nr,p) ATOMIC_BITOP_BE(test_and_clear_bit,nr,p)
2132+#define test_and_change_bit(nr,p) ATOMIC_BITOP_BE(test_and_change_bit,nr,p)
2133+#define test_bit(nr,p) ____test_bit((nr) ^ 0x18, p)
2134+#define find_first_zero_bit(p,sz) _find_first_zero_bit_be(p,sz)
2135+#define find_next_zero_bit(p,sz,off) _find_next_zero_bit_be(p,sz,off)
2136+
2137+/*
2138+ * These are the big endian, non-atomic definitions.
2139+ */
2140+#define __set_bit(nr,p) NONATOMIC_BITOP_BE(set_bit,nr,p)
2141+#define __clear_bit(nr,p) NONATOMIC_BITOP_BE(clear_bit,nr,p)
2142+#define __change_bit(nr,p) NONATOMIC_BITOP_BE(change_bit,nr,p)
2143+#define __test_and_set_bit(nr,p) NONATOMIC_BITOP_BE(test_and_set_bit,nr,p)
2144+#define __test_and_clear_bit(nr,p) NONATOMIC_BITOP_BE(test_and_clear_bit,nr,p)
2145+#define __test_and_change_bit(nr,p) NONATOMIC_BITOP_BE(test_and_change_bit,nr,p)
2146+#define __test_bit(nr,p) ____test_bit((nr) ^ 0x18, p)
2147+
2148+#endif
2149+
2150+/*
2151 * ffz = Find First Zero in word. Undefined if no zero exists,
2152 * so code should check against ~0UL first..
2153 */
2154@@ -110,6 +302,29 @@
2155 }
2156
2157 /*
2158+ * ffz = Find First Zero in word. Undefined if no zero exists,
2159+ * so code should check against ~0UL first..
2160+ */
2161+static inline unsigned long __ffs(unsigned long word)
2162+{
2163+ int k;
2164+
2165+ k = 31;
2166+ if (word & 0x0000ffff) { k -= 16; word <<= 16; }
2167+ if (word & 0x00ff0000) { k -= 8; word <<= 8; }
2168+ if (word & 0x0f000000) { k -= 4; word <<= 4; }
2169+ if (word & 0x30000000) { k -= 2; word <<= 2; }
2170+ if (word & 0x40000000) { k -= 1; }
2171+ return k;
2172+}
2173+
2174+/*
2175+ * fls: find last bit set.
2176+ */
2177+
2178+#define fls(x) generic_fls(x)
2179+
2180+/*
2181 * ffs: find first bit set. This is defined the same way as
2182 * the libc and compiler builtin ffs routines, therefore
2183 * differs in spirit from the above ffz (man ffs).
2184@@ -118,6 +333,22 @@
2185 #define ffs(x) generic_ffs(x)
2186
2187 /*
2188+ * Find first bit set in a 168-bit bitmap, where the first
2189+ * 128 bits are unlikely to be set.
2190+ */
2191+static inline int sched_find_first_bit(unsigned long *b)
2192+{
2193+ unsigned long v;
2194+ unsigned int off;
2195+
2196+ for (off = 0; v = b[off], off < 4; off++) {
2197+ if (unlikely(v))
2198+ break;
2199+ }
2200+ return __ffs(v) + off * 32;
2201+}
2202+
2203+/*
2204 * hweightN: returns the hamming weight (i.e. the number
2205 * of bits set) of a N-bit word
2206 */
2207@@ -126,18 +357,25 @@
2208 #define hweight16(x) generic_hweight16(x)
2209 #define hweight8(x) generic_hweight8(x)
2210
2211-#define ext2_set_bit test_and_set_bit
2212-#define ext2_clear_bit test_and_clear_bit
2213-#define ext2_test_bit test_bit
2214-#define ext2_find_first_zero_bit find_first_zero_bit
2215-#define ext2_find_next_zero_bit find_next_zero_bit
2216-
2217-/* Bitmap functions for the minix filesystem. */
2218-#define minix_test_and_set_bit(nr,addr) test_and_set_bit(nr,addr)
2219-#define minix_set_bit(nr,addr) set_bit(nr,addr)
2220-#define minix_test_and_clear_bit(nr,addr) test_and_clear_bit(nr,addr)
2221-#define minix_test_bit(nr,addr) test_bit(nr,addr)
2222-#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
2223+/*
2224+ * Ext2 is defined to use little-endian byte ordering.
2225+ * These do not need to be atomic.
2226+ */
2227+#define ext2_set_bit(nr,p) NONATOMIC_BITOP_LE(test_and_set_bit,nr,p)
2228+#define ext2_clear_bit(nr,p) NONATOMIC_BITOP_LE(test_and_clear_bit,nr,p)
2229+#define ext2_test_bit(nr,p) __test_bit(nr,p)
2230+#define ext2_find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz)
2231+#define ext2_find_next_zero_bit(p,sz,off) _find_next_zero_bit_le(p,sz,off)
2232+
2233+/*
2234+ * Minix is defined to use little-endian byte ordering.
2235+ * These do not need to be atomic.
2236+ */
2237+#define minix_set_bit(nr,p) NONATOMIC_BITOP_LE(set_bit,nr,p)
2238+#define minix_test_bit(nr,p) __test_bit(nr,p)
2239+#define minix_test_and_set_bit(nr,p) NONATOMIC_BITOP_LE(test_and_set_bit,nr,p)
2240+#define minix_test_and_clear_bit(nr,p) NONATOMIC_BITOP_LE(test_and_clear_bit,nr,p)
2241+#define minix_find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz)
2242
2243 #endif /* __KERNEL__ */
2244
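The big-endian macros above address bit nr ^ 0x18. XORing with 0x18 flips the two byte-select bits of the bit number, so byte k within a 32-bit word becomes byte 3 - k while the position inside the byte is unchanged, matching the byte swap described in the endianness note. A quick check of that arithmetic:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	for (int nr = 0; nr < 32; nr++) {
		int be = nr ^ 0x18;

		assert((be >> 3) == 3 - (nr >> 3));  /* byte index mirrored */
		assert((be & 7)  == (nr & 7));       /* bit-in-byte preserved */
	}
	printf("nr ^ 0x18 mirrors the byte index within each 32-bit word\n");
	return 0;
}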
2245diff -urN linux-2.4.24.org/include/asm-cris/bitops.h linux-2.4.24/include/asm-cris/bitops.h
2246--- linux-2.4.24.org/include/asm-cris/bitops.h 2004-02-04 20:48:26.679494929 +0100
2247+++ linux-2.4.24/include/asm-cris/bitops.h 2004-02-04 20:52:53.595977130 +0100
2248@@ -22,6 +22,7 @@
2249 /* We use generic_ffs so get it; include guards resolve the possible
2250 mutually inclusion. */
2251 #include <linux/bitops.h>
2252+#include <linux/compiler.h>
2253
2254 /*
2255 * Some hacks to defeat gcc over-optimizations..
2256@@ -44,6 +45,8 @@
2257 #define set_bit(nr, addr) (void)test_and_set_bit(nr, addr)
2258 #define __set_bit(nr, addr) (void)__test_and_set_bit(nr, addr)
2259
2260+#define __set_bit(nr, addr) (void)__test_and_set_bit(nr, addr)
2261+
2262 /*
2263 * clear_bit - Clears a bit in memory
2264 * @nr: Bit to clear
2265@@ -58,6 +61,8 @@
2266 #define clear_bit(nr, addr) (void)test_and_clear_bit(nr, addr)
2267 #define __clear_bit(nr, addr) (void)__test_and_clear_bit(nr, addr)
2268
2269+#define __clear_bit(nr, addr) (void)__test_and_clear_bit(nr, addr)
2270+
2271 /*
2272 * change_bit - Toggle a bit in memory
2273 * @nr: Bit to change
2274@@ -91,7 +96,7 @@
2275 * It also implies a memory barrier.
2276 */
2277
2278-extern __inline__ int test_and_set_bit(int nr, void *addr)
2279+extern inline int test_and_set_bit(int nr, void *addr)
2280 {
2281 unsigned int mask, retval;
2282 unsigned long flags;
2283@@ -119,6 +124,18 @@
2284 return retval;
2285 }
2286
2287+extern inline int __test_and_set_bit(int nr, void *addr)
2288+{
2289+ unsigned int mask, retval;
2290+ unsigned int *adr = (unsigned int *)addr;
2291+
2292+ adr += nr >> 5;
2293+ mask = 1 << (nr & 0x1f);
2294+ retval = (mask & *adr) != 0;
2295+ *adr |= mask;
2296+ return retval;
2297+}
2298+
2299 /*
2300 * clear_bit() doesn't provide any barrier for the compiler.
2301 */
2302@@ -134,7 +151,7 @@
2303 * It also implies a memory barrier.
2304 */
2305
2306-extern __inline__ int test_and_clear_bit(int nr, void *addr)
2307+extern inline int test_and_clear_bit(int nr, void *addr)
2308 {
2309 unsigned int mask, retval;
2310 unsigned long flags;
2311@@ -160,7 +177,7 @@
2312 * but actually fail. You must protect multiple accesses with a lock.
2313 */
2314
2315-extern __inline__ int __test_and_clear_bit(int nr, void *addr)
2316+extern inline int __test_and_clear_bit(int nr, void *addr)
2317 {
2318 unsigned int mask, retval;
2319 unsigned int *adr = (unsigned int *)addr;
2320@@ -180,7 +197,7 @@
2321 * It also implies a memory barrier.
2322 */
2323
2324-extern __inline__ int test_and_change_bit(int nr, void *addr)
2325+extern inline int test_and_change_bit(int nr, void *addr)
2326 {
2327 unsigned int mask, retval;
2328 unsigned long flags;
2329@@ -197,7 +214,7 @@
2330
2331 /* WARNING: non atomic and it can be reordered! */
2332
2333-extern __inline__ int __test_and_change_bit(int nr, void *addr)
2334+extern inline int __test_and_change_bit(int nr, void *addr)
2335 {
2336 unsigned int mask, retval;
2337 unsigned int *adr = (unsigned int *)addr;
2338@@ -218,7 +235,7 @@
2339 * This routine doesn't need to be atomic.
2340 */
2341
2342-extern __inline__ int test_bit(int nr, const void *addr)
2343+extern inline int test_bit(int nr, const void *addr)
2344 {
2345 unsigned int mask;
2346 unsigned int *adr = (unsigned int *)addr;
2347@@ -239,7 +256,7 @@
2348 * number. They differ in that the first function also inverts all bits
2349 * in the input.
2350 */
2351-extern __inline__ unsigned long cris_swapnwbrlz(unsigned long w)
2352+extern inline unsigned long cris_swapnwbrlz(unsigned long w)
2353 {
2354 /* Let's just say we return the result in the same register as the
2355 input. Saying we clobber the input but can return the result
2356@@ -255,7 +272,7 @@
2357 return res;
2358 }
2359
2360-extern __inline__ unsigned long cris_swapwbrlz(unsigned long w)
2361+extern inline unsigned long cris_swapwbrlz(unsigned long w)
2362 {
2363 unsigned res;
2364 __asm__ ("swapwbr %0 \n\t"
2365@@ -269,7 +286,7 @@
2366 * ffz = Find First Zero in word. Undefined if no zero exists,
2367 * so code should check against ~0UL first..
2368 */
2369-extern __inline__ unsigned long ffz(unsigned long w)
2370+extern inline unsigned long ffz(unsigned long w)
2371 {
2372 /* The generic_ffs function is used to avoid the asm when the
2373 argument is a constant. */
2374@@ -282,7 +299,7 @@
2375 * Somewhat like ffz but the equivalent of generic_ffs: in contrast to
2376 * ffz we return the first one-bit *plus one*.
2377 */
2378-extern __inline__ unsigned long kernel_ffs(unsigned long w)
2379+extern inline unsigned long kernel_ffs(unsigned long w)
2380 {
2381 /* The generic_ffs function is used to avoid the asm when the
2382 argument is a constant. */
2383@@ -304,7 +321,7 @@
2384 * @offset: The bitnumber to start searching at
2385 * @size: The maximum size to search
2386 */
2387-extern __inline__ int find_next_zero_bit (void * addr, int size, int offset)
2388+extern inline int find_next_zero_bit (void * addr, int size, int offset)
2389 {
2390 unsigned long *p = ((unsigned long *) addr) + (offset >> 5);
2391 unsigned long result = offset & ~31UL;
2392@@ -375,7 +392,45 @@
2393 #define minix_test_bit(nr,addr) test_bit(nr,addr)
2394 #define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
2395
2396-#endif /* __KERNEL__ */
2397+#if 0
2398+/* TODO: see below */
2399+#define sched_find_first_zero_bit(addr) find_first_zero_bit(addr, 168)
2400+
2401+#else
2402+/* TODO: left out pending where to put it.. (there are .h dependencies) */
2403+
2404+ /*
2405+ * Every architecture must define this function. It's the fastest
2406+ * way of searching a 168-bit bitmap where the first 128 bits are
2407+ * unlikely to be set. It's guaranteed that at least one of the 168
2408+ * bits is cleared.
2409+ */
2410+#if 0
2411+#if MAX_RT_PRIO != 128 || MAX_PRIO != 168
2412+# error update this function.
2413+#endif
2414+#else
2415+#define MAX_RT_PRIO 128
2416+#define MAX_PRIO 168
2417+#endif
2418+
2419+static inline int sched_find_first_zero_bit(char *bitmap)
2420+{
2421+ unsigned int *b = (unsigned int *)bitmap;
2422+ unsigned int rt;
2423+
2424+ rt = b[0] & b[1] & b[2] & b[3];
2425+ if (unlikely(rt != 0xffffffff))
2426+ return find_first_zero_bit(bitmap, MAX_RT_PRIO);
2427+
2428+ if (b[4] != ~0)
2429+ return ffz(b[4]) + MAX_RT_PRIO;
2430+ return ffz(b[5]) + 32 + MAX_RT_PRIO;
2431+}
2432+#undef MAX_PRIO
2433+#undef MAX_RT_PRIO
2434+#endif
2435
2436+#endif /* __KERNEL__ */
2437
2438 #endif /* _CRIS_BITOPS_H */
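Unlike the other ports, the CRIS helper above looks for the first cleared bit, i.e. it treats the 168-bit priority bitmap as inverted. A portable sketch of the same search, with __builtin_ctz on the complement standing in for ffz() and a simple word loop in place of find_first_zero_bit():

#include <stdio.h>

static int ffz32(unsigned int w)          /* first zero bit; w must not be ~0u */
{
	return __builtin_ctz(~w);
}

static int sched_find_first_zero_bit(const unsigned int *b)
{
	unsigned int rt = b[0] & b[1] & b[2] & b[3];

	if (rt != 0xffffffffu) {              /* a zero among the first 128 bits */
		for (int i = 0; i < 4; i++)
			if (b[i] != 0xffffffffu)
				return i * 32 + ffz32(b[i]);
	}
	if (b[4] != 0xffffffffu)
		return ffz32(b[4]) + 128;
	return ffz32(b[5]) + 32 + 128;
}

int main(void)
{
	unsigned int bitmap[6] = { ~0u, ~0u, ~0u, ~0u, ~0u, ~(1u << 7) };

	/* only bit 167 (= 128 + 32 + 7) is clear */
	printf("first zero bit: %d\n", sched_find_first_zero_bit(bitmap));
	return 0;
}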
2439diff -urN linux-2.4.24.org/include/asm-generic/bitops.h linux-2.4.24/include/asm-generic/bitops.h
2440--- linux-2.4.24.org/include/asm-generic/bitops.h 2004-02-04 20:47:40.855026441 +0100
2441+++ linux-2.4.24/include/asm-generic/bitops.h 2004-02-04 20:52:53.630969851 +0100
2442@@ -51,6 +51,12 @@
2443 return ((mask & *addr) != 0);
2444 }
2445
2446+/*
2447+ * fls: find last bit set.
2448+ */
2449+
2450+#define fls(x) generic_fls(x)
2451+
2452 #ifdef __KERNEL__
2453
2454 /*
2455diff -urN linux-2.4.24.org/include/asm-i386/bitops.h linux-2.4.24/include/asm-i386/bitops.h
2456--- linux-2.4.24.org/include/asm-i386/bitops.h 2004-02-04 20:47:40.983999614 +0100
2457+++ linux-2.4.24/include/asm-i386/bitops.h 2004-02-04 20:52:53.655964653 +0100
2458@@ -6,6 +6,7 @@
2459 */
2460
2461 #include <linux/config.h>
2462+#include <linux/compiler.h>
2463
2464 /*
2465 * These have to be done with inline assembly: that way the bit-setting
2466@@ -75,6 +76,14 @@
2467 :"=m" (ADDR)
2468 :"Ir" (nr));
2469 }
2470+
2471+static __inline__ void __clear_bit(int nr, volatile void * addr)
2472+{
2473+ __asm__ __volatile__(
2474+ "btrl %1,%0"
2475+ :"=m" (ADDR)
2476+ :"Ir" (nr));
2477+}
2478 #define smp_mb__before_clear_bit() barrier()
2479 #define smp_mb__after_clear_bit() barrier()
2480
2481@@ -284,6 +293,34 @@
2482 }
2483
2484 /**
2485+ * find_first_bit - find the first set bit in a memory region
2486+ * @addr: The address to start the search at
2487+ * @size: The maximum size to search
2488+ *
2489+ * Returns the bit-number of the first set bit, not the number of the byte
2490+ * containing a bit.
2491+ */
2492+static __inline__ int find_first_bit(void * addr, unsigned size)
2493+{
2494+ int d0, d1;
2495+ int res;
2496+
2497+ /* This looks at memory. Mark it volatile to tell gcc not to move it around */
2498+ __asm__ __volatile__(
2499+ "xorl %%eax,%%eax\n\t"
2500+ "repe; scasl\n\t"
2501+ "jz 1f\n\t"
2502+ "leal -4(%%edi),%%edi\n\t"
2503+ "bsfl (%%edi),%%eax\n"
2504+ "1:\tsubl %%ebx,%%edi\n\t"
2505+ "shll $3,%%edi\n\t"
2506+ "addl %%edi,%%eax"
2507+ :"=a" (res), "=&c" (d0), "=&D" (d1)
2508+ :"1" ((size + 31) >> 5), "2" (addr), "b" (addr));
2509+ return res;
2510+}
2511+
2512+/**
2513 * find_next_zero_bit - find the first zero bit in a memory region
2514 * @addr: The address to base the search on
2515 * @offset: The bitnumber to start searching at
2516@@ -296,7 +333,7 @@
2517
2518 if (bit) {
2519 /*
2520- * Look for zero in first byte
2521+ * Look for zero in the first 32 bits.
2522 */
2523 __asm__("bsfl %1,%0\n\t"
2524 "jne 1f\n\t"
2525@@ -317,6 +354,39 @@
2526 }
2527
2528 /**
2529+ * find_next_bit - find the first set bit in a memory region
2530+ * @addr: The address to base the search on
2531+ * @offset: The bitnumber to start searching at
2532+ * @size: The maximum size to search
2533+ */
2534+static __inline__ int find_next_bit (void * addr, int size, int offset)
2535+{
2536+ unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
2537+ int set = 0, bit = offset & 31, res;
2538+
2539+ if (bit) {
2540+ /*
2541+ * Look for nonzero in the first 32 bits:
2542+ */
2543+ __asm__("bsfl %1,%0\n\t"
2544+ "jne 1f\n\t"
2545+ "movl $32, %0\n"
2546+ "1:"
2547+ : "=r" (set)
2548+ : "r" (*p >> bit));
2549+ if (set < (32 - bit))
2550+ return set + offset;
2551+ set = 32 - bit;
2552+ p++;
2553+ }
2554+ /*
2555+ * No set bit yet, search remaining full words for a bit
2556+ */
2557+ res = find_first_bit (p, size - 32 * (p - (unsigned long *) addr));
2558+ return (offset + set + res);
2559+}
2560+
2561+/**
2562 * ffz - find first zero in word.
2563 * @word: The word to search
2564 *
2565@@ -330,8 +400,41 @@
2566 return word;
2567 }
2568
2569+/**
2570+ * __ffs - find first bit in word.
2571+ * @word: The word to search
2572+ * Undefined if no bit exists, so code should check against 0 first.
2573+ */
2574+static __inline__ unsigned long __ffs(unsigned long word)
2575+{
2576+ __asm__("bsfl %1,%0"
2577+ :"=r" (word)
2578+ :"rm" (word));
2579+ return word;
2580+}
2581+#define fls(x) generic_fls(x)
2582+
2583 #ifdef __KERNEL__
2584
2585+/*
2586+ * Every architecture must define this function. It's the fastest
2587+ * way of searching a 140-bit bitmap where the first 100 bits are
2588+ * unlikely to be set. It's guaranteed that at least one of the 140
2589+ * bits is cleared.
2590+ */
2591+static inline int sched_find_first_bit(unsigned long *b)
2592+{
2593+ if (unlikely(b[0]))
2594+ return __ffs(b[0]);
2595+ if (unlikely(b[1]))
2596+ return __ffs(b[1]) + 32;
2597+ if (unlikely(b[2]))
2598+ return __ffs(b[2]) + 64;
2599+ if (b[3])
2600+ return __ffs(b[3]) + 96;
2601+ return __ffs(b[4]) + 128;
2602+}
2603+
2604 /**
2605 * ffs - find first bit set
2606 * @x: the word to search
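The find_first_bit()/find_next_bit() pair added above exists so callers can walk every set bit of a multi-word bitmap. A portable usage sketch of that pattern on 32-bit words, with __builtin_ctz taking the place of the bsfl/scasl assembly:

#include <stdio.h>

#define BITS 96

static int find_next_bit32(const unsigned int *addr, int size, int offset)
{
	int i = offset >> 5;
	unsigned int w;

	if (offset >= size)
		return size;
	w = addr[i] & (~0u << (offset & 31));   /* drop bits below the offset */
	while (!w) {                            /* skip empty words */
		if (++i >= (size + 31) / 32)
			return size;
		w = addr[i];
	}
	offset = i * 32 + __builtin_ctz(w);
	return offset < size ? offset : size;
}

int main(void)
{
	unsigned int bitmap[BITS / 32] = { 0x11u, 0, 0x80000000u };  /* bits 0, 4 and 95 */

	for (int bit = find_next_bit32(bitmap, BITS, 0);
	     bit < BITS;
	     bit = find_next_bit32(bitmap, BITS, bit + 1))
		printf("bit %d is set\n", bit);
	return 0;
}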
2607diff -urN linux-2.4.24.org/include/asm-i386/mmu_context.h linux-2.4.24/include/asm-i386/mmu_context.h
2608--- linux-2.4.24.org/include/asm-i386/mmu_context.h 2004-02-04 20:47:41.355922254 +0100
2609+++ linux-2.4.24/include/asm-i386/mmu_context.h 2004-02-04 20:57:00.389646201 +0100
2610@@ -29,7 +29,7 @@
2611
2612 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu)
2613 {
2614- if (prev != next) {
2615+ if (likely(prev != next)) {
2616 /* stop flush ipis for the previous mm */
2617 clear_bit(cpu, &prev->cpu_vm_mask);
2618 #ifdef CONFIG_SMP
2619@@ -42,7 +42,7 @@
2620 /* load_LDT, if either the previous or next thread
2621 * has a non-default LDT.
2622 */
2623- if (next->context.size+prev->context.size)
2624+ if (unlikely(next->context.size+prev->context.size))
2625 load_LDT(&next->context);
2626 }
2627 #ifdef CONFIG_SMP
2628diff -urN linux-2.4.24.org/include/asm-i386/processor.h linux-2.4.24/include/asm-i386/processor.h
2629--- linux-2.4.24.org/include/asm-i386/processor.h 2004-02-04 20:47:40.967003150 +0100
2630+++ linux-2.4.24/include/asm-i386/processor.h 2004-02-04 20:52:53.702954879 +0100
2631@@ -498,6 +498,8 @@
2632
2633 #define cpu_relax() rep_nop()
2634
2635+#define ARCH_HAS_SMP_BALANCE
2636+
2637 /* Prefetch instructions for Pentium III and AMD Athlon */
2638 #if defined(CONFIG_MPENTIUMIII) || defined (CONFIG_MPENTIUM4)
2639
2640diff -urN linux-2.4.24.org/include/asm-i386/smp_balance.h linux-2.4.24/include/asm-i386/smp_balance.h
2641--- linux-2.4.24.org/include/asm-i386/smp_balance.h 1970-01-01 01:00:00.000000000 +0100
2642+++ linux-2.4.24/include/asm-i386/smp_balance.h 2004-02-04 20:52:53.705954255 +0100
2643@@ -0,0 +1,66 @@
2644+#ifndef _ASM_SMP_BALANCE_H
2645+#define _ASM_SMP_BALANCE_H
2646+
2647+/*
2648+ * We have an architecture-specific SMP load balancer to improve
2649+ * scheduling behavior on hyperthreaded CPUs. Since only P4s have
2650+ * HT, maybe this should be conditional on CONFIG_MPENTIUM4...
2651+ *
2652+ */
2653+
2654+/*
2655+ * Find any idle processor package (i.e. both virtual processors are idle)
2656+ */
2657+static inline int find_idle_package(int this_cpu)
2658+{
2659+ int i;
2660+
2661+ this_cpu = cpu_number_map(this_cpu);
2662+
2663+ for (i = (this_cpu + 1) % smp_num_cpus;
2664+ i != this_cpu;
2665+ i = (i + 1) % smp_num_cpus) {
2666+ int physical = cpu_logical_map(i);
2667+ int sibling = cpu_sibling_map[physical];
2668+
2669+ if (idle_cpu(physical) && idle_cpu(sibling))
2670+ return physical;
2671+ }
2672+ return -1; /* not found */
2673+}
2674+
2675+static inline int arch_reschedule_idle_override(task_t * p, int idle)
2676+{
2677+ if (unlikely(smp_num_siblings > 1) && !idle_cpu(cpu_sibling_map[idle])) {
2678+ int true_idle = find_idle_package(idle);
2679+ if (true_idle >= 0) {
2680+ if (likely(p->cpus_allowed & (1UL << true_idle)))
2681+ idle = true_idle;
2682+ else {
2683+ true_idle = cpu_sibling_map[true_idle];
2684+ if (p->cpus_allowed & (1UL << true_idle))
2685+ idle = true_idle;
2686+ }
2687+ }
2688+ }
2689+
2690+ return idle;
2691+}
2692+
2693+static inline int arch_load_balance(int this_cpu, int idle)
2694+{
2695+ /* Special hack for hyperthreading */
2696+ if (unlikely(smp_num_siblings > 1 && idle == 2 && !idle_cpu(cpu_sibling_map[this_cpu]))) {
2697+ int found;
2698+ struct runqueue *rq_target;
2699+
2700+ if ((found = find_idle_package(this_cpu)) >= 0 ) {
2701+ rq_target = cpu_rq(found);
2702+ resched_task(rq_target->idle);
2703+ return 1;
2704+ }
2705+ }
2706+ return 0;
2707+}
2708+
2709+#endif /* _ASM_SMP_BALANCE_H */
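A user-space sketch of the find_idle_package() idea introduced above: on a hyperthreaded system, prefer a physical package whose two sibling CPUs are both idle. The sibling map and idle flags are invented test data, and the logical-to-physical CPU renumbering done by the kernel version is omitted:

#include <stdio.h>

#define NR_CPUS 4

static const int cpu_sibling_map[NR_CPUS] = { 1, 0, 3, 2 };  /* (0,1) and (2,3) pair up */
static int idle[NR_CPUS];

static int find_idle_package(int this_cpu)
{
	for (int i = (this_cpu + 1) % NR_CPUS; i != this_cpu; i = (i + 1) % NR_CPUS)
		if (idle[i] && idle[cpu_sibling_map[i]])
			return i;
	return -1;                            /* no fully idle package */
}

int main(void)
{
	idle[0] = 0; idle[1] = 1;             /* package 0: one sibling busy   */
	idle[2] = 1; idle[3] = 1;             /* package 1: both siblings idle */

	printf("fully idle package found at cpu %d\n", find_idle_package(0));  /* cpu 2 */
	return 0;
}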
2710diff -urN linux-2.4.24.org/include/asm-i386/smp.h linux-2.4.24/include/asm-i386/smp.h
2711--- linux-2.4.24.org/include/asm-i386/smp.h 2004-02-04 20:47:41.153964261 +0100
2712+++ linux-2.4.24/include/asm-i386/smp.h 2004-02-04 20:52:53.733948432 +0100
2713@@ -40,6 +40,7 @@
2714 extern void smp_flush_tlb(void);
2715 extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
2716 extern void fastcall smp_send_reschedule(int cpu);
2717+extern void smp_send_reschedule_all(void);
2718 extern void smp_invalidate_rcv(void); /* Process an NMI */
2719 extern void (*mtrr_hook) (void);
2720 extern void zap_low_mappings (void);
2721@@ -81,7 +82,7 @@
2722 * so this is correct in the x86 case.
2723 */
2724
2725-#define smp_processor_id() (current->processor)
2726+#define smp_processor_id() (current->cpu)
2727
2728 static __inline int hard_smp_processor_id(void)
2729 {
2730@@ -99,17 +100,5 @@
2731
2732 #define NO_PROC_ID 0xFF /* No processor magic marker */
2733
2734-/*
2735- * This magic constant controls our willingness to transfer
2736- * a process across CPUs. Such a transfer incurs misses on the L1
2737- * cache, and on a P6 or P5 with multiple L2 caches L2 hits. My
2738- * gut feeling is this will vary by board in value. For a board
2739- * with separate L2 cache it probably depends also on the RSS, and
2740- * for a board with shared L2 cache it ought to decay fast as other
2741- * processes are run.
2742- */
2743-
2744-#define PROC_CHANGE_PENALTY 15 /* Schedule penalty */
2745-
2746 #endif
2747 #endif
2748diff -urN linux-2.4.24.org/include/asm-i386/system.h linux-2.4.24/include/asm-i386/system.h
2749--- linux-2.4.24.org/include/asm-i386/system.h 2004-02-04 20:47:40.963003981 +0100
2750+++ linux-2.4.24/include/asm-i386/system.h 2004-02-04 20:52:53.759943026 +0100
2751@@ -12,25 +12,22 @@
2752 struct task_struct; /* one of the stranger aspects of C forward declarations.. */
2753 extern void FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next));
2754
2755-#define prepare_to_switch() do { } while(0)
2756 #define switch_to(prev,next,last) do { \
2757 asm volatile("pushl %%esi\n\t" \
2758 "pushl %%edi\n\t" \
2759 "pushl %%ebp\n\t" \
2760 "movl %%esp,%0\n\t" /* save ESP */ \
2761- "movl %3,%%esp\n\t" /* restore ESP */ \
2762+ "movl %2,%%esp\n\t" /* restore ESP */ \
2763 "movl $1f,%1\n\t" /* save EIP */ \
2764- "pushl %4\n\t" /* restore EIP */ \
2765+ "pushl %3\n\t" /* restore EIP */ \
2766 "jmp __switch_to\n" \
2767 "1:\t" \
2768 "popl %%ebp\n\t" \
2769 "popl %%edi\n\t" \
2770 "popl %%esi\n\t" \
2771- :"=m" (prev->thread.esp),"=m" (prev->thread.eip), \
2772- "=b" (last) \
2773+ :"=m" (prev->thread.esp),"=m" (prev->thread.eip) \
2774 :"m" (next->thread.esp),"m" (next->thread.eip), \
2775- "a" (prev), "d" (next), \
2776- "b" (prev)); \
2777+ "a" (prev), "d" (next)); \
2778 } while (0)
2779
2780 #define _set_base(addr,base) do { unsigned long __pr; \
2781diff -urN linux-2.4.24.org/include/asm-ia64/bitops.h linux-2.4.24/include/asm-ia64/bitops.h
2782--- linux-2.4.24.org/include/asm-ia64/bitops.h 2004-02-04 20:48:16.659579072 +0100
2783+++ linux-2.4.24/include/asm-ia64/bitops.h 2004-02-04 20:52:53.793935955 +0100
2784@@ -4,6 +4,9 @@
2785 /*
2786 * Copyright (C) 1998-2003 Hewlett-Packard Co
2787 * David Mosberger-Tang <davidm@hpl.hp.com>
2788+ *
2789+ * 02/06/02 find_next_bit() and find_first_bit() added from Erich Focht's ia64 O(1)
2790+ * scheduler patch
2791 */
2792
2793 #include <linux/types.h>
2794@@ -91,6 +94,17 @@
2795 }
2796
2797 /**
2798+ * __clear_bit - Clears a bit in memory (non-atomic version)
2799+ */
2800+static __inline__ void
2801+__clear_bit (int nr, volatile void *addr)
2802+{
2803+ volatile __u32 *p = (__u32 *) addr + (nr >> 5);
2804+ __u32 m = 1 << (nr & 31);
2805+ *p &= ~m;
2806+}
2807+
2808+/**
2809 * change_bit - Toggle a bit in memory
2810 * @nr: Bit to clear
2811 * @addr: Address to start counting from
2812@@ -266,12 +280,11 @@
2813 }
2814
2815 /**
2816- * ffz - find the first zero bit in a memory region
2817- * @x: The address to start the search at
2818+ * ffz - find the first zero bit in a long word
2819+ * @x: The long word to find the bit in
2820 *
2821- * Returns the bit-number (0..63) of the first (least significant) zero bit, not
2822- * the number of the byte containing a bit. Undefined if no zero exists, so
2823- * code should check against ~0UL first...
2824+ * Returns the bit-number (0..63) of the first (least significant) zero bit. Undefined if
2825+ * no zero exists, so code should check against ~0UL first...
2826 */
2827 static inline unsigned long
2828 ffz (unsigned long x)
2829@@ -297,6 +310,21 @@
2830 return result;
2831 }
2832
2833+/**
2834+ * __ffs - find first bit in word.
2835+ * @x: The word to search
2836+ *
2837+ * Undefined if no bit exists, so code should check against 0 first.
2838+ */
2839+static __inline__ unsigned long
2840+__ffs (unsigned long x)
2841+{
2842+ unsigned long result;
2843+
2844+ __asm__ ("popcnt %0=%1" : "=r" (result) : "r" ((x - 1) & ~x));
2845+ return result;
2846+}
2847+
2848 #ifdef __KERNEL__
2849
2850 /*
2851@@ -313,6 +341,12 @@
2852 return exp - 0xffff;
2853 }
2854
2855+static int
2856+fls (int x)
2857+{
2858+ return ia64_fls((unsigned int) x);
2859+}
2860+
2861 /*
2862 * ffs: find first bit set. This is defined the same way as the libc and compiler builtin
2863 * ffs routines, therefore differs in spirit from the above ffz (man ffs): it operates on
2864@@ -385,8 +419,53 @@
2865 */
2866 #define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
2867
2868+/*
2869+ * Find next bit in a bitmap reasonably efficiently..
2870+ */
2871+static inline int
2872+find_next_bit (void *addr, unsigned long size, unsigned long offset)
2873+{
2874+ unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
2875+ unsigned long result = offset & ~63UL;
2876+ unsigned long tmp;
2877+
2878+ if (offset >= size)
2879+ return size;
2880+ size -= result;
2881+ offset &= 63UL;
2882+ if (offset) {
2883+ tmp = *(p++);
2884+ tmp &= ~0UL << offset;
2885+ if (size < 64)
2886+ goto found_first;
2887+ if (tmp)
2888+ goto found_middle;
2889+ size -= 64;
2890+ result += 64;
2891+ }
2892+ while (size & ~63UL) {
2893+ if ((tmp = *(p++)))
2894+ goto found_middle;
2895+ result += 64;
2896+ size -= 64;
2897+ }
2898+ if (!size)
2899+ return result;
2900+ tmp = *p;
2901+ found_first:
2902+ tmp &= ~0UL >> (64-size);
2903+ if (tmp == 0UL) /* Are any bits set? */
2904+ return result + size; /* Nope. */
2905+ found_middle:
2906+ return result + __ffs(tmp);
2907+}
2908+
2909+#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
2910+
2911 #ifdef __KERNEL__
2912
2913+#define __clear_bit(nr, addr) clear_bit(nr, addr)
2914+
2915 #define ext2_set_bit test_and_set_bit
2916 #define ext2_clear_bit test_and_clear_bit
2917 #define ext2_test_bit test_bit
2918@@ -400,6 +479,16 @@
2919 #define minix_test_bit(nr,addr) test_bit(nr,addr)
2920 #define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
2921
2922+static inline int
2923+sched_find_first_bit (unsigned long *b)
2924+{
2925+ if (unlikely(b[0]))
2926+ return __ffs(b[0]);
2927+ if (unlikely(b[1]))
2928+ return 64 + __ffs(b[1]);
2929+ return __ffs(b[2]) + 128;
2930+}
2931+
2932 #endif /* __KERNEL__ */
2933
2934 #endif /* _ASM_IA64_BITOPS_H */
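The ia64 __ffs() above obtains the index of the lowest set bit with a single popcnt: (x - 1) & ~x turns every bit below the lowest set bit into a one and clears everything else, so counting those ones gives the bit's index. A quick portable check of that identity using the GCC builtins:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned long samples[] = { 1UL, 0x80UL, 0xf00UL, 0x80000000UL };

	for (unsigned int i = 0; i < sizeof samples / sizeof samples[0]; i++) {
		unsigned long x   = samples[i];
		unsigned long idx = (unsigned long)__builtin_popcountl((x - 1) & ~x);

		assert(idx == (unsigned long)__builtin_ctzl(x));
		printf("lowest set bit of 0x%lx is %lu\n", x, idx);
	}
	return 0;
}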
2935diff -urN linux-2.4.24.org/include/asm-m68k/bitops.h linux-2.4.24/include/asm-m68k/bitops.h
2936--- linux-2.4.24.org/include/asm-m68k/bitops.h 2004-02-04 20:47:47.882564706 +0100
2937+++ linux-2.4.24/include/asm-m68k/bitops.h 2004-02-04 20:52:53.798934916 +0100
2938@@ -97,6 +97,7 @@
2939 (__builtin_constant_p(nr) ? \
2940 __constant_clear_bit(nr, vaddr) : \
2941 __generic_clear_bit(nr, vaddr))
2942+#define __clear_bit(nr,vaddr) clear_bit(nr,vaddr)
2943
2944 static inline void __constant_clear_bit(int nr, volatile void *vaddr)
2945 {
2946@@ -238,6 +239,28 @@
2947
2948 return 32 - cnt;
2949 }
2950+#define __ffs(x) (ffs(x) - 1)
2951+
2952+
2953+/*
2954+ * Every architecture must define this function. It's the fastest
2955+ * way of searching a 140-bit bitmap where the first 100 bits are
2956+ * unlikely to be set. It's guaranteed that at least one of the 140
2957+ * bits is cleared.
2958+ */
2959+static inline int sched_find_first_bit(unsigned long *b)
2960+{
2961+ if (unlikely(b[0]))
2962+ return __ffs(b[0]);
2963+ if (unlikely(b[1]))
2964+ return __ffs(b[1]) + 32;
2965+ if (unlikely(b[2]))
2966+ return __ffs(b[2]) + 64;
2967+ if (b[3])
2968+ return __ffs(b[3]) + 96;
2969+ return __ffs(b[4]) + 128;
2970+}
2971+
2972
2973 /*
2974 * hweightN: returns the hamming weight (i.e. the number
2975diff -urN linux-2.4.24.org/include/asm-mips/bitops.h linux-2.4.24/include/asm-mips/bitops.h
2976--- linux-2.4.24.org/include/asm-mips/bitops.h 2004-02-04 20:47:43.266524847 +0100
2977+++ linux-2.4.24/include/asm-mips/bitops.h 2004-02-04 20:52:53.820930341 +0100
2978@@ -51,6 +51,8 @@
2979
2980 #ifdef CONFIG_CPU_HAS_LLSC
2981
2982+#include <asm/mipsregs.h>
2983+
2984 /*
2985 * These functions for MIPS ISA > 1 are interrupt and SMP proof and
2986 * interrupt friendly
2987@@ -593,21 +595,30 @@
2988 *
2989 * Undefined if no zero exists, so code should check against ~0UL first.
2990 */
2991-static __inline__ unsigned long ffz(unsigned long word)
2992+extern __inline__ unsigned long ffz(unsigned long word)
2993 {
2994- int b = 0, s;
2995+ unsigned int __res;
2996+ unsigned int mask = 1;
2997
2998- word = ~word;
2999- s = 16; if (word << 16 != 0) s = 0; b += s; word >>= s;
3000- s = 8; if (word << 24 != 0) s = 0; b += s; word >>= s;
3001- s = 4; if (word << 28 != 0) s = 0; b += s; word >>= s;
3002- s = 2; if (word << 30 != 0) s = 0; b += s; word >>= s;
3003- s = 1; if (word << 31 != 0) s = 0; b += s;
3004+ __asm__ (
3005+ ".set\tnoreorder\n\t"
3006+ ".set\tnoat\n\t"
3007+ "move\t%0,$0\n"
3008+ "1:\tand\t$1,%2,%1\n\t"
3009+ "beqz\t$1,2f\n\t"
3010+ "sll\t%1,1\n\t"
3011+ "bnez\t%1,1b\n\t"
3012+ "addiu\t%0,1\n\t"
3013+ ".set\tat\n\t"
3014+ ".set\treorder\n"
3015+ "2:\n\t"
3016+ : "=&r" (__res), "=r" (mask)
3017+ : "r" (word), "1" (mask)
3018+ : "$1");
3019
3020- return b;
3021+ return __res;
3022 }
3023
3024-
3025 #ifdef __KERNEL__
3026
3027 /*
3028diff -urN linux-2.4.24.org/include/asm-mips64/bitops.h linux-2.4.24/include/asm-mips64/bitops.h
3029--- linux-2.4.24.org/include/asm-mips64/bitops.h 2004-02-04 20:48:21.702530138 +0100
3030+++ linux-2.4.24/include/asm-mips64/bitops.h 2004-02-04 20:52:53.873919319 +0100
3031@@ -27,6 +27,7 @@
3032
3033 #include <asm/system.h>
3034 #include <asm/sgidefs.h>
3035+#include <asm/mipsregs.h>
3036
3037 /*
3038 * set_bit - Atomically set a bit in memory
3039@@ -38,7 +39,8 @@
3040 * Note that @nr may be almost arbitrarily large; this function is not
3041 * restricted to acting on a single-word quantity.
3042 */
3043-static inline void set_bit(unsigned long nr, volatile void *addr)
3044+extern __inline__ void
3045+set_bit(unsigned long nr, volatile void *addr)
3046 {
3047 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
3048 unsigned long temp;
3049@@ -62,7 +64,7 @@
3050 * If it's called on the same region of memory simultaneously, the effect
3051 * may be that only one operation succeeds.
3052 */
3053-static inline void __set_bit(int nr, volatile void * addr)
3054+extern __inline__ void __set_bit(int nr, volatile void * addr)
3055 {
3056 unsigned long * m = ((unsigned long *) addr) + (nr >> 6);
3057
3058@@ -79,7 +81,8 @@
3059 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
3060 * in order to ensure changes are visible on other processors.
3061 */
3062-static inline void clear_bit(unsigned long nr, volatile void *addr)
3063+extern __inline__ void
3064+clear_bit(unsigned long nr, volatile void *addr)
3065 {
3066 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
3067 unsigned long temp;
3068@@ -105,7 +108,8 @@
3069 * Note that @nr may be almost arbitrarily large; this function is not
3070 * restricted to acting on a single-word quantity.
3071 */
3072-static inline void change_bit(unsigned long nr, volatile void *addr)
3073+extern __inline__ void
3074+change_bit(unsigned long nr, volatile void *addr)
3075 {
3076 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
3077 unsigned long temp;
3078@@ -128,7 +132,7 @@
3079 * If it's called on the same region of memory simultaneously, the effect
3080 * may be that only one operation succeeds.
3081 */
3082-static inline void __change_bit(int nr, volatile void * addr)
3083+extern __inline__ void __change_bit(int nr, volatile void * addr)
3084 {
3085 unsigned long * m = ((unsigned long *) addr) + (nr >> 6);
3086
3087@@ -143,8 +147,8 @@
3088 * This operation is atomic and cannot be reordered.
3089 * It also implies a memory barrier.
3090 */
3091-static inline unsigned long test_and_set_bit(unsigned long nr,
3092- volatile void *addr)
3093+extern __inline__ unsigned long
3094+test_and_set_bit(unsigned long nr, volatile void *addr)
3095 {
3096 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
3097 unsigned long temp, res;
3098@@ -176,7 +180,8 @@
3099 * If two examples of this operation race, one can appear to succeed
3100 * but actually fail. You must protect multiple accesses with a lock.
3101 */
3102-static inline int __test_and_set_bit(int nr, volatile void *addr)
3103+extern __inline__ int
3104+__test_and_set_bit(int nr, volatile void * addr)
3105 {
3106 unsigned long mask, retval;
3107 long *a = (unsigned long *) addr;
3108@@ -197,8 +202,8 @@
3109 * This operation is atomic and cannot be reordered.
3110 * It also implies a memory barrier.
3111 */
3112-static inline unsigned long test_and_clear_bit(unsigned long nr,
3113- volatile void *addr)
3114+extern __inline__ unsigned long
3115+test_and_clear_bit(unsigned long nr, volatile void *addr)
3116 {
3117 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
3118 unsigned long temp, res;
3119@@ -231,7 +236,8 @@
3120 * If two examples of this operation race, one can appear to succeed
3121 * but actually fail. You must protect multiple accesses with a lock.
3122 */
3123-static inline int __test_and_clear_bit(int nr, volatile void * addr)
3124+extern __inline__ int
3125+__test_and_clear_bit(int nr, volatile void * addr)
3126 {
3127 unsigned long mask, retval;
3128 unsigned long *a = (unsigned long *) addr;
3129@@ -252,8 +258,8 @@
3130 * This operation is atomic and cannot be reordered.
3131 * It also implies a memory barrier.
3132 */
3133-static inline unsigned long test_and_change_bit(unsigned long nr,
3134- volatile void *addr)
3135+extern __inline__ unsigned long
3136+test_and_change_bit(unsigned long nr, volatile void *addr)
3137 {
3138 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
3139 unsigned long temp, res;
3140@@ -285,7 +291,8 @@
3141 * If two examples of this operation race, one can appear to succeed
3142 * but actually fail. You must protect multiple accesses with a lock.
3143 */
3144-static inline int __test_and_change_bit(int nr, volatile void *addr)
3145+extern __inline__ int
3146+__test_and_change_bit(int nr, volatile void * addr)
3147 {
3148 unsigned long mask, retval;
3149 unsigned long *a = (unsigned long *) addr;
3150@@ -302,7 +309,8 @@
3151 * @nr: bit number to test
3152 * @addr: Address to start counting from
3153 */
3154-static inline int test_bit(int nr, volatile void * addr)
3155+extern __inline__ unsigned long
3156+test_bit(int nr, volatile void * addr)
3157 {
3158 return 1UL & (((const volatile unsigned long *) addr)[nr >> SZLONG_LOG] >> (nr & SZLONG_MASK));
3159 }
3160@@ -313,19 +321,20 @@
3161 *
3162 * Undefined if no zero exists, so code should check against ~0UL first.
3163 */
3164-static __inline__ unsigned long ffz(unsigned long word)
3165+extern __inline__ unsigned long ffz(unsigned long word)
3166 {
3167- int b = 0, s;
3168+ unsigned long k;
3169
3170 word = ~word;
3171- s = 32; if (word << 32 != 0) s = 0; b += s; word >>= s;
3172- s = 16; if (word << 48 != 0) s = 0; b += s; word >>= s;
3173- s = 8; if (word << 56 != 0) s = 0; b += s; word >>= s;
3174- s = 4; if (word << 60 != 0) s = 0; b += s; word >>= s;
3175- s = 2; if (word << 62 != 0) s = 0; b += s; word >>= s;
3176- s = 1; if (word << 63 != 0) s = 0; b += s;
3177+ k = 63;
3178+ if (word & 0x00000000ffffffffUL) { k -= 32; word <<= 32; }
3179+ if (word & 0x0000ffff00000000UL) { k -= 16; word <<= 16; }
3180+ if (word & 0x00ff000000000000UL) { k -= 8; word <<= 8; }
3181+ if (word & 0x0f00000000000000UL) { k -= 4; word <<= 4; }
3182+ if (word & 0x3000000000000000UL) { k -= 2; word <<= 2; }
3183+ if (word & 0x4000000000000000UL) { k -= 1; }
3184
3185- return b;
3186+ return k;
3187 }
3188
3189 /*
3190@@ -334,8 +343,8 @@
3191 * @offset: The bitnumber to start searching at
3192 * @size: The maximum size to search
3193 */
3194-static inline unsigned long find_next_zero_bit(void *addr, unsigned long size,
3195- unsigned long offset)
3196+extern __inline__ unsigned long
3197+find_next_zero_bit(void *addr, unsigned long size, unsigned long offset)
3198 {
3199 unsigned long *p = ((unsigned long *) addr) + (offset >> SZLONG_LOG);
3200 unsigned long result = offset & ~SZLONG_MASK;
3201@@ -400,7 +409,8 @@
3202 #define hweight16(x) generic_hweight16(x)
3203 #define hweight8(x) generic_hweight8(x)
3204
3205-static inline int __test_and_set_le_bit(unsigned long nr, void * addr)
3206+extern inline int
3207+__test_and_set_le_bit(unsigned long nr, void * addr)
3208 {
3209 unsigned char *ADDR = (unsigned char *) addr;
3210 int mask, retval;
3211@@ -413,7 +423,8 @@
3212 return retval;
3213 }
3214
3215-static inline int __test_and_clear_le_bit(unsigned long nr, void * addr)
3216+extern inline int
3217+__test_and_clear_le_bit(unsigned long nr, void * addr)
3218 {
3219 unsigned char *ADDR = (unsigned char *) addr;
3220 int mask, retval;
3221@@ -426,7 +437,8 @@
3222 return retval;
3223 }
3224
3225-static inline int test_le_bit(unsigned long nr, const void * addr)
3226+extern inline int
3227+test_le_bit(unsigned long nr, const void * addr)
3228 {
3229 const unsigned char *ADDR = (const unsigned char *) addr;
3230 int mask;
3231@@ -451,7 +463,7 @@
3232 return b;
3233 }
3234
3235-static inline unsigned long find_next_zero_le_bit(void *addr,
3236+extern inline unsigned long find_next_zero_le_bit(void *addr,
3237 unsigned long size, unsigned long offset)
3238 {
3239 unsigned int *p = ((unsigned int *) addr) + (offset >> 5);
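
The ffz() rework in the hunks above drops the old shift-and-accumulate sequence in favour of a halving search over the inverted 64-bit word. A minimal stand-alone sketch of the same idea, assuming an LP64 target where unsigned long is 64 bits (the name is illustrative, not part of the patch):

	/* Sketch: find-first-zero by repeated halving of a 64-bit word.
	 * Result is undefined when word == ~0UL, as in the kernel helper. */
	static unsigned long ffz_sketch(unsigned long word)
	{
		unsigned long k = 63;

		word = ~word;	/* the zero bit we want becomes a one bit */
		if (word & 0x00000000ffffffffUL) { k -= 32; word <<= 32; }
		if (word & 0x0000ffff00000000UL) { k -= 16; word <<= 16; }
		if (word & 0x00ff000000000000UL) { k -= 8;  word <<= 8;  }
		if (word & 0x0f00000000000000UL) { k -= 4;  word <<= 4;  }
		if (word & 0x3000000000000000UL) { k -= 2;  word <<= 2;  }
		if (word & 0x4000000000000000UL) { k -= 1; }
		return k;	/* e.g. ffz_sketch(0x0fUL) == 4 */
	}
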
3240diff -urN linux-2.4.24.org/include/asm-ppc/bitops.h linux-2.4.24/include/asm-ppc/bitops.h
3241--- linux-2.4.24.org/include/asm-ppc/bitops.h 2004-02-04 20:47:57.992461840 +0100
3242+++ linux-2.4.24/include/asm-ppc/bitops.h 2004-02-04 20:52:53.902913289 +0100
3243@@ -7,6 +7,7 @@
3244 #define _PPC_BITOPS_H
3245
3246 #include <linux/config.h>
3247+#include <linux/compiler.h>
3248 #include <asm/byteorder.h>
3249 #include <asm/atomic.h>
3250
3251@@ -26,7 +27,7 @@
3252 * These used to be if'd out here because using : "cc" as a constraint
3253 * resulted in errors from egcs. Things appear to be OK with gcc-2.95.
3254 */
3255-static __inline__ void set_bit(int nr, volatile void * addr)
3256+static __inline__ void set_bit(int nr, volatile unsigned long * addr)
3257 {
3258 unsigned long old;
3259 unsigned long mask = 1 << (nr & 0x1f);
3260@@ -46,7 +47,7 @@
3261 /*
3262 * non-atomic version
3263 */
3264-static __inline__ void __set_bit(int nr, volatile void *addr)
3265+static __inline__ void __set_bit(int nr, volatile unsigned long *addr)
3266 {
3267 unsigned long mask = 1 << (nr & 0x1f);
3268 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
3269@@ -60,7 +61,7 @@
3270 #define smp_mb__before_clear_bit() smp_mb()
3271 #define smp_mb__after_clear_bit() smp_mb()
3272
3273-static __inline__ void clear_bit(int nr, volatile void *addr)
3274+static __inline__ void clear_bit(int nr, volatile unsigned long *addr)
3275 {
3276 unsigned long old;
3277 unsigned long mask = 1 << (nr & 0x1f);
3278@@ -80,7 +81,7 @@
3279 /*
3280 * non-atomic version
3281 */
3282-static __inline__ void __clear_bit(int nr, volatile void *addr)
3283+static __inline__ void __clear_bit(int nr, volatile unsigned long *addr)
3284 {
3285 unsigned long mask = 1 << (nr & 0x1f);
3286 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
3287@@ -88,7 +89,7 @@
3288 *p &= ~mask;
3289 }
3290
3291-static __inline__ void change_bit(int nr, volatile void *addr)
3292+static __inline__ void change_bit(int nr, volatile unsigned long *addr)
3293 {
3294 unsigned long old;
3295 unsigned long mask = 1 << (nr & 0x1f);
3296@@ -108,7 +109,7 @@
3297 /*
3298 * non-atomic version
3299 */
3300-static __inline__ void __change_bit(int nr, volatile void *addr)
3301+static __inline__ void __change_bit(int nr, volatile unsigned long *addr)
3302 {
3303 unsigned long mask = 1 << (nr & 0x1f);
3304 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
3305@@ -119,7 +120,7 @@
3306 /*
3307 * test_and_*_bit do imply a memory barrier (?)
3308 */
3309-static __inline__ int test_and_set_bit(int nr, volatile void *addr)
3310+static __inline__ int test_and_set_bit(int nr, volatile unsigned long *addr)
3311 {
3312 unsigned int old, t;
3313 unsigned int mask = 1 << (nr & 0x1f);
3314@@ -142,7 +143,7 @@
3315 /*
3316 * non-atomic version
3317 */
3318-static __inline__ int __test_and_set_bit(int nr, volatile void *addr)
3319+static __inline__ int __test_and_set_bit(int nr, volatile unsigned long *addr)
3320 {
3321 unsigned long mask = 1 << (nr & 0x1f);
3322 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
3323@@ -152,7 +153,7 @@
3324 return (old & mask) != 0;
3325 }
3326
3327-static __inline__ int test_and_clear_bit(int nr, volatile void *addr)
3328+static __inline__ int test_and_clear_bit(int nr, volatile unsigned long *addr)
3329 {
3330 unsigned int old, t;
3331 unsigned int mask = 1 << (nr & 0x1f);
3332@@ -175,7 +176,7 @@
3333 /*
3334 * non-atomic version
3335 */
3336-static __inline__ int __test_and_clear_bit(int nr, volatile void *addr)
3337+static __inline__ int __test_and_clear_bit(int nr, volatile unsigned long *addr)
3338 {
3339 unsigned long mask = 1 << (nr & 0x1f);
3340 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
3341@@ -185,7 +186,7 @@
3342 return (old & mask) != 0;
3343 }
3344
3345-static __inline__ int test_and_change_bit(int nr, volatile void *addr)
3346+static __inline__ int test_and_change_bit(int nr, volatile unsigned long *addr)
3347 {
3348 unsigned int old, t;
3349 unsigned int mask = 1 << (nr & 0x1f);
3350@@ -208,7 +209,7 @@
3351 /*
3352 * non-atomic version
3353 */
3354-static __inline__ int __test_and_change_bit(int nr, volatile void *addr)
3355+static __inline__ int __test_and_change_bit(int nr, volatile unsigned long *addr)
3356 {
3357 unsigned long mask = 1 << (nr & 0x1f);
3358 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
3359@@ -218,7 +219,7 @@
3360 return (old & mask) != 0;
3361 }
3362
3363-static __inline__ int test_bit(int nr, __const__ volatile void *addr)
3364+static __inline__ int test_bit(int nr, __const__ volatile unsigned long *addr)
3365 {
3366 __const__ unsigned int *p = (__const__ unsigned int *) addr;
3367
3368@@ -226,7 +227,7 @@
3369 }
3370
3371 /* Return the bit position of the most significant 1 bit in a word */
3372-static __inline__ int __ilog2(unsigned int x)
3373+static __inline__ int __ilog2(unsigned long x)
3374 {
3375 int lz;
3376
3377@@ -234,13 +235,18 @@
3378 return 31 - lz;
3379 }
3380
3381-static __inline__ int ffz(unsigned int x)
3382+static __inline__ int ffz(unsigned long x)
3383 {
3384 if ((x = ~x) == 0)
3385 return 32;
3386 return __ilog2(x & -x);
3387 }
3388
3389+static inline int __ffs(unsigned long x)
3390+{
3391+ return __ilog2(x & -x);
3392+}
3393+
3394 /*
3395 * ffs: find first bit set. This is defined the same way as
3396 * the libc and compiler builtin ffs routines, therefore
3397@@ -252,6 +258,18 @@
3398 }
3399
3400 /*
3401+ * fls: find last (most-significant) bit set.
3402+ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
3403+ */
3404+static __inline__ int fls(unsigned int x)
3405+{
3406+ int lz;
3407+
3408+ asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x));
3409+ return 32 - lz;
3410+}
3411+
3412+/*
3413 * hweightN: returns the hamming weight (i.e. the number
3414 * of bits set) of a N-bit word
3415 */
3416@@ -261,13 +279,86 @@
3417 #define hweight8(x) generic_hweight8(x)
3418
3419 /*
3420+ * Find the first bit set in a 140-bit bitmap.
3421+ * The first 100 bits are unlikely to be set.
3422+ */
3423+static inline int sched_find_first_bit(unsigned long *b)
3424+{
3425+ if (unlikely(b[0]))
3426+ return __ffs(b[0]);
3427+ if (unlikely(b[1]))
3428+ return __ffs(b[1]) + 32;
3429+ if (unlikely(b[2]))
3430+ return __ffs(b[2]) + 64;
3431+ if (b[3])
3432+ return __ffs(b[3]) + 96;
3433+ return __ffs(b[4]) + 128;
3434+}
3435+
3436+/**
3437+ * find_next_bit - find the next set bit in a memory region
3438+ * @addr: The address to base the search on
3439+ * @offset: The bitnumber to start searching at
3440+ * @size: The maximum size to search
3441+ */
3442+static __inline__ unsigned long find_next_bit(unsigned long *addr,
3443+ unsigned long size, unsigned long offset)
3444+{
3445+ unsigned int *p = ((unsigned int *) addr) + (offset >> 5);
3446+ unsigned int result = offset & ~31UL;
3447+ unsigned int tmp;
3448+
3449+ if (offset >= size)
3450+ return size;
3451+ size -= result;
3452+ offset &= 31UL;
3453+ if (offset) {
3454+ tmp = *p++;
3455+ tmp &= ~0UL << offset;
3456+ if (size < 32)
3457+ goto found_first;
3458+ if (tmp)
3459+ goto found_middle;
3460+ size -= 32;
3461+ result += 32;
3462+ }
3463+ while (size >= 32) {
3464+ if ((tmp = *p++) != 0)
3465+ goto found_middle;
3466+ result += 32;
3467+ size -= 32;
3468+ }
3469+ if (!size)
3470+ return result;
3471+ tmp = *p;
3472+
3473+found_first:
3474+ tmp &= ~0UL >> (32 - size);
3475+ if (tmp == 0UL) /* Are any bits set? */
3476+ return result + size; /* Nope. */
3477+found_middle:
3478+ return result + __ffs(tmp);
3479+}
3480+
3481+/**
3482+ * find_first_bit - find the first set bit in a memory region
3483+ * @addr: The address to start the search at
3484+ * @size: The maximum size to search
3485+ *
3486+ * Returns the bit-number of the first set bit, not the number of the byte
3487+ * containing a bit.
3488+ */
3489+#define find_first_bit(addr, size) \
3490+ find_next_bit((addr), (size), 0)
3491+
3492+/*
3493 * This implementation of find_{first,next}_zero_bit was stolen from
3494 * Linus' asm-alpha/bitops.h.
3495 */
3496 #define find_first_zero_bit(addr, size) \
3497 find_next_zero_bit((addr), (size), 0)
3498
3499-static __inline__ unsigned long find_next_zero_bit(void * addr,
3500+static __inline__ unsigned long find_next_zero_bit(unsigned long * addr,
3501 unsigned long size, unsigned long offset)
3502 {
3503 unsigned int * p = ((unsigned int *) addr) + (offset >> 5);
3504@@ -306,8 +397,8 @@
3505 }
3506
3507
3508-#define ext2_set_bit(nr, addr) __test_and_set_bit((nr) ^ 0x18, addr)
3509-#define ext2_clear_bit(nr, addr) __test_and_clear_bit((nr) ^ 0x18, addr)
3510+#define ext2_set_bit(nr, addr) __test_and_set_bit((nr) ^ 0x18, (unsigned long *)(addr))
3511+#define ext2_clear_bit(nr, addr) __test_and_clear_bit((nr) ^ 0x18, (unsigned long *)(addr))
3512
3513 static __inline__ int ext2_test_bit(int nr, __const__ void * addr)
3514 {
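
The new sched_find_first_bit() added above is the helper the O(1) scheduler runs on its 140-entry priority bitmap (five 32-bit words on ppc); the first 100 bits cover the real-time priorities and are usually clear, which is why those word tests are wrapped in unlikely(). A hedged, portable sketch of the same scan, with GCC's __builtin_ctz standing in for the __ffs() defined earlier in this file (an assumption for illustration, not what the patch compiles to):

	/* Sketch: first set bit in a 140-bit priority bitmap held in five
	 * 32-bit words; callers guarantee at least one bit is set. */
	static int sched_find_first_bit_sketch(const unsigned int b[5])
	{
		if (b[0])
			return __builtin_ctz(b[0]);
		if (b[1])
			return __builtin_ctz(b[1]) + 32;
		if (b[2])
			return __builtin_ctz(b[2]) + 64;
		if (b[3])
			return __builtin_ctz(b[3]) + 96;
		return __builtin_ctz(b[4]) + 128;
	}
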
3515diff -urN linux-2.4.24.org/include/asm-ppc/smp.h linux-2.4.24/include/asm-ppc/smp.h
3516--- linux-2.4.24.org/include/asm-ppc/smp.h 2004-02-04 20:47:58.116436054 +0100
3517+++ linux-2.4.24/include/asm-ppc/smp.h 2004-02-04 20:52:53.906912457 +0100
3518@@ -45,7 +45,7 @@
3519 #define cpu_logical_map(cpu) (cpu)
3520 #define cpu_number_map(x) (x)
3521
3522-#define smp_processor_id() (current->processor)
3523+#define smp_processor_id() (current->cpu)
3524
3525 extern int smp_hw_index[NR_CPUS];
3526 #define hard_smp_processor_id() (smp_hw_index[smp_processor_id()])
3527diff -urN linux-2.4.24.org/include/asm-ppc64/bitops.h linux-2.4.24/include/asm-ppc64/bitops.h
3528--- linux-2.4.24.org/include/asm-ppc64/bitops.h 2004-02-04 20:47:31.682934246 +0100
3529+++ linux-2.4.24/include/asm-ppc64/bitops.h 2004-02-04 20:52:53.961901020 +0100
3530@@ -41,12 +41,12 @@
3531 #define smp_mb__before_clear_bit() smp_mb()
3532 #define smp_mb__after_clear_bit() smp_mb()
3533
3534-static __inline__ int test_bit(unsigned long nr, __const__ volatile void *addr)
3535+static __inline__ int test_bit(unsigned long nr, __const__ volatile unsigned long *addr)
3536 {
3537 return (1UL & (((__const__ long *) addr)[nr >> 6] >> (nr & 63)));
3538 }
3539
3540-static __inline__ void set_bit(unsigned long nr, volatile void *addr)
3541+static __inline__ void set_bit(unsigned long nr, volatile unsigned long *addr)
3542 {
3543 unsigned long old;
3544 unsigned long mask = 1UL << (nr & 0x3f);
3545@@ -62,7 +62,7 @@
3546 : "cc");
3547 }
3548
3549-static __inline__ void clear_bit(unsigned long nr, volatile void *addr)
3550+static __inline__ void clear_bit(unsigned long nr, volatile unsigned long *addr)
3551 {
3552 unsigned long old;
3553 unsigned long mask = 1UL << (nr & 0x3f);
3554@@ -78,7 +78,7 @@
3555 : "cc");
3556 }
3557
3558-static __inline__ void change_bit(unsigned long nr, volatile void *addr)
3559+static __inline__ void change_bit(unsigned long nr, volatile unsigned long *addr)
3560 {
3561 unsigned long old;
3562 unsigned long mask = 1UL << (nr & 0x3f);
3563@@ -94,7 +94,7 @@
3564 : "cc");
3565 }
3566
3567-static __inline__ int test_and_set_bit(unsigned long nr, volatile void *addr)
3568+static __inline__ int test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
3569 {
3570 unsigned long old, t;
3571 unsigned long mask = 1UL << (nr & 0x3f);
3572@@ -114,7 +114,7 @@
3573 return (old & mask) != 0;
3574 }
3575
3576-static __inline__ int test_and_clear_bit(unsigned long nr, volatile void *addr)
3577+static __inline__ int test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
3578 {
3579 unsigned long old, t;
3580 unsigned long mask = 1UL << (nr & 0x3f);
3581@@ -134,7 +134,7 @@
3582 return (old & mask) != 0;
3583 }
3584
3585-static __inline__ int test_and_change_bit(unsigned long nr, volatile void *addr)
3586+static __inline__ int test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
3587 {
3588 unsigned long old, t;
3589 unsigned long mask = 1UL << (nr & 0x3f);
3590@@ -157,7 +157,7 @@
3591 /*
3592 * non-atomic versions
3593 */
3594-static __inline__ void __set_bit(unsigned long nr, volatile void *addr)
3595+static __inline__ void __set_bit(unsigned long nr, volatile unsigned long *addr)
3596 {
3597 unsigned long mask = 1UL << (nr & 0x3f);
3598 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
3599@@ -165,7 +165,7 @@
3600 *p |= mask;
3601 }
3602
3603-static __inline__ void __clear_bit(unsigned long nr, volatile void *addr)
3604+static __inline__ void __clear_bit(unsigned long nr, volatile unsigned long *addr)
3605 {
3606 unsigned long mask = 1UL << (nr & 0x3f);
3607 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
3608@@ -173,7 +173,7 @@
3609 *p &= ~mask;
3610 }
3611
3612-static __inline__ void __change_bit(unsigned long nr, volatile void *addr)
3613+static __inline__ void __change_bit(unsigned long nr, volatile unsigned long *addr)
3614 {
3615 unsigned long mask = 1UL << (nr & 0x3f);
3616 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
3617@@ -181,7 +181,7 @@
3618 *p ^= mask;
3619 }
3620
3621-static __inline__ int __test_and_set_bit(unsigned long nr, volatile void *addr)
3622+static __inline__ int __test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
3623 {
3624 unsigned long mask = 1UL << (nr & 0x3f);
3625 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
3626@@ -191,7 +191,7 @@
3627 return (old & mask) != 0;
3628 }
3629
3630-static __inline__ int __test_and_clear_bit(unsigned long nr, volatile void *addr)
3631+static __inline__ int __test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
3632 {
3633 unsigned long mask = 1UL << (nr & 0x3f);
3634 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
3635@@ -201,7 +201,7 @@
3636 return (old & mask) != 0;
3637 }
3638
3639-static __inline__ int __test_and_change_bit(unsigned long nr, volatile void *addr)
3640+static __inline__ int __test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
3641 {
3642 unsigned long mask = 1UL << (nr & 0x3f);
3643 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
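
The ppc64 hunks above only retype the bitop prototypes from volatile void * to volatile unsigned long *, matching what callers actually pass; the arithmetic inside is unchanged. A minimal sketch of the word/bit split those prototypes imply for 64-bit longs (illustrative only, not the ppc64 inline-assembly version):

	/* Sketch: with the bitmap viewed as an array of unsigned long, as the
	 * new prototypes require, bit nr lives in word nr/64 at position nr%64. */
	static inline int test_bit_sketch(unsigned long nr,
					  const volatile unsigned long *addr)
	{
		return 1UL & (addr[nr >> 6] >> (nr & 63));
	}
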
3644diff -urN linux-2.4.24.org/include/asm-s390/bitops.h linux-2.4.24/include/asm-s390/bitops.h
3645--- linux-2.4.24.org/include/asm-s390/bitops.h 2004-02-04 20:48:24.809883809 +0100
3646+++ linux-2.4.24/include/asm-s390/bitops.h 2004-02-04 20:52:53.990894989 +0100
3647@@ -47,272 +47,217 @@
3648 extern const char _oi_bitmap[];
3649 extern const char _ni_bitmap[];
3650 extern const char _zb_findmap[];
3651+extern const char _sb_findmap[];
3652
3653 #ifdef CONFIG_SMP
3654 /*
3655 * SMP save set_bit routine based on compare and swap (CS)
3656 */
3657-static __inline__ void set_bit_cs(int nr, volatile void * addr)
3658+static inline void set_bit_cs(int nr, volatile void *ptr)
3659 {
3660- unsigned long bits, mask;
3661- __asm__ __volatile__(
3662+ unsigned long addr, old, new, mask;
3663+
3664+ addr = (unsigned long) ptr;
3665 #if ALIGN_CS == 1
3666- " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
3667- " nr %2,%1\n" /* isolate last 2 bits of address */
3668- " xr %1,%2\n" /* make addr % 4 == 0 */
3669- " sll %2,3\n"
3670- " ar %0,%2\n" /* add alignement to bitnr */
3671+ addr ^= addr & 3; /* align address to 4 */
3672+ nr += (addr & 3) << 3; /* add alignment to bit number */
3673 #endif
3674- " lhi %2,31\n"
3675- " nr %2,%0\n" /* make shift value */
3676- " xr %0,%2\n"
3677- " srl %0,3\n"
3678- " lhi %3,1\n"
3679- " la %1,0(%0,%1)\n" /* calc. address for CS */
3680- " sll %3,0(%2)\n" /* make OR mask */
3681- " l %0,0(%1)\n"
3682- "0: lr %2,%0\n" /* CS loop starts here */
3683- " or %2,%3\n" /* set bit */
3684- " cs %0,%2,0(%1)\n"
3685- " jl 0b"
3686- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
3687- : "cc", "memory" );
3688+ addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
3689+ mask = 1UL << (nr & 31); /* make OR mask */
3690+ asm volatile(
3691+ " l %0,0(%4)\n"
3692+ "0: lr %1,%0\n"
3693+ " or %1,%3\n"
3694+ " cs %0,%1,0(%4)\n"
3695+ " jl 0b"
3696+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
3697+ : "d" (mask), "a" (addr)
3698+ : "cc" );
3699 }
3700
3701 /*
3702 * SMP save clear_bit routine based on compare and swap (CS)
3703 */
3704-static __inline__ void clear_bit_cs(int nr, volatile void * addr)
3705+static inline void clear_bit_cs(int nr, volatile void *ptr)
3706 {
3707- static const int minusone = -1;
3708- unsigned long bits, mask;
3709- __asm__ __volatile__(
3710+ unsigned long addr, old, new, mask;
3711+
3712+ addr = (unsigned long) ptr;
3713 #if ALIGN_CS == 1
3714- " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
3715- " nr %2,%1\n" /* isolate last 2 bits of address */
3716- " xr %1,%2\n" /* make addr % 4 == 0 */
3717- " sll %2,3\n"
3718- " ar %0,%2\n" /* add alignement to bitnr */
3719+ addr ^= addr & 3; /* align address to 4 */
3720+ nr += (addr & 3) << 3; /* add alignment to bit number */
3721 #endif
3722- " lhi %2,31\n"
3723- " nr %2,%0\n" /* make shift value */
3724- " xr %0,%2\n"
3725- " srl %0,3\n"
3726- " lhi %3,1\n"
3727- " la %1,0(%0,%1)\n" /* calc. address for CS */
3728- " sll %3,0(%2)\n"
3729- " x %3,%4\n" /* make AND mask */
3730- " l %0,0(%1)\n"
3731- "0: lr %2,%0\n" /* CS loop starts here */
3732- " nr %2,%3\n" /* clear bit */
3733- " cs %0,%2,0(%1)\n"
3734- " jl 0b"
3735- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask)
3736- : "m" (minusone) : "cc", "memory" );
3737+ addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
3738+ mask = ~(1UL << (nr & 31)); /* make AND mask */
3739+ asm volatile(
3740+ " l %0,0(%4)\n"
3741+ "0: lr %1,%0\n"
3742+ " nr %1,%3\n"
3743+ " cs %0,%1,0(%4)\n"
3744+ " jl 0b"
3745+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
3746+ : "d" (mask), "a" (addr)
3747+ : "cc" );
3748 }
3749
3750 /*
3751 * SMP save change_bit routine based on compare and swap (CS)
3752 */
3753-static __inline__ void change_bit_cs(int nr, volatile void * addr)
3754+static inline void change_bit_cs(int nr, volatile void *ptr)
3755 {
3756- unsigned long bits, mask;
3757- __asm__ __volatile__(
3758+ unsigned long addr, old, new, mask;
3759+
3760+ addr = (unsigned long) ptr;
3761 #if ALIGN_CS == 1
3762- " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
3763- " nr %2,%1\n" /* isolate last 2 bits of address */
3764- " xr %1,%2\n" /* make addr % 4 == 0 */
3765- " sll %2,3\n"
3766- " ar %0,%2\n" /* add alignement to bitnr */
3767+ addr ^= addr & 3; /* align address to 4 */
3768+ nr += (addr & 3) << 3; /* add alignment to bit number */
3769 #endif
3770- " lhi %2,31\n"
3771- " nr %2,%0\n" /* make shift value */
3772- " xr %0,%2\n"
3773- " srl %0,3\n"
3774- " lhi %3,1\n"
3775- " la %1,0(%0,%1)\n" /* calc. address for CS */
3776- " sll %3,0(%2)\n" /* make XR mask */
3777- " l %0,0(%1)\n"
3778- "0: lr %2,%0\n" /* CS loop starts here */
3779- " xr %2,%3\n" /* change bit */
3780- " cs %0,%2,0(%1)\n"
3781- " jl 0b"
3782- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
3783- : "cc", "memory" );
3784+ addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
3785+ mask = 1UL << (nr & 31); /* make XOR mask */
3786+ asm volatile(
3787+ " l %0,0(%4)\n"
3788+ "0: lr %1,%0\n"
3789+ " xr %1,%3\n"
3790+ " cs %0,%1,0(%4)\n"
3791+ " jl 0b"
3792+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
3793+ : "d" (mask), "a" (addr)
3794+ : "cc" );
3795 }
3796
3797 /*
3798 * SMP save test_and_set_bit routine based on compare and swap (CS)
3799 */
3800-static __inline__ int test_and_set_bit_cs(int nr, volatile void * addr)
3801+static inline int test_and_set_bit_cs(int nr, volatile void *ptr)
3802 {
3803- unsigned long bits, mask;
3804- __asm__ __volatile__(
3805+ unsigned long addr, old, new, mask;
3806+
3807+ addr = (unsigned long) ptr;
3808 #if ALIGN_CS == 1
3809- " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
3810- " nr %2,%1\n" /* isolate last 2 bits of address */
3811- " xr %1,%2\n" /* make addr % 4 == 0 */
3812- " sll %2,3\n"
3813- " ar %0,%2\n" /* add alignement to bitnr */
3814+ addr ^= addr & 3; /* align address to 4 */
3815+ nr += (addr & 3) << 3; /* add alignment to bit number */
3816 #endif
3817- " lhi %2,31\n"
3818- " nr %2,%0\n" /* make shift value */
3819- " xr %0,%2\n"
3820- " srl %0,3\n"
3821- " lhi %3,1\n"
3822- " la %1,0(%0,%1)\n" /* calc. address for CS */
3823- " sll %3,0(%2)\n" /* make OR mask */
3824- " l %0,0(%1)\n"
3825- "0: lr %2,%0\n" /* CS loop starts here */
3826- " or %2,%3\n" /* set bit */
3827- " cs %0,%2,0(%1)\n"
3828- " jl 0b\n"
3829- " nr %0,%3\n" /* isolate old bit */
3830- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
3831- : "cc", "memory" );
3832- return nr != 0;
3833+ addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
3834+ mask = 1UL << (nr & 31); /* make OR/test mask */
3835+ asm volatile(
3836+ " l %0,0(%4)\n"
3837+ "0: lr %1,%0\n"
3838+ " or %1,%3\n"
3839+ " cs %0,%1,0(%4)\n"
3840+ " jl 0b"
3841+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
3842+ : "d" (mask), "a" (addr)
3843+ : "cc" );
3844+ return (old & mask) != 0;
3845 }
3846
3847 /*
3848 * SMP save test_and_clear_bit routine based on compare and swap (CS)
3849 */
3850-static __inline__ int test_and_clear_bit_cs(int nr, volatile void * addr)
3851+static inline int test_and_clear_bit_cs(int nr, volatile void *ptr)
3852 {
3853- static const int minusone = -1;
3854- unsigned long bits, mask;
3855- __asm__ __volatile__(
3856+ unsigned long addr, old, new, mask;
3857+
3858+ addr = (unsigned long) ptr;
3859 #if ALIGN_CS == 1
3860- " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
3861- " nr %2,%1\n" /* isolate last 2 bits of address */
3862- " xr %1,%2\n" /* make addr % 4 == 0 */
3863- " sll %2,3\n"
3864- " ar %0,%2\n" /* add alignement to bitnr */
3865+ addr ^= addr & 3; /* align address to 4 */
3866+ nr += (addr & 3) << 3; /* add alignment to bit number */
3867 #endif
3868- " lhi %2,31\n"
3869- " nr %2,%0\n" /* make shift value */
3870- " xr %0,%2\n"
3871- " srl %0,3\n"
3872- " lhi %3,1\n"
3873- " la %1,0(%0,%1)\n" /* calc. address for CS */
3874- " sll %3,0(%2)\n"
3875- " l %0,0(%1)\n"
3876- " x %3,%4\n" /* make AND mask */
3877- "0: lr %2,%0\n" /* CS loop starts here */
3878- " nr %2,%3\n" /* clear bit */
3879- " cs %0,%2,0(%1)\n"
3880- " jl 0b\n"
3881- " x %3,%4\n"
3882- " nr %0,%3\n" /* isolate old bit */
3883- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask)
3884- : "m" (minusone) : "cc", "memory" );
3885- return nr;
3886+ addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
3887+ mask = ~(1UL << (nr & 31)); /* make AND mask */
3888+ asm volatile(
3889+ " l %0,0(%4)\n"
3890+ "0: lr %1,%0\n"
3891+ " nr %1,%3\n"
3892+ " cs %0,%1,0(%4)\n"
3893+ " jl 0b"
3894+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
3895+ : "d" (mask), "a" (addr)
3896+ : "cc" );
3897+ return (old ^ new) != 0;
3898 }
3899
3900 /*
3901 * SMP save test_and_change_bit routine based on compare and swap (CS)
3902 */
3903-static __inline__ int test_and_change_bit_cs(int nr, volatile void * addr)
3904+static inline int test_and_change_bit_cs(int nr, volatile void *ptr)
3905 {
3906- unsigned long bits, mask;
3907- __asm__ __volatile__(
3908+ unsigned long addr, old, new, mask;
3909+
3910+ addr = (unsigned long) ptr;
3911 #if ALIGN_CS == 1
3912- " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
3913- " nr %2,%1\n" /* isolate last 2 bits of address */
3914- " xr %1,%2\n" /* make addr % 4 == 0 */
3915- " sll %2,3\n"
3916- " ar %0,%2\n" /* add alignement to bitnr */
3917+ addr ^= addr & 3; /* align address to 4 */
3918+ nr += (addr & 3) << 3; /* add alignment to bit number */
3919 #endif
3920- " lhi %2,31\n"
3921- " nr %2,%0\n" /* make shift value */
3922- " xr %0,%2\n"
3923- " srl %0,3\n"
3924- " lhi %3,1\n"
3925- " la %1,0(%0,%1)\n" /* calc. address for CS */
3926- " sll %3,0(%2)\n" /* make OR mask */
3927- " l %0,0(%1)\n"
3928- "0: lr %2,%0\n" /* CS loop starts here */
3929- " xr %2,%3\n" /* change bit */
3930- " cs %0,%2,0(%1)\n"
3931- " jl 0b\n"
3932- " nr %0,%3\n" /* isolate old bit */
3933- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
3934- : "cc", "memory" );
3935- return nr != 0;
3936+ addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
3937+ mask = 1UL << (nr & 31); /* make XOR mask */
3938+ asm volatile(
3939+ " l %0,0(%4)\n"
3940+ "0: lr %1,%0\n"
3941+ " xr %1,%3\n"
3942+ " cs %0,%1,0(%4)\n"
3943+ " jl 0b"
3944+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
3945+ : "d" (mask), "a" (addr)
3946+ : "cc" );
3947+ return (old & mask) != 0;
3948 }
3949 #endif /* CONFIG_SMP */
3950
3951 /*
3952 * fast, non-SMP set_bit routine
3953 */
3954-static __inline__ void __set_bit(int nr, volatile void * addr)
3955+static inline void __set_bit(int nr, volatile void *ptr)
3956 {
3957- unsigned long reg1, reg2;
3958- __asm__ __volatile__(
3959- " lhi %1,24\n"
3960- " lhi %0,7\n"
3961- " xr %1,%2\n"
3962- " nr %0,%2\n"
3963- " srl %1,3\n"
3964- " la %1,0(%1,%3)\n"
3965- " la %0,0(%0,%4)\n"
3966- " oc 0(1,%1),0(%0)"
3967- : "=&a" (reg1), "=&a" (reg2)
3968- : "r" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
3969-}
3970-
3971-static __inline__ void
3972-__constant_set_bit(const int nr, volatile void * addr)
3973-{
3974- switch (nr&7) {
3975- case 0:
3976- __asm__ __volatile__ ("la 1,%0\n\t"
3977- "oi 0(1),0x01"
3978- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
3979- : : "1", "cc", "memory");
3980- break;
3981- case 1:
3982- __asm__ __volatile__ ("la 1,%0\n\t"
3983- "oi 0(1),0x02"
3984- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
3985- : : "1", "cc", "memory" );
3986- break;
3987- case 2:
3988- __asm__ __volatile__ ("la 1,%0\n\t"
3989- "oi 0(1),0x04"
3990- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
3991- : : "1", "cc", "memory" );
3992- break;
3993- case 3:
3994- __asm__ __volatile__ ("la 1,%0\n\t"
3995- "oi 0(1),0x08"
3996- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
3997- : : "1", "cc", "memory" );
3998- break;
3999- case 4:
4000- __asm__ __volatile__ ("la 1,%0\n\t"
4001- "oi 0(1),0x10"
4002- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4003- : : "1", "cc", "memory" );
4004- break;
4005- case 5:
4006- __asm__ __volatile__ ("la 1,%0\n\t"
4007- "oi 0(1),0x20"
4008- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4009- : : "1", "cc", "memory" );
4010- break;
4011- case 6:
4012- __asm__ __volatile__ ("la 1,%0\n\t"
4013- "oi 0(1),0x40"
4014- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4015- : : "1", "cc", "memory" );
4016- break;
4017- case 7:
4018- __asm__ __volatile__ ("la 1,%0\n\t"
4019- "oi 0(1),0x80"
4020- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4021- : : "1", "cc", "memory" );
4022- break;
4023- }
4024+ unsigned long addr;
4025+
4026+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
4027+ asm volatile("oc 0(1,%1),0(%2)"
4028+ : "+m" (*(char *) addr)
4029+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
4030+ : "cc" );
4031+}
4032+
4033+static inline void
4034+__constant_set_bit(const int nr, volatile void *ptr)
4035+{
4036+ unsigned long addr;
4037+
4038+ addr = ((unsigned long) ptr) + ((nr >> 3) ^ 3);
4039+ switch (nr&7) {
4040+ case 0:
4041+ asm volatile ("oi 0(%1),0x01"
4042+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4043+ break;
4044+ case 1:
4045+ asm volatile ("oi 0(%1),0x02"
4046+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4047+ break;
4048+ case 2:
4049+ asm volatile ("oi 0(%1),0x04"
4050+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4051+ break;
4052+ case 3:
4053+ asm volatile ("oi 0(%1),0x08"
4054+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4055+ break;
4056+ case 4:
4057+ asm volatile ("oi 0(%1),0x10"
4058+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4059+ break;
4060+ case 5:
4061+ asm volatile ("oi 0(%1),0x20"
4062+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4063+ break;
4064+ case 6:
4065+ asm volatile ("oi 0(%1),0x40"
4066+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4067+ break;
4068+ case 7:
4069+ asm volatile ("oi 0(%1),0x80"
4070+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4071+ break;
4072+ }
4073 }
4074
4075 #define set_bit_simple(nr,addr) \
4076@@ -323,76 +268,58 @@
4077 /*
4078 * fast, non-SMP clear_bit routine
4079 */
4080-static __inline__ void
4081-__clear_bit(int nr, volatile void * addr)
4082+static inline void
4083+__clear_bit(int nr, volatile void *ptr)
4084 {
4085- unsigned long reg1, reg2;
4086- __asm__ __volatile__(
4087- " lhi %1,24\n"
4088- " lhi %0,7\n"
4089- " xr %1,%2\n"
4090- " nr %0,%2\n"
4091- " srl %1,3\n"
4092- " la %1,0(%1,%3)\n"
4093- " la %0,0(%0,%4)\n"
4094- " nc 0(1,%1),0(%0)"
4095- : "=&a" (reg1), "=&a" (reg2)
4096- : "r" (nr), "a" (addr), "a" (&_ni_bitmap) : "cc", "memory" );
4097-}
4098-
4099-static __inline__ void
4100-__constant_clear_bit(const int nr, volatile void * addr)
4101-{
4102- switch (nr&7) {
4103- case 0:
4104- __asm__ __volatile__ ("la 1,%0\n\t"
4105- "ni 0(1),0xFE"
4106- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4107- : : "1", "cc", "memory" );
4108- break;
4109- case 1:
4110- __asm__ __volatile__ ("la 1,%0\n\t"
4111- "ni 0(1),0xFD"
4112- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4113- : : "1", "cc", "memory" );
4114- break;
4115- case 2:
4116- __asm__ __volatile__ ("la 1,%0\n\t"
4117- "ni 0(1),0xFB"
4118- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4119- : : "1", "cc", "memory" );
4120- break;
4121- case 3:
4122- __asm__ __volatile__ ("la 1,%0\n\t"
4123- "ni 0(1),0xF7"
4124- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4125- : : "1", "cc", "memory" );
4126- break;
4127- case 4:
4128- __asm__ __volatile__ ("la 1,%0\n\t"
4129- "ni 0(1),0xEF"
4130- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4131- : : "cc", "memory" );
4132- break;
4133- case 5:
4134- __asm__ __volatile__ ("la 1,%0\n\t"
4135- "ni 0(1),0xDF"
4136- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4137- : : "1", "cc", "memory" );
4138- break;
4139- case 6:
4140- __asm__ __volatile__ ("la 1,%0\n\t"
4141- "ni 0(1),0xBF"
4142- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4143- : : "1", "cc", "memory" );
4144- break;
4145- case 7:
4146- __asm__ __volatile__ ("la 1,%0\n\t"
4147- "ni 0(1),0x7F"
4148- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4149- : : "1", "cc", "memory" );
4150- break;
4151- }
4152+ unsigned long addr;
4153+
4154+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
4155+ asm volatile("nc 0(1,%1),0(%2)"
4156+ : "+m" (*(char *) addr)
4157+ : "a" (addr), "a" (_ni_bitmap + (nr & 7))
4158+ : "cc" );
4159+}
4160+
4161+static inline void
4162+__constant_clear_bit(const int nr, volatile void *ptr)
4163+{
4164+ unsigned long addr;
4165+
4166+ addr = ((unsigned long) ptr) + ((nr >> 3) ^ 3);
4167+ switch (nr&7) {
4168+ case 0:
4169+ asm volatile ("ni 0(%1),0xFE"
4170+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4171+ break;
4172+ case 1:
4173+ asm volatile ("ni 0(%1),0xFD"
4174+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4175+ break;
4176+ case 2:
4177+ asm volatile ("ni 0(%1),0xFB"
4178+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4179+ break;
4180+ case 3:
4181+ asm volatile ("ni 0(%1),0xF7"
4182+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4183+ break;
4184+ case 4:
4185+ asm volatile ("ni 0(%1),0xEF"
4186+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4187+ break;
4188+ case 5:
4189+ asm volatile ("ni 0(%1),0xDF"
4190+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4191+ break;
4192+ case 6:
4193+ asm volatile ("ni 0(%1),0xBF"
4194+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4195+ break;
4196+ case 7:
4197+ asm volatile ("ni 0(%1),0x7F"
4198+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4199+ break;
4200+ }
4201 }
4202
4203 #define clear_bit_simple(nr,addr) \
4204@@ -403,75 +330,57 @@
4205 /*
4206 * fast, non-SMP change_bit routine
4207 */
4208-static __inline__ void __change_bit(int nr, volatile void * addr)
4209+static inline void __change_bit(int nr, volatile void *ptr)
4210 {
4211- unsigned long reg1, reg2;
4212- __asm__ __volatile__(
4213- " lhi %1,24\n"
4214- " lhi %0,7\n"
4215- " xr %1,%2\n"
4216- " nr %0,%2\n"
4217- " srl %1,3\n"
4218- " la %1,0(%1,%3)\n"
4219- " la %0,0(%0,%4)\n"
4220- " xc 0(1,%1),0(%0)"
4221- : "=&a" (reg1), "=&a" (reg2)
4222- : "r" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
4223-}
4224-
4225-static __inline__ void
4226-__constant_change_bit(const int nr, volatile void * addr)
4227-{
4228- switch (nr&7) {
4229- case 0:
4230- __asm__ __volatile__ ("la 1,%0\n\t"
4231- "xi 0(1),0x01"
4232- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4233- : : "cc", "memory" );
4234- break;
4235- case 1:
4236- __asm__ __volatile__ ("la 1,%0\n\t"
4237- "xi 0(1),0x02"
4238- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4239- : : "cc", "memory" );
4240- break;
4241- case 2:
4242- __asm__ __volatile__ ("la 1,%0\n\t"
4243- "xi 0(1),0x04"
4244- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4245- : : "cc", "memory" );
4246- break;
4247- case 3:
4248- __asm__ __volatile__ ("la 1,%0\n\t"
4249- "xi 0(1),0x08"
4250- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4251- : : "cc", "memory" );
4252- break;
4253- case 4:
4254- __asm__ __volatile__ ("la 1,%0\n\t"
4255- "xi 0(1),0x10"
4256- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4257- : : "cc", "memory" );
4258- break;
4259- case 5:
4260- __asm__ __volatile__ ("la 1,%0\n\t"
4261- "xi 0(1),0x20"
4262- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4263- : : "1", "cc", "memory" );
4264- break;
4265- case 6:
4266- __asm__ __volatile__ ("la 1,%0\n\t"
4267- "xi 0(1),0x40"
4268- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4269- : : "1", "cc", "memory" );
4270- break;
4271- case 7:
4272- __asm__ __volatile__ ("la 1,%0\n\t"
4273- "xi 0(1),0x80"
4274- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4275- : : "1", "cc", "memory" );
4276- break;
4277- }
4278+ unsigned long addr;
4279+
4280+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
4281+ asm volatile("xc 0(1,%1),0(%2)"
4282+ : "+m" (*(char *) addr)
4283+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
4284+ : "cc" );
4285+}
4286+
4287+static inline void
4288+__constant_change_bit(const int nr, volatile void *ptr)
4289+{
4290+ unsigned long addr;
4291+
4292+ addr = ((unsigned long) ptr) + ((nr >> 3) ^ 3);
4293+ switch (nr&7) {
4294+ case 0:
4295+ asm volatile ("xi 0(%1),0x01"
4296+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4297+ break;
4298+ case 1:
4299+ asm volatile ("xi 0(%1),0x02"
4300+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4301+ break;
4302+ case 2:
4303+ asm volatile ("xi 0(%1),0x04"
4304+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4305+ break;
4306+ case 3:
4307+ asm volatile ("xi 0(%1),0x08"
4308+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4309+ break;
4310+ case 4:
4311+ asm volatile ("xi 0(%1),0x10"
4312+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4313+ break;
4314+ case 5:
4315+ asm volatile ("xi 0(%1),0x20"
4316+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4317+ break;
4318+ case 6:
4319+ asm volatile ("xi 0(%1),0x40"
4320+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4321+ break;
4322+ case 7:
4323+ asm volatile ("xi 0(%1),0x80"
4324+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4325+ break;
4326+ }
4327 }
4328
4329 #define change_bit_simple(nr,addr) \
4330@@ -482,74 +391,54 @@
4331 /*
4332 * fast, non-SMP test_and_set_bit routine
4333 */
4334-static __inline__ int test_and_set_bit_simple(int nr, volatile void * addr)
4335+static inline int test_and_set_bit_simple(int nr, volatile void *ptr)
4336 {
4337- unsigned long reg1, reg2;
4338- int oldbit;
4339- __asm__ __volatile__(
4340- " lhi %1,24\n"
4341- " lhi %2,7\n"
4342- " xr %1,%3\n"
4343- " nr %2,%3\n"
4344- " srl %1,3\n"
4345- " la %1,0(%1,%4)\n"
4346- " ic %0,0(%1)\n"
4347- " srl %0,0(%2)\n"
4348- " la %2,0(%2,%5)\n"
4349- " oc 0(1,%1),0(%2)"
4350- : "=d&" (oldbit), "=&a" (reg1), "=&a" (reg2)
4351- : "r" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
4352- return oldbit & 1;
4353+ unsigned long addr;
4354+ unsigned char ch;
4355+
4356+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
4357+ ch = *(unsigned char *) addr;
4358+ asm volatile("oc 0(1,%1),0(%2)"
4359+ : "+m" (*(char *) addr)
4360+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
4361+ : "cc" );
4362+ return (ch >> (nr & 7)) & 1;
4363 }
4364 #define __test_and_set_bit(X,Y) test_and_set_bit_simple(X,Y)
4365
4366 /*
4367 * fast, non-SMP test_and_clear_bit routine
4368 */
4369-static __inline__ int test_and_clear_bit_simple(int nr, volatile void * addr)
4370+static inline int test_and_clear_bit_simple(int nr, volatile void *ptr)
4371 {
4372- unsigned long reg1, reg2;
4373- int oldbit;
4374+ unsigned long addr;
4375+ unsigned char ch;
4376
4377- __asm__ __volatile__(
4378- " lhi %1,24\n"
4379- " lhi %2,7\n"
4380- " xr %1,%3\n"
4381- " nr %2,%3\n"
4382- " srl %1,3\n"
4383- " la %1,0(%1,%4)\n"
4384- " ic %0,0(%1)\n"
4385- " srl %0,0(%2)\n"
4386- " la %2,0(%2,%5)\n"
4387- " nc 0(1,%1),0(%2)"
4388- : "=d&" (oldbit), "=&a" (reg1), "=&a" (reg2)
4389- : "r" (nr), "a" (addr), "a" (&_ni_bitmap) : "cc", "memory" );
4390- return oldbit & 1;
4391+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
4392+ ch = *(unsigned char *) addr;
4393+ asm volatile("nc 0(1,%1),0(%2)"
4394+ : "+m" (*(char *) addr)
4395+ : "a" (addr), "a" (_ni_bitmap + (nr & 7))
4396+ : "cc" );
4397+ return (ch >> (nr & 7)) & 1;
4398 }
4399 #define __test_and_clear_bit(X,Y) test_and_clear_bit_simple(X,Y)
4400
4401 /*
4402 * fast, non-SMP test_and_change_bit routine
4403 */
4404-static __inline__ int test_and_change_bit_simple(int nr, volatile void * addr)
4405+static inline int test_and_change_bit_simple(int nr, volatile void *ptr)
4406 {
4407- unsigned long reg1, reg2;
4408- int oldbit;
4409+ unsigned long addr;
4410+ unsigned char ch;
4411
4412- __asm__ __volatile__(
4413- " lhi %1,24\n"
4414- " lhi %2,7\n"
4415- " xr %1,%3\n"
4416- " nr %2,%1\n"
4417- " srl %1,3\n"
4418- " la %1,0(%1,%4)\n"
4419- " ic %0,0(%1)\n"
4420- " srl %0,0(%2)\n"
4421- " la %2,0(%2,%5)\n"
4422- " xc 0(1,%1),0(%2)"
4423- : "=d&" (oldbit), "=&a" (reg1), "=&a" (reg2)
4424- : "r" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
4425- return oldbit & 1;
4426+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
4427+ ch = *(unsigned char *) addr;
4428+ asm volatile("xc 0(1,%1),0(%2)"
4429+ : "+m" (*(char *) addr)
4430+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
4431+ : "cc" );
4432+ return (ch >> (nr & 7)) & 1;
4433 }
4434 #define __test_and_change_bit(X,Y) test_and_change_bit_simple(X,Y)
4435
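
The *_simple() rewrites above hoist the byte-address computation into C and keep only the oc/nc/xc instruction in assembly: the byte holding bit nr sits at offset (nr ^ 24) >> 3 because the kernel numbers bits from the least significant end of each 32-bit word while s390 stores words big-endian. A rough C-only sketch of what test_and_set_bit_simple() computes on the 31-bit s390 variant (the name and the plain C store are illustrative; the patch keeps the oc instruction and the _oi_bitmap table):

	/* Sketch: non-atomic test-and-set on a big-endian 32-bit word;
	 * (nr ^ 24) >> 3 picks the mirrored byte inside the word and
	 * (nr & 7) the bit inside that byte. */
	static inline int test_and_set_bit_simple_sketch(int nr, volatile void *ptr)
	{
		volatile unsigned char *byte =
			(volatile unsigned char *) ptr + ((nr ^ 24) >> 3);
		unsigned char old = *byte;

		*byte = old | (unsigned char)(1U << (nr & 7));
		return (old >> (nr & 7)) & 1;
	}
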
4436@@ -574,25 +463,17 @@
4437 * This routine doesn't need to be atomic.
4438 */
4439
4440-static __inline__ int __test_bit(int nr, volatile void * addr)
4441+static inline int __test_bit(int nr, volatile void *ptr)
4442 {
4443- unsigned long reg1, reg2;
4444- int oldbit;
4445+ unsigned long addr;
4446+ unsigned char ch;
4447
4448- __asm__ __volatile__(
4449- " lhi %2,24\n"
4450- " lhi %1,7\n"
4451- " xr %2,%3\n"
4452- " nr %1,%3\n"
4453- " srl %2,3\n"
4454- " ic %0,0(%2,%4)\n"
4455- " srl %0,0(%1)"
4456- : "=d&" (oldbit), "=&a" (reg1), "=&a" (reg2)
4457- : "r" (nr), "a" (addr) : "cc" );
4458- return oldbit & 1;
4459+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
4460+ ch = *(unsigned char *) addr;
4461+ return (ch >> (nr & 7)) & 1;
4462 }
4463
4464-static __inline__ int __constant_test_bit(int nr, volatile void * addr) {
4465+static inline int __constant_test_bit(int nr, volatile void * addr) {
4466 return (((volatile char *) addr)[(nr>>3)^3] & (1<<(nr&7))) != 0;
4467 }
4468
4469@@ -604,7 +485,7 @@
4470 /*
4471 * Find-bit routines..
4472 */
4473-static __inline__ int find_first_zero_bit(void * addr, unsigned size)
4474+static inline int find_first_zero_bit(void * addr, unsigned size)
4475 {
4476 unsigned long cmp, count;
4477 int res;
4478@@ -642,7 +523,45 @@
4479 return (res < size) ? res : size;
4480 }
4481
4482-static __inline__ int find_next_zero_bit (void * addr, int size, int offset)
4483+static inline int find_first_bit(void * addr, unsigned size)
4484+{
4485+ unsigned long cmp, count;
4486+ int res;
4487+
4488+ if (!size)
4489+ return 0;
4490+ __asm__(" slr %1,%1\n"
4491+ " lr %2,%3\n"
4492+ " slr %0,%0\n"
4493+ " ahi %2,31\n"
4494+ " srl %2,5\n"
4495+ "0: c %1,0(%0,%4)\n"
4496+ " jne 1f\n"
4497+ " ahi %0,4\n"
4498+ " brct %2,0b\n"
4499+ " lr %0,%3\n"
4500+ " j 4f\n"
4501+ "1: l %2,0(%0,%4)\n"
4502+ " sll %0,3\n"
4503+ " lhi %1,0xff\n"
4504+ " tml %2,0xffff\n"
4505+ " jnz 2f\n"
4506+ " ahi %0,16\n"
4507+ " srl %2,16\n"
4508+ "2: tml %2,0x00ff\n"
4509+ " jnz 3f\n"
4510+ " ahi %0,8\n"
4511+ " srl %2,8\n"
4512+ "3: nr %2,%1\n"
4513+ " ic %2,0(%2,%5)\n"
4514+ " alr %0,%2\n"
4515+ "4:"
4516+ : "=&a" (res), "=&d" (cmp), "=&a" (count)
4517+ : "a" (size), "a" (addr), "a" (&_sb_findmap) : "cc" );
4518+ return (res < size) ? res : size;
4519+}
4520+
4521+static inline int find_next_zero_bit (void * addr, int size, int offset)
4522 {
4523 unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
4524 unsigned long bitvec, reg;
4525@@ -680,11 +599,49 @@
4526 return (offset + res);
4527 }
4528
4529+static inline int find_next_bit (void * addr, int size, int offset)
4530+{
4531+ unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
4532+ unsigned long bitvec, reg;
4533+ int set, bit = offset & 31, res;
4534+
4535+ if (bit) {
4536+ /*
4537+ * Look for set bit in first word
4538+ */
4539+ bitvec = (*p) >> bit;
4540+ __asm__(" slr %0,%0\n"
4541+ " lhi %2,0xff\n"
4542+ " tml %1,0xffff\n"
4543+ " jnz 0f\n"
4544+ " ahi %0,16\n"
4545+ " srl %1,16\n"
4546+ "0: tml %1,0x00ff\n"
4547+ " jnz 1f\n"
4548+ " ahi %0,8\n"
4549+ " srl %1,8\n"
4550+ "1: nr %1,%2\n"
4551+ " ic %1,0(%1,%3)\n"
4552+ " alr %0,%1"
4553+ : "=&d" (set), "+a" (bitvec), "=&d" (reg)
4554+ : "a" (&_sb_findmap) : "cc" );
4555+ if (set < (32 - bit))
4556+ return set + offset;
4557+ offset += 32 - bit;
4558+ p++;
4559+ }
4560+ /*
4561+ * No set bit yet, search remaining full words for a bit
4562+ */
4563+ res = find_first_bit (p, size - 32 * (p - (unsigned long *) addr));
4564+ return (offset + res);
4565+}
4566+
4567 /*
4568 * ffz = Find First Zero in word. Undefined if no zero exists,
4569 * so code should check against ~0UL first..
4570 */
4571-static __inline__ unsigned long ffz(unsigned long word)
4572+static inline unsigned long ffz(unsigned long word)
4573 {
4574 unsigned long reg;
4575 int result;
4576@@ -708,40 +665,109 @@
4577 }
4578
4579 /*
4580+ * __ffs = find first bit in word. Undefined if no bit exists,
4581+ * so code should check against 0UL first..
4582+ */
4583+static inline unsigned long __ffs(unsigned long word)
4584+{
4585+ unsigned long reg, result;
4586+
4587+ __asm__(" slr %0,%0\n"
4588+ " lhi %2,0xff\n"
4589+ " tml %1,0xffff\n"
4590+ " jnz 0f\n"
4591+ " ahi %0,16\n"
4592+ " srl %1,16\n"
4593+ "0: tml %1,0x00ff\n"
4594+ " jnz 1f\n"
4595+ " ahi %0,8\n"
4596+ " srl %1,8\n"
4597+ "1: nr %1,%2\n"
4598+ " ic %1,0(%1,%3)\n"
4599+ " alr %0,%1"
4600+ : "=&d" (result), "+a" (word), "=&d" (reg)
4601+ : "a" (&_sb_findmap) : "cc" );
4602+ return result;
4603+}
4604+
4605+/*
4606+ * Every architecture must define this function. It's the fastest
4607+ * way of searching a 140-bit bitmap where the first 100 bits are
4608+ * unlikely to be set. It's guaranteed that at least one of the 140
4609+ * bits is cleared.
4610+ */
4611+static inline int sched_find_first_bit(unsigned long *b)
4612+{
4613+ return find_first_bit(b, 140);
4614+}
4615+
4616+/*
4617 * ffs: find first bit set. This is defined the same way as
4618 * the libc and compiler builtin ffs routines, therefore
4619 * differs in spirit from the above ffz (man ffs).
4620 */
4621
4622-extern int __inline__ ffs (int x)
4623+extern int inline ffs (int x)
4624 {
4625- int r;
4626+ int r = 1;
4627
4628 if (x == 0)
4629- return 0;
4630- __asm__(" slr %0,%0\n"
4631- " tml %1,0xffff\n"
4632+ return 0;
4633+ __asm__(" tml %1,0xffff\n"
4634 " jnz 0f\n"
4635- " ahi %0,16\n"
4636 " srl %1,16\n"
4637+ " ahi %0,16\n"
4638 "0: tml %1,0x00ff\n"
4639 " jnz 1f\n"
4640- " ahi %0,8\n"
4641 " srl %1,8\n"
4642+ " ahi %0,8\n"
4643 "1: tml %1,0x000f\n"
4644 " jnz 2f\n"
4645- " ahi %0,4\n"
4646 " srl %1,4\n"
4647+ " ahi %0,4\n"
4648 "2: tml %1,0x0003\n"
4649 " jnz 3f\n"
4650- " ahi %0,2\n"
4651 " srl %1,2\n"
4652+ " ahi %0,2\n"
4653 "3: tml %1,0x0001\n"
4654 " jnz 4f\n"
4655 " ahi %0,1\n"
4656 "4:"
4657 : "=&d" (r), "+d" (x) : : "cc" );
4658- return r+1;
4659+ return r;
4660+}
4661+
4662+/*
4663+ * fls: find last bit set.
4664+ */
4665+extern __inline__ int fls(int x)
4666+{
4667+ int r = 32;
4668+
4669+ if (x == 0)
4670+ return 0;
4671+ __asm__(" tmh %1,0xffff\n"
4672+ " jz 0f\n"
4673+ " sll %1,16\n"
4674+ " ahi %0,-16\n"
4675+ "0: tmh %1,0xff00\n"
4676+ " jz 1f\n"
4677+ " sll %1,8\n"
4678+ " ahi %0,-8\n"
4679+ "1: tmh %1,0xf000\n"
4680+ " jz 2f\n"
4681+ " sll %1,4\n"
4682+ " ahi %0,-4\n"
4683+ "2: tmh %1,0xc000\n"
4684+ " jz 3f\n"
4685+ " sll %1,2\n"
4686+ " ahi %0,-2\n"
4687+ "3: tmh %1,0x8000\n"
4688+ " jz 4f\n"
4689+ " ahi %0,-1\n"
4690+ "4:"
4691+ : "+d" (r), "+d" (x) : : "cc" );
4692+ return r;
4693 }
4694
4695 /*
4696@@ -769,7 +795,7 @@
4697 #define ext2_set_bit(nr, addr) test_and_set_bit((nr)^24, addr)
4698 #define ext2_clear_bit(nr, addr) test_and_clear_bit((nr)^24, addr)
4699 #define ext2_test_bit(nr, addr) test_bit((nr)^24, addr)
4700-static __inline__ int ext2_find_first_zero_bit(void *vaddr, unsigned size)
4701+static inline int ext2_find_first_zero_bit(void *vaddr, unsigned size)
4702 {
4703 unsigned long cmp, count;
4704 int res;
4705@@ -808,7 +834,7 @@
4706 return (res < size) ? res : size;
4707 }
4708
4709-static __inline__ int
4710+static inline int
4711 ext2_find_next_zero_bit(void *vaddr, unsigned size, unsigned offset)
4712 {
4713 unsigned long *addr = vaddr;
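
Across this file the CONFIG_SMP helpers are reduced to one shape: compute the aligned word address and the mask in C, run a short compare-and-swap (CS) retry loop in assembly, and derive the old bit from the value the loop started with. A hedged sketch of that retry pattern in plain C, with GCC's __sync_val_compare_and_swap standing in for the CS instruction (an assumption for illustration; the patch emits the real instruction):

	/* Sketch: CAS retry loop behind the *_bit_cs() helpers; the word
	 * holding bit nr is at index nr/32 and the mask is 1 << (nr % 32). */
	static inline int test_and_set_bit_cs_sketch(int nr, volatile unsigned int *ptr)
	{
		volatile unsigned int *word = ptr + (nr >> 5);
		unsigned int mask = 1U << (nr & 31);
		unsigned int old, seen;

		do {
			old = *word;			/* value we expect      */
			seen = __sync_val_compare_and_swap(word, old, old | mask);
		} while (seen != old);			/* somebody raced us    */
		return (old & mask) != 0;		/* previous bit state   */
	}
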
4714diff -urN linux-2.4.24.org/include/asm-s390x/bitops.h linux-2.4.24/include/asm-s390x/bitops.h
4715--- linux-2.4.24.org/include/asm-s390x/bitops.h 2004-02-04 20:48:28.470122479 +0100
4716+++ linux-2.4.24/include/asm-s390x/bitops.h 2004-02-04 20:52:54.030886671 +0100
4717@@ -51,271 +51,220 @@
4718 extern const char _oi_bitmap[];
4719 extern const char _ni_bitmap[];
4720 extern const char _zb_findmap[];
4721+extern const char _sb_findmap[];
4722
4723 #ifdef CONFIG_SMP
4724 /*
4725 * SMP save set_bit routine based on compare and swap (CS)
4726 */
4727-static __inline__ void set_bit_cs(unsigned long nr, volatile void * addr)
4728+static inline void set_bit_cs(unsigned long nr, volatile void *ptr)
4729 {
4730- unsigned long bits, mask;
4731- __asm__ __volatile__(
4732+ unsigned long addr, old, new, mask;
4733+
4734+ addr = (unsigned long) ptr;
4735 #if ALIGN_CS == 1
4736- " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
4737- " ngr %2,%1\n" /* isolate last 2 bits of address */
4738- " xgr %1,%2\n" /* make addr % 4 == 0 */
4739- " sllg %2,%2,3\n"
4740- " agr %0,%2\n" /* add alignement to bitnr */
4741+ addr ^= addr & 7; /* align address to 8 */
4742+ nr += (addr & 7) << 3; /* add alignment to bit number */
4743 #endif
4744- " lghi %2,63\n"
4745- " nr %2,%0\n" /* make shift value */
4746- " xr %0,%2\n"
4747- " srlg %0,%0,3\n"
4748- " lghi %3,1\n"
4749- " la %1,0(%0,%1)\n" /* calc. address for CS */
4750- " sllg %3,%3,0(%2)\n" /* make OR mask */
4751- " lg %0,0(%1)\n"
4752- "0: lgr %2,%0\n" /* CS loop starts here */
4753- " ogr %2,%3\n" /* set bit */
4754- " csg %0,%2,0(%1)\n"
4755- " jl 0b"
4756- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
4757- : "cc", "memory" );
4758+ addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
4759+ mask = 1UL << (nr & 63); /* make OR mask */
4760+ asm volatile(
4761+ " lg %0,0(%4)\n"
4762+ "0: lgr %1,%0\n"
4763+ " ogr %1,%3\n"
4764+ " csg %0,%1,0(%4)\n"
4765+ " jl 0b"
4766+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
4767+ : "d" (mask), "a" (addr)
4768+ : "cc" );
4769 }
4770
4771 /*
4772 * SMP save clear_bit routine based on compare and swap (CS)
4773 */
4774-static __inline__ void clear_bit_cs(unsigned long nr, volatile void * addr)
4775+static inline void clear_bit_cs(unsigned long nr, volatile void *ptr)
4776 {
4777- unsigned long bits, mask;
4778- __asm__ __volatile__(
4779+ unsigned long addr, old, new, mask;
4780+
4781+ addr = (unsigned long) ptr;
4782 #if ALIGN_CS == 1
4783- " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
4784- " ngr %2,%1\n" /* isolate last 2 bits of address */
4785- " xgr %1,%2\n" /* make addr % 4 == 0 */
4786- " sllg %2,%2,3\n"
4787- " agr %0,%2\n" /* add alignement to bitnr */
4788+ addr ^= addr & 7; /* align address to 8 */
4789+ nr += (addr & 7) << 3; /* add alignment to bit number */
4790 #endif
4791- " lghi %2,63\n"
4792- " nr %2,%0\n" /* make shift value */
4793- " xr %0,%2\n"
4794- " srlg %0,%0,3\n"
4795- " lghi %3,-2\n"
4796- " la %1,0(%0,%1)\n" /* calc. address for CS */
4797- " lghi %3,-2\n"
4798- " rllg %3,%3,0(%2)\n" /* make AND mask */
4799- " lg %0,0(%1)\n"
4800- "0: lgr %2,%0\n" /* CS loop starts here */
4801- " ngr %2,%3\n" /* clear bit */
4802- " csg %0,%2,0(%1)\n"
4803- " jl 0b"
4804- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
4805- : "cc", "memory" );
4806+ addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
4807+ mask = ~(1UL << (nr & 63)); /* make AND mask */
4808+ asm volatile(
4809+ " lg %0,0(%4)\n"
4810+ "0: lgr %1,%0\n"
4811+ " ngr %1,%3\n"
4812+ " csg %0,%1,0(%4)\n"
4813+ " jl 0b"
4814+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
4815+ : "d" (mask), "a" (addr)
4816+ : "cc" );
4817 }
4818
4819 /*
4820 * SMP save change_bit routine based on compare and swap (CS)
4821 */
4822-static __inline__ void change_bit_cs(unsigned long nr, volatile void * addr)
4823+static inline void change_bit_cs(unsigned long nr, volatile void *ptr)
4824 {
4825- unsigned long bits, mask;
4826- __asm__ __volatile__(
4827+ unsigned long addr, old, new, mask;
4828+
4829+ addr = (unsigned long) ptr;
4830 #if ALIGN_CS == 1
4831- " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
4832- " ngr %2,%1\n" /* isolate last 2 bits of address */
4833- " xgr %1,%2\n" /* make addr % 4 == 0 */
4834- " sllg %2,%2,3\n"
4835- " agr %0,%2\n" /* add alignement to bitnr */
4836+ addr ^= addr & 7; /* align address to 8 */
4837+ nr += (addr & 7) << 3; /* add alignment to bit number */
4838 #endif
4839- " lghi %2,63\n"
4840- " nr %2,%0\n" /* make shift value */
4841- " xr %0,%2\n"
4842- " srlg %0,%0,3\n"
4843- " lghi %3,1\n"
4844- " la %1,0(%0,%1)\n" /* calc. address for CS */
4845- " sllg %3,%3,0(%2)\n" /* make XR mask */
4846- " lg %0,0(%1)\n"
4847- "0: lgr %2,%0\n" /* CS loop starts here */
4848- " xgr %2,%3\n" /* change bit */
4849- " csg %0,%2,0(%1)\n"
4850- " jl 0b"
4851- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
4852- : "cc", "memory" );
4853+ addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
4854+ mask = 1UL << (nr & 63); /* make XOR mask */
4855+ asm volatile(
4856+ " lg %0,0(%4)\n"
4857+ "0: lgr %1,%0\n"
4858+ " xgr %1,%3\n"
4859+ " csg %0,%1,0(%4)\n"
4860+ " jl 0b"
4861+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
4862+ : "d" (mask), "a" (addr)
4863+ : "cc" );
4864 }
4865
4866 /*
4867 * SMP save test_and_set_bit routine based on compare and swap (CS)
4868 */
4869-static __inline__ int
4870-test_and_set_bit_cs(unsigned long nr, volatile void * addr)
4871+static inline int
4872+test_and_set_bit_cs(unsigned long nr, volatile void *ptr)
4873 {
4874- unsigned long bits, mask;
4875- __asm__ __volatile__(
4876+ unsigned long addr, old, new, mask;
4877+
4878+ addr = (unsigned long) ptr;
4879 #if ALIGN_CS == 1
4880- " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
4881- " ngr %2,%1\n" /* isolate last 2 bits of address */
4882- " xgr %1,%2\n" /* make addr % 4 == 0 */
4883- " sllg %2,%2,3\n"
4884- " agr %0,%2\n" /* add alignement to bitnr */
4885+ addr ^= addr & 7; /* align address to 8 */
4886+ nr += (addr & 7) << 3; /* add alignment to bit number */
4887 #endif
4888- " lghi %2,63\n"
4889- " nr %2,%0\n" /* make shift value */
4890- " xr %0,%2\n"
4891- " srlg %0,%0,3\n"
4892- " lghi %3,1\n"
4893- " la %1,0(%0,%1)\n" /* calc. address for CS */
4894- " sllg %3,%3,0(%2)\n" /* make OR mask */
4895- " lg %0,0(%1)\n"
4896- "0: lgr %2,%0\n" /* CS loop starts here */
4897- " ogr %2,%3\n" /* set bit */
4898- " csg %0,%2,0(%1)\n"
4899- " jl 0b\n"
4900- " ngr %0,%3\n" /* isolate old bit */
4901- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
4902- : "cc", "memory" );
4903- return nr != 0;
4904+ addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
4905+ mask = 1UL << (nr & 63); /* make OR/test mask */
4906+ asm volatile(
4907+ " lg %0,0(%4)\n"
4908+ "0: lgr %1,%0\n"
4909+ " ogr %1,%3\n"
4910+ " csg %0,%1,0(%4)\n"
4911+ " jl 0b"
4912+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
4913+ : "d" (mask), "a" (addr)
4914+ : "cc" );
4915+ return (old & mask) != 0;
4916 }
4917
4918 /*
4919 * SMP save test_and_clear_bit routine based on compare and swap (CS)
4920 */
4921-static __inline__ int
4922-test_and_clear_bit_cs(unsigned long nr, volatile void * addr)
4923+static inline int
4924+test_and_clear_bit_cs(unsigned long nr, volatile void *ptr)
4925 {
4926- unsigned long bits, mask;
4927- __asm__ __volatile__(
4928+ unsigned long addr, old, new, mask;
4929+
4930+ addr = (unsigned long) ptr;
4931 #if ALIGN_CS == 1
4932- " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
4933- " ngr %2,%1\n" /* isolate last 2 bits of address */
4934- " xgr %1,%2\n" /* make addr % 4 == 0 */
4935- " sllg %2,%2,3\n"
4936- " agr %0,%2\n" /* add alignement to bitnr */
4937+ addr ^= addr & 7; /* align address to 8 */
4938+ nr += (addr & 7) << 3; /* add alignment to bit number */
4939 #endif
4940- " lghi %2,63\n"
4941- " nr %2,%0\n" /* make shift value */
4942- " xr %0,%2\n"
4943- " srlg %0,%0,3\n"
4944- " lghi %3,-2\n"
4945- " la %1,0(%0,%1)\n" /* calc. address for CS */
4946- " rllg %3,%3,0(%2)\n" /* make AND mask */
4947- " lg %0,0(%1)\n"
4948- "0: lgr %2,%0\n" /* CS loop starts here */
4949- " ngr %2,%3\n" /* clear bit */
4950- " csg %0,%2,0(%1)\n"
4951- " jl 0b\n"
4952- " xgr %0,%2\n" /* isolate old bit */
4953- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
4954- : "cc", "memory" );
4955- return nr != 0;
4956+ addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
4957+ mask = ~(1UL << (nr & 63)); /* make AND mask */
4958+ asm volatile(
4959+ " lg %0,0(%4)\n"
4960+ "0: lgr %1,%0\n"
4961+ " ngr %1,%3\n"
4962+ " csg %0,%1,0(%4)\n"
4963+ " jl 0b"
4964+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
4965+ : "d" (mask), "a" (addr)
4966+ : "cc" );
4967+ return (old ^ new) != 0;
4968 }
4969
4970 /*
4971 * SMP save test_and_change_bit routine based on compare and swap (CS)
4972 */
4973-static __inline__ int
4974-test_and_change_bit_cs(unsigned long nr, volatile void * addr)
4975+static inline int
4976+test_and_change_bit_cs(unsigned long nr, volatile void *ptr)
4977 {
4978- unsigned long bits, mask;
4979- __asm__ __volatile__(
4980+ unsigned long addr, old, new, mask;
4981+
4982+ addr = (unsigned long) ptr;
4983 #if ALIGN_CS == 1
4984- " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
4985- " ngr %2,%1\n" /* isolate last 2 bits of address */
4986- " xgr %1,%2\n" /* make addr % 4 == 0 */
4987- " sllg %2,%2,3\n"
4988- " agr %0,%2\n" /* add alignement to bitnr */
4989+ addr ^= addr & 7; /* align address to 8 */
4990+ nr += (addr & 7) << 3; /* add alignment to bit number */
4991 #endif
4992- " lghi %2,63\n"
4993- " nr %2,%0\n" /* make shift value */
4994- " xr %0,%2\n"
4995- " srlg %0,%0,3\n"
4996- " lghi %3,1\n"
4997- " la %1,0(%0,%1)\n" /* calc. address for CS */
4998- " sllg %3,%3,0(%2)\n" /* make OR mask */
4999- " lg %0,0(%1)\n"
5000- "0: lgr %2,%0\n" /* CS loop starts here */
5001- " xgr %2,%3\n" /* change bit */
5002- " csg %0,%2,0(%1)\n"
5003- " jl 0b\n"
5004- " ngr %0,%3\n" /* isolate old bit */
5005- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
5006- : "cc", "memory" );
5007- return nr != 0;
5008+ addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
5009+ mask = 1UL << (nr & 63); /* make XOR mask */
5010+ asm volatile(
5011+ " lg %0,0(%4)\n"
5012+ "0: lgr %1,%0\n"
5013+ " xgr %1,%3\n"
5014+ " csg %0,%1,0(%4)\n"
5015+ " jl 0b"
5016+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
5017+ : "d" (mask), "a" (addr)
5018+ : "cc" );
5019+ return (old & mask) != 0;
5020 }
5021 #endif /* CONFIG_SMP */
5022
5023 /*
5024 * fast, non-SMP set_bit routine
5025 */
5026-static __inline__ void __set_bit(unsigned long nr, volatile void * addr)
5027+static inline void __set_bit(unsigned long nr, volatile void *ptr)
5028 {
5029- unsigned long reg1, reg2;
5030- __asm__ __volatile__(
5031- " lghi %1,56\n"
5032- " lghi %0,7\n"
5033- " xgr %1,%2\n"
5034- " nr %0,%2\n"
5035- " srlg %1,%1,3\n"
5036- " la %1,0(%1,%3)\n"
5037- " la %0,0(%0,%4)\n"
5038- " oc 0(1,%1),0(%0)"
5039- : "=&a" (reg1), "=&a" (reg2)
5040- : "a" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
5041-}
5042-
5043-static __inline__ void
5044-__constant_set_bit(const unsigned long nr, volatile void * addr)
5045-{
5046- switch (nr&7) {
5047- case 0:
5048- __asm__ __volatile__ ("la 1,%0\n\t"
5049- "oi 0(1),0x01"
5050- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5051- : : "1", "cc", "memory");
5052- break;
5053- case 1:
5054- __asm__ __volatile__ ("la 1,%0\n\t"
5055- "oi 0(1),0x02"
5056- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5057- : : "1", "cc", "memory" );
5058- break;
5059- case 2:
5060- __asm__ __volatile__ ("la 1,%0\n\t"
5061- "oi 0(1),0x04"
5062- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5063- : : "1", "cc", "memory" );
5064- break;
5065- case 3:
5066- __asm__ __volatile__ ("la 1,%0\n\t"
5067- "oi 0(1),0x08"
5068- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5069- : : "1", "cc", "memory" );
5070- break;
5071- case 4:
5072- __asm__ __volatile__ ("la 1,%0\n\t"
5073- "oi 0(1),0x10"
5074- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5075- : : "1", "cc", "memory" );
5076- break;
5077- case 5:
5078- __asm__ __volatile__ ("la 1,%0\n\t"
5079- "oi 0(1),0x20"
5080- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5081- : : "1", "cc", "memory" );
5082- break;
5083- case 6:
5084- __asm__ __volatile__ ("la 1,%0\n\t"
5085- "oi 0(1),0x40"
5086- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5087- : : "1", "cc", "memory" );
5088- break;
5089- case 7:
5090- __asm__ __volatile__ ("la 1,%0\n\t"
5091- "oi 0(1),0x80"
5092- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5093- : : "1", "cc", "memory" );
5094- break;
5095- }
5096+ unsigned long addr;
5097+
5098+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
5099+ asm volatile("oc 0(1,%1),0(%2)"
5100+ : "+m" (*(char *) addr)
5101+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
5102+ : "cc" );
5103+}
5104+
5105+static inline void
5106+__constant_set_bit(const unsigned long nr, volatile void *ptr)
5107+{
5108+ unsigned long addr;
5109+
5110+ addr = ((unsigned long) ptr) + ((nr >> 3) ^ 7);
5111+ switch (nr&7) {
5112+ case 0:
5113+ asm volatile ("oi 0(%1),0x01"
5114+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5115+ break;
5116+ case 1:
5117+ asm volatile ("oi 0(%1),0x02"
5118+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5119+ break;
5120+ case 2:
5121+ asm volatile ("oi 0(%1),0x04"
5122+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5123+ break;
5124+ case 3:
5125+ asm volatile ("oi 0(%1),0x08"
5126+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5127+ break;
5128+ case 4:
5129+ asm volatile ("oi 0(%1),0x10"
5130+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5131+ break;
5132+ case 5:
5133+ asm volatile ("oi 0(%1),0x20"
5134+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5135+ break;
5136+ case 6:
5137+ asm volatile ("oi 0(%1),0x40"
5138+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5139+ break;
5140+ case 7:
5141+ asm volatile ("oi 0(%1),0x80"
5142+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5143+ break;
5144+ }
5145 }
5146
5147 #define set_bit_simple(nr,addr) \
5148@@ -326,76 +275,58 @@
5149 /*
5150 * fast, non-SMP clear_bit routine
5151 */
5152-static __inline__ void
5153-__clear_bit(unsigned long nr, volatile void * addr)
5154+static inline void
5155+__clear_bit(unsigned long nr, volatile void *ptr)
5156 {
5157- unsigned long reg1, reg2;
5158- __asm__ __volatile__(
5159- " lghi %1,56\n"
5160- " lghi %0,7\n"
5161- " xgr %1,%2\n"
5162- " nr %0,%2\n"
5163- " srlg %1,%1,3\n"
5164- " la %1,0(%1,%3)\n"
5165- " la %0,0(%0,%4)\n"
5166- " nc 0(1,%1),0(%0)"
5167- : "=&a" (reg1), "=&a" (reg2)
5168- : "d" (nr), "a" (addr), "a" (&_ni_bitmap) : "cc", "memory" );
5169-}
5170-
5171-static __inline__ void
5172-__constant_clear_bit(const unsigned long nr, volatile void * addr)
5173-{
5174- switch (nr&7) {
5175- case 0:
5176- __asm__ __volatile__ ("la 1,%0\n\t"
5177- "ni 0(1),0xFE"
5178- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5179- : : "1", "cc", "memory" );
5180- break;
5181- case 1:
5182- __asm__ __volatile__ ("la 1,%0\n\t"
5183- "ni 0(1),0xFD"
5184- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5185- : : "1", "cc", "memory" );
5186- break;
5187- case 2:
5188- __asm__ __volatile__ ("la 1,%0\n\t"
5189- "ni 0(1),0xFB"
5190- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5191- : : "1", "cc", "memory" );
5192- break;
5193- case 3:
5194- __asm__ __volatile__ ("la 1,%0\n\t"
5195- "ni 0(1),0xF7"
5196- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5197- : : "1", "cc", "memory" );
5198- break;
5199- case 4:
5200- __asm__ __volatile__ ("la 1,%0\n\t"
5201- "ni 0(1),0xEF"
5202- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5203- : : "cc", "memory" );
5204- break;
5205- case 5:
5206- __asm__ __volatile__ ("la 1,%0\n\t"
5207- "ni 0(1),0xDF"
5208- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5209- : : "1", "cc", "memory" );
5210- break;
5211- case 6:
5212- __asm__ __volatile__ ("la 1,%0\n\t"
5213- "ni 0(1),0xBF"
5214- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5215- : : "1", "cc", "memory" );
5216- break;
5217- case 7:
5218- __asm__ __volatile__ ("la 1,%0\n\t"
5219- "ni 0(1),0x7F"
5220- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5221- : : "1", "cc", "memory" );
5222- break;
5223- }
5224+ unsigned long addr;
5225+
5226+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
5227+ asm volatile("nc 0(1,%1),0(%2)"
5228+ : "+m" (*(char *) addr)
5229+ : "a" (addr), "a" (_ni_bitmap + (nr & 7))
5230+ : "cc" );
5231+}
5232+
5233+static inline void
5234+__constant_clear_bit(const unsigned long nr, volatile void *ptr)
5235+{
5236+ unsigned long addr;
5237+
5238+ addr = ((unsigned long) ptr) + ((nr >> 3) ^ 7);
5239+ switch (nr&7) {
5240+ case 0:
5241+ asm volatile ("ni 0(%1),0xFE"
5242+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5243+ break;
5244+ case 1:
5245+ asm volatile ("ni 0(%1),0xFD"
5246+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5247+ break;
5248+ case 2:
5249+ asm volatile ("ni 0(%1),0xFB"
5250+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5251+ break;
5252+ case 3:
5253+ asm volatile ("ni 0(%1),0xF7"
5254+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5255+ break;
5256+ case 4:
5257+ asm volatile ("ni 0(%1),0xEF"
5258+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5259+ break;
5260+ case 5:
5261+ asm volatile ("ni 0(%1),0xDF"
5262+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5263+ break;
5264+ case 6:
5265+ asm volatile ("ni 0(%1),0xBF"
5266+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5267+ break;
5268+ case 7:
5269+ asm volatile ("ni 0(%1),0x7F"
5270+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5271+ break;
5272+ }
5273 }
5274
5275 #define clear_bit_simple(nr,addr) \
5276@@ -406,75 +337,57 @@
5277 /*
5278 * fast, non-SMP change_bit routine
5279 */
5280-static __inline__ void __change_bit(unsigned long nr, volatile void * addr)
5281+static inline void __change_bit(unsigned long nr, volatile void *ptr)
5282 {
5283- unsigned long reg1, reg2;
5284- __asm__ __volatile__(
5285- " lghi %1,56\n"
5286- " lghi %0,7\n"
5287- " xgr %1,%2\n"
5288- " nr %0,%2\n"
5289- " srlg %1,%1,3\n"
5290- " la %1,0(%1,%3)\n"
5291- " la %0,0(%0,%4)\n"
5292- " xc 0(1,%1),0(%0)"
5293- : "=&a" (reg1), "=&a" (reg2)
5294- : "d" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
5295-}
5296-
5297-static __inline__ void
5298-__constant_change_bit(const unsigned long nr, volatile void * addr)
5299-{
5300- switch (nr&7) {
5301- case 0:
5302- __asm__ __volatile__ ("la 1,%0\n\t"
5303- "xi 0(1),0x01"
5304- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5305- : : "cc", "memory" );
5306- break;
5307- case 1:
5308- __asm__ __volatile__ ("la 1,%0\n\t"
5309- "xi 0(1),0x02"
5310- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5311- : : "cc", "memory" );
5312- break;
5313- case 2:
5314- __asm__ __volatile__ ("la 1,%0\n\t"
5315- "xi 0(1),0x04"
5316- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5317- : : "cc", "memory" );
5318- break;
5319- case 3:
5320- __asm__ __volatile__ ("la 1,%0\n\t"
5321- "xi 0(1),0x08"
5322- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5323- : : "cc", "memory" );
5324- break;
5325- case 4:
5326- __asm__ __volatile__ ("la 1,%0\n\t"
5327- "xi 0(1),0x10"
5328- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5329- : : "cc", "memory" );
5330- break;
5331- case 5:
5332- __asm__ __volatile__ ("la 1,%0\n\t"
5333- "xi 0(1),0x20"
5334- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5335- : : "1", "cc", "memory" );
5336- break;
5337- case 6:
5338- __asm__ __volatile__ ("la 1,%0\n\t"
5339- "xi 0(1),0x40"
5340- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5341- : : "1", "cc", "memory" );
5342- break;
5343- case 7:
5344- __asm__ __volatile__ ("la 1,%0\n\t"
5345- "xi 0(1),0x80"
5346- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5347- : : "1", "cc", "memory" );
5348- break;
5349- }
5350+ unsigned long addr;
5351+
5352+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
5353+ asm volatile("xc 0(1,%1),0(%2)"
5354+ : "+m" (*(char *) addr)
5355+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
5356+ : "cc" );
5357+}
5358+
5359+static inline void
5360+__constant_change_bit(const unsigned long nr, volatile void *ptr)
5361+{
5362+ unsigned long addr;
5363+
5364+ addr = ((unsigned long) ptr) + ((nr >> 3) ^ 7);
5365+ switch (nr&7) {
5366+ case 0:
5367+ asm volatile ("xi 0(%1),0x01"
5368+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5369+ break;
5370+ case 1:
5371+ asm volatile ("xi 0(%1),0x02"
5372+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5373+ break;
5374+ case 2:
5375+ asm volatile ("xi 0(%1),0x04"
5376+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5377+ break;
5378+ case 3:
5379+ asm volatile ("xi 0(%1),0x08"
5380+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5381+ break;
5382+ case 4:
5383+ asm volatile ("xi 0(%1),0x10"
5384+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5385+ break;
5386+ case 5:
5387+ asm volatile ("xi 0(%1),0x20"
5388+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5389+ break;
5390+ case 6:
5391+ asm volatile ("xi 0(%1),0x40"
5392+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5393+ break;
5394+ case 7:
5395+ asm volatile ("xi 0(%1),0x80"
5396+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5397+ break;
5398+ }
5399 }
5400
5401 #define change_bit_simple(nr,addr) \
5402@@ -485,77 +398,57 @@
5403 /*
5404 * fast, non-SMP test_and_set_bit routine
5405 */
5406-static __inline__ int
5407-test_and_set_bit_simple(unsigned long nr, volatile void * addr)
5408+static inline int
5409+test_and_set_bit_simple(unsigned long nr, volatile void *ptr)
5410 {
5411- unsigned long reg1, reg2;
5412- int oldbit;
5413- __asm__ __volatile__(
5414- " lghi %1,56\n"
5415- " lghi %2,7\n"
5416- " xgr %1,%3\n"
5417- " nr %2,%3\n"
5418- " srlg %1,%1,3\n"
5419- " la %1,0(%1,%4)\n"
5420- " ic %0,0(%1)\n"
5421- " srl %0,0(%2)\n"
5422- " la %2,0(%2,%5)\n"
5423- " oc 0(1,%1),0(%2)"
5424- : "=&d" (oldbit), "=&a" (reg1), "=&a" (reg2)
5425- : "d" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
5426- return oldbit & 1;
5427+ unsigned long addr;
5428+ unsigned char ch;
5429+
5430+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
5431+ ch = *(unsigned char *) addr;
5432+ asm volatile("oc 0(1,%1),0(%2)"
5433+ : "+m" (*(char *) addr)
5434+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
5435+ : "cc" );
5436+ return (ch >> (nr & 7)) & 1;
5437 }
5438 #define __test_and_set_bit(X,Y) test_and_set_bit_simple(X,Y)
5439
5440 /*
5441 * fast, non-SMP test_and_clear_bit routine
5442 */
5443-static __inline__ int
5444-test_and_clear_bit_simple(unsigned long nr, volatile void * addr)
5445+static inline int
5446+test_and_clear_bit_simple(unsigned long nr, volatile void *ptr)
5447 {
5448- unsigned long reg1, reg2;
5449- int oldbit;
5450+ unsigned long addr;
5451+ unsigned char ch;
5452
5453- __asm__ __volatile__(
5454- " lghi %1,56\n"
5455- " lghi %2,7\n"
5456- " xgr %1,%3\n"
5457- " nr %2,%3\n"
5458- " srlg %1,%1,3\n"
5459- " la %1,0(%1,%4)\n"
5460- " ic %0,0(%1)\n"
5461- " srl %0,0(%2)\n"
5462- " la %2,0(%2,%5)\n"
5463- " nc 0(1,%1),0(%2)"
5464- : "=&d" (oldbit), "=&a" (reg1), "=&a" (reg2)
5465- : "d" (nr), "a" (addr), "a" (&_ni_bitmap) : "cc", "memory" );
5466- return oldbit & 1;
5467+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
5468+ ch = *(unsigned char *) addr;
5469+ asm volatile("nc 0(1,%1),0(%2)"
5470+ : "+m" (*(char *) addr)
5471+ : "a" (addr), "a" (_ni_bitmap + (nr & 7))
5472+ : "cc" );
5473+ return (ch >> (nr & 7)) & 1;
5474 }
5475 #define __test_and_clear_bit(X,Y) test_and_clear_bit_simple(X,Y)
5476
5477 /*
5478 * fast, non-SMP test_and_change_bit routine
5479 */
5480-static __inline__ int
5481-test_and_change_bit_simple(unsigned long nr, volatile void * addr)
5482+static inline int
5483+test_and_change_bit_simple(unsigned long nr, volatile void *ptr)
5484 {
5485- unsigned long reg1, reg2;
5486- int oldbit;
5487+ unsigned long addr;
5488+ unsigned char ch;
5489
5490- __asm__ __volatile__(
5491- " lghi %1,56\n"
5492- " lghi %2,7\n"
5493- " xgr %1,%3\n"
5494- " nr %2,%3\n"
5495- " srlg %1,%1,3\n"
5496- " la %1,0(%1,%4)\n"
5497- " ic %0,0(%1)\n"
5498- " srl %0,0(%2)\n"
5499- " la %2,0(%2,%5)\n"
5500- " xc 0(1,%1),0(%2)"
5501- : "=&d" (oldbit), "=&a" (reg1), "=&a" (reg2)
5502- : "d" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
5503- return oldbit & 1;
5504+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
5505+ ch = *(unsigned char *) addr;
5506+ asm volatile("xc 0(1,%1),0(%2)"
5507+ : "+m" (*(char *) addr)
5508+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
5509+ : "cc" );
5510+ return (ch >> (nr & 7)) & 1;
5511 }
5512 #define __test_and_change_bit(X,Y) test_and_change_bit_simple(X,Y)
5513
5514@@ -580,26 +473,18 @@
5515 * This routine doesn't need to be atomic.
5516 */
5517
5518-static __inline__ int __test_bit(unsigned long nr, volatile void * addr)
5519+static inline int __test_bit(unsigned long nr, volatile void *ptr)
5520 {
5521- unsigned long reg1, reg2;
5522- int oldbit;
5523+ unsigned long addr;
5524+ unsigned char ch;
5525
5526- __asm__ __volatile__(
5527- " lghi %2,56\n"
5528- " lghi %1,7\n"
5529- " xgr %2,%3\n"
5530- " nr %1,%3\n"
5531- " srlg %2,%2,3\n"
5532- " ic %0,0(%2,%4)\n"
5533- " srl %0,0(%1)\n"
5534- : "=&d" (oldbit), "=&a" (reg1), "=&a" (reg2)
5535- : "d" (nr), "a" (addr) : "cc" );
5536- return oldbit & 1;
5537+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
5538+ ch = *(unsigned char *) addr;
5539+ return (ch >> (nr & 7)) & 1;
5540 }
5541
5542-static __inline__ int
5543-__constant_test_bit(unsigned long nr, volatile void * addr) {
5544+static inline int
5545+__constant_test_bit(unsigned long nr, volatile void *addr) {
5546 return (((volatile char *) addr)[(nr>>3)^7] & (1<<(nr&7))) != 0;
5547 }
5548
5549@@ -611,7 +496,7 @@
5550 /*
5551 * Find-bit routines..
5552 */
5553-static __inline__ unsigned long
5554+static inline unsigned long
5555 find_first_zero_bit(void * addr, unsigned long size)
5556 {
5557 unsigned long res, cmp, count;
5558@@ -653,7 +538,49 @@
5559 return (res < size) ? res : size;
5560 }
5561
5562-static __inline__ unsigned long
5563+static inline unsigned long
5564+find_first_bit(void * addr, unsigned long size)
5565+{
5566+ unsigned long res, cmp, count;
5567+
5568+ if (!size)
5569+ return 0;
5570+ __asm__(" slgr %1,%1\n"
5571+ " lgr %2,%3\n"
5572+ " slgr %0,%0\n"
5573+ " aghi %2,63\n"
5574+ " srlg %2,%2,6\n"
5575+ "0: cg %1,0(%0,%4)\n"
5576+ " jne 1f\n"
5577+ " aghi %0,8\n"
5578+ " brct %2,0b\n"
5579+ " lgr %0,%3\n"
5580+ " j 5f\n"
5581+ "1: lg %2,0(%0,%4)\n"
5582+ " sllg %0,%0,3\n"
5583+ " clr %2,%1\n"
5584+ " jne 2f\n"
5585+ " aghi %0,32\n"
5586+ " srlg %2,%2,32\n"
5587+ "2: lghi %1,0xff\n"
5588+ " tmll %2,0xffff\n"
5589+ " jnz 3f\n"
5590+ " aghi %0,16\n"
5591+ " srl %2,16\n"
5592+ "3: tmll %2,0x00ff\n"
5593+ " jnz 4f\n"
5594+ " aghi %0,8\n"
5595+ " srl %2,8\n"
5596+ "4: ngr %2,%1\n"
5597+ " ic %2,0(%2,%5)\n"
5598+ " algr %0,%2\n"
5599+ "5:"
5600+ : "=&a" (res), "=&d" (cmp), "=&a" (count)
5601+ : "a" (size), "a" (addr), "a" (&_sb_findmap) : "cc" );
5602+ return (res < size) ? res : size;
5603+}
5604+
5605+static inline unsigned long
5606 find_next_zero_bit (void * addr, unsigned long size, unsigned long offset)
5607 {
5608 unsigned long * p = ((unsigned long *) addr) + (offset >> 6);
5609@@ -697,14 +624,56 @@
5610 return (offset + res);
5611 }
5612
5613+static inline unsigned long
5614+find_next_bit (void * addr, unsigned long size, unsigned long offset)
5615+{
5616+ unsigned long * p = ((unsigned long *) addr) + (offset >> 6);
5617+ unsigned long bitvec, reg;
5618+ unsigned long set, bit = offset & 63, res;
5619+
5620+ if (bit) {
5621+ /*
5622+ * Look for zero in first word
5623+ */
5624+ bitvec = (*p) >> bit;
5625+ __asm__(" slgr %0,%0\n"
5626+ " ltr %1,%1\n"
5627+ " jnz 0f\n"
5628+ " aghi %0,32\n"
5629+ " srlg %1,%1,32\n"
5630+ "0: lghi %2,0xff\n"
5631+ " tmll %1,0xffff\n"
5632+ " jnz 1f\n"
5633+ " aghi %0,16\n"
5634+ " srlg %1,%1,16\n"
5635+ "1: tmll %1,0x00ff\n"
5636+ " jnz 2f\n"
5637+ " aghi %0,8\n"
5638+ " srlg %1,%1,8\n"
5639+ "2: ngr %1,%2\n"
5640+ " ic %1,0(%1,%3)\n"
5641+ " algr %0,%1"
5642+ : "=&d" (set), "+a" (bitvec), "=&d" (reg)
5643+ : "a" (&_sb_findmap) : "cc" );
5644+ if (set < (64 - bit))
5645+ return set + offset;
5646+ offset += 64 - bit;
5647+ p++;
5648+ }
5649+ /*
5650+ * No set bit yet, search remaining full words for a bit
5651+ */
5652+ res = find_first_bit (p, size - 64 * (p - (unsigned long *) addr));
5653+ return (offset + res);
5654+}
5655+
5656 /*
5657 * ffz = Find First Zero in word. Undefined if no zero exists,
5658 * so code should check against ~0UL first..
5659 */
5660-static __inline__ unsigned long ffz(unsigned long word)
5661+static inline unsigned long ffz(unsigned long word)
5662 {
5663- unsigned long reg;
5664- int result;
5665+ unsigned long reg, result;
5666
5667 __asm__(" lhi %2,-1\n"
5668 " slgr %0,%0\n"
5669@@ -730,40 +699,112 @@
5670 }
5671
5672 /*
5673+ * __ffs = find first bit in word. Undefined if no bit exists,
5674+ * so code should check against 0UL first..
5675+ */
5676+static inline unsigned long __ffs (unsigned long word)
5677+{
5678+ unsigned long reg, result;
5679+
5680+ __asm__(" slgr %0,%0\n"
5681+ " ltr %1,%1\n"
5682+ " jnz 0f\n"
5683+ " aghi %0,32\n"
5684+ " srlg %1,%1,32\n"
5685+ "0: lghi %2,0xff\n"
5686+ " tmll %1,0xffff\n"
5687+ " jnz 1f\n"
5688+ " aghi %0,16\n"
5689+ " srlg %1,%1,16\n"
5690+ "1: tmll %1,0x00ff\n"
5691+ " jnz 2f\n"
5692+ " aghi %0,8\n"
5693+ " srlg %1,%1,8\n"
5694+ "2: ngr %1,%2\n"
5695+ " ic %1,0(%1,%3)\n"
5696+ " algr %0,%1"
5697+ : "=&d" (result), "+a" (word), "=&d" (reg)
5698+ : "a" (&_sb_findmap) : "cc" );
5699+ return result;
5700+}
5701+
5702+/*
5703+ * Every architecture must define this function. It's the fastest
5704+ * way of searching a 140-bit bitmap where the first 100 bits are
5705+ * unlikely to be set. It's guaranteed that at least one of the 140
5706+ * bits is cleared.
5707+ */
5708+static inline int sched_find_first_bit(unsigned long *b)
5709+{
5710+ return find_first_bit(b, 140);
5711+}
5712+
5713+/*
5714 * ffs: find first bit set. This is defined the same way as
5715 * the libc and compiler builtin ffs routines, therefore
5716 * differs in spirit from the above ffz (man ffs).
5717 */
5718-
5719-extern int __inline__ ffs (int x)
5720+extern int inline ffs (int x)
5721 {
5722- int r;
5723+ int r = 1;
5724
5725 if (x == 0)
5726- return 0;
5727- __asm__(" slr %0,%0\n"
5728- " tml %1,0xffff\n"
5729+ return 0;
5730+ __asm__(" tml %1,0xffff\n"
5731 " jnz 0f\n"
5732- " ahi %0,16\n"
5733 " srl %1,16\n"
5734+ " ahi %0,16\n"
5735 "0: tml %1,0x00ff\n"
5736 " jnz 1f\n"
5737- " ahi %0,8\n"
5738 " srl %1,8\n"
5739+ " ahi %0,8\n"
5740 "1: tml %1,0x000f\n"
5741 " jnz 2f\n"
5742- " ahi %0,4\n"
5743 " srl %1,4\n"
5744+ " ahi %0,4\n"
5745 "2: tml %1,0x0003\n"
5746 " jnz 3f\n"
5747- " ahi %0,2\n"
5748 " srl %1,2\n"
5749+ " ahi %0,2\n"
5750 "3: tml %1,0x0001\n"
5751 " jnz 4f\n"
5752 " ahi %0,1\n"
5753 "4:"
5754 : "=&d" (r), "+d" (x) : : "cc" );
5755- return r+1;
5756+ return r;
5757+}
5758+
5759+/*
5760+ * fls: find last bit set.
5761+ */
5762+extern __inline__ int fls(int x)
5763+{
5764+ int r = 32;
5765+
5766+ if (x == 0)
5767+ return 0;
5768+ __asm__(" tmh %1,0xffff\n"
5769+ " jz 0f\n"
5770+ " sll %1,16\n"
5771+ " ahi %0,-16\n"
5772+ "0: tmh %1,0xff00\n"
5773+ " jz 1f\n"
5774+ " sll %1,8\n"
5775+ " ahi %0,-8\n"
5776+ "1: tmh %1,0xf000\n"
5777+ " jz 2f\n"
5778+ " sll %1,4\n"
5779+ " ahi %0,-4\n"
5780+ "2: tmh %1,0xc000\n"
5781+ " jz 3f\n"
5782+ " sll %1,2\n"
5783+ " ahi %0,-2\n"
5784+ "3: tmh %1,0x8000\n"
5785+ " jz 4f\n"
5786+ " ahi %0,-1\n"
5787+ "4:"
5788+ : "+d" (r), "+d" (x) : : "cc" );
5789+ return r;
5790 }
5791
5792 /*
5793@@ -791,7 +832,7 @@
5794 #define ext2_set_bit(nr, addr) test_and_set_bit((nr)^56, addr)
5795 #define ext2_clear_bit(nr, addr) test_and_clear_bit((nr)^56, addr)
5796 #define ext2_test_bit(nr, addr) test_bit((nr)^56, addr)
5797-static __inline__ unsigned long
5798+static inline unsigned long
5799 ext2_find_first_zero_bit(void *vaddr, unsigned long size)
5800 {
5801 unsigned long res, cmp, count;
5802@@ -833,7 +874,7 @@
5803 return (res < size) ? res : size;
5804 }
5805
5806-static __inline__ unsigned long
5807+static inline unsigned long
5808 ext2_find_next_zero_bit(void *vaddr, unsigned long size, unsigned long offset)
5809 {
5810 unsigned long *addr = vaddr;
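For readers who do not speak s390x assembler: every one of the rewritten *_bit_cs() helpers above follows the same compare-and-swap pattern: load the 64-bit word, build the new value with an OR/AND/XOR mask, retry CSG until the store succeeds, then derive the old bit from the pre-swap value. A rough portable sketch of that loop follows; it assumes GCC/Clang __atomic builtins and 64-bit longs, and it ignores the big-endian bit numbering (the nr^56 / nr&63 arithmetic) that the real code preserves.

#include <stdbool.h>

/* Illustrative compare-and-swap retry loop, not part of the patch. */
static inline bool test_and_set_bit_cas(unsigned long nr,
                                        volatile unsigned long *addr)
{
	volatile unsigned long *word = addr + nr / 64;
	unsigned long mask = 1UL << (nr % 64);
	unsigned long old = *word;

	/* Retry until our OR-ed value is stored atomically; on failure the
	 * builtin reloads 'old' with the current contents of *word. */
	while (!__atomic_compare_exchange_n(word, &old, old | mask, false,
	                                    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
		;

	return (old & mask) != 0;	/* previous state of the bit */
}

The clear and change variants differ only in the mask operation (AND with ~mask, XOR with mask) and in how the old bit is recovered, which is exactly the difference between the three CSG loops above.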
5811diff -urN linux-2.4.24.org/include/asm-sparc/bitops.h linux-2.4.24/include/asm-sparc/bitops.h
5812--- linux-2.4.24.org/include/asm-sparc/bitops.h 2004-02-04 20:47:50.760965997 +0100
5813+++ linux-2.4.24/include/asm-sparc/bitops.h 2004-02-04 20:52:54.074877521 +0100
5814@@ -231,6 +231,63 @@
5815 return result;
5816 }
5817
5818+/**
5819+ * __ffs - find first bit in word.
5820+ * @word: The word to search
5821+ *
5822+ * Undefined if no bit exists, so code should check against 0 first.
5823+ */
5824+static __inline__ int __ffs(unsigned long word)
5825+{
5826+ int num = 0;
5827+
5828+ if ((word & 0xffff) == 0) {
5829+ num += 16;
5830+ word >>= 16;
5831+ }
5832+ if ((word & 0xff) == 0) {
5833+ num += 8;
5834+ word >>= 8;
5835+ }
5836+ if ((word & 0xf) == 0) {
5837+ num += 4;
5838+ word >>= 4;
5839+ }
5840+ if ((word & 0x3) == 0) {
5841+ num += 2;
5842+ word >>= 2;
5843+ }
5844+ if ((word & 0x1) == 0)
5845+ num += 1;
5846+ return num;
5847+}
5848+
5849+/*
5850+ * Every architecture must define this function. It's the fastest
5851+ * way of searching a 140-bit bitmap where the first 100 bits are
5852+ * unlikely to be set. It's guaranteed that at least one of the 140
5853+ * bits is cleared.
5854+ */
5855+static __inline__ int sched_find_first_bit(unsigned long *b)
5856+{
5857+
5858+ if (unlikely(b[0]))
5859+ return __ffs(b[0]);
5860+ if (unlikely(b[1]))
5861+ return __ffs(b[1]) + 32;
5862+ if (unlikely(b[2]))
5863+ return __ffs(b[2]) + 64;
5864+ if (b[3])
5865+ return __ffs(b[3]) + 96;
5866+ return __ffs(b[4]) + 128;
5867+}
5868+
5869+/*
5870+ * fls: find last bit set.
5871+ */
5872+
5873+#define fls(x) generic_fls(x)
5874+
5875 /*
5876 * ffs: find first bit set. This is defined the same way as
5877 * the libc and compiler builtin ffs routines, therefore
5878@@ -296,6 +353,32 @@
5879 #define find_first_zero_bit(addr, size) \
5880 find_next_zero_bit((addr), (size), 0)
5881
5882+/**
5883+ * find_next_bit - find the first set bit in a memory region
5884+ * @addr: The address to base the search on
5885+ * @offset: The bitnumber to start searching at
5886+ * @size: The maximum size to search
5887+ *
5888+ * Scheduler induced bitop, do not use.
5889+ */
5890+static inline int find_next_bit(unsigned long *addr, int size, int offset)
5891+{
5892+ unsigned long *p = addr + (offset >> 5);
5893+ int num = offset & ~0x1f;
5894+ unsigned long word;
5895+
5896+ word = *p++;
5897+ word &= ~((1 << (offset & 0x1f)) - 1);
5898+ while (num < size) {
5899+ if (word != 0) {
5900+ return __ffs(word) + num;
5901+ }
5902+ word = *p++;
5903+ num += 0x20;
5904+ }
5905+ return num;
5906+}
5907+
5908 static inline int test_le_bit(int nr, __const__ void * addr)
5909 {
5910 __const__ unsigned char *ADDR = (__const__ unsigned char *) addr;
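The sparc additions above exist for a single consumer: the O(1) scheduler keeps a 140-bit bitmap with one bit per priority level and needs the index of the lowest set bit quickly. A portable restatement of sched_find_first_bit() for the 32-bit-word layout used here (illustrative only; __builtin_ctzl stands in for the __ffs() helper the patch adds):

/* 140-bit priority bitmap, 32-bit words: bitmap[0] covers priorities 0..31,
 * bitmap[1] 32..63, ..., bitmap[4] 128..139. */
static int sketch_sched_find_first_bit(const unsigned long bitmap[5])
{
	int word;

	for (word = 0; word < 5; word++)
		if (bitmap[word])
			return word * 32 + __builtin_ctzl(bitmap[word]);
	return 140;	/* never reached by the scheduler: one bit is always set */
}

The unrolled version in the patch encodes the hint from its comment: the first 100 (real-time) bits are usually clear, so the hit normally lands in the last two words.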
5911diff -urN linux-2.4.24.org/include/asm-sparc/system.h linux-2.4.24/include/asm-sparc/system.h
5912--- linux-2.4.24.org/include/asm-sparc/system.h 2004-02-04 20:47:50.644990120 +0100
5913+++ linux-2.4.24/include/asm-sparc/system.h 2004-02-04 20:52:54.110870035 +0100
5914@@ -84,7 +84,7 @@
5915 *
5916 * SWITCH_ENTER and SWITH_DO_LAZY_FPU do not work yet (e.g. SMP does not work)
5917 */
5918-#define prepare_to_switch() do { \
5919+#define prepare_arch_switch(rq, next) do { \
5920 __asm__ __volatile__( \
5921 ".globl\tflush_patch_switch\nflush_patch_switch:\n\t" \
5922 "save %sp, -0x40, %sp; save %sp, -0x40, %sp; save %sp, -0x40, %sp\n\t" \
5923@@ -92,6 +92,8 @@
5924 "save %sp, -0x40, %sp\n\t" \
5925 "restore; restore; restore; restore; restore; restore; restore"); \
5926 } while(0)
5927+#define finish_arch_switch(rq, next) do{ }while(0)
5928+#define task_running(rq, p) ((rq)->curr == (p))
5929
5930 /* Much care has gone into this code, do not touch it.
5931 *
5932--- linux-2.4.30/include/asm-sparc64/bitops.h.orig 2005-04-04 03:42:20.000000000 +0200
5933+++ linux-2.4.30/include/asm-sparc64/bitops.h 2005-04-05 12:20:56.000000000 +0200
5934@@ -130,6 +130,23 @@
5935 #ifdef __KERNEL__
5936
5937 /*
5938+ * Every architecture must define this function. It's the fastest
5939+ * way of searching a 140-bit bitmap where the first 100 bits are
5940+ * unlikely to be set. It's guaranteed that at least one of the 140
5941+ * bits is cleared.
5942+ */
5943+static inline int sched_find_first_bit(unsigned long *b)
5944+{
5945+ if (unlikely(b[0]))
5946+ return __ffs(b[0]);
5947+ if (unlikely(((unsigned int)b[1])))
5948+ return __ffs(b[1]) + 64;
5949+ if (b[1] >> 32)
5950+ return __ffs(b[1] >> 32) + 96;
5951+ return __ffs(b[2]) + 128;
5952+}
5953+
5954+/*
5955 * ffs: find first bit set. This is defined the same way as
5956 * the libc and compiler builtin ffs routines, therefore
5957 * differs in spirit from the above ffz (man ffs).
5958@@ -190,14 +207,69 @@
5959 #endif
5960 #endif /* __KERNEL__ */
5961
5962+/**
5963+ * find_next_bit - find the next set bit in a memory region
5964+ * @addr: The address to base the search on
5965+ * @offset: The bitnumber to start searching at
5966+ * @size: The maximum size to search
5967+ */
5968+static __inline__ unsigned long find_next_bit(unsigned long *addr, unsigned long size, unsigned long offset)
5969+{
5970+ unsigned long *p = addr + (offset >> 6);
5971+ unsigned long result = offset & ~63UL;
5972+ unsigned long tmp;
5973+
5974+ if (offset >= size)
5975+ return size;
5976+ size -= result;
5977+ offset &= 63UL;
5978+ if (offset) {
5979+ tmp = *(p++);
5980+ tmp &= (~0UL << offset);
5981+ if (size < 64)
5982+ goto found_first;
5983+ if (tmp)
5984+ goto found_middle;
5985+ size -= 64;
5986+ result += 64;
5987+ }
5988+ while (size & ~63UL) {
5989+ if ((tmp = *(p++)))
5990+ goto found_middle;
5991+ result += 64;
5992+ size -= 64;
5993+ }
5994+ if (!size)
5995+ return result;
5996+ tmp = *p;
5997+
5998+found_first:
5999+ tmp &= (~0UL >> (64 - size));
6000+ if (tmp == 0UL) /* Are any bits set? */
6001+ return result + size; /* Nope. */
6002+found_middle:
6003+ return result + __ffs(tmp);
6004+}
6005+
6006+/**
6007+ * find_first_bit - find the first set bit in a memory region
6008+ * @addr: The address to start the search at
6009+ * @size: The maximum size to search
6010+ *
6011+ * Returns the bit-number of the first set bit, not the number of the byte
6012+ * containing a bit.
6013+ */
6014+#define find_first_bit(addr, size) \
6015+ find_next_bit((addr), (size), 0)
6016+
6017 /* find_next_zero_bit() finds the first zero bit in a bit string of length
6018 * 'size' bits, starting the search at bit 'offset'. This is largely based
6019 * on Linus's ALPHA routines, which are pretty portable BTW.
6020 */
6021
6022-static __inline__ unsigned long find_next_zero_bit(void *addr, unsigned long size, unsigned long offset)
6023+static __inline__ unsigned long find_next_zero_bit(unsigned long *addr, unsigned long size, unsigned long offset)
6024 {
6025- unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
6026+ unsigned long *p = addr + (offset >> 6);
6027 unsigned long result = offset & ~63UL;
6028 unsigned long tmp;
6029
6030@@ -241,7 +313,7 @@
6031 #define test_and_clear_le_bit(nr,addr) \
6032 test_and_clear_bit((nr) ^ 0x38, (addr))
6033
6034-static __inline__ int test_le_bit(int nr, __const__ void *addr)
6035+static __inline__ int test_le_bit(int nr, __const__ unsigned long *addr)
6036 {
6037 int mask;
6038 __const__ unsigned char *ADDR = (__const__ unsigned char *) addr;
6039@@ -254,9 +326,9 @@
6040 #define find_first_zero_le_bit(addr, size) \
6041 find_next_zero_le_bit((addr), (size), 0)
6042
6043-static __inline__ unsigned long find_next_zero_le_bit(void *addr, unsigned long size, unsigned long offset)
6044+static __inline__ unsigned long find_next_zero_le_bit(unsigned long *addr, unsigned long size, unsigned long offset)
6045 {
6046- unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
6047+ unsigned long *p = addr + (offset >> 6);
6048 unsigned long result = offset & ~63UL;
6049 unsigned long tmp;
6050
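A short usage sketch of the find_first_bit()/find_next_bit() pair introduced above, showing the scan idiom they are designed for; the visit() callback is hypothetical and the helpers are assumed to be in scope:

/* Walk every set bit of a bitmap with the helpers defined above.
 * find_next_bit() returns 'nbits' when no further bit is set. */
static void visit_set_bits(unsigned long *bitmap, unsigned long nbits,
                           void (*visit)(unsigned long bit))
{
	unsigned long bit;

	for (bit = find_first_bit(bitmap, nbits);
	     bit < nbits;
	     bit = find_next_bit(bitmap, nbits, bit + 1))
		visit(bit);
}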
6051diff -urN linux-2.4.24.org/include/asm-sparc64/smp.h linux-2.4.24/include/asm-sparc64/smp.h
6052--- linux-2.4.24.org/include/asm-sparc64/smp.h 2004-02-04 20:48:01.767676594 +0100
6053+++ linux-2.4.24/include/asm-sparc64/smp.h 2004-02-04 20:52:54.175856518 +0100
6054@@ -111,7 +111,7 @@
6055 }
6056 }
6057
6058-#define smp_processor_id() (current->processor)
6059+#define smp_processor_id() (current->cpu)
6060
6061 /* This needn't do anything as we do not sleep the cpu
6062 * inside of the idler task, so an interrupt is not needed
6063diff -urN linux-2.4.24.org/include/asm-sparc64/system.h linux-2.4.24/include/asm-sparc64/system.h
6064--- linux-2.4.24.org/include/asm-sparc64/system.h 2004-02-04 20:48:01.898649351 +0100
6065+++ linux-2.4.24/include/asm-sparc64/system.h 2004-02-04 20:52:54.208849656 +0100
6066@@ -154,7 +154,18 @@
6067
6068 #define flush_user_windows flushw_user
6069 #define flush_register_windows flushw_all
6070-#define prepare_to_switch flushw_all
6071+
6072+#define prepare_arch_schedule(prev) task_lock(prev)
6073+#define finish_arch_schedule(prev) task_unlock(prev)
6074+#define prepare_arch_switch(rq, next) \
6075+do { spin_lock(&(next)->switch_lock); \
6076+ spin_unlock(&(rq)->lock); \
6077+ flushw_all(); \
6078+} while (0)
6079+
6080+#define finish_arch_switch(rq, prev) \
6081+do { spin_unlock_irq(&(prev)->switch_lock); \
6082+} while (0)
6083
6084 #ifndef CONFIG_DEBUG_SPINLOCK
6085 #define CHECK_LOCKS(PREV) do { } while(0)
6086diff -urN linux-2.4.24.org/include/linux/bitops.h linux-2.4.24/include/linux/bitops.h
6087--- linux-2.4.24.org/include/linux/bitops.h 2004-02-04 20:47:38.725469391 +0100
6088+++ linux-2.4.24/include/linux/bitops.h 2004-02-04 20:52:54.244842170 +0100
6089@@ -1,6 +1,38 @@
6090 #ifndef _LINUX_BITOPS_H
6091 #define _LINUX_BITOPS_H
6092
6093+/*
6094+ * fls: find last bit set.
6095+ */
6096+
6097+extern __inline__ int generic_fls(int x)
6098+{
6099+ int r = 32;
6100+
6101+ if (!x)
6102+ return 0;
6103+ if (!(x & 0xffff0000u)) {
6104+ x <<= 16;
6105+ r -= 16;
6106+ }
6107+ if (!(x & 0xff000000u)) {
6108+ x <<= 8;
6109+ r -= 8;
6110+ }
6111+ if (!(x & 0xf0000000u)) {
6112+ x <<= 4;
6113+ r -= 4;
6114+ }
6115+ if (!(x & 0xc0000000u)) {
6116+ x <<= 2;
6117+ r -= 2;
6118+ }
6119+ if (!(x & 0x80000000u)) {
6120+ x <<= 1;
6121+ r -= 1;
6122+ }
6123+ return r;
6124+}
6125
6126 /*
6127 * ffs: find first bit set. This is defined the same way as
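Since the sparc hunk earlier maps fls() straight onto this generic_fls(), a tiny worked check of its bit-numbering convention may help; this is an illustrative user-space snippet, not part of the patch, and it assumes the generic_fls() definition above is in scope.

#include <assert.h>

/* Values verified by hand against the shift-and-test cascade above. */
static void check_generic_fls(void)
{
	assert(generic_fls(0) == 0);		/* no bit set */
	assert(generic_fls(1) == 1);		/* bit 0 reports as 1 */
	assert(generic_fls(0x00012345) == 17);	/* highest set bit is bit 16 */
	assert(generic_fls(0x40000000) == 31);
}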
6128diff -urN linux-2.4.24.org/include/linux/kernel_stat.h linux-2.4.24/include/linux/kernel_stat.h
6129--- linux-2.4.24.org/include/linux/kernel_stat.h 2004-02-04 20:47:34.063439098 +0100
6130+++ linux-2.4.24/include/linux/kernel_stat.h 2004-02-04 20:52:54.297831148 +0100
6131@@ -31,7 +31,6 @@
6132 #elif !defined(CONFIG_ARCH_S390)
6133 unsigned int irqs[NR_CPUS][NR_IRQS];
6134 #endif
6135- unsigned int context_swtch;
6136 };
6137
6138 extern struct kernel_stat kstat;
6139diff -urN linux-2.4.24.org/include/linux/sched.h linux-2.4.24/include/linux/sched.h
6140--- linux-2.4.24.org/include/linux/sched.h 2004-02-04 20:47:32.755711107 +0100
6141+++ linux-2.4.24/include/linux/sched.h 2004-02-04 20:52:54.755735907 +0100
6142@@ -6,6 +6,7 @@
6143 extern unsigned long event;
6144
6145 #include <linux/config.h>
6146+#include <linux/compiler.h>
6147 #include <linux/binfmts.h>
6148 #include <linux/threads.h>
6149 #include <linux/kernel.h>
6150@@ -21,7 +22,7 @@
6151 #include <asm/mmu.h>
6152
6153 #include <linux/smp.h>
6154-#include <linux/tty.h>
6155+//#include <linux/tty.h>
6156 #include <linux/sem.h>
6157 #include <linux/signal.h>
6158 #include <linux/securebits.h>
6159@@ -73,10 +74,12 @@
6160 #define CT_TO_SECS(x) ((x) / HZ)
6161 #define CT_TO_USECS(x) (((x) % HZ) * 1000000/HZ)
6162
6163-extern int nr_running, nr_threads;
6164+extern int nr_threads;
6165 extern int last_pid;
6166+extern unsigned long nr_running(void);
6167+extern unsigned long nr_uninterruptible(void);
6168
6169-#include <linux/fs.h>
6170+//#include <linux/fs.h>
6171 #include <linux/time.h>
6172 #include <linux/param.h>
6173 #include <linux/resource.h>
6174@@ -109,12 +112,6 @@
6175 #define SCHED_FIFO 1
6176 #define SCHED_RR 2
6177
6178-/*
6179- * This is an additional bit set when we want to
6180- * yield the CPU for one re-schedule..
6181- */
6182-#define SCHED_YIELD 0x10
6183-
6184 struct sched_param {
6185 int sched_priority;
6186 };
6187@@ -132,17 +129,21 @@
6188 * a separate lock).
6189 */
6190 extern rwlock_t tasklist_lock;
6191-extern spinlock_t runqueue_lock;
6192 extern spinlock_t mmlist_lock;
6193
6194+typedef struct task_struct task_t;
6195+
6196 extern void sched_init(void);
6197-extern void init_idle(void);
6198+extern void init_idle(task_t *idle, int cpu);
6199 extern void show_state(void);
6200 extern void cpu_init (void);
6201 extern void trap_init(void);
6202 extern void update_process_times(int user);
6203-extern void update_one_process(struct task_struct *p, unsigned long user,
6204+extern void update_one_process(task_t *p, unsigned long user,
6205 unsigned long system, int cpu);
6206+extern void scheduler_tick(int user_tick, int system);
6207+extern void migration_init(void);
6208+extern unsigned long cache_decay_ticks;
6209
6210 #define MAX_SCHEDULE_TIMEOUT LONG_MAX
6211 extern signed long FASTCALL(schedule_timeout(signed long timeout));
6212@@ -152,6 +153,28 @@
6213 extern void flush_scheduled_tasks(void);
6214 extern int start_context_thread(void);
6215 extern int current_is_keventd(void);
6216+extern void FASTCALL(sched_exit(task_t * p));
6217+extern int FASTCALL(idle_cpu(int cpu));
6218+
6219+/*
6220+ * Priority of a process goes from 0..MAX_PRIO-1, valid RT
6221+ * priority is 0..MAX_RT_PRIO-1, and SCHED_OTHER tasks are
6222+ * in the range MAX_RT_PRIO..MAX_PRIO-1. Priority values
6223+ * are inverted: lower p->prio value means higher priority.
6224+ *
6225+ * The MAX_RT_USER_PRIO value allows the actual maximum
6226+ * RT priority to be separate from the value exported to
6227+ * user-space. This allows kernel threads to set their
6228+ * priority to a value higher than any user task. Note:
6229+ * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
6230+ *
6231+ * Both values are configurable at compile-time.
6232+ */
6233+
6234+#define MAX_USER_RT_PRIO 100
6235+#define MAX_RT_PRIO MAX_USER_RT_PRIO
6236+
6237+#define MAX_PRIO (MAX_RT_PRIO + 40)
6238
6239 #if CONFIG_SMP
6240 extern void set_cpus_allowed(struct task_struct *p, unsigned long new_mask);
6241@@ -280,6 +303,8 @@
6242 extern struct user_struct root_user;
6243 #define INIT_USER (&root_user)
6244
6245+typedef struct prio_array prio_array_t;
6246+
6247 struct task_struct {
6248 /*
6249 * offsets of these are hardcoded elsewhere - touch with care
6250@@ -297,35 +322,26 @@
6251
6252 int lock_depth; /* Lock depth */
6253
6254-/*
6255- * offset 32 begins here on 32-bit platforms. We keep
6256- * all fields in a single cacheline that are needed for
6257- * the goodness() loop in schedule().
6258- */
6259- long counter;
6260- long nice;
6261- unsigned long policy;
6262- struct mm_struct *mm;
6263- int processor;
6264 /*
6265- * cpus_runnable is ~0 if the process is not running on any
6266- * CPU. It's (1 << cpu) if it's running on a CPU. This mask
6267- * is updated under the runqueue lock.
6268- *
6269- * To determine whether a process might run on a CPU, this
6270- * mask is AND-ed with cpus_allowed.
6271- */
6272- unsigned long cpus_runnable, cpus_allowed;
6273- /*
6274- * (only the 'next' pointer fits into the cacheline, but
6275- * that's just fine.)
6276+ * offset 32 begins here on 32-bit platforms.
6277 */
6278+ unsigned int cpu;
6279+ int prio, static_prio;
6280 struct list_head run_list;
6281- unsigned long sleep_time;
6282+ prio_array_t *array;
6283
6284- struct task_struct *next_task, *prev_task;
6285- struct mm_struct *active_mm;
6286+ unsigned long sleep_avg;
6287+ unsigned long sleep_timestamp;
6288+
6289+ unsigned long policy;
6290+ unsigned long cpus_allowed;
6291+ unsigned int time_slice, first_time_slice;
6292+
6293+ task_t *next_task, *prev_task;
6294+
6295+ struct mm_struct *mm, *active_mm;
6296 struct list_head local_pages;
6297+
6298 unsigned int allocation_order, nr_local_pages;
6299
6300 /* task state */
6301@@ -348,12 +364,12 @@
6302 * older sibling, respectively. (p->father can be replaced with
6303 * p->p_pptr->pid)
6304 */
6305- struct task_struct *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr;
6306+ task_t *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr;
6307 struct list_head thread_group;
6308
6309 /* PID hash table linkage. */
6310- struct task_struct *pidhash_next;
6311- struct task_struct **pidhash_pprev;
6312+ task_t *pidhash_next;
6313+ task_t **pidhash_pprev;
6314
6315 wait_queue_head_t wait_chldexit; /* for wait4() */
6316 struct completion *vfork_done; /* for vfork() */
6317@@ -412,6 +428,8 @@
6318 u32 self_exec_id;
6319 /* Protection of (de-)allocation: mm, files, fs, tty */
6320 spinlock_t alloc_lock;
6321+/* context-switch lock */
6322+ spinlock_t switch_lock;
6323
6324 /* journalling filesystem info */
6325 void *journal_info;
6326@@ -454,9 +472,15 @@
6327 */
6328 #define _STK_LIM (8*1024*1024)
6329
6330-#define DEF_COUNTER (10*HZ/100) /* 100 ms time slice */
6331-#define MAX_COUNTER (20*HZ/100)
6332-#define DEF_NICE (0)
6333+#if CONFIG_SMP
6334+extern void set_cpus_allowed(task_t *p, unsigned long new_mask);
6335+#else
6336+#define set_cpus_allowed(p, new_mask) do { } while (0)
6337+#endif
6338+
6339+extern void set_user_nice(task_t *p, long nice);
6340+extern int task_prio(task_t *p);
6341+extern int task_nice(task_t *p);
6342
6343 extern void yield(void);
6344
6345@@ -477,14 +501,14 @@
6346 addr_limit: KERNEL_DS, \
6347 exec_domain: &default_exec_domain, \
6348 lock_depth: -1, \
6349- counter: DEF_COUNTER, \
6350- nice: DEF_NICE, \
6351+ prio: MAX_PRIO-20, \
6352+ static_prio: MAX_PRIO-20, \
6353 policy: SCHED_OTHER, \
6354+ cpus_allowed: ~0UL, \
6355 mm: NULL, \
6356 active_mm: &init_mm, \
6357- cpus_runnable: ~0UL, \
6358- cpus_allowed: ~0UL, \
6359 run_list: LIST_HEAD_INIT(tsk.run_list), \
6360+ time_slice: HZ, \
6361 next_task: &tsk, \
6362 prev_task: &tsk, \
6363 p_opptr: &tsk, \
6364@@ -509,6 +533,7 @@
6365 pending: { NULL, &tsk.pending.head, {{0}}}, \
6366 blocked: {{0}}, \
6367 alloc_lock: SPIN_LOCK_UNLOCKED, \
6368+ switch_lock: SPIN_LOCK_UNLOCKED, \
6369 journal_info: NULL, \
6370 }
6371
6372@@ -518,24 +543,23 @@
6373 #endif
6374
6375 union task_union {
6376- struct task_struct task;
6377+ task_t task;
6378 unsigned long stack[INIT_TASK_SIZE/sizeof(long)];
6379 };
6380
6381 extern union task_union init_task_union;
6382
6383 extern struct mm_struct init_mm;
6384-extern struct task_struct *init_tasks[NR_CPUS];
6385
6386 /* PID hashing. (shouldnt this be dynamic?) */
6387 #define PIDHASH_SZ (4096 >> 2)
6388-extern struct task_struct *pidhash[PIDHASH_SZ];
6389+extern task_t *pidhash[PIDHASH_SZ];
6390
6391 #define pid_hashfn(x) ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1))
6392
6393-static inline void hash_pid(struct task_struct *p)
6394+static inline void hash_pid(task_t *p)
6395 {
6396- struct task_struct **htable = &pidhash[pid_hashfn(p->pid)];
6397+ task_t **htable = &pidhash[pid_hashfn(p->pid)];
6398
6399 if((p->pidhash_next = *htable) != NULL)
6400 (*htable)->pidhash_pprev = &p->pidhash_next;
6401@@ -543,16 +567,16 @@
6402 p->pidhash_pprev = htable;
6403 }
6404
6405-static inline void unhash_pid(struct task_struct *p)
6406+static inline void unhash_pid(task_t *p)
6407 {
6408 if(p->pidhash_next)
6409 p->pidhash_next->pidhash_pprev = p->pidhash_pprev;
6410 *p->pidhash_pprev = p->pidhash_next;
6411 }
6412
6413-static inline struct task_struct *find_task_by_pid(int pid)
6414+static inline task_t *find_task_by_pid(int pid)
6415 {
6416- struct task_struct *p, **htable = &pidhash[pid_hashfn(pid)];
6417+ task_t *p, **htable = &pidhash[pid_hashfn(pid)];
6418
6419 for(p = *htable; p && p->pid != pid; p = p->pidhash_next)
6420 ;
6421@@ -560,19 +584,6 @@
6422 return p;
6423 }
6424
6425-#define task_has_cpu(tsk) ((tsk)->cpus_runnable != ~0UL)
6426-
6427-static inline void task_set_cpu(struct task_struct *tsk, unsigned int cpu)
6428-{
6429- tsk->processor = cpu;
6430- tsk->cpus_runnable = 1UL << cpu;
6431-}
6432-
6433-static inline void task_release_cpu(struct task_struct *tsk)
6434-{
6435- tsk->cpus_runnable = ~0UL;
6436-}
6437-
6438 /* per-UID process charging. */
6439 extern struct user_struct * alloc_uid(uid_t);
6440 extern void free_uid(struct user_struct *);
6441@@ -600,47 +611,50 @@
6442 extern void FASTCALL(interruptible_sleep_on(wait_queue_head_t *q));
6443 extern long FASTCALL(interruptible_sleep_on_timeout(wait_queue_head_t *q,
6444 signed long timeout));
6445-extern int FASTCALL(wake_up_process(struct task_struct * tsk));
6446+extern int FASTCALL(wake_up_process(task_t * p));
6447+extern void FASTCALL(wake_up_forked_process(task_t * p));
6448
6449 #define wake_up(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1)
6450 #define wake_up_nr(x, nr) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr)
6451 #define wake_up_all(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0)
6452-#define wake_up_sync(x) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1)
6453-#define wake_up_sync_nr(x, nr) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr)
6454 #define wake_up_interruptible(x) __wake_up((x),TASK_INTERRUPTIBLE, 1)
6455 #define wake_up_interruptible_nr(x, nr) __wake_up((x),TASK_INTERRUPTIBLE, nr)
6456 #define wake_up_interruptible_all(x) __wake_up((x),TASK_INTERRUPTIBLE, 0)
6457-#define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, 1)
6458-#define wake_up_interruptible_sync_nr(x, nr) __wake_up_sync((x),TASK_INTERRUPTIBLE, nr)
6459+#ifdef CONFIG_SMP
6460+#define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, 1)
6461+#else
6462+#define wake_up_interruptible_sync(x) __wake_up((x),TASK_INTERRUPTIBLE, 1)
6463+#endif
6464+
6465 asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru);
6466
6467 extern int in_group_p(gid_t);
6468 extern int in_egroup_p(gid_t);
6469
6470 extern void proc_caches_init(void);
6471-extern void flush_signals(struct task_struct *);
6472-extern void flush_signal_handlers(struct task_struct *);
6473+extern void flush_signals(task_t *);
6474+extern void flush_signal_handlers(task_t *);
6475 extern void sig_exit(int, int, struct siginfo *);
6476 extern int dequeue_signal(sigset_t *, siginfo_t *);
6477 extern void block_all_signals(int (*notifier)(void *priv), void *priv,
6478 sigset_t *mask);
6479 extern void unblock_all_signals(void);
6480-extern int send_sig_info(int, struct siginfo *, struct task_struct *);
6481-extern int force_sig_info(int, struct siginfo *, struct task_struct *);
6482+extern int send_sig_info(int, struct siginfo *, task_t *);
6483+extern int force_sig_info(int, struct siginfo *, task_t *);
6484 extern int kill_pg_info(int, struct siginfo *, pid_t);
6485 extern int kill_sl_info(int, struct siginfo *, pid_t);
6486 extern int kill_proc_info(int, struct siginfo *, pid_t);
6487-extern void notify_parent(struct task_struct *, int);
6488-extern void do_notify_parent(struct task_struct *, int);
6489-extern void force_sig(int, struct task_struct *);
6490-extern int send_sig(int, struct task_struct *, int);
6491+extern void notify_parent(task_t *, int);
6492+extern void do_notify_parent(task_t *, int);
6493+extern void force_sig(int, task_t *);
6494+extern int send_sig(int, task_t *, int);
6495 extern int kill_pg(pid_t, int, int);
6496 extern int kill_sl(pid_t, int, int);
6497 extern int kill_proc(pid_t, int, int);
6498 extern int do_sigaction(int, const struct k_sigaction *, struct k_sigaction *);
6499 extern int do_sigaltstack(const stack_t *, stack_t *, unsigned long);
6500
6501-static inline int signal_pending(struct task_struct *p)
6502+static inline int signal_pending(task_t *p)
6503 {
6504 return (p->sigpending != 0);
6505 }
6506@@ -679,7 +693,7 @@
6507 This is required every time the blocked sigset_t changes.
6508 All callers should have t->sigmask_lock. */
6509
6510-static inline void recalc_sigpending(struct task_struct *t)
6511+static inline void recalc_sigpending(task_t *t)
6512 {
6513 t->sigpending = has_pending_signals(&t->pending.signal, &t->blocked);
6514 }
6515@@ -786,16 +800,17 @@
6516 extern int expand_fdset(struct files_struct *, int nr);
6517 extern void free_fdset(fd_set *, int);
6518
6519-extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
6520+extern int copy_thread(int, unsigned long, unsigned long, unsigned long, task_t *, struct pt_regs *);
6521 extern void flush_thread(void);
6522 extern void exit_thread(void);
6523
6524-extern void exit_mm(struct task_struct *);
6525-extern void exit_files(struct task_struct *);
6526-extern void exit_sighand(struct task_struct *);
6527+extern void exit_mm(task_t *);
6528+extern void exit_files(task_t *);
6529+extern void exit_sighand(task_t *);
6530
6531 extern void reparent_to_init(void);
6532 extern void daemonize(void);
6533+extern task_t *child_reaper;
6534
6535 extern int do_execve(char *, char **, char **, struct pt_regs *);
6536 extern int do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long);
6537@@ -806,6 +821,9 @@
6538
6539 extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
6540
6541+extern void wait_task_inactive(task_t * p);
6542+extern void kick_if_running(task_t * p);
6543+
6544 #define __wait_event(wq, condition) \
6545 do { \
6546 wait_queue_t __wait; \
6547@@ -887,27 +905,12 @@
6548 for (task = next_thread(current) ; task != current ; task = next_thread(task))
6549
6550 #define next_thread(p) \
6551- list_entry((p)->thread_group.next, struct task_struct, thread_group)
6552+ list_entry((p)->thread_group.next, task_t, thread_group)
6553
6554 #define thread_group_leader(p) (p->pid == p->tgid)
6555
6556-static inline void del_from_runqueue(struct task_struct * p)
6557+static inline void unhash_process(task_t *p)
6558 {
6559- nr_running--;
6560- p->sleep_time = jiffies;
6561- list_del(&p->run_list);
6562- p->run_list.next = NULL;
6563-}
6564-
6565-static inline int task_on_runqueue(struct task_struct *p)
6566-{
6567- return (p->run_list.next != NULL);
6568-}
6569-
6570-static inline void unhash_process(struct task_struct *p)
6571-{
6572- if (task_on_runqueue(p))
6573- out_of_line_bug();
6574 write_lock_irq(&tasklist_lock);
6575 nr_threads--;
6576 unhash_pid(p);
6577@@ -917,12 +920,12 @@
6578 }
6579
6580 /* Protects ->fs, ->files, ->mm, and synchronises with wait4(). Nests inside tasklist_lock */
6581-static inline void task_lock(struct task_struct *p)
6582+static inline void task_lock(task_t *p)
6583 {
6584 spin_lock(&p->alloc_lock);
6585 }
6586
6587-static inline void task_unlock(struct task_struct *p)
6588+static inline void task_unlock(task_t *p)
6589 {
6590 spin_unlock(&p->alloc_lock);
6591 }
6592@@ -946,6 +949,26 @@
6593 return res;
6594 }
6595
6596+static inline void set_need_resched(void)
6597+{
6598+ current->need_resched = 1;
6599+}
6600+
6601+static inline void clear_need_resched(void)
6602+{
6603+ current->need_resched = 0;
6604+}
6605+
6606+static inline void set_tsk_need_resched(task_t *tsk)
6607+{
6608+ tsk->need_resched = 1;
6609+}
6610+
6611+static inline void clear_tsk_need_resched(task_t *tsk)
6612+{
6613+ tsk->need_resched = 0;
6614+}
6615+
6616 static inline int need_resched(void)
6617 {
6618 return (unlikely(current->need_resched));
6619@@ -959,4 +982,5 @@
6620 }
6621
6622 #endif /* __KERNEL__ */
6623+
6624 #endif
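To make the new priority ranges concrete: with MAX_RT_PRIO = 100 and MAX_PRIO = 140, the forty nice levels occupy the top of the table and real-time priorities the bottom, with a lower p->prio always winning. The mapping macros below follow the convention the O(1) scheduler uses in its kernel/sched.c; they are shown only to illustrate the comment above and are not quoted from this patch.

#define MAX_USER_RT_PRIO	100
#define MAX_RT_PRIO		MAX_USER_RT_PRIO
#define MAX_PRIO		(MAX_RT_PRIO + 40)

/* nice -20..19  maps to  static priority 100..139 */
#define NICE_TO_PRIO(nice)	(MAX_RT_PRIO + (nice) + 20)
#define PRIO_TO_NICE(prio)	((prio) - MAX_RT_PRIO - 20)

/* Example: NICE_TO_PRIO(0) == 120, i.e. the MAX_PRIO-20 value that the
 * INIT_TASK() initializer above assigns to prio and static_prio. */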
6625diff -urN linux-2.4.24.org/include/linux/smp_balance.h linux-2.4.24/include/linux/smp_balance.h
6626--- linux-2.4.24.org/include/linux/smp_balance.h 1970-01-01 01:00:00.000000000 +0100
6627+++ linux-2.4.24/include/linux/smp_balance.h	2004-02-04 20:52:54.758735283 +0100
6628@@ -0,0 +1,15 @@
6629+#ifndef _LINUX_SMP_BALANCE_H
6630+#define _LINUX_SMP_BALANCE_H
6631+
6632+/*
6633+ * per-architecture load balancing logic, e.g. for hyperthreading
6634+ */
6635+
6636+#ifdef ARCH_HAS_SMP_BALANCE
6637+#include <asm/smp_balance.h>
6638+#else
6639+#define arch_load_balance(x, y) (0)
6640+#define arch_reschedule_idle_override(x, idle) (idle)
6641+#endif
6642+
6643+#endif /* _LINUX_SMP_BALANCE_H */
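The new header is a pure hook point: an architecture that wants its own balancing policy (hyperthreading being the motivating example) defines ARCH_HAS_SMP_BALANCE and ships an <asm/smp_balance.h> providing the two macros. A hypothetical opt-in could look like the lines below; the my_arch_*() helpers are invented for illustration, and where ARCH_HAS_SMP_BALANCE itself gets defined is left to the architecture's headers.

/* hypothetical <asm/smp_balance.h> for an architecture that opts in */
#define arch_load_balance(cpu, idle)		my_arch_load_balance((cpu), (idle))
#define arch_reschedule_idle_override(p, idle)	my_arch_idle_override((p), (idle))

Judging by the generic defaults above, returning 0 from arch_load_balance() means "fall back to the scheduler's own balancing", and arch_reschedule_idle_override() simply returns the idle candidate unchanged.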
6644diff -urN linux-2.4.24.org/include/linux/smp.h linux-2.4.24/include/linux/smp.h
6645--- linux-2.4.24.org/include/linux/smp.h 2004-02-04 20:47:38.184581896 +0100
6646+++ linux-2.4.24/include/linux/smp.h 2004-02-04 20:52:54.806725301 +0100
6647@@ -86,6 +86,14 @@
6648 #define cpu_number_map(cpu) 0
6649 #define smp_call_function(func,info,retry,wait) ({ 0; })
6650 #define cpu_online_map 1
6651+static inline void smp_send_reschedule(int cpu) { }
6652+static inline void smp_send_reschedule_all(void) { }
6653
6654 #endif
6655+
6656+/*
6657+ * Common definitions:
6658+ */
6659+#define cpu() smp_processor_id()
6660+
6661 #endif
6662diff -urN linux-2.4.24.org/include/linux/wait.h linux-2.4.24/include/linux/wait.h
6663--- linux-2.4.24.org/include/linux/wait.h 2004-02-04 20:47:33.472562001 +0100
6664+++ linux-2.4.24/include/linux/wait.h 2004-02-04 20:52:54.861713864 +0100
6665@@ -59,6 +59,7 @@
6666 # define wq_write_lock_irq write_lock_irq
6667 # define wq_write_lock_irqsave write_lock_irqsave
6668 # define wq_write_unlock_irqrestore write_unlock_irqrestore
6669+# define wq_write_unlock_irq write_unlock_irq
6670 # define wq_write_unlock write_unlock
6671 #else
6672 # define wq_lock_t spinlock_t
6673@@ -71,6 +72,7 @@
6674 # define wq_write_lock_irq spin_lock_irq
6675 # define wq_write_lock_irqsave spin_lock_irqsave
6676 # define wq_write_unlock_irqrestore spin_unlock_irqrestore
6677+# define wq_write_unlock_irq spin_unlock_irq
6678 # define wq_write_unlock spin_unlock
6679 #endif
6680
6681diff -urN linux-2.4.24.org/init/main.c linux-2.4.24/init/main.c
6682--- linux-2.4.24.org/init/main.c 2004-02-04 20:47:26.630985058 +0100
6683+++ linux-2.4.24/init/main.c 2004-02-04 20:52:54.909703882 +0100
6684@@ -293,8 +293,6 @@
6685 extern void setup_arch(char **);
6686 extern void cpu_idle(void);
6687
6688-unsigned long wait_init_idle;
6689-
6690 #ifndef CONFIG_SMP
6691
6692 #ifdef CONFIG_X86_LOCAL_APIC
6693@@ -303,34 +301,24 @@
6694 APIC_init_uniprocessor();
6695 }
6696 #else
6697-#define smp_init() do { } while (0)
6698+#define smp_init() do { } while (0)
6699 #endif
6700
6701 #else
6702
6703-
6704 /* Called by boot processor to activate the rest. */
6705 static void __init smp_init(void)
6706 {
6707 /* Get other processors into their bootup holding patterns. */
6708 smp_boot_cpus();
6709- wait_init_idle = cpu_online_map;
6710- clear_bit(current->processor, &wait_init_idle); /* Don't wait on me! */
6711
6712 smp_threads_ready=1;
6713 smp_commence();
6714-
6715- /* Wait for the other cpus to set up their idle processes */
6716- printk("Waiting on wait_init_idle (map = 0x%lx)\n", wait_init_idle);
6717- while (wait_init_idle) {
6718- cpu_relax();
6719- barrier();
6720- }
6721- printk("All processors have done init_idle\n");
6722 }
6723
6724 #endif
6725
6726+
6727 /*
6728 * We need to finalize in a non-__init function or else race conditions
6729 * between the root thread and the init thread may cause start_kernel to
6730@@ -342,9 +330,8 @@
6731 {
6732 kernel_thread(init, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
6733 unlock_kernel();
6734- current->need_resched = 1;
6735- cpu_idle();
6736-}
6737+ cpu_idle();
6738+}
6739
6740 /*
6741 * Activate the first processor.
6742@@ -428,6 +415,7 @@
6743 check_bugs();
6744 printk("POSIX conformance testing by UNIFIX\n");
6745
6746+ init_idle(current, smp_processor_id());
6747 /*
6748 * We count on the initial thread going ok
6749 * Like idlers init is an unlocked kernel thread, which will
6750@@ -465,6 +453,10 @@
6751 */
6752 static void __init do_basic_setup(void)
6753 {
6754+ /* Start the per-CPU migration threads */
6755+#if CONFIG_SMP
6756+ migration_init();
6757+#endif
6758
6759 /*
6760 * Tell the world that we're going to be the grim
6761diff -urN linux-2.4.24.org/kernel/capability.c linux-2.4.24/kernel/capability.c
6762--- linux-2.4.24.org/kernel/capability.c 2004-02-04 20:47:27.302845310 +0100
6763+++ linux-2.4.24/kernel/capability.c 2004-02-04 20:52:54.945696396 +0100
6764@@ -8,6 +8,8 @@
6765 #include <linux/mm.h>
6766 #include <asm/uaccess.h>
6767
6768+unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
6769+
6770 kernel_cap_t cap_bset = CAP_INIT_EFF_SET;
6771
6772 /* Note: never hold tasklist_lock while spinning for this one */
6773diff -urN linux-2.4.24.org/kernel/exit.c linux-2.4.24/kernel/exit.c
6774--- linux-2.4.24.org/kernel/exit.c 2004-02-04 20:47:27.240858204 +0100
6775+++ linux-2.4.24/kernel/exit.c 2004-02-04 20:52:54.951695148 +0100
6776@@ -28,49 +28,22 @@
6777
6778 static void release_task(struct task_struct * p)
6779 {
6780- if (p != current) {
6781+ if (p == current)
6782+ BUG();
6783 #ifdef CONFIG_SMP
6784- /*
6785- * Wait to make sure the process isn't on the
6786- * runqueue (active on some other CPU still)
6787- */
6788- for (;;) {
6789- task_lock(p);
6790- if (!task_has_cpu(p))
6791- break;
6792- task_unlock(p);
6793- do {
6794- cpu_relax();
6795- barrier();
6796- } while (task_has_cpu(p));
6797- }
6798- task_unlock(p);
6799+ wait_task_inactive(p);
6800 #endif
6801- atomic_dec(&p->user->processes);
6802- free_uid(p->user);
6803- unhash_process(p);
6804-
6805- release_thread(p);
6806- current->cmin_flt += p->min_flt + p->cmin_flt;
6807- current->cmaj_flt += p->maj_flt + p->cmaj_flt;
6808- current->cnswap += p->nswap + p->cnswap;
6809- /*
6810- * Potentially available timeslices are retrieved
6811- * here - this way the parent does not get penalized
6812- * for creating too many processes.
6813- *
6814- * (this cannot be used to artificially 'generate'
6815- * timeslices, because any timeslice recovered here
6816- * was given away by the parent in the first place.)
6817- */
6818- current->counter += p->counter;
6819- if (current->counter >= MAX_COUNTER)
6820- current->counter = MAX_COUNTER;
6821- p->pid = 0;
6822- free_task_struct(p);
6823- } else {
6824- printk("task releasing itself\n");
6825- }
6826+ atomic_dec(&p->user->processes);
6827+ free_uid(p->user);
6828+ unhash_process(p);
6829+
6830+ release_thread(p);
6831+ current->cmin_flt += p->min_flt + p->cmin_flt;
6832+ current->cmaj_flt += p->maj_flt + p->cmaj_flt;
6833+ current->cnswap += p->nswap + p->cnswap;
6834+ sched_exit(p);
6835+ p->pid = 0;
6836+ free_task_struct(p);
6837 }
6838
6839 /*
6840@@ -150,6 +123,79 @@
6841 return retval;
6842 }
6843
6844+/**
6845+ * reparent_to_init() - Reparent the calling kernel thread to the init task.
6846+ *
6847+ * If a kernel thread is launched as a result of a system call, or if
6848+ * it ever exits, it should generally reparent itself to init so that
6849+ * it is correctly cleaned up on exit.
6850+ *
6851+ * The various task state such as scheduling policy and priority may have
6852+ * been inherited from a user process, so we reset them to sane values here.
6853+ *
6854+ * NOTE that reparent_to_init() gives the caller full capabilities.
6855+ */
6856+void reparent_to_init(void)
6857+{
6858+ write_lock_irq(&tasklist_lock);
6859+
6860+ /* Reparent to init */
6861+ REMOVE_LINKS(current);
6862+ current->p_pptr = child_reaper;
6863+ current->p_opptr = child_reaper;
6864+ SET_LINKS(current);
6865+
6866+ /* Set the exit signal to SIGCHLD so we signal init on exit */
6867+ current->exit_signal = SIGCHLD;
6868+
6869+ current->ptrace = 0;
6870+ if ((current->policy == SCHED_OTHER) && (task_nice(current) < 0))
6871+ set_user_nice(current, 0);
6872+ /* cpus_allowed? */
6873+ /* rt_priority? */
6874+ /* signals? */
6875+ current->cap_effective = CAP_INIT_EFF_SET;
6876+ current->cap_inheritable = CAP_INIT_INH_SET;
6877+ current->cap_permitted = CAP_FULL_SET;
6878+ current->keep_capabilities = 0;
6879+ memcpy(current->rlim, init_task.rlim, sizeof(*(current->rlim)));
6880+ current->user = INIT_USER;
6881+
6882+ write_unlock_irq(&tasklist_lock);
6883+}
6884+
6885+/*
6886+ * Put all the gunge required to become a kernel thread without
6887+ * attached user resources in one place where it belongs.
6888+ */
6889+
6890+void daemonize(void)
6891+{
6892+ struct fs_struct *fs;
6893+
6894+
6895+ /*
6896+ * If we were started as result of loading a module, close all of the
6897+ * user space pages. We don't need them, and if we didn't close them
6898+ * they would be locked into memory.
6899+ */
6900+ exit_mm(current);
6901+
6902+ current->session = 1;
6903+ current->pgrp = 1;
6904+ current->tty = NULL;
6905+
6906+ /* Become as one with the init task */
6907+
6908+ exit_fs(current); /* current->fs->count--; */
6909+ fs = init_task.fs;
6910+ current->fs = fs;
6911+ atomic_inc(&fs->count);
6912+ exit_files(current);
6913+ current->files = init_task.files;
6914+ atomic_inc(&current->files->count);
6915+}
6916+
6917 /*
6918 * When we die, we re-parent all our children.
6919 * Try to give them to another thread in our thread
6920@@ -171,6 +217,7 @@
6921 /* Make sure we're not reparenting to ourselves */
6922 p->p_opptr = child_reaper;
6923
6924+ p->first_time_slice = 0;
6925 if (p->pdeath_signal) send_sig(p->pdeath_signal, p, 0);
6926 }
6927 }
6928diff -urN linux-2.4.24.org/kernel/fork.c linux-2.4.24/kernel/fork.c
6929--- linux-2.4.24.org/kernel/fork.c 2004-02-04 20:47:26.750960103 +0100
6930+++ linux-2.4.24/kernel/fork.c 2004-02-04 20:52:54.987687662 +0100
6931@@ -31,7 +31,6 @@
6932
6933 /* The idle threads do not count.. */
6934 int nr_threads;
6935-int nr_running;
6936
6937 int max_threads;
6938 unsigned long total_forks; /* Handle normal Linux uptimes. */
6939@@ -39,6 +38,8 @@
6940
6941 struct task_struct *pidhash[PIDHASH_SZ];
6942
6943+rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */
6944+
6945 void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
6946 {
6947 unsigned long flags;
6948@@ -700,9 +701,6 @@
6949 if (p->pid == 0 && current->pid != 0)
6950 goto bad_fork_cleanup;
6951
6952- p->run_list.next = NULL;
6953- p->run_list.prev = NULL;
6954-
6955 p->p_cptr = NULL;
6956 init_waitqueue_head(&p->wait_chldexit);
6957 p->vfork_done = NULL;
6958@@ -711,6 +709,7 @@
6959 init_completion(&vfork);
6960 }
6961 spin_lock_init(&p->alloc_lock);
6962+ spin_lock_init(&p->switch_lock);
6963
6964 p->sigpending = 0;
6965 init_sigpending(&p->pending);
6966@@ -727,11 +726,11 @@
6967 #ifdef CONFIG_SMP
6968 {
6969 int i;
6970- p->cpus_runnable = ~0UL;
6971- p->processor = current->processor;
6972+
6973 /* ?? should we just memset this ?? */
6974 for(i = 0; i < smp_num_cpus; i++)
6975- p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0;
6976+ p->per_cpu_utime[cpu_logical_map(i)] =
6977+ p->per_cpu_stime[cpu_logical_map(i)] = 0;
6978 spin_lock_init(&p->sigmask_lock);
6979 }
6980 #endif
6981@@ -769,15 +768,27 @@
6982 p->pdeath_signal = 0;
6983
6984 /*
6985- * "share" dynamic priority between parent and child, thus the
6986- * total amount of dynamic priorities in the system doesn't change,
6987- * more scheduling fairness. This is only important in the first
6988- * timeslice, on the long run the scheduling behaviour is unchanged.
6989- */
6990- p->counter = (current->counter + 1) >> 1;
6991- current->counter >>= 1;
6992- if (!current->counter)
6993- current->need_resched = 1;
6994+ * Share the timeslice between parent and child, thus the
6995+ * total amount of pending timeslices in the system doesn't change,

6996+ * resulting in more scheduling fairness.
6997+ */
6998+ __cli();
6999+ if (!current->time_slice)
7000+ BUG();
7001+ p->time_slice = (current->time_slice + 1) >> 1;
7002+ current->time_slice >>= 1;
7003+ p->first_time_slice = 1;
7004+ if (!current->time_slice) {
7005+ /*
7006+ * This case is rare, it happens when the parent has only
7007+ * a single jiffy left from its timeslice. Taking the
7008+ * runqueue lock is not a problem.
7009+ */
7010+ current->time_slice = 1;
7011+ scheduler_tick(0,0);
7012+ }
7013+ p->sleep_timestamp = jiffies;
7014+ __sti();
7015
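/*
 * Illustrative sketch, not part of the patch: the parent/child timeslice
 * split added above conserves the total number of pending ticks - the child
 * gets the rounded-up half and the parent keeps the rounded-down half.
 */
#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned int parent, child, ts;

	for (ts = 1; ts <= 30; ts++) {
		parent = ts;
		child = (parent + 1) >> 1;	/* p->time_slice */
		parent >>= 1;			/* current->time_slice */
		assert(child + parent == ts);	/* nothing created or lost */
	}

	parent = 15;
	child = (parent + 1) >> 1;
	parent >>= 1;
	printf("15 ticks left -> child %u, parent %u\n", child, parent); /* 8, 7 */
	return 0;
}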
7016 /*
7017 * Ok, add it to the run-queues and make it
7018@@ -813,11 +824,16 @@
7019
7020 if (p->ptrace & PT_PTRACED)
7021 send_sig(SIGSTOP, p, 1);
7022-
7023- wake_up_process(p); /* do this last */
7024+ wake_up_forked_process(p); /* do this last */
7025 ++total_forks;
7026 if (clone_flags & CLONE_VFORK)
7027 wait_for_completion(&vfork);
7028+ else
7029+ /*
7030+ * Let the child process run first, to avoid most of the
7031+ * COW overhead when the child exec()s afterwards.
7032+ */
7033+ current->need_resched = 1;
7034
7035 fork_out:
7036 return retval;
7037diff -urN linux-2.4.24.org/kernel/ksyms.c linux-2.4.24/kernel/ksyms.c
7038--- linux-2.4.24.org/kernel/ksyms.c 2004-02-04 20:47:26.747960727 +0100
7039+++ linux-2.4.24/kernel/ksyms.c 2004-02-04 20:52:54.992686623 +0100
7040@@ -461,7 +461,6 @@
7041 /* process management */
7042 EXPORT_SYMBOL(complete_and_exit);
7043 EXPORT_SYMBOL(__wake_up);
7044-EXPORT_SYMBOL(__wake_up_sync);
7045 EXPORT_SYMBOL(wake_up_process);
7046 EXPORT_SYMBOL(sleep_on);
7047 EXPORT_SYMBOL(sleep_on_timeout);
7048@@ -474,6 +473,8 @@
7049 #endif
7050 EXPORT_SYMBOL(yield);
7051 EXPORT_SYMBOL(__cond_resched);
7052+EXPORT_SYMBOL(set_user_nice);
7053+EXPORT_SYMBOL(nr_context_switches);
7054 EXPORT_SYMBOL(jiffies);
7055 EXPORT_SYMBOL(xtime);
7056 EXPORT_SYMBOL(do_gettimeofday);
7057@@ -484,7 +485,6 @@
7058 #endif
7059
7060 EXPORT_SYMBOL(kstat);
7061-EXPORT_SYMBOL(nr_running);
7062
7063 /* misc */
7064 EXPORT_SYMBOL(panic);
7065diff -urN linux-2.4.24.org/kernel/printk.c linux-2.4.24/kernel/printk.c
7066--- linux-2.4.24.org/kernel/printk.c 2004-02-04 20:47:26.744961351 +0100
7067+++ linux-2.4.24/kernel/printk.c 2004-02-04 20:52:55.015681840 +0100
7068@@ -26,6 +26,7 @@
7069 #include <linux/module.h>
7070 #include <linux/interrupt.h> /* For in_interrupt() */
7071 #include <linux/config.h>
7072+#include <linux/delay.h>
7073
7074 #include <asm/uaccess.h>
7075
7076diff -urN linux-2.4.24.org/kernel/ptrace.c linux-2.4.24/kernel/ptrace.c
7077--- linux-2.4.24.org/kernel/ptrace.c 2004-02-04 20:47:26.776954696 +0100
7078+++ linux-2.4.24/kernel/ptrace.c 2004-02-04 20:52:55.029678928 +0100
7079@@ -32,20 +32,7 @@
7080 if (child->state != TASK_STOPPED)
7081 return -ESRCH;
7082 #ifdef CONFIG_SMP
7083- /* Make sure the child gets off its CPU.. */
7084- for (;;) {
7085- task_lock(child);
7086- if (!task_has_cpu(child))
7087- break;
7088- task_unlock(child);
7089- do {
7090- if (child->state != TASK_STOPPED)
7091- return -ESRCH;
7092- barrier();
7093- cpu_relax();
7094- } while (task_has_cpu(child));
7095- }
7096- task_unlock(child);
7097+ wait_task_inactive(child);
7098 #endif
7099 }
7100
7101diff -urN linux-2.4.24.org/kernel/sched.c linux-2.4.24/kernel/sched.c
7102--- linux-2.4.24.org/kernel/sched.c 2004-02-04 20:47:26.741961975 +0100
7103+++ linux-2.4.24/kernel/sched.c 2004-02-04 20:52:55.076669155 +0100
7104@@ -3,340 +3,333 @@
7105 *
7106 * Kernel scheduler and related syscalls
7107 *
7108- * Copyright (C) 1991, 1992 Linus Torvalds
7109+ * Copyright (C) 1991-2002 Linus Torvalds
7110 *
7111 * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and
7112 * make semaphores SMP safe
7113 * 1998-11-19 Implemented schedule_timeout() and related stuff
7114 * by Andrea Arcangeli
7115- * 1998-12-28 Implemented better SMP scheduling by Ingo Molnar
7116+ * 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar:
7117+ * hybrid priority-list and round-robin design with
7118+ * an array-switch method of distributing timeslices
7119+ * and per-CPU runqueues. Additional code by Davide
7120+ * Libenzi, Robert Love, and Rusty Russell.
7121 */
7122
7123-/*
7124- * 'sched.c' is the main kernel file. It contains scheduling primitives
7125- * (sleep_on, wakeup, schedule etc) as well as a number of simple system
7126- * call functions (type getpid()), which just extract a field from
7127- * current-task
7128- */
7129-
7130-#include <linux/config.h>
7131 #include <linux/mm.h>
7132-#include <linux/init.h>
7133-#include <linux/smp_lock.h>
7134 #include <linux/nmi.h>
7135 #include <linux/interrupt.h>
7136-#include <linux/kernel_stat.h>
7137-#include <linux/completion.h>
7138-#include <linux/prefetch.h>
7139-#include <linux/compiler.h>
7140-
7141+#include <linux/init.h>
7142 #include <asm/uaccess.h>
7143+#include <linux/smp_lock.h>
7144 #include <asm/mmu_context.h>
7145-
7146-extern void timer_bh(void);
7147-extern void tqueue_bh(void);
7148-extern void immediate_bh(void);
7149+#include <linux/kernel_stat.h>
7150+#include <linux/completion.h>
7151
7152 /*
7153- * scheduler variables
7154- */
7155+ * Convert user-nice values [ -20 ... 0 ... 19 ]
7156+ * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
7157+ * and back.
7158+ */
7159+#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20)
7160+#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20)
7161+#define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio)
7162
7163-unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
7164-
7165-extern void mem_use(void);
7166+/*
7167+ * 'User priority' is the nice value converted to something we
7168+ * can work with better when scaling various scheduler parameters,
7169+ * it's a [ 0 ... 39 ] range.
7170+ */
7171+#define USER_PRIO(p) ((p)-MAX_RT_PRIO)
7172+#define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio)
7173+#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO))
7174
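/*
 * Illustrative sketch, not part of the patch: a user-space check of the
 * NICE_TO_PRIO()/PRIO_TO_NICE() mapping above. MAX_RT_PRIO = 100 is an
 * assumption (it is defined elsewhere in the patch, not in this hunk);
 * with it, nice -20..+19 maps onto static priorities 100..139 and back.
 */
#include <assert.h>
#include <stdio.h>

#define D_MAX_RT_PRIO 100 /* assumed value */
#define D_NICE_TO_PRIO(nice) (D_MAX_RT_PRIO + (nice) + 20)
#define D_PRIO_TO_NICE(prio) ((prio) - D_MAX_RT_PRIO - 20)

int main(void)
{
	int nice;

	for (nice = -20; nice <= 19; nice++) {
		int prio = D_NICE_TO_PRIO(nice);

		assert(prio >= D_MAX_RT_PRIO && prio < D_MAX_RT_PRIO + 40);
		assert(D_PRIO_TO_NICE(prio) == nice);
	}
	printf("nice -20 -> prio %d, nice 0 -> prio %d, nice +19 -> prio %d\n",
	       D_NICE_TO_PRIO(-20), D_NICE_TO_PRIO(0), D_NICE_TO_PRIO(19));
	return 0;
}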
7175 /*
7176- * Scheduling quanta.
7177+ * These are the 'tuning knobs' of the scheduler:
7178 *
7179- * NOTE! The unix "nice" value influences how long a process
7180- * gets. The nice value ranges from -20 to +19, where a -20
7181- * is a "high-priority" task, and a "+10" is a low-priority
7182- * task.
7183- *
7184- * We want the time-slice to be around 50ms or so, so this
7185- * calculation depends on the value of HZ.
7186- */
7187-#if HZ < 200
7188-#define TICK_SCALE(x) ((x) >> 2)
7189-#elif HZ < 400
7190-#define TICK_SCALE(x) ((x) >> 1)
7191-#elif HZ < 800
7192-#define TICK_SCALE(x) (x)
7193-#elif HZ < 1600
7194-#define TICK_SCALE(x) ((x) << 1)
7195-#else
7196-#define TICK_SCALE(x) ((x) << 2)
7197-#endif
7198-
7199-#define NICE_TO_TICKS(nice) (TICK_SCALE(20-(nice))+1)
7200-
7201+ * Minimum timeslice is 10 msecs, default timeslice is 150 msecs,
7202+ * maximum timeslice is 300 msecs. Timeslices get refilled after
7203+ * they expire.
7204+ */
7205+#define MIN_TIMESLICE ( 10 * HZ / 1000)
7206+#define MAX_TIMESLICE (300 * HZ / 1000)
7207+#define CHILD_PENALTY 50
7208+#define PARENT_PENALTY 100
7209+#define PRIO_BONUS_RATIO 25
7210+#define INTERACTIVE_DELTA 2
7211+#define MAX_SLEEP_AVG (2*HZ)
7212+#define STARVATION_LIMIT (2*HZ)
7213
7214 /*
7215- * Init task must be ok at boot for the ix86 as we will check its signals
7216- * via the SMP irq return path.
7217- */
7218-
7219-struct task_struct * init_tasks[NR_CPUS] = {&init_task, };
7220+ * If a task is 'interactive' then we reinsert it in the active
7221+ * array after it has expired its current timeslice. (it will not
7222+ * continue to run immediately, it will still roundrobin with
7223+ * other interactive tasks.)
7224+ *
7225+ * This part scales the interactivity limit depending on niceness.
7226+ *
7227+ * We scale it linearly, offset by the INTERACTIVE_DELTA delta.
7228+ * Here are a few examples of different nice levels:
7229+ *
7230+ * TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0]
7231+ * TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0]
7232+ * TASK_INTERACTIVE( 0): [1,1,1,1,0,0,0,0,0,0,0]
7233+ * TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0]
7234+ * TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0]
7235+ *
7236+ * (the X axis represents the possible -5 ... 0 ... +5 dynamic
7237+ * priority range a task can explore, a value of '1' means the
7238+ * task is rated interactive.)
7239+ *
7240+ * Ie. nice +19 tasks can never get 'interactive' enough to be
7241+ * reinserted into the active array. And only heavily CPU-hog nice -20
7242+ * tasks will be expired. Default nice 0 tasks are somewhere between,
7243+ * it takes some effort for them to get interactive, but it's not
7244+ * too hard.
7245+ */
7246+
7247+#define SCALE(v1,v1_max,v2_max) \
7248+ (v1) * (v2_max) / (v1_max)
7249+
7250+#define DELTA(p) \
7251+ (SCALE(TASK_NICE(p), 40, MAX_USER_PRIO*PRIO_BONUS_RATIO/100) + \
7252+ INTERACTIVE_DELTA)
7253+
7254+#define TASK_INTERACTIVE(p) \
7255+ ((p)->prio <= (p)->static_prio - DELTA(p))
7256
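/*
 * Illustrative sketch, not part of the patch: evaluates DELTA() for a few
 * nice levels so the interactivity cut-off above can be checked numerically.
 * MAX_RT_PRIO = 100 and MAX_PRIO = 140 are assumptions (defined elsewhere in
 * the patch, not in this hunk), giving MAX_USER_PRIO = 40. A task is rated
 * interactive once its dynamic priority bonus reaches DELTA.
 */
#include <stdio.h>

#define D_MAX_USER_PRIO     40	/* assumed MAX_PRIO - MAX_RT_PRIO */
#define D_PRIO_BONUS_RATIO  25
#define D_INTERACTIVE_DELTA  2
#define D_SCALE(v1, v1_max, v2_max) ((v1) * (v2_max) / (v1_max))
#define D_DELTA(nice) \
	(D_SCALE((nice), 40, D_MAX_USER_PRIO * D_PRIO_BONUS_RATIO / 100) + \
	 D_INTERACTIVE_DELTA)

int main(void)
{
	static const int nice_levels[] = { -20, -10, 0, 10, 19 };
	unsigned int i;

	for (i = 0; i < sizeof(nice_levels) / sizeof(nice_levels[0]); i++) {
		int nice = nice_levels[i];

		printf("nice %3d: rated interactive once its bonus reaches %d\n",
		       nice, D_DELTA(nice));
	}
	return 0;
}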
7257 /*
7258- * The tasklist_lock protects the linked list of processes.
7259- *
7260- * The runqueue_lock locks the parts that actually access
7261- * and change the run-queues, and have to be interrupt-safe.
7262- *
7263- * If both locks are to be concurrently held, the runqueue_lock
7264- * nests inside the tasklist_lock.
7265+ * TASK_TIMESLICE scales user-nice values [ -20 ... 19 ]
7266+ * to time slice values.
7267 *
7268- * task->alloc_lock nests inside tasklist_lock.
7269+ * The higher a process's priority, the bigger timeslices
7270+ * it gets during one round of execution. But even the lowest
7271+ * priority process gets MIN_TIMESLICE worth of execution time.
7272 */
7273-spinlock_t runqueue_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED; /* inner */
7274-rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */
7275
7276-static LIST_HEAD(runqueue_head);
7277+#define TASK_TIMESLICE(p) (MIN_TIMESLICE + \
7278+ ((MAX_TIMESLICE - MIN_TIMESLICE) * (MAX_PRIO-1-(p)->static_prio)/39))
7279
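/*
 * Illustrative sketch, not part of the patch: computes TASK_TIMESLICE() for
 * nice -20, 0 and +19. HZ = 100, MAX_RT_PRIO = 100 and MAX_PRIO = 140 are
 * assumptions (defined outside this hunk); with them the results match the
 * 300/150/10 msec figures quoted in the tuning-knob comment above.
 */
#include <stdio.h>

#define D_HZ            100
#define D_MIN_TIMESLICE ( 10 * D_HZ / 1000)
#define D_MAX_TIMESLICE (300 * D_HZ / 1000)
#define D_MAX_PRIO      140
#define D_NICE_TO_PRIO(nice) (100 + (nice) + 20) /* assumed MAX_RT_PRIO = 100 */
#define D_TIMESLICE(static_prio) \
	(D_MIN_TIMESLICE + ((D_MAX_TIMESLICE - D_MIN_TIMESLICE) * \
			    (D_MAX_PRIO - 1 - (static_prio)) / 39))

int main(void)
{
	static const int nice_levels[] = { -20, 0, 19 };
	unsigned int i;

	for (i = 0; i < sizeof(nice_levels) / sizeof(nice_levels[0]); i++) {
		int slice = D_TIMESLICE(D_NICE_TO_PRIO(nice_levels[i]));

		printf("nice %3d -> %2d ticks (%d msec)\n",
		       nice_levels[i], slice, slice * 1000 / D_HZ);
	}
	return 0;
}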
7280 /*
7281- * We align per-CPU scheduling data on cacheline boundaries,
7282- * to prevent cacheline ping-pong.
7283+ * These are the runqueue data structures:
7284 */
7285-static union {
7286- struct schedule_data {
7287- struct task_struct * curr;
7288- cycles_t last_schedule;
7289- } schedule_data;
7290- char __pad [SMP_CACHE_BYTES];
7291-} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}};
7292
7293-#define cpu_curr(cpu) aligned_data[(cpu)].schedule_data.curr
7294-#define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule
7295+#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long))
7296
7297-struct kernel_stat kstat;
7298-extern struct task_struct *child_reaper;
7299+typedef struct runqueue runqueue_t;
7300
7301-#ifdef CONFIG_SMP
7302+struct prio_array {
7303+ int nr_active;
7304+ unsigned long bitmap[BITMAP_SIZE];
7305+ struct list_head queue[MAX_PRIO];
7306+};
7307
7308-#define idle_task(cpu) (init_tasks[cpu_number_map(cpu)])
7309-#define can_schedule(p,cpu) \
7310- ((p)->cpus_runnable & (p)->cpus_allowed & (1UL << cpu))
7311+/*
7312+ * This is the main, per-CPU runqueue data structure.
7313+ *
7314+ * Locking rule: those places that want to lock multiple runqueues
7315+ * (such as the load balancing or the process migration code), lock
7316+ * acquire operations must be ordered by ascending &runqueue.
7317+ */
7318+struct runqueue {
7319+ spinlock_t lock;
7320+ unsigned long nr_running, nr_switches, expired_timestamp;
7321+ task_t *curr, *idle;
7322+ prio_array_t *active, *expired, arrays[2];
7323+ long nr_uninterruptible;
7324+#ifdef CONFIG_SMP
7325+ long last_jiffy;
7326+ int prev_nr_running[NR_CPUS];
7327+ task_t *migration_thread;
7328+ struct list_head migration_queue;
7329+#endif
7330+} ____cacheline_aligned;
7331
7332-#else
7333+static struct runqueue runqueues[NR_CPUS] __cacheline_aligned;
7334
7335-#define idle_task(cpu) (&init_task)
7336-#define can_schedule(p,cpu) (1)
7337+#define cpu_rq(cpu) (runqueues + (cpu))
7338+#define this_rq() cpu_rq(smp_processor_id())
7339+#define task_rq(p) cpu_rq((p)->cpu)
7340+#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
7341+#define rt_task(p) ((p)->prio < MAX_RT_PRIO)
7342
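/*
 * Illustrative sketch, not part of the patch: a minimal user-space model of
 * the prio_array bitmap trick above. Picking the next task is a find-first-bit
 * over MAX_PRIO bits plus taking the head of that list, independent of how
 * many tasks are runnable. MAX_PRIO = 140 is an assumption; the kernel's
 * sched_find_first_bit() is modelled here with a plain loop.
 */
#include <stdio.h>

#define D_MAX_PRIO 140
#define D_BPL (8 * (int)sizeof(unsigned long))		/* bits per long */
#define D_BITMAP_LONGS ((D_MAX_PRIO + D_BPL - 1) / D_BPL)

static unsigned long bitmap[D_BITMAP_LONGS];
static int nr_queued[D_MAX_PRIO];

static void demo_enqueue(int prio)
{
	if (nr_queued[prio]++ == 0)			/* queue became non-empty */
		bitmap[prio / D_BPL] |= 1UL << (prio % D_BPL);
}

static void demo_dequeue(int prio)
{
	if (--nr_queued[prio] == 0)			/* queue became empty */
		bitmap[prio / D_BPL] &= ~(1UL << (prio % D_BPL));
}

/* stand-in for sched_find_first_bit(): first set bit, or D_MAX_PRIO if none */
static int demo_find_first_bit(void)
{
	int i;

	for (i = 0; i < D_MAX_PRIO; i++)
		if (bitmap[i / D_BPL] & (1UL << (i % D_BPL)))
			return i;
	return D_MAX_PRIO;
}

int main(void)
{
	demo_enqueue(120);	/* a nice-0 task */
	demo_enqueue(139);	/* a nice +19 task */
	demo_enqueue(105);	/* an RT task at priority 105 */
	printf("next prio to run: %d\n", demo_find_first_bit());	/* 105 */
	demo_dequeue(105);
	printf("next prio to run: %d\n", demo_find_first_bit());	/* 120 */
	return 0;
}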
7343+/*
7344+ * Default context-switch locking:
7345+ */
7346+#ifndef prepare_arch_switch
7347+# define prepare_arch_switch(rq, next) do { } while(0)
7348+# define finish_arch_switch(rq, prev) spin_unlock_irq(&(rq)->lock)
7349 #endif
7350
7351-void scheduling_functions_start_here(void) { }
7352-
7353 /*
7354- * This is the function that decides how desirable a process is..
7355- * You can weigh different processes against each other depending
7356- * on what CPU they've run on lately etc to try to handle cache
7357- * and TLB miss penalties.
7358- *
7359- * Return values:
7360- * -1000: never select this
7361- * 0: out of time, recalculate counters (but it might still be
7362- * selected)
7363- * +ve: "goodness" value (the larger, the better)
7364- * +1000: realtime process, select this.
7365+ * task_rq_lock - lock the runqueue a given task resides on and disable
7366+ * interrupts. Note the ordering: we can safely lookup the task_rq without
7367+ * explicitly disabling preemption.
7368 */
7369-
7370-static inline int goodness(struct task_struct * p, int this_cpu, struct mm_struct *this_mm)
7371+static inline runqueue_t *task_rq_lock(task_t *p, unsigned long *flags)
7372 {
7373- int weight;
7374-
7375- /*
7376- * select the current process after every other
7377- * runnable process, but before the idle thread.
7378- * Also, dont trigger a counter recalculation.
7379- */
7380- weight = -1;
7381- if (p->policy & SCHED_YIELD)
7382- goto out;
7383+ struct runqueue *rq;
7384
7385- /*
7386- * Non-RT process - normal case first.
7387- */
7388- if (p->policy == SCHED_OTHER) {
7389- /*
7390- * Give the process a first-approximation goodness value
7391- * according to the number of clock-ticks it has left.
7392- *
7393- * Don't do any other calculations if the time slice is
7394- * over..
7395- */
7396- weight = p->counter;
7397- if (!weight)
7398- goto out;
7399-
7400-#ifdef CONFIG_SMP
7401- /* Give a largish advantage to the same processor... */
7402- /* (this is equivalent to penalizing other processors) */
7403- if (p->processor == this_cpu)
7404- weight += PROC_CHANGE_PENALTY;
7405-#endif
7406-
7407- /* .. and a slight advantage to the current MM */
7408- if (p->mm == this_mm || !p->mm)
7409- weight += 1;
7410- weight += 20 - p->nice;
7411- goto out;
7412+repeat_lock_task:
7413+ rq = task_rq(p);
7414+ spin_lock_irqsave(&rq->lock, *flags);
7415+ if (unlikely(rq != task_rq(p))) {
7416+ spin_unlock_irqrestore(&rq->lock, *flags);
7417+ goto repeat_lock_task;
7418 }
7419+ return rq;
7420+}
7421
7422- /*
7423- * Realtime process, select the first one on the
7424- * runqueue (taking priorities within processes
7425- * into account).
7426- */
7427- weight = 1000 + p->rt_priority;
7428-out:
7429- return weight;
7430+static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags)
7431+{
7432+ spin_unlock_irqrestore(&rq->lock, *flags);
7433 }
7434
7435 /*
7436- * the 'goodness value' of replacing a process on a given CPU.
7437- * positive value means 'replace', zero or negative means 'dont'.
7438+ * Adding/removing a task to/from a priority array:
7439 */
7440-static inline int preemption_goodness(struct task_struct * prev, struct task_struct * p, int cpu)
7441+static inline void dequeue_task(struct task_struct *p, prio_array_t *array)
7442 {
7443- return goodness(p, cpu, prev->active_mm) - goodness(prev, cpu, prev->active_mm);
7444+ array->nr_active--;
7445+ list_del(&p->run_list);
7446+ if (list_empty(array->queue + p->prio))
7447+ __clear_bit(p->prio, array->bitmap);
7448 }
7449
7450-/*
7451- * This is ugly, but reschedule_idle() is very timing-critical.
7452- * We are called with the runqueue spinlock held and we must
7453- * not claim the tasklist_lock.
7454- */
7455-static FASTCALL(void reschedule_idle(struct task_struct * p));
7456+#define enqueue_task(p, array) __enqueue_task(p, array, NULL)
7457+static inline void __enqueue_task(struct task_struct *p, prio_array_t *array, task_t * parent)
7458+{
7459+ if (!parent) {
7460+ list_add_tail(&p->run_list, array->queue + p->prio);
7461+ __set_bit(p->prio, array->bitmap);
7462+ p->array = array;
7463+ } else {
7464+ list_add_tail(&p->run_list, &parent->run_list);
7465+ array = p->array = parent->array;
7466+ }
7467+ array->nr_active++;
7468+}
7469
7470-static void fastcall reschedule_idle(struct task_struct * p)
7471+static inline int effective_prio(task_t *p)
7472 {
7473-#ifdef CONFIG_SMP
7474- int this_cpu = smp_processor_id();
7475- struct task_struct *tsk, *target_tsk;
7476- int cpu, best_cpu, i, max_prio;
7477- cycles_t oldest_idle;
7478+ int bonus, prio;
7479
7480 /*
7481- * shortcut if the woken up task's last CPU is
7482- * idle now.
7483+ * Here we scale the actual sleep average [0 .... MAX_SLEEP_AVG]
7484+ * into the -5 ... 0 ... +5 bonus/penalty range.
7485+ *
7486+ * We use 25% of the full 0...39 priority range so that:
7487+ *
7488+ * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
7489+ * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
7490+ *
7491+ * Both properties are important to certain workloads.
7492 */
7493- best_cpu = p->processor;
7494- if (can_schedule(p, best_cpu)) {
7495- tsk = idle_task(best_cpu);
7496- if (cpu_curr(best_cpu) == tsk) {
7497- int need_resched;
7498-send_now_idle:
7499- /*
7500- * If need_resched == -1 then we can skip sending
7501- * the IPI altogether, tsk->need_resched is
7502- * actively watched by the idle thread.
7503- */
7504- need_resched = tsk->need_resched;
7505- tsk->need_resched = 1;
7506- if ((best_cpu != this_cpu) && !need_resched)
7507- smp_send_reschedule(best_cpu);
7508- return;
7509- }
7510- }
7511+ bonus = MAX_USER_PRIO*PRIO_BONUS_RATIO*p->sleep_avg/MAX_SLEEP_AVG/100 -
7512+ MAX_USER_PRIO*PRIO_BONUS_RATIO/100/2;
7513
7514- /*
7515- * We know that the preferred CPU has a cache-affine current
7516- * process, lets try to find a new idle CPU for the woken-up
7517- * process. Select the least recently active idle CPU. (that
7518- * one will have the least active cache context.) Also find
7519- * the executing process which has the least priority.
7520- */
7521- oldest_idle = (cycles_t) -1;
7522- target_tsk = NULL;
7523- max_prio = 0;
7524+ prio = p->static_prio - bonus;
7525+ if (prio < MAX_RT_PRIO)
7526+ prio = MAX_RT_PRIO;
7527+ if (prio > MAX_PRIO-1)
7528+ prio = MAX_PRIO-1;
7529+ return prio;
7530+}
7531
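/*
 * Illustrative sketch, not part of the patch: evaluates the sleep-average
 * bonus used by effective_prio() above. HZ = 100, MAX_RT_PRIO = 100 and
 * MAX_PRIO = 140 are assumptions (defined outside this hunk), so
 * MAX_SLEEP_AVG = 200 ticks and MAX_USER_PRIO = 40; the D_-prefixed names
 * are local stand-ins for the patch's constants.
 */
#include <stdio.h>

#define D_HZ               100
#define D_MAX_SLEEP_AVG    (2 * D_HZ)
#define D_MAX_USER_PRIO    40
#define D_PRIO_BONUS_RATIO 25
#define D_MAX_RT_PRIO      100
#define D_MAX_PRIO         140

static int demo_effective_prio(int static_prio, unsigned long sleep_avg)
{
	int bonus, prio;

	/* scale sleep_avg [0..MAX_SLEEP_AVG] into the -5 ... +5 bonus range */
	bonus = D_MAX_USER_PRIO * D_PRIO_BONUS_RATIO * (int)sleep_avg /
		D_MAX_SLEEP_AVG / 100 -
		D_MAX_USER_PRIO * D_PRIO_BONUS_RATIO / 100 / 2;

	prio = static_prio - bonus;
	if (prio < D_MAX_RT_PRIO)
		prio = D_MAX_RT_PRIO;
	if (prio > D_MAX_PRIO - 1)
		prio = D_MAX_PRIO - 1;
	return prio;
}

int main(void)
{
	/* a nice-0 task (static_prio 120) at three different sleep averages */
	printf("cpu hog     (sleep_avg=0)   -> prio %d\n", demo_effective_prio(120, 0));
	printf("average     (sleep_avg=100) -> prio %d\n", demo_effective_prio(120, 100));
	printf("interactive (sleep_avg=200) -> prio %d\n", demo_effective_prio(120, 200));
	return 0;
}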
7532- for (i = 0; i < smp_num_cpus; i++) {
7533- cpu = cpu_logical_map(i);
7534- if (!can_schedule(p, cpu))
7535- continue;
7536- tsk = cpu_curr(cpu);
7537+#define activate_task(p, rq) __activate_task(p, rq, NULL)
7538+static inline void __activate_task(task_t *p, runqueue_t *rq, task_t * parent)
7539+{
7540+ unsigned long sleep_time = jiffies - p->sleep_timestamp;
7541+ prio_array_t *array = rq->active;
7542+
7543+ if (!parent && !rt_task(p) && sleep_time) {
7544 /*
7545- * We use the first available idle CPU. This creates
7546- * a priority list between idle CPUs, but this is not
7547- * a problem.
7548+ * This code gives a bonus to interactive tasks. We update
7549+ * an 'average sleep time' value here, based on
7550+ * sleep_timestamp. The more time a task spends sleeping,
7551+ * the higher the average gets - and the higher the priority
7552+ * boost gets as well.
7553 */
7554- if (tsk == idle_task(cpu)) {
7555-#if defined(__i386__) && defined(CONFIG_SMP)
7556- /*
7557- * Check if two siblings are idle in the same
7558- * physical package. Use them if found.
7559- */
7560- if (smp_num_siblings == 2) {
7561- if (cpu_curr(cpu_sibling_map[cpu]) ==
7562- idle_task(cpu_sibling_map[cpu])) {
7563- oldest_idle = last_schedule(cpu);
7564- target_tsk = tsk;
7565- break;
7566- }
7567-
7568- }
7569-#endif
7570- if (last_schedule(cpu) < oldest_idle) {
7571- oldest_idle = last_schedule(cpu);
7572- target_tsk = tsk;
7573- }
7574- } else {
7575- if (oldest_idle == (cycles_t)-1) {
7576- int prio = preemption_goodness(tsk, p, cpu);
7577-
7578- if (prio > max_prio) {
7579- max_prio = prio;
7580- target_tsk = tsk;
7581- }
7582- }
7583- }
7584- }
7585- tsk = target_tsk;
7586- if (tsk) {
7587- if (oldest_idle != (cycles_t)-1) {
7588- best_cpu = tsk->processor;
7589- goto send_now_idle;
7590- }
7591- tsk->need_resched = 1;
7592- if (tsk->processor != this_cpu)
7593- smp_send_reschedule(tsk->processor);
7594+ p->sleep_timestamp = jiffies;
7595+ p->sleep_avg += sleep_time;
7596+ if (p->sleep_avg > MAX_SLEEP_AVG)
7597+ p->sleep_avg = MAX_SLEEP_AVG;
7598+ p->prio = effective_prio(p);
7599 }
7600- return;
7601-
7602+ __enqueue_task(p, array, parent);
7603+ rq->nr_running++;
7604+}
7605
7606-#else /* UP */
7607- int this_cpu = smp_processor_id();
7608- struct task_struct *tsk;
7609+static inline void deactivate_task(struct task_struct *p, runqueue_t *rq)
7610+{
7611+ rq->nr_running--;
7612+ if (p->state == TASK_UNINTERRUPTIBLE)
7613+ rq->nr_uninterruptible++;
7614+ dequeue_task(p, p->array);
7615+ p->array = NULL;
7616+}
7617+
7618+static inline void resched_task(task_t *p)
7619+{
7620+#ifdef CONFIG_SMP
7621+ int need_resched;
7622
7623- tsk = cpu_curr(this_cpu);
7624- if (preemption_goodness(tsk, p, this_cpu) > 0)
7625- tsk->need_resched = 1;
7626+ need_resched = p->need_resched;
7627+ set_tsk_need_resched(p);
7628+ if (!need_resched && (p->cpu != smp_processor_id()))
7629+ smp_send_reschedule(p->cpu);
7630+#else
7631+ set_tsk_need_resched(p);
7632 #endif
7633 }
7634
7635+#ifdef CONFIG_SMP
7636+
7637 /*
7638- * Careful!
7639- *
7640- * This has to add the process to the _end_ of the
7641- * run-queue, not the beginning. The goodness value will
7642- * determine whether this process will run next. This is
7643- * important to get SCHED_FIFO and SCHED_RR right, where
7644- * a process that is either pre-empted or its time slice
7645- * has expired, should be moved to the tail of the run
7646- * queue for its priority - Bhavesh Davda
7647+ * Wait for a process to unschedule. This is used by the exit() and
7648+ * ptrace() code.
7649 */
7650-static inline void add_to_runqueue(struct task_struct * p)
7651+void wait_task_inactive(task_t * p)
7652 {
7653- list_add_tail(&p->run_list, &runqueue_head);
7654- nr_running++;
7655+ unsigned long flags;
7656+ runqueue_t *rq;
7657+
7658+repeat:
7659+ rq = task_rq(p);
7660+ if (unlikely(rq->curr == p)) {
7661+ cpu_relax();
7662+ barrier();
7663+ goto repeat;
7664+ }
7665+ rq = task_rq_lock(p, &flags);
7666+ if (unlikely(rq->curr == p)) {
7667+ task_rq_unlock(rq, &flags);
7668+ goto repeat;
7669+ }
7670+ task_rq_unlock(rq, &flags);
7671 }
7672
7673-static inline void move_last_runqueue(struct task_struct * p)
7674+/*
7675+ * Kick the remote CPU if the task is running currently,
7676+ * this code is used by the signal code to signal tasks
7677+ * which are in user-mode as quickly as possible.
7678+ *
7679+ * (Note that we do this lockless - if the task does anything
7680+ * while the message is in flight then it will notice the
7681+ * sigpending condition anyway.)
7682+ */
7683+void kick_if_running(task_t * p)
7684 {
7685- list_del(&p->run_list);
7686- list_add_tail(&p->run_list, &runqueue_head);
7687+ if (p == task_rq(p)->curr && p->cpu != smp_processor_id())
7688+ resched_task(p);
7689 }
7690+#endif
7691+
7692+#ifdef CONFIG_SMP
7693+static int FASTCALL(reschedule_idle(task_t * p));
7694+static void FASTCALL(load_balance(runqueue_t *this_rq, int idle));
7695+#endif
7696+
7697
7698 /*
7699 * Wake up a process. Put it on the run-queue if it's not
7700@@ -345,429 +338,721 @@
7701 * progress), and as such you're allowed to do the simpler
7702 * "current->state = TASK_RUNNING" to mark yourself runnable
7703 * without the overhead of this.
7704+ *
7705+ * returns failure only if the task is already active.
7706 */
7707-static inline int try_to_wake_up(struct task_struct * p, int synchronous)
7708+static int try_to_wake_up(task_t * p, int sync)
7709 {
7710 unsigned long flags;
7711 int success = 0;
7712+ long old_state;
7713+ runqueue_t *rq;
7714+#ifdef CONFIG_SMP
7715+ int migrated_to_idle = 0;
7716+#endif
7717+
7718+#ifdef CONFIG_SMP
7719+repeat_lock_task:
7720+#endif
7721+ rq = task_rq_lock(p, &flags);
7722+ old_state = p->state;
7723+ if (!p->array) {
7724+#ifdef CONFIG_SMP
7725+ if (likely(rq->curr != p)) {
7726+ /* can migrate */
7727+ if (unlikely(sync)) {
7728+ if (p->cpu != smp_processor_id() &&
7729+ p->cpus_allowed & (1UL << smp_processor_id())) {
7730+ p->cpu = smp_processor_id();
7731+ goto migrated_task;
7732+ }
7733+ } else {
7734+ if (reschedule_idle(p))
7735+ goto migrated_task;
7736+ }
7737+ }
7738+#endif
7739+ if (old_state == TASK_UNINTERRUPTIBLE)
7740+ rq->nr_uninterruptible--;
7741+ activate_task(p, rq);
7742+ if (p->prio < rq->curr->prio)
7743+ resched_task(rq->curr);
7744+ success = 1;
7745+ }
7746+ p->state = TASK_RUNNING;
7747
7748+#ifdef CONFIG_SMP
7749 /*
7750- * We want the common case fall through straight, thus the goto.
7751+ * Subtle: we can load_balance only here (before unlock)
7752+ * because it can internally drop the lock. Claim
7753+ * that the cpu is running so it will be a light rebalance,
7754+ * if this cpu will go idle soon schedule() will trigger the
7755+ * idle rescheduling balancing by itself.
7756 */
7757- spin_lock_irqsave(&runqueue_lock, flags);
7758- p->state = TASK_RUNNING;
7759- if (task_on_runqueue(p))
7760- goto out;
7761- add_to_runqueue(p);
7762- if (!synchronous || !(p->cpus_allowed & (1UL << smp_processor_id())))
7763- reschedule_idle(p);
7764- success = 1;
7765-out:
7766- spin_unlock_irqrestore(&runqueue_lock, flags);
7767+ if (success && migrated_to_idle)
7768+ load_balance(rq, 0);
7769+#endif
7770+
7771+ task_rq_unlock(rq, &flags);
7772+
7773 return success;
7774+
7775+#ifdef CONFIG_SMP
7776+ migrated_task:
7777+ task_rq_unlock(rq, &flags);
7778+ migrated_to_idle = 1;
7779+ goto repeat_lock_task;
7780+#endif
7781 }
7782
7783-inline int fastcall wake_up_process(struct task_struct * p)
7784+int wake_up_process(task_t * p)
7785 {
7786 return try_to_wake_up(p, 0);
7787 }
7788
7789-static void process_timeout(unsigned long __data)
7790+void wake_up_forked_process(task_t * p)
7791 {
7792- struct task_struct * p = (struct task_struct *) __data;
7793+ runqueue_t *rq;
7794+ task_t * parent = current;
7795
7796- wake_up_process(p);
7797-}
7798+ rq = this_rq();
7799+ spin_lock_irq(&rq->lock);
7800
7801-/**
7802- * schedule_timeout - sleep until timeout
7803- * @timeout: timeout value in jiffies
7804- *
7805- * Make the current task sleep until @timeout jiffies have
7806- * elapsed. The routine will return immediately unless
7807- * the current task state has been set (see set_current_state()).
7808- *
7809- * You can set the task state as follows -
7810- *
7811- * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
7812- * pass before the routine returns. The routine will return 0
7813- *
7814- * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
7815- * delivered to the current task. In this case the remaining time
7816- * in jiffies will be returned, or 0 if the timer expired in time
7817- *
7818- * The current task state is guaranteed to be TASK_RUNNING when this
7819- * routine returns.
7820- *
7821- * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
7822- * the CPU away without a bound on the timeout. In this case the return
7823- * value will be %MAX_SCHEDULE_TIMEOUT.
7824- *
7825- * In all cases the return value is guaranteed to be non-negative.
7826- */
7827-signed long fastcall schedule_timeout(signed long timeout)
7828-{
7829- struct timer_list timer;
7830- unsigned long expire;
7831+ p->state = TASK_RUNNING;
7832+ if (likely(!rt_task(p) && parent->array)) {
7833+ /*
7834+ * We decrease the sleep average of forked
7835+ * children, to keep max-interactive tasks
7836+ * from forking tasks that are max-interactive.
7837+ * CHILD_PENALTY is set to 50% since we have
7838+ * no clue if this is still an interactive
7839+ * task like the parent or if this will be a
7840+ * cpu bound task. The parent isn't touched
7841+ * as we don't make assumption about the parent
7842+ * changing behaviour after the child is forked.
7843+ */
7844+ parent->sleep_avg = parent->sleep_avg * PARENT_PENALTY / 100;
7845+ p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100;
7846
7847- switch (timeout)
7848- {
7849- case MAX_SCHEDULE_TIMEOUT:
7850 /*
7851- * These two special cases are useful to be comfortable
7852- * in the caller. Nothing more. We could take
7853- * MAX_SCHEDULE_TIMEOUT from one of the negative value
7854- * but I' d like to return a valid offset (>=0) to allow
7855- * the caller to do everything it want with the retval.
7856+ * For its first schedule keep the child at the same
7857+ * priority (i.e. in the same list) of the parent,
7858+ * activate_forked_task() will take care to put the
7859+ * child in front of the parent (lifo) to guarantee a
7860+ * schedule-child-first behaviour after fork.
7861 */
7862- schedule();
7863- goto out;
7864- default:
7865+ p->prio = parent->prio;
7866+ } else {
7867 /*
7868- * Another bit of PARANOID. Note that the retval will be
7869- * 0 since no piece of kernel is supposed to do a check
7870- * for a negative retval of schedule_timeout() (since it
7871- * should never happens anyway). You just have the printk()
7872- * that will tell you if something is gone wrong and where.
7873+ * Take the usual wakeup path if it's RT or if
7874+ * it's a child of the first idle task (during boot
7875+ * only).
7876 */
7877- if (timeout < 0)
7878- {
7879- printk(KERN_ERR "schedule_timeout: wrong timeout "
7880- "value %lx from %p\n", timeout,
7881- __builtin_return_address(0));
7882- current->state = TASK_RUNNING;
7883- goto out;
7884- }
7885+ p->prio = effective_prio(p);
7886+ parent = NULL;
7887 }
7888
7889- expire = timeout + jiffies;
7890+ p->cpu = smp_processor_id();
7891+ __activate_task(p, rq, parent);
7892+ spin_unlock_irq(&rq->lock);
7893+}
7894
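/*
 * Illustrative sketch, not part of the patch: the fork penalties applied in
 * wake_up_forked_process() above. With CHILD_PENALTY = 50 and
 * PARENT_PENALTY = 100, the child's inherited sleep average is halved while
 * the parent's is left unchanged, so a max-interactive parent cannot
 * mass-produce max-interactive children.
 */
#include <stdio.h>

#define D_PARENT_PENALTY 100
#define D_CHILD_PENALTY   50

int main(void)
{
	unsigned long parent_sleep_avg = 200;		  /* fully interactive */
	unsigned long child_sleep_avg = parent_sleep_avg; /* copied at fork */

	parent_sleep_avg = parent_sleep_avg * D_PARENT_PENALTY / 100;
	child_sleep_avg = child_sleep_avg * D_CHILD_PENALTY / 100;

	printf("parent sleep_avg: %lu, child sleep_avg: %lu\n",
	       parent_sleep_avg, child_sleep_avg);	/* 200 and 100 */
	return 0;
}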
7895- init_timer(&timer);
7896- timer.expires = expire;
7897- timer.data = (unsigned long) current;
7898- timer.function = process_timeout;
7899+/*
7900+ * Potentially available exiting-child timeslices are
7901+ * retrieved here - this way the parent does not get
7902+ * penalized for creating too many processes.
7903+ *
7904+ * (this cannot be used to 'generate' timeslices
7905+ * artificially, because any timeslice recovered here
7906+ * was given away by the parent in the first place.)
7907+ */
7908+void sched_exit(task_t * p)
7909+{
7910+ __cli();
7911+ if (p->first_time_slice) {
7912+ current->time_slice += p->time_slice;
7913+ if (unlikely(current->time_slice > MAX_TIMESLICE))
7914+ current->time_slice = MAX_TIMESLICE;
7915+ }
7916+ __sti();
7917+}
7918
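/*
 * Illustrative sketch, not part of the patch: the clamp in sched_exit()
 * above. A parent reclaiming an exiting child's first timeslice can never
 * end up with more than MAX_TIMESLICE ticks. HZ = 100 is assumed, giving
 * MAX_TIMESLICE = 30 ticks.
 */
#include <stdio.h>

#define D_MAX_TIMESLICE 30	/* assumed 300 * HZ / 1000 with HZ = 100 */

static unsigned int demo_sched_exit(unsigned int parent_ts,
				    unsigned int child_ts,
				    int child_first_time_slice)
{
	if (child_first_time_slice) {
		parent_ts += child_ts;
		if (parent_ts > D_MAX_TIMESLICE)
			parent_ts = D_MAX_TIMESLICE;
	}
	return parent_ts;
}

int main(void)
{
	printf("%u\n", demo_sched_exit(7, 8, 1));	/* 15: child gave its half back */
	printf("%u\n", demo_sched_exit(25, 20, 1));	/* 30: clamped to MAX_TIMESLICE */
	printf("%u\n", demo_sched_exit(7, 8, 0));	/* 7: not the child's first slice */
	return 0;
}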
7919- add_timer(&timer);
7920- schedule();
7921- del_timer_sync(&timer);
7922+#if CONFIG_SMP
7923+asmlinkage void schedule_tail(task_t *prev)
7924+{
7925+ finish_arch_switch(this_rq(), prev);
7926+}
7927+#endif
7928+
7929+static inline task_t * context_switch(task_t *prev, task_t *next)
7930+{
7931+ struct mm_struct *mm = next->mm;
7932+ struct mm_struct *oldmm = prev->active_mm;
7933
7934- timeout = expire - jiffies;
7935+ if (unlikely(!mm)) {
7936+ next->active_mm = oldmm;
7937+ atomic_inc(&oldmm->mm_count);
7938+ enter_lazy_tlb(oldmm, next, smp_processor_id());
7939+ } else
7940+ switch_mm(oldmm, mm, next, smp_processor_id());
7941
7942- out:
7943- return timeout < 0 ? 0 : timeout;
7944+ if (unlikely(!prev->mm)) {
7945+ prev->active_mm = NULL;
7946+ mmdrop(oldmm);
7947+ }
7948+
7949+ /* Here we just switch the register state and the stack. */
7950+ switch_to(prev, next, prev);
7951+
7952+ return prev;
7953 }
7954
7955-/*
7956- * schedule_tail() is getting called from the fork return path. This
7957- * cleans up all remaining scheduler things, without impacting the
7958- * common case.
7959- */
7960-static inline void __schedule_tail(struct task_struct *prev)
7961+unsigned long nr_running(void)
7962 {
7963-#ifdef CONFIG_SMP
7964- int policy;
7965+ unsigned long i, sum = 0;
7966
7967- /*
7968- * prev->policy can be written from here only before `prev'
7969- * can be scheduled (before setting prev->cpus_runnable to ~0UL).
7970- * Of course it must also be read before allowing prev
7971- * to be rescheduled, but since the write depends on the read
7972- * to complete, wmb() is enough. (the spin_lock() acquired
7973- * before setting cpus_runnable is not enough because the spin_lock()
7974- * common code semantics allows code outside the critical section
7975- * to enter inside the critical section)
7976- */
7977- policy = prev->policy;
7978- prev->policy = policy & ~SCHED_YIELD;
7979- wmb();
7980+ for (i = 0; i < smp_num_cpus; i++)
7981+ sum += cpu_rq(cpu_logical_map(i))->nr_running;
7982
7983- /*
7984- * fast path falls through. We have to clear cpus_runnable before
7985- * checking prev->state to avoid a wakeup race. Protect against
7986- * the task exiting early.
7987- */
7988- task_lock(prev);
7989- task_release_cpu(prev);
7990- mb();
7991- if (prev->state == TASK_RUNNING)
7992- goto needs_resched;
7993+ return sum;
7994+}
7995
7996-out_unlock:
7997- task_unlock(prev); /* Synchronise here with release_task() if prev is TASK_ZOMBIE */
7998- return;
7999+/* Note: the per-cpu information is useful only to get the cumulative result */
8000+unsigned long nr_uninterruptible(void)
8001+{
8002+ unsigned long i, sum = 0;
8003
8004- /*
8005- * Slow path - we 'push' the previous process and
8006- * reschedule_idle() will attempt to find a new
8007- * processor for it. (but it might preempt the
8008- * current process as well.) We must take the runqueue
8009- * lock and re-check prev->state to be correct. It might
8010- * still happen that this process has a preemption
8011- * 'in progress' already - but this is not a problem and
8012- * might happen in other circumstances as well.
8013- */
8014-needs_resched:
8015- {
8016- unsigned long flags;
8017+ for (i = 0; i < smp_num_cpus; i++)
8018+ sum += cpu_rq(cpu_logical_map(i))->nr_uninterruptible;
8019
8020- /*
8021- * Avoid taking the runqueue lock in cases where
8022- * no preemption-check is necessery:
8023- */
8024- if ((prev == idle_task(smp_processor_id())) ||
8025- (policy & SCHED_YIELD))
8026- goto out_unlock;
8027+ return sum;
8028+}
8029
8030- spin_lock_irqsave(&runqueue_lock, flags);
8031- if ((prev->state == TASK_RUNNING) && !task_has_cpu(prev))
8032- reschedule_idle(prev);
8033- spin_unlock_irqrestore(&runqueue_lock, flags);
8034- goto out_unlock;
8035- }
8036-#else
8037- prev->policy &= ~SCHED_YIELD;
8038-#endif /* CONFIG_SMP */
8039+unsigned long nr_context_switches(void)
8040+{
8041+ unsigned long i, sum = 0;
8042+
8043+ for (i = 0; i < smp_num_cpus; i++)
8044+ sum += cpu_rq(cpu_logical_map(i))->nr_switches;
8045+
8046+ return sum;
8047 }
8048
8049-asmlinkage void schedule_tail(struct task_struct *prev)
8050+inline int idle_cpu(int cpu)
8051 {
8052- __schedule_tail(prev);
8053+ return cpu_curr(cpu) == cpu_rq(cpu)->idle;
8054 }
8055
8056+#if CONFIG_SMP
8057 /*
8058- * 'schedule()' is the scheduler function. It's a very simple and nice
8059- * scheduler: it's not perfect, but certainly works for most things.
8060- *
8061- * The goto is "interesting".
8062- *
8063- * NOTE!! Task 0 is the 'idle' task, which gets called when no other
8064- * tasks can run. It can not be killed, and it cannot sleep. The 'state'
8065- * information in task[0] is never used.
8066+ * Lock the busiest runqueue as well, this_rq is locked already.
8067+ * Recalculate nr_running if we have to drop the runqueue lock.
8068 */
8069-asmlinkage void schedule(void)
8070+static inline unsigned int double_lock_balance(runqueue_t *this_rq,
8071+ runqueue_t *busiest, int this_cpu, int idle, unsigned int nr_running)
8072 {
8073- struct schedule_data * sched_data;
8074- struct task_struct *prev, *next, *p;
8075- struct list_head *tmp;
8076- int this_cpu, c;
8077+ if (unlikely(!spin_trylock(&busiest->lock))) {
8078+ if (busiest < this_rq) {
8079+ spin_unlock(&this_rq->lock);
8080+ spin_lock(&busiest->lock);
8081+ spin_lock(&this_rq->lock);
8082+ /* Need to recalculate nr_running */
8083+ if (idle || (this_rq->nr_running > this_rq->prev_nr_running[this_cpu]))
8084+ nr_running = this_rq->nr_running;
8085+ else
8086+ nr_running = this_rq->prev_nr_running[this_cpu];
8087+ } else
8088+ spin_lock(&busiest->lock);
8089+ }
8090+ return nr_running;
8091+}
8092
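/*
 * Illustrative sketch, not part of the patch: the lock-ordering rule that
 * double_lock_balance() above implements - when two runqueue locks are
 * needed, they are always taken in ascending address order, so two CPUs
 * balancing against each other cannot deadlock. Modelled here with pthread
 * mutexes; the nr_running recalculation the kernel performs after dropping
 * its lock is omitted.
 */
#include <pthread.h>
#include <stdio.h>

static void lock_pair_ordered(pthread_mutex_t *this_lock,
			      pthread_mutex_t *busiest_lock)
{
	/* caller already holds this_lock and wants busiest_lock as well */
	if (pthread_mutex_trylock(busiest_lock) != 0) {
		if (busiest_lock < this_lock) {
			/* drop and retake in ascending order, as the patch does */
			pthread_mutex_unlock(this_lock);
			pthread_mutex_lock(busiest_lock);
			pthread_mutex_lock(this_lock);
		} else {
			pthread_mutex_lock(busiest_lock);
		}
	}
}

int main(void)
{
	pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
	pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;

	pthread_mutex_lock(&a);
	lock_pair_ordered(&a, &b);
	printf("both locks held without risking an ABBA deadlock\n");
	pthread_mutex_unlock(&b);
	pthread_mutex_unlock(&a);
	return 0;
}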
8093+/*
8094+ * Move a task from a remote runqueue to the local runqueue.
8095+ * Both runqueues must be locked.
8096+ */
8097+static inline int pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, runqueue_t *this_rq, int this_cpu)
8098+{
8099+ int resched = 0;
8100
8101- spin_lock_prefetch(&runqueue_lock);
8102+ dequeue_task(p, src_array);
8103+ src_rq->nr_running--;
8104+ p->cpu = this_cpu;
8105+ this_rq->nr_running++;
8106+ enqueue_task(p, this_rq->active);
8107+ /*
8108+ * Note that idle threads have a prio of MAX_PRIO, for this test
8109+ * to be always true for them.
8110+ */
8111+ if (p->prio < this_rq->curr->prio)
8112+ resched = 1;
8113
8114- BUG_ON(!current->active_mm);
8115-need_resched_back:
8116- prev = current;
8117- this_cpu = prev->processor;
8118+ return resched;
8119+}
8120
8121- if (unlikely(in_interrupt())) {
8122- printk("Scheduling in interrupt\n");
8123- BUG();
8124+static inline int idle_cpu_reschedule(task_t * p, int cpu)
8125+{
8126+ if (unlikely(!(p->cpus_allowed & (1UL << cpu))))
8127+ return 0;
8128+ return idle_cpu(cpu);
8129+}
8130+
8131+#include <linux/smp_balance.h>
8132+
8133+static int reschedule_idle(task_t * p)
8134+{
8135+ int p_cpu = p->cpu, i;
8136+
8137+ if (idle_cpu(p_cpu))
8138+ return 0;
8139+
8140+ p_cpu = cpu_number_map(p_cpu);
8141+
8142+ for (i = (p_cpu + 1) % smp_num_cpus;
8143+ i != p_cpu;
8144+ i = (i + 1) % smp_num_cpus) {
8145+ int physical = cpu_logical_map(i);
8146+
8147+ if (idle_cpu_reschedule(p, physical)) {
8148+ physical = arch_reschedule_idle_override(p, physical);
8149+ p->cpu = physical;
8150+ return 1;
8151+ }
8152 }
8153
8154- release_kernel_lock(prev, this_cpu);
8155+ return 0;
8156+}
8157+
8158+/*
8159+ * Current runqueue is empty, or rebalance tick: if there is an
8160+ * imbalance (current runqueue is too short) then pull from
8161+ * busiest runqueue(s).
8162+ *
8163+ * We call this with the current runqueue locked,
8164+ * irqs disabled.
8165+ */
8166+static void load_balance(runqueue_t *this_rq, int idle)
8167+{
8168+ int imbalance, nr_running, load, max_load,
8169+ idx, i, this_cpu = this_rq - runqueues;
8170+ task_t *tmp;
8171+ runqueue_t *busiest, *rq_src;
8172+ prio_array_t *array;
8173+ struct list_head *head, *curr;
8174+ int resched;
8175
8176 /*
8177- * 'sched_data' is protected by the fact that we can run
8178- * only one process per CPU.
8179+ * Handle architecture-specific balancing, such as hyperthreading.
8180 */
8181- sched_data = & aligned_data[this_cpu].schedule_data;
8182+ if (arch_load_balance(this_cpu, idle))
8183+ return;
8184
8185- spin_lock_irq(&runqueue_lock);
8186+ retry:
8187+ /*
8188+ * We search all runqueues to find the most busy one.
8189+ * We do this lockless to reduce cache-bouncing overhead,
8190+ * we re-check the 'best' source CPU later on again, with
8191+ * the lock held.
8192+ *
8193+ * We fend off statistical fluctuations in runqueue lengths by
8194+ * saving the runqueue length during the previous load-balancing
8195+ * operation and using the smaller of the current and saved lengths.
8196+ * If a runqueue is long enough for a longer amount of time then
8197+ * we recognize it and pull tasks from it.
8198+ *
8199+ * The 'current runqueue length' is a statistical maximum variable,
8200+ * for that one we take the longer one - to avoid fluctuations in
8201+ * the other direction. So for a load-balance to happen it needs
8202+ * stable long runqueue on the target CPU and stable short runqueue
8203+ * on the local runqueue.
8204+ *
8205+ * We make an exception if this CPU is about to become idle - in
8206+ * that case we are less picky about moving a task across CPUs and
8207+ * take what can be taken.
8208+ */
8209+ if (idle || (this_rq->nr_running > this_rq->prev_nr_running[this_cpu]))
8210+ nr_running = this_rq->nr_running;
8211+ else
8212+ nr_running = this_rq->prev_nr_running[this_cpu];
8213
8214- /* move an exhausted RR process to be last.. */
8215- if (unlikely(prev->policy == SCHED_RR))
8216- if (!prev->counter) {
8217- prev->counter = NICE_TO_TICKS(prev->nice);
8218- move_last_runqueue(prev);
8219- }
8220+ busiest = NULL;
8221+ max_load = 1;
8222+ for (i = 0; i < smp_num_cpus; i++) {
8223+ int logical = cpu_logical_map(i);
8224
8225- switch (prev->state) {
8226- case TASK_INTERRUPTIBLE:
8227- if (signal_pending(prev)) {
8228- prev->state = TASK_RUNNING;
8229- break;
8230- }
8231- default:
8232- del_from_runqueue(prev);
8233- case TASK_RUNNING:;
8234+ rq_src = cpu_rq(logical);
8235+ if (idle || (rq_src->nr_running < this_rq->prev_nr_running[logical]))
8236+ load = rq_src->nr_running;
8237+ else
8238+ load = this_rq->prev_nr_running[logical];
8239+ this_rq->prev_nr_running[logical] = rq_src->nr_running;
8240+
8241+ if ((load > max_load) && (rq_src != this_rq)) {
8242+ busiest = rq_src;
8243+ max_load = load;
8244+ }
8245 }
8246- prev->need_resched = 0;
8247+
8248+ if (likely(!busiest))
8249+ return;
8250+
8251+ imbalance = (max_load - nr_running) / 2;
8252+
8253+	/* It needs at least a ~25% imbalance to trigger balancing. */
8254+ if (!idle && (imbalance < (max_load + 3)/4))
8255+ return;
8256
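/*
 * Illustrative sketch, not part of the patch: the ~25% imbalance filter
 * above. A busy CPU only pulls tasks when the busiest runqueue is clearly
 * longer than its own; an idle CPU takes whatever it can get.
 */
#include <stdio.h>

static int demo_passes_imbalance_check(int max_load, int nr_running, int idle)
{
	int imbalance = (max_load - nr_running) / 2;

	return idle || imbalance >= (max_load + 3) / 4;
}

int main(void)
{
	printf("busy, local 3 vs busiest 4: %s\n",
	       demo_passes_imbalance_check(4, 3, 0) ? "balance" : "skip");	/* skip */
	printf("busy, local 3 vs busiest 9: %s\n",
	       demo_passes_imbalance_check(9, 3, 0) ? "balance" : "skip");	/* balance */
	printf("idle, local 0 vs busiest 2: %s\n",
	       demo_passes_imbalance_check(2, 0, 1) ? "balance" : "skip");	/* balance */
	return 0;
}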
8257 /*
8258- * this is the scheduler proper:
8259+ * Make sure nothing significant changed since we checked the
8260+ * runqueue length.
8261 */
8262+ if (double_lock_balance(this_rq, busiest, this_cpu, idle, nr_running) > nr_running ||
8263+ busiest->nr_running < max_load)
8264+ goto out_unlock_retry;
8265
8266-repeat_schedule:
8267 /*
8268- * Default process to select..
8269+ * We first consider expired tasks. Those will likely not be
8270+ * executed in the near future, and they are most likely to
8271+ * be cache-cold, thus switching CPUs has the least effect
8272+ * on them.
8273 */
8274- next = idle_task(this_cpu);
8275- c = -1000;
8276- list_for_each(tmp, &runqueue_head) {
8277- p = list_entry(tmp, struct task_struct, run_list);
8278- if (can_schedule(p, this_cpu)) {
8279- int weight = goodness(p, this_cpu, prev->active_mm);
8280- if (weight > c)
8281- c = weight, next = p;
8282+ if (busiest->expired->nr_active)
8283+ array = busiest->expired;
8284+ else
8285+ array = busiest->active;
8286+
8287+ resched = 0;
8288+new_array:
8289+ /* Start searching at priority 0: */
8290+ idx = 0;
8291+skip_bitmap:
8292+ if (!idx)
8293+ idx = sched_find_first_bit(array->bitmap);
8294+ else
8295+ idx = find_next_bit(array->bitmap, MAX_PRIO, idx);
8296+ if (idx == MAX_PRIO) {
8297+ if (array == busiest->expired) {
8298+ array = busiest->active;
8299+ goto new_array;
8300 }
8301+ goto out_unlock;
8302 }
8303
8304- /* Do we need to re-calculate counters? */
8305- if (unlikely(!c)) {
8306- struct task_struct *p;
8307-
8308- spin_unlock_irq(&runqueue_lock);
8309- read_lock(&tasklist_lock);
8310- for_each_task(p)
8311- p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);
8312- read_unlock(&tasklist_lock);
8313- spin_lock_irq(&runqueue_lock);
8314- goto repeat_schedule;
8315+ head = array->queue + idx;
8316+ curr = head->prev;
8317+skip_queue:
8318+ tmp = list_entry(curr, task_t, run_list);
8319+
8320+ /*
8321+ * We do not migrate tasks that are:
8322+ * 1) running (obviously), or
8323+ * 2) cannot be migrated to this CPU due to cpus_allowed, or
8324+ * 3) are cache-hot on their current CPU.
8325+ */
8326+
8327+#define CAN_MIGRATE_TASK(p,rq,this_cpu) \
8328+ ((jiffies - (p)->sleep_timestamp > cache_decay_ticks) && \
8329+ ((p) != (rq)->curr) && \
8330+ ((p)->cpus_allowed & (1UL << (this_cpu))))
8331+
8332+ curr = curr->prev;
8333+
8334+ if (!CAN_MIGRATE_TASK(tmp, busiest, this_cpu)) {
8335+ if (curr != head)
8336+ goto skip_queue;
8337+ idx++;
8338+ goto skip_bitmap;
8339+ }
8340+ resched |= pull_task(busiest, array, tmp, this_rq, this_cpu);
8341+ if (--imbalance > 0) {
8342+ if (curr != head)
8343+ goto skip_queue;
8344+ idx++;
8345+ goto skip_bitmap;
8346 }
8347+out_unlock:
8348+ spin_unlock(&busiest->lock);
8349+ if (resched)
8350+ resched_task(this_rq->curr);
8351+ return;
8352+out_unlock_retry:
8353+ spin_unlock(&busiest->lock);
8354+ goto retry;
8355+}
8356
8357- /*
8358- * from this point on nothing can prevent us from
8359- * switching to the next task, save this fact in
8360- * sched_data.
8361- */
8362- sched_data->curr = next;
8363- task_set_cpu(next, this_cpu);
8364- spin_unlock_irq(&runqueue_lock);
8365-
8366- if (unlikely(prev == next)) {
8367- /* We won't go through the normal tail, so do this by hand */
8368- prev->policy &= ~SCHED_YIELD;
8369- goto same_process;
8370+/*
8371+ * One of the idle_cpu_tick() or the busy_cpu_tick() functions will
8372+ * get called every timer tick, on every CPU. Our balancing action
8373+ * frequency and balancing aggressiveness depend on whether the CPU is
8374+ * idle or not.
8375+ *
8376+ * busy-rebalance every 250 msecs. idle-rebalance every 100 msec.
8377+ */
8378+#define BUSY_REBALANCE_TICK (HZ/4 ?: 1)
8379+#define IDLE_REBALANCE_TICK (HZ/10 ?: 1)
8380+
8381+static inline void idle_tick(void)
8382+{
8383+ if (unlikely(time_before_eq(this_rq()->last_jiffy + IDLE_REBALANCE_TICK, jiffies))) {
8384+ spin_lock(&this_rq()->lock);
8385+ load_balance(this_rq(), 1);
8386+ spin_unlock(&this_rq()->lock);
8387+ this_rq()->last_jiffy = jiffies;
8388 }
8389+}
8390
8391-#ifdef CONFIG_SMP
8392- /*
8393- * maintain the per-process 'last schedule' value.
8394- * (this has to be recalculated even if we reschedule to
8395- * the same process) Currently this is only used on SMP,
8396- * and it's approximate, so we do not have to maintain
8397- * it while holding the runqueue spinlock.
8398- */
8399- sched_data->last_schedule = get_cycles();
8400+#endif
8401
8402- /*
8403- * We drop the scheduler lock early (it's a global spinlock),
8404- * thus we have to lock the previous process from getting
8405- * rescheduled during switch_to().
8406- */
8407+/*
8408+ * We place interactive tasks back into the active array, if possible.
8409+ *
8410+ * To guarantee that this does not starve expired tasks we ignore the
8411+ * interactivity of a task if the first expired task had to wait more
8412+ * than a 'reasonable' amount of time. This deadline timeout is
8413+ * load-dependent, as the frequency of array switches decreases with
8414+ * increasing number of running tasks:
8415+ */
8416+#define EXPIRED_STARVING(rq) \
8417+ ((rq)->expired_timestamp && \
8418+ (jiffies - (rq)->expired_timestamp >= \
8419+ STARVATION_LIMIT * ((rq)->nr_running) + 1))
8420
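/*
 * Illustrative sketch, not part of the patch: how the EXPIRED_STARVING()
 * deadline above scales with load. HZ = 100 is assumed (defined outside this
 * hunk), so STARVATION_LIMIT = 200 ticks; once the first expired task has
 * waited this long, interactive reinsertion stops so the array switch can
 * finally happen.
 */
#include <stdio.h>

#define D_HZ 100
#define D_STARVATION_LIMIT (2 * D_HZ)

int main(void)
{
	unsigned long nr_running;

	for (nr_running = 1; nr_running <= 8; nr_running *= 2)
		printf("%lu runnable -> expired tasks wait at most %lu ticks (~%lu s)\n",
		       nr_running,
		       D_STARVATION_LIMIT * nr_running + 1,
		       (D_STARVATION_LIMIT * nr_running + 1) / D_HZ);
	return 0;
}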
8421-#endif /* CONFIG_SMP */
8422+/*
8423+ * This function gets called by the timer code, with HZ frequency.
8424+ * We call it with interrupts disabled.
8425+ */
8426+void scheduler_tick(int user_tick, int system)
8427+{
8428+ int cpu = smp_processor_id();
8429+ runqueue_t *rq = this_rq();
8430+ task_t *p = current;
8431
8432- kstat.context_swtch++;
8433- /*
8434- * there are 3 processes which are affected by a context switch:
8435- *
8436- * prev == .... ==> (last => next)
8437- *
8438- * It's the 'much more previous' 'prev' that is on next's stack,
8439- * but prev is set to (the just run) 'last' process by switch_to().
8440- * This might sound slightly confusing but makes tons of sense.
8441- */
8442- prepare_to_switch();
8443- {
8444- struct mm_struct *mm = next->mm;
8445- struct mm_struct *oldmm = prev->active_mm;
8446- if (!mm) {
8447- BUG_ON(next->active_mm);
8448- next->active_mm = oldmm;
8449- atomic_inc(&oldmm->mm_count);
8450- enter_lazy_tlb(oldmm, next, this_cpu);
8451- } else {
8452- BUG_ON(next->active_mm != mm);
8453- switch_mm(oldmm, mm, next, this_cpu);
8454+ if (p == rq->idle) {
8455+ if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
8456+ kstat.per_cpu_system[cpu] += system;
8457+#if CONFIG_SMP
8458+ idle_tick();
8459+#endif
8460+ return;
8461+ }
8462+ if (TASK_NICE(p) > 0)
8463+ kstat.per_cpu_nice[cpu] += user_tick;
8464+ else
8465+ kstat.per_cpu_user[cpu] += user_tick;
8466+ kstat.per_cpu_system[cpu] += system;
8467+
8468+ /* Task might have expired already, but not scheduled off yet */
8469+ if (p->array != rq->active) {
8470+ set_tsk_need_resched(p);
8471+ return;
8472+ }
8473+ spin_lock(&rq->lock);
8474+ if (unlikely(rt_task(p))) {
8475+ /*
8476+ * RR tasks need a special form of timeslice management.
8477+ * FIFO tasks have no timeslices.
8478+ */
8479+ if ((p->policy == SCHED_RR) && !--p->time_slice) {
8480+ p->time_slice = TASK_TIMESLICE(p);
8481+ p->first_time_slice = 0;
8482+ set_tsk_need_resched(p);
8483+
8484+ /* put it at the end of the queue: */
8485+ dequeue_task(p, rq->active);
8486+ enqueue_task(p, rq->active);
8487 }
8488+ goto out;
8489+ }
8490+ /*
8491+ * The task was running during this tick - update the
8492+ * time slice counter and the sleep average. Note: we
8493+ * do not update a process's priority until it either
8494+ * goes to sleep or uses up its timeslice. This makes
8495+ * it possible for interactive tasks to use up their
8496+ * timeslices at their highest priority levels.
8497+ */
8498+ if (p->sleep_avg)
8499+ p->sleep_avg--;
8500+ if (!--p->time_slice) {
8501+ dequeue_task(p, rq->active);
8502+ set_tsk_need_resched(p);
8503+ p->prio = effective_prio(p);
8504+ p->time_slice = TASK_TIMESLICE(p);
8505+ p->first_time_slice = 0;
8506+
8507+ if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
8508+ if (!rq->expired_timestamp)
8509+ rq->expired_timestamp = jiffies;
8510+ enqueue_task(p, rq->expired);
8511+ } else
8512+ enqueue_task(p, rq->active);
8513+ }
8514+out:
8515+#if CONFIG_SMP
8516+ if (unlikely(time_before_eq(this_rq()->last_jiffy + BUSY_REBALANCE_TICK, jiffies))) {
8517+ load_balance(rq, 0);
8518+ rq->last_jiffy = jiffies;
8519+ }
8520+#endif
8521+ spin_unlock(&rq->lock);
8522+}
8523+
8524+void scheduling_functions_start_here(void) { }
8525+
8526+/*
8527+ * 'schedule()' is the main scheduler function.
8528+ */
8529+asmlinkage void schedule(void)
8530+{
8531+ task_t *prev, *next;
8532+ runqueue_t *rq;
8533+ prio_array_t *array;
8534+ struct list_head *queue;
8535+ int idx;
8536+
8537+ if (unlikely(in_interrupt()))
8538+ BUG();
8539
8540- if (!prev->mm) {
8541- prev->active_mm = NULL;
8542- mmdrop(oldmm);
8543+need_resched:
8544+ prev = current;
8545+ rq = this_rq();
8546+
8547+ release_kernel_lock(prev, smp_processor_id());
8548+ prev->sleep_timestamp = jiffies;
8549+ spin_lock_irq(&rq->lock);
8550+
8551+ switch (prev->state) {
8552+ case TASK_INTERRUPTIBLE:
8553+ if (unlikely(signal_pending(prev))) {
8554+ prev->state = TASK_RUNNING;
8555+ break;
8556 }
8557+ default:
8558+ deactivate_task(prev, rq);
8559+ case TASK_RUNNING:
8560+ ;
8561+ }
8562+#if CONFIG_SMP
8563+pick_next_task:
8564+#endif
8565+ if (unlikely(!rq->nr_running)) {
8566+#if CONFIG_SMP
8567+ load_balance(rq, 2);
8568+ rq->last_jiffy = jiffies;
8569+ if (rq->nr_running)
8570+ goto pick_next_task;
8571+#endif
8572+ next = rq->idle;
8573+ rq->expired_timestamp = 0;
8574+ goto switch_tasks;
8575 }
8576
8577- /*
8578- * This just switches the register state and the
8579- * stack.
8580- */
8581- switch_to(prev, next, prev);
8582- __schedule_tail(prev);
8583+ array = rq->active;
8584+ if (unlikely(!array->nr_active)) {
8585+ /*
8586+ * Switch the active and expired arrays.
8587+ */
8588+ rq->active = rq->expired;
8589+ rq->expired = array;
8590+ array = rq->active;
8591+ rq->expired_timestamp = 0;
8592+ }
8593+
8594+ idx = sched_find_first_bit(array->bitmap);
8595+ queue = array->queue + idx;
8596+ next = list_entry(queue->next, task_t, run_list);
8597+
8598+switch_tasks:
8599+ prefetch(next);
8600+ clear_tsk_need_resched(prev);
8601+
8602+ if (likely(prev != next)) {
8603+ rq->nr_switches++;
8604+ rq->curr = next;
8605+
8606+ prepare_arch_switch(rq, next);
8607+ prev = context_switch(prev, next);
8608+ barrier();
8609+ rq = this_rq();
8610+ finish_arch_switch(rq, prev);
8611+ } else
8612+ spin_unlock_irq(&rq->lock);
8613
8614-same_process:
8615 reacquire_kernel_lock(current);
8616- if (current->need_resched)
8617- goto need_resched_back;
8618- return;
8619+ if (need_resched())
8620+ goto need_resched;
8621 }
8622
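
The schedule() code above picks the next task in O(1): sched_find_first_bit() scans the priority bitmap of the active array, and once the active array runs empty it is swapped with the expired array. The following stand-alone user-space sketch is not part of the patch; it only mimics that mechanism, with __builtin_ctzl() standing in for sched_find_first_bit() and a single demo task per priority level.

/*
 * Illustrative model of the O(1) two-array runqueue (assumption: 8 demo
 * priority levels instead of the patch's MAX_PRIO of 140, and one task
 * id per priority instead of a list).
 */
#include <stdio.h>
#include <string.h>

#define NPRIO 8

struct prio_array {
	unsigned long bitmap;		/* bit p set => priority p non-empty */
	int queue[NPRIO];		/* demo: one "task id" per priority */
	int nr_active;
};

static void enqueue(struct prio_array *a, int prio, int task)
{
	a->queue[prio] = task;
	a->bitmap |= 1UL << prio;
	a->nr_active++;
}

static int dequeue_highest(struct prio_array *a)
{
	int prio = __builtin_ctzl(a->bitmap);	/* ~ sched_find_first_bit() */
	int task = a->queue[prio];

	a->bitmap &= ~(1UL << prio);
	a->nr_active--;
	return task;
}

int main(void)
{
	struct prio_array arrays[2], *active = &arrays[0], *expired = &arrays[1];

	memset(arrays, 0, sizeof(arrays));
	enqueue(active, 3, 100);	/* lower number == higher priority */
	enqueue(expired, 5, 200);	/* already used up its timeslice */

	while (active->nr_active || expired->nr_active) {
		if (!active->nr_active) {
			/* the array switch done in schedule() above */
			struct prio_array *tmp = active;
			active = expired;
			expired = tmp;
		}
		printf("running task %d\n", dequeue_highest(active));
	}
	return 0;
}
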
8623 /*
8624- * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just wake everything
8625- * up. If it's an exclusive wakeup (nr_exclusive == small +ve number) then we wake all the
8626- * non-exclusive tasks and one exclusive task.
8627+ * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
8628+ * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
8629+ * number) then we wake all the non-exclusive tasks and one exclusive task.
8630 *
8631 * There are circumstances in which we can try to wake a task which has already
8632- * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns zero
8633- * in this (rare) case, and we handle it by contonuing to scan the queue.
8634+ * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
8635+ * zero in this (rare) case, and we handle it by continuing to scan the queue.
8636 */
8637-static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode,
8638- int nr_exclusive, const int sync)
8639+static inline void __wake_up_common(wait_queue_head_t *q, unsigned int mode, int nr_exclusive, int sync)
8640 {
8641 struct list_head *tmp;
8642- struct task_struct *p;
8643-
8644- CHECK_MAGIC_WQHEAD(q);
8645- WQ_CHECK_LIST_HEAD(&q->task_list);
8646-
8647- list_for_each(tmp,&q->task_list) {
8648- unsigned int state;
8649- wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
8650+ unsigned int state;
8651+ wait_queue_t *curr;
8652+ task_t *p;
8653
8654- CHECK_MAGIC(curr->__magic);
8655+ list_for_each(tmp, &q->task_list) {
8656+ curr = list_entry(tmp, wait_queue_t, task_list);
8657 p = curr->task;
8658 state = p->state;
8659- if (state & mode) {
8660- WQ_NOTE_WAKER(curr);
8661- if (try_to_wake_up(p, sync) && (curr->flags&WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
8662+ if ((state & mode) && try_to_wake_up(p, sync) &&
8663+ ((curr->flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive))
8664 break;
8665- }
8666 }
8667 }
8668
7f7e7712
KT
8669-void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode, int nr)
8670+void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
0aa7655b
AM
8671 {
8672- if (q) {
8673- unsigned long flags;
8674- wq_read_lock_irqsave(&q->lock, flags);
8675- __wake_up_common(q, mode, nr, 0);
8676- wq_read_unlock_irqrestore(&q->lock, flags);
8677- }
8678+ unsigned long flags;
8679+
8680+ if (unlikely(!q))
8681+ return;
8682+
8683+ wq_read_lock_irqsave(&q->lock, flags);
8684+ __wake_up_common(q, mode, nr_exclusive, 0);
8685+ wq_read_unlock_irqrestore(&q->lock, flags);
8686 }
8687
7f7e7712 8688-void fastcall __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr)
0aa7655b
AM
8689+#if CONFIG_SMP
8690+
7f7e7712 8691+void fastcall __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
0aa7655b
AM
8692 {
8693- if (q) {
8694- unsigned long flags;
8695- wq_read_lock_irqsave(&q->lock, flags);
8696- __wake_up_common(q, mode, nr, 1);
8697- wq_read_unlock_irqrestore(&q->lock, flags);
8698- }
8699+ unsigned long flags;
8700+
8701+ if (unlikely(!q))
8702+ return;
8703+
8704+ wq_read_lock_irqsave(&q->lock, flags);
8705+ if (likely(nr_exclusive))
8706+ __wake_up_common(q, mode, nr_exclusive, 1);
8707+ else
8708+ __wake_up_common(q, mode, nr_exclusive, 0);
8709+ wq_read_unlock_irqrestore(&q->lock, flags);
8710 }
8711
8712+#endif
8713+
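
__wake_up_common() above wakes every task whose state matches when nr_exclusive is 0, but stops after the first WQ_FLAG_EXCLUSIVE waiter once nr_exclusive is used up, which keeps a whole queue of exclusive sleepers from stampeding on a single event. A rough user-space analogue (my illustration, not code from the patch) is the choice between pthread_cond_broadcast(), which wakes every waiter, and pthread_cond_signal(), which wakes one:

/* User-space analogue only: broadcast ~ nr_exclusive == 0 (wake all),
 * signal ~ waking a single exclusive waiter.  Build with -pthread. */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int ready;

static void *waiter(void *arg)
{
	pthread_mutex_lock(&lock);
	while (!ready)
		pthread_cond_wait(&cond, &lock);
	printf("waiter %ld woke up\n", (long)arg);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t[3];
	long i;

	for (i = 0; i < 3; i++)
		pthread_create(&t[i], NULL, waiter, (void *)i);
	sleep(1);			/* let the waiters block */

	pthread_mutex_lock(&lock);
	ready = 1;
	/* pthread_cond_signal(&cond) would wake just one waiter instead. */
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);

	for (i = 0; i < 3; i++)
		pthread_join(t[i], NULL);
	return 0;
}
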
7f7e7712 8714 void fastcall complete(struct completion *x)
0aa7655b
AM
8715 {
8716 unsigned long flags;
8717
8718- spin_lock_irqsave(&x->wait.lock, flags);
8719+ wq_write_lock_irqsave(&x->wait.lock, flags);
8720 x->done++;
8721 __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, 0);
8722- spin_unlock_irqrestore(&x->wait.lock, flags);
8723+ wq_write_unlock_irqrestore(&x->wait.lock, flags);
8724 }
8725
7f7e7712 8726 void fastcall wait_for_completion(struct completion *x)
0aa7655b
AM
8727 {
8728- spin_lock_irq(&x->wait.lock);
8729+ wq_write_lock_irq(&x->wait.lock);
8730 if (!x->done) {
8731 DECLARE_WAITQUEUE(wait, current);
8732
8733@@ -775,14 +1060,14 @@
8734 __add_wait_queue_tail(&x->wait, &wait);
8735 do {
8736 __set_current_state(TASK_UNINTERRUPTIBLE);
8737- spin_unlock_irq(&x->wait.lock);
8738+ wq_write_unlock_irq(&x->wait.lock);
8739 schedule();
8740- spin_lock_irq(&x->wait.lock);
8741+ wq_write_lock_irq(&x->wait.lock);
8742 } while (!x->done);
8743 __remove_wait_queue(&x->wait, &wait);
8744 }
8745 x->done--;
8746- spin_unlock_irq(&x->wait.lock);
8747+ wq_write_unlock_irq(&x->wait.lock);
8748 }
8749
8750 #define SLEEP_ON_VAR \
8751@@ -850,43 +1135,40 @@
8752
8753 void scheduling_functions_end_here(void) { }
8754
8755-#if CONFIG_SMP
8756-/**
8757- * set_cpus_allowed() - change a given task's processor affinity
8758- * @p: task to bind
8759- * @new_mask: bitmask of allowed processors
8760- *
8761- * Upon return, the task is running on a legal processor. Note the caller
8762- * must have a valid reference to the task: it must not exit() prematurely.
8763- * This call can sleep; do not hold locks on call.
8764- */
8765-void set_cpus_allowed(struct task_struct *p, unsigned long new_mask)
8766+void set_user_nice(task_t *p, long nice)
8767 {
8768- new_mask &= cpu_online_map;
8769- BUG_ON(!new_mask);
8770-
8771- p->cpus_allowed = new_mask;
8772+ unsigned long flags;
8773+ prio_array_t *array;
8774+ runqueue_t *rq;
8775
8776+ if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
8777+ return;
8778 /*
8779- * If the task is on a no-longer-allowed processor, we need to move
8780- * it. If the task is not current, then set need_resched and send
8781- * its processor an IPI to reschedule.
8782+ * We have to be careful, if called from sys_setpriority(),
8783+ * the task might be in the middle of scheduling on another CPU.
8784 */
8785- if (!(p->cpus_runnable & p->cpus_allowed)) {
8786- if (p != current) {
8787- p->need_resched = 1;
8788- smp_send_reschedule(p->processor);
8789- }
8790+ rq = task_rq_lock(p, &flags);
8791+ if (rt_task(p)) {
8792+ p->static_prio = NICE_TO_PRIO(nice);
8793+ goto out_unlock;
8794+ }
8795+ array = p->array;
8796+ if (array)
8797+ dequeue_task(p, array);
8798+ p->static_prio = NICE_TO_PRIO(nice);
8799+ p->prio = NICE_TO_PRIO(nice);
8800+ if (array) {
8801+ enqueue_task(p, array);
8802 /*
8803- * Wait until we are on a legal processor. If the task is
8804- * current, then we should be on a legal processor the next
8805- * time we reschedule. Otherwise, we need to wait for the IPI.
8806+ * If the task is running and lowered its priority,
8807+ * or increased its priority then reschedule its CPU:
8808 */
8809- while (!(p->cpus_runnable & p->cpus_allowed))
8810- schedule();
8811+ if (p == rq->curr)
8812+ resched_task(rq->curr);
8813 }
8814+out_unlock:
8815+ task_rq_unlock(rq, &flags);
8816 }
8817-#endif /* CONFIG_SMP */
8818
8819 #ifndef __alpha__
8820
8821@@ -898,7 +1180,7 @@
8822
8823 asmlinkage long sys_nice(int increment)
8824 {
8825- long newprio;
8826+ long nice;
8827
8828 /*
8829 * Setpriority might change our priority at the same moment.
8830@@ -914,32 +1196,46 @@
8831 if (increment > 40)
8832 increment = 40;
8833
8834- newprio = current->nice + increment;
8835- if (newprio < -20)
8836- newprio = -20;
8837- if (newprio > 19)
8838- newprio = 19;
8839- current->nice = newprio;
8840+ nice = PRIO_TO_NICE(current->static_prio) + increment;
8841+ if (nice < -20)
8842+ nice = -20;
8843+ if (nice > 19)
8844+ nice = 19;
8845+ set_user_nice(current, nice);
8846 return 0;
8847 }
8848
8849 #endif
8850
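
set_user_nice() above silently ignores values outside -20..19, and the rewritten sys_nice() clamps the result of the increment to the same range before handing it over. From user space the equivalent interfaces are nice(2) and getpriority(2); a minimal, unprivileged demonstration (illustration only, not part of the patch):

#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <sys/time.h>
#include <sys/resource.h>

int main(void)
{
	errno = 0;
	/* Raising the nice value (lowering priority) never needs CAP_SYS_NICE. */
	if (nice(5) == -1 && errno)
		perror("nice");

	/* getpriority() reports the effective nice value, clamped to -20..19. */
	errno = 0;
	int cur = getpriority(PRIO_PROCESS, 0);
	if (cur == -1 && errno)
		perror("getpriority");
	else
		printf("current nice value: %d\n", cur);
	return 0;
}
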
8851-static inline struct task_struct *find_process_by_pid(pid_t pid)
8852+/*
8853+ * This is the priority value as seen by users in /proc
8854+ *
8855+ * RT tasks are offset by -200. Normal tasks are centered
8856+ * around 0, value goes from -16 to +15.
8857+ */
8858+int task_prio(task_t *p)
8859 {
8860- struct task_struct *tsk = current;
8861+ return p->prio - MAX_USER_RT_PRIO;
8862+}
8863
8864- if (pid)
8865- tsk = find_task_by_pid(pid);
8866- return tsk;
8867+int task_nice(task_t *p)
8868+{
8869+ return TASK_NICE(p);
8870+}
8871+
8872+static inline task_t *find_process_by_pid(pid_t pid)
8873+{
8874+ return pid ? find_task_by_pid(pid) : current;
8875 }
8876
8877-static int setscheduler(pid_t pid, int policy,
8878- struct sched_param *param)
8879+static int setscheduler(pid_t pid, int policy, struct sched_param *param)
8880 {
8881 struct sched_param lp;
8882- struct task_struct *p;
8883+ prio_array_t *array;
8884+ unsigned long flags;
8885+ runqueue_t *rq;
8886 int retval;
8887+ task_t *p;
8888
8889 retval = -EINVAL;
8890 if (!param || pid < 0)
8891@@ -953,14 +1249,19 @@
8892 * We play safe to avoid deadlocks.
8893 */
8894 read_lock_irq(&tasklist_lock);
8895- spin_lock(&runqueue_lock);
8896
8897 p = find_process_by_pid(pid);
8898
8899 retval = -ESRCH;
8900 if (!p)
8901- goto out_unlock;
8902-
8903+ goto out_unlock_tasklist;
8904+
8905+ /*
8906+ * To be able to change p->policy safely, the appropriate
8907+ * runqueue lock must be held.
8908+ */
8909+ rq = task_rq_lock(p, &flags);
8910+
8911 if (policy < 0)
8912 policy = p->policy;
8913 else {
8914@@ -969,40 +1270,48 @@
8915 policy != SCHED_OTHER)
8916 goto out_unlock;
8917 }
8918-
8919+
8920 /*
8921- * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid
8922- * priority for SCHED_OTHER is 0.
8923+ * Valid priorities for SCHED_FIFO and SCHED_RR are
8924+ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_OTHER is 0.
8925 */
8926 retval = -EINVAL;
8927- if (lp.sched_priority < 0 || lp.sched_priority > 99)
8928+ if (lp.sched_priority < 0 || lp.sched_priority > MAX_USER_RT_PRIO-1)
8929 goto out_unlock;
8930 if ((policy == SCHED_OTHER) != (lp.sched_priority == 0))
8931 goto out_unlock;
8932
8933 retval = -EPERM;
8934- if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
8935+ if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
8936 !capable(CAP_SYS_NICE))
8937 goto out_unlock;
8938 if ((current->euid != p->euid) && (current->euid != p->uid) &&
8939 !capable(CAP_SYS_NICE))
8940 goto out_unlock;
8941
8942+ array = p->array;
8943+ if (array)
8944+ deactivate_task(p, task_rq(p));
8945 retval = 0;
8946 p->policy = policy;
8947 p->rt_priority = lp.sched_priority;
8948-
8949- current->need_resched = 1;
8950+ if (policy != SCHED_OTHER)
8951+ p->prio = MAX_USER_RT_PRIO-1 - p->rt_priority;
8952+ else
8953+ p->prio = p->static_prio;
8954+ if (array)
8955+ activate_task(p, task_rq(p));
8956
8957 out_unlock:
8958- spin_unlock(&runqueue_lock);
8959+ task_rq_unlock(rq, &flags);
8960+out_unlock_tasklist:
8961 read_unlock_irq(&tasklist_lock);
8962
8963 out_nounlock:
8964 return retval;
8965 }
8966
8967-asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,
8968+asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,
8969 struct sched_param *param)
8970 {
8971 return setscheduler(pid, policy, param);
8972@@ -1015,7 +1324,7 @@
8973
8974 asmlinkage long sys_sched_getscheduler(pid_t pid)
8975 {
8976- struct task_struct *p;
8977+ task_t *p;
8978 int retval;
8979
8980 retval = -EINVAL;
8981@@ -1026,7 +1335,7 @@
8982 read_lock(&tasklist_lock);
8983 p = find_process_by_pid(pid);
8984 if (p)
8985- retval = p->policy & ~SCHED_YIELD;
8986+ retval = p->policy;
8987 read_unlock(&tasklist_lock);
8988
8989 out_nounlock:
8990@@ -1035,7 +1344,7 @@
8991
8992 asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param *param)
8993 {
8994- struct task_struct *p;
8995+ task_t *p;
8996 struct sched_param lp;
8997 int retval;
8998
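
The reworked setscheduler() accepts SCHED_FIFO/SCHED_RR priorities in 1..MAX_USER_RT_PRIO-1 (1..99) and requires CAP_SYS_NICE for real-time policies. A user-space caller exercising the corresponding syscalls might look like this (illustration only; it needs root or CAP_SYS_NICE to succeed):

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sched.h>
#include <time.h>

int main(void)
{
	struct sched_param sp;
	struct timespec ts;

	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = sched_get_priority_max(SCHED_RR);	/* 99 here */

	if (sched_setscheduler(0, SCHED_RR, &sp) == -1) {
		fprintf(stderr, "sched_setscheduler: %s\n", strerror(errno));
		return 1;
	}
	printf("policy now %d, priority %d\n",
	       sched_getscheduler(0), sp.sched_priority);

	/* Corresponds to sys_sched_rr_get_interval(): the RR timeslice. */
	if (sched_rr_get_interval(0, &ts) == 0)
		printf("RR timeslice: %ld.%09ld s\n",
		       (long)ts.tv_sec, (long)ts.tv_nsec);
	return 0;
}
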
8999@@ -1066,42 +1375,64 @@
9000
9001 asmlinkage long sys_sched_yield(void)
9002 {
9003- /*
9004- * Trick. sched_yield() first counts the number of truly
9005- * 'pending' runnable processes, then returns if it's
9006- * only the current processes. (This test does not have
9007- * to be atomic.) In threaded applications this optimization
9008- * gets triggered quite often.
9009- */
9010+ runqueue_t *rq = this_rq();
9011+ prio_array_t *array;
9012+ int i;
9013
9014- int nr_pending = nr_running;
9015+ spin_lock_irq(&rq->lock);
9016+
9017+ if (unlikely(rq->nr_running == 1)) {
9018+ spin_unlock_irq(&rq->lock);
9019+ return 0;
9020+ }
9021
9022-#if CONFIG_SMP
9023- int i;
9024+ array = current->array;
9025+ if (unlikely(rt_task(current))) {
9026+ list_del(&current->run_list);
9027+ list_add_tail(&current->run_list, array->queue + current->prio);
9028+ goto out_unlock;
9029+ }
9030
9031- // Subtract non-idle processes running on other CPUs.
9032- for (i = 0; i < smp_num_cpus; i++) {
9033- int cpu = cpu_logical_map(i);
9034- if (aligned_data[cpu].schedule_data.curr != idle_task(cpu))
9035- nr_pending--;
9036+ if (unlikely(array == rq->expired) && rq->active->nr_active)
9037+ goto out_unlock;
9038+
9039+ list_del(&current->run_list);
9040+ if (!list_empty(array->queue + current->prio)) {
9041+ list_add(&current->run_list, array->queue[current->prio].next);
9042+ goto out_unlock;
9043 }
9044-#else
9045- // on UP this process is on the runqueue as well
9046- nr_pending--;
9047-#endif
9048- if (nr_pending) {
9049+
9050+ __clear_bit(current->prio, array->bitmap);
9051+ if (likely(array == rq->active) && array->nr_active == 1) {
9052 /*
9053- * This process can only be rescheduled by us,
9054- * so this is safe without any locking.
9055+ * We're the last task in the active queue so
9056+ * we must move ourselves to the expired array
9057+ * to avoid running again immediately.
9058 */
9059- if (current->policy == SCHED_OTHER)
9060- current->policy |= SCHED_YIELD;
9061- current->need_resched = 1;
9062-
9063- spin_lock_irq(&runqueue_lock);
9064- move_last_runqueue(current);
9065- spin_unlock_irq(&runqueue_lock);
9066+ array->nr_active--;
9067+ array = rq->expired;
9068+ array->nr_active++;
9069 }
9070+
9071+ i = sched_find_first_bit(array->bitmap);
9072+
9073+ BUG_ON(i == MAX_PRIO);
9074+ BUG_ON(i == current->prio && array == current->array);
9075+
9076+ if (array == current->array && i < current->prio)
9077+ i = current->prio;
9078+ else {
9079+ current->array = array;
9080+ current->prio = i;
9081+ }
9082+ list_add(&current->run_list, array->queue[i].next);
9083+ __set_bit(i, array->bitmap);
9084+
9085+out_unlock:
9086+ spin_unlock_irq(&rq->lock);
9087+
9088+ schedule();
9089+
9090 return 0;
9091 }
9092
9093@@ -1113,14 +1444,13 @@
9094 */
9095 void yield(void)
9096 {
9097- set_current_state(TASK_RUNNING);
9098+ __set_current_state(TASK_RUNNING);
9099 sys_sched_yield();
9100- schedule();
9101 }
9102
9103 void __cond_resched(void)
9104 {
9105- set_current_state(TASK_RUNNING);
9106+ __set_current_state(TASK_RUNNING);
9107 schedule();
9108 }
9109
9110@@ -1131,7 +1461,7 @@
9111 switch (policy) {
9112 case SCHED_FIFO:
9113 case SCHED_RR:
9114- ret = 99;
9115+ ret = MAX_USER_RT_PRIO-1;
9116 break;
9117 case SCHED_OTHER:
9118 ret = 0;
9119@@ -1158,7 +1488,7 @@
9120 asmlinkage long sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
9121 {
9122 struct timespec t;
9123- struct task_struct *p;
9124+ task_t *p;
9125 int retval = -EINVAL;
9126
9127 if (pid < 0)
9128@@ -1168,8 +1498,8 @@
9129 read_lock(&tasklist_lock);
9130 p = find_process_by_pid(pid);
9131 if (p)
9132- jiffies_to_timespec(p->policy & SCHED_FIFO ? 0 : NICE_TO_TICKS(p->nice),
9133- &t);
9134+ jiffies_to_timespec(p->policy & SCHED_FIFO ?
9135+ 0 : TASK_TIMESLICE(p), &t);
9136 read_unlock(&tasklist_lock);
9137 if (p)
9138 retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
9139@@ -1177,14 +1507,14 @@
9140 return retval;
9141 }
9142
9143-static void show_task(struct task_struct * p)
9144+static void show_task(task_t * p)
9145 {
9146 unsigned long free = 0;
9147 int state;
9148 static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };
9149
9150 printk("%-13.13s ", p->comm);
9151- state = p->state ? ffz(~p->state) + 1 : 0;
9152+ state = p->state ? __ffs(p->state) + 1 : 0;
9153 if (((unsigned) state) < sizeof(stat_nam)/sizeof(char *))
9154 printk(stat_nam[state]);
9155 else
9156@@ -1225,7 +1555,7 @@
9157 printk(" (NOTLB)\n");
9158
9159 {
9160- extern void show_trace_task(struct task_struct *tsk);
9161+ extern void show_trace_task(task_t *tsk);
9162 show_trace_task(p);
9163 }
9164 }
9165@@ -1247,7 +1577,7 @@
9166
9167 void show_state(void)
9168 {
9169- struct task_struct *p;
9170+ task_t *p;
9171
9172 #if (BITS_PER_LONG == 32)
9173 printk("\n"
9174@@ -1270,128 +1600,280 @@
9175 read_unlock(&tasklist_lock);
9176 }
9177
9178-/**
9179- * reparent_to_init() - Reparent the calling kernel thread to the init task.
9180- *
9181- * If a kernel thread is launched as a result of a system call, or if
9182- * it ever exits, it should generally reparent itself to init so that
9183- * it is correctly cleaned up on exit.
9184+/*
9185+ * double_rq_lock - safely lock two runqueues
9186 *
9187- * The various task state such as scheduling policy and priority may have
9188- * been inherited fro a user process, so we reset them to sane values here.
9189+ * Note this does not disable interrupts like task_rq_lock,
9190+ * you need to do so manually before calling.
9191+ */
9192+static inline void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
9193+{
9194+ if (rq1 == rq2)
9195+ spin_lock(&rq1->lock);
9196+ else {
9197+ if (rq1 < rq2) {
9198+ spin_lock(&rq1->lock);
9199+ spin_lock(&rq2->lock);
9200+ } else {
9201+ spin_lock(&rq2->lock);
9202+ spin_lock(&rq1->lock);
9203+ }
9204+ }
9205+}
9206+
9207+/*
9208+ * double_rq_unlock - safely unlock two runqueues
9209 *
9210- * NOTE that reparent_to_init() gives the caller full capabilities.
9211+ * Note this does not restore interrupts like task_rq_unlock,
9212+ * you need to do so manually after calling.
9213 */
9214-void reparent_to_init(void)
9215+static inline void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2)
9216 {
9217- struct task_struct *this_task = current;
9218+ spin_unlock(&rq1->lock);
9219+ if (rq1 != rq2)
9220+ spin_unlock(&rq2->lock);
9221+}
9222
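
double_rq_lock() above avoids an AB-BA deadlock by always taking the lower-addressed runqueue lock first, whichever order the caller names them in. The same discipline can be sketched with plain pthread mutexes (user-space illustration, not code from the patch):

#include <pthread.h>
#include <stdio.h>

/* Lock two mutexes in a globally consistent (address) order, the same
 * trick double_rq_lock() uses for two runqueue spinlocks. */
static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	if (a == b) {
		pthread_mutex_lock(a);
	} else if (a < b) {
		pthread_mutex_lock(a);
		pthread_mutex_lock(b);
	} else {
		pthread_mutex_lock(b);
		pthread_mutex_lock(a);
	}
}

static void unlock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	pthread_mutex_unlock(a);
	if (a != b)
		pthread_mutex_unlock(b);
}

int main(void)
{
	pthread_mutex_t m1 = PTHREAD_MUTEX_INITIALIZER;
	pthread_mutex_t m2 = PTHREAD_MUTEX_INITIALIZER;

	/* Whichever order the caller passes them in, the locking order is
	 * the same, so two threads can never deadlock on this pair. */
	lock_pair(&m2, &m1);
	printf("both locks held\n");
	unlock_pair(&m2, &m1);
	return 0;
}
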
9223- write_lock_irq(&tasklist_lock);
9224+void __init init_idle(task_t *idle, int cpu)
9225+{
9226+ runqueue_t *idle_rq = cpu_rq(cpu), *rq = cpu_rq(idle->cpu);
9227+ unsigned long flags;
9228
9229- /* Reparent to init */
9230- REMOVE_LINKS(this_task);
9231- this_task->p_pptr = child_reaper;
9232- this_task->p_opptr = child_reaper;
9233- SET_LINKS(this_task);
9234+ __save_flags(flags);
9235+ __cli();
9236+ double_rq_lock(idle_rq, rq);
9237+
9238+ idle_rq->curr = idle_rq->idle = idle;
9239+ deactivate_task(idle, rq);
9240+ idle->array = NULL;
9241+ idle->prio = MAX_PRIO;
9242+ idle->state = TASK_RUNNING;
9243+ idle->cpu = cpu;
9244+ double_rq_unlock(idle_rq, rq);
9245+ set_tsk_need_resched(idle);
9246+ __restore_flags(flags);
9247+}
9248+
9249+extern void init_timervecs(void);
9250+extern void timer_bh(void);
9251+extern void tqueue_bh(void);
9252+extern void immediate_bh(void);
9253+
9254+void __init sched_init(void)
9255+{
9256+ runqueue_t *rq;
9257+ int i, j, k;
9258+
9259+ for (i = 0; i < NR_CPUS; i++) {
9260+ prio_array_t *array;
9261
9262- /* Set the exit signal to SIGCHLD so we signal init on exit */
9263- this_task->exit_signal = SIGCHLD;
9264+ rq = cpu_rq(i);
9265+ rq->active = rq->arrays;
9266+ rq->expired = rq->arrays + 1;
9267+ spin_lock_init(&rq->lock);
9268+#ifdef CONFIG_SMP
9269+ INIT_LIST_HEAD(&rq->migration_queue);
9270+#endif
9271
9272- /* We also take the runqueue_lock while altering task fields
9273- * which affect scheduling decisions */
9274- spin_lock(&runqueue_lock);
9275+ for (j = 0; j < 2; j++) {
9276+ array = rq->arrays + j;
9277+ for (k = 0; k < MAX_PRIO; k++) {
9278+ INIT_LIST_HEAD(array->queue + k);
9279+ __clear_bit(k, array->bitmap);
9280+ }
9281+ // delimiter for bitsearch
9282+ __set_bit(MAX_PRIO, array->bitmap);
9283+ }
9284+ }
9285+ /*
9286+ * We have to do a little magic to get the first
9287+ * process right in SMP mode.
9288+ */
9289+ rq = this_rq();
9290+ rq->curr = current;
9291+ rq->idle = current;
9292+ current->cpu = smp_processor_id();
9293+ wake_up_process(current);
9294
9295- this_task->ptrace = 0;
9296- this_task->nice = DEF_NICE;
9297- this_task->policy = SCHED_OTHER;
9298- /* cpus_allowed? */
9299- /* rt_priority? */
9300- /* signals? */
9301- this_task->cap_effective = CAP_INIT_EFF_SET;
9302- this_task->cap_inheritable = CAP_INIT_INH_SET;
9303- this_task->cap_permitted = CAP_FULL_SET;
9304- this_task->keep_capabilities = 0;
9305- memcpy(this_task->rlim, init_task.rlim, sizeof(*(this_task->rlim)));
9306- switch_uid(INIT_USER);
9307+ init_timervecs();
9308+ init_bh(TIMER_BH, timer_bh);
9309+ init_bh(TQUEUE_BH, tqueue_bh);
9310+ init_bh(IMMEDIATE_BH, immediate_bh);
9311
9312- spin_unlock(&runqueue_lock);
9313- write_unlock_irq(&tasklist_lock);
9314+ /*
9315+ * The boot idle thread does lazy MMU switching as well:
9316+ */
9317+ atomic_inc(&init_mm.mm_count);
9318+ enter_lazy_tlb(&init_mm, current, smp_processor_id());
9319 }
9320
9321+#if CONFIG_SMP
9322+
9323 /*
9324- * Put all the gunge required to become a kernel thread without
9325- * attached user resources in one place where it belongs.
9326- */
9327+ * This is how migration works:
9328+ *
9329+ * 1) we queue a migration_req_t structure in the source CPU's
9330+ * runqueue and wake up that CPU's migration thread.
9331+ * 2) we down() the locked semaphore => thread blocks.
9332+ * 3) migration thread wakes up (implicitly it forces the migrated
9333+ * thread off the CPU)
9334+ * 4) it gets the migration request and checks whether the migrated
9335+ * task is still in the wrong runqueue.
9336+ * 5) if it's in the wrong runqueue then the migration thread removes
9337+ * it and puts it into the right queue.
9338+ * 6) migration thread up()s the semaphore.
9339+ * 7) we wake up and the migration is done.
9340+ */
9341+
9342+typedef struct {
9343+ struct list_head list;
9344+ task_t *task;
9345+ struct completion done;
9346+} migration_req_t;
9347
9348-void daemonize(void)
9349+/*
9350+ * Change a given task's CPU affinity. Migrate the process to a
9351+ * proper CPU and schedule it away if the CPU it's executing on
9352+ * is removed from the allowed bitmask.
9353+ *
9354+ * NOTE: the caller must have a valid reference to the task, the
9355+ * task must not exit() & deallocate itself prematurely. The
9356+ * call is not atomic; no spinlocks may be held.
9357+ */
9358+void set_cpus_allowed(task_t *p, unsigned long new_mask)
9359 {
9360- struct fs_struct *fs;
9361+ unsigned long flags;
9362+ migration_req_t req;
9363+ runqueue_t *rq;
9364
9365+ new_mask &= cpu_online_map;
9366+ if (!new_mask)
9367+ BUG();
9368
9369+ rq = task_rq_lock(p, &flags);
9370+ p->cpus_allowed = new_mask;
9371 /*
9372- * If we were started as result of loading a module, close all of the
9373- * user space pages. We don't need them, and if we didn't close them
9374- * they would be locked into memory.
9375+ * Can the task run on the task's current CPU? If not then
9376+ * migrate the process off to a proper CPU.
9377 */
9378- exit_mm(current);
9379+ if (new_mask & (1UL << p->cpu)) {
9380+ task_rq_unlock(rq, &flags);
9381+ return;
9382+ }
9383
9384- current->session = 1;
9385- current->pgrp = 1;
9386- current->tty = NULL;
9387+ /*
9388+ * If the task is not on a runqueue, then it is safe to
9389+ * simply update the task's cpu field.
9390+ */
9391+ if (!p->array && (p != rq->curr)) {
9392+ p->cpu = __ffs(p->cpus_allowed);
9393+ task_rq_unlock(rq, &flags);
9394+ return;
9395+ }
9396
9397- /* Become as one with the init task */
9398+ init_completion(&req.done);
9399+ req.task = p;
9400+ list_add(&req.list, &rq->migration_queue);
9401+ task_rq_unlock(rq, &flags);
9402+ wake_up_process(rq->migration_thread);
9403
9404- exit_fs(current); /* current->fs->count--; */
9405- fs = init_task.fs;
9406- current->fs = fs;
9407- atomic_inc(&fs->count);
9408- exit_files(current);
9409- current->files = init_task.files;
9410- atomic_inc(&current->files->count);
9411+ wait_for_completion(&req.done);
9412 }
9413
9414-extern unsigned long wait_init_idle;
9415+static __initdata int master_migration_thread;
9416
9417-void __init init_idle(void)
9418+static int migration_thread(void * bind_cpu)
9419 {
9420- struct schedule_data * sched_data;
9421- sched_data = &aligned_data[smp_processor_id()].schedule_data;
9422+ int cpu = cpu_logical_map((int) (long) bind_cpu);
9423+ struct sched_param param = { sched_priority: MAX_RT_PRIO-1 };
9424+ runqueue_t *rq;
9425+ int ret;
9426
9427- if (current != &init_task && task_on_runqueue(current)) {
9428- printk("UGH! (%d:%d) was on the runqueue, removing.\n",
9429- smp_processor_id(), current->pid);
9430- del_from_runqueue(current);
9431+ daemonize();
9432+ sigfillset(&current->blocked);
9433+ set_fs(KERNEL_DS);
9434+ /*
9435+ * The first migration thread is started on the boot CPU, it
9436+ * migrates the other migration threads to their destination CPUs.
9437+ */
9438+ if (cpu != master_migration_thread) {
9439+ while (!cpu_rq(master_migration_thread)->migration_thread)
9440+ yield();
9441+ set_cpus_allowed(current, 1UL << cpu);
9442 }
9443- sched_data->curr = current;
9444- sched_data->last_schedule = get_cycles();
9445- clear_bit(current->processor, &wait_init_idle);
9446-}
9447+ printk("migration_task %d on cpu=%d\n", cpu, smp_processor_id());
9448+ ret = setscheduler(0, SCHED_FIFO, &param);
9449
9450-extern void init_timervecs (void);
9451+ rq = this_rq();
9452+ rq->migration_thread = current;
9453
9454-void __init sched_init(void)
9455-{
9456- /*
9457- * We have to do a little magic to get the first
9458- * process right in SMP mode.
9459- */
9460- int cpu = smp_processor_id();
9461- int nr;
9462+ sprintf(current->comm, "migration_CPU%d", smp_processor_id());
9463
9464- init_task.processor = cpu;
9465+ for (;;) {
9466+ runqueue_t *rq_src, *rq_dest;
9467+ struct list_head *head;
9468+ int cpu_src, cpu_dest;
9469+ migration_req_t *req;
9470+ unsigned long flags;
9471+ task_t *p;
9472
9473- for(nr = 0; nr < PIDHASH_SZ; nr++)
9474- pidhash[nr] = NULL;
9475+ spin_lock_irqsave(&rq->lock, flags);
9476+ head = &rq->migration_queue;
9477+ current->state = TASK_INTERRUPTIBLE;
9478+ if (list_empty(head)) {
9479+ spin_unlock_irqrestore(&rq->lock, flags);
9480+ schedule();
9481+ continue;
9482+ }
9483+ req = list_entry(head->next, migration_req_t, list);
9484+ list_del_init(head->next);
9485+ spin_unlock_irqrestore(&rq->lock, flags);
9486+
9487+ p = req->task;
9488+ cpu_dest = __ffs(p->cpus_allowed);
9489+ rq_dest = cpu_rq(cpu_dest);
9490+repeat:
9491+ cpu_src = p->cpu;
9492+ rq_src = cpu_rq(cpu_src);
9493+
9494+ local_irq_save(flags);
9495+ double_rq_lock(rq_src, rq_dest);
9496+ if (p->cpu != cpu_src) {
9497+ double_rq_unlock(rq_src, rq_dest);
9498+ local_irq_restore(flags);
9499+ goto repeat;
9500+ }
9501+ if (rq_src == rq) {
9502+ p->cpu = cpu_dest;
9503+ if (p->array) {
9504+ deactivate_task(p, rq_src);
9505+ activate_task(p, rq_dest);
9506+ }
9507+ }
9508+ double_rq_unlock(rq_src, rq_dest);
9509+ local_irq_restore(flags);
9510
9511- init_timervecs();
9512+ complete(&req->done);
9513+ }
9514+}
9515
9516- init_bh(TIMER_BH, timer_bh);
9517- init_bh(TQUEUE_BH, tqueue_bh);
9518- init_bh(IMMEDIATE_BH, immediate_bh);
9519+void __init migration_init(void)
9520+{
9521+ int cpu;
9522
9523- /*
9524- * The boot idle thread does lazy MMU switching as well:
9525- */
9526- atomic_inc(&init_mm.mm_count);
9527- enter_lazy_tlb(&init_mm, current, cpu);
9528+ master_migration_thread = smp_processor_id();
9529+ current->cpus_allowed = 1UL << master_migration_thread;
9530+
9531+ for (cpu = 0; cpu < smp_num_cpus; cpu++) {
9532+ if (kernel_thread(migration_thread, (void *) (long) cpu,
9533+ CLONE_FS | CLONE_FILES | CLONE_SIGNAL) < 0)
9534+ BUG();
9535+ }
9536+ current->cpus_allowed = -1L;
9537+
9538+ for (cpu = 0; cpu < smp_num_cpus; cpu++)
9539+ while (!cpu_rq(cpu_logical_map(cpu))->migration_thread)
9540+ schedule_timeout(2);
9541 }
9542+
9543+#endif /* CONFIG_SMP */
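
The migration machinery just added follows a queue-request/wait-for-completion protocol: the caller puts a migration_req_t on the runqueue, wakes the per-CPU migration thread and blocks in wait_for_completion() until the thread has moved the task and called complete(). A compact user-space analogue of that hand-off, with a mutex/condition pair standing in for the kernel completion (names invented for illustration):

#include <pthread.h>
#include <stdio.h>

struct request {
	int payload;
	int done;
	pthread_mutex_t lock;
	pthread_cond_t cond;
};

static struct request *pending;
static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t queue_cond = PTHREAD_COND_INITIALIZER;

static struct request req = {
	.payload = 42,
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.cond = PTHREAD_COND_INITIALIZER,
};

/* The "migration thread": waits for a request, services it, completes it. */
static void *worker(void *unused)
{
	pthread_mutex_lock(&queue_lock);
	while (!pending)
		pthread_cond_wait(&queue_cond, &queue_lock);
	struct request *r = pending;
	pending = NULL;
	pthread_mutex_unlock(&queue_lock);

	printf("worker handled request %d\n", r->payload);

	pthread_mutex_lock(&r->lock);		/* complete(&req->done) analogue */
	r->done = 1;
	pthread_cond_signal(&r->cond);
	pthread_mutex_unlock(&r->lock);
	return NULL;
}

int main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, worker, NULL);

	pthread_mutex_lock(&queue_lock);	/* list_add + wake_up_process */
	pending = &req;
	pthread_cond_signal(&queue_cond);
	pthread_mutex_unlock(&queue_lock);

	pthread_mutex_lock(&req.lock);		/* wait_for_completion() analogue */
	while (!req.done)
		pthread_cond_wait(&req.cond, &req.lock);
	pthread_mutex_unlock(&req.lock);

	pthread_join(tid, NULL);
	return 0;
}
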
9544diff -urN linux-2.4.24.org/kernel/signal.c linux-2.4.24/kernel/signal.c
5d16fd25
AM
9545--- linux-2.4.24.org/kernel/signal.c 2004-02-04 20:47:26.821945338 +0100
9546+++ linux-2.4.24/kernel/signal.c 2004-02-04 20:52:55.082667907 +0100
0aa7655b
AM
9547@@ -507,12 +507,9 @@
9548 * process of changing - but no harm is done by that
9549 * other than doing an extra (lightweight) IPI interrupt.
9550 */
9551- spin_lock(&runqueue_lock);
9552- if (task_has_cpu(t) && t->processor != smp_processor_id())
9553- smp_send_reschedule(t->processor);
9554- spin_unlock(&runqueue_lock);
9555-#endif /* CONFIG_SMP */
9556-
9557+ if ((t->state == TASK_RUNNING) && (t->cpu != cpu()))
9558+ kick_if_running(t);
9559+#endif
9560 if (t->state & TASK_INTERRUPTIBLE) {
9561 wake_up_process(t);
9562 return;
9563diff -urN linux-2.4.24.org/kernel/softirq.c linux-2.4.24/kernel/softirq.c
5d16fd25
AM
9564--- linux-2.4.24.org/kernel/softirq.c 2004-02-04 20:47:27.211864234 +0100
9565+++ linux-2.4.24/kernel/softirq.c 2004-02-04 20:52:55.110662084 +0100
0aa7655b
AM
9566@@ -364,13 +364,13 @@
9567 int cpu = cpu_logical_map(bind_cpu);
9568
9569 daemonize();
9570- current->nice = 19;
9571+ set_user_nice(current, 19);
9572 sigfillset(&current->blocked);
9573
9574 /* Migrate to the right CPU */
9575- current->cpus_allowed = 1UL << cpu;
9576- while (smp_processor_id() != cpu)
9577- schedule();
9578+ set_cpus_allowed(current, 1UL << cpu);
9579+ if (cpu() != cpu)
9580+ BUG();
9581
9582 sprintf(current->comm, "ksoftirqd_CPU%d", bind_cpu);
9583
9584@@ -395,7 +395,7 @@
9585 }
9586 }
9587
9588-static __init int spawn_ksoftirqd(void)
9589+__init int spawn_ksoftirqd(void)
9590 {
9591 int cpu;
9592
9593diff -urN linux-2.4.24.org/kernel/sys.c linux-2.4.24/kernel/sys.c
5d16fd25
AM
9594--- linux-2.4.24.org/kernel/sys.c 2004-02-04 20:47:26.739962391 +0100
9595+++ linux-2.4.24/kernel/sys.c 2004-02-04 20:52:55.139656054 +0100
0aa7655b
AM
9596@@ -239,10 +239,10 @@
9597 }
9598 if (error == -ESRCH)
9599 error = 0;
9600- if (niceval < p->nice && !capable(CAP_SYS_NICE))
9601+ if (niceval < task_nice(p) && !capable(CAP_SYS_NICE))
9602 error = -EACCES;
9603 else
9604- p->nice = niceval;
9605+ set_user_nice(p, niceval);
9606 }
9607 read_unlock(&tasklist_lock);
9608
9609@@ -268,7 +268,7 @@
9610 long niceval;
9611 if (!proc_sel(p, which, who))
9612 continue;
9613- niceval = 20 - p->nice;
9614+ niceval = 20 - task_nice(p);
9615 if (niceval > retval)
9616 retval = niceval;
9617 }
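
sys_setpriority() now routes through set_user_nice(), and the check above means an unprivileged caller can only raise its nice value; lowering it (raising priority) yields -EACCES without CAP_SYS_NICE. A short demonstration (illustration only; run it without privilege to see the failure):

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/time.h>
#include <sys/resource.h>

int main(void)
{
	/* Lowering the nice value (raising priority) needs CAP_SYS_NICE. */
	if (setpriority(PRIO_PROCESS, 0, -5) == -1)
		printf("setpriority(-5) failed as expected: %s\n",
		       strerror(errno));

	/* Raising the nice value is always allowed. */
	if (setpriority(PRIO_PROCESS, 0, 10) == 0)
		printf("nice value is now %d\n", getpriority(PRIO_PROCESS, 0));
	return 0;
}
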
9618diff -urN linux-2.4.24.org/kernel/timer.c linux-2.4.24/kernel/timer.c
5d16fd25
AM
9619--- linux-2.4.24.org/kernel/timer.c 2004-02-04 20:47:27.115884198 +0100
9620+++ linux-2.4.24/kernel/timer.c 2004-02-04 20:52:55.155652727 +0100
0aa7655b
AM
9621@@ -25,6 +25,8 @@
9622
9623 #include <asm/uaccess.h>
9624
9625+struct kernel_stat kstat;
9626+
9627 /*
9628 * Timekeeping variables
9629 */
9630@@ -598,25 +600,7 @@
9631 int cpu = smp_processor_id(), system = user_tick ^ 1;
9632
9633 update_one_process(p, user_tick, system, cpu);
9634- if (p->pid) {
9635- if (--p->counter <= 0) {
9636- p->counter = 0;
9637- /*
9638- * SCHED_FIFO is priority preemption, so this is
9639- * not the place to decide whether to reschedule a
9640- * SCHED_FIFO task or not - Bhavesh Davda
9641- */
9642- if (p->policy != SCHED_FIFO) {
9643- p->need_resched = 1;
9644- }
9645- }
9646- if (p->nice > 0)
9647- kstat.per_cpu_nice[cpu] += user_tick;
9648- else
9649- kstat.per_cpu_user[cpu] += user_tick;
9650- kstat.per_cpu_system[cpu] += system;
9651- } else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
9652- kstat.per_cpu_system[cpu] += system;
9653+ scheduler_tick(user_tick, system);
9654 }
9655
9656 /*
9657@@ -624,17 +608,7 @@
9658 */
9659 static unsigned long count_active_tasks(void)
9660 {
9661- struct task_struct *p;
9662- unsigned long nr = 0;
9663-
9664- read_lock(&tasklist_lock);
9665- for_each_task(p) {
9666- if ((p->state == TASK_RUNNING ||
9667- (p->state & TASK_UNINTERRUPTIBLE)))
9668- nr += FIXED_1;
9669- }
9670- read_unlock(&tasklist_lock);
9671- return nr;
9672+ return (nr_running() + nr_uninterruptible()) * FIXED_1;
9673 }
9674
9675 /*
9676@@ -827,6 +801,89 @@
9677
9678 #endif
9679
9680+static void process_timeout(unsigned long __data)
9681+{
9682+ wake_up_process((task_t *)__data);
9683+}
9684+
9685+/**
9686+ * schedule_timeout - sleep until timeout
9687+ * @timeout: timeout value in jiffies
9688+ *
9689+ * Make the current task sleep until @timeout jiffies have
9690+ * elapsed. The routine will return immediately unless
9691+ * the current task state has been set (see set_current_state()).
9692+ *
9693+ * You can set the task state as follows -
9694+ *
9695+ * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
9696+ * pass before the routine returns. The routine will return 0
9697+ *
9698+ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
9699+ * delivered to the current task. In this case the remaining time
9700+ * in jiffies will be returned, or 0 if the timer expired in time
9701+ *
9702+ * The current task state is guaranteed to be TASK_RUNNING when this
9703+ * routine returns.
9704+ *
9705+ * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
9706+ * the CPU away without a bound on the timeout. In this case the return
9707+ * value will be %MAX_SCHEDULE_TIMEOUT.
9708+ *
9709+ * In all cases the return value is guaranteed to be non-negative.
9710+ */
9711+signed long schedule_timeout(signed long timeout)
9712+{
9713+ struct timer_list timer;
9714+ unsigned long expire;
9715+
9716+ switch (timeout)
9717+ {
9718+ case MAX_SCHEDULE_TIMEOUT:
9719+ /*
9720+ * These two special cases are useful to be comfortable
9721+ * in the caller. Nothing more. We could take
9722+ * MAX_SCHEDULE_TIMEOUT from one of the negative values,
9723+ * but I'd like to return a valid offset (>=0) to allow
9724+ * the caller to do whatever it wants with the retval.
9725+ */
9726+ schedule();
9727+ goto out;
9728+ default:
9729+ /*
9730+ * Another bit of PARANOID. Note that the retval will be
9731+ * 0 since no piece of kernel is supposed to do a check
9732+ * should never happen anyway). You just have the printk()
9733+ * that will tell you if something has gone wrong and where.
9734+ * that will tell you if something is gone wrong and where.
9735+ */
9736+ if (timeout < 0)
9737+ {
9738+ printk(KERN_ERR "schedule_timeout: wrong timeout "
9739+ "value %lx from %p\n", timeout,
9740+ __builtin_return_address(0));
9741+ current->state = TASK_RUNNING;
9742+ goto out;
9743+ }
9744+ }
9745+
9746+ expire = timeout + jiffies;
9747+
9748+ init_timer(&timer);
9749+ timer.expires = expire;
9750+ timer.data = (unsigned long) current;
9751+ timer.function = process_timeout;
9752+
9753+ add_timer(&timer);
9754+ schedule();
9755+ del_timer_sync(&timer);
9756+
9757+ timeout = expire - jiffies;
9758+
9759+ out:
9760+ return timeout < 0 ? 0 : timeout;
9761+}
9762+
9763 /* Thread ID - the internal kernel "pid" */
9764 asmlinkage long sys_gettid(void)
9765 {
9766@@ -873,4 +930,3 @@
9767 }
9768 return 0;
9769 }
9770-
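
The kerneldoc added above spells out the schedule_timeout() contract: set the task state first, then call it, and treat a positive return as an early (signal) wakeup. A typical 2.4 caller looks like the fragment below; this is a kernel-context sketch, not a standalone program, and the function name is invented for illustration.

/* Kernel-context sketch only (needs <linux/sched.h>); illustrates the
 * set_current_state() + schedule_timeout() pattern documented above. */
static signed long sleep_up_to_one_second(void)
{
	signed long remaining;

	set_current_state(TASK_INTERRUPTIBLE);	/* must precede the call */
	remaining = schedule_timeout(HZ);	/* HZ jiffies == one second */

	/* A positive value here means a signal woke us before the timeout;
	 * schedule_timeout() has already put us back in TASK_RUNNING. */
	return remaining;
}
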
0aa7655b 9771diff -urN linux-2.4.24.org/mm/oom_kill.c linux-2.4.24/mm/oom_kill.c
5d16fd25
AM
9772--- linux-2.4.24.org/mm/oom_kill.c 2004-02-04 20:47:28.626569974 +0100
9773+++ linux-2.4.24/mm/oom_kill.c 2004-02-04 20:57:30.567369583 +0100
0aa7655b
AM
9774@@ -86,7 +86,7 @@
9775 * Niced processes are most likely less important, so double
9776 * their badness points.
9777 */
9778- if (p->nice > 0)
9779+ if (task_nice(p) > 0)
9780 points *= 2;
9781
9782 /*
9783@@ -150,7 +150,7 @@
9784 * all the memory it needs. That way it should be able to
9785 * exit() and clear out its resources quickly...
9786 */
9787- p->counter = 5 * HZ;
9788+ p->time_slice = HZ;
9789 p->flags |= PF_MEMALLOC | PF_MEMDIE;
9790
9791 /* This process has hardware access, be more careful. */
9792diff -urN linux-2.4.24.org/net/bluetooth/bnep/core.c linux-2.4.24/net/bluetooth/bnep/core.c
5d16fd25
AM
9793--- linux-2.4.24.org/net/bluetooth/bnep/core.c 2004-02-04 20:48:41.535404904 +0100
9794+++ linux-2.4.24/net/bluetooth/bnep/core.c 2004-02-04 20:52:55.199643577 +0100
0aa7655b
AM
9795@@ -460,7 +460,7 @@
9796 sigfillset(&current->blocked);
9797 flush_signals(current);
9798
9799- current->nice = -15;
9800+ set_user_nice(current, -15);
9801
9802 set_fs(KERNEL_DS);
9803
9804diff -urN linux-2.4.24.org/net/bluetooth/cmtp/core.c linux-2.4.24/net/bluetooth/cmtp/core.c
5d16fd25
AM
9805--- linux-2.4.24.org/net/bluetooth/cmtp/core.c 2004-02-04 20:48:41.311451486 +0100
9806+++ linux-2.4.24/net/bluetooth/cmtp/core.c 2004-02-04 20:52:55.224638378 +0100
0aa7655b
AM
9807@@ -298,7 +298,7 @@
9808 sigfillset(&current->blocked);
9809 flush_signals(current);
9810
9811- current->nice = -15;
9812+ set_user_nice(current, -15);
9813
9814 set_fs(KERNEL_DS);
9815
1e2c2bef
JB
9816--- linux-2.4.33/arch/i386/kernel/i387.c.orig 2006-08-11 06:18:20.000000000 +0200
9817+++ linux-2.4.33/arch/i386/kernel/i387.c 2006-08-16 16:03:30.925971000 +0200
9818@@ -68,15 +68,18 @@
9819 * FPU lazy state save handling.
9820 */
9821
9822+unsigned long nr_context_switches(void);
9823+
9824 static inline void __save_init_fpu( struct task_struct *tsk )
9825 {
9826 if ( cpu_has_fxsr ) {
9827+ unsigned int cswtch = nr_context_switches();
9828 asm volatile( "fxsave %0"
9829 : "=m" (tsk->thread.i387.fxsave) );
9830 if (tsk->thread.i387.fxsave.swd & (1<<7))
9831 asm volatile("fnclex");
9832 /* AMD CPUs leak F?P. Clear it here */
9833- asm volatile("ffree %%st(7) ; fildl %0" :: "m" (kstat.context_swtch));
9834+ asm volatile("ffree %%st(7) ; fildl %0" :: "m" (cswtch));
9835 } else {
9836 asm volatile( "fnsave %0 ; fwait"
9837 : "=m" (tsk->thread.i387.fsave) );