1diff -urN linux-2.4.24.org/arch/alpha/kernel/entry.S linux-2.4.24/arch/alpha/kernel/entry.S
2--- linux-2.4.24.org/arch/alpha/kernel/entry.S 2004-02-04 20:50:50.273627588 +0100
3+++ linux-2.4.24/arch/alpha/kernel/entry.S 2004-02-04 20:52:52.801142450 +0100
4@@ -695,7 +695,9 @@
5 ret_from_fork:
6 lda $26,ret_from_sys_call
7 mov $17,$16
8+#if CONFIG_SMP
9 jsr $31,schedule_tail
10+#endif
11 .end ret_from_fork
12
13 .align 3
14diff -urN linux-2.4.24.org/arch/alpha/kernel/process.c linux-2.4.24/arch/alpha/kernel/process.c
15--- linux-2.4.24.org/arch/alpha/kernel/process.c 2004-02-04 20:50:48.800933904 +0100
16+++ linux-2.4.24/arch/alpha/kernel/process.c 2004-02-04 20:52:52.805141619 +0100
17@@ -74,9 +74,6 @@
18 cpu_idle(void)
19 {
20 /* An endless idle loop with no priority at all. */
21- current->nice = 20;
22- current->counter = -100;
23-
24 while (1) {
25 /* FIXME -- EV6 and LCA45 know how to power down
26 the CPU. */
27diff -urN linux-2.4.24.org/arch/alpha/kernel/smp.c linux-2.4.24/arch/alpha/kernel/smp.c
28--- linux-2.4.24.org/arch/alpha/kernel/smp.c 2004-02-04 20:50:49.083875053 +0100
29+++ linux-2.4.24/arch/alpha/kernel/smp.c 2004-02-04 20:52:52.820138499 +0100
30@@ -81,6 +81,8 @@
31 int smp_num_probed; /* Internal processor count */
32 int smp_num_cpus = 1; /* Number that came online. */
33 int smp_threads_ready; /* True once the per process idle is forked. */
34+cycles_t cacheflush_time;
35+unsigned long cache_decay_ticks;
36
37 int __cpu_number_map[NR_CPUS];
38 int __cpu_logical_map[NR_CPUS];
39@@ -155,11 +157,6 @@
40 {
41 int cpuid = hard_smp_processor_id();
42
43- if (current != init_tasks[cpu_number_map(cpuid)]) {
44- printk("BUG: smp_calling: cpu %d current %p init_tasks[cpu_number_map(cpuid)] %p\n",
45- cpuid, current, init_tasks[cpu_number_map(cpuid)]);
46- }
47-
48 DBGS(("CALLIN %d state 0x%lx\n", cpuid, current->state));
49
50 /* Turn on machine checks. */
51@@ -217,9 +214,6 @@
52 DBGS(("smp_callin: commencing CPU %d current %p\n",
53 cpuid, current));
54
55- /* Setup the scheduler for this processor. */
56- init_idle();
57-
58 /* ??? This should be in init_idle. */
59 atomic_inc(&init_mm.mm_count);
60 current->active_mm = &init_mm;
61@@ -227,6 +221,57 @@
62 cpu_idle();
63 }
64
65+
66+/*
67+ * Rough estimation for SMP scheduling, this is the number of cycles it
68+ * takes for a fully memory-limited process to flush the SMP-local cache.
69+ *
70+ * We are not told how much cache there is, so we have to guess.
71+ */
72+static void __init
73+smp_tune_scheduling (int cpuid)
74+{
75+ struct percpu_struct *cpu;
76+ unsigned long on_chip_cache; /* kB */
77+ unsigned long freq; /* Hz */
78+ unsigned long bandwidth = 350; /* MB/s */
79+
80+ cpu = (struct percpu_struct*)((char*)hwrpb + hwrpb->processor_offset
81+ + cpuid * hwrpb->processor_size);
82+ switch (cpu->type)
83+ {
84+ case EV45_CPU:
85+ on_chip_cache = 16 + 16;
86+ break;
87+
88+ case EV5_CPU:
89+ case EV56_CPU:
90+ on_chip_cache = 8 + 8 + 96;
91+ break;
92+
93+ case PCA56_CPU:
94+ on_chip_cache = 16 + 8;
95+ break;
96+
97+ case EV6_CPU:
98+ case EV67_CPU:
99+ default:
100+ on_chip_cache = 64 + 64;
101+ break;
102+ }
103+
104+ freq = hwrpb->cycle_freq ? : est_cycle_freq;
105+
106+ cacheflush_time = (freq / 1000000) * (on_chip_cache << 10) / bandwidth;
107+ cache_decay_ticks = cacheflush_time / (freq / 1000) * HZ / 1000;
108+
109+ printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
110+ cacheflush_time/(freq/1000000),
111+ (cacheflush_time*100/(freq/1000000)) % 100);
112+ printk("task migration cache decay timeout: %ld msecs.\n",
113+ (cache_decay_ticks + 1) * 1000 / HZ);
114+}
115+
116 /*
117 * Send a message to a secondary's console. "START" is one such
118 * interesting message. ;-)
119@@ -449,14 +494,11 @@
120 if (idle == &init_task)
121 panic("idle process is init_task for CPU %d", cpuid);
122
123- idle->processor = cpuid;
124- idle->cpus_runnable = 1 << cpuid; /* we schedule the first task manually */
125+ init_idle(idle, cpuid);
126+ unhash_process(idle);
127+
128 __cpu_logical_map[cpunum] = cpuid;
129 __cpu_number_map[cpuid] = cpunum;
130-
131- del_from_runqueue(idle);
132- unhash_process(idle);
133- init_tasks[cpunum] = idle;
134
135 DBGS(("smp_boot_one_cpu: CPU %d state 0x%lx flags 0x%lx\n",
136 cpuid, idle->state, idle->flags));
137@@ -563,13 +605,11 @@
138
139 __cpu_number_map[boot_cpuid] = 0;
140 __cpu_logical_map[0] = boot_cpuid;
141- current->processor = boot_cpuid;
142
143 smp_store_cpu_info(boot_cpuid);
144+ smp_tune_scheduling(boot_cpuid);
145 smp_setup_percpu_timer(boot_cpuid);
146
147- init_idle();
148-
149 /* ??? This should be in init_idle. */
150 atomic_inc(&init_mm.mm_count);
151 current->active_mm = &init_mm;
152diff -urN linux-2.4.24.org/arch/arm/kernel/process.c linux-2.4.24/arch/arm/kernel/process.c
153--- linux-2.4.24.org/arch/arm/kernel/process.c 2004-02-04 20:51:34.213488266 +0100
154+++ linux-2.4.24/arch/arm/kernel/process.c 2004-02-04 20:52:52.824137668 +0100
155@@ -87,8 +87,6 @@
156 {
157 /* endless idle loop with no priority at all */
158 init_idle();
159- current->nice = 20;
160- current->counter = -100;
161
162 while (1) {
163 void (*idle)(void) = pm_idle;
164diff -urN linux-2.4.24.org/arch/i386/kernel/entry.S linux-2.4.24/arch/i386/kernel/entry.S
165--- linux-2.4.24.org/arch/i386/kernel/entry.S 2004-02-04 20:50:47.376230238 +0100
166+++ linux-2.4.24/arch/i386/kernel/entry.S 2004-02-04 20:52:52.828136836 +0100
167@@ -79,7 +79,7 @@
168 exec_domain = 16
169 need_resched = 20
170 tsk_ptrace = 24
171-processor = 52
172+cpu = 32
173
174 ENOSYS = 38
175
176@@ -184,9 +184,11 @@
177
178
179 ENTRY(ret_from_fork)
180+#if CONFIG_SMP
181 pushl %ebx
182 call SYMBOL_NAME(schedule_tail)
183 addl $4, %esp
184+#endif
185 GET_CURRENT(%ebx)
186 testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS
187 jne tracesys_exit
188diff -urN linux-2.4.24.org/arch/i386/kernel/process.c linux-2.4.24/arch/i386/kernel/process.c
189--- linux-2.4.24.org/arch/i386/kernel/process.c 2004-02-04 20:50:46.799350227 +0100
190+++ linux-2.4.24/arch/i386/kernel/process.c 2004-02-04 20:52:52.833135796 +0100
191@@ -84,7 +84,7 @@
192 {
193 if (current_cpu_data.hlt_works_ok && !hlt_counter) {
194 __cli();
195- if (!current->need_resched)
196+ if (!need_resched())
197 safe_halt();
198 else
199 __sti();
200@@ -126,9 +126,6 @@
201 void cpu_idle (void)
202 {
203 /* endless idle loop with no priority at all */
204- init_idle();
205- current->nice = 20;
206- current->counter = -100;
207
208 while (1) {
209 void (*idle)(void) = pm_idle;
210@@ -665,15 +662,17 @@
211 asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs));
212
213 /*
214- * Restore %fs and %gs.
215+ * Restore %fs and %gs if needed.
216 */
217- loadsegment(fs, next->fs);
218- loadsegment(gs, next->gs);
219+ if (unlikely(prev->fs | prev->gs | next->fs | next->gs)) {
220+ loadsegment(fs, next->fs);
221+ loadsegment(gs, next->gs);
222+ }
223
224 /*
225 * Now maybe reload the debug registers
226 */
227- if (next->debugreg[7]){
228+ if (unlikely(next->debugreg[7])) {
229 loaddebug(next, 0);
230 loaddebug(next, 1);
231 loaddebug(next, 2);
232@@ -683,7 +682,7 @@
233 loaddebug(next, 7);
234 }
235
236- if (prev->ioperm || next->ioperm) {
237+ if (unlikely(prev->ioperm || next->ioperm)) {
238 if (next->ioperm) {
239 /*
240 * 4 cachelines copy ... not good, but not that
241diff -urN linux-2.4.24.org/arch/i386/kernel/setup.c linux-2.4.24/arch/i386/kernel/setup.c
242--- linux-2.4.24.org/arch/i386/kernel/setup.c 2004-02-04 20:50:46.790352099 +0100
243+++ linux-2.4.24/arch/i386/kernel/setup.c 2004-02-04 20:52:52.840134340 +0100
244@@ -3193,9 +3193,10 @@
245 load_TR(nr);
246 load_LDT(&init_mm.context);
247
248- /*
249- * Clear all 6 debug registers:
250- */
251+ /* Clear %fs and %gs. */
252+ asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
253+
254+ /* Clear all 6 debug registers: */
255
256 #define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) );
257
258diff -urN linux-2.4.24.org/arch/i386/kernel/smpboot.c linux-2.4.24/arch/i386/kernel/smpboot.c
259--- linux-2.4.24.org/arch/i386/kernel/smpboot.c 2004-02-04 20:50:46.762357921 +0100
260+++ linux-2.4.24/arch/i386/kernel/smpboot.c 2004-02-04 20:52:52.864129350 +0100
261@@ -308,14 +308,14 @@
262 if (tsc_values[i] < avg)
263 realdelta = -realdelta;
264
265- printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
266- i, realdelta);
267+ printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n", i, realdelta);
268 }
269
270 sum += delta;
271 }
272 if (!buggy)
273 printk("passed.\n");
274+ ;
275 }
276
277 static void __init synchronize_tsc_ap (void)
278@@ -365,7 +365,7 @@
279 * (This works even if the APIC is not enabled.)
280 */
281 phys_id = GET_APIC_ID(apic_read(APIC_ID));
282- cpuid = current->processor;
283+ cpuid = cpu();
284 if (test_and_set_bit(cpuid, &cpu_online_map)) {
285 printk("huh, phys CPU#%d, CPU#%d already present??\n",
286 phys_id, cpuid);
287@@ -435,6 +435,7 @@
288 */
289 smp_store_cpu_info(cpuid);
290
291+ disable_APIC_timer();
292 /*
293 * Allow the master to continue.
294 */
295@@ -465,6 +466,7 @@
296 smp_callin();
297 while (!atomic_read(&smp_commenced))
298 rep_nop();
299+ enable_APIC_timer();
300 /*
301 * low-memory mappings have been cleared, flush them from
302 * the local TLBs too.
303@@ -803,16 +805,13 @@
304 if (!idle)
305 panic("No idle process for CPU %d", cpu);
306
307- idle->processor = cpu;
308- idle->cpus_runnable = 1 << cpu; /* we schedule the first task manually */
309+ init_idle(idle, cpu);
310
311 map_cpu_to_boot_apicid(cpu, apicid);
312
313 idle->thread.eip = (unsigned long) start_secondary;
314
315- del_from_runqueue(idle);
316 unhash_process(idle);
317- init_tasks[cpu] = idle;
318
319 /* start_eip had better be page-aligned! */
320 start_eip = setup_trampoline();
321@@ -925,6 +924,7 @@
322 }
323
324 cycles_t cacheflush_time;
325+unsigned long cache_decay_ticks;
326
327 static void smp_tune_scheduling (void)
328 {
329@@ -958,9 +958,13 @@
330 cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
331 }
332
333+ cache_decay_ticks = (long)cacheflush_time/cpu_khz * HZ / 1000;
334+
335 printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
336 (long)cacheflush_time/(cpu_khz/1000),
337 ((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
338+ printk("task migration cache decay timeout: %ld msecs.\n",
339+ (cache_decay_ticks + 1) * 1000 / HZ);
340 }
341
342 /*
343@@ -1026,8 +1030,7 @@
344 map_cpu_to_boot_apicid(0, boot_cpu_apicid);
345
346 global_irq_holder = 0;
347- current->processor = 0;
348- init_idle();
349+ current->cpu = 0;
350 smp_tune_scheduling();
351
352 /*
353diff -urN linux-2.4.24.org/arch/i386/kernel/smp.c linux-2.4.24/arch/i386/kernel/smp.c
354--- linux-2.4.24.org/arch/i386/kernel/smp.c 2004-02-04 20:50:47.312243547 +0100
355+++ linux-2.4.24/arch/i386/kernel/smp.c 2004-02-04 20:52:52.868128518 +0100
356@@ -503,6 +503,17 @@
357 }
358
359 /*
360+ * this function sends a reschedule IPI to all (other) CPUs.
361+ * This should only be used if some 'global' task became runnable,
362+ * such as a RT task, that must be handled now. The first CPU
363+ * that manages to grab the task will run it.
364+ */
365+void smp_send_reschedule_all(void)
366+{
367+ send_IPI_allbutself(RESCHEDULE_VECTOR);
368+}
369+
370+/*
371 * Structure and data for smp_call_function(). This is designed to minimise
372 * static memory requirements. It also looks cleaner.
373 */
374diff -urN linux-2.4.24.org/arch/mips64/kernel/process.c linux-2.4.24/arch/mips64/kernel/process.c
375--- linux-2.4.24.org/arch/mips64/kernel/process.c 2004-02-04 20:51:53.268524907 +0100
376+++ linux-2.4.24/arch/mips64/kernel/process.c 2004-02-04 20:52:52.872127686 +0100
377@@ -39,8 +39,7 @@
378 {
379 /* endless idle loop with no priority at all */
380 init_idle();
381- current->nice = 20;
382- current->counter = -100;
383+
384 while (1) {
385 while (!current->need_resched)
386 if (cpu_wait)
387diff -urN linux-2.4.24.org/arch/parisc/kernel/process.c linux-2.4.24/arch/parisc/kernel/process.c
388--- linux-2.4.24.org/arch/parisc/kernel/process.c 2004-02-04 20:51:58.602415484 +0100
389+++ linux-2.4.24/arch/parisc/kernel/process.c 2004-02-04 20:52:52.876126854 +0100
390@@ -65,8 +65,6 @@
391 {
392 /* endless idle loop with no priority at all */
393 init_idle();
394- current->nice = 20;
395- current->counter = -100;
396
397 while (1) {
398 while (!current->need_resched) {
399diff -urN linux-2.4.24.org/arch/ppc/kernel/entry.S linux-2.4.24/arch/ppc/kernel/entry.S
400--- linux-2.4.24.org/arch/ppc/kernel/entry.S 2004-02-04 20:51:15.913294629 +0100
401+++ linux-2.4.24/arch/ppc/kernel/entry.S 2004-02-04 20:52:52.903121239 +0100
402@@ -269,7 +269,9 @@
403
404 .globl ret_from_fork
405 ret_from_fork:
406+#if CONFIG_SMP
407 bl schedule_tail
408+#endif
409 lwz r0,TASK_PTRACE(r2)
410 andi. r0,r0,PT_TRACESYS
411 bnel- syscall_trace
412diff -urN linux-2.4.24.org/arch/ppc/kernel/idle.c linux-2.4.24/arch/ppc/kernel/idle.c
413--- linux-2.4.24.org/arch/ppc/kernel/idle.c 2004-02-04 20:51:16.300214151 +0100
414+++ linux-2.4.24/arch/ppc/kernel/idle.c 2004-02-04 20:52:52.908120200 +0100
415@@ -46,9 +46,7 @@
416 do_power_save = 1;
417
418 /* endless loop with no priority at all */
419- current->nice = 20;
420- current->counter = -100;
421- init_idle();
422+
423 for (;;) {
424 #ifdef CONFIG_SMP
425 if (!do_power_save) {
426diff -urN linux-2.4.24.org/arch/ppc/kernel/mk_defs.c linux-2.4.24/arch/ppc/kernel/mk_defs.c
427--- linux-2.4.24.org/arch/ppc/kernel/mk_defs.c 2004-02-04 20:51:14.150661249 +0100
428+++ linux-2.4.24/arch/ppc/kernel/mk_defs.c 2004-02-04 20:52:52.913119160 +0100
429@@ -34,8 +34,8 @@
430 /*DEFINE(KERNELBASE, KERNELBASE);*/
431 DEFINE(STATE, offsetof(struct task_struct, state));
432 DEFINE(NEXT_TASK, offsetof(struct task_struct, next_task));
433- DEFINE(COUNTER, offsetof(struct task_struct, counter));
434- DEFINE(PROCESSOR, offsetof(struct task_struct, processor));
435+ DEFINE(COUNTER, offsetof(struct task_struct, time_slice));
436+ DEFINE(PROCESSOR, offsetof(struct task_struct, cpu));
437 DEFINE(SIGPENDING, offsetof(struct task_struct, sigpending));
438 DEFINE(THREAD, offsetof(struct task_struct, thread));
439 DEFINE(MM, offsetof(struct task_struct, mm));
440diff -urN linux-2.4.24.org/arch/ppc/kernel/process.c linux-2.4.24/arch/ppc/kernel/process.c
441--- linux-2.4.24.org/arch/ppc/kernel/process.c 2004-02-04 20:51:14.062679549 +0100
442+++ linux-2.4.24/arch/ppc/kernel/process.c 2004-02-04 20:52:52.917118328 +0100
443@@ -281,7 +281,7 @@
444 #endif
445
446 #ifdef CONFIG_SMP
447- printk(" CPU: %d", current->processor);
448+ printk(" CPU: %d", current->cpu);
449 #endif /* CONFIG_SMP */
450
451 printk("\n");
452diff -urN linux-2.4.24.org/arch/ppc/kernel/smp.c linux-2.4.24/arch/ppc/kernel/smp.c
453--- linux-2.4.24.org/arch/ppc/kernel/smp.c 2004-02-04 20:51:15.993277992 +0100
454+++ linux-2.4.24/arch/ppc/kernel/smp.c 2004-02-04 20:52:52.923117080 +0100
455@@ -51,6 +51,7 @@
456 unsigned long cpu_online_map;
457 int smp_hw_index[NR_CPUS];
458 static struct smp_ops_t *smp_ops;
459+unsigned long cache_decay_ticks = HZ/100;
460
461 /* all cpu mappings are 1-1 -- Cort */
462 volatile unsigned long cpu_callin_map[NR_CPUS];
463@@ -292,9 +293,7 @@
464 * cpu 0, the master -- Cort
465 */
466 cpu_callin_map[0] = 1;
467- current->processor = 0;
468-
469- init_idle();
470+ current->cpu = 0;
471
472 for (i = 0; i < NR_CPUS; i++) {
473 prof_counter[i] = 1;
474@@ -351,12 +350,9 @@
475 p = init_task.prev_task;
476 if (!p)
477 panic("No idle task for CPU %d", i);
478- del_from_runqueue(p);
479+ init_idle(p, i);
480 unhash_process(p);
481- init_tasks[i] = p;
482
483- p->processor = i;
484- p->cpus_runnable = 1 << i; /* we schedule the first task manually */
485 current_set[i] = p;
486
487 /*
488@@ -505,7 +501,7 @@
489
490 void __init smp_callin(void)
491 {
492- int cpu = current->processor;
493+ int cpu = current->cpu;
494
495 smp_store_cpu_info(cpu);
496 smp_ops->setup_cpu(cpu);
497diff -urN linux-2.4.24.org/arch/ppc/lib/dec_and_lock.c linux-2.4.24/arch/ppc/lib/dec_and_lock.c
498--- linux-2.4.24.org/arch/ppc/lib/dec_and_lock.c 2004-02-04 20:51:18.406775995 +0100
499+++ linux-2.4.24/arch/ppc/lib/dec_and_lock.c 2004-02-04 20:52:52.927116249 +0100
500@@ -1,4 +1,5 @@
501 #include <linux/module.h>
502+#include <linux/sched.h>
503 #include <linux/spinlock.h>
504 #include <asm/atomic.h>
505 #include <asm/system.h>
506diff -urN linux-2.4.24.org/arch/ppc/mm/init.c linux-2.4.24/arch/ppc/mm/init.c
507--- linux-2.4.24.org/arch/ppc/mm/init.c 2004-02-04 20:51:13.814731121 +0100
508+++ linux-2.4.24/arch/ppc/mm/init.c 2004-02-04 20:52:52.931115417 +0100
509@@ -192,9 +192,9 @@
510 {
511 int iscur = 0;
512 #ifdef CONFIG_SMP
513- printk("%3d ", p->processor);
514- if ( (p->processor != NO_PROC_ID) &&
515- (p == current_set[p->processor]) )
516+ printk("%3d ", p->cpu);
517+ if ( (p->cpu != NO_PROC_ID) &&
518+ (p == current_set[p->cpu]) )
519 {
520 iscur = 1;
521 printk("current");
522diff -urN linux-2.4.24.org/arch/ppc64/kernel/entry.S linux-2.4.24/arch/ppc64/kernel/entry.S
523--- linux-2.4.24.org/arch/ppc64/kernel/entry.S 2004-02-04 20:50:43.056128805 +0100
524+++ linux-2.4.24/arch/ppc64/kernel/entry.S 2004-02-04 20:53:40.136297052 +0100
525@@ -299,7 +299,9 @@
526 blr
527
528 _GLOBAL(ret_from_fork)
529+#if CONFIG_SMP
530 bl .schedule_tail
531+#endif
532 ld r4,PACACURRENT(r13)
533 ld r0,TASK_PTRACE(r4)
534 andi. r0,r0,PT_TRACESYS
535diff -urN linux-2.4.24.org/arch/ppc64/kernel/idle.c linux-2.4.24/arch/ppc64/kernel/idle.c
536--- linux-2.4.24.org/arch/ppc64/kernel/idle.c 2004-02-04 20:50:43.329072034 +0100
537+++ linux-2.4.24/arch/ppc64/kernel/idle.c 2004-02-04 20:55:09.907625341 +0100
538@@ -88,15 +88,12 @@
539 unsigned long CTRL;
540
541 /* endless loop with no priority at all */
542- current->nice = 20;
543- current->counter = -100;
544-
545+
546 /* ensure iSeries run light will be out when idle */
547 current->thread.flags &= ~PPC_FLAG_RUN_LIGHT;
548 CTRL = mfspr(CTRLF);
549 CTRL &= ~RUNLATCH;
550 mtspr(CTRLT, CTRL);
551- init_idle();
552
553 lpaca = get_paca();
554
555diff -urN linux-2.4.24.org/arch/ppc64/kernel/process.c linux-2.4.24/arch/ppc64/kernel/process.c
556--- linux-2.4.24.org/arch/ppc64/kernel/process.c 2004-02-04 20:50:42.774187448 +0100
557+++ linux-2.4.24/arch/ppc64/kernel/process.c 2004-02-04 20:52:52.986103980 +0100
558@@ -138,7 +138,7 @@
559 #ifdef SHOW_TASK_SWITCHES
560 printk("%s/%d -> %s/%d NIP %08lx cpu %d root %x/%x\n",
561 prev->comm,prev->pid,
562- new->comm,new->pid,new->thread.regs->nip,new->processor,
563+ new->comm,new->pid,new->thread.regs->nip,new->cpu,
564 new->fs->root,prev->fs->root);
565 #endif
566 #ifdef CONFIG_SMP
567diff -urN linux-2.4.24.org/arch/ppc64/kernel/smp.c linux-2.4.24/arch/ppc64/kernel/smp.c
568--- linux-2.4.24.org/arch/ppc64/kernel/smp.c 2004-02-04 20:50:43.176103851 +0100
569+++ linux-2.4.24/arch/ppc64/kernel/smp.c 2004-02-04 20:52:52.990103148 +0100
570@@ -70,6 +70,7 @@
571 extern atomic_t ipi_sent;
572 spinlock_t kernel_flag __cacheline_aligned = SPIN_LOCK_UNLOCKED;
573 cycles_t cacheflush_time;
574+unsigned long cache_decay_ticks = HZ/100;
575 static int max_cpus __initdata = NR_CPUS;
576
577 unsigned long cpu_online_map;
578@@ -636,9 +637,7 @@
579 * cpu 0, the master -- Cort
580 */
581 cpu_callin_map[0] = 1;
582- current->processor = 0;
583-
584- init_idle();
585+ current->cpu = 0;
586
587 for (i = 0; i < NR_CPUS; i++) {
588 paca[i].prof_counter = 1;
589@@ -709,12 +708,9 @@
590
591 PPCDBG(PPCDBG_SMP,"\tProcessor %d, task = 0x%lx\n", i, p);
592
593- del_from_runqueue(p);
594+ init_idle(p, i);
595 unhash_process(p);
596- init_tasks[i] = p;
597
598- p->processor = i;
599- p->cpus_runnable = 1 << i; /* we schedule the first task manually */
600 current_set[i].task = p;
601 sp = ((unsigned long)p) + sizeof(union task_union)
602 - STACK_FRAME_OVERHEAD;
603@@ -765,7 +761,7 @@
604
605 void __init smp_callin(void)
606 {
607- int cpu = current->processor;
608+ int cpu = current->cpu;
609
610 smp_store_cpu_info(cpu);
611 set_dec(paca[cpu].default_decr);
612@@ -773,8 +769,6 @@
613
614 ppc_md.smp_setup_cpu(cpu);
615
616- init_idle();
617-
618 set_bit(smp_processor_id(), &cpu_online_map);
619
620 while(!smp_commenced) {
621@@ -793,7 +787,7 @@
622 {
623 int cpu;
624
625- cpu = current->processor;
626+ cpu = current->cpu;
627 atomic_inc(&init_mm.mm_count);
628 current->active_mm = &init_mm;
629 smp_callin();
630diff -urN linux-2.4.24.org/arch/s390/kernel/process.c linux-2.4.24/arch/s390/kernel/process.c
631--- linux-2.4.24.org/arch/s390/kernel/process.c 2004-02-04 20:51:56.088938275 +0100
632+++ linux-2.4.24/arch/s390/kernel/process.c 2004-02-04 20:52:52.994102316 +0100
633@@ -57,8 +57,7 @@
634
635 /* endless idle loop with no priority at all */
636 init_idle();
637- current->nice = 20;
638- current->counter = -100;
639+
640 while (1) {
641 __cli();
642 if (current->need_resched) {
643diff -urN linux-2.4.24.org/arch/s390x/kernel/process.c linux-2.4.24/arch/s390x/kernel/process.c
644--- linux-2.4.24.org/arch/s390x/kernel/process.c 2004-02-04 20:52:03.781338295 +0100
645+++ linux-2.4.24/arch/s390x/kernel/process.c 2004-02-04 20:52:52.997101692 +0100
646@@ -57,8 +57,7 @@
647
648 /* endless idle loop with no priority at all */
649 init_idle();
650- current->nice = 20;
651- current->counter = -100;
652+
653 while (1) {
654 __cli();
655 if (current->need_resched) {
656diff -urN linux-2.4.24.org/arch/sh/kernel/process.c linux-2.4.24/arch/sh/kernel/process.c
657--- linux-2.4.24.org/arch/sh/kernel/process.c 2004-02-04 20:51:43.820490054 +0100
658+++ linux-2.4.24/arch/sh/kernel/process.c 2004-02-04 20:52:53.000101068 +0100
659@@ -42,8 +42,6 @@
660 {
661 /* endless idle loop with no priority at all */
662 init_idle();
663- current->nice = 20;
664- current->counter = -100;
665
666 while (1) {
667 if (hlt_counter) {
668diff -urN linux-2.4.24.org/arch/sparc/kernel/entry.S linux-2.4.24/arch/sparc/kernel/entry.S
669--- linux-2.4.24.org/arch/sparc/kernel/entry.S 2004-02-04 20:50:51.877294031 +0100
670+++ linux-2.4.24/arch/sparc/kernel/entry.S 2004-02-04 20:52:53.005100028 +0100
671@@ -1471,7 +1471,9 @@
672
673 .globl C_LABEL(ret_from_fork)
674 C_LABEL(ret_from_fork):
675+#if CONFIG_SMP
676 call schedule_tail
677+#endif
678 mov %g3, %o0
679 b C_LABEL(ret_sys_call)
680 ld [%sp + STACKFRAME_SZ + PT_I0], %o0
681diff -urN linux-2.4.24.org/arch/sparc/kernel/process.c linux-2.4.24/arch/sparc/kernel/process.c
682--- linux-2.4.24.org/arch/sparc/kernel/process.c 2004-02-04 20:50:51.550362032 +0100
683+++ linux-2.4.24/arch/sparc/kernel/process.c 2004-02-04 20:52:53.009099197 +0100
684@@ -74,9 +74,6 @@
685 goto out;
686
687 /* endless idle loop with no priority at all */
688- current->nice = 20;
689- current->counter = -100;
690- init_idle();
691
692 for (;;) {
693 if (ARCH_SUN4C_SUN4) {
694@@ -128,9 +125,6 @@
695 int cpu_idle(void)
696 {
697 /* endless idle loop with no priority at all */
698- current->nice = 20;
699- current->counter = -100;
700- init_idle();
701
702 while(1) {
703 if(current->need_resched) {
704diff -urN linux-2.4.24.org/arch/sparc/kernel/smp.c linux-2.4.24/arch/sparc/kernel/smp.c
705--- linux-2.4.24.org/arch/sparc/kernel/smp.c 2004-02-04 20:50:51.522367854 +0100
706+++ linux-2.4.24/arch/sparc/kernel/smp.c 2004-02-04 20:52:53.013098365 +0100
707@@ -57,6 +57,7 @@
708 volatile int __cpu_number_map[NR_CPUS];
709 volatile int __cpu_logical_map[NR_CPUS];
710 cycles_t cacheflush_time = 0; /* XXX */
711+unsigned long cache_decay_ticks = HZ/100; /* XXX */
712
713 /* The only guaranteed locking primitive available on all Sparc
714 * processors is 'ldstub [%reg + immediate], %dest_reg' which atomically
715diff -urN linux-2.4.24.org/arch/sparc/kernel/sun4d_smp.c linux-2.4.24/arch/sparc/kernel/sun4d_smp.c
716--- linux-2.4.24.org/arch/sparc/kernel/sun4d_smp.c 2004-02-04 20:50:51.254423586 +0100
717+++ linux-2.4.24/arch/sparc/kernel/sun4d_smp.c 2004-02-04 20:52:53.027095454 +0100
718@@ -107,7 +107,6 @@
719 * the SMP initialization the master will be just allowed
720 * to call the scheduler code.
721 */
722- init_idle();
723
724 /* Get our local ticker going. */
725 smp_setup_percpu_timer();
726@@ -127,7 +126,7 @@
727 while((unsigned long)current_set[cpuid] < PAGE_OFFSET)
728 barrier();
729
730- while(current_set[cpuid]->processor != cpuid)
731+ while(current_set[cpuid]->cpu != cpuid)
732 barrier();
733
734 /* Fix idle thread fields. */
735@@ -197,10 +196,8 @@
736 mid_xlate[i] = i;
737 __cpu_number_map[boot_cpu_id] = 0;
738 __cpu_logical_map[0] = boot_cpu_id;
739- current->processor = boot_cpu_id;
740 smp_store_cpu_info(boot_cpu_id);
741 smp_setup_percpu_timer();
742- init_idle();
743 local_flush_cache_all();
744 if(linux_num_cpus == 1)
745 return; /* Not an MP box. */
746@@ -222,14 +219,10 @@
747 cpucount++;
748
749 p = init_task.prev_task;
750- init_tasks[i] = p;
751-
752- p->processor = i;
753- p->cpus_runnable = 1 << i; /* we schedule the first task manually */
754
755 current_set[i] = p;
756
757- del_from_runqueue(p);
758+ init_idle(p, i);
759 unhash_process(p);
760
761 for (no = 0; no < linux_num_cpus; no++)
762diff -urN linux-2.4.24.org/arch/sparc/kernel/sun4m_smp.c linux-2.4.24/arch/sparc/kernel/sun4m_smp.c
763--- linux-2.4.24.org/arch/sparc/kernel/sun4m_smp.c 2004-02-04 20:50:52.194228110 +0100
764+++ linux-2.4.24/arch/sparc/kernel/sun4m_smp.c 2004-02-04 20:52:53.030094830 +0100
765@@ -104,7 +104,6 @@
766 * the SMP initialization the master will be just allowed
767 * to call the scheduler code.
768 */
769- init_idle();
770
771 /* Allow master to continue. */
772 swap((unsigned long *)&cpu_callin_map[cpuid], 1);
773@@ -170,12 +169,10 @@
774 mid_xlate[boot_cpu_id] = (linux_cpus[boot_cpu_id].mid & ~8);
775 __cpu_number_map[boot_cpu_id] = 0;
776 __cpu_logical_map[0] = boot_cpu_id;
777- current->processor = boot_cpu_id;
778
779 smp_store_cpu_info(boot_cpu_id);
780 set_irq_udt(mid_xlate[boot_cpu_id]);
781 smp_setup_percpu_timer();
782- init_idle();
783 local_flush_cache_all();
784 if(linux_num_cpus == 1)
785 return; /* Not an MP box. */
786@@ -195,14 +192,10 @@
787 cpucount++;
788
789 p = init_task.prev_task;
790- init_tasks[i] = p;
791-
792- p->processor = i;
793- p->cpus_runnable = 1 << i; /* we schedule the first task manually */
794
795 current_set[i] = p;
796
797- del_from_runqueue(p);
798+ init_idle(p, i);
799 unhash_process(p);
800
801 /* See trampoline.S for details... */
802diff -urN linux-2.4.24.org/arch/sparc64/kernel/entry.S linux-2.4.24/arch/sparc64/kernel/entry.S
803--- linux-2.4.24.org/arch/sparc64/kernel/entry.S 2004-02-04 20:51:29.076556726 +0100
804+++ linux-2.4.24/arch/sparc64/kernel/entry.S 2004-02-04 20:52:53.039092958 +0100
805@@ -1627,7 +1627,9 @@
806 */
807 andn %o7, SPARC_FLAG_NEWCHILD, %l0
808 mov %g5, %o0 /* 'prev' */
809+#if CONFIG_SMP
810 call schedule_tail
811+#endif
812 stb %l0, [%g6 + AOFF_task_thread + AOFF_thread_flags]
813 andcc %l0, SPARC_FLAG_PERFCTR, %g0
814 be,pt %icc, 1f
815diff -urN linux-2.4.24.org/arch/sparc64/kernel/irq.c linux-2.4.24/arch/sparc64/kernel/irq.c
816--- linux-2.4.24.org/arch/sparc64/kernel/irq.c 2004-02-04 20:51:28.993573986 +0100
817+++ linux-2.4.24/arch/sparc64/kernel/irq.c 2004-02-04 20:52:53.044091918 +0100
818@@ -174,7 +174,7 @@
819 tid = ((tid & UPA_CONFIG_MID) << 9);
820 tid &= IMAP_TID_UPA;
821 } else {
822- tid = (starfire_translate(imap, current->processor) << 26);
823+ tid = (starfire_translate(imap, current->cpu) << 26);
824 tid &= IMAP_TID_UPA;
825 }
826
827diff -urN linux-2.4.24.org/arch/sparc64/kernel/process.c linux-2.4.24/arch/sparc64/kernel/process.c
828--- linux-2.4.24.org/arch/sparc64/kernel/process.c 2004-02-04 20:51:29.998364993 +0100
829+++ linux-2.4.24/arch/sparc64/kernel/process.c 2004-02-04 20:52:53.049090879 +0100
830@@ -54,9 +54,6 @@
831 return -EPERM;
832
833 /* endless idle loop with no priority at all */
834- current->nice = 20;
835- current->counter = -100;
836- init_idle();
837
838 for (;;) {
839 /* If current->need_resched is zero we should really
840@@ -80,14 +77,10 @@
841 /*
842 * the idle loop on a UltraMultiPenguin...
843 */
844-#define idle_me_harder() (cpu_data[current->processor].idle_volume += 1)
845-#define unidle_me() (cpu_data[current->processor].idle_volume = 0)
846+#define idle_me_harder() (cpu_data[current->cpu].idle_volume += 1)
847+#define unidle_me() (cpu_data[current->cpu].idle_volume = 0)
848 int cpu_idle(void)
849 {
850- current->nice = 20;
851- current->counter = -100;
852- init_idle();
853-
854 while(1) {
855 if (current->need_resched != 0) {
856 unidle_me();
857diff -urN linux-2.4.24.org/arch/sparc64/kernel/rtrap.S linux-2.4.24/arch/sparc64/kernel/rtrap.S
858--- linux-2.4.24.org/arch/sparc64/kernel/rtrap.S 2004-02-04 20:51:29.910383293 +0100
859+++ linux-2.4.24/arch/sparc64/kernel/rtrap.S 2004-02-04 20:52:53.053090047 +0100
860@@ -140,7 +140,7 @@
861 .align 64
862 .globl rtrap_clr_l6, rtrap, irqsz_patchme, rtrap_xcall
863 rtrap_clr_l6: clr %l6
864-rtrap: lduw [%g6 + AOFF_task_processor], %l0
865+rtrap: lduw [%g6 + AOFF_task_cpu], %l0
866 sethi %hi(irq_stat), %l2 ! &softirq_active
867 or %l2, %lo(irq_stat), %l2 ! &softirq_active
868 irqsz_patchme: sllx %l0, 0, %l0
869diff -urN linux-2.4.24.org/arch/sparc64/kernel/smp.c linux-2.4.24/arch/sparc64/kernel/smp.c
870--- linux-2.4.24.org/arch/sparc64/kernel/smp.c 2004-02-04 20:51:28.749624726 +0100
871+++ linux-2.4.24/arch/sparc64/kernel/smp.c 2004-02-04 20:52:53.068086928 +0100
872@@ -347,6 +347,8 @@
873
874 extern unsigned long sparc64_cpu_startup;
875
876+static void __init smp_tune_scheduling(void);
877+
878 /* The OBP cpu startup callback truncates the 3rd arg cookie to
879 * 32-bits (I think) so to be safe we have it read the pointer
880 * contained here so we work on >4GB machines. -DaveM
881@@ -360,7 +360,7 @@
882 printk("Entering UltraSMPenguin Mode...\n");
883 __sti();
884 smp_store_cpu_info(boot_cpu_id);
885- init_idle();
886+ smp_tune_scheduling();
887
888 if (linux_num_cpus == 1)
889 return;
890@@ -383,12 +383,8 @@
891 cpucount++;
892
893 p = init_task.prev_task;
894- init_tasks[cpucount] = p;
895
896- p->processor = i;
897- p->cpus_runnable = 1UL << i; /* we schedule the first task manually */
898-
899- del_from_runqueue(p);
900+ init_idle(p, i);
901 unhash_process(p);
902
903 callin_flag = 0;
904@@ -1214,10 +1210,96 @@
905 __cpu_number_map[boot_cpu_id] = 0;
906 prom_cpu_nodes[boot_cpu_id] = linux_cpus[0].prom_node;
907 __cpu_logical_map[0] = boot_cpu_id;
908- current->processor = boot_cpu_id;
909 prof_counter(boot_cpu_id) = prof_multiplier(boot_cpu_id) = 1;
910 }
911
912+cycles_t cacheflush_time;
913+unsigned long cache_decay_ticks;
914+
915+extern unsigned long cheetah_tune_scheduling(void);
916+
917+static void __init smp_tune_scheduling(void)
918+{
919+ unsigned long orig_flush_base, flush_base, flags, *p;
920+ unsigned int ecache_size, order;
921+ cycles_t tick1, tick2, raw;
922+
923+ /* Approximate heuristic for SMP scheduling. It is an
924+ * estimation of the time it takes to flush the L2 cache
925+ * on the local processor.
926+ *
927+ * The ia32 chooses to use the L1 cache flush time instead,
928+ * and I consider this complete nonsense. The Ultra can service
929+ * a miss to the L1 with a hit to the L2 in 7 or 8 cycles, and
930+ * L2 misses are what create extra bus traffic (ie. the "cost"
931+ * of moving a process from one cpu to another).
932+ */
933+ printk("SMP: Calibrating ecache flush... ");
934+ if (tlb_type == cheetah || tlb_type == cheetah_plus) {
935+ cacheflush_time = cheetah_tune_scheduling();
936+ goto report;
937+ }
938+
939+ ecache_size = prom_getintdefault(linux_cpus[0].prom_node,
940+ "ecache-size", (512 * 1024));
941+ if (ecache_size > (4 * 1024 * 1024))
942+ ecache_size = (4 * 1024 * 1024);
943+ orig_flush_base = flush_base =
944+ __get_free_pages(GFP_KERNEL, order = get_order(ecache_size));
945+
946+ if (flush_base != 0UL) {
947+ local_irq_save(flags);
948+
949+ /* Scan twice the size once just to get the TLB entries
950+ * loaded and make sure the second scan measures pure misses.
951+ */
952+ for (p = (unsigned long *)flush_base;
953+ ((unsigned long)p) < (flush_base + (ecache_size<<1));
954+ p += (64 / sizeof(unsigned long)))
955+ *((volatile unsigned long *)p);
956+
957+ tick1 = tick_ops->get_tick();
958+
959+ __asm__ __volatile__("1:\n\t"
960+ "ldx [%0 + 0x000], %%g1\n\t"
961+ "ldx [%0 + 0x040], %%g2\n\t"
962+ "ldx [%0 + 0x080], %%g3\n\t"
963+ "ldx [%0 + 0x0c0], %%g5\n\t"
964+ "add %0, 0x100, %0\n\t"
965+ "cmp %0, %2\n\t"
966+ "bne,pt %%xcc, 1b\n\t"
967+ " nop"
968+ : "=&r" (flush_base)
969+ : "0" (flush_base),
970+ "r" (flush_base + ecache_size)
971+ : "g1", "g2", "g3", "g5");
972+
973+ tick2 = tick_ops->get_tick();
974+
975+ local_irq_restore(flags);
976+
977+ raw = (tick2 - tick1);
978+
979+ /* Dampen it a little, considering two processes
980+ * sharing the cache and fitting.
981+ */
982+ cacheflush_time = (raw - (raw >> 2));
983+
984+ free_pages(orig_flush_base, order);
985+ } else {
986+ cacheflush_time = ((ecache_size << 2) +
987+ (ecache_size << 1));
988+ }
989+report:
990+ /* Convert ticks/sticks to jiffies. */
991+ cache_decay_ticks = cacheflush_time / timer_tick_offset;
992+ if (cache_decay_ticks < 1)
993+ cache_decay_ticks = 1;
994+
995+ printk("Using heuristic of %ld cycles, %ld ticks.\n",
996+ cacheflush_time, cache_decay_ticks);
997+}
998+
999 static inline unsigned long find_flush_base(unsigned long size)
1000 {
1001 struct page *p = mem_map;
1002diff -urN linux-2.4.24.org/arch/sparc64/kernel/trampoline.S linux-2.4.24/arch/sparc64/kernel/trampoline.S
1003--- linux-2.4.24.org/arch/sparc64/kernel/trampoline.S 2004-02-04 20:51:29.425484150 +0100
1004+++ linux-2.4.24/arch/sparc64/kernel/trampoline.S 2004-02-04 20:52:53.073085888 +0100
1005@@ -250,7 +250,7 @@
1006 wrpr %o1, PSTATE_IG, %pstate
1007
1008 /* Get our UPA MID. */
1009- lduw [%o2 + AOFF_task_processor], %g1
1010+ lduw [%o2 + AOFF_task_cpu], %g1
1011 sethi %hi(cpu_data), %g5
1012 or %g5, %lo(cpu_data), %g5
1013
1014diff -urN linux-2.4.24.org/arch/sparc64/kernel/traps.c linux-2.4.24/arch/sparc64/kernel/traps.c
1015--- linux-2.4.24.org/arch/sparc64/kernel/traps.c 2004-02-04 20:51:28.672640738 +0100
1016+++ linux-2.4.24/arch/sparc64/kernel/traps.c 2004-02-04 20:52:53.078084848 +0100
1017@@ -16,6 +16,7 @@
1018 #include <linux/smp.h>
1019 #include <linux/smp_lock.h>
1020 #include <linux/mm.h>
1021+#include <linux/init.h>
1022
1023 #include <asm/delay.h>
1024 #include <asm/system.h>
1025@@ -755,6 +756,48 @@
1026 "i" (ASI_PHYS_USE_EC));
1027 }
1028
1029+#ifdef CONFIG_SMP
1030+unsigned long __init cheetah_tune_scheduling(void)
1031+{
1032+ unsigned long tick1, tick2, raw;
1033+ unsigned long flush_base = ecache_flush_physbase;
1034+ unsigned long flush_linesize = ecache_flush_linesize;
1035+ unsigned long flush_size = ecache_flush_size;
1036+
1037+ /* Run through the whole cache to guarentee the timed loop
1038+ * is really displacing cache lines.
1039+ */
1040+ __asm__ __volatile__("1: subcc %0, %4, %0\n\t"
1041+ " bne,pt %%xcc, 1b\n\t"
1042+ " ldxa [%2 + %0] %3, %%g0\n\t"
1043+ : "=&r" (flush_size)
1044+ : "0" (flush_size), "r" (flush_base),
1045+ "i" (ASI_PHYS_USE_EC), "r" (flush_linesize));
1046+
1047+ /* The flush area is 2 X Ecache-size, so cut this in half for
1048+ * the timed loop.
1049+ */
1050+ flush_base = ecache_flush_physbase;
1051+ flush_linesize = ecache_flush_linesize;
1052+ flush_size = ecache_flush_size >> 1;
1053+
1054+ __asm__ __volatile__("rd %%tick, %0" : "=r" (tick1));
1055+
1056+ __asm__ __volatile__("1: subcc %0, %4, %0\n\t"
1057+ " bne,pt %%xcc, 1b\n\t"
1058+ " ldxa [%2 + %0] %3, %%g0\n\t"
1059+ : "=&r" (flush_size)
1060+ : "0" (flush_size), "r" (flush_base),
1061+ "i" (ASI_PHYS_USE_EC), "r" (flush_linesize));
1062+
1063+ __asm__ __volatile__("rd %%tick, %0" : "=r" (tick2));
1064+
1065+ raw = (tick2 - tick1);
1066+
1067+ return (raw - (raw >> 2));
1068+}
1069+#endif
1070+
1071 /* Unfortunately, the diagnostic access to the I-cache tags we need to
1072 * use to clear the thing interferes with I-cache coherency transactions.
1073 *
1074diff -urN linux-2.4.24.org/Documentation/sched-coding.txt linux-2.4.24/Documentation/sched-coding.txt
1075--- linux-2.4.24.org/Documentation/sched-coding.txt 1970-01-01 01:00:00.000000000 +0100
1076+++ linux-2.4.24/Documentation/sched-coding.txt 2004-02-04 20:52:53.082084016 +0100
1077@@ -0,0 +1,126 @@
1078+ Reference for various scheduler-related methods in the O(1) scheduler
1079+ Robert Love <rml@tech9.net>, MontaVista Software
1080+
1081+
1082+Note most of these methods are local to kernel/sched.c - this is by design.
1083+The scheduler is meant to be self-contained and abstracted away. This document
1084+is primarily for understanding the scheduler, not interfacing to it. Some of
1085+the discussed interfaces, however, are general process/scheduling methods.
1086+They are typically defined in include/linux/sched.h.
1087+
1088+
1089+Main Scheduling Methods
1090+-----------------------
1091+
1092+void load_balance(runqueue_t *this_rq, int idle)
1093+ Attempts to pull tasks from one cpu to another to balance cpu usage,
1094+ if needed. This method is called explicitly if the runqueues are
1095+ imbalanced or periodically by the timer tick. Prior to calling,
1096+ the current runqueue must be locked and interrupts disabled.
1097+
1098+void schedule()
1099+ The main scheduling function. Upon return, the highest priority
1100+ process will be active.
1101+
1102+
1103+Locking
1104+-------
1105+
1106+Each runqueue has its own lock, rq->lock. When multiple runqueues need
1107+to be locked, lock acquires must be ordered by ascending &runqueue value.
1108+
1109+A specific runqueue is locked via
1110+
1111+ task_rq_lock(task_t pid, unsigned long *flags)
1112+
1113+which disables preemption, disables interrupts, and locks the runqueue pid is
1114+running on. Likewise,
1115+
1116+ task_rq_unlock(task_t pid, unsigned long *flags)
1117+
1118+unlocks the runqueue pid is running on, restores interrupts to their previous
1119+state, and reenables preemption.
1120+
1121+The routines
1122+
1123+ double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
1124+
1125+and
1126+
1127+ double_rq_unlock(runqueue_t *rq1, runqueue_t rq2)
1128+
1129+safely lock and unlock, respectively, the two specified runqueues. They do
1130+not, however, disable and restore interrupts. Users are required to do so
1131+manually before and after calls.
1132+
1133+
1134+Values
1135+------
1136+
1137+MAX_PRIO
1138+ The maximum priority of the system, stored in the task as task->prio.
1139+ Lower priorities are higher. Normal (non-RT) priorities range from
1140+ MAX_RT_PRIO to (MAX_PRIO - 1).
1141+MAX_RT_PRIO
1142+ The maximum real-time priority of the system. Valid RT priorities
1143+ range from 0 to (MAX_RT_PRIO - 1).
1144+MAX_USER_RT_PRIO
1145+ The maximum real-time priority that is exported to user-space. Should
1146+ always be equal to or less than MAX_RT_PRIO. Setting it less allows
1147+ kernel threads to have higher priorities than any user-space task.
1148+MIN_TIMESLICE
1149+MAX_TIMESLICE
1150+ Respectively, the minimum and maximum timeslices (quanta) of a process.
1151+
1152+Data
1153+----
1154+
1155+struct runqueue
1156+ The main per-CPU runqueue data structure.
1157+struct task_struct
1158+ The main per-process data structure.
1159+
1160+
1161+General Methods
1162+---------------
1163+
1164+cpu_rq(cpu)
1165+ Returns the runqueue of the specified cpu.
1166+this_rq()
1167+ Returns the runqueue of the current cpu.
1168+task_rq(pid)
1169+ Returns the runqueue which holds the specified pid.
1170+cpu_curr(cpu)
1171+ Returns the task currently running on the given cpu.
1172+rt_task(pid)
1173+ Returns true if pid is real-time, false if not.
1174+
1175+
1176+Process Control Methods
1177+-----------------------
1178+
1179+void set_user_nice(task_t *p, long nice)
1180+ Sets the "nice" value of task p to the given value.
1181+int setscheduler(pid_t pid, int policy, struct sched_param *param)
1182+ Sets the scheduling policy and parameters for the given pid.
1183+void set_cpus_allowed(task_t *p, unsigned long new_mask)
1184+ Sets a given task's CPU affinity and migrates it to a proper cpu.
1185+ Callers must have a valid reference to the task and assure the
1186+ task does not exit prematurely. No locks can be held during the call.
1187+set_task_state(tsk, state_value)
1188+ Sets the given task's state to the given value.
1189+set_current_state(state_value)
1190+ Sets the current task's state to the given value.
1191+void set_tsk_need_resched(struct task_struct *tsk)
1192+ Sets need_resched in the given task.
1193+void clear_tsk_need_resched(struct task_struct *tsk)
1194+ Clears need_resched in the given task.
1195+void set_need_resched()
1196+ Sets need_resched in the current task.
1197+void clear_need_resched()
1198+ Clears need_resched in the current task.
1199+int need_resched()
1200+ Returns true if need_resched is set in the current task, false
1201+ otherwise.
1202+yield()
1203+ Place the current process at the end of the runqueue and call schedule.
1204diff -urN linux-2.4.24.org/Documentation/sched-design.txt linux-2.4.24/Documentation/sched-design.txt
1205--- linux-2.4.24.org/Documentation/sched-design.txt 1970-01-01 01:00:00.000000000 +0100
1206+++ linux-2.4.24/Documentation/sched-design.txt 2004-02-04 20:52:53.088082769 +0100
1207@@ -0,0 +1,165 @@
1208+ Goals, Design and Implementation of the
1209+ new ultra-scalable O(1) scheduler
1210+
1211+
1212+ This is an edited version of an email Ingo Molnar sent to
1213+ lkml on 4 Jan 2002. It describes the goals, design, and
1214+ implementation of Ingo's new ultra-scalable O(1) scheduler.
1215+ Last Updated: 18 April 2002.
1216+
1217+
1218+Goal
1219+====
1220+
1221+The main goal of the new scheduler is to keep all the good things we know
1222+and love about the current Linux scheduler:
1223+
1224+ - good interactive performance even during high load: if the user
1225+ types or clicks then the system must react instantly and must execute
1226+ the user tasks smoothly, even during considerable background load.
1227+
1228+ - good scheduling/wakeup performance with 1-2 runnable processes.
1229+
1230+ - fairness: no process should stay without any timeslice for any
1231+ unreasonable amount of time. No process should get an unjustly high
1232+ amount of CPU time.
1233+
1234+ - priorities: less important tasks can be started with lower priority,
1235+ more important tasks with higher priority.
1236+
1237+ - SMP efficiency: no CPU should stay idle if there is work to do.
1238+
1239+ - SMP affinity: processes which run on one CPU should stay affine to
1240+ that CPU. Processes should not bounce between CPUs too frequently.
1241+
1242+ - plus additional scheduler features: RT scheduling, CPU binding.
1243+
1244+and the goal is also to add a few new things:
1245+
1246+ - fully O(1) scheduling. Are you tired of the recalculation loop
1247+ blowing the L1 cache away every now and then? Do you think the goodness
1248+ loop is taking a bit too long to finish if there are lots of runnable
1249+ processes? This new scheduler takes no prisoners: wakeup(), schedule(),
1250+ the timer interrupt are all O(1) algorithms. There is no recalculation
1251+ loop. There is no goodness loop either.
1252+
1253+ - 'perfect' SMP scalability. With the new scheduler there is no 'big'
1254+ runqueue_lock anymore - it's all per-CPU runqueues and locks - two
1255+ tasks on two separate CPUs can wake up, schedule and context-switch
1256+ completely in parallel, without any interlocking. All
1257+ scheduling-relevant data is structured for maximum scalability.
1258+
1259+ - better SMP affinity. The old scheduler has a particular weakness that
1260+ causes the random bouncing of tasks between CPUs if/when higher
1261+ priority/interactive tasks, this was observed and reported by many
1262+ people. The reason is that the timeslice recalculation loop first needs
1263+ every currently running task to consume its timeslice. But when this
1264+ happens on eg. an 8-way system, then this property starves an
1265+ increasing number of CPUs from executing any process. Once the last
1266+ task that has a timeslice left has finished using up that timeslice,
1267+ the recalculation loop is triggered and other CPUs can start executing
1268+ tasks again - after having idled around for a number of timer ticks.
1269+ The more CPUs, the worse this effect.
1270+
1271+ Furthermore, this same effect causes the bouncing effect as well:
1272+ whenever there is such a 'timeslice squeeze' of the global runqueue,
1273+ idle processors start executing tasks which are not affine to that CPU.
1274+ (because the affine tasks have finished off their timeslices already.)
1275+
1276+ The new scheduler solves this problem by distributing timeslices on a
1277+ per-CPU basis, without having any global synchronization or
1278+ recalculation.
1279+
1280+ - batch scheduling. A significant proportion of computing-intensive tasks
1281+ benefit from batch-scheduling, where timeslices are long and processes
1282+ are roundrobin scheduled. The new scheduler does such batch-scheduling
1283+ of the lowest priority tasks - so nice +19 jobs will get
1284+ 'batch-scheduled' automatically. With this scheduler, nice +19 jobs are
1285+ in essence SCHED_IDLE, from an interactiveness point of view.
1286+
1287+ - handle extreme loads more smoothly, without breakdown and scheduling
1288+ storms.
1289+
1290+ - O(1) RT scheduling. For those RT folks who are paranoid about the
1291+ O(nr_running) property of the goodness loop and the recalculation loop.
1292+
1293+ - run fork()ed children before the parent. Andrea has pointed out the
1294+ advantages of this a few months ago, but patches for this feature
1295+ do not work with the old scheduler as well as they should,
1296+ because idle processes often steal the new child before the fork()ing
1297+ CPU gets to execute it.
1298+
1299+
1300+Design
1301+======
1302+
1303+the core of the new scheduler are the following mechanisms:
1304+
1305+ - *two*, priority-ordered 'priority arrays' per CPU. There is an 'active'
1306+ array and an 'expired' array. The active array contains all tasks that
1307+ are affine to this CPU and have timeslices left. The expired array
1308+ contains all tasks which have used up their timeslices - but this array
1309+ is kept sorted as well. The active and expired array is not accessed
1310+ directly, it's accessed through two pointers in the per-CPU runqueue
1311+ structure. If all active tasks are used up then we 'switch' the two
1312+ pointers and from now on the ready-to-go (former-) expired array is the
1313+ active array - and the empty active array serves as the new collector
1314+ for expired tasks.
1315+
1316+ - there is a 64-bit bitmap cache for array indices. Finding the highest
1317+ priority task is thus a matter of two x86 BSFL bit-search instructions.
1318+
1319+the split-array solution enables us to have an arbitrary number of active
1320+and expired tasks, and the recalculation of timeslices can be done
1321+immediately when the timeslice expires. Because the arrays are always
1322+access through the pointers in the runqueue, switching the two arrays can
1323+be done very quickly.
1324+
1325+this is a hybrid priority-list approach coupled with roundrobin
1326+scheduling and the array-switch method of distributing timeslices.
1327+
1328+ - there is a per-task 'load estimator'.
1329+
1330+one of the toughest things to get right is good interactive feel during
1331+heavy system load. While playing with various scheduler variants i found
1332+that the best interactive feel is achieved not by 'boosting' interactive
1333+tasks, but by 'punishing' tasks that want to use more CPU time than there
1334+is available. This method is also much easier to do in an O(1) fashion.
1335+
1336+to establish the actual 'load' the task contributes to the system, a
1337+complex-looking but pretty accurate method is used: there is a 4-entry
1338+'history' ringbuffer of the task's activities during the last 4 seconds.
1339+This ringbuffer is operated without much overhead. The entries tell the
1340+scheduler a pretty accurate load-history of the task: has it used up more
1341+CPU time or less during the past N seconds. [the size '4' and the interval
1342+of 4x 1 seconds was found by lots of experimentation - this part is
1343+flexible and can be changed in both directions.]
1344+
1345+the penalty a task gets for generating more load than the CPU can handle
1346+is a priority decrease - there is a maximum amount to this penalty
1347+relative to their static priority, so even fully CPU-bound tasks will
1348+observe each other's priorities, and will share the CPU accordingly.
1349+
1350+the SMP load-balancer can be extended/switched with additional parallel
1351+computing and cache hierarchy concepts: NUMA scheduling, multi-core CPUs
1352+can be supported easily by changing the load-balancer. Right now it's
1353+tuned for my SMP systems.
1354+
1355+i skipped the prev->mm == next->mm advantage - no workload i know of shows
1356+any sensitivity to this. It can be added back by sacrificing O(1)
1357+schedule() [the current and one-lower priority list can be searched for a
1358+that->mm == current->mm condition], but costs a fair number of cycles
1359+during a number of important workloads, so i wanted to avoid this as much
1360+as possible.
1361+
1362+- the SMP idle-task startup code was still racy and the new scheduler
1363+triggered this. So i streamlined the idle-setup code a bit. We do not call
1364+into schedule() before all processors have started up fully and all idle
1365+threads are in place.
1366+
1367+- the patch also cleans up a number of aspects of sched.c - moves code
1368+into other areas of the kernel where it's appropriate, and simplifies
1369+certain code paths and data constructs. As a result, the new scheduler's
1370+code is smaller than the old one.
1371+
1372+ Ingo
1373diff -urN linux-2.4.24.org/drivers/char/drm-4.0/tdfx_drv.c linux-2.4.24/drivers/char/drm-4.0/tdfx_drv.c
1374--- linux-2.4.24.org/drivers/char/drm-4.0/tdfx_drv.c 2004-02-04 20:49:21.677055474 +0100
1375+++ linux-2.4.24/drivers/char/drm-4.0/tdfx_drv.c 2004-02-04 20:52:53.236051992 +0100
1376@@ -554,7 +554,6 @@
1377 lock.context, current->pid, j,
1378 dev->lock.lock_time, jiffies);
1379 current->state = TASK_INTERRUPTIBLE;
1380- current->policy |= SCHED_YIELD;
1381 schedule_timeout(DRM_LOCK_SLICE-j);
1382 DRM_DEBUG("jiffies=%d\n", jiffies);
1383 }
1384diff -urN linux-2.4.24.org/drivers/char/mwave/mwavedd.c linux-2.4.24/drivers/char/mwave/mwavedd.c
1385--- linux-2.4.24.org/drivers/char/mwave/mwavedd.c 2004-02-04 20:49:18.334750669 +0100
1386+++ linux-2.4.24/drivers/char/mwave/mwavedd.c 2004-02-04 20:52:53.321034316 +0100
1387@@ -279,7 +279,6 @@
1388 pDrvData->IPCs[ipcnum].bIsHere = FALSE;
1389 pDrvData->IPCs[ipcnum].bIsEnabled = TRUE;
1390 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0)
1391- current->nice = -20; /* boost to provide priority timing */
1392 #else
1393 current->priority = 0x28; /* boost to provide priority timing */
1394 #endif
1395diff -urN linux-2.4.24.org/drivers/char/serial_txx927.c linux-2.4.24/drivers/char/serial_txx927.c
1396--- linux-2.4.24.org/drivers/char/serial_txx927.c 2004-02-04 20:49:11.902088655 +0100
1397+++ linux-2.4.24/drivers/char/serial_txx927.c 2004-02-04 20:52:53.361025998 +0100
1398@@ -1533,7 +1533,6 @@
1399 printk("cisr = %d (jiff=%lu)...", cisr, jiffies);
1400 #endif
1401 current->state = TASK_INTERRUPTIBLE;
1402- current->counter = 0; /* make us low-priority */
1403 schedule_timeout(char_time);
1404 if (signal_pending(current))
1405 break;
1406diff -urN linux-2.4.24.org/drivers/md/md.c linux-2.4.24/drivers/md/md.c
1407--- linux-2.4.24.org/drivers/md/md.c 2004-02-04 20:50:32.930234961 +0100
1408+++ linux-2.4.24/drivers/md/md.c 2004-02-04 20:52:53.369024334 +0100
1409@@ -2939,8 +2939,6 @@
1410 * bdflush, otherwise bdflush will deadlock if there are too
1411 * many dirty RAID5 blocks.
1412 */
1413- current->policy = SCHED_OTHER;
1414- current->nice = -20;
1415 md_unlock_kernel();
1416
1417 complete(thread->event);
1418@@ -3464,11 +3462,6 @@
1419 "(but not more than %d KB/sec) for reconstruction.\n",
1420 sysctl_speed_limit_max);
1421
1422- /*
1423- * Resync has low priority.
1424- */
1425- current->nice = 19;
1426-
1427 is_mddev_idle(mddev); /* this also initializes IO event counters */
1428 for (m = 0; m < SYNC_MARKS; m++) {
1429 mark[m] = jiffies;
1430@@ -3546,16 +3539,13 @@
1431 currspeed = (j-mddev->resync_mark_cnt)/2/((jiffies-mddev->resync_mark)/HZ +1) +1;
1432
1433 if (currspeed > sysctl_speed_limit_min) {
1434- current->nice = 19;
1435-
1436 if ((currspeed > sysctl_speed_limit_max) ||
1437 !is_mddev_idle(mddev)) {
1438 current->state = TASK_INTERRUPTIBLE;
1439 md_schedule_timeout(HZ/4);
1440 goto repeat;
1441 }
1442- } else
1443- current->nice = -20;
1444+ }
1445 }
1446 printk(KERN_INFO "md: md%d: sync done.\n",mdidx(mddev));
1447 err = 0;
1448diff -urN linux-2.4.24.org/fs/binfmt_elf.c linux-2.4.24/fs/binfmt_elf.c
1449--- linux-2.4.24.org/fs/binfmt_elf.c 2004-02-04 20:47:14.464515701 +0100
1450+++ linux-2.4.24/fs/binfmt_elf.c 2004-02-04 20:52:53.390019967 +0100
1451@@ -1173,7 +1173,7 @@
1452 psinfo.pr_state = i;
1453 psinfo.pr_sname = (i < 0 || i > 5) ? '.' : "RSDZTD"[i];
1454 psinfo.pr_zomb = psinfo.pr_sname == 'Z';
1455- psinfo.pr_nice = current->nice;
1456+ psinfo.pr_nice = task_nice(current);
1457 psinfo.pr_flag = current->flags;
1458 psinfo.pr_uid = NEW_TO_OLD_UID(current->uid);
1459 psinfo.pr_gid = NEW_TO_OLD_GID(current->gid);
1460diff -urN linux-2.4.24.org/fs/jffs2/background.c linux-2.4.24/fs/jffs2/background.c
1461--- linux-2.4.24.org/fs/jffs2/background.c 2004-02-04 20:47:24.029526165 +0100
1462+++ linux-2.4.24/fs/jffs2/background.c 2004-02-04 20:52:53.418014145 +0100
1463@@ -106,9 +106,6 @@
1464
1465 sprintf(current->comm, "jffs2_gcd_mtd%d", c->mtd->index);
1466
1467- /* FIXME in the 2.2 backport */
1468- current->nice = 10;
1469-
1470 for (;;) {
1471 spin_lock_irq(&current->sigmask_lock);
1472 siginitsetinv (&current->blocked, sigmask(SIGHUP) | sigmask(SIGKILL) | sigmask(SIGSTOP) | sigmask(SIGCONT));
1473diff -urN linux-2.4.24.org/fs/proc/array.c linux-2.4.24/fs/proc/array.c
1474--- linux-2.4.24.org/fs/proc/array.c 2004-02-04 20:47:14.980408395 +0100
1475+++ linux-2.4.24/fs/proc/array.c 2004-02-04 20:52:53.447008114 +0100
1476@@ -339,9 +339,8 @@
1477
1478 /* scale priority and nice values from timeslices to -20..20 */
1479 /* to make it look like a "normal" Unix priority/nice value */
1480- priority = task->counter;
1481- priority = 20 - (priority * 10 + DEF_COUNTER / 2) / DEF_COUNTER;
1482- nice = task->nice;
1483+ priority = task_prio(task);
1484+ nice = task_nice(task);
1485
1486 read_lock(&tasklist_lock);
1487 ppid = task->pid ? task->p_opptr->pid : 0;
1488@@ -391,7 +390,7 @@
1489 task->nswap,
1490 task->cnswap,
1491 task->exit_signal,
1492- task->processor);
1493+ task->cpu);
1494 if(mm)
1495 mmput(mm);
1496 return res;
1497diff -urN linux-2.4.24.org/fs/proc/proc_misc.c linux-2.4.24/fs/proc/proc_misc.c
1498--- linux-2.4.24.org/fs/proc/proc_misc.c 2004-02-04 20:47:14.897425655 +0100
1499+++ linux-2.4.24/fs/proc/proc_misc.c 2004-02-04 20:52:53.485000212 +0100
1500@@ -109,11 +109,11 @@
1501 a = avenrun[0] + (FIXED_1/200);
1502 b = avenrun[1] + (FIXED_1/200);
1503 c = avenrun[2] + (FIXED_1/200);
1504- len = sprintf(page,"%d.%02d %d.%02d %d.%02d %d/%d %d\n",
1505+ len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
1506 LOAD_INT(a), LOAD_FRAC(a),
1507 LOAD_INT(b), LOAD_FRAC(b),
1508 LOAD_INT(c), LOAD_FRAC(c),
1509- nr_running, nr_threads, last_pid);
1510+ nr_running(), nr_threads, last_pid);
1511 return proc_calc_metrics(page, start, off, count, eof, len);
1512 }
1513
1514@@ -125,7 +125,7 @@
1515 int len;
1516
1517 uptime = jiffies;
1518- idle = init_tasks[0]->times.tms_utime + init_tasks[0]->times.tms_stime;
1519+ idle = init_task.times.tms_utime + init_task.times.tms_stime;
1520
1521 /* The formula for the fraction parts really is ((t * 100) / HZ) % 100, but
1522 that would overflow about every five days at HZ == 100.
1523@@ -374,10 +374,10 @@
1524 }
1525
1526 proc_sprintf(page, &off, &len,
1527- "\nctxt %u\n"
1528+ "\nctxt %lu\n"
1529 "btime %lu\n"
1530 "processes %lu\n",
1531- kstat.context_swtch,
1532+ nr_context_switches(),
1533 xtime.tv_sec - jif / HZ,
1534 total_forks);
1535
1536diff -urN linux-2.4.24.org/fs/reiserfs/buffer2.c linux-2.4.24/fs/reiserfs/buffer2.c
1537--- linux-2.4.24.org/fs/reiserfs/buffer2.c 2004-02-04 20:47:23.322673191 +0100
1538+++ linux-2.4.24/fs/reiserfs/buffer2.c 2004-02-04 20:52:53.511994597 +0100
1539@@ -51,11 +51,11 @@
1540 struct buffer_head * reiserfs_bread (struct super_block *super, int n_block, int n_size)
1541 {
1542 struct buffer_head *result;
1543- PROC_EXP( unsigned int ctx_switches = kstat.context_swtch );
1544+ PROC_EXP( unsigned int ctx_switches = nr_context_switches(); );
1545
1546 result = bread (super -> s_dev, n_block, n_size);
1547 PROC_INFO_INC( super, breads );
1548- PROC_EXP( if( kstat.context_swtch != ctx_switches )
1549+ PROC_EXP( if( nr_context_switches() != ctx_switches )
1550 PROC_INFO_INC( super, bread_miss ) );
1551 return result;
1552 }
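The PROC_EXP() hunk above counts a reiserfs bread as a "miss" when the global context-switch counter moved across the call, i.e. the caller had to sleep for I/O. A minimal userspace sketch of that detection trick, with hypothetical demo_* names; it is not the reiserfs code.

#include <stdio.h>

/* Hypothetical stand-in for the kernel's global context-switch counter. */
static unsigned long demo_nr_context_switches;

static void demo_read_block(int block)
{
	/* Pretend the read had to sleep and the scheduler ran someone else. */
	(void)block;
	demo_nr_context_switches++;
}

int main(void)
{
	unsigned long before = demo_nr_context_switches;

	demo_read_block(42);
	/* Counter moved while we were inside the read: account it as a miss. */
	if (demo_nr_context_switches != before)
		printf("bread miss (caller blocked)\n");
	else
		printf("bread hit\n");
	return 0;
}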
1553diff -urN linux-2.4.24.org/include/asm-alpha/bitops.h linux-2.4.24/include/asm-alpha/bitops.h
1554--- linux-2.4.24.org/include/asm-alpha/bitops.h 2004-02-04 20:47:46.527846489 +0100
1555+++ linux-2.4.24/include/asm-alpha/bitops.h 2004-02-04 20:52:53.537989191 +0100
1556@@ -3,6 +3,7 @@
1557
1558 #include <linux/config.h>
1559 #include <linux/kernel.h>
1560+#include <asm/compiler.h>
1561
1562 /*
1563 * Copyright 1994, Linus Torvalds.
1564@@ -60,25 +61,25 @@
1565
1566 __asm__ __volatile__(
1567 "1: ldl_l %0,%3\n"
1568- " and %0,%2,%0\n"
1569+ " bic %0,%2,%0\n"
1570 " stl_c %0,%1\n"
1571 " beq %0,2f\n"
1572 ".subsection 2\n"
1573 "2: br 1b\n"
1574 ".previous"
1575 :"=&r" (temp), "=m" (*m)
1576- :"Ir" (~(1UL << (nr & 31))), "m" (*m));
1577+ :"Ir" (1UL << (nr & 31)), "m" (*m));
1578 }
1579
1580 /*
1581 * WARNING: non atomic version.
1582 */
1583 static __inline__ void
1584-__change_bit(unsigned long nr, volatile void * addr)
1585+__clear_bit(unsigned long nr, volatile void * addr)
1586 {
1587 int *m = ((int *) addr) + (nr >> 5);
1588
1589- *m ^= 1 << (nr & 31);
1590+ *m &= ~(1 << (nr & 31));
1591 }
1592
1593 static inline void
1594@@ -99,6 +100,17 @@
1595 :"Ir" (1UL << (nr & 31)), "m" (*m));
1596 }
1597
1598+/*
1599+ * WARNING: non atomic version.
1600+ */
1601+static __inline__ void
1602+__change_bit(unsigned long nr, volatile void * addr)
1603+{
1604+ int *m = ((int *) addr) + (nr >> 5);
1605+
1606+ *m ^= 1 << (nr & 31);
1607+}
1608+
1609 static inline int
1610 test_and_set_bit(unsigned long nr, volatile void *addr)
1611 {
1612@@ -181,20 +193,6 @@
1613 return (old & mask) != 0;
1614 }
1615
1616-/*
1617- * WARNING: non atomic version.
1618- */
1619-static __inline__ int
1620-__test_and_change_bit(unsigned long nr, volatile void * addr)
1621-{
1622- unsigned long mask = 1 << (nr & 0x1f);
1623- int *m = ((int *) addr) + (nr >> 5);
1624- int old = *m;
1625-
1626- *m = old ^ mask;
1627- return (old & mask) != 0;
1628-}
1629-
1630 static inline int
1631 test_and_change_bit(unsigned long nr, volatile void * addr)
1632 {
1633@@ -220,6 +218,20 @@
1634 return oldbit != 0;
1635 }
1636
1637+/*
1638+ * WARNING: non atomic version.
1639+ */
1640+static __inline__ int
1641+__test_and_change_bit(unsigned long nr, volatile void * addr)
1642+{
1643+ unsigned long mask = 1 << (nr & 0x1f);
1644+ int *m = ((int *) addr) + (nr >> 5);
1645+ int old = *m;
1646+
1647+ *m = old ^ mask;
1648+ return (old & mask) != 0;
1649+}
1650+
1651 static inline int
1652 test_bit(int nr, volatile void * addr)
1653 {
1654@@ -235,12 +247,15 @@
1655 */
1656 static inline unsigned long ffz_b(unsigned long x)
1657 {
1658- unsigned long sum = 0;
1659+ unsigned long sum, x1, x2, x4;
1660
1661 x = ~x & -~x; /* set first 0 bit, clear others */
1662- if (x & 0xF0) sum += 4;
1663- if (x & 0xCC) sum += 2;
1664- if (x & 0xAA) sum += 1;
1665+ x1 = x & 0xAA;
1666+ x2 = x & 0xCC;
1667+ x4 = x & 0xF0;
1668+ sum = x2 ? 2 : 0;
1669+ sum += (x4 != 0) * 4;
1670+ sum += (x1 != 0);
1671
1672 return sum;
1673 }
1674@@ -257,24 +272,46 @@
1675
1676 __asm__("cmpbge %1,%2,%0" : "=r"(bits) : "r"(word), "r"(~0UL));
1677 qofs = ffz_b(bits);
1678- __asm__("extbl %1,%2,%0" : "=r"(bits) : "r"(word), "r"(qofs));
1679+ bits = __kernel_extbl(word, qofs);
1680 bofs = ffz_b(bits);
1681
1682 return qofs*8 + bofs;
1683 #endif
1684 }
1685
1686+/*
1687+ * __ffs = Find First set bit in word. Undefined if no set bit exists.
1688+ */
1689+static inline unsigned long __ffs(unsigned long word)
1690+{
1691+#if defined(__alpha_cix__) && defined(__alpha_fix__)
1692+ /* Whee. EV67 can calculate it directly. */
1693+ unsigned long result;
1694+ __asm__("cttz %1,%0" : "=r"(result) : "r"(word));
1695+ return result;
1696+#else
1697+ unsigned long bits, qofs, bofs;
1698+
1699+ __asm__("cmpbge $31,%1,%0" : "=r"(bits) : "r"(word));
1700+ qofs = ffz_b(bits);
1701+ bits = __kernel_extbl(word, qofs);
1702+ bofs = ffz_b(~bits);
1703+
1704+ return qofs*8 + bofs;
1705+#endif
1706+}
1707+
1708 #ifdef __KERNEL__
1709
1710 /*
1711 * ffs: find first bit set. This is defined the same way as
1712 * the libc and compiler builtin ffs routines, therefore
1713- * differs in spirit from the above ffz (man ffs).
1714+ * differs in spirit from the above __ffs.
1715 */
1716
1717 static inline int ffs(int word)
1718 {
1719- int result = ffz(~word);
1720+ int result = __ffs(word);
1721 return word ? result+1 : 0;
1722 }
1723
1724@@ -316,6 +353,14 @@
1725 #define hweight16(x) hweight64((x) & 0xfffful)
1726 #define hweight8(x) hweight64((x) & 0xfful)
1727 #else
1728+static inline unsigned long hweight64(unsigned long w)
1729+{
1730+ unsigned long result;
1731+ for (result = 0; w ; w >>= 1)
1732+ result += (w & 1);
1733+ return result;
1734+}
1735+
1736 #define hweight32(x) generic_hweight32(x)
1737 #define hweight16(x) generic_hweight16(x)
1738 #define hweight8(x) generic_hweight8(x)
1739@@ -365,13 +410,77 @@
1740 }
1741
1742 /*
1743- * The optimizer actually does good code for this case..
1744+ * Find next one bit in a bitmap reasonably efficiently.
1745+ */
1746+static inline unsigned long
1747+find_next_bit(void * addr, unsigned long size, unsigned long offset)
1748+{
1749+ unsigned long * p = ((unsigned long *) addr) + (offset >> 6);
1750+ unsigned long result = offset & ~63UL;
1751+ unsigned long tmp;
1752+
1753+ if (offset >= size)
1754+ return size;
1755+ size -= result;
1756+ offset &= 63UL;
1757+ if (offset) {
1758+ tmp = *(p++);
1759+ tmp &= ~0UL << offset;
1760+ if (size < 64)
1761+ goto found_first;
1762+ if (tmp)
1763+ goto found_middle;
1764+ size -= 64;
1765+ result += 64;
1766+ }
1767+ while (size & ~63UL) {
1768+ if ((tmp = *(p++)))
1769+ goto found_middle;
1770+ result += 64;
1771+ size -= 64;
1772+ }
1773+ if (!size)
1774+ return result;
1775+ tmp = *p;
1776+found_first:
1777+ tmp &= ~0UL >> (64 - size);
1778+ if (!tmp)
1779+ return result + size;
1780+found_middle:
1781+ return result + __ffs(tmp);
1782+}
1783+
1784+/*
1785+ * The optimizer actually does good code for this case.
1786 */
1787 #define find_first_zero_bit(addr, size) \
1788 find_next_zero_bit((addr), (size), 0)
1789+#define find_first_bit(addr, size) \
1790+ find_next_bit((addr), (size), 0)
1791
1792 #ifdef __KERNEL__
1793
1794+/*
1795+ * Every architecture must define this function. It's the fastest
1796+ * way of searching a 140-bit bitmap where the first 100 bits are
1797+ * unlikely to be set. It's guaranteed that at least one of the 140
1798+ * bits is set.
1799+ */
1800+static inline unsigned long
1801+sched_find_first_bit(unsigned long b[3])
1802+{
1803+ unsigned long b0 = b[0], b1 = b[1], b2 = b[2];
1804+ unsigned long ofs;
1805+
1806+ ofs = (b1 ? 64 : 128);
1807+ b1 = (b1 ? b1 : b2);
1808+ ofs = (b0 ? 0 : ofs);
1809+ b0 = (b0 ? b0 : b1);
1810+
1811+ return __ffs(b0) + ofs;
1812+}
1813+
1814+
1815 #define ext2_set_bit __test_and_set_bit
1816 #define ext2_clear_bit __test_and_clear_bit
1817 #define ext2_test_bit test_bit
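The find_next_bit() and sched_find_first_bit() helpers added above serve the O(1) scheduler: the run queue keeps one bit per priority level in a 140-bit bitmap, and picking the next runnable priority is a single find-first-set over three 64-bit words. A minimal userspace sketch of that usage follows; the demo_* names are hypothetical, plain C loops replace the Alpha cttz/cmpbge paths, and a 64-bit unsigned long is assumed, as on Alpha.

#include <stdio.h>

static unsigned long demo_ffs(unsigned long word)
{
	unsigned long bit = 0;

	while (!(word & 1UL)) {		/* undefined for word == 0, as in the patch */
		word >>= 1;
		bit++;
	}
	return bit;
}

/* Lowest set bit in a 140-bit bitmap stored as three 64-bit words. */
static unsigned long demo_sched_find_first_bit(const unsigned long b[3])
{
	if (b[0])
		return demo_ffs(b[0]);
	if (b[1])
		return demo_ffs(b[1]) + 64;
	return demo_ffs(b[2]) + 128;
}

int main(void)
{
	unsigned long bitmap[3] = { 0, 0, 0 };
	int prio = 120;				/* a typical non-RT priority level */

	bitmap[prio / 64] |= 1UL << (prio % 64);	/* "a task became runnable" */
	printf("highest-priority runnable level: %lu\n",
	       demo_sched_find_first_bit(bitmap));
	return 0;
}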
1818diff -urN linux-2.4.24.org/include/asm-alpha/smp.h linux-2.4.24/include/asm-alpha/smp.h
1819--- linux-2.4.24.org/include/asm-alpha/smp.h 2004-02-04 20:47:46.648821326 +0100
1820+++ linux-2.4.24/include/asm-alpha/smp.h 2004-02-04 20:52:53.540988567 +0100
1821@@ -55,7 +55,7 @@
1822 #define cpu_logical_map(cpu) __cpu_logical_map[cpu]
1823
1824 #define hard_smp_processor_id() __hard_smp_processor_id()
1825-#define smp_processor_id() (current->processor)
1826+#define smp_processor_id() (current->cpu)
1827
1828 extern unsigned long cpu_present_mask;
1829 #define cpu_online_map cpu_present_mask
1830diff -urN linux-2.4.24.org/include/asm-alpha/system.h linux-2.4.24/include/asm-alpha/system.h
1831--- linux-2.4.24.org/include/asm-alpha/system.h 2004-02-04 20:47:45.924971887 +0100
1832+++ linux-2.4.24/include/asm-alpha/system.h 2004-02-04 20:52:53.545987527 +0100
1833@@ -131,7 +131,6 @@
1834 extern void halt(void) __attribute__((noreturn));
1835 #define __halt() __asm__ __volatile__ ("call_pal %0 #halt" : : "i" (PAL_halt))
1836
1837-#define prepare_to_switch() do { } while(0)
1838 #define switch_to(prev,next,last) \
1839 do { \
1840 unsigned long pcbb; \
1841diff -urN linux-2.4.24.org/include/asm-arm/bitops.h linux-2.4.24/include/asm-arm/bitops.h
1842--- linux-2.4.24.org/include/asm-arm/bitops.h 2004-02-04 20:48:05.614876374 +0100
1843+++ linux-2.4.24/include/asm-arm/bitops.h 2004-02-04 20:52:53.589978377 +0100
1844@@ -2,6 +2,8 @@
1845 * Copyright 1995, Russell King.
1846 * Various bits and pieces copyrights include:
1847 * Linus Torvalds (test_bit).
1848+ * Big endian support: Copyright 2001, Nicolas Pitre
1849+ * reworked by rmk.
1850 *
1851 * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
1852 *
1853@@ -17,81 +19,271 @@
1854
1855 #ifdef __KERNEL__
1856
1857+#include <asm/system.h>
1858+
1859 #define smp_mb__before_clear_bit() do { } while (0)
1860 #define smp_mb__after_clear_bit() do { } while (0)
1861
1862 /*
1863- * Function prototypes to keep gcc -Wall happy.
1864+ * These functions are the basis of our bit ops.
1865+ * First, the atomic bitops.
1866+ *
1867+ * The endian issue for these functions is handled by the macros below.
1868 */
1869-extern void set_bit(int nr, volatile void * addr);
1870+static inline void
1871+____atomic_set_bit_mask(unsigned int mask, volatile unsigned char *p)
1872+{
1873+ unsigned long flags;
1874+
1875+ local_irq_save(flags);
1876+ *p |= mask;
1877+ local_irq_restore(flags);
1878+}
1879+
1880+static inline void
1881+____atomic_clear_bit_mask(unsigned int mask, volatile unsigned char *p)
1882+{
1883+ unsigned long flags;
1884+
1885+ local_irq_save(flags);
1886+ *p &= ~mask;
1887+ local_irq_restore(flags);
1888+}
1889+
1890+static inline void
1891+____atomic_change_bit_mask(unsigned int mask, volatile unsigned char *p)
1892+{
1893+ unsigned long flags;
1894+
1895+ local_irq_save(flags);
1896+ *p ^= mask;
1897+ local_irq_restore(flags);
1898+}
1899
1900-static inline void __set_bit(int nr, volatile void *addr)
1901+static inline int
1902+____atomic_test_and_set_bit_mask(unsigned int mask, volatile unsigned char *p)
1903 {
1904- ((unsigned char *) addr)[nr >> 3] |= (1U << (nr & 7));
1905+ unsigned long flags;
1906+ unsigned int res;
1907+
1908+ local_irq_save(flags);
1909+ res = *p;
1910+ *p = res | mask;
1911+ local_irq_restore(flags);
1912+
1913+ return res & mask;
1914 }
1915
1916-extern void clear_bit(int nr, volatile void * addr);
1917+static inline int
1918+____atomic_test_and_clear_bit_mask(unsigned int mask, volatile unsigned char *p)
1919+{
1920+ unsigned long flags;
1921+ unsigned int res;
1922+
1923+ local_irq_save(flags);
1924+ res = *p;
1925+ *p = res & ~mask;
1926+ local_irq_restore(flags);
1927+
1928+ return res & mask;
1929+}
1930
1931-static inline void __clear_bit(int nr, volatile void *addr)
1932+static inline int
1933+____atomic_test_and_change_bit_mask(unsigned int mask, volatile unsigned char *p)
1934 {
1935- ((unsigned char *) addr)[nr >> 3] &= ~(1U << (nr & 7));
1936+ unsigned long flags;
1937+ unsigned int res;
1938+
1939+ local_irq_save(flags);
1940+ res = *p;
1941+ *p = res ^ mask;
1942+ local_irq_restore(flags);
1943+
1944+ return res & mask;
1945 }
1946
1947-extern void change_bit(int nr, volatile void * addr);
1948+/*
1949+ * Now the non-atomic variants. We let the compiler handle all optimisations
1950+ * for these.
1951+ */
1952+static inline void ____nonatomic_set_bit(int nr, volatile void *p)
1953+{
1954+ ((unsigned char *) p)[nr >> 3] |= (1U << (nr & 7));
1955+}
1956
1957-static inline void __change_bit(int nr, volatile void *addr)
1958+static inline void ____nonatomic_clear_bit(int nr, volatile void *p)
1959 {
1960- ((unsigned char *) addr)[nr >> 3] ^= (1U << (nr & 7));
1961+ ((unsigned char *) p)[nr >> 3] &= ~(1U << (nr & 7));
1962 }
1963
1964-extern int test_and_set_bit(int nr, volatile void * addr);
1965+static inline void ____nonatomic_change_bit(int nr, volatile void *p)
1966+{
1967+ ((unsigned char *) p)[nr >> 3] ^= (1U << (nr & 7));
1968+}
1969
1970-static inline int __test_and_set_bit(int nr, volatile void *addr)
1971+static inline int ____nonatomic_test_and_set_bit(int nr, volatile void *p)
1972 {
1973 unsigned int mask = 1 << (nr & 7);
1974 unsigned int oldval;
1975
1976- oldval = ((unsigned char *) addr)[nr >> 3];
1977- ((unsigned char *) addr)[nr >> 3] = oldval | mask;
1978+ oldval = ((unsigned char *) p)[nr >> 3];
1979+ ((unsigned char *) p)[nr >> 3] = oldval | mask;
1980 return oldval & mask;
1981 }
1982
1983-extern int test_and_clear_bit(int nr, volatile void * addr);
1984-
1985-static inline int __test_and_clear_bit(int nr, volatile void *addr)
1986+static inline int ____nonatomic_test_and_clear_bit(int nr, volatile void *p)
1987 {
1988 unsigned int mask = 1 << (nr & 7);
1989 unsigned int oldval;
1990
1991- oldval = ((unsigned char *) addr)[nr >> 3];
1992- ((unsigned char *) addr)[nr >> 3] = oldval & ~mask;
1993+ oldval = ((unsigned char *) p)[nr >> 3];
1994+ ((unsigned char *) p)[nr >> 3] = oldval & ~mask;
1995 return oldval & mask;
1996 }
1997
1998-extern int test_and_change_bit(int nr, volatile void * addr);
1999-
2000-static inline int __test_and_change_bit(int nr, volatile void *addr)
2001+static inline int ____nonatomic_test_and_change_bit(int nr, volatile void *p)
2002 {
2003 unsigned int mask = 1 << (nr & 7);
2004 unsigned int oldval;
2005
2006- oldval = ((unsigned char *) addr)[nr >> 3];
2007- ((unsigned char *) addr)[nr >> 3] = oldval ^ mask;
2008+ oldval = ((unsigned char *) p)[nr >> 3];
2009+ ((unsigned char *) p)[nr >> 3] = oldval ^ mask;
2010 return oldval & mask;
2011 }
2012
2013-extern int find_first_zero_bit(void * addr, unsigned size);
2014-extern int find_next_zero_bit(void * addr, int size, int offset);
2015-
2016 /*
2017 * This routine doesn't need to be atomic.
2018 */
2019-static inline int test_bit(int nr, const void * addr)
2020+static inline int ____test_bit(int nr, const void * p)
2021 {
2022- return (((unsigned char *) addr)[nr >> 3] >> (nr & 7)) & 1;
2023+ return (((volatile unsigned char *) p)[nr >> 3] >> (nr & 7)) & 1;
2024 }
2025
2026 /*
2027+ * A note about Endian-ness.
2028+ * -------------------------
2029+ *
2030+ * When the ARM is put into big endian mode via CR15, the processor
2031+ * merely swaps the order of bytes within words, thus:
2032+ *
2033+ * ------------ physical data bus bits -----------
2034+ * D31 ... D24 D23 ... D16 D15 ... D8 D7 ... D0
2035+ * little byte 3 byte 2 byte 1 byte 0
2036+ * big byte 0 byte 1 byte 2 byte 3
2037+ *
2038+ * This means that reading a 32-bit word at address 0 returns the same
2039+ * value irrespective of the endian mode bit.
2040+ *
2041+ * Peripheral devices should be connected with the data bus reversed in
2042+ * "Big Endian" mode. ARM Application Note 61 is applicable, and is
2043+ * available from http://www.arm.com/.
2044+ *
2045+ * The following assumes that the data bus connectivity for big endian
2046+ * mode has been followed.
2047+ *
2048+ * Note that bit 0 is defined to be 32-bit word bit 0, not byte 0 bit 0.
2049+ */
2050+
2051+/*
2052+ * Little endian assembly bitops. nr = 0 -> byte 0 bit 0.
2053+ */
2054+extern void _set_bit_le(int nr, volatile void * p);
2055+extern void _clear_bit_le(int nr, volatile void * p);
2056+extern void _change_bit_le(int nr, volatile void * p);
2057+extern int _test_and_set_bit_le(int nr, volatile void * p);
2058+extern int _test_and_clear_bit_le(int nr, volatile void * p);
2059+extern int _test_and_change_bit_le(int nr, volatile void * p);
2060+extern int _find_first_zero_bit_le(void * p, unsigned size);
2061+extern int _find_next_zero_bit_le(void * p, int size, int offset);
2062+
2063+/*
2064+ * Big endian assembly bitops. nr = 0 -> byte 3 bit 0.
2065+ */
2066+extern void _set_bit_be(int nr, volatile void * p);
2067+extern void _clear_bit_be(int nr, volatile void * p);
2068+extern void _change_bit_be(int nr, volatile void * p);
2069+extern int _test_and_set_bit_be(int nr, volatile void * p);
2070+extern int _test_and_clear_bit_be(int nr, volatile void * p);
2071+extern int _test_and_change_bit_be(int nr, volatile void * p);
2072+extern int _find_first_zero_bit_be(void * p, unsigned size);
2073+extern int _find_next_zero_bit_be(void * p, int size, int offset);
2074+
2075+
2076+/*
2077+ * The __* form of bitops are non-atomic and may be reordered.
2078+ */
2079+#define ATOMIC_BITOP_LE(name,nr,p) \
2080+ (__builtin_constant_p(nr) ? \
2081+ ____atomic_##name##_mask(1 << ((nr) & 7), \
2082+ ((unsigned char *)(p)) + ((nr) >> 3)) : \
2083+ _##name##_le(nr,p))
2084+
2085+#define ATOMIC_BITOP_BE(name,nr,p) \
2086+ (__builtin_constant_p(nr) ? \
2087+ ____atomic_##name##_mask(1 << ((nr) & 7), \
2088+ ((unsigned char *)(p)) + (((nr) >> 3) ^ 3)) : \
2089+ _##name##_be(nr,p))
2090+
2091+#define NONATOMIC_BITOP_LE(name,nr,p) \
2092+ (____nonatomic_##name(nr, p))
2093+
2094+#define NONATOMIC_BITOP_BE(name,nr,p) \
2095+ (____nonatomic_##name(nr ^ 0x18, p))
2096+
2097+#ifndef __ARMEB__
2098+/*
2099+ * These are the little endian, atomic definitions.
2100+ */
2101+#define set_bit(nr,p) ATOMIC_BITOP_LE(set_bit,nr,p)
2102+#define clear_bit(nr,p) ATOMIC_BITOP_LE(clear_bit,nr,p)
2103+#define change_bit(nr,p) ATOMIC_BITOP_LE(change_bit,nr,p)
2104+#define test_and_set_bit(nr,p) ATOMIC_BITOP_LE(test_and_set_bit,nr,p)
2105+#define test_and_clear_bit(nr,p) ATOMIC_BITOP_LE(test_and_clear_bit,nr,p)
2106+#define test_and_change_bit(nr,p) ATOMIC_BITOP_LE(test_and_change_bit,nr,p)
2107+#define test_bit(nr,p) ____test_bit(nr,p)
2108+#define find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz)
2109+#define find_next_zero_bit(p,sz,off) _find_next_zero_bit_le(p,sz,off)
2110+
2111+/*
2112+ * These are the little endian, non-atomic definitions.
2113+ */
2114+#define __set_bit(nr,p) NONATOMIC_BITOP_LE(set_bit,nr,p)
2115+#define __clear_bit(nr,p) NONATOMIC_BITOP_LE(clear_bit,nr,p)
2116+#define __change_bit(nr,p) NONATOMIC_BITOP_LE(change_bit,nr,p)
2117+#define __test_and_set_bit(nr,p) NONATOMIC_BITOP_LE(test_and_set_bit,nr,p)
2118+#define __test_and_clear_bit(nr,p) NONATOMIC_BITOP_LE(test_and_clear_bit,nr,p)
2119+#define __test_and_change_bit(nr,p) NONATOMIC_BITOP_LE(test_and_change_bit,nr,p)
2120+#define __test_bit(nr,p) ____test_bit(nr,p)
2121+
2122+#else
2123+
2124+/*
2125+ * These are the big endian, atomic definitions.
2126+ */
2127+#define set_bit(nr,p) ATOMIC_BITOP_BE(set_bit,nr,p)
2128+#define clear_bit(nr,p) ATOMIC_BITOP_BE(clear_bit,nr,p)
2129+#define change_bit(nr,p) ATOMIC_BITOP_BE(change_bit,nr,p)
2130+#define test_and_set_bit(nr,p) ATOMIC_BITOP_BE(test_and_set_bit,nr,p)
2131+#define test_and_clear_bit(nr,p) ATOMIC_BITOP_BE(test_and_clear_bit,nr,p)
2132+#define test_and_change_bit(nr,p) ATOMIC_BITOP_BE(test_and_change_bit,nr,p)
2133+#define test_bit(nr,p) ____test_bit((nr) ^ 0x18, p)
2134+#define find_first_zero_bit(p,sz) _find_first_zero_bit_be(p,sz)
2135+#define find_next_zero_bit(p,sz,off) _find_next_zero_bit_be(p,sz,off)
2136+
2137+/*
2138+ * These are the big endian, non-atomic definitions.
2139+ */
2140+#define __set_bit(nr,p) NONATOMIC_BITOP_BE(set_bit,nr,p)
2141+#define __clear_bit(nr,p) NONATOMIC_BITOP_BE(clear_bit,nr,p)
2142+#define __change_bit(nr,p) NONATOMIC_BITOP_BE(change_bit,nr,p)
2143+#define __test_and_set_bit(nr,p) NONATOMIC_BITOP_BE(test_and_set_bit,nr,p)
2144+#define __test_and_clear_bit(nr,p) NONATOMIC_BITOP_BE(test_and_clear_bit,nr,p)
2145+#define __test_and_change_bit(nr,p) NONATOMIC_BITOP_BE(test_and_change_bit,nr,p)
2146+#define __test_bit(nr,p) ____test_bit((nr) ^ 0x18, p)
2147+
2148+#endif
2149+
2150+/*
2151 * ffz = Find First Zero in word. Undefined if no zero exists,
2152 * so code should check against ~0UL first..
2153 */
2154@@ -110,6 +302,29 @@
2155 }
2156
2157 /*
2158+ * ffz = Find First Zero in word. Undefined if no zero exists,
2159+ * so code should check against ~0UL first..
2160+ */
2161+static inline unsigned long __ffs(unsigned long word)
2162+{
2163+ int k;
2164+
2165+ k = 31;
2166+ if (word & 0x0000ffff) { k -= 16; word <<= 16; }
2167+ if (word & 0x00ff0000) { k -= 8; word <<= 8; }
2168+ if (word & 0x0f000000) { k -= 4; word <<= 4; }
2169+ if (word & 0x30000000) { k -= 2; word <<= 2; }
2170+ if (word & 0x40000000) { k -= 1; }
2171+ return k;
2172+}
2173+
2174+/*
2175+ * fls: find last bit set.
2176+ */
2177+
2178+#define fls(x) generic_fls(x)
2179+
2180+/*
2181 * ffs: find first bit set. This is defined the same way as
2182 * the libc and compiler builtin ffs routines, therefore
2183 * differs in spirit from the above ffz (man ffs).
2184@@ -118,6 +333,22 @@
2185 #define ffs(x) generic_ffs(x)
2186
2187 /*
2188+ * Find first bit set in a 168-bit bitmap, where the first
2189+ * 128 bits are unlikely to be set.
2190+ */
2191+static inline int sched_find_first_bit(unsigned long *b)
2192+{
2193+ unsigned long v;
2194+ unsigned int off;
2195+
2196+ for (off = 0; v = b[off], off < 4; off++) {
2197+ if (unlikely(v))
2198+ break;
2199+ }
2200+ return __ffs(v) + off * 32;
2201+}
2202+
2203+/*
2204 * hweightN: returns the hamming weight (i.e. the number
2205 * of bits set) of a N-bit word
2206 */
2207@@ -126,18 +357,25 @@
2208 #define hweight16(x) generic_hweight16(x)
2209 #define hweight8(x) generic_hweight8(x)
2210
2211-#define ext2_set_bit test_and_set_bit
2212-#define ext2_clear_bit test_and_clear_bit
2213-#define ext2_test_bit test_bit
2214-#define ext2_find_first_zero_bit find_first_zero_bit
2215-#define ext2_find_next_zero_bit find_next_zero_bit
2216-
2217-/* Bitmap functions for the minix filesystem. */
2218-#define minix_test_and_set_bit(nr,addr) test_and_set_bit(nr,addr)
2219-#define minix_set_bit(nr,addr) set_bit(nr,addr)
2220-#define minix_test_and_clear_bit(nr,addr) test_and_clear_bit(nr,addr)
2221-#define minix_test_bit(nr,addr) test_bit(nr,addr)
2222-#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
2223+/*
2224+ * Ext2 is defined to use little-endian byte ordering.
2225+ * These do not need to be atomic.
2226+ */
2227+#define ext2_set_bit(nr,p) NONATOMIC_BITOP_LE(test_and_set_bit,nr,p)
2228+#define ext2_clear_bit(nr,p) NONATOMIC_BITOP_LE(test_and_clear_bit,nr,p)
2229+#define ext2_test_bit(nr,p) __test_bit(nr,p)
2230+#define ext2_find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz)
2231+#define ext2_find_next_zero_bit(p,sz,off) _find_next_zero_bit_le(p,sz,off)
2232+
2233+/*
2234+ * Minix is defined to use little-endian byte ordering.
2235+ * These do not need to be atomic.
2236+ */
2237+#define minix_set_bit(nr,p) NONATOMIC_BITOP_LE(set_bit,nr,p)
2238+#define minix_test_bit(nr,p) __test_bit(nr,p)
2239+#define minix_test_and_set_bit(nr,p) NONATOMIC_BITOP_LE(test_and_set_bit,nr,p)
2240+#define minix_test_and_clear_bit(nr,p) NONATOMIC_BITOP_LE(test_and_clear_bit,nr,p)
2241+#define minix_find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz)
2242
2243 #endif /* __KERNEL__ */
2244
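The reworked ARM bitops above dispatch on __builtin_constant_p(): a constant bit number folds into an inline mask-and-byte-offset path, while a variable one goes to the out-of-line assembly helpers (_set_bit_le() and friends). A small sketch of that dispatch shape, with hypothetical demo_* stand-ins for both paths; the GCC builtin is assumed, and the IRQ masking of the real inline path is only noted in a comment.

#include <stdio.h>

/* Inline path: mask and byte offset are folded at compile time. */
static inline void demo_set_bit_inline(unsigned int mask, unsigned char *p)
{
	*p |= mask;		/* the kernel brackets this with IRQ save/restore */
}

/* "Out of line" path, standing in for the assembly _set_bit_le() helper. */
static void demo_set_bit_outofline(int nr, unsigned char *p)
{
	p[nr >> 3] |= 1U << (nr & 7);
}

/* Same shape as ATOMIC_BITOP_LE(): constant nr -> inline, variable nr -> call. */
#define DEMO_SET_BIT(nr, p)						\
	(__builtin_constant_p(nr)					\
		? demo_set_bit_inline(1U << ((nr) & 7),			\
				      ((unsigned char *)(p)) + ((nr) >> 3)) \
		: demo_set_bit_outofline(nr, p))

int main(void)
{
	unsigned char map[4] = { 0 };
	int n = 13;

	DEMO_SET_BIT(5, map);	/* constant: folded at compile time */
	DEMO_SET_BIT(n, map);	/* variable: goes through the helper */
	printf("%02x %02x\n", map[0], map[1]);
	return 0;
}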
2245diff -urN linux-2.4.24.org/include/asm-cris/bitops.h linux-2.4.24/include/asm-cris/bitops.h
2246--- linux-2.4.24.org/include/asm-cris/bitops.h 2004-02-04 20:48:26.679494929 +0100
2247+++ linux-2.4.24/include/asm-cris/bitops.h 2004-02-04 20:52:53.595977130 +0100
2248@@ -22,6 +22,7 @@
2249 /* We use generic_ffs so get it; include guards resolve the possible
2250 mutually inclusion. */
2251 #include <linux/bitops.h>
2252+#include <linux/compiler.h>
2253
2254 /*
2255 * Some hacks to defeat gcc over-optimizations..
2256@@ -44,6 +45,8 @@
2257 #define set_bit(nr, addr) (void)test_and_set_bit(nr, addr)
2258 #define __set_bit(nr, addr) (void)__test_and_set_bit(nr, addr)
2259
2260+#define __set_bit(nr, addr) (void)__test_and_set_bit(nr, addr)
2261+
2262 /*
2263 * clear_bit - Clears a bit in memory
2264 * @nr: Bit to clear
2265@@ -58,6 +61,8 @@
2266 #define clear_bit(nr, addr) (void)test_and_clear_bit(nr, addr)
2267 #define __clear_bit(nr, addr) (void)__test_and_clear_bit(nr, addr)
2268
2269+#define __clear_bit(nr, addr) (void)__test_and_clear_bit(nr, addr)
2270+
2271 /*
2272 * change_bit - Toggle a bit in memory
2273 * @nr: Bit to change
2274@@ -91,7 +96,7 @@
2275 * It also implies a memory barrier.
2276 */
2277
2278-extern __inline__ int test_and_set_bit(int nr, void *addr)
2279+extern inline int test_and_set_bit(int nr, void *addr)
2280 {
2281 unsigned int mask, retval;
2282 unsigned long flags;
2283@@ -119,6 +124,18 @@
2284 return retval;
2285 }
2286
2287+extern inline int __test_and_set_bit(int nr, void *addr)
2288+{
2289+ unsigned int mask, retval;
2290+ unsigned int *adr = (unsigned int *)addr;
2291+
2292+ adr += nr >> 5;
2293+ mask = 1 << (nr & 0x1f);
2294+ retval = (mask & *adr) != 0;
2295+ *adr |= mask;
2296+ return retval;
2297+}
2298+
2299 /*
2300 * clear_bit() doesn't provide any barrier for the compiler.
2301 */
2302@@ -134,7 +151,7 @@
2303 * It also implies a memory barrier.
2304 */
2305
2306-extern __inline__ int test_and_clear_bit(int nr, void *addr)
2307+extern inline int test_and_clear_bit(int nr, void *addr)
2308 {
2309 unsigned int mask, retval;
2310 unsigned long flags;
2311@@ -160,7 +177,7 @@
2312 * but actually fail. You must protect multiple accesses with a lock.
2313 */
2314
2315-extern __inline__ int __test_and_clear_bit(int nr, void *addr)
2316+extern inline int __test_and_clear_bit(int nr, void *addr)
2317 {
2318 unsigned int mask, retval;
2319 unsigned int *adr = (unsigned int *)addr;
2320@@ -180,7 +197,7 @@
2321 * It also implies a memory barrier.
2322 */
2323
2324-extern __inline__ int test_and_change_bit(int nr, void *addr)
2325+extern inline int test_and_change_bit(int nr, void *addr)
2326 {
2327 unsigned int mask, retval;
2328 unsigned long flags;
2329@@ -197,7 +214,7 @@
2330
2331 /* WARNING: non atomic and it can be reordered! */
2332
2333-extern __inline__ int __test_and_change_bit(int nr, void *addr)
2334+extern inline int __test_and_change_bit(int nr, void *addr)
2335 {
2336 unsigned int mask, retval;
2337 unsigned int *adr = (unsigned int *)addr;
2338@@ -218,7 +235,7 @@
2339 * This routine doesn't need to be atomic.
2340 */
2341
2342-extern __inline__ int test_bit(int nr, const void *addr)
2343+extern inline int test_bit(int nr, const void *addr)
2344 {
2345 unsigned int mask;
2346 unsigned int *adr = (unsigned int *)addr;
2347@@ -239,7 +256,7 @@
2348 * number. They differ in that the first function also inverts all bits
2349 * in the input.
2350 */
2351-extern __inline__ unsigned long cris_swapnwbrlz(unsigned long w)
2352+extern inline unsigned long cris_swapnwbrlz(unsigned long w)
2353 {
2354 /* Let's just say we return the result in the same register as the
2355 input. Saying we clobber the input but can return the result
2356@@ -255,7 +272,7 @@
2357 return res;
2358 }
2359
2360-extern __inline__ unsigned long cris_swapwbrlz(unsigned long w)
2361+extern inline unsigned long cris_swapwbrlz(unsigned long w)
2362 {
2363 unsigned res;
2364 __asm__ ("swapwbr %0 \n\t"
2365@@ -269,7 +286,7 @@
2366 * ffz = Find First Zero in word. Undefined if no zero exists,
2367 * so code should check against ~0UL first..
2368 */
2369-extern __inline__ unsigned long ffz(unsigned long w)
2370+extern inline unsigned long ffz(unsigned long w)
2371 {
2372 /* The generic_ffs function is used to avoid the asm when the
2373 argument is a constant. */
2374@@ -282,7 +299,7 @@
2375 * Somewhat like ffz but the equivalent of generic_ffs: in contrast to
2376 * ffz we return the first one-bit *plus one*.
2377 */
2378-extern __inline__ unsigned long kernel_ffs(unsigned long w)
2379+extern inline unsigned long kernel_ffs(unsigned long w)
2380 {
2381 /* The generic_ffs function is used to avoid the asm when the
2382 argument is a constant. */
2383@@ -304,7 +321,7 @@
2384 * @offset: The bitnumber to start searching at
2385 * @size: The maximum size to search
2386 */
2387-extern __inline__ int find_next_zero_bit (void * addr, int size, int offset)
2388+extern inline int find_next_zero_bit (void * addr, int size, int offset)
2389 {
2390 unsigned long *p = ((unsigned long *) addr) + (offset >> 5);
2391 unsigned long result = offset & ~31UL;
2392@@ -375,7 +392,45 @@
2393 #define minix_test_bit(nr,addr) test_bit(nr,addr)
2394 #define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
2395
2396-#endif /* __KERNEL__ */
2397+#if 0
2398+/* TODO: see below */
2399+#define sched_find_first_zero_bit(addr) find_first_zero_bit(addr, 168)
2400+
2401+#else
2402+/* TODO: left out pending where to put it.. (there are .h dependencies) */
2403+
2404+ /*
2405+ * Every architecture must define this function. It's the fastest
2406+ * way of searching a 168-bit bitmap where the first 128 bits are
2407+ * unlikely to be set. It's guaranteed that at least one of the 168
2408+ * bits is cleared.
2409+ */
2410+#if 0
2411+#if MAX_RT_PRIO != 128 || MAX_PRIO != 168
2412+# error update this function.
2413+#endif
2414+#else
2415+#define MAX_RT_PRIO 128
2416+#define MAX_PRIO 168
2417+#endif
2418+
2419+static inline int sched_find_first_zero_bit(char *bitmap)
2420+{
2421+ unsigned int *b = (unsigned int *)bitmap;
2422+ unsigned int rt;
2423+
2424+ rt = b[0] & b[1] & b[2] & b[3];
2425+ if (unlikely(rt != 0xffffffff))
2426+ return find_first_zero_bit(bitmap, MAX_RT_PRIO);
2427+
2428+ if (b[4] != ~0)
2429+ return ffz(b[4]) + MAX_RT_PRIO;
2430+ return ffz(b[5]) + 32 + MAX_RT_PRIO;
2431+}
2432+#undef MAX_PRIO
2433+#undef MAX_RT_PRIO
2434+#endif
2435
2436+#endif /* __KERNEL__ */
2437
2438 #endif /* _CRIS_BITOPS_H */
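The CRIS fallback above searches a 168-bit priority bitmap for the first zero bit, taking a fast path only when the four real-time words are still all ones. A portable sketch of the same idea, with hypothetical demo_* names standing in for the kernel's ffz() and find_first_zero_bit().

#include <stdio.h>

#define DEMO_MAX_RT_PRIO 128

/* Portable stand-in for the kernel's ffz(); undefined for w == ~0U. */
static int demo_ffz(unsigned int w)
{
	int bit = 0;

	while (w & 1U) {
		w >>= 1;
		bit++;
	}
	return bit;
}

static int demo_sched_find_first_zero_bit(const unsigned int b[6])
{
	unsigned int rt = b[0] & b[1] & b[2] & b[3];
	int i;

	if (rt != 0xffffffffU)		/* some RT priority level is free */
		for (i = 0; i < 4; i++)
			if (b[i] != 0xffffffffU)
				return demo_ffz(b[i]) + i * 32;
	if (b[4] != 0xffffffffU)
		return demo_ffz(b[4]) + DEMO_MAX_RT_PRIO;
	return demo_ffz(b[5]) + 32 + DEMO_MAX_RT_PRIO;
}

int main(void)
{
	/* All 168 bits set except bit 130 (a free non-RT priority slot). */
	unsigned int b[6] = { ~0U, ~0U, ~0U, ~0U, ~0U & ~(1U << 2), ~0U };

	printf("first zero bit: %d\n", demo_sched_find_first_zero_bit(b));
	return 0;
}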
2439diff -urN linux-2.4.24.org/include/asm-generic/bitops.h linux-2.4.24/include/asm-generic/bitops.h
2440--- linux-2.4.24.org/include/asm-generic/bitops.h 2004-02-04 20:47:40.855026441 +0100
2441+++ linux-2.4.24/include/asm-generic/bitops.h 2004-02-04 20:52:53.630969851 +0100
2442@@ -51,6 +51,12 @@
2443 return ((mask & *addr) != 0);
2444 }
2445
2446+/*
2447+ * fls: find last bit set.
2448+ */
2449+
2450+#define fls(x) generic_fls(x)
2451+
2452 #ifdef __KERNEL__
2453
2454 /*
2455diff -urN linux-2.4.24.org/include/asm-i386/bitops.h linux-2.4.24/include/asm-i386/bitops.h
2456--- linux-2.4.24.org/include/asm-i386/bitops.h 2004-02-04 20:47:40.983999614 +0100
2457+++ linux-2.4.24/include/asm-i386/bitops.h 2004-02-04 20:52:53.655964653 +0100
2458@@ -6,6 +6,7 @@
2459 */
2460
2461 #include <linux/config.h>
2462+#include <linux/compiler.h>
2463
2464 /*
2465 * These have to be done with inline assembly: that way the bit-setting
2466@@ -75,6 +76,14 @@
2467 :"=m" (ADDR)
2468 :"Ir" (nr));
2469 }
2470+
2471+static __inline__ void __clear_bit(int nr, volatile void * addr)
2472+{
2473+ __asm__ __volatile__(
2474+ "btrl %1,%0"
2475+ :"=m" (ADDR)
2476+ :"Ir" (nr));
2477+}
2478 #define smp_mb__before_clear_bit() barrier()
2479 #define smp_mb__after_clear_bit() barrier()
2480
2481@@ -284,6 +293,34 @@
2482 }
2483
2484 /**
2485+ * find_first_bit - find the first set bit in a memory region
2486+ * @addr: The address to start the search at
2487+ * @size: The maximum size to search
2488+ *
2489+ * Returns the bit-number of the first set bit, not the number of the byte
2490+ * containing a bit.
2491+ */
2492+static __inline__ int find_first_bit(void * addr, unsigned size)
2493+{
2494+ int d0, d1;
2495+ int res;
2496+
2497+ /* This looks at memory. Mark it volatile to tell gcc not to move it around */
2498+ __asm__ __volatile__(
2499+ "xorl %%eax,%%eax\n\t"
2500+ "repe; scasl\n\t"
2501+ "jz 1f\n\t"
2502+ "leal -4(%%edi),%%edi\n\t"
2503+ "bsfl (%%edi),%%eax\n"
2504+ "1:\tsubl %%ebx,%%edi\n\t"
2505+ "shll $3,%%edi\n\t"
2506+ "addl %%edi,%%eax"
2507+ :"=a" (res), "=&c" (d0), "=&D" (d1)
2508+ :"1" ((size + 31) >> 5), "2" (addr), "b" (addr));
2509+ return res;
2510+}
2511+
2512+/**
2513 * find_next_zero_bit - find the first zero bit in a memory region
2514 * @addr: The address to base the search on
2515 * @offset: The bitnumber to start searching at
2516@@ -296,7 +333,7 @@
2517
2518 if (bit) {
2519 /*
2520- * Look for zero in first byte
2521+ * Look for zero in the first 32 bits.
2522 */
2523 __asm__("bsfl %1,%0\n\t"
2524 "jne 1f\n\t"
2525@@ -317,6 +354,39 @@
2526 }
2527
2528 /**
2529+ * find_next_bit - find the first set bit in a memory region
2530+ * @addr: The address to base the search on
2531+ * @offset: The bitnumber to start searching at
2532+ * @size: The maximum size to search
2533+ */
2534+static __inline__ int find_next_bit (void * addr, int size, int offset)
2535+{
2536+ unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
2537+ int set = 0, bit = offset & 31, res;
2538+
2539+ if (bit) {
2540+ /*
2541+ * Look for nonzero in the first 32 bits:
2542+ */
2543+ __asm__("bsfl %1,%0\n\t"
2544+ "jne 1f\n\t"
2545+ "movl $32, %0\n"
2546+ "1:"
2547+ : "=r" (set)
2548+ : "r" (*p >> bit));
2549+ if (set < (32 - bit))
2550+ return set + offset;
2551+ set = 32 - bit;
2552+ p++;
2553+ }
2554+ /*
2555+ * No set bit yet, search remaining full words for a bit
2556+ */
2557+ res = find_first_bit (p, size - 32 * (p - (unsigned long *) addr));
2558+ return (offset + set + res);
2559+}
2560+
2561+/**
2562 * ffz - find first zero in word.
2563 * @word: The word to search
2564 *
2565@@ -330,8 +400,41 @@
2566 return word;
2567 }
2568
2569+/**
2570+ * __ffs - find first bit in word.
2571+ * @word: The word to search
2572+ * Undefined if no bit exists, so code should check against 0 first.
2573+ */
2574+static __inline__ unsigned long __ffs(unsigned long word)
2575+{
2576+ __asm__("bsfl %1,%0"
2577+ :"=r" (word)
2578+ :"rm" (word));
2579+ return word;
2580+}
2581+#define fls(x) generic_fls(x)
2582+
2583 #ifdef __KERNEL__
2584
2585+/*
2586+ * Every architecture must define this function. It's the fastest
2587+ * way of searching a 140-bit bitmap where the first 100 bits are
2588+ * unlikely to be set. It's guaranteed that at least one of the 140
2589+ * bits is cleared.
2590+ */
2591+static inline int sched_find_first_bit(unsigned long *b)
2592+{
2593+ if (unlikely(b[0]))
2594+ return __ffs(b[0]);
2595+ if (unlikely(b[1]))
2596+ return __ffs(b[1]) + 32;
2597+ if (unlikely(b[2]))
2598+ return __ffs(b[2]) + 64;
2599+ if (b[3])
2600+ return __ffs(b[3]) + 96;
2601+ return __ffs(b[4]) + 128;
2602+}
2603+
2604 /**
2605 * ffs - find first bit set
2606 * @x: the word to search
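The i386 find_next_bit() added above works in two phases: finish the partially searched word with bsfl, then fall through to find_first_bit() for the remaining whole words. A portable sketch of that structure follows; the demo_* names are hypothetical, plain loops replace bsfl/scasl, and the bitmap size is assumed to be a multiple of 32.

#include <stdio.h>

/* Portable stand-in for the bsfl-based __ffs(); undefined for w == 0. */
static unsigned int demo_ffs32(unsigned int w)
{
	unsigned int bit = 0;

	while (!(w & 1U)) {
		w >>= 1;
		bit++;
	}
	return bit;
}

static unsigned int demo_find_next_bit(const unsigned int *addr,
				       unsigned int size, unsigned int offset)
{
	unsigned int word = offset >> 5;
	unsigned int bit  = offset & 31;
	unsigned int w;

	if (offset >= size)
		return size;
	/* Phase 1: the remainder of the word 'offset' points into. */
	w = addr[word] >> bit;
	if (w)
		return offset + demo_ffs32(w);
	/* Phase 2: whole 32-bit words until something is set or we run out. */
	for (word++; word * 32 < size; word++)
		if (addr[word])
			return word * 32 + demo_ffs32(addr[word]);
	return size;
}

int main(void)
{
	unsigned int map[4] = { 0, 0, 1U << 7, 0 };	/* only bit 71 set */

	printf("next bit from 10: %u\n", demo_find_next_bit(map, 128, 10));
	return 0;
}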
2607diff -urN linux-2.4.24.org/include/asm-i386/mmu_context.h linux-2.4.24/include/asm-i386/mmu_context.h
2608--- linux-2.4.24.org/include/asm-i386/mmu_context.h 2004-02-04 20:47:41.355922254 +0100
2609+++ linux-2.4.24/include/asm-i386/mmu_context.h 2004-02-04 20:57:00.389646201 +0100
2610@@ -29,7 +29,7 @@
2611
2612 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu)
2613 {
2614- if (prev != next) {
2615+ if (likely(prev != next)) {
2616 /* stop flush ipis for the previous mm */
2617 clear_bit(cpu, &prev->cpu_vm_mask);
2618 #ifdef CONFIG_SMP
2619@@ -42,7 +42,7 @@
2620 /* load_LDT, if either the previous or next thread
2621 * has a non-default LDT.
2622 */
2623- if (next->context.size+prev->context.size)
2624+ if (unlikely(next->context.size+prev->context.size))
2625 load_LDT(&next->context);
2626 }
2627 #ifdef CONFIG_SMP
2628diff -urN linux-2.4.24.org/include/asm-i386/processor.h linux-2.4.24/include/asm-i386/processor.h
2629--- linux-2.4.24.org/include/asm-i386/processor.h 2004-02-04 20:47:40.967003150 +0100
2630+++ linux-2.4.24/include/asm-i386/processor.h 2004-02-04 20:52:53.702954879 +0100
2631@@ -498,6 +498,8 @@
2632
2633 #define cpu_relax() rep_nop()
2634
2635+#define ARCH_HAS_SMP_BALANCE
2636+
2637 /* Prefetch instructions for Pentium III and AMD Athlon */
2638 #if defined(CONFIG_MPENTIUMIII) || defined (CONFIG_MPENTIUM4)
2639
2640diff -urN linux-2.4.24.org/include/asm-i386/smp_balance.h linux-2.4.24/include/asm-i386/smp_balance.h
2641--- linux-2.4.24.org/include/asm-i386/smp_balance.h 1970-01-01 01:00:00.000000000 +0100
2642+++ linux-2.4.24/include/asm-i386/smp_balance.h 2004-02-04 20:52:53.705954255 +0100
2643@@ -0,0 +1,66 @@
2644+#ifndef _ASM_SMP_BALANCE_H
2645+#define _ASM_SMP_BALANCE_H
2646+
2647+/*
2648+ * We have an architecture-specific SMP load balancer to improve
2649+ * scheduling behavior on hyperthreaded CPUs. Since only P4s have
2650+ * HT, maybe this should be conditional on CONFIG_MPENTIUM4...
2651+ *
2652+ */
2653+
2654+/*
2655+ * Find any idle processor package (i.e. both virtual processors are idle)
2656+ */
2657+static inline int find_idle_package(int this_cpu)
2658+{
2659+ int i;
2660+
2661+ this_cpu = cpu_number_map(this_cpu);
2662+
2663+ for (i = (this_cpu + 1) % smp_num_cpus;
2664+ i != this_cpu;
2665+ i = (i + 1) % smp_num_cpus) {
2666+ int physical = cpu_logical_map(i);
2667+ int sibling = cpu_sibling_map[physical];
2668+
2669+ if (idle_cpu(physical) && idle_cpu(sibling))
2670+ return physical;
2671+ }
2672+ return -1; /* not found */
2673+}
2674+
2675+static inline int arch_reschedule_idle_override(task_t * p, int idle)
2676+{
2677+ if (unlikely(smp_num_siblings > 1) && !idle_cpu(cpu_sibling_map[idle])) {
2678+ int true_idle = find_idle_package(idle);
2679+ if (true_idle >= 0) {
2680+ if (likely(p->cpus_allowed & (1UL << true_idle)))
2681+ idle = true_idle;
2682+ else {
2683+ true_idle = cpu_sibling_map[true_idle];
2684+ if (p->cpus_allowed & (1UL << true_idle))
2685+ idle = true_idle;
2686+ }
2687+ }
2688+ }
2689+
2690+ return idle;
2691+}
2692+
2693+static inline int arch_load_balance(int this_cpu, int idle)
2694+{
2695+ /* Special hack for hyperthreading */
2696+ if (unlikely(smp_num_siblings > 1 && idle == 2 && !idle_cpu(cpu_sibling_map[this_cpu]))) {
2697+ int found;
2698+ struct runqueue *rq_target;
2699+
2700+ if ((found = find_idle_package(this_cpu)) >= 0 ) {
2701+ rq_target = cpu_rq(found);
2702+ resched_task(rq_target->idle);
2703+ return 1;
2704+ }
2705+ }
2706+ return 0;
2707+}
2708+
2709+#endif /* _ASM_SMP_BALANCE_H */
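The balancer above only treats a logical CPU as a good target when its hyperthreaded sibling is idle too, so two runnable tasks do not end up sharing one physical core while another package sits completely idle. A userspace sketch of that selection rule, with hypothetical demo_* arrays standing in for cpu_sibling_map[] and the per-CPU idle state; it is not the kernel code.

#include <stdio.h>

#define DEMO_NR_CPUS 4

/* Hypothetical sibling map and idle state: CPUs (0,1) and (2,3) share a core. */
static const int demo_sibling[DEMO_NR_CPUS] = { 1, 0, 3, 2 };
static const int demo_idle[DEMO_NR_CPUS]    = { 0, 1, 1, 1 };

/*
 * Same idea as find_idle_package() above: a CPU is only a target when it
 * and its sibling are both idle, i.e. the whole physical package is free.
 */
static int demo_find_idle_package(int this_cpu)
{
	int i;

	for (i = (this_cpu + 1) % DEMO_NR_CPUS; i != this_cpu;
	     i = (i + 1) % DEMO_NR_CPUS)
		if (demo_idle[i] && demo_idle[demo_sibling[i]])
			return i;
	return -1;			/* no fully idle package */
}

int main(void)
{
	/* CPU 0 is busy; its sibling CPU 1 is idle but gets skipped. */
	printf("target package: cpu %d\n", demo_find_idle_package(0));
	return 0;
}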
2710diff -urN linux-2.4.24.org/include/asm-i386/smp.h linux-2.4.24/include/asm-i386/smp.h
2711--- linux-2.4.24.org/include/asm-i386/smp.h 2004-02-04 20:47:41.153964261 +0100
2712+++ linux-2.4.24/include/asm-i386/smp.h 2004-02-04 20:52:53.733948432 +0100
2713@@ -40,6 +40,7 @@
2714 extern void smp_flush_tlb(void);
2715 extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
2716 extern void fastcall smp_send_reschedule(int cpu);
2717+extern void smp_send_reschedule_all(void);
2718 extern void smp_invalidate_rcv(void); /* Process an NMI */
2719 extern void (*mtrr_hook) (void);
2720 extern void zap_low_mappings (void);
2721@@ -81,7 +82,7 @@
2722 * so this is correct in the x86 case.
2723 */
2724
2725-#define smp_processor_id() (current->processor)
2726+#define smp_processor_id() (current->cpu)
2727
2728 static __inline int hard_smp_processor_id(void)
2729 {
2730@@ -99,17 +100,5 @@
2731
2732 #define NO_PROC_ID 0xFF /* No processor magic marker */
2733
2734-/*
2735- * This magic constant controls our willingness to transfer
2736- * a process across CPUs. Such a transfer incurs misses on the L1
2737- * cache, and on a P6 or P5 with multiple L2 caches L2 hits. My
2738- * gut feeling is this will vary by board in value. For a board
2739- * with separate L2 cache it probably depends also on the RSS, and
2740- * for a board with shared L2 cache it ought to decay fast as other
2741- * processes are run.
2742- */
2743-
2744-#define PROC_CHANGE_PENALTY 15 /* Schedule penalty */
2745-
2746 #endif
2747 #endif
2748diff -urN linux-2.4.24.org/include/asm-i386/system.h linux-2.4.24/include/asm-i386/system.h
2749--- linux-2.4.24.org/include/asm-i386/system.h 2004-02-04 20:47:40.963003981 +0100
2750+++ linux-2.4.24/include/asm-i386/system.h 2004-02-04 20:52:53.759943026 +0100
2751@@ -12,25 +12,22 @@
2752 struct task_struct; /* one of the stranger aspects of C forward declarations.. */
2753 extern void FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next));
2754
2755-#define prepare_to_switch() do { } while(0)
2756 #define switch_to(prev,next,last) do { \
2757 asm volatile("pushl %%esi\n\t" \
2758 "pushl %%edi\n\t" \
2759 "pushl %%ebp\n\t" \
2760 "movl %%esp,%0\n\t" /* save ESP */ \
2761- "movl %3,%%esp\n\t" /* restore ESP */ \
2762+ "movl %2,%%esp\n\t" /* restore ESP */ \
2763 "movl $1f,%1\n\t" /* save EIP */ \
2764- "pushl %4\n\t" /* restore EIP */ \
2765+ "pushl %3\n\t" /* restore EIP */ \
2766 "jmp __switch_to\n" \
2767 "1:\t" \
2768 "popl %%ebp\n\t" \
2769 "popl %%edi\n\t" \
2770 "popl %%esi\n\t" \
2771- :"=m" (prev->thread.esp),"=m" (prev->thread.eip), \
2772- "=b" (last) \
2773+ :"=m" (prev->thread.esp),"=m" (prev->thread.eip) \
2774 :"m" (next->thread.esp),"m" (next->thread.eip), \
2775- "a" (prev), "d" (next), \
2776- "b" (prev)); \
2777+ "a" (prev), "d" (next)); \
2778 } while (0)
2779
2780 #define _set_base(addr,base) do { unsigned long __pr; \
2781diff -urN linux-2.4.24.org/include/asm-ia64/bitops.h linux-2.4.24/include/asm-ia64/bitops.h
2782--- linux-2.4.24.org/include/asm-ia64/bitops.h 2004-02-04 20:48:16.659579072 +0100
2783+++ linux-2.4.24/include/asm-ia64/bitops.h 2004-02-04 20:52:53.793935955 +0100
2784@@ -4,6 +4,9 @@
2785 /*
2786 * Copyright (C) 1998-2003 Hewlett-Packard Co
2787 * David Mosberger-Tang <davidm@hpl.hp.com>
2788+ *
2789+ * 02/06/02 find_next_bit() and find_first_bit() added from Erich Focht's ia64 O(1)
2790+ * scheduler patch
2791 */
2792
2793 #include <linux/types.h>
2794@@ -91,6 +94,17 @@
2795 }
2796
2797 /**
2798+ * __clear_bit - Clears a bit in memory (non-atomic version)
2799+ */
2800+static __inline__ void
2801+__clear_bit (int nr, volatile void *addr)
2802+{
2803+ volatile __u32 *p = (__u32 *) addr + (nr >> 5);
2804+ __u32 m = 1 << (nr & 31);
2805+ *p &= ~m;
2806+}
2807+
2808+/**
2809 * change_bit - Toggle a bit in memory
2810 * @nr: Bit to clear
2811 * @addr: Address to start counting from
2812@@ -266,12 +280,11 @@
2813 }
2814
2815 /**
2816- * ffz - find the first zero bit in a memory region
2817- * @x: The address to start the search at
2818+ * ffz - find the first zero bit in a long word
2819+ * @x: The long word to find the bit in
2820 *
2821- * Returns the bit-number (0..63) of the first (least significant) zero bit, not
2822- * the number of the byte containing a bit. Undefined if no zero exists, so
2823- * code should check against ~0UL first...
2824+ * Returns the bit-number (0..63) of the first (least significant) zero bit. Undefined if
2825+ * no zero exists, so code should check against ~0UL first...
2826 */
2827 static inline unsigned long
2828 ffz (unsigned long x)
2829@@ -297,6 +310,21 @@
2830 return result;
2831 }
2832
2833+/**
2834+ * __ffs - find first bit in word.
2835+ * @x: The word to search
2836+ *
2837+ * Undefined if no bit exists, so code should check against 0 first.
2838+ */
2839+static __inline__ unsigned long
2840+__ffs (unsigned long x)
2841+{
2842+ unsigned long result;
2843+
2844+ __asm__ ("popcnt %0=%1" : "=r" (result) : "r" ((x - 1) & ~x));
2845+ return result;
2846+}
2847+
2848 #ifdef __KERNEL__
2849
2850 /*
2851@@ -313,6 +341,12 @@
2852 return exp - 0xffff;
2853 }
2854
2855+static int
2856+fls (int x)
2857+{
2858+ return ia64_fls((unsigned int) x);
2859+}
2860+
2861 /*
2862 * ffs: find first bit set. This is defined the same way as the libc and compiler builtin
2863 * ffs routines, therefore differs in spirit from the above ffz (man ffs): it operates on
2864@@ -385,8 +419,53 @@
2865 */
2866 #define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
2867
2868+/*
2869+ * Find next bit in a bitmap reasonably efficiently..
2870+ */
2871+static inline int
2872+find_next_bit (void *addr, unsigned long size, unsigned long offset)
2873+{
2874+ unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
2875+ unsigned long result = offset & ~63UL;
2876+ unsigned long tmp;
2877+
2878+ if (offset >= size)
2879+ return size;
2880+ size -= result;
2881+ offset &= 63UL;
2882+ if (offset) {
2883+ tmp = *(p++);
2884+ tmp &= ~0UL << offset;
2885+ if (size < 64)
2886+ goto found_first;
2887+ if (tmp)
2888+ goto found_middle;
2889+ size -= 64;
2890+ result += 64;
2891+ }
2892+ while (size & ~63UL) {
2893+ if ((tmp = *(p++)))
2894+ goto found_middle;
2895+ result += 64;
2896+ size -= 64;
2897+ }
2898+ if (!size)
2899+ return result;
2900+ tmp = *p;
2901+ found_first:
2902+ tmp &= ~0UL >> (64-size);
2903+ if (tmp == 0UL) /* Are any bits set? */
2904+ return result + size; /* Nope. */
2905+ found_middle:
2906+ return result + __ffs(tmp);
2907+}
2908+
2909+#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
2910+
2911 #ifdef __KERNEL__
2912
2913+#define __clear_bit(nr, addr) clear_bit(nr, addr)
2914+
2915 #define ext2_set_bit test_and_set_bit
2916 #define ext2_clear_bit test_and_clear_bit
2917 #define ext2_test_bit test_bit
2918@@ -400,6 +479,16 @@
2919 #define minix_test_bit(nr,addr) test_bit(nr,addr)
2920 #define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
2921
2922+static inline int
2923+sched_find_first_bit (unsigned long *b)
2924+{
2925+ if (unlikely(b[0]))
2926+ return __ffs(b[0]);
2927+ if (unlikely(b[1]))
2928+ return 64 + __ffs(b[1]);
2929+ return __ffs(b[2]) + 128;
2930+}
2931+
2932 #endif /* __KERNEL__ */
2933
2934 #endif /* _ASM_IA64_BITOPS_H */
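The ia64 __ffs() above leans on popcnt: (x - 1) & ~x sets exactly the bits below the lowest set bit of x, so its population count is the index of that bit. A portable check of that identity, with hypothetical demo_* names and a loop-based popcount in place of the popcnt instruction.

#include <stdio.h>

/*
 * Example: x = 0b101000 -> (x - 1) & ~x = 0b000111, popcount = 3 = __ffs(x).
 */
static unsigned long demo_popcount(unsigned long w)
{
	unsigned long count = 0;

	while (w) {
		count += w & 1UL;
		w >>= 1;
	}
	return count;
}

static unsigned long demo_ffs_via_popcount(unsigned long x)
{
	return demo_popcount((x - 1) & ~x);	/* undefined for x == 0 */
}

int main(void)
{
	printf("%lu\n", demo_ffs_via_popcount(0x28UL));	/* prints 3 */
	return 0;
}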
2935diff -urN linux-2.4.24.org/include/asm-m68k/bitops.h linux-2.4.24/include/asm-m68k/bitops.h
2936--- linux-2.4.24.org/include/asm-m68k/bitops.h 2004-02-04 20:47:47.882564706 +0100
2937+++ linux-2.4.24/include/asm-m68k/bitops.h 2004-02-04 20:52:53.798934916 +0100
2938@@ -97,6 +97,7 @@
2939 (__builtin_constant_p(nr) ? \
2940 __constant_clear_bit(nr, vaddr) : \
2941 __generic_clear_bit(nr, vaddr))
2942+#define __clear_bit(nr,vaddr) clear_bit(nr,vaddr)
2943
2944 static inline void __constant_clear_bit(int nr, volatile void *vaddr)
2945 {
2946@@ -238,6 +239,28 @@
2947
2948 return 32 - cnt;
2949 }
2950+#define __ffs(x) (ffs(x) - 1)
2951+
2952+
2953+/*
2954+ * Every architecture must define this function. It's the fastest
2955+ * way of searching a 140-bit bitmap where the first 100 bits are
2956+ * unlikely to be set. It's guaranteed that at least one of the 140
2957+ * bits is cleared.
2958+ */
2959+static inline int sched_find_first_bit(unsigned long *b)
2960+{
2961+ if (unlikely(b[0]))
2962+ return __ffs(b[0]);
2963+ if (unlikely(b[1]))
2964+ return __ffs(b[1]) + 32;
2965+ if (unlikely(b[2]))
2966+ return __ffs(b[2]) + 64;
2967+ if (b[3])
2968+ return __ffs(b[3]) + 96;
2969+ return __ffs(b[4]) + 128;
2970+}
2971+
2972
2973 /*
2974 * hweightN: returns the hamming weight (i.e. the number
2975diff -urN linux-2.4.24.org/include/asm-mips/bitops.h linux-2.4.24/include/asm-mips/bitops.h
2976--- linux-2.4.24.org/include/asm-mips/bitops.h 2004-02-04 20:47:43.266524847 +0100
2977+++ linux-2.4.24/include/asm-mips/bitops.h 2004-02-04 20:52:53.820930341 +0100
2978@@ -51,6 +51,8 @@
2979
2980 #ifdef CONFIG_CPU_HAS_LLSC
2981
2982+#include <asm/mipsregs.h>
2983+
2984 /*
2985 * These functions for MIPS ISA > 1 are interrupt and SMP proof and
2986 * interrupt friendly
2987@@ -593,21 +595,30 @@
2988 *
2989 * Undefined if no zero exists, so code should check against ~0UL first.
2990 */
2991-static __inline__ unsigned long ffz(unsigned long word)
2992+extern __inline__ unsigned long ffz(unsigned long word)
2993 {
2994- int b = 0, s;
2995+ unsigned int __res;
2996+ unsigned int mask = 1;
2997
2998- word = ~word;
2999- s = 16; if (word << 16 != 0) s = 0; b += s; word >>= s;
3000- s = 8; if (word << 24 != 0) s = 0; b += s; word >>= s;
3001- s = 4; if (word << 28 != 0) s = 0; b += s; word >>= s;
3002- s = 2; if (word << 30 != 0) s = 0; b += s; word >>= s;
3003- s = 1; if (word << 31 != 0) s = 0; b += s;
3004+ __asm__ (
3005+ ".set\tnoreorder\n\t"
3006+ ".set\tnoat\n\t"
3007+ "move\t%0,$0\n"
3008+ "1:\tand\t$1,%2,%1\n\t"
3009+ "beqz\t$1,2f\n\t"
3010+ "sll\t%1,1\n\t"
3011+ "bnez\t%1,1b\n\t"
3012+ "addiu\t%0,1\n\t"
3013+ ".set\tat\n\t"
3014+ ".set\treorder\n"
3015+ "2:\n\t"
3016+ : "=&r" (__res), "=r" (mask)
3017+ : "r" (word), "1" (mask)
3018+ : "$1");
3019
3020- return b;
3021+ return __res;
3022 }
3023
3024-
3025 #ifdef __KERNEL__
3026
3027 /*
3028diff -urN linux-2.4.24.org/include/asm-mips64/bitops.h linux-2.4.24/include/asm-mips64/bitops.h
3029--- linux-2.4.24.org/include/asm-mips64/bitops.h 2004-02-04 20:48:21.702530138 +0100
3030+++ linux-2.4.24/include/asm-mips64/bitops.h 2004-02-04 20:52:53.873919319 +0100
3031@@ -27,6 +27,7 @@
3032
3033 #include <asm/system.h>
3034 #include <asm/sgidefs.h>
3035+#include <asm/mipsregs.h>
3036
3037 /*
3038 * set_bit - Atomically set a bit in memory
3039@@ -38,7 +39,8 @@
3040 * Note that @nr may be almost arbitrarily large; this function is not
3041 * restricted to acting on a single-word quantity.
3042 */
3043-static inline void set_bit(unsigned long nr, volatile void *addr)
3044+extern __inline__ void
3045+set_bit(unsigned long nr, volatile void *addr)
3046 {
3047 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
3048 unsigned long temp;
3049@@ -62,7 +64,7 @@
3050 * If it's called on the same region of memory simultaneously, the effect
3051 * may be that only one operation succeeds.
3052 */
3053-static inline void __set_bit(int nr, volatile void * addr)
3054+extern __inline__ void __set_bit(int nr, volatile void * addr)
3055 {
3056 unsigned long * m = ((unsigned long *) addr) + (nr >> 6);
3057
3058@@ -79,7 +81,8 @@
3059 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
3060 * in order to ensure changes are visible on other processors.
3061 */
3062-static inline void clear_bit(unsigned long nr, volatile void *addr)
3063+extern __inline__ void
3064+clear_bit(unsigned long nr, volatile void *addr)
3065 {
3066 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
3067 unsigned long temp;
3068@@ -105,7 +108,8 @@
3069 * Note that @nr may be almost arbitrarily large; this function is not
3070 * restricted to acting on a single-word quantity.
3071 */
3072-static inline void change_bit(unsigned long nr, volatile void *addr)
3073+extern __inline__ void
3074+change_bit(unsigned long nr, volatile void *addr)
3075 {
3076 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
3077 unsigned long temp;
3078@@ -128,7 +132,7 @@
3079 * If it's called on the same region of memory simultaneously, the effect
3080 * may be that only one operation succeeds.
3081 */
3082-static inline void __change_bit(int nr, volatile void * addr)
3083+extern __inline__ void __change_bit(int nr, volatile void * addr)
3084 {
3085 unsigned long * m = ((unsigned long *) addr) + (nr >> 6);
3086
3087@@ -143,8 +147,8 @@
3088 * This operation is atomic and cannot be reordered.
3089 * It also implies a memory barrier.
3090 */
3091-static inline unsigned long test_and_set_bit(unsigned long nr,
3092- volatile void *addr)
3093+extern __inline__ unsigned long
3094+test_and_set_bit(unsigned long nr, volatile void *addr)
3095 {
3096 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
3097 unsigned long temp, res;
3098@@ -176,7 +180,8 @@
3099 * If two examples of this operation race, one can appear to succeed
3100 * but actually fail. You must protect multiple accesses with a lock.
3101 */
3102-static inline int __test_and_set_bit(int nr, volatile void *addr)
3103+extern __inline__ int
3104+__test_and_set_bit(int nr, volatile void * addr)
3105 {
3106 unsigned long mask, retval;
3107 long *a = (unsigned long *) addr;
3108@@ -197,8 +202,8 @@
3109 * This operation is atomic and cannot be reordered.
3110 * It also implies a memory barrier.
3111 */
3112-static inline unsigned long test_and_clear_bit(unsigned long nr,
3113- volatile void *addr)
3114+extern __inline__ unsigned long
3115+test_and_clear_bit(unsigned long nr, volatile void *addr)
3116 {
3117 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
3118 unsigned long temp, res;
3119@@ -231,7 +236,8 @@
3120 * If two examples of this operation race, one can appear to succeed
3121 * but actually fail. You must protect multiple accesses with a lock.
3122 */
3123-static inline int __test_and_clear_bit(int nr, volatile void * addr)
3124+extern __inline__ int
3125+__test_and_clear_bit(int nr, volatile void * addr)
3126 {
3127 unsigned long mask, retval;
3128 unsigned long *a = (unsigned long *) addr;
3129@@ -252,8 +258,8 @@
3130 * This operation is atomic and cannot be reordered.
3131 * It also implies a memory barrier.
3132 */
3133-static inline unsigned long test_and_change_bit(unsigned long nr,
3134- volatile void *addr)
3135+extern __inline__ unsigned long
3136+test_and_change_bit(unsigned long nr, volatile void *addr)
3137 {
3138 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
3139 unsigned long temp, res;
3140@@ -285,7 +291,8 @@
3141 * If two examples of this operation race, one can appear to succeed
3142 * but actually fail. You must protect multiple accesses with a lock.
3143 */
3144-static inline int __test_and_change_bit(int nr, volatile void *addr)
3145+extern __inline__ int
3146+__test_and_change_bit(int nr, volatile void * addr)
3147 {
3148 unsigned long mask, retval;
3149 unsigned long *a = (unsigned long *) addr;
3150@@ -302,7 +309,8 @@
3151 * @nr: bit number to test
3152 * @addr: Address to start counting from
3153 */
3154-static inline int test_bit(int nr, volatile void * addr)
3155+extern __inline__ unsigned long
3156+test_bit(int nr, volatile void * addr)
3157 {
3158 return 1UL & (((const volatile unsigned long *) addr)[nr >> SZLONG_LOG] >> (nr & SZLONG_MASK));
3159 }
3160@@ -313,19 +321,20 @@
3161 *
3162 * Undefined if no zero exists, so code should check against ~0UL first.
3163 */
3164-static __inline__ unsigned long ffz(unsigned long word)
3165+extern __inline__ unsigned long ffz(unsigned long word)
3166 {
3167- int b = 0, s;
3168+ unsigned long k;
3169
3170 word = ~word;
3171- s = 32; if (word << 32 != 0) s = 0; b += s; word >>= s;
3172- s = 16; if (word << 48 != 0) s = 0; b += s; word >>= s;
3173- s = 8; if (word << 56 != 0) s = 0; b += s; word >>= s;
3174- s = 4; if (word << 60 != 0) s = 0; b += s; word >>= s;
3175- s = 2; if (word << 62 != 0) s = 0; b += s; word >>= s;
3176- s = 1; if (word << 63 != 0) s = 0; b += s;
3177+ k = 63;
3178+ if (word & 0x00000000ffffffffUL) { k -= 32; word <<= 32; }
3179+ if (word & 0x0000ffff00000000UL) { k -= 16; word <<= 16; }
3180+ if (word & 0x00ff000000000000UL) { k -= 8; word <<= 8; }
3181+ if (word & 0x0f00000000000000UL) { k -= 4; word <<= 4; }
3182+ if (word & 0x3000000000000000UL) { k -= 2; word <<= 2; }
3183+ if (word & 0x4000000000000000UL) { k -= 1; }
3184
3185- return b;
3186+ return k;
3187 }
3188
3189 /*
3190@@ -334,8 +343,8 @@
3191 * @offset: The bitnumber to start searching at
3192 * @size: The maximum size to search
3193 */
3194-static inline unsigned long find_next_zero_bit(void *addr, unsigned long size,
3195- unsigned long offset)
3196+extern __inline__ unsigned long
3197+find_next_zero_bit(void *addr, unsigned long size, unsigned long offset)
3198 {
3199 unsigned long *p = ((unsigned long *) addr) + (offset >> SZLONG_LOG);
3200 unsigned long result = offset & ~SZLONG_MASK;
3201@@ -400,7 +409,8 @@
3202 #define hweight16(x) generic_hweight16(x)
3203 #define hweight8(x) generic_hweight8(x)
3204
3205-static inline int __test_and_set_le_bit(unsigned long nr, void * addr)
3206+extern inline int
3207+__test_and_set_le_bit(unsigned long nr, void * addr)
3208 {
3209 unsigned char *ADDR = (unsigned char *) addr;
3210 int mask, retval;
3211@@ -413,7 +423,8 @@
3212 return retval;
3213 }
3214
3215-static inline int __test_and_clear_le_bit(unsigned long nr, void * addr)
3216+extern inline int
3217+__test_and_clear_le_bit(unsigned long nr, void * addr)
3218 {
3219 unsigned char *ADDR = (unsigned char *) addr;
3220 int mask, retval;
3221@@ -426,7 +437,8 @@
3222 return retval;
3223 }
3224
3225-static inline int test_le_bit(unsigned long nr, const void * addr)
3226+extern inline int
3227+test_le_bit(unsigned long nr, const void * addr)
3228 {
3229 const unsigned char *ADDR = (const unsigned char *) addr;
3230 int mask;
3231@@ -451,7 +463,7 @@
3232 return b;
3233 }
3234
3235-static inline unsigned long find_next_zero_le_bit(void *addr,
3236+extern inline unsigned long find_next_zero_le_bit(void *addr,
3237 unsigned long size, unsigned long offset)
3238 {
3239 unsigned int *p = ((unsigned int *) addr) + (offset >> 5);
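The hunk above replaces the old shift-accumulating ffz() on MIPS64 with a narrowing search that halves the candidate range at each step. The following is a hedged, user-space sketch of that same narrowing technique, not part of the patch; it assumes a 64-bit unsigned long (as on the MIPS64 target), and the names ffz_demo/main are hypothetical.

#include <assert.h>
#include <stdio.h>

static unsigned long ffz_demo(unsigned long word)
{
	unsigned long k;

	word = ~word;		/* turn "find first zero" into "find first one" */
	k = 63;
	if (word & 0x00000000ffffffffUL) { k -= 32; word <<= 32; }
	if (word & 0x0000ffff00000000UL) { k -= 16; word <<= 16; }
	if (word & 0x00ff000000000000UL) { k -= 8;  word <<= 8;  }
	if (word & 0x0f00000000000000UL) { k -= 4;  word <<= 4;  }
	if (word & 0x3000000000000000UL) { k -= 2;  word <<= 2;  }
	if (word & 0x4000000000000000UL) { k -= 1; }
	return k;
}

int main(void)
{
	assert(ffz_demo(0x0UL) == 0);			/* bit 0 already clear */
	assert(ffz_demo(0xffUL) == 8);			/* first zero above a run of ones */
	assert(ffz_demo(~0UL ^ (1UL << 40)) == 40);	/* single cleared bit */
	printf("ffz demo ok\n");
	return 0;
}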
3240diff -urN linux-2.4.24.org/include/asm-ppc/bitops.h linux-2.4.24/include/asm-ppc/bitops.h
3241--- linux-2.4.24.org/include/asm-ppc/bitops.h 2004-02-04 20:47:57.992461840 +0100
3242+++ linux-2.4.24/include/asm-ppc/bitops.h 2004-02-04 20:52:53.902913289 +0100
3243@@ -7,6 +7,7 @@
3244 #define _PPC_BITOPS_H
3245
3246 #include <linux/config.h>
3247+#include <linux/compiler.h>
3248 #include <asm/byteorder.h>
3249 #include <asm/atomic.h>
3250
3251@@ -26,7 +27,7 @@
3252 * These used to be if'd out here because using : "cc" as a constraint
3253 * resulted in errors from egcs. Things appear to be OK with gcc-2.95.
3254 */
3255-static __inline__ void set_bit(int nr, volatile void * addr)
3256+static __inline__ void set_bit(int nr, volatile unsigned long * addr)
3257 {
3258 unsigned long old;
3259 unsigned long mask = 1 << (nr & 0x1f);
3260@@ -46,7 +47,7 @@
3261 /*
3262 * non-atomic version
3263 */
3264-static __inline__ void __set_bit(int nr, volatile void *addr)
3265+static __inline__ void __set_bit(int nr, volatile unsigned long *addr)
3266 {
3267 unsigned long mask = 1 << (nr & 0x1f);
3268 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
3269@@ -60,7 +61,7 @@
3270 #define smp_mb__before_clear_bit() smp_mb()
3271 #define smp_mb__after_clear_bit() smp_mb()
3272
3273-static __inline__ void clear_bit(int nr, volatile void *addr)
3274+static __inline__ void clear_bit(int nr, volatile unsigned long *addr)
3275 {
3276 unsigned long old;
3277 unsigned long mask = 1 << (nr & 0x1f);
3278@@ -80,7 +81,7 @@
3279 /*
3280 * non-atomic version
3281 */
3282-static __inline__ void __clear_bit(int nr, volatile void *addr)
3283+static __inline__ void __clear_bit(int nr, volatile unsigned long *addr)
3284 {
3285 unsigned long mask = 1 << (nr & 0x1f);
3286 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
3287@@ -88,7 +89,7 @@
3288 *p &= ~mask;
3289 }
3290
3291-static __inline__ void change_bit(int nr, volatile void *addr)
3292+static __inline__ void change_bit(int nr, volatile unsigned long *addr)
3293 {
3294 unsigned long old;
3295 unsigned long mask = 1 << (nr & 0x1f);
3296@@ -108,7 +109,7 @@
3297 /*
3298 * non-atomic version
3299 */
3300-static __inline__ void __change_bit(int nr, volatile void *addr)
3301+static __inline__ void __change_bit(int nr, volatile unsigned long *addr)
3302 {
3303 unsigned long mask = 1 << (nr & 0x1f);
3304 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
3305@@ -119,7 +120,7 @@
3306 /*
3307 * test_and_*_bit do imply a memory barrier (?)
3308 */
3309-static __inline__ int test_and_set_bit(int nr, volatile void *addr)
3310+static __inline__ int test_and_set_bit(int nr, volatile unsigned long *addr)
3311 {
3312 unsigned int old, t;
3313 unsigned int mask = 1 << (nr & 0x1f);
3314@@ -142,7 +143,7 @@
3315 /*
3316 * non-atomic version
3317 */
3318-static __inline__ int __test_and_set_bit(int nr, volatile void *addr)
3319+static __inline__ int __test_and_set_bit(int nr, volatile unsigned long *addr)
3320 {
3321 unsigned long mask = 1 << (nr & 0x1f);
3322 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
3323@@ -152,7 +153,7 @@
3324 return (old & mask) != 0;
3325 }
3326
3327-static __inline__ int test_and_clear_bit(int nr, volatile void *addr)
3328+static __inline__ int test_and_clear_bit(int nr, volatile unsigned long *addr)
3329 {
3330 unsigned int old, t;
3331 unsigned int mask = 1 << (nr & 0x1f);
3332@@ -175,7 +176,7 @@
3333 /*
3334 * non-atomic version
3335 */
3336-static __inline__ int __test_and_clear_bit(int nr, volatile void *addr)
3337+static __inline__ int __test_and_clear_bit(int nr, volatile unsigned long *addr)
3338 {
3339 unsigned long mask = 1 << (nr & 0x1f);
3340 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
3341@@ -185,7 +186,7 @@
3342 return (old & mask) != 0;
3343 }
3344
3345-static __inline__ int test_and_change_bit(int nr, volatile void *addr)
3346+static __inline__ int test_and_change_bit(int nr, volatile unsigned long *addr)
3347 {
3348 unsigned int old, t;
3349 unsigned int mask = 1 << (nr & 0x1f);
3350@@ -208,7 +209,7 @@
3351 /*
3352 * non-atomic version
3353 */
3354-static __inline__ int __test_and_change_bit(int nr, volatile void *addr)
3355+static __inline__ int __test_and_change_bit(int nr, volatile unsigned long *addr)
3356 {
3357 unsigned long mask = 1 << (nr & 0x1f);
3358 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
3359@@ -218,7 +219,7 @@
3360 return (old & mask) != 0;
3361 }
3362
3363-static __inline__ int test_bit(int nr, __const__ volatile void *addr)
3364+static __inline__ int test_bit(int nr, __const__ volatile unsigned long *addr)
3365 {
3366 __const__ unsigned int *p = (__const__ unsigned int *) addr;
3367
3368@@ -226,7 +227,7 @@
3369 }
3370
3371 /* Return the bit position of the most significant 1 bit in a word */
3372-static __inline__ int __ilog2(unsigned int x)
3373+static __inline__ int __ilog2(unsigned long x)
3374 {
3375 int lz;
3376
3377@@ -234,13 +235,18 @@
3378 return 31 - lz;
3379 }
3380
3381-static __inline__ int ffz(unsigned int x)
3382+static __inline__ int ffz(unsigned long x)
3383 {
3384 if ((x = ~x) == 0)
3385 return 32;
3386 return __ilog2(x & -x);
3387 }
3388
3389+static inline int __ffs(unsigned long x)
3390+{
3391+ return __ilog2(x & -x);
3392+}
3393+
3394 /*
3395 * ffs: find first bit set. This is defined the same way as
3396 * the libc and compiler builtin ffs routines, therefore
3397@@ -252,6 +258,18 @@
3398 }
3399
3400 /*
3401+ * fls: find last (most-significant) bit set.
3402+ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
3403+ */
3404+static __inline__ int fls(unsigned int x)
3405+{
3406+ int lz;
3407+
3408+ asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x));
3409+ return 32 - lz;
3410+}
3411+
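The fls() added just above relies on the PPC cntlzw (count leading zeros) instruction. As a hedged illustration only, the same semantics documented in the comment (fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32) can be expressed in portable C with a GCC builtin; demo_fls() is a hypothetical name, not part of the patch.

#include <assert.h>
#include <stdio.h>

static int demo_fls(unsigned int x)
{
	/* 32 minus the number of leading zero bits; fls(0) is defined as 0. */
	return x ? 32 - __builtin_clz(x) : 0;
}

int main(void)
{
	assert(demo_fls(0) == 0);
	assert(demo_fls(1) == 1);
	assert(demo_fls(0x80000000u) == 32);
	printf("fls demo ok\n");
	return 0;
}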
3412+/*
3413 * hweightN: returns the hamming weight (i.e. the number
3414 * of bits set) of a N-bit word
3415 */
3416@@ -261,13 +279,86 @@
3417 #define hweight8(x) generic_hweight8(x)
3418
3419 /*
3420+ * Find the first bit set in a 140-bit bitmap.
3421+ * The first 100 bits are unlikely to be set.
3422+ */
3423+static inline int sched_find_first_bit(unsigned long *b)
3424+{
3425+ if (unlikely(b[0]))
3426+ return __ffs(b[0]);
3427+ if (unlikely(b[1]))
3428+ return __ffs(b[1]) + 32;
3429+ if (unlikely(b[2]))
3430+ return __ffs(b[2]) + 64;
3431+ if (b[3])
3432+ return __ffs(b[3]) + 96;
3433+ return __ffs(b[4]) + 128;
3434+}
3435+
3436+/**
3437+ * find_next_bit - find the next set bit in a memory region
3438+ * @addr: The address to base the search on
3439+ * @offset: The bitnumber to start searching at
3440+ * @size: The maximum size to search
3441+ */
3442+static __inline__ unsigned long find_next_bit(unsigned long *addr,
3443+ unsigned long size, unsigned long offset)
3444+{
3445+ unsigned int *p = ((unsigned int *) addr) + (offset >> 5);
3446+ unsigned int result = offset & ~31UL;
3447+ unsigned int tmp;
3448+
3449+ if (offset >= size)
3450+ return size;
3451+ size -= result;
3452+ offset &= 31UL;
3453+ if (offset) {
3454+ tmp = *p++;
3455+ tmp &= ~0UL << offset;
3456+ if (size < 32)
3457+ goto found_first;
3458+ if (tmp)
3459+ goto found_middle;
3460+ size -= 32;
3461+ result += 32;
3462+ }
3463+ while (size >= 32) {
3464+ if ((tmp = *p++) != 0)
3465+ goto found_middle;
3466+ result += 32;
3467+ size -= 32;
3468+ }
3469+ if (!size)
3470+ return result;
3471+ tmp = *p;
3472+
3473+found_first:
3474+ tmp &= ~0UL >> (32 - size);
3475+ if (tmp == 0UL) /* Are any bits set? */
3476+ return result + size; /* Nope. */
3477+found_middle:
3478+ return result + __ffs(tmp);
3479+}
3480+
3481+/**
3482+ * find_first_bit - find the first set bit in a memory region
3483+ * @addr: The address to start the search at
3484+ * @size: The maximum size to search
3485+ *
3486+ * Returns the bit-number of the first set bit, not the number of the byte
3487+ * containing a bit.
3488+ */
3489+#define find_first_bit(addr, size) \
3490+ find_next_bit((addr), (size), 0)
3491+
3492+/*
3493 * This implementation of find_{first,next}_zero_bit was stolen from
3494 * Linus' asm-alpha/bitops.h.
3495 */
3496 #define find_first_zero_bit(addr, size) \
3497 find_next_zero_bit((addr), (size), 0)
3498
3499-static __inline__ unsigned long find_next_zero_bit(void * addr,
3500+static __inline__ unsigned long find_next_zero_bit(unsigned long * addr,
3501 unsigned long size, unsigned long offset)
3502 {
3503 unsigned int * p = ((unsigned int *) addr) + (offset >> 5);
3504@@ -306,8 +397,8 @@
3505 }
3506
3507
3508-#define ext2_set_bit(nr, addr) __test_and_set_bit((nr) ^ 0x18, addr)
3509-#define ext2_clear_bit(nr, addr) __test_and_clear_bit((nr) ^ 0x18, addr)
3510+#define ext2_set_bit(nr, addr) __test_and_set_bit((nr) ^ 0x18, (unsigned long *)(addr))
3511+#define ext2_clear_bit(nr, addr) __test_and_clear_bit((nr) ^ 0x18, (unsigned long *)(addr))
3512
3513 static __inline__ int ext2_test_bit(int nr, __const__ void * addr)
3514 {
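The asm-ppc hunk above adds sched_find_first_bit(), the helper the O(1) scheduler uses to locate the highest-priority non-empty run queue in its 140-bit priority bitmap. The sketch below is a hedged, user-space restatement of that scan over five 32-bit words, not the kernel code itself; demo_ffs()/demo_sched_find_first_bit()/main are hypothetical names and __builtin_ctz stands in for the architecture's __ffs().

#include <assert.h>
#include <stdio.h>

static int demo_ffs(unsigned int x)
{
	return __builtin_ctz(x);	/* index of lowest set bit, x != 0 */
}

static int demo_sched_find_first_bit(const unsigned int b[5])
{
	/* 140 priority queues; the first set bit names the highest-priority
	 * (lowest-numbered) queue that has runnable tasks. The first 100
	 * bits (real-time priorities) are usually clear. */
	if (b[0]) return demo_ffs(b[0]);
	if (b[1]) return demo_ffs(b[1]) + 32;
	if (b[2]) return demo_ffs(b[2]) + 64;
	if (b[3]) return demo_ffs(b[3]) + 96;
	return demo_ffs(b[4]) + 128;
}

int main(void)
{
	unsigned int bitmap[5] = { 0, 0, 0, 1u << 6, 0 };	/* bit 102 set */
	assert(demo_sched_find_first_bit(bitmap) == 96 + 6);
	printf("first runnable priority: %d\n", demo_sched_find_first_bit(bitmap));
	return 0;
}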
3515diff -urN linux-2.4.24.org/include/asm-ppc/smp.h linux-2.4.24/include/asm-ppc/smp.h
3516--- linux-2.4.24.org/include/asm-ppc/smp.h 2004-02-04 20:47:58.116436054 +0100
3517+++ linux-2.4.24/include/asm-ppc/smp.h 2004-02-04 20:52:53.906912457 +0100
3518@@ -45,7 +45,7 @@
3519 #define cpu_logical_map(cpu) (cpu)
3520 #define cpu_number_map(x) (x)
3521
3522-#define smp_processor_id() (current->processor)
3523+#define smp_processor_id() (current->cpu)
3524
3525 extern int smp_hw_index[NR_CPUS];
3526 #define hard_smp_processor_id() (smp_hw_index[smp_processor_id()])
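The one-line smp.h change above tracks the scheduler rework: the per-task CPU identifier read by smp_processor_id() is now a field named cpu. A hedged stand-alone sketch of that shape follows; demo_task/demo_current/demo_smp_processor_id are hypothetical stand-ins, not the kernel's task_struct.

#include <assert.h>
#include <stdio.h>

struct demo_task {
	int cpu;			/* CPU this task is currently running on */
};

static struct demo_task demo_boot_task = { .cpu = 0 };
static struct demo_task *demo_current = &demo_boot_task;

#define demo_smp_processor_id()	(demo_current->cpu)

int main(void)
{
	demo_current->cpu = 2;		/* pretend the task was migrated to CPU 2 */
	assert(demo_smp_processor_id() == 2);
	printf("running on cpu %d\n", demo_smp_processor_id());
	return 0;
}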
3527diff -urN linux-2.4.24.org/include/asm-ppc64/bitops.h linux-2.4.24/include/asm-ppc64/bitops.h
3528--- linux-2.4.24.org/include/asm-ppc64/bitops.h 2004-02-04 20:47:31.682934246 +0100
3529+++ linux-2.4.24/include/asm-ppc64/bitops.h 2004-02-04 20:52:53.961901020 +0100
3530@@ -41,12 +41,12 @@
3531 #define smp_mb__before_clear_bit() smp_mb()
3532 #define smp_mb__after_clear_bit() smp_mb()
3533
3534-static __inline__ int test_bit(unsigned long nr, __const__ volatile void *addr)
3535+static __inline__ int test_bit(unsigned long nr, __const__ volatile unsigned long *addr)
3536 {
3537 return (1UL & (((__const__ long *) addr)[nr >> 6] >> (nr & 63)));
3538 }
3539
3540-static __inline__ void set_bit(unsigned long nr, volatile void *addr)
3541+static __inline__ void set_bit(unsigned long nr, volatile unsigned long *addr)
3542 {
3543 unsigned long old;
3544 unsigned long mask = 1UL << (nr & 0x3f);
3545@@ -62,7 +62,7 @@
3546 : "cc");
3547 }
3548
3549-static __inline__ void clear_bit(unsigned long nr, volatile void *addr)
3550+static __inline__ void clear_bit(unsigned long nr, volatile unsigned long *addr)
3551 {
3552 unsigned long old;
3553 unsigned long mask = 1UL << (nr & 0x3f);
3554@@ -78,7 +78,7 @@
3555 : "cc");
3556 }
3557
3558-static __inline__ void change_bit(unsigned long nr, volatile void *addr)
3559+static __inline__ void change_bit(unsigned long nr, volatile unsigned long *addr)
3560 {
3561 unsigned long old;
3562 unsigned long mask = 1UL << (nr & 0x3f);
3563@@ -94,7 +94,7 @@
3564 : "cc");
3565 }
3566
3567-static __inline__ int test_and_set_bit(unsigned long nr, volatile void *addr)
3568+static __inline__ int test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
3569 {
3570 unsigned long old, t;
3571 unsigned long mask = 1UL << (nr & 0x3f);
3572@@ -114,7 +114,7 @@
3573 return (old & mask) != 0;
3574 }
3575
3576-static __inline__ int test_and_clear_bit(unsigned long nr, volatile void *addr)
3577+static __inline__ int test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
3578 {
3579 unsigned long old, t;
3580 unsigned long mask = 1UL << (nr & 0x3f);
3581@@ -134,7 +134,7 @@
3582 return (old & mask) != 0;
3583 }
3584
3585-static __inline__ int test_and_change_bit(unsigned long nr, volatile void *addr)
3586+static __inline__ int test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
3587 {
3588 unsigned long old, t;
3589 unsigned long mask = 1UL << (nr & 0x3f);
3590@@ -157,7 +157,7 @@
3591 /*
3592 * non-atomic versions
3593 */
3594-static __inline__ void __set_bit(unsigned long nr, volatile void *addr)
3595+static __inline__ void __set_bit(unsigned long nr, volatile unsigned long *addr)
3596 {
3597 unsigned long mask = 1UL << (nr & 0x3f);
3598 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
3599@@ -165,7 +165,7 @@
3600 *p |= mask;
3601 }
3602
3603-static __inline__ void __clear_bit(unsigned long nr, volatile void *addr)
3604+static __inline__ void __clear_bit(unsigned long nr, volatile unsigned long *addr)
3605 {
3606 unsigned long mask = 1UL << (nr & 0x3f);
3607 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
3608@@ -173,7 +173,7 @@
3609 *p &= ~mask;
3610 }
3611
3612-static __inline__ void __change_bit(unsigned long nr, volatile void *addr)
3613+static __inline__ void __change_bit(unsigned long nr, volatile unsigned long *addr)
3614 {
3615 unsigned long mask = 1UL << (nr & 0x3f);
3616 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
3617@@ -181,7 +181,7 @@
3618 *p ^= mask;
3619 }
3620
3621-static __inline__ int __test_and_set_bit(unsigned long nr, volatile void *addr)
3622+static __inline__ int __test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
3623 {
3624 unsigned long mask = 1UL << (nr & 0x3f);
3625 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
3626@@ -191,7 +191,7 @@
3627 return (old & mask) != 0;
3628 }
3629
3630-static __inline__ int __test_and_clear_bit(unsigned long nr, volatile void *addr)
3631+static __inline__ int __test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
3632 {
3633 unsigned long mask = 1UL << (nr & 0x3f);
3634 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
3635@@ -201,7 +201,7 @@
3636 return (old & mask) != 0;
3637 }
3638
3639-static __inline__ int __test_and_change_bit(unsigned long nr, volatile void *addr)
3640+static __inline__ int __test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
3641 {
3642 unsigned long mask = 1UL << (nr & 0x3f);
3643 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
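The asm-ppc64 hunks above tighten the bit-operation prototypes to volatile unsigned long * and keep the usual word/mask arithmetic (word index nr >> 6, mask 1UL << (nr & 0x3f)). As a hedged illustration of that arithmetic only, here is a plain user-space version of the non-atomic helpers; it assumes a 64-bit unsigned long, and all demo_ names are hypothetical.

#include <assert.h>
#include <stdio.h>

#define DEMO_BITS_PER_LONG 64

static void demo_set_bit(unsigned long nr, unsigned long *addr)
{
	unsigned long mask = 1UL << (nr & (DEMO_BITS_PER_LONG - 1));
	unsigned long *p = addr + (nr / DEMO_BITS_PER_LONG);	/* nr >> 6 */

	*p |= mask;
}

static int demo_test_and_clear_bit(unsigned long nr, unsigned long *addr)
{
	unsigned long mask = 1UL << (nr & (DEMO_BITS_PER_LONG - 1));
	unsigned long *p = addr + (nr / DEMO_BITS_PER_LONG);
	unsigned long old = *p;

	*p = old & ~mask;
	return (old & mask) != 0;		/* previous value of the bit */
}

int main(void)
{
	unsigned long bitmap[3] = { 0, 0, 0 };	/* 192-bit bitmap */

	demo_set_bit(70, bitmap);		/* word 1, bit 6 */
	assert(bitmap[1] == (1UL << 6));
	assert(demo_test_and_clear_bit(70, bitmap) == 1);
	assert(demo_test_and_clear_bit(70, bitmap) == 0);
	printf("bitop demo ok\n");
	return 0;
}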
3644diff -urN linux-2.4.24.org/include/asm-s390/bitops.h linux-2.4.24/include/asm-s390/bitops.h
3645--- linux-2.4.24.org/include/asm-s390/bitops.h 2004-02-04 20:48:24.809883809 +0100
3646+++ linux-2.4.24/include/asm-s390/bitops.h 2004-02-04 20:52:53.990894989 +0100
3647@@ -47,272 +47,217 @@
3648 extern const char _oi_bitmap[];
3649 extern const char _ni_bitmap[];
3650 extern const char _zb_findmap[];
3651+extern const char _sb_findmap[];
3652
3653 #ifdef CONFIG_SMP
3654 /*
3655 * SMP save set_bit routine based on compare and swap (CS)
3656 */
3657-static __inline__ void set_bit_cs(int nr, volatile void * addr)
3658+static inline void set_bit_cs(int nr, volatile void *ptr)
3659 {
3660- unsigned long bits, mask;
3661- __asm__ __volatile__(
3662+ unsigned long addr, old, new, mask;
3663+
3664+ addr = (unsigned long) ptr;
3665 #if ALIGN_CS == 1
3666- " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
3667- " nr %2,%1\n" /* isolate last 2 bits of address */
3668- " xr %1,%2\n" /* make addr % 4 == 0 */
3669- " sll %2,3\n"
3670- " ar %0,%2\n" /* add alignement to bitnr */
3671+ addr ^= addr & 3; /* align address to 4 */
3672+ nr += (addr & 3) << 3; /* add alignment to bit number */
3673 #endif
3674- " lhi %2,31\n"
3675- " nr %2,%0\n" /* make shift value */
3676- " xr %0,%2\n"
3677- " srl %0,3\n"
3678- " lhi %3,1\n"
3679- " la %1,0(%0,%1)\n" /* calc. address for CS */
3680- " sll %3,0(%2)\n" /* make OR mask */
3681- " l %0,0(%1)\n"
3682- "0: lr %2,%0\n" /* CS loop starts here */
3683- " or %2,%3\n" /* set bit */
3684- " cs %0,%2,0(%1)\n"
3685- " jl 0b"
3686- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
3687- : "cc", "memory" );
3688+ addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
3689+ mask = 1UL << (nr & 31); /* make OR mask */
3690+ asm volatile(
3691+ " l %0,0(%4)\n"
3692+ "0: lr %1,%0\n"
3693+ " or %1,%3\n"
3694+ " cs %0,%1,0(%4)\n"
3695+ " jl 0b"
3696+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
3697+ : "d" (mask), "a" (addr)
3698+ : "cc" );
3699 }
3700
3701 /*
3702 * SMP save clear_bit routine based on compare and swap (CS)
3703 */
3704-static __inline__ void clear_bit_cs(int nr, volatile void * addr)
3705+static inline void clear_bit_cs(int nr, volatile void *ptr)
3706 {
3707- static const int minusone = -1;
3708- unsigned long bits, mask;
3709- __asm__ __volatile__(
3710+ unsigned long addr, old, new, mask;
3711+
3712+ addr = (unsigned long) ptr;
3713 #if ALIGN_CS == 1
3714- " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
3715- " nr %2,%1\n" /* isolate last 2 bits of address */
3716- " xr %1,%2\n" /* make addr % 4 == 0 */
3717- " sll %2,3\n"
3718- " ar %0,%2\n" /* add alignement to bitnr */
3719+ addr ^= addr & 3; /* align address to 4 */
3720+ nr += (addr & 3) << 3; /* add alignment to bit number */
3721 #endif
3722- " lhi %2,31\n"
3723- " nr %2,%0\n" /* make shift value */
3724- " xr %0,%2\n"
3725- " srl %0,3\n"
3726- " lhi %3,1\n"
3727- " la %1,0(%0,%1)\n" /* calc. address for CS */
3728- " sll %3,0(%2)\n"
3729- " x %3,%4\n" /* make AND mask */
3730- " l %0,0(%1)\n"
3731- "0: lr %2,%0\n" /* CS loop starts here */
3732- " nr %2,%3\n" /* clear bit */
3733- " cs %0,%2,0(%1)\n"
3734- " jl 0b"
3735- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask)
3736- : "m" (minusone) : "cc", "memory" );
3737+ addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
3738+ mask = ~(1UL << (nr & 31)); /* make AND mask */
3739+ asm volatile(
3740+ " l %0,0(%4)\n"
3741+ "0: lr %1,%0\n"
3742+ " nr %1,%3\n"
3743+ " cs %0,%1,0(%4)\n"
3744+ " jl 0b"
3745+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
3746+ : "d" (mask), "a" (addr)
3747+ : "cc" );
3748 }
3749
3750 /*
3751 * SMP save change_bit routine based on compare and swap (CS)
3752 */
3753-static __inline__ void change_bit_cs(int nr, volatile void * addr)
3754+static inline void change_bit_cs(int nr, volatile void *ptr)
3755 {
3756- unsigned long bits, mask;
3757- __asm__ __volatile__(
3758+ unsigned long addr, old, new, mask;
3759+
3760+ addr = (unsigned long) ptr;
3761 #if ALIGN_CS == 1
3762- " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
3763- " nr %2,%1\n" /* isolate last 2 bits of address */
3764- " xr %1,%2\n" /* make addr % 4 == 0 */
3765- " sll %2,3\n"
3766- " ar %0,%2\n" /* add alignement to bitnr */
3767+ addr ^= addr & 3; /* align address to 4 */
3768+ nr += (addr & 3) << 3; /* add alignment to bit number */
3769 #endif
3770- " lhi %2,31\n"
3771- " nr %2,%0\n" /* make shift value */
3772- " xr %0,%2\n"
3773- " srl %0,3\n"
3774- " lhi %3,1\n"
3775- " la %1,0(%0,%1)\n" /* calc. address for CS */
3776- " sll %3,0(%2)\n" /* make XR mask */
3777- " l %0,0(%1)\n"
3778- "0: lr %2,%0\n" /* CS loop starts here */
3779- " xr %2,%3\n" /* change bit */
3780- " cs %0,%2,0(%1)\n"
3781- " jl 0b"
3782- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
3783- : "cc", "memory" );
3784+ addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
3785+ mask = 1UL << (nr & 31); /* make XOR mask */
3786+ asm volatile(
3787+ " l %0,0(%4)\n"
3788+ "0: lr %1,%0\n"
3789+ " xr %1,%3\n"
3790+ " cs %0,%1,0(%4)\n"
3791+ " jl 0b"
3792+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
3793+ : "d" (mask), "a" (addr)
3794+ : "cc" );
3795 }
3796
3797 /*
3798 * SMP save test_and_set_bit routine based on compare and swap (CS)
3799 */
3800-static __inline__ int test_and_set_bit_cs(int nr, volatile void * addr)
3801+static inline int test_and_set_bit_cs(int nr, volatile void *ptr)
3802 {
3803- unsigned long bits, mask;
3804- __asm__ __volatile__(
3805+ unsigned long addr, old, new, mask;
3806+
3807+ addr = (unsigned long) ptr;
3808 #if ALIGN_CS == 1
3809- " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
3810- " nr %2,%1\n" /* isolate last 2 bits of address */
3811- " xr %1,%2\n" /* make addr % 4 == 0 */
3812- " sll %2,3\n"
3813- " ar %0,%2\n" /* add alignement to bitnr */
3814+ addr ^= addr & 3; /* align address to 4 */
3815+ nr += (addr & 3) << 3; /* add alignment to bit number */
3816 #endif
3817- " lhi %2,31\n"
3818- " nr %2,%0\n" /* make shift value */
3819- " xr %0,%2\n"
3820- " srl %0,3\n"
3821- " lhi %3,1\n"
3822- " la %1,0(%0,%1)\n" /* calc. address for CS */
3823- " sll %3,0(%2)\n" /* make OR mask */
3824- " l %0,0(%1)\n"
3825- "0: lr %2,%0\n" /* CS loop starts here */
3826- " or %2,%3\n" /* set bit */
3827- " cs %0,%2,0(%1)\n"
3828- " jl 0b\n"
3829- " nr %0,%3\n" /* isolate old bit */
3830- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
3831- : "cc", "memory" );
3832- return nr != 0;
3833+ addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
3834+ mask = 1UL << (nr & 31); /* make OR/test mask */
3835+ asm volatile(
3836+ " l %0,0(%4)\n"
3837+ "0: lr %1,%0\n"
3838+ " or %1,%3\n"
3839+ " cs %0,%1,0(%4)\n"
3840+ " jl 0b"
3841+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
3842+ : "d" (mask), "a" (addr)
3843+ : "cc" );
3844+ return (old & mask) != 0;
3845 }
3846
3847 /*
3848 * SMP save test_and_clear_bit routine based on compare and swap (CS)
3849 */
3850-static __inline__ int test_and_clear_bit_cs(int nr, volatile void * addr)
3851+static inline int test_and_clear_bit_cs(int nr, volatile void *ptr)
3852 {
3853- static const int minusone = -1;
3854- unsigned long bits, mask;
3855- __asm__ __volatile__(
3856+ unsigned long addr, old, new, mask;
3857+
3858+ addr = (unsigned long) ptr;
3859 #if ALIGN_CS == 1
3860- " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
3861- " nr %2,%1\n" /* isolate last 2 bits of address */
3862- " xr %1,%2\n" /* make addr % 4 == 0 */
3863- " sll %2,3\n"
3864- " ar %0,%2\n" /* add alignement to bitnr */
3865+ addr ^= addr & 3; /* align address to 4 */
3866+ nr += (addr & 3) << 3; /* add alignment to bit number */
3867 #endif
3868- " lhi %2,31\n"
3869- " nr %2,%0\n" /* make shift value */
3870- " xr %0,%2\n"
3871- " srl %0,3\n"
3872- " lhi %3,1\n"
3873- " la %1,0(%0,%1)\n" /* calc. address for CS */
3874- " sll %3,0(%2)\n"
3875- " l %0,0(%1)\n"
3876- " x %3,%4\n" /* make AND mask */
3877- "0: lr %2,%0\n" /* CS loop starts here */
3878- " nr %2,%3\n" /* clear bit */
3879- " cs %0,%2,0(%1)\n"
3880- " jl 0b\n"
3881- " x %3,%4\n"
3882- " nr %0,%3\n" /* isolate old bit */
3883- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask)
3884- : "m" (minusone) : "cc", "memory" );
3885- return nr;
3886+ addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
3887+ mask = ~(1UL << (nr & 31)); /* make AND mask */
3888+ asm volatile(
3889+ " l %0,0(%4)\n"
3890+ "0: lr %1,%0\n"
3891+ " nr %1,%3\n"
3892+ " cs %0,%1,0(%4)\n"
3893+ " jl 0b"
3894+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
3895+ : "d" (mask), "a" (addr)
3896+ : "cc" );
3897+ return (old ^ new) != 0;
3898 }
3899
3900 /*
3901 * SMP save test_and_change_bit routine based on compare and swap (CS)
3902 */
3903-static __inline__ int test_and_change_bit_cs(int nr, volatile void * addr)
3904+static inline int test_and_change_bit_cs(int nr, volatile void *ptr)
3905 {
3906- unsigned long bits, mask;
3907- __asm__ __volatile__(
3908+ unsigned long addr, old, new, mask;
3909+
3910+ addr = (unsigned long) ptr;
3911 #if ALIGN_CS == 1
3912- " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
3913- " nr %2,%1\n" /* isolate last 2 bits of address */
3914- " xr %1,%2\n" /* make addr % 4 == 0 */
3915- " sll %2,3\n"
3916- " ar %0,%2\n" /* add alignement to bitnr */
3917+ addr ^= addr & 3; /* align address to 4 */
3918+ nr += (addr & 3) << 3; /* add alignment to bit number */
3919 #endif
3920- " lhi %2,31\n"
3921- " nr %2,%0\n" /* make shift value */
3922- " xr %0,%2\n"
3923- " srl %0,3\n"
3924- " lhi %3,1\n"
3925- " la %1,0(%0,%1)\n" /* calc. address for CS */
3926- " sll %3,0(%2)\n" /* make OR mask */
3927- " l %0,0(%1)\n"
3928- "0: lr %2,%0\n" /* CS loop starts here */
3929- " xr %2,%3\n" /* change bit */
3930- " cs %0,%2,0(%1)\n"
3931- " jl 0b\n"
3932- " nr %0,%3\n" /* isolate old bit */
3933- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
3934- : "cc", "memory" );
3935- return nr != 0;
3936+ addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
3937+ mask = 1UL << (nr & 31); /* make XOR mask */
3938+ asm volatile(
3939+ " l %0,0(%4)\n"
3940+ "0: lr %1,%0\n"
3941+ " xr %1,%3\n"
3942+ " cs %0,%1,0(%4)\n"
3943+ " jl 0b"
3944+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
3945+ : "d" (mask), "a" (addr)
3946+ : "cc" );
3947+ return (old & mask) != 0;
3948 }
3949 #endif /* CONFIG_SMP */
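The rewritten CS routines above all share one pattern: load the word, compute the modified value, and retry the compare-and-swap until no other CPU changed the word in between. The sketch below expresses only that retry pattern in portable C using GCC's legacy __sync builtin, as a hedged illustration; it does not reproduce the s390 big-endian byte/bit addressing, and demo_test_and_set_bit()/main are hypothetical names.

#include <assert.h>
#include <stdio.h>

static int demo_test_and_set_bit(unsigned long nr, volatile unsigned int *ptr)
{
	volatile unsigned int *p = ptr + (nr / 32);	/* word holding the bit */
	unsigned int mask = 1u << (nr & 31);		/* OR/test mask */
	unsigned int old, seen;

	do {
		old = *p;				/* snapshot the word */
		/* try to install old|mask; returns the value actually found */
		seen = __sync_val_compare_and_swap(p, old, old | mask);
	} while (seen != old);				/* lost the race: retry */

	return (old & mask) != 0;			/* previous bit value */
}

int main(void)
{
	volatile unsigned int word = 0;

	assert(demo_test_and_set_bit(5, &word) == 0);	/* bit was clear */
	assert(demo_test_and_set_bit(5, &word) == 1);	/* bit is now set */
	assert(word == (1u << 5));
	printf("cs-loop demo ok\n");
	return 0;
}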
3950
3951 /*
3952 * fast, non-SMP set_bit routine
3953 */
3954-static __inline__ void __set_bit(int nr, volatile void * addr)
3955+static inline void __set_bit(int nr, volatile void *ptr)
3956 {
3957- unsigned long reg1, reg2;
3958- __asm__ __volatile__(
3959- " lhi %1,24\n"
3960- " lhi %0,7\n"
3961- " xr %1,%2\n"
3962- " nr %0,%2\n"
3963- " srl %1,3\n"
3964- " la %1,0(%1,%3)\n"
3965- " la %0,0(%0,%4)\n"
3966- " oc 0(1,%1),0(%0)"
3967- : "=&a" (reg1), "=&a" (reg2)
3968- : "r" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
3969-}
3970-
3971-static __inline__ void
3972-__constant_set_bit(const int nr, volatile void * addr)
3973-{
3974- switch (nr&7) {
3975- case 0:
3976- __asm__ __volatile__ ("la 1,%0\n\t"
3977- "oi 0(1),0x01"
3978- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
3979- : : "1", "cc", "memory");
3980- break;
3981- case 1:
3982- __asm__ __volatile__ ("la 1,%0\n\t"
3983- "oi 0(1),0x02"
3984- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
3985- : : "1", "cc", "memory" );
3986- break;
3987- case 2:
3988- __asm__ __volatile__ ("la 1,%0\n\t"
3989- "oi 0(1),0x04"
3990- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
3991- : : "1", "cc", "memory" );
3992- break;
3993- case 3:
3994- __asm__ __volatile__ ("la 1,%0\n\t"
3995- "oi 0(1),0x08"
3996- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
3997- : : "1", "cc", "memory" );
3998- break;
3999- case 4:
4000- __asm__ __volatile__ ("la 1,%0\n\t"
4001- "oi 0(1),0x10"
4002- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4003- : : "1", "cc", "memory" );
4004- break;
4005- case 5:
4006- __asm__ __volatile__ ("la 1,%0\n\t"
4007- "oi 0(1),0x20"
4008- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4009- : : "1", "cc", "memory" );
4010- break;
4011- case 6:
4012- __asm__ __volatile__ ("la 1,%0\n\t"
4013- "oi 0(1),0x40"
4014- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4015- : : "1", "cc", "memory" );
4016- break;
4017- case 7:
4018- __asm__ __volatile__ ("la 1,%0\n\t"
4019- "oi 0(1),0x80"
4020- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4021- : : "1", "cc", "memory" );
4022- break;
4023- }
4024+ unsigned long addr;
4025+
4026+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
4027+ asm volatile("oc 0(1,%1),0(%2)"
4028+ : "+m" (*(char *) addr)
4029+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
4030+ : "cc" );
4031+}
4032+
4033+static inline void
4034+__constant_set_bit(const int nr, volatile void *ptr)
4035+{
4036+ unsigned long addr;
4037+
4038+ addr = ((unsigned long) ptr) + ((nr >> 3) ^ 3);
4039+ switch (nr&7) {
4040+ case 0:
4041+ asm volatile ("oi 0(%1),0x01"
4042+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4043+ break;
4044+ case 1:
4045+ asm volatile ("oi 0(%1),0x02"
4046+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4047+ break;
4048+ case 2:
4049+ asm volatile ("oi 0(%1),0x04"
4050+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4051+ break;
4052+ case 3:
4053+ asm volatile ("oi 0(%1),0x08"
4054+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4055+ break;
4056+ case 4:
4057+ asm volatile ("oi 0(%1),0x10"
4058+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4059+ break;
4060+ case 5:
4061+ asm volatile ("oi 0(%1),0x20"
4062+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4063+ break;
4064+ case 6:
4065+ asm volatile ("oi 0(%1),0x40"
4066+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4067+ break;
4068+ case 7:
4069+ asm volatile ("oi 0(%1),0x80"
4070+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4071+ break;
4072+ }
4073 }
4074
4075 #define set_bit_simple(nr,addr) \
4076@@ -323,76 +268,58 @@
4077 /*
4078 * fast, non-SMP clear_bit routine
4079 */
4080-static __inline__ void
4081-__clear_bit(int nr, volatile void * addr)
4082+static inline void
4083+__clear_bit(int nr, volatile void *ptr)
4084 {
4085- unsigned long reg1, reg2;
4086- __asm__ __volatile__(
4087- " lhi %1,24\n"
4088- " lhi %0,7\n"
4089- " xr %1,%2\n"
4090- " nr %0,%2\n"
4091- " srl %1,3\n"
4092- " la %1,0(%1,%3)\n"
4093- " la %0,0(%0,%4)\n"
4094- " nc 0(1,%1),0(%0)"
4095- : "=&a" (reg1), "=&a" (reg2)
4096- : "r" (nr), "a" (addr), "a" (&_ni_bitmap) : "cc", "memory" );
4097-}
4098-
4099-static __inline__ void
4100-__constant_clear_bit(const int nr, volatile void * addr)
4101-{
4102- switch (nr&7) {
4103- case 0:
4104- __asm__ __volatile__ ("la 1,%0\n\t"
4105- "ni 0(1),0xFE"
4106- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4107- : : "1", "cc", "memory" );
4108- break;
4109- case 1:
4110- __asm__ __volatile__ ("la 1,%0\n\t"
4111- "ni 0(1),0xFD"
4112- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4113- : : "1", "cc", "memory" );
4114- break;
4115- case 2:
4116- __asm__ __volatile__ ("la 1,%0\n\t"
4117- "ni 0(1),0xFB"
4118- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4119- : : "1", "cc", "memory" );
4120- break;
4121- case 3:
4122- __asm__ __volatile__ ("la 1,%0\n\t"
4123- "ni 0(1),0xF7"
4124- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4125- : : "1", "cc", "memory" );
4126- break;
4127- case 4:
4128- __asm__ __volatile__ ("la 1,%0\n\t"
4129- "ni 0(1),0xEF"
4130- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4131- : : "cc", "memory" );
4132- break;
4133- case 5:
4134- __asm__ __volatile__ ("la 1,%0\n\t"
4135- "ni 0(1),0xDF"
4136- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4137- : : "1", "cc", "memory" );
4138- break;
4139- case 6:
4140- __asm__ __volatile__ ("la 1,%0\n\t"
4141- "ni 0(1),0xBF"
4142- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4143- : : "1", "cc", "memory" );
4144- break;
4145- case 7:
4146- __asm__ __volatile__ ("la 1,%0\n\t"
4147- "ni 0(1),0x7F"
4148- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4149- : : "1", "cc", "memory" );
4150- break;
4151- }
4152+ unsigned long addr;
4153+
4154+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
4155+ asm volatile("nc 0(1,%1),0(%2)"
4156+ : "+m" (*(char *) addr)
4157+ : "a" (addr), "a" (_ni_bitmap + (nr & 7))
4158+ : "cc" );
4159+}
4160+
4161+static inline void
4162+__constant_clear_bit(const int nr, volatile void *ptr)
4163+{
4164+ unsigned long addr;
4165+
4166+ addr = ((unsigned long) ptr) + ((nr >> 3) ^ 3);
4167+ switch (nr&7) {
4168+ case 0:
4169+ asm volatile ("ni 0(%1),0xFE"
4170+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4171+ break;
4172+ case 1:
4173+ asm volatile ("ni 0(%1),0xFD"
4174+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4175+ break;
4176+ case 2:
4177+ asm volatile ("ni 0(%1),0xFB"
4178+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4179+ break;
4180+ case 3:
4181+ asm volatile ("ni 0(%1),0xF7"
4182+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4183+ break;
4184+ case 4:
4185+ asm volatile ("ni 0(%1),0xEF"
4186+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4187+ break;
4188+ case 5:
4189+ asm volatile ("ni 0(%1),0xDF"
4190+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4191+ break;
4192+ case 6:
4193+ asm volatile ("ni 0(%1),0xBF"
4194+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4195+ break;
4196+ case 7:
4197+ asm volatile ("ni 0(%1),0x7F"
4198+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4199+ break;
4200+ }
4201 }
4202
4203 #define clear_bit_simple(nr,addr) \
4204@@ -403,75 +330,57 @@
4205 /*
4206 * fast, non-SMP change_bit routine
4207 */
4208-static __inline__ void __change_bit(int nr, volatile void * addr)
4209+static inline void __change_bit(int nr, volatile void *ptr)
4210 {
4211- unsigned long reg1, reg2;
4212- __asm__ __volatile__(
4213- " lhi %1,24\n"
4214- " lhi %0,7\n"
4215- " xr %1,%2\n"
4216- " nr %0,%2\n"
4217- " srl %1,3\n"
4218- " la %1,0(%1,%3)\n"
4219- " la %0,0(%0,%4)\n"
4220- " xc 0(1,%1),0(%0)"
4221- : "=&a" (reg1), "=&a" (reg2)
4222- : "r" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
4223-}
4224-
4225-static __inline__ void
4226-__constant_change_bit(const int nr, volatile void * addr)
4227-{
4228- switch (nr&7) {
4229- case 0:
4230- __asm__ __volatile__ ("la 1,%0\n\t"
4231- "xi 0(1),0x01"
4232- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4233- : : "cc", "memory" );
4234- break;
4235- case 1:
4236- __asm__ __volatile__ ("la 1,%0\n\t"
4237- "xi 0(1),0x02"
4238- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4239- : : "cc", "memory" );
4240- break;
4241- case 2:
4242- __asm__ __volatile__ ("la 1,%0\n\t"
4243- "xi 0(1),0x04"
4244- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4245- : : "cc", "memory" );
4246- break;
4247- case 3:
4248- __asm__ __volatile__ ("la 1,%0\n\t"
4249- "xi 0(1),0x08"
4250- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4251- : : "cc", "memory" );
4252- break;
4253- case 4:
4254- __asm__ __volatile__ ("la 1,%0\n\t"
4255- "xi 0(1),0x10"
4256- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4257- : : "cc", "memory" );
4258- break;
4259- case 5:
4260- __asm__ __volatile__ ("la 1,%0\n\t"
4261- "xi 0(1),0x20"
4262- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4263- : : "1", "cc", "memory" );
4264- break;
4265- case 6:
4266- __asm__ __volatile__ ("la 1,%0\n\t"
4267- "xi 0(1),0x40"
4268- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4269- : : "1", "cc", "memory" );
4270- break;
4271- case 7:
4272- __asm__ __volatile__ ("la 1,%0\n\t"
4273- "xi 0(1),0x80"
4274- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
4275- : : "1", "cc", "memory" );
4276- break;
4277- }
4278+ unsigned long addr;
4279+
4280+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
4281+ asm volatile("xc 0(1,%1),0(%2)"
4282+ : "+m" (*(char *) addr)
4283+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
4284+ : "cc" );
4285+}
4286+
4287+static inline void
4288+__constant_change_bit(const int nr, volatile void *ptr)
4289+{
4290+ unsigned long addr;
4291+
4292+ addr = ((unsigned long) ptr) + ((nr >> 3) ^ 3);
4293+ switch (nr&7) {
4294+ case 0:
4295+ asm volatile ("xi 0(%1),0x01"
4296+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4297+ break;
4298+ case 1:
4299+ asm volatile ("xi 0(%1),0x02"
4300+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4301+ break;
4302+ case 2:
4303+ asm volatile ("xi 0(%1),0x04"
4304+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4305+ break;
4306+ case 3:
4307+ asm volatile ("xi 0(%1),0x08"
4308+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4309+ break;
4310+ case 4:
4311+ asm volatile ("xi 0(%1),0x10"
4312+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4313+ break;
4314+ case 5:
4315+ asm volatile ("xi 0(%1),0x20"
4316+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4317+ break;
4318+ case 6:
4319+ asm volatile ("xi 0(%1),0x40"
4320+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4321+ break;
4322+ case 7:
4323+ asm volatile ("xi 0(%1),0x80"
4324+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
4325+ break;
4326+ }
4327 }
4328
4329 #define change_bit_simple(nr,addr) \
4330@@ -482,74 +391,54 @@
4331 /*
4332 * fast, non-SMP test_and_set_bit routine
4333 */
4334-static __inline__ int test_and_set_bit_simple(int nr, volatile void * addr)
4335+static inline int test_and_set_bit_simple(int nr, volatile void *ptr)
4336 {
4337- unsigned long reg1, reg2;
4338- int oldbit;
4339- __asm__ __volatile__(
4340- " lhi %1,24\n"
4341- " lhi %2,7\n"
4342- " xr %1,%3\n"
4343- " nr %2,%3\n"
4344- " srl %1,3\n"
4345- " la %1,0(%1,%4)\n"
4346- " ic %0,0(%1)\n"
4347- " srl %0,0(%2)\n"
4348- " la %2,0(%2,%5)\n"
4349- " oc 0(1,%1),0(%2)"
4350- : "=d&" (oldbit), "=&a" (reg1), "=&a" (reg2)
4351- : "r" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
4352- return oldbit & 1;
4353+ unsigned long addr;
4354+ unsigned char ch;
4355+
4356+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
4357+ ch = *(unsigned char *) addr;
4358+ asm volatile("oc 0(1,%1),0(%2)"
4359+ : "+m" (*(char *) addr)
4360+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
4361+ : "cc" );
4362+ return (ch >> (nr & 7)) & 1;
4363 }
4364 #define __test_and_set_bit(X,Y) test_and_set_bit_simple(X,Y)
4365
4366 /*
4367 * fast, non-SMP test_and_clear_bit routine
4368 */
4369-static __inline__ int test_and_clear_bit_simple(int nr, volatile void * addr)
4370+static inline int test_and_clear_bit_simple(int nr, volatile void *ptr)
4371 {
4372- unsigned long reg1, reg2;
4373- int oldbit;
4374+ unsigned long addr;
4375+ unsigned char ch;
4376
4377- __asm__ __volatile__(
4378- " lhi %1,24\n"
4379- " lhi %2,7\n"
4380- " xr %1,%3\n"
4381- " nr %2,%3\n"
4382- " srl %1,3\n"
4383- " la %1,0(%1,%4)\n"
4384- " ic %0,0(%1)\n"
4385- " srl %0,0(%2)\n"
4386- " la %2,0(%2,%5)\n"
4387- " nc 0(1,%1),0(%2)"
4388- : "=d&" (oldbit), "=&a" (reg1), "=&a" (reg2)
4389- : "r" (nr), "a" (addr), "a" (&_ni_bitmap) : "cc", "memory" );
4390- return oldbit & 1;
4391+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
4392+ ch = *(unsigned char *) addr;
4393+ asm volatile("nc 0(1,%1),0(%2)"
4394+ : "+m" (*(char *) addr)
4395+ : "a" (addr), "a" (_ni_bitmap + (nr & 7))
4396+ : "cc" );
4397+ return (ch >> (nr & 7)) & 1;
4398 }
4399 #define __test_and_clear_bit(X,Y) test_and_clear_bit_simple(X,Y)
4400
4401 /*
4402 * fast, non-SMP test_and_change_bit routine
4403 */
4404-static __inline__ int test_and_change_bit_simple(int nr, volatile void * addr)
4405+static inline int test_and_change_bit_simple(int nr, volatile void *ptr)
4406 {
4407- unsigned long reg1, reg2;
4408- int oldbit;
4409+ unsigned long addr;
4410+ unsigned char ch;
4411
4412- __asm__ __volatile__(
4413- " lhi %1,24\n"
4414- " lhi %2,7\n"
4415- " xr %1,%3\n"
4416- " nr %2,%1\n"
4417- " srl %1,3\n"
4418- " la %1,0(%1,%4)\n"
4419- " ic %0,0(%1)\n"
4420- " srl %0,0(%2)\n"
4421- " la %2,0(%2,%5)\n"
4422- " xc 0(1,%1),0(%2)"
4423- : "=d&" (oldbit), "=&a" (reg1), "=&a" (reg2)
4424- : "r" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
4425- return oldbit & 1;
4426+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
4427+ ch = *(unsigned char *) addr;
4428+ asm volatile("xc 0(1,%1),0(%2)"
4429+ : "+m" (*(char *) addr)
4430+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
4431+ : "cc" );
4432+ return (ch >> (nr & 7)) & 1;
4433 }
4434 #define __test_and_change_bit(X,Y) test_and_change_bit_simple(X,Y)
4435
4436@@ -574,25 +463,17 @@
4437 * This routine doesn't need to be atomic.
4438 */
4439
4440-static __inline__ int __test_bit(int nr, volatile void * addr)
4441+static inline int __test_bit(int nr, volatile void *ptr)
4442 {
4443- unsigned long reg1, reg2;
4444- int oldbit;
4445+ unsigned long addr;
4446+ unsigned char ch;
4447
4448- __asm__ __volatile__(
4449- " lhi %2,24\n"
4450- " lhi %1,7\n"
4451- " xr %2,%3\n"
4452- " nr %1,%3\n"
4453- " srl %2,3\n"
4454- " ic %0,0(%2,%4)\n"
4455- " srl %0,0(%1)"
4456- : "=d&" (oldbit), "=&a" (reg1), "=&a" (reg2)
4457- : "r" (nr), "a" (addr) : "cc" );
4458- return oldbit & 1;
4459+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
4460+ ch = *(unsigned char *) addr;
4461+ return (ch >> (nr & 7)) & 1;
4462 }
4463
4464-static __inline__ int __constant_test_bit(int nr, volatile void * addr) {
4465+static inline int __constant_test_bit(int nr, volatile void * addr) {
4466 return (((volatile char *) addr)[(nr>>3)^3] & (1<<(nr&7))) != 0;
4467 }
4468
4469@@ -604,7 +485,7 @@
4470 /*
4471 * Find-bit routines..
4472 */
4473-static __inline__ int find_first_zero_bit(void * addr, unsigned size)
4474+static inline int find_first_zero_bit(void * addr, unsigned size)
4475 {
4476 unsigned long cmp, count;
4477 int res;
4478@@ -642,7 +523,45 @@
4479 return (res < size) ? res : size;
4480 }
4481
4482-static __inline__ int find_next_zero_bit (void * addr, int size, int offset)
4483+static inline int find_first_bit(void * addr, unsigned size)
4484+{
4485+ unsigned long cmp, count;
4486+ int res;
4487+
4488+ if (!size)
4489+ return 0;
4490+ __asm__(" slr %1,%1\n"
4491+ " lr %2,%3\n"
4492+ " slr %0,%0\n"
4493+ " ahi %2,31\n"
4494+ " srl %2,5\n"
4495+ "0: c %1,0(%0,%4)\n"
4496+ " jne 1f\n"
4497+ " ahi %0,4\n"
4498+ " brct %2,0b\n"
4499+ " lr %0,%3\n"
4500+ " j 4f\n"
4501+ "1: l %2,0(%0,%4)\n"
4502+ " sll %0,3\n"
4503+ " lhi %1,0xff\n"
4504+ " tml %2,0xffff\n"
4505+ " jnz 2f\n"
4506+ " ahi %0,16\n"
4507+ " srl %2,16\n"
4508+ "2: tml %2,0x00ff\n"
4509+ " jnz 3f\n"
4510+ " ahi %0,8\n"
4511+ " srl %2,8\n"
4512+ "3: nr %2,%1\n"
4513+ " ic %2,0(%2,%5)\n"
4514+ " alr %0,%2\n"
4515+ "4:"
4516+ : "=&a" (res), "=&d" (cmp), "=&a" (count)
4517+ : "a" (size), "a" (addr), "a" (&_sb_findmap) : "cc" );
4518+ return (res < size) ? res : size;
4519+}
4520+
4521+static inline int find_next_zero_bit (void * addr, int size, int offset)
4522 {
4523 unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
4524 unsigned long bitvec, reg;
4525@@ -680,11 +599,49 @@
4526 return (offset + res);
4527 }
4528
4529+static inline int find_next_bit (void * addr, int size, int offset)
4530+{
4531+ unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
4532+ unsigned long bitvec, reg;
4533+ int set, bit = offset & 31, res;
4534+
4535+ if (bit) {
4536+ /*
4537+ * Look for set bit in first word
4538+ */
4539+ bitvec = (*p) >> bit;
4540+ __asm__(" slr %0,%0\n"
4541+ " lhi %2,0xff\n"
4542+ " tml %1,0xffff\n"
4543+ " jnz 0f\n"
4544+ " ahi %0,16\n"
4545+ " srl %1,16\n"
4546+ "0: tml %1,0x00ff\n"
4547+ " jnz 1f\n"
4548+ " ahi %0,8\n"
4549+ " srl %1,8\n"
4550+ "1: nr %1,%2\n"
4551+ " ic %1,0(%1,%3)\n"
4552+ " alr %0,%1"
4553+ : "=&d" (set), "+a" (bitvec), "=&d" (reg)
4554+ : "a" (&_sb_findmap) : "cc" );
4555+ if (set < (32 - bit))
4556+ return set + offset;
4557+ offset += 32 - bit;
4558+ p++;
4559+ }
4560+ /*
4561+ * No set bit yet, search remaining full words for a bit
4562+ */
4563+ res = find_first_bit (p, size - 32 * (p - (unsigned long *) addr));
4564+ return (offset + res);
4565+}
4566+
4567 /*
4568 * ffz = Find First Zero in word. Undefined if no zero exists,
4569 * so code should check against ~0UL first..
4570 */
4571-static __inline__ unsigned long ffz(unsigned long word)
4572+static inline unsigned long ffz(unsigned long word)
4573 {
4574 unsigned long reg;
4575 int result;
4576@@ -708,40 +665,109 @@
4577 }
4578
4579 /*
4580+ * __ffs = find first bit in word. Undefined if no bit exists,
4581+ * so code should check against 0UL first..
4582+ */
4583+static inline unsigned long __ffs(unsigned long word)
4584+{
4585+ unsigned long reg, result;
4586+
4587+ __asm__(" slr %0,%0\n"
4588+ " lhi %2,0xff\n"
4589+ " tml %1,0xffff\n"
4590+ " jnz 0f\n"
4591+ " ahi %0,16\n"
4592+ " srl %1,16\n"
4593+ "0: tml %1,0x00ff\n"
4594+ " jnz 1f\n"
4595+ " ahi %0,8\n"
4596+ " srl %1,8\n"
4597+ "1: nr %1,%2\n"
4598+ " ic %1,0(%1,%3)\n"
4599+ " alr %0,%1"
4600+ : "=&d" (result), "+a" (word), "=&d" (reg)
4601+ : "a" (&_sb_findmap) : "cc" );
4602+ return result;
4603+}
4604+
4605+/*
4606+ * Every architecture must define this function. It's the fastest
4607+ * way of searching a 140-bit bitmap where the first 100 bits are
4608+ * unlikely to be set. It's guaranteed that at least one of the 140
4609+ * bits is cleared.
4610+ */
4611+static inline int sched_find_first_bit(unsigned long *b)
4612+{
4613+ return find_first_bit(b, 140);
4614+}
4615+
4616+/*
4617 * ffs: find first bit set. This is defined the same way as
4618 * the libc and compiler builtin ffs routines, therefore
4619 * differs in spirit from the above ffz (man ffs).
4620 */
4621
4622-extern int __inline__ ffs (int x)
4623+extern int inline ffs (int x)
4624 {
4625- int r;
4626+ int r = 1;
4627
4628 if (x == 0)
4629- return 0;
4630- __asm__(" slr %0,%0\n"
4631- " tml %1,0xffff\n"
4632+ return 0;
4633+ __asm__(" tml %1,0xffff\n"
4634 " jnz 0f\n"
4635- " ahi %0,16\n"
4636 " srl %1,16\n"
4637+ " ahi %0,16\n"
4638 "0: tml %1,0x00ff\n"
4639 " jnz 1f\n"
4640- " ahi %0,8\n"
4641 " srl %1,8\n"
4642+ " ahi %0,8\n"
4643 "1: tml %1,0x000f\n"
4644 " jnz 2f\n"
4645- " ahi %0,4\n"
4646 " srl %1,4\n"
4647+ " ahi %0,4\n"
4648 "2: tml %1,0x0003\n"
4649 " jnz 3f\n"
4650- " ahi %0,2\n"
4651 " srl %1,2\n"
4652+ " ahi %0,2\n"
4653 "3: tml %1,0x0001\n"
4654 " jnz 4f\n"
4655 " ahi %0,1\n"
4656 "4:"
4657 : "=&d" (r), "+d" (x) : : "cc" );
4658- return r+1;
4659+ return r;
4660+}
4661+
4662+/*
4663+ * fls: find last bit set.
4664+ */
4665+extern __inline__ int fls(int x)
4666+{
4667+ int r = 32;
4668+
4669+ if (x == 0)
4670+ return 0;
4671+ __asm__(" tmh %1,0xffff\n"
4672+ " jz 0f\n"
4673+ " sll %1,16\n"
4674+ " ahi %0,-16\n"
4675+ "0: tmh %1,0xff00\n"
4676+ " jz 1f\n"
4677+ " sll %1,8\n"
4678+ " ahi %0,-8\n"
4679+ "1: tmh %1,0xf000\n"
4680+ " jz 2f\n"
4681+ " sll %1,4\n"
4682+ " ahi %0,-4\n"
4683+ "2: tmh %1,0xc000\n"
4684+ " jz 3f\n"
4685+ " sll %1,2\n"
4686+ " ahi %0,-2\n"
4687+ "3: tmh %1,0x8000\n"
4688+ " jz 4f\n"
4689+ " ahi %0,-1\n"
4690+ "4:"
4691+ : "+d" (r), "+d" (x) : : "cc" );
4692+ return r;
4693 }
4694
4695 /*
4696@@ -769,7 +795,7 @@
4697 #define ext2_set_bit(nr, addr) test_and_set_bit((nr)^24, addr)
4698 #define ext2_clear_bit(nr, addr) test_and_clear_bit((nr)^24, addr)
4699 #define ext2_test_bit(nr, addr) test_bit((nr)^24, addr)
4700-static __inline__ int ext2_find_first_zero_bit(void *vaddr, unsigned size)
4701+static inline int ext2_find_first_zero_bit(void *vaddr, unsigned size)
4702 {
4703 unsigned long cmp, count;
4704 int res;
4705@@ -808,7 +834,7 @@
4706 return (res < size) ? res : size;
4707 }
4708
4709-static __inline__ int
4710+static inline int
4711 ext2_find_next_zero_bit(void *vaddr, unsigned size, unsigned offset)
4712 {
4713 unsigned long *addr = vaddr;
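The s390 hunks above add _sb_findmap[] and use it (via the "ic" instruction) to finish find_first_bit()/__ffs() with a single byte-table lookup once the non-zero byte has been located. As a hedged illustration of that idea, the sketch below builds an equivalent 256-entry table at runtime and uses it in a 32-bit scan; all demo_ names are hypothetical and the real kernel table is a linker-provided constant array.

#include <assert.h>
#include <stdio.h>

static unsigned char demo_sb_findmap[256];

static void demo_build_table(void)
{
	int i, bit;

	demo_sb_findmap[0] = 8;			/* "no set bit in this byte" */
	for (i = 1; i < 256; i++) {
		for (bit = 0; bit < 8; bit++)
			if (i & (1 << bit))
				break;
		demo_sb_findmap[i] = (unsigned char) bit;
	}
}

/* Index of the lowest set bit in a non-zero 32-bit word. */
static unsigned int demo_ffs32(unsigned int word)
{
	unsigned int pos = 0;

	if (!(word & 0xffff)) { pos += 16; word >>= 16; }	/* skip a zero halfword */
	if (!(word & 0x00ff)) { pos += 8;  word >>= 8;  }	/* skip a zero byte */
	return pos + demo_sb_findmap[word & 0xff];		/* finish via the table */
}

int main(void)
{
	demo_build_table();
	assert(demo_ffs32(1u) == 0);
	assert(demo_ffs32(0x00040000u) == 18);
	assert(demo_ffs32(0x80000000u) == 31);
	printf("findmap demo ok\n");
	return 0;
}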
4714diff -urN linux-2.4.24.org/include/asm-s390x/bitops.h linux-2.4.24/include/asm-s390x/bitops.h
4715--- linux-2.4.24.org/include/asm-s390x/bitops.h 2004-02-04 20:48:28.470122479 +0100
4716+++ linux-2.4.24/include/asm-s390x/bitops.h 2004-02-04 20:52:54.030886671 +0100
4717@@ -51,271 +51,220 @@
4718 extern const char _oi_bitmap[];
4719 extern const char _ni_bitmap[];
4720 extern const char _zb_findmap[];
4721+extern const char _sb_findmap[];
4722
4723 #ifdef CONFIG_SMP
4724 /*
4725 * SMP save set_bit routine based on compare and swap (CS)
4726 */
4727-static __inline__ void set_bit_cs(unsigned long nr, volatile void * addr)
4728+static inline void set_bit_cs(unsigned long nr, volatile void *ptr)
4729 {
4730- unsigned long bits, mask;
4731- __asm__ __volatile__(
4732+ unsigned long addr, old, new, mask;
4733+
4734+ addr = (unsigned long) ptr;
4735 #if ALIGN_CS == 1
4736- " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
4737- " ngr %2,%1\n" /* isolate last 2 bits of address */
4738- " xgr %1,%2\n" /* make addr % 4 == 0 */
4739- " sllg %2,%2,3\n"
4740- " agr %0,%2\n" /* add alignement to bitnr */
4741+ addr ^= addr & 7; /* align address to 8 */
4742+ nr += (addr & 7) << 3; /* add alignment to bit number */
4743 #endif
4744- " lghi %2,63\n"
4745- " nr %2,%0\n" /* make shift value */
4746- " xr %0,%2\n"
4747- " srlg %0,%0,3\n"
4748- " lghi %3,1\n"
4749- " la %1,0(%0,%1)\n" /* calc. address for CS */
4750- " sllg %3,%3,0(%2)\n" /* make OR mask */
4751- " lg %0,0(%1)\n"
4752- "0: lgr %2,%0\n" /* CS loop starts here */
4753- " ogr %2,%3\n" /* set bit */
4754- " csg %0,%2,0(%1)\n"
4755- " jl 0b"
4756- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
4757- : "cc", "memory" );
4758+ addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
4759+ mask = 1UL << (nr & 63); /* make OR mask */
4760+ asm volatile(
4761+ " lg %0,0(%4)\n"
4762+ "0: lgr %1,%0\n"
4763+ " ogr %1,%3\n"
4764+ " csg %0,%1,0(%4)\n"
4765+ " jl 0b"
4766+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
4767+ : "d" (mask), "a" (addr)
4768+ : "cc" );
4769 }
4770
4771 /*
4772 * SMP save clear_bit routine based on compare and swap (CS)
4773 */
4774-static __inline__ void clear_bit_cs(unsigned long nr, volatile void * addr)
4775+static inline void clear_bit_cs(unsigned long nr, volatile void *ptr)
4776 {
4777- unsigned long bits, mask;
4778- __asm__ __volatile__(
4779+ unsigned long addr, old, new, mask;
4780+
4781+ addr = (unsigned long) ptr;
4782 #if ALIGN_CS == 1
4783- " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
4784- " ngr %2,%1\n" /* isolate last 2 bits of address */
4785- " xgr %1,%2\n" /* make addr % 4 == 0 */
4786- " sllg %2,%2,3\n"
4787- " agr %0,%2\n" /* add alignement to bitnr */
4788+ addr ^= addr & 7; /* align address to 8 */
4789+ nr += (addr & 7) << 3; /* add alignment to bit number */
4790 #endif
4791- " lghi %2,63\n"
4792- " nr %2,%0\n" /* make shift value */
4793- " xr %0,%2\n"
4794- " srlg %0,%0,3\n"
4795- " lghi %3,-2\n"
4796- " la %1,0(%0,%1)\n" /* calc. address for CS */
4797- " lghi %3,-2\n"
4798- " rllg %3,%3,0(%2)\n" /* make AND mask */
4799- " lg %0,0(%1)\n"
4800- "0: lgr %2,%0\n" /* CS loop starts here */
4801- " ngr %2,%3\n" /* clear bit */
4802- " csg %0,%2,0(%1)\n"
4803- " jl 0b"
4804- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
4805- : "cc", "memory" );
4806+ addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
4807+ mask = ~(1UL << (nr & 63)); /* make AND mask */
4808+ asm volatile(
4809+ " lg %0,0(%4)\n"
4810+ "0: lgr %1,%0\n"
4811+ " ngr %1,%3\n"
4812+ " csg %0,%1,0(%4)\n"
4813+ " jl 0b"
4814+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
4815+ : "d" (mask), "a" (addr)
4816+ : "cc" );
4817 }
4818
4819 /*
4820 * SMP save change_bit routine based on compare and swap (CS)
4821 */
4822-static __inline__ void change_bit_cs(unsigned long nr, volatile void * addr)
4823+static inline void change_bit_cs(unsigned long nr, volatile void *ptr)
4824 {
4825- unsigned long bits, mask;
4826- __asm__ __volatile__(
4827+ unsigned long addr, old, new, mask;
4828+
4829+ addr = (unsigned long) ptr;
4830 #if ALIGN_CS == 1
4831- " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
4832- " ngr %2,%1\n" /* isolate last 2 bits of address */
4833- " xgr %1,%2\n" /* make addr % 4 == 0 */
4834- " sllg %2,%2,3\n"
4835- " agr %0,%2\n" /* add alignement to bitnr */
4836+ addr ^= addr & 7; /* align address to 8 */
4837+ nr += (addr & 7) << 3; /* add alignment to bit number */
4838 #endif
4839- " lghi %2,63\n"
4840- " nr %2,%0\n" /* make shift value */
4841- " xr %0,%2\n"
4842- " srlg %0,%0,3\n"
4843- " lghi %3,1\n"
4844- " la %1,0(%0,%1)\n" /* calc. address for CS */
4845- " sllg %3,%3,0(%2)\n" /* make XR mask */
4846- " lg %0,0(%1)\n"
4847- "0: lgr %2,%0\n" /* CS loop starts here */
4848- " xgr %2,%3\n" /* change bit */
4849- " csg %0,%2,0(%1)\n"
4850- " jl 0b"
4851- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
4852- : "cc", "memory" );
4853+ addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
4854+ mask = 1UL << (nr & 63); /* make XOR mask */
4855+ asm volatile(
4856+ " lg %0,0(%4)\n"
4857+ "0: lgr %1,%0\n"
4858+ " xgr %1,%3\n"
4859+ " csg %0,%1,0(%4)\n"
4860+ " jl 0b"
4861+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
4862+ : "d" (mask), "a" (addr)
4863+ : "cc" );
4864 }
4865
4866 /*
4867 * SMP save test_and_set_bit routine based on compare and swap (CS)
4868 */
4869-static __inline__ int
4870-test_and_set_bit_cs(unsigned long nr, volatile void * addr)
4871+static inline int
4872+test_and_set_bit_cs(unsigned long nr, volatile void *ptr)
4873 {
4874- unsigned long bits, mask;
4875- __asm__ __volatile__(
4876+ unsigned long addr, old, new, mask;
4877+
4878+ addr = (unsigned long) ptr;
4879 #if ALIGN_CS == 1
4880- " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
4881- " ngr %2,%1\n" /* isolate last 2 bits of address */
4882- " xgr %1,%2\n" /* make addr % 4 == 0 */
4883- " sllg %2,%2,3\n"
4884- " agr %0,%2\n" /* add alignement to bitnr */
4885+ addr ^= addr & 7; /* align address to 8 */
4886+ nr += (addr & 7) << 3; /* add alignment to bit number */
4887 #endif
4888- " lghi %2,63\n"
4889- " nr %2,%0\n" /* make shift value */
4890- " xr %0,%2\n"
4891- " srlg %0,%0,3\n"
4892- " lghi %3,1\n"
4893- " la %1,0(%0,%1)\n" /* calc. address for CS */
4894- " sllg %3,%3,0(%2)\n" /* make OR mask */
4895- " lg %0,0(%1)\n"
4896- "0: lgr %2,%0\n" /* CS loop starts here */
4897- " ogr %2,%3\n" /* set bit */
4898- " csg %0,%2,0(%1)\n"
4899- " jl 0b\n"
4900- " ngr %0,%3\n" /* isolate old bit */
4901- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
4902- : "cc", "memory" );
4903- return nr != 0;
4904+ addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
4905+ mask = 1UL << (nr & 63); /* make OR/test mask */
4906+ asm volatile(
4907+ " lg %0,0(%4)\n"
4908+ "0: lgr %1,%0\n"
4909+ " ogr %1,%3\n"
4910+ " csg %0,%1,0(%4)\n"
4911+ " jl 0b"
4912+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
4913+ : "d" (mask), "a" (addr)
4914+ : "cc" );
4915+ return (old & mask) != 0;
4916 }
4917
4918 /*
4919 * SMP save test_and_clear_bit routine based on compare and swap (CS)
4920 */
4921-static __inline__ int
4922-test_and_clear_bit_cs(unsigned long nr, volatile void * addr)
4923+static inline int
4924+test_and_clear_bit_cs(unsigned long nr, volatile void *ptr)
4925 {
4926- unsigned long bits, mask;
4927- __asm__ __volatile__(
4928+ unsigned long addr, old, new, mask;
4929+
4930+ addr = (unsigned long) ptr;
4931 #if ALIGN_CS == 1
4932- " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
4933- " ngr %2,%1\n" /* isolate last 2 bits of address */
4934- " xgr %1,%2\n" /* make addr % 4 == 0 */
4935- " sllg %2,%2,3\n"
4936- " agr %0,%2\n" /* add alignement to bitnr */
4937+ addr ^= addr & 7; /* align address to 8 */
4938+ nr += (addr & 7) << 3; /* add alignment to bit number */
4939 #endif
4940- " lghi %2,63\n"
4941- " nr %2,%0\n" /* make shift value */
4942- " xr %0,%2\n"
4943- " srlg %0,%0,3\n"
4944- " lghi %3,-2\n"
4945- " la %1,0(%0,%1)\n" /* calc. address for CS */
4946- " rllg %3,%3,0(%2)\n" /* make AND mask */
4947- " lg %0,0(%1)\n"
4948- "0: lgr %2,%0\n" /* CS loop starts here */
4949- " ngr %2,%3\n" /* clear bit */
4950- " csg %0,%2,0(%1)\n"
4951- " jl 0b\n"
4952- " xgr %0,%2\n" /* isolate old bit */
4953- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
4954- : "cc", "memory" );
4955- return nr != 0;
4956+ addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
4957+ mask = ~(1UL << (nr & 63)); /* make AND mask */
4958+ asm volatile(
4959+ " lg %0,0(%4)\n"
4960+ "0: lgr %1,%0\n"
4961+ " ngr %1,%3\n"
4962+ " csg %0,%1,0(%4)\n"
4963+ " jl 0b"
4964+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
4965+ : "d" (mask), "a" (addr)
4966+ : "cc" );
4967+ return (old ^ new) != 0;
4968 }
4969
4970 /*
4971 * SMP save test_and_change_bit routine based on compare and swap (CS)
4972 */
4973-static __inline__ int
4974-test_and_change_bit_cs(unsigned long nr, volatile void * addr)
4975+static inline int
4976+test_and_change_bit_cs(unsigned long nr, volatile void *ptr)
4977 {
4978- unsigned long bits, mask;
4979- __asm__ __volatile__(
4980+ unsigned long addr, old, new, mask;
4981+
4982+ addr = (unsigned long) ptr;
4983 #if ALIGN_CS == 1
4984- " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
4985- " ngr %2,%1\n" /* isolate last 2 bits of address */
4986- " xgr %1,%2\n" /* make addr % 4 == 0 */
4987- " sllg %2,%2,3\n"
4988- " agr %0,%2\n" /* add alignement to bitnr */
4989+ addr ^= addr & 7; /* align address to 8 */
4990+ nr += (addr & 7) << 3; /* add alignment to bit number */
4991 #endif
4992- " lghi %2,63\n"
4993- " nr %2,%0\n" /* make shift value */
4994- " xr %0,%2\n"
4995- " srlg %0,%0,3\n"
4996- " lghi %3,1\n"
4997- " la %1,0(%0,%1)\n" /* calc. address for CS */
4998- " sllg %3,%3,0(%2)\n" /* make OR mask */
4999- " lg %0,0(%1)\n"
5000- "0: lgr %2,%0\n" /* CS loop starts here */
5001- " xgr %2,%3\n" /* change bit */
5002- " csg %0,%2,0(%1)\n"
5003- " jl 0b\n"
5004- " ngr %0,%3\n" /* isolate old bit */
5005- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
5006- : "cc", "memory" );
5007- return nr != 0;
5008+ addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
5009+ mask = 1UL << (nr & 63); /* make XOR mask */
5010+ asm volatile(
5011+ " lg %0,0(%4)\n"
5012+ "0: lgr %1,%0\n"
5013+ " xgr %1,%3\n"
5014+ " csg %0,%1,0(%4)\n"
5015+ " jl 0b"
5016+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
5017+ : "d" (mask), "a" (addr)
5018+ : "cc" );
5019+ return (old & mask) != 0;
5020 }
5021 #endif /* CONFIG_SMP */
5022
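The three *_bit_cs() routines above share one shape: load the 64-bit word once, compute the new value, and let CSG retry the store until no other CPU has modified the word in between. As a point of reference, a minimal portable sketch of that retry loop, using the GCC __atomic builtins instead of the s390x CSG instruction (function name and builtins are my own choice, not kernel code; bit numbering is plain LSB-first here, whereas the routines above also adjust the byte address):

	#include <stdint.h>

	/* Same retry pattern as test_and_set_bit_cs(): OR the mask in via
	 * compare-and-swap and report whether the bit was already set. */
	static inline int cas_test_and_set_bit(unsigned long nr,
					       volatile uint64_t *base)
	{
		volatile uint64_t *word = base + (nr >> 6);
		uint64_t mask = (uint64_t)1 << (nr & 63);
		uint64_t old = *word, new_val;

		do {
			new_val = old | mask;	/* the "ogr" step */
			/* on failure, 'old' is refreshed with the current value */
		} while (!__atomic_compare_exchange_n(word, &old, new_val, 0,
						      __ATOMIC_SEQ_CST,
						      __ATOMIC_SEQ_CST));
		return (old & mask) != 0;	/* previous state of the bit */
	}

The clear and change variants differ only in the line that computes new_val (AND with the inverted mask, XOR with the mask).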
5023 /*
5024 * fast, non-SMP set_bit routine
5025 */
5026-static __inline__ void __set_bit(unsigned long nr, volatile void * addr)
5027+static inline void __set_bit(unsigned long nr, volatile void *ptr)
5028 {
5029- unsigned long reg1, reg2;
5030- __asm__ __volatile__(
5031- " lghi %1,56\n"
5032- " lghi %0,7\n"
5033- " xgr %1,%2\n"
5034- " nr %0,%2\n"
5035- " srlg %1,%1,3\n"
5036- " la %1,0(%1,%3)\n"
5037- " la %0,0(%0,%4)\n"
5038- " oc 0(1,%1),0(%0)"
5039- : "=&a" (reg1), "=&a" (reg2)
5040- : "a" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
5041-}
5042-
5043-static __inline__ void
5044-__constant_set_bit(const unsigned long nr, volatile void * addr)
5045-{
5046- switch (nr&7) {
5047- case 0:
5048- __asm__ __volatile__ ("la 1,%0\n\t"
5049- "oi 0(1),0x01"
5050- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5051- : : "1", "cc", "memory");
5052- break;
5053- case 1:
5054- __asm__ __volatile__ ("la 1,%0\n\t"
5055- "oi 0(1),0x02"
5056- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5057- : : "1", "cc", "memory" );
5058- break;
5059- case 2:
5060- __asm__ __volatile__ ("la 1,%0\n\t"
5061- "oi 0(1),0x04"
5062- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5063- : : "1", "cc", "memory" );
5064- break;
5065- case 3:
5066- __asm__ __volatile__ ("la 1,%0\n\t"
5067- "oi 0(1),0x08"
5068- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5069- : : "1", "cc", "memory" );
5070- break;
5071- case 4:
5072- __asm__ __volatile__ ("la 1,%0\n\t"
5073- "oi 0(1),0x10"
5074- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5075- : : "1", "cc", "memory" );
5076- break;
5077- case 5:
5078- __asm__ __volatile__ ("la 1,%0\n\t"
5079- "oi 0(1),0x20"
5080- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5081- : : "1", "cc", "memory" );
5082- break;
5083- case 6:
5084- __asm__ __volatile__ ("la 1,%0\n\t"
5085- "oi 0(1),0x40"
5086- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5087- : : "1", "cc", "memory" );
5088- break;
5089- case 7:
5090- __asm__ __volatile__ ("la 1,%0\n\t"
5091- "oi 0(1),0x80"
5092- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5093- : : "1", "cc", "memory" );
5094- break;
5095- }
5096+ unsigned long addr;
5097+
5098+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
5099+ asm volatile("oc 0(1,%1),0(%2)"
5100+ : "+m" (*(char *) addr)
5101+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
5102+ : "cc" );
5103+}
5104+
5105+static inline void
5106+__constant_set_bit(const unsigned long nr, volatile void *ptr)
5107+{
5108+ unsigned long addr;
5109+
5110+ addr = ((unsigned long) ptr) + ((nr >> 3) ^ 7);
5111+ switch (nr&7) {
5112+ case 0:
5113+ asm volatile ("oi 0(%1),0x01"
5114+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5115+ break;
5116+ case 1:
5117+ asm volatile ("oi 0(%1),0x02"
5118+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5119+ break;
5120+ case 2:
5121+ asm volatile ("oi 0(%1),0x04"
5122+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5123+ break;
5124+ case 3:
5125+ asm volatile ("oi 0(%1),0x08"
5126+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5127+ break;
5128+ case 4:
5129+ asm volatile ("oi 0(%1),0x10"
5130+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5131+ break;
5132+ case 5:
5133+ asm volatile ("oi 0(%1),0x20"
5134+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5135+ break;
5136+ case 6:
5137+ asm volatile ("oi 0(%1),0x40"
5138+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5139+ break;
5140+ case 7:
5141+ asm volatile ("oi 0(%1),0x80"
5142+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5143+ break;
5144+ }
5145 }
5146
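The rewritten non-SMP helpers (here and in the clear/change hunks below) all reduce to a single byte operation: (nr ^ 56) >> 3 is the same byte index as the (nr >> 3) ^ 7 used by __constant_set_bit(), and _oi_bitmap/_ni_bitmap are small tables of per-bit OR/AND masks indexed by nr & 7. A small C model of __set_bit() under those assumptions (the table below is a local stand-in, not the kernel's _oi_bitmap):

	/* Local stand-in for the kernel's per-bit mask table. */
	static const unsigned char oi_bitmap[8] = {
		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80
	};

	/* Model of __set_bit(): locate the byte that holds bit 'nr' and OR
	 * the mask in, which is all the single "oc" instruction does. */
	static inline void model_set_bit(unsigned long nr, volatile void *ptr)
	{
		volatile unsigned char *byte =
			(volatile unsigned char *) ptr + ((nr ^ 56) >> 3);

		*byte |= oi_bitmap[nr & 7];
	}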
5147 #define set_bit_simple(nr,addr) \
5148@@ -326,76 +275,58 @@
5149 /*
5150 * fast, non-SMP clear_bit routine
5151 */
5152-static __inline__ void
5153-__clear_bit(unsigned long nr, volatile void * addr)
5154+static inline void
5155+__clear_bit(unsigned long nr, volatile void *ptr)
5156 {
5157- unsigned long reg1, reg2;
5158- __asm__ __volatile__(
5159- " lghi %1,56\n"
5160- " lghi %0,7\n"
5161- " xgr %1,%2\n"
5162- " nr %0,%2\n"
5163- " srlg %1,%1,3\n"
5164- " la %1,0(%1,%3)\n"
5165- " la %0,0(%0,%4)\n"
5166- " nc 0(1,%1),0(%0)"
5167- : "=&a" (reg1), "=&a" (reg2)
5168- : "d" (nr), "a" (addr), "a" (&_ni_bitmap) : "cc", "memory" );
5169-}
5170-
5171-static __inline__ void
5172-__constant_clear_bit(const unsigned long nr, volatile void * addr)
5173-{
5174- switch (nr&7) {
5175- case 0:
5176- __asm__ __volatile__ ("la 1,%0\n\t"
5177- "ni 0(1),0xFE"
5178- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5179- : : "1", "cc", "memory" );
5180- break;
5181- case 1:
5182- __asm__ __volatile__ ("la 1,%0\n\t"
5183- "ni 0(1),0xFD"
5184- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5185- : : "1", "cc", "memory" );
5186- break;
5187- case 2:
5188- __asm__ __volatile__ ("la 1,%0\n\t"
5189- "ni 0(1),0xFB"
5190- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5191- : : "1", "cc", "memory" );
5192- break;
5193- case 3:
5194- __asm__ __volatile__ ("la 1,%0\n\t"
5195- "ni 0(1),0xF7"
5196- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5197- : : "1", "cc", "memory" );
5198- break;
5199- case 4:
5200- __asm__ __volatile__ ("la 1,%0\n\t"
5201- "ni 0(1),0xEF"
5202- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5203- : : "cc", "memory" );
5204- break;
5205- case 5:
5206- __asm__ __volatile__ ("la 1,%0\n\t"
5207- "ni 0(1),0xDF"
5208- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5209- : : "1", "cc", "memory" );
5210- break;
5211- case 6:
5212- __asm__ __volatile__ ("la 1,%0\n\t"
5213- "ni 0(1),0xBF"
5214- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5215- : : "1", "cc", "memory" );
5216- break;
5217- case 7:
5218- __asm__ __volatile__ ("la 1,%0\n\t"
5219- "ni 0(1),0x7F"
5220- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5221- : : "1", "cc", "memory" );
5222- break;
5223- }
5224+ unsigned long addr;
5225+
5226+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
5227+ asm volatile("nc 0(1,%1),0(%2)"
5228+ : "+m" (*(char *) addr)
5229+ : "a" (addr), "a" (_ni_bitmap + (nr & 7))
5230+ : "cc" );
5231+}
5232+
5233+static inline void
5234+__constant_clear_bit(const unsigned long nr, volatile void *ptr)
5235+{
5236+ unsigned long addr;
5237+
5238+ addr = ((unsigned long) ptr) + ((nr >> 3) ^ 7);
5239+ switch (nr&7) {
5240+ case 0:
5241+ asm volatile ("ni 0(%1),0xFE"
5242+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5243+ break;
5244+ case 1:
5245+ asm volatile ("ni 0(%1),0xFD"
5246+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5247+ break;
5248+ case 2:
5249+ asm volatile ("ni 0(%1),0xFB"
5250+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5251+ break;
5252+ case 3:
5253+ asm volatile ("ni 0(%1),0xF7"
5254+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5255+ break;
5256+ case 4:
5257+ asm volatile ("ni 0(%1),0xEF"
5258+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5259+ break;
5260+ case 5:
5261+ asm volatile ("ni 0(%1),0xDF"
5262+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5263+ break;
5264+ case 6:
5265+ asm volatile ("ni 0(%1),0xBF"
5266+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5267+ break;
5268+ case 7:
5269+ asm volatile ("ni 0(%1),0x7F"
5270+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5271+ break;
5272+ }
5273 }
5274
5275 #define clear_bit_simple(nr,addr) \
5276@@ -406,75 +337,57 @@
5277 /*
5278 * fast, non-SMP change_bit routine
5279 */
5280-static __inline__ void __change_bit(unsigned long nr, volatile void * addr)
5281+static inline void __change_bit(unsigned long nr, volatile void *ptr)
5282 {
5283- unsigned long reg1, reg2;
5284- __asm__ __volatile__(
5285- " lghi %1,56\n"
5286- " lghi %0,7\n"
5287- " xgr %1,%2\n"
5288- " nr %0,%2\n"
5289- " srlg %1,%1,3\n"
5290- " la %1,0(%1,%3)\n"
5291- " la %0,0(%0,%4)\n"
5292- " xc 0(1,%1),0(%0)"
5293- : "=&a" (reg1), "=&a" (reg2)
5294- : "d" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
5295-}
5296-
5297-static __inline__ void
5298-__constant_change_bit(const unsigned long nr, volatile void * addr)
5299-{
5300- switch (nr&7) {
5301- case 0:
5302- __asm__ __volatile__ ("la 1,%0\n\t"
5303- "xi 0(1),0x01"
5304- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5305- : : "cc", "memory" );
5306- break;
5307- case 1:
5308- __asm__ __volatile__ ("la 1,%0\n\t"
5309- "xi 0(1),0x02"
5310- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5311- : : "cc", "memory" );
5312- break;
5313- case 2:
5314- __asm__ __volatile__ ("la 1,%0\n\t"
5315- "xi 0(1),0x04"
5316- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5317- : : "cc", "memory" );
5318- break;
5319- case 3:
5320- __asm__ __volatile__ ("la 1,%0\n\t"
5321- "xi 0(1),0x08"
5322- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5323- : : "cc", "memory" );
5324- break;
5325- case 4:
5326- __asm__ __volatile__ ("la 1,%0\n\t"
5327- "xi 0(1),0x10"
5328- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5329- : : "cc", "memory" );
5330- break;
5331- case 5:
5332- __asm__ __volatile__ ("la 1,%0\n\t"
5333- "xi 0(1),0x20"
5334- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5335- : : "1", "cc", "memory" );
5336- break;
5337- case 6:
5338- __asm__ __volatile__ ("la 1,%0\n\t"
5339- "xi 0(1),0x40"
5340- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5341- : : "1", "cc", "memory" );
5342- break;
5343- case 7:
5344- __asm__ __volatile__ ("la 1,%0\n\t"
5345- "xi 0(1),0x80"
5346- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
5347- : : "1", "cc", "memory" );
5348- break;
5349- }
5350+ unsigned long addr;
5351+
5352+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
5353+ asm volatile("xc 0(1,%1),0(%2)"
5354+ : "+m" (*(char *) addr)
5355+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
5356+ : "cc" );
5357+}
5358+
5359+static inline void
5360+__constant_change_bit(const unsigned long nr, volatile void *ptr)
5361+{
5362+ unsigned long addr;
5363+
5364+ addr = ((unsigned long) ptr) + ((nr >> 3) ^ 7);
5365+ switch (nr&7) {
5366+ case 0:
5367+ asm volatile ("xi 0(%1),0x01"
5368+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5369+ break;
5370+ case 1:
5371+ asm volatile ("xi 0(%1),0x02"
5372+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5373+ break;
5374+ case 2:
5375+ asm volatile ("xi 0(%1),0x04"
5376+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5377+ break;
5378+ case 3:
5379+ asm volatile ("xi 0(%1),0x08"
5380+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5381+ break;
5382+ case 4:
5383+ asm volatile ("xi 0(%1),0x10"
5384+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5385+ break;
5386+ case 5:
5387+ asm volatile ("xi 0(%1),0x20"
5388+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5389+ break;
5390+ case 6:
5391+ asm volatile ("xi 0(%1),0x40"
5392+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5393+ break;
5394+ case 7:
5395+ asm volatile ("xi 0(%1),0x80"
5396+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
5397+ break;
5398+ }
5399 }
5400
5401 #define change_bit_simple(nr,addr) \
5402@@ -485,77 +398,57 @@
5403 /*
5404 * fast, non-SMP test_and_set_bit routine
5405 */
5406-static __inline__ int
5407-test_and_set_bit_simple(unsigned long nr, volatile void * addr)
5408+static inline int
5409+test_and_set_bit_simple(unsigned long nr, volatile void *ptr)
5410 {
5411- unsigned long reg1, reg2;
5412- int oldbit;
5413- __asm__ __volatile__(
5414- " lghi %1,56\n"
5415- " lghi %2,7\n"
5416- " xgr %1,%3\n"
5417- " nr %2,%3\n"
5418- " srlg %1,%1,3\n"
5419- " la %1,0(%1,%4)\n"
5420- " ic %0,0(%1)\n"
5421- " srl %0,0(%2)\n"
5422- " la %2,0(%2,%5)\n"
5423- " oc 0(1,%1),0(%2)"
5424- : "=&d" (oldbit), "=&a" (reg1), "=&a" (reg2)
5425- : "d" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
5426- return oldbit & 1;
5427+ unsigned long addr;
5428+ unsigned char ch;
5429+
5430+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
5431+ ch = *(unsigned char *) addr;
5432+ asm volatile("oc 0(1,%1),0(%2)"
5433+ : "+m" (*(char *) addr)
5434+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
5435+ : "cc" );
5436+ return (ch >> (nr & 7)) & 1;
5437 }
5438 #define __test_and_set_bit(X,Y) test_and_set_bit_simple(X,Y)
5439
5440 /*
5441 * fast, non-SMP test_and_clear_bit routine
5442 */
5443-static __inline__ int
5444-test_and_clear_bit_simple(unsigned long nr, volatile void * addr)
5445+static inline int
5446+test_and_clear_bit_simple(unsigned long nr, volatile void *ptr)
5447 {
5448- unsigned long reg1, reg2;
5449- int oldbit;
5450+ unsigned long addr;
5451+ unsigned char ch;
5452
5453- __asm__ __volatile__(
5454- " lghi %1,56\n"
5455- " lghi %2,7\n"
5456- " xgr %1,%3\n"
5457- " nr %2,%3\n"
5458- " srlg %1,%1,3\n"
5459- " la %1,0(%1,%4)\n"
5460- " ic %0,0(%1)\n"
5461- " srl %0,0(%2)\n"
5462- " la %2,0(%2,%5)\n"
5463- " nc 0(1,%1),0(%2)"
5464- : "=&d" (oldbit), "=&a" (reg1), "=&a" (reg2)
5465- : "d" (nr), "a" (addr), "a" (&_ni_bitmap) : "cc", "memory" );
5466- return oldbit & 1;
5467+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
5468+ ch = *(unsigned char *) addr;
5469+ asm volatile("nc 0(1,%1),0(%2)"
5470+ : "+m" (*(char *) addr)
5471+ : "a" (addr), "a" (_ni_bitmap + (nr & 7))
5472+ : "cc" );
5473+ return (ch >> (nr & 7)) & 1;
5474 }
5475 #define __test_and_clear_bit(X,Y) test_and_clear_bit_simple(X,Y)
5476
5477 /*
5478 * fast, non-SMP test_and_change_bit routine
5479 */
5480-static __inline__ int
5481-test_and_change_bit_simple(unsigned long nr, volatile void * addr)
5482+static inline int
5483+test_and_change_bit_simple(unsigned long nr, volatile void *ptr)
5484 {
5485- unsigned long reg1, reg2;
5486- int oldbit;
5487+ unsigned long addr;
5488+ unsigned char ch;
5489
5490- __asm__ __volatile__(
5491- " lghi %1,56\n"
5492- " lghi %2,7\n"
5493- " xgr %1,%3\n"
5494- " nr %2,%3\n"
5495- " srlg %1,%1,3\n"
5496- " la %1,0(%1,%4)\n"
5497- " ic %0,0(%1)\n"
5498- " srl %0,0(%2)\n"
5499- " la %2,0(%2,%5)\n"
5500- " xc 0(1,%1),0(%2)"
5501- : "=&d" (oldbit), "=&a" (reg1), "=&a" (reg2)
5502- : "d" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
5503- return oldbit & 1;
5504+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
5505+ ch = *(unsigned char *) addr;
5506+ asm volatile("xc 0(1,%1),0(%2)"
5507+ : "+m" (*(char *) addr)
5508+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
5509+ : "cc" );
5510+ return (ch >> (nr & 7)) & 1;
5511 }
5512 #define __test_and_change_bit(X,Y) test_and_change_bit_simple(X,Y)
5513
5514@@ -580,26 +473,18 @@
5515 * This routine doesn't need to be atomic.
5516 */
5517
5518-static __inline__ int __test_bit(unsigned long nr, volatile void * addr)
5519+static inline int __test_bit(unsigned long nr, volatile void *ptr)
5520 {
5521- unsigned long reg1, reg2;
5522- int oldbit;
5523+ unsigned long addr;
5524+ unsigned char ch;
5525
5526- __asm__ __volatile__(
5527- " lghi %2,56\n"
5528- " lghi %1,7\n"
5529- " xgr %2,%3\n"
5530- " nr %1,%3\n"
5531- " srlg %2,%2,3\n"
5532- " ic %0,0(%2,%4)\n"
5533- " srl %0,0(%1)\n"
5534- : "=&d" (oldbit), "=&a" (reg1), "=&a" (reg2)
5535- : "d" (nr), "a" (addr) : "cc" );
5536- return oldbit & 1;
5537+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
5538+ ch = *(unsigned char *) addr;
5539+ return (ch >> (nr & 7)) & 1;
5540 }
5541
5542-static __inline__ int
5543-__constant_test_bit(unsigned long nr, volatile void * addr) {
5544+static inline int
5545+__constant_test_bit(unsigned long nr, volatile void *addr) {
5546 return (((volatile char *) addr)[(nr>>3)^7] & (1<<(nr&7))) != 0;
5547 }
5548
5549@@ -611,7 +496,7 @@
5550 /*
5551 * Find-bit routines..
5552 */
5553-static __inline__ unsigned long
5554+static inline unsigned long
5555 find_first_zero_bit(void * addr, unsigned long size)
5556 {
5557 unsigned long res, cmp, count;
5558@@ -653,7 +538,49 @@
5559 return (res < size) ? res : size;
5560 }
5561
5562-static __inline__ unsigned long
5563+static inline unsigned long
5564+find_first_bit(void * addr, unsigned long size)
5565+{
5566+ unsigned long res, cmp, count;
5567+
5568+ if (!size)
5569+ return 0;
5570+ __asm__(" slgr %1,%1\n"
5571+ " lgr %2,%3\n"
5572+ " slgr %0,%0\n"
5573+ " aghi %2,63\n"
5574+ " srlg %2,%2,6\n"
5575+ "0: cg %1,0(%0,%4)\n"
5576+ " jne 1f\n"
5577+ " aghi %0,8\n"
5578+ " brct %2,0b\n"
5579+ " lgr %0,%3\n"
5580+ " j 5f\n"
5581+ "1: lg %2,0(%0,%4)\n"
5582+ " sllg %0,%0,3\n"
5583+ " clr %2,%1\n"
5584+ " jne 2f\n"
5585+ " aghi %0,32\n"
5586+ " srlg %2,%2,32\n"
5587+ "2: lghi %1,0xff\n"
5588+ " tmll %2,0xffff\n"
5589+ " jnz 3f\n"
5590+ " aghi %0,16\n"
5591+ " srl %2,16\n"
5592+ "3: tmll %2,0x00ff\n"
5593+ " jnz 4f\n"
5594+ " aghi %0,8\n"
5595+ " srl %2,8\n"
5596+ "4: ngr %2,%1\n"
5597+ " ic %2,0(%2,%5)\n"
5598+ " algr %0,%2\n"
5599+ "5:"
5600+ : "=&a" (res), "=&d" (cmp), "=&a" (count)
5601+ : "a" (size), "a" (addr), "a" (&_sb_findmap) : "cc" );
5602+ return (res < size) ? res : size;
5603+}
5604+
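The new find_first_bit() above does in assembler what the following portable sketch does in C: skip 64-bit words that are entirely zero, then locate the lowest set bit inside the first non-zero word (here with a compiler builtin instead of the _sb_findmap table). The function name and the builtin are mine; a 64-bit unsigned long is assumed, as on s390x:

	/* Returns the index of the first set bit, or 'size' if none is set. */
	static inline unsigned long c_find_first_bit(const unsigned long *addr,
						     unsigned long size)
	{
		unsigned long i, nwords = (size + 63) / 64;

		for (i = 0; i < nwords; i++) {
			if (addr[i]) {
				unsigned long bit = i * 64 + __builtin_ctzl(addr[i]);
				return bit < size ? bit : size;
			}
		}
		return size;
	}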
5605+static inline unsigned long
5606 find_next_zero_bit (void * addr, unsigned long size, unsigned long offset)
5607 {
5608 unsigned long * p = ((unsigned long *) addr) + (offset >> 6);
5609@@ -697,14 +624,56 @@
5610 return (offset + res);
5611 }
5612
5613+static inline unsigned long
5614+find_next_bit (void * addr, unsigned long size, unsigned long offset)
5615+{
5616+ unsigned long * p = ((unsigned long *) addr) + (offset >> 6);
5617+ unsigned long bitvec, reg;
5618+ unsigned long set, bit = offset & 63, res;
5619+
5620+ if (bit) {
5621+ /*
5622+ * Look for zero in first word
5623+ */
5624+ bitvec = (*p) >> bit;
5625+ __asm__(" slgr %0,%0\n"
5626+ " ltr %1,%1\n"
5627+ " jnz 0f\n"
5628+ " aghi %0,32\n"
5629+ " srlg %1,%1,32\n"
5630+ "0: lghi %2,0xff\n"
5631+ " tmll %1,0xffff\n"
5632+ " jnz 1f\n"
5633+ " aghi %0,16\n"
5634+ " srlg %1,%1,16\n"
5635+ "1: tmll %1,0x00ff\n"
5636+ " jnz 2f\n"
5637+ " aghi %0,8\n"
5638+ " srlg %1,%1,8\n"
5639+ "2: ngr %1,%2\n"
5640+ " ic %1,0(%1,%3)\n"
5641+ " algr %0,%1"
5642+ : "=&d" (set), "+a" (bitvec), "=&d" (reg)
5643+ : "a" (&_sb_findmap) : "cc" );
5644+ if (set < (64 - bit))
5645+ return set + offset;
5646+ offset += 64 - bit;
5647+ p++;
5648+ }
5649+ /*
5650+ * No set bit yet, search remaining full words for a bit
5651+ */
5652+ res = find_first_bit (p, size - 64 * (p - (unsigned long *) addr));
5653+ return (offset + res);
5654+}
5655+
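A typical caller walks every set bit with the pair above, restarting the search one past the previous hit. A hypothetical helper showing that calling convention (the name is mine; it relies only on both routines returning a value >= the bitmap size once no further bit is set):

	static void visit_set_bits(unsigned long *bitmap, unsigned long nbits,
				   void (*fn)(unsigned long bit))
	{
		unsigned long bit;

		for (bit = find_first_bit(bitmap, nbits);
		     bit < nbits;
		     bit = find_next_bit(bitmap, nbits, bit + 1))
			fn(bit);	/* visits set bits in ascending order */
	}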
5656 /*
5657 * ffz = Find First Zero in word. Undefined if no zero exists,
5658 * so code should check against ~0UL first..
5659 */
5660-static __inline__ unsigned long ffz(unsigned long word)
5661+static inline unsigned long ffz(unsigned long word)
5662 {
5663- unsigned long reg;
5664- int result;
5665+ unsigned long reg, result;
5666
5667 __asm__(" lhi %2,-1\n"
5668 " slgr %0,%0\n"
5669@@ -730,40 +699,112 @@
5670 }
5671
5672 /*
5673+ * __ffs = find first bit in word. Undefined if no bit exists,
5674+ * so code should check against 0UL first..
5675+ */
5676+static inline unsigned long __ffs (unsigned long word)
5677+{
5678+ unsigned long reg, result;
5679+
5680+ __asm__(" slgr %0,%0\n"
5681+ " ltr %1,%1\n"
5682+ " jnz 0f\n"
5683+ " aghi %0,32\n"
5684+ " srlg %1,%1,32\n"
5685+ "0: lghi %2,0xff\n"
5686+ " tmll %1,0xffff\n"
5687+ " jnz 1f\n"
5688+ " aghi %0,16\n"
5689+ " srlg %1,%1,16\n"
5690+ "1: tmll %1,0x00ff\n"
5691+ " jnz 2f\n"
5692+ " aghi %0,8\n"
5693+ " srlg %1,%1,8\n"
5694+ "2: ngr %1,%2\n"
5695+ " ic %1,0(%1,%3)\n"
5696+ " algr %0,%1"
5697+ : "=&d" (result), "+a" (word), "=&d" (reg)
5698+ : "a" (&_sb_findmap) : "cc" );
5699+ return result;
5700+}
5701+
5702+/*
5703+ * Every architecture must define this function. It's the fastest
5704+ * way of searching a 140-bit bitmap where the first 100 bits are
5705+ * unlikely to be set. It's guaranteed that at least one of the 140
5706+ * bits is cleared.
5707+ */
5708+static inline int sched_find_first_bit(unsigned long *b)
5709+{
5710+ return find_first_bit(b, 140);
5711+}
5712+
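sched_find_first_bit() exists because the O(1) scheduler keeps one bit per priority level and MAX_PRIO is 140 (see the include/linux/sched.h hunk further down), so the bitmap spans three 64-bit words here: b[0], b[1] and the low 12 bits of b[2]. A generic C sketch of the same lookup, assuming a __ffs()-style helper and that the caller guarantees at least one bit is set:

	static inline int generic_sched_find_first_bit(const unsigned long *b)
	{
		if (b[0])
			return __ffs(b[0]);
		if (b[1])
			return __ffs(b[1]) + 64;
		return __ffs(b[2]) + 128;	/* bits 128..139 */
	}

The s390x version above simply delegates to the full find_first_bit(); other architectures in this patch open-code the per-word checks.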
5713+/*
5714 * ffs: find first bit set. This is defined the same way as
5715 * the libc and compiler builtin ffs routines, therefore
5716 * differs in spirit from the above ffz (man ffs).
5717 */
5718-
5719-extern int __inline__ ffs (int x)
5720+extern int inline ffs (int x)
5721 {
5722- int r;
5723+ int r = 1;
5724
5725 if (x == 0)
5726- return 0;
5727- __asm__(" slr %0,%0\n"
5728- " tml %1,0xffff\n"
5729+ return 0;
5730+ __asm__(" tml %1,0xffff\n"
5731 " jnz 0f\n"
5732- " ahi %0,16\n"
5733 " srl %1,16\n"
5734+ " ahi %0,16\n"
5735 "0: tml %1,0x00ff\n"
5736 " jnz 1f\n"
5737- " ahi %0,8\n"
5738 " srl %1,8\n"
5739+ " ahi %0,8\n"
5740 "1: tml %1,0x000f\n"
5741 " jnz 2f\n"
5742- " ahi %0,4\n"
5743 " srl %1,4\n"
5744+ " ahi %0,4\n"
5745 "2: tml %1,0x0003\n"
5746 " jnz 3f\n"
5747- " ahi %0,2\n"
5748 " srl %1,2\n"
5749+ " ahi %0,2\n"
5750 "3: tml %1,0x0001\n"
5751 " jnz 4f\n"
5752 " ahi %0,1\n"
5753 "4:"
5754 : "=&d" (r), "+d" (x) : : "cc" );
5755- return r+1;
5756+ return r;
5757+}
5758+
5759+/*
5760+ * fls: find last bit set.
5761+ */
5762+extern __inline__ int fls(int x)
5763+{
5764+ int r = 32;
5765+
5766+ if (x == 0)
5767+ return 0;
5768+ __asm__(" tmh %1,0xffff\n"
5769+ " jz 0f\n"
5770+ " sll %1,16\n"
5771+ " ahi %0,-16\n"
5772+ "0: tmh %1,0xff00\n"
5773+ " jz 1f\n"
5774+ " sll %1,8\n"
5775+ " ahi %0,-8\n"
5776+ "1: tmh %1,0xf000\n"
5777+ " jz 2f\n"
5778+ " sll %1,4\n"
5779+ " ahi %0,-4\n"
5780+ "2: tmh %1,0xc000\n"
5781+ " jz 3f\n"
5782+ " sll %1,2\n"
5783+ " ahi %0,-2\n"
5784+ "3: tmh %1,0x8000\n"
5785+ " jz 4f\n"
5786+ " ahi %0,-1\n"
5787+ "4:"
5788+ : "+d" (r), "+d" (x) : : "cc" );
5789+ return r;
5790 }
5791
5792 /*
5793@@ -791,7 +832,7 @@
5794 #define ext2_set_bit(nr, addr) test_and_set_bit((nr)^56, addr)
5795 #define ext2_clear_bit(nr, addr) test_and_clear_bit((nr)^56, addr)
5796 #define ext2_test_bit(nr, addr) test_bit((nr)^56, addr)
5797-static __inline__ unsigned long
5798+static inline unsigned long
5799 ext2_find_first_zero_bit(void *vaddr, unsigned long size)
5800 {
5801 unsigned long res, cmp, count;
5802@@ -833,7 +874,7 @@
5803 return (res < size) ? res : size;
5804 }
5805
5806-static __inline__ unsigned long
5807+static inline unsigned long
5808 ext2_find_next_zero_bit(void *vaddr, unsigned long size, unsigned long offset)
5809 {
5810 unsigned long *addr = vaddr;
5811diff -urN linux-2.4.24.org/include/asm-sparc/bitops.h linux-2.4.24/include/asm-sparc/bitops.h
5812--- linux-2.4.24.org/include/asm-sparc/bitops.h 2004-02-04 20:47:50.760965997 +0100
5813+++ linux-2.4.24/include/asm-sparc/bitops.h 2004-02-04 20:52:54.074877521 +0100
5814@@ -231,6 +231,63 @@
5815 return result;
5816 }
5817
5818+/**
5819+ * __ffs - find first bit in word.
5820+ * @word: The word to search
5821+ *
5822+ * Undefined if no bit exists, so code should check against 0 first.
5823+ */
5824+static __inline__ int __ffs(unsigned long word)
5825+{
5826+ int num = 0;
5827+
5828+ if ((word & 0xffff) == 0) {
5829+ num += 16;
5830+ word >>= 16;
5831+ }
5832+ if ((word & 0xff) == 0) {
5833+ num += 8;
5834+ word >>= 8;
5835+ }
5836+ if ((word & 0xf) == 0) {
5837+ num += 4;
5838+ word >>= 4;
5839+ }
5840+ if ((word & 0x3) == 0) {
5841+ num += 2;
5842+ word >>= 2;
5843+ }
5844+ if ((word & 0x1) == 0)
5845+ num += 1;
5846+ return num;
5847+}
5848+
5849+/*
5850+ * Every architecture must define this function. It's the fastest
5851+ * way of searching a 140-bit bitmap where the first 100 bits are
5852+ * unlikely to be set. It's guaranteed that at least one of the 140
5853+ * bits is cleared.
5854+ */
5855+static __inline__ int sched_find_first_bit(unsigned long *b)
5856+{
5857+
5858+ if (unlikely(b[0]))
5859+ return __ffs(b[0]);
5860+ if (unlikely(b[1]))
5861+ return __ffs(b[1]) + 32;
5862+ if (unlikely(b[2]))
5863+ return __ffs(b[2]) + 64;
5864+ if (b[3])
5865+ return __ffs(b[3]) + 96;
5866+ return __ffs(b[4]) + 128;
5867+}
5868+
5869+/*
5870+ * fls: find last bit set.
5871+ */
5872+
5873+#define fls(x) generic_fls(x)
5874+
5875 /*
5876 * ffs: find first bit set. This is defined the same way as
5877 * the libc and compiler builtin ffs routines, therefore
5878@@ -296,6 +353,32 @@
5879 #define find_first_zero_bit(addr, size) \
5880 find_next_zero_bit((addr), (size), 0)
5881
5882+/**
5883+ * find_next_bit - find the first set bit in a memory region
5884+ * @addr: The address to base the search on
5885+ * @offset: The bitnumber to start searching at
5886+ * @size: The maximum size to search
5887+ *
5888+ * Scheduler induced bitop, do not use.
5889+ */
5890+static inline int find_next_bit(unsigned long *addr, int size, int offset)
5891+{
5892+ unsigned long *p = addr + (offset >> 5);
5893+ int num = offset & ~0x1f;
5894+ unsigned long word;
5895+
5896+ word = *p++;
5897+ word &= ~((1 << (offset & 0x1f)) - 1);
5898+ while (num < size) {
5899+ if (word != 0) {
5900+ return __ffs(word) + num;
5901+ }
5902+ word = *p++;
5903+ num += 0x20;
5904+ }
5905+ return num;
5906+}
5907+
5908 static inline int test_le_bit(int nr, __const__ void * addr)
5909 {
5910 __const__ unsigned char *ADDR = (__const__ unsigned char *) addr;
5911diff -urN linux-2.4.24.org/include/asm-sparc/system.h linux-2.4.24/include/asm-sparc/system.h
5912--- linux-2.4.24.org/include/asm-sparc/system.h 2004-02-04 20:47:50.644990120 +0100
5913+++ linux-2.4.24/include/asm-sparc/system.h 2004-02-04 20:52:54.110870035 +0100
5914@@ -84,7 +84,7 @@
5915 *
5916 * SWITCH_ENTER and SWITH_DO_LAZY_FPU do not work yet (e.g. SMP does not work)
5917 */
5918-#define prepare_to_switch() do { \
5919+#define prepare_arch_switch(rq, next) do { \
5920 __asm__ __volatile__( \
5921 ".globl\tflush_patch_switch\nflush_patch_switch:\n\t" \
5922 "save %sp, -0x40, %sp; save %sp, -0x40, %sp; save %sp, -0x40, %sp\n\t" \
5923@@ -92,6 +92,8 @@
5924 "save %sp, -0x40, %sp\n\t" \
5925 "restore; restore; restore; restore; restore; restore; restore"); \
5926 } while(0)
5927+#define finish_arch_switch(rq, next) do{ }while(0)
5928+#define task_running(rq, p) ((rq)->curr == (p))
5929
5930 /* Much care has gone into this code, do not touch it.
5931 *
5932diff -urN linux-2.4.24.org/include/asm-sparc64/bitops.h linux-2.4.24/include/asm-sparc64/bitops.h
5933--- linux-2.4.24.org/include/asm-sparc64/bitops.h 2004-02-04 20:48:02.155595906 +0100
5934+++ linux-2.4.24/include/asm-sparc64/bitops.h 2004-02-04 20:52:54.137864420 +0100
5935@@ -1,4 +1,4 @@
5936-/* $Id$
5937+/* $Id$
5938 * bitops.h: Bit string operations on the V9.
5939 *
5940 * Copyright 1996, 1997 David S. Miller (davem@caip.rutgers.edu)
5941@@ -7,11 +7,12 @@
5942 #ifndef _SPARC64_BITOPS_H
5943 #define _SPARC64_BITOPS_H
5944
5945+#include <linux/compiler.h>
5946 #include <asm/byteorder.h>
5947
5948-extern long ___test_and_set_bit(unsigned long nr, volatile void *addr);
5949-extern long ___test_and_clear_bit(unsigned long nr, volatile void *addr);
5950-extern long ___test_and_change_bit(unsigned long nr, volatile void *addr);
5951+extern long ___test_and_set_bit(unsigned long nr, volatile unsigned long *addr);
5952+extern long ___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr);
5953+extern long ___test_and_change_bit(unsigned long nr, volatile unsigned long *addr);
5954
5955 #define test_and_set_bit(nr,addr) ({___test_and_set_bit(nr,addr)!=0;})
5956 #define test_and_clear_bit(nr,addr) ({___test_and_clear_bit(nr,addr)!=0;})
5957@@ -21,109 +22,132 @@
5958 #define change_bit(nr,addr) ((void)___test_and_change_bit(nr,addr))
5959
5960 /* "non-atomic" versions... */
5961-#define __set_bit(X,Y) \
5962-do { unsigned long __nr = (X); \
5963- long *__m = ((long *) (Y)) + (__nr >> 6); \
5964- *__m |= (1UL << (__nr & 63)); \
5965-} while (0)
5966-#define __clear_bit(X,Y) \
5967-do { unsigned long __nr = (X); \
5968- long *__m = ((long *) (Y)) + (__nr >> 6); \
5969- *__m &= ~(1UL << (__nr & 63)); \
5970-} while (0)
5971-#define __change_bit(X,Y) \
5972-do { unsigned long __nr = (X); \
5973- long *__m = ((long *) (Y)) + (__nr >> 6); \
5974- *__m ^= (1UL << (__nr & 63)); \
5975-} while (0)
5976-#define __test_and_set_bit(X,Y) \
5977-({ unsigned long __nr = (X); \
5978- long *__m = ((long *) (Y)) + (__nr >> 6); \
5979- long __old = *__m; \
5980- long __mask = (1UL << (__nr & 63)); \
5981- *__m = (__old | __mask); \
5982- ((__old & __mask) != 0); \
5983-})
5984-#define __test_and_clear_bit(X,Y) \
5985-({ unsigned long __nr = (X); \
5986- long *__m = ((long *) (Y)) + (__nr >> 6); \
5987- long __old = *__m; \
5988- long __mask = (1UL << (__nr & 63)); \
5989- *__m = (__old & ~__mask); \
5990- ((__old & __mask) != 0); \
5991-})
5992-#define __test_and_change_bit(X,Y) \
5993-({ unsigned long __nr = (X); \
5994- long *__m = ((long *) (Y)) + (__nr >> 6); \
5995- long __old = *__m; \
5996- long __mask = (1UL << (__nr & 63)); \
5997- *__m = (__old ^ __mask); \
5998- ((__old & __mask) != 0); \
5999-})
6000+
6001+static __inline__ void __set_bit(int nr, volatile unsigned long *addr)
6002+{
6003+ volatile unsigned long *m = addr + (nr >> 6);
6004+
6005+ *m |= (1UL << (nr & 63));
6006+}
6007+
6008+static __inline__ void __clear_bit(int nr, volatile unsigned long *addr)
6009+{
6010+ volatile unsigned long *m = addr + (nr >> 6);
6011+
6012+ *m &= ~(1UL << (nr & 63));
6013+}
6014+
6015+static __inline__ void __change_bit(int nr, volatile unsigned long *addr)
6016+{
6017+ volatile unsigned long *m = addr + (nr >> 6);
6018+
6019+ *m ^= (1UL << (nr & 63));
6020+}
6021+
6022+static __inline__ int __test_and_set_bit(int nr, volatile unsigned long *addr)
6023+{
6024+ volatile unsigned long *m = addr + (nr >> 6);
6025+ long old = *m;
6026+ long mask = (1UL << (nr & 63));
6027+
6028+ *m = (old | mask);
6029+ return ((old & mask) != 0);
6030+}
6031+
6032+static __inline__ int __test_and_clear_bit(int nr, volatile unsigned long *addr)
6033+{
6034+ volatile unsigned long *m = addr + (nr >> 6);
6035+ long old = *m;
6036+ long mask = (1UL << (nr & 63));
6037+
6038+ *m = (old & ~mask);
6039+ return ((old & mask) != 0);
6040+}
6041+
6042+static __inline__ int __test_and_change_bit(int nr, volatile unsigned long *addr)
6043+{
6044+ volatile unsigned long *m = addr + (nr >> 6);
6045+ long old = *m;
6046+ long mask = (1UL << (nr & 63));
6047+
6048+ *m = (old ^ mask);
6049+ return ((old & mask) != 0);
6050+}
6051
6052 #define smp_mb__before_clear_bit() do { } while(0)
6053 #define smp_mb__after_clear_bit() do { } while(0)
6054
6055-extern __inline__ int test_bit(int nr, __const__ void *addr)
6056+static __inline__ int test_bit(int nr, __const__ volatile unsigned long *addr)
6057 {
6058- return (1UL & (((__const__ long *) addr)[nr >> 6] >> (nr & 63))) != 0UL;
6059+ return (1UL & ((addr)[nr >> 6] >> (nr & 63))) != 0UL;
6060 }
6061
6062 /* The easy/cheese version for now. */
6063-extern __inline__ unsigned long ffz(unsigned long word)
6064+static __inline__ unsigned long ffz(unsigned long word)
6065 {
6066 unsigned long result;
6067
6068-#ifdef ULTRA_HAS_POPULATION_COUNT /* Thanks for nothing Sun... */
6069- __asm__ __volatile__(
6070-" brz,pn %0, 1f\n"
6071-" neg %0, %%g1\n"
6072-" xnor %0, %%g1, %%g2\n"
6073-" popc %%g2, %0\n"
6074-"1: " : "=&r" (result)
6075- : "0" (word)
6076- : "g1", "g2");
6077-#else
6078-#if 1 /* def EASY_CHEESE_VERSION */
6079 result = 0;
6080 while(word & 1) {
6081 result++;
6082 word >>= 1;
6083 }
6084-#else
6085- unsigned long tmp;
6086+ return result;
6087+}
6088
6089- result = 0;
6090- tmp = ~word & -~word;
6091- if (!(unsigned)tmp) {
6092- tmp >>= 32;
6093- result = 32;
6094- }
6095- if (!(unsigned short)tmp) {
6096- tmp >>= 16;
6097- result += 16;
6098- }
6099- if (!(unsigned char)tmp) {
6100- tmp >>= 8;
6101- result += 8;
6102+/**
6103+ * __ffs - find first bit in word.
6104+ * @word: The word to search
6105+ *
6106+ * Undefined if no bit exists, so code should check against 0 first.
6107+ */
6108+static __inline__ unsigned long __ffs(unsigned long word)
6109+{
6110+ unsigned long result = 0;
6111+
6112+ while (!(word & 1UL)) {
6113+ result++;
6114+ word >>= 1;
6115 }
6116- if (tmp & 0xf0) result += 4;
6117- if (tmp & 0xcc) result += 2;
6118- if (tmp & 0xaa) result ++;
6119-#endif
6120-#endif
6121 return result;
6122 }
6123
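Both ffz() and the new __ffs() above keep the plain shift-and-count loop (the "easy/cheese version"). Where a compiler builtin is acceptable, the same 0-based result for a non-zero word can be computed directly; a sketch of that alternative (naming is mine, not part of the patch):

	/* Same result as __ffs() above for word != 0; undefined for 0,
	 * exactly like __ffs(). */
	static inline unsigned long ctz_ffs(unsigned long word)
	{
		return (unsigned long) __builtin_ctzl(word);
	}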
6124+/*
6125+ * fls: find last bit set.
6126+ */
6127+
6128+#define fls(x) generic_fls(x)
6129+
6130 #ifdef __KERNEL__
6131
6132 /*
6133+ * Every architecture must define this function. It's the fastest
6134+ * way of searching a 140-bit bitmap where the first 100 bits are
6135+ * unlikely to be set. It's guaranteed that at least one of the 140
6136+ * bits is cleared.
6137+ */
6138+static inline int sched_find_first_bit(unsigned long *b)
6139+{
6140+ if (unlikely(b[0]))
6141+ return __ffs(b[0]);
6142+ if (unlikely(((unsigned int)b[1])))
6143+ return __ffs(b[1]) + 64;
6144+ if (b[1] >> 32)
6145+ return __ffs(b[1] >> 32) + 96;
6146+ return __ffs(b[2]) + 128;
6147+}
6148+
6149+/*
6150 * ffs: find first bit set. This is defined the same way as
6151 * the libc and compiler builtin ffs routines, therefore
6152 * differs in spirit from the above ffz (man ffs).
6153 */
6154-
6155-#define ffs(x) generic_ffs(x)
6156+static __inline__ int ffs(int x)
6157+{
6158+ if (!x)
6159+ return 0;
6160+ return __ffs((unsigned long)x);
6161+}
6162
6163 /*
6164 * hweightN: returns the hamming weight (i.e. the number
6165@@ -132,7 +156,7 @@
6166
6167 #ifdef ULTRA_HAS_POPULATION_COUNT
6168
6169-extern __inline__ unsigned int hweight32(unsigned int w)
6170+static __inline__ unsigned int hweight32(unsigned int w)
6171 {
6172 unsigned int res;
6173
6174@@ -140,7 +164,7 @@
6175 return res;
6176 }
6177
6178-extern __inline__ unsigned int hweight16(unsigned int w)
6179+static __inline__ unsigned int hweight16(unsigned int w)
6180 {
6181 unsigned int res;
6182
6183@@ -148,7 +172,7 @@
6184 return res;
6185 }
6186
6187-extern __inline__ unsigned int hweight8(unsigned int w)
6188+static __inline__ unsigned int hweight8(unsigned int w)
6189 {
6190 unsigned int res;
6191
6192@@ -165,14 +189,69 @@
6193 #endif
6194 #endif /* __KERNEL__ */
6195
6196+/**
6197+ * find_next_bit - find the next set bit in a memory region
6198+ * @addr: The address to base the search on
6199+ * @offset: The bitnumber to start searching at
6200+ * @size: The maximum size to search
6201+ */
6202+static __inline__ unsigned long find_next_bit(unsigned long *addr, unsigned long size, unsigned long offset)
6203+{
6204+ unsigned long *p = addr + (offset >> 6);
6205+ unsigned long result = offset & ~63UL;
6206+ unsigned long tmp;
6207+
6208+ if (offset >= size)
6209+ return size;
6210+ size -= result;
6211+ offset &= 63UL;
6212+ if (offset) {
6213+ tmp = *(p++);
6214+ tmp &= (~0UL << offset);
6215+ if (size < 64)
6216+ goto found_first;
6217+ if (tmp)
6218+ goto found_middle;
6219+ size -= 64;
6220+ result += 64;
6221+ }
6222+ while (size & ~63UL) {
6223+ if ((tmp = *(p++)))
6224+ goto found_middle;
6225+ result += 64;
6226+ size -= 64;
6227+ }
6228+ if (!size)
6229+ return result;
6230+ tmp = *p;
6231+
6232+found_first:
6233+ tmp &= (~0UL >> (64 - size));
6234+ if (tmp == 0UL) /* Are any bits set? */
6235+ return result + size; /* Nope. */
6236+found_middle:
6237+ return result + __ffs(tmp);
6238+}
6239+
6240+/**
6241+ * find_first_bit - find the first set bit in a memory region
6242+ * @addr: The address to start the search at
6243+ * @size: The maximum size to search
6244+ *
6245+ * Returns the bit-number of the first set bit, not the number of the byte
6246+ * containing a bit.
6247+ */
6248+#define find_first_bit(addr, size) \
6249+ find_next_bit((addr), (size), 0)
6250+
6251 /* find_next_zero_bit() finds the first zero bit in a bit string of length
6252 * 'size' bits, starting the search at bit 'offset'. This is largely based
6253 * on Linus's ALPHA routines, which are pretty portable BTW.
6254 */
6255
6256-extern __inline__ unsigned long find_next_zero_bit(void *addr, unsigned long size, unsigned long offset)
6257+static __inline__ unsigned long find_next_zero_bit(unsigned long *addr, unsigned long size, unsigned long offset)
6258 {
6259- unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
6260+ unsigned long *p = addr + (offset >> 6);
6261 unsigned long result = offset & ~63UL;
6262 unsigned long tmp;
6263
6264@@ -211,15 +290,15 @@
6265 #define find_first_zero_bit(addr, size) \
6266 find_next_zero_bit((addr), (size), 0)
6267
6268-extern long ___test_and_set_le_bit(int nr, volatile void *addr);
6269-extern long ___test_and_clear_le_bit(int nr, volatile void *addr);
6270+extern long ___test_and_set_le_bit(int nr, volatile unsigned long *addr);
6271+extern long ___test_and_clear_le_bit(int nr, volatile unsigned long *addr);
6272
6273 #define test_and_set_le_bit(nr,addr) ({___test_and_set_le_bit(nr,addr)!=0;})
6274 #define test_and_clear_le_bit(nr,addr) ({___test_and_clear_le_bit(nr,addr)!=0;})
6275 #define set_le_bit(nr,addr) ((void)___test_and_set_le_bit(nr,addr))
6276 #define clear_le_bit(nr,addr) ((void)___test_and_clear_le_bit(nr,addr))
6277
6278-extern __inline__ int test_le_bit(int nr, __const__ void * addr)
6279+static __inline__ int test_le_bit(int nr, __const__ unsigned long * addr)
6280 {
6281 int mask;
6282 __const__ unsigned char *ADDR = (__const__ unsigned char *) addr;
6283@@ -232,9 +311,9 @@
6284 #define find_first_zero_le_bit(addr, size) \
6285 find_next_zero_le_bit((addr), (size), 0)
6286
6287-extern __inline__ unsigned long find_next_zero_le_bit(void *addr, unsigned long size, unsigned long offset)
6288+static __inline__ unsigned long find_next_zero_le_bit(unsigned long *addr, unsigned long size, unsigned long offset)
6289 {
6290- unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
6291+ unsigned long *p = addr + (offset >> 6);
6292 unsigned long result = offset & ~63UL;
6293 unsigned long tmp;
6294
6295@@ -271,18 +350,22 @@
6296
6297 #ifdef __KERNEL__
6298
6299-#define ext2_set_bit test_and_set_le_bit
6300-#define ext2_clear_bit test_and_clear_le_bit
6301-#define ext2_test_bit test_le_bit
6302-#define ext2_find_first_zero_bit find_first_zero_le_bit
6303-#define ext2_find_next_zero_bit find_next_zero_le_bit
6304+#define ext2_set_bit(nr,addr) test_and_set_le_bit((nr),(unsigned long *)(addr))
6305+#define ext2_clear_bit(nr,addr) test_and_clear_le_bit((nr),(unsigned long *)(addr))
6306+#define ext2_test_bit(nr,addr) test_le_bit((nr),(unsigned long *)(addr))
6307+#define ext2_find_first_zero_bit(addr, size) \
6308+ find_first_zero_le_bit((unsigned long *)(addr), (size))
6309+#define ext2_find_next_zero_bit(addr, size, off) \
6310+ find_next_zero_le_bit((unsigned long *)(addr), (size), (off))
6311
6312 /* Bitmap functions for the minix filesystem. */
6313-#define minix_test_and_set_bit(nr,addr) test_and_set_bit(nr,addr)
6314-#define minix_set_bit(nr,addr) set_bit(nr,addr)
6315-#define minix_test_and_clear_bit(nr,addr) test_and_clear_bit(nr,addr)
6316-#define minix_test_bit(nr,addr) test_bit(nr,addr)
6317-#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
6318+#define minix_test_and_set_bit(nr,addr) test_and_set_bit((nr),(unsigned long *)(addr))
6319+#define minix_set_bit(nr,addr) set_bit((nr),(unsigned long *)(addr))
6320+#define minix_test_and_clear_bit(nr,addr) \
6321+ test_and_clear_bit((nr),(unsigned long *)(addr))
6322+#define minix_test_bit(nr,addr) test_bit((nr),(unsigned long *)(addr))
6323+#define minix_find_first_zero_bit(addr,size) \
6324+ find_first_zero_bit((unsigned long *)(addr),(size))
6325
6326 #endif /* __KERNEL__ */
6327
6328diff -urN linux-2.4.24.org/include/asm-sparc64/smp.h linux-2.4.24/include/asm-sparc64/smp.h
6329--- linux-2.4.24.org/include/asm-sparc64/smp.h 2004-02-04 20:48:01.767676594 +0100
6330+++ linux-2.4.24/include/asm-sparc64/smp.h 2004-02-04 20:52:54.175856518 +0100
6331@@ -111,7 +111,7 @@
6332 }
6333 }
6334
6335-#define smp_processor_id() (current->processor)
6336+#define smp_processor_id() (current->cpu)
6337
6338 /* This needn't do anything as we do not sleep the cpu
6339 * inside of the idler task, so an interrupt is not needed
6340diff -urN linux-2.4.24.org/include/asm-sparc64/system.h linux-2.4.24/include/asm-sparc64/system.h
6341--- linux-2.4.24.org/include/asm-sparc64/system.h 2004-02-04 20:48:01.898649351 +0100
6342+++ linux-2.4.24/include/asm-sparc64/system.h 2004-02-04 20:52:54.208849656 +0100
6343@@ -154,7 +154,18 @@
6344
6345 #define flush_user_windows flushw_user
6346 #define flush_register_windows flushw_all
6347-#define prepare_to_switch flushw_all
6348+
6349+#define prepare_arch_schedule(prev) task_lock(prev)
6350+#define finish_arch_schedule(prev) task_unlock(prev)
6351+#define prepare_arch_switch(rq, next) \
6352+do { spin_lock(&(next)->switch_lock); \
6353+ spin_unlock(&(rq)->lock); \
6354+ flushw_all(); \
6355+} while (0)
6356+
6357+#define finish_arch_switch(rq, prev) \
6358+do { spin_unlock_irq(&(prev)->switch_lock); \
6359+} while (0)
6360
6361 #ifndef CONFIG_DEBUG_SPINLOCK
6362 #define CHECK_LOCKS(PREV) do { } while(0)
6363diff -urN linux-2.4.24.org/include/linux/bitops.h linux-2.4.24/include/linux/bitops.h
6364--- linux-2.4.24.org/include/linux/bitops.h 2004-02-04 20:47:38.725469391 +0100
6365+++ linux-2.4.24/include/linux/bitops.h 2004-02-04 20:52:54.244842170 +0100
6366@@ -1,6 +1,38 @@
6367 #ifndef _LINUX_BITOPS_H
6368 #define _LINUX_BITOPS_H
6369
6370+/*
6371+ * fls: find last bit set.
6372+ */
6373+
6374+extern __inline__ int generic_fls(int x)
6375+{
6376+ int r = 32;
6377+
6378+ if (!x)
6379+ return 0;
6380+ if (!(x & 0xffff0000u)) {
6381+ x <<= 16;
6382+ r -= 16;
6383+ }
6384+ if (!(x & 0xff000000u)) {
6385+ x <<= 8;
6386+ r -= 8;
6387+ }
6388+ if (!(x & 0xf0000000u)) {
6389+ x <<= 4;
6390+ r -= 4;
6391+ }
6392+ if (!(x & 0xc0000000u)) {
6393+ x <<= 2;
6394+ r -= 2;
6395+ }
6396+ if (!(x & 0x80000000u)) {
6397+ x <<= 1;
6398+ r -= 1;
6399+ }
6400+ return r;
6401+}
6402
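generic_fls() is a five-step binary search from the top of the word: each test shifts the value up and subtracts from r until the highest set bit reaches bit 31. For non-zero x the result equals 32 - clz(x), i.e. the 1-based index of the highest set bit. A small self-contained check of a few expected values (the harness and the reference function are mine):

	#include <assert.h>

	/* Reference: 1-based index of the highest set bit, 0 for x == 0. */
	static int fls_ref(unsigned int x)
	{
		return x ? 32 - __builtin_clz(x) : 0;
	}

	int main(void)
	{
		assert(fls_ref(0) == 0);
		assert(fls_ref(1) == 1);		/* only bit 0 set */
		assert(fls_ref(0x00f0) == 8);		/* highest set bit is bit 7 */
		assert(fls_ref(0x80000000u) == 32);	/* top bit */
		return 0;
	}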
6403 /*
6404 * ffs: find first bit set. This is defined the same way as
6405diff -urN linux-2.4.24.org/include/linux/kernel_stat.h linux-2.4.24/include/linux/kernel_stat.h
6406--- linux-2.4.24.org/include/linux/kernel_stat.h 2004-02-04 20:47:34.063439098 +0100
6407+++ linux-2.4.24/include/linux/kernel_stat.h 2004-02-04 20:52:54.297831148 +0100
6408@@ -31,7 +31,6 @@
6409 #elif !defined(CONFIG_ARCH_S390)
6410 unsigned int irqs[NR_CPUS][NR_IRQS];
6411 #endif
6412- unsigned int context_swtch;
6413 };
6414
6415 extern struct kernel_stat kstat;
6416diff -urN linux-2.4.24.org/include/linux/sched.h linux-2.4.24/include/linux/sched.h
6417--- linux-2.4.24.org/include/linux/sched.h 2004-02-04 20:47:32.755711107 +0100
6418+++ linux-2.4.24/include/linux/sched.h 2004-02-04 20:52:54.755735907 +0100
0aa7655b
AM
6419@@ -6,6 +6,7 @@
6420 extern unsigned long event;
6421
6422 #include <linux/config.h>
6423+#include <linux/compiler.h>
6424 #include <linux/binfmts.h>
6425 #include <linux/threads.h>
6426 #include <linux/kernel.h>
6427@@ -21,7 +22,7 @@
6428 #include <asm/mmu.h>
6429
6430 #include <linux/smp.h>
6431-#include <linux/tty.h>
6432+//#include <linux/tty.h>
6433 #include <linux/sem.h>
6434 #include <linux/signal.h>
6435 #include <linux/securebits.h>
6436@@ -73,10 +74,12 @@
6437 #define CT_TO_SECS(x) ((x) / HZ)
6438 #define CT_TO_USECS(x) (((x) % HZ) * 1000000/HZ)
6439
6440-extern int nr_running, nr_threads;
6441+extern int nr_threads;
6442 extern int last_pid;
6443+extern unsigned long nr_running(void);
6444+extern unsigned long nr_uninterruptible(void);
6445
6446-#include <linux/fs.h>
6447+//#include <linux/fs.h>
6448 #include <linux/time.h>
6449 #include <linux/param.h>
6450 #include <linux/resource.h>
6451@@ -109,12 +112,6 @@
6452 #define SCHED_FIFO 1
6453 #define SCHED_RR 2
6454
6455-/*
6456- * This is an additional bit set when we want to
6457- * yield the CPU for one re-schedule..
6458- */
6459-#define SCHED_YIELD 0x10
6460-
6461 struct sched_param {
6462 int sched_priority;
6463 };
6464@@ -132,17 +129,21 @@
6465 * a separate lock).
6466 */
6467 extern rwlock_t tasklist_lock;
6468-extern spinlock_t runqueue_lock;
6469 extern spinlock_t mmlist_lock;
6470
6471+typedef struct task_struct task_t;
6472+
6473 extern void sched_init(void);
6474-extern void init_idle(void);
6475+extern void init_idle(task_t *idle, int cpu);
6476 extern void show_state(void);
6477 extern void cpu_init (void);
6478 extern void trap_init(void);
6479 extern void update_process_times(int user);
6480-extern void update_one_process(struct task_struct *p, unsigned long user,
6481+extern void update_one_process(task_t *p, unsigned long user,
6482 unsigned long system, int cpu);
6483+extern void scheduler_tick(int user_tick, int system);
6484+extern void migration_init(void);
6485+extern unsigned long cache_decay_ticks;
6486
6487 #define MAX_SCHEDULE_TIMEOUT LONG_MAX
6488 extern signed long FASTCALL(schedule_timeout(signed long timeout));
6489@@ -152,6 +153,28 @@
6490 extern void flush_scheduled_tasks(void);
6491 extern int start_context_thread(void);
6492 extern int current_is_keventd(void);
6493+extern void FASTCALL(sched_exit(task_t * p));
6494+extern int FASTCALL(idle_cpu(int cpu));
6495+
6496+/*
6497+ * Priority of a process goes from 0..MAX_PRIO-1, valid RT
6498+ * priority is 0..MAX_RT_PRIO-1, and SCHED_OTHER tasks are
6499+ * in the range MAX_RT_PRIO..MAX_PRIO-1. Priority values
6500+ * are inverted: lower p->prio value means higher priority.
6501+ *
6502+ * The MAX_RT_USER_PRIO value allows the actual maximum
6503+ * RT priority to be separate from the value exported to
6504+ * user-space. This allows kernel threads to set their
6505+ * priority to a value higher than any user task. Note:
6506+ * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
6507+ *
6508+ * Both values are configurable at compile-time.
6509+ */
6510+
6511+#define MAX_USER_RT_PRIO 100
6512+#define MAX_RT_PRIO MAX_USER_RT_PRIO
6513+
6514+#define MAX_PRIO (MAX_RT_PRIO + 40)
6515
6516 #if CONFIG_SMP
6517 extern void set_cpus_allowed(struct task_struct *p, unsigned long new_mask);
6518@@ -280,6 +303,8 @@
6519 extern struct user_struct root_user;
6520 #define INIT_USER (&root_user)
6521
6522+typedef struct prio_array prio_array_t;
6523+
6524 struct task_struct {
6525 /*
6526 * offsets of these are hardcoded elsewhere - touch with care
6527@@ -297,35 +322,26 @@
6528
6529 int lock_depth; /* Lock depth */
6530
6531-/*
6532- * offset 32 begins here on 32-bit platforms. We keep
6533- * all fields in a single cacheline that are needed for
6534- * the goodness() loop in schedule().
6535- */
6536- long counter;
6537- long nice;
6538- unsigned long policy;
6539- struct mm_struct *mm;
6540- int processor;
6541 /*
6542- * cpus_runnable is ~0 if the process is not running on any
6543- * CPU. It's (1 << cpu) if it's running on a CPU. This mask
6544- * is updated under the runqueue lock.
6545- *
6546- * To determine whether a process might run on a CPU, this
6547- * mask is AND-ed with cpus_allowed.
6548- */
6549- unsigned long cpus_runnable, cpus_allowed;
6550- /*
6551- * (only the 'next' pointer fits into the cacheline, but
6552- * that's just fine.)
6553+ * offset 32 begins here on 32-bit platforms.
6554 */
6555+ unsigned int cpu;
6556+ int prio, static_prio;
6557 struct list_head run_list;
6558- unsigned long sleep_time;
6559+ prio_array_t *array;
6560
6561- struct task_struct *next_task, *prev_task;
6562- struct mm_struct *active_mm;
6563+ unsigned long sleep_avg;
6564+ unsigned long sleep_timestamp;
6565+
6566+ unsigned long policy;
6567+ unsigned long cpus_allowed;
6568+ unsigned int time_slice, first_time_slice;
6569+
6570+ task_t *next_task, *prev_task;
6571+
6572+ struct mm_struct *mm, *active_mm;
6573 struct list_head local_pages;
6574+
6575 unsigned int allocation_order, nr_local_pages;
6576
6577 /* task state */
6578@@ -348,12 +364,12 @@
6579 * older sibling, respectively. (p->father can be replaced with
6580 * p->p_pptr->pid)
6581 */
6582- struct task_struct *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr;
6583+ task_t *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr;
6584 struct list_head thread_group;
6585
6586 /* PID hash table linkage. */
6587- struct task_struct *pidhash_next;
6588- struct task_struct **pidhash_pprev;
6589+ task_t *pidhash_next;
6590+ task_t **pidhash_pprev;
6591
6592 wait_queue_head_t wait_chldexit; /* for wait4() */
6593 struct completion *vfork_done; /* for vfork() */
6594@@ -412,6 +428,8 @@
6595 u32 self_exec_id;
6596 /* Protection of (de-)allocation: mm, files, fs, tty */
6597 spinlock_t alloc_lock;
6598+/* context-switch lock */
6599+ spinlock_t switch_lock;
6600
6601 /* journalling filesystem info */
6602 void *journal_info;
6603@@ -454,9 +472,15 @@
6604 */
6605 #define _STK_LIM (8*1024*1024)
6606
6607-#define DEF_COUNTER (10*HZ/100) /* 100 ms time slice */
6608-#define MAX_COUNTER (20*HZ/100)
6609-#define DEF_NICE (0)
6610+#if CONFIG_SMP
6611+extern void set_cpus_allowed(task_t *p, unsigned long new_mask);
6612+#else
6613+#define set_cpus_allowed(p, new_mask) do { } while (0)
6614+#endif
6615+
6616+extern void set_user_nice(task_t *p, long nice);
6617+extern int task_prio(task_t *p);
6618+extern int task_nice(task_t *p);
6619
6620 extern void yield(void);
6621
6622@@ -477,14 +501,14 @@
6623 addr_limit: KERNEL_DS, \
6624 exec_domain: &default_exec_domain, \
6625 lock_depth: -1, \
6626- counter: DEF_COUNTER, \
6627- nice: DEF_NICE, \
6628+ prio: MAX_PRIO-20, \
6629+ static_prio: MAX_PRIO-20, \
6630 policy: SCHED_OTHER, \
6631+ cpus_allowed: ~0UL, \
6632 mm: NULL, \
6633 active_mm: &init_mm, \
6634- cpus_runnable: ~0UL, \
6635- cpus_allowed: ~0UL, \
6636 run_list: LIST_HEAD_INIT(tsk.run_list), \
6637+ time_slice: HZ, \
6638 next_task: &tsk, \
6639 prev_task: &tsk, \
6640 p_opptr: &tsk, \
6641@@ -509,6 +533,7 @@
6642 pending: { NULL, &tsk.pending.head, {{0}}}, \
6643 blocked: {{0}}, \
6644 alloc_lock: SPIN_LOCK_UNLOCKED, \
6645+ switch_lock: SPIN_LOCK_UNLOCKED, \
6646 journal_info: NULL, \
6647 }
6648
6649@@ -518,24 +543,23 @@
6650 #endif
6651
6652 union task_union {
6653- struct task_struct task;
6654+ task_t task;
6655 unsigned long stack[INIT_TASK_SIZE/sizeof(long)];
6656 };
6657
6658 extern union task_union init_task_union;
6659
6660 extern struct mm_struct init_mm;
6661-extern struct task_struct *init_tasks[NR_CPUS];
6662
6663 /* PID hashing. (shouldnt this be dynamic?) */
6664 #define PIDHASH_SZ (4096 >> 2)
6665-extern struct task_struct *pidhash[PIDHASH_SZ];
6666+extern task_t *pidhash[PIDHASH_SZ];
6667
6668 #define pid_hashfn(x) ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1))
6669
6670-static inline void hash_pid(struct task_struct *p)
6671+static inline void hash_pid(task_t *p)
6672 {
6673- struct task_struct **htable = &pidhash[pid_hashfn(p->pid)];
6674+ task_t **htable = &pidhash[pid_hashfn(p->pid)];
6675
6676 if((p->pidhash_next = *htable) != NULL)
6677 (*htable)->pidhash_pprev = &p->pidhash_next;
6678@@ -543,16 +567,16 @@
6679 p->pidhash_pprev = htable;
6680 }
6681
6682-static inline void unhash_pid(struct task_struct *p)
6683+static inline void unhash_pid(task_t *p)
6684 {
6685 if(p->pidhash_next)
6686 p->pidhash_next->pidhash_pprev = p->pidhash_pprev;
6687 *p->pidhash_pprev = p->pidhash_next;
6688 }
6689
6690-static inline struct task_struct *find_task_by_pid(int pid)
6691+static inline task_t *find_task_by_pid(int pid)
6692 {
6693- struct task_struct *p, **htable = &pidhash[pid_hashfn(pid)];
6694+ task_t *p, **htable = &pidhash[pid_hashfn(pid)];
6695
6696 for(p = *htable; p && p->pid != pid; p = p->pidhash_next)
6697 ;
6698@@ -560,19 +584,6 @@
6699 return p;
6700 }
6701
6702-#define task_has_cpu(tsk) ((tsk)->cpus_runnable != ~0UL)
6703-
6704-static inline void task_set_cpu(struct task_struct *tsk, unsigned int cpu)
6705-{
6706- tsk->processor = cpu;
6707- tsk->cpus_runnable = 1UL << cpu;
6708-}
6709-
6710-static inline void task_release_cpu(struct task_struct *tsk)
6711-{
6712- tsk->cpus_runnable = ~0UL;
6713-}
6714-
6715 /* per-UID process charging. */
6716 extern struct user_struct * alloc_uid(uid_t);
6717 extern void free_uid(struct user_struct *);
6718@@ -600,47 +611,50 @@
6719 extern void FASTCALL(interruptible_sleep_on(wait_queue_head_t *q));
6720 extern long FASTCALL(interruptible_sleep_on_timeout(wait_queue_head_t *q,
6721 signed long timeout));
6722-extern int FASTCALL(wake_up_process(struct task_struct * tsk));
6723+extern int FASTCALL(wake_up_process(task_t * p));
6724+extern void FASTCALL(wake_up_forked_process(task_t * p));
6725
6726 #define wake_up(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1)
6727 #define wake_up_nr(x, nr) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr)
6728 #define wake_up_all(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0)
6729-#define wake_up_sync(x) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1)
6730-#define wake_up_sync_nr(x, nr) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr)
6731 #define wake_up_interruptible(x) __wake_up((x),TASK_INTERRUPTIBLE, 1)
6732 #define wake_up_interruptible_nr(x, nr) __wake_up((x),TASK_INTERRUPTIBLE, nr)
6733 #define wake_up_interruptible_all(x) __wake_up((x),TASK_INTERRUPTIBLE, 0)
6734-#define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, 1)
6735-#define wake_up_interruptible_sync_nr(x, nr) __wake_up_sync((x),TASK_INTERRUPTIBLE, nr)
6736+#ifdef CONFIG_SMP
6737+#define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, 1)
6738+#else
6739+#define wake_up_interruptible_sync(x) __wake_up((x),TASK_INTERRUPTIBLE, 1)
6740+#endif
6741+
6742 asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru);
6743
6744 extern int in_group_p(gid_t);
6745 extern int in_egroup_p(gid_t);
6746
6747 extern void proc_caches_init(void);
6748-extern void flush_signals(struct task_struct *);
6749-extern void flush_signal_handlers(struct task_struct *);
6750+extern void flush_signals(task_t *);
6751+extern void flush_signal_handlers(task_t *);
6752 extern void sig_exit(int, int, struct siginfo *);
6753 extern int dequeue_signal(sigset_t *, siginfo_t *);
6754 extern void block_all_signals(int (*notifier)(void *priv), void *priv,
6755 sigset_t *mask);
6756 extern void unblock_all_signals(void);
6757-extern int send_sig_info(int, struct siginfo *, struct task_struct *);
6758-extern int force_sig_info(int, struct siginfo *, struct task_struct *);
6759+extern int send_sig_info(int, struct siginfo *, task_t *);
6760+extern int force_sig_info(int, struct siginfo *, task_t *);
6761 extern int kill_pg_info(int, struct siginfo *, pid_t);
6762 extern int kill_sl_info(int, struct siginfo *, pid_t);
6763 extern int kill_proc_info(int, struct siginfo *, pid_t);
6764-extern void notify_parent(struct task_struct *, int);
6765-extern void do_notify_parent(struct task_struct *, int);
6766-extern void force_sig(int, struct task_struct *);
6767-extern int send_sig(int, struct task_struct *, int);
6768+extern void notify_parent(task_t *, int);
6769+extern void do_notify_parent(task_t *, int);
6770+extern void force_sig(int, task_t *);
6771+extern int send_sig(int, task_t *, int);
6772 extern int kill_pg(pid_t, int, int);
6773 extern int kill_sl(pid_t, int, int);
6774 extern int kill_proc(pid_t, int, int);
6775 extern int do_sigaction(int, const struct k_sigaction *, struct k_sigaction *);
6776 extern int do_sigaltstack(const stack_t *, stack_t *, unsigned long);
6777
6778-static inline int signal_pending(struct task_struct *p)
6779+static inline int signal_pending(task_t *p)
6780 {
6781 return (p->sigpending != 0);
6782 }
6783@@ -679,7 +693,7 @@
6784 This is required every time the blocked sigset_t changes.
6785 All callers should have t->sigmask_lock. */
6786
6787-static inline void recalc_sigpending(struct task_struct *t)
6788+static inline void recalc_sigpending(task_t *t)
6789 {
6790 t->sigpending = has_pending_signals(&t->pending.signal, &t->blocked);
6791 }
6792@@ -786,16 +800,17 @@
6793 extern int expand_fdset(struct files_struct *, int nr);
6794 extern void free_fdset(fd_set *, int);
6795
6796-extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
6797+extern int copy_thread(int, unsigned long, unsigned long, unsigned long, task_t *, struct pt_regs *);
6798 extern void flush_thread(void);
6799 extern void exit_thread(void);
6800
6801-extern void exit_mm(struct task_struct *);
6802-extern void exit_files(struct task_struct *);
6803-extern void exit_sighand(struct task_struct *);
6804+extern void exit_mm(task_t *);
6805+extern void exit_files(task_t *);
6806+extern void exit_sighand(task_t *);
6807
6808 extern void reparent_to_init(void);
6809 extern void daemonize(void);
6810+extern task_t *child_reaper;
6811
6812 extern int do_execve(char *, char **, char **, struct pt_regs *);
6813 extern int do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long);
6814@@ -806,6 +821,9 @@
6815
6816 extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
6817
6818+extern void wait_task_inactive(task_t * p);
6819+extern void kick_if_running(task_t * p);
6820+
6821 #define __wait_event(wq, condition) \
6822 do { \
6823 wait_queue_t __wait; \
6824@@ -887,27 +905,12 @@
6825 for (task = next_thread(current) ; task != current ; task = next_thread(task))
6826
6827 #define next_thread(p) \
6828- list_entry((p)->thread_group.next, struct task_struct, thread_group)
6829+ list_entry((p)->thread_group.next, task_t, thread_group)
6830
6831 #define thread_group_leader(p) (p->pid == p->tgid)
6832
6833-static inline void del_from_runqueue(struct task_struct * p)
6834+static inline void unhash_process(task_t *p)
6835 {
6836- nr_running--;
6837- p->sleep_time = jiffies;
6838- list_del(&p->run_list);
6839- p->run_list.next = NULL;
6840-}
6841-
6842-static inline int task_on_runqueue(struct task_struct *p)
6843-{
6844- return (p->run_list.next != NULL);
6845-}
6846-
6847-static inline void unhash_process(struct task_struct *p)
6848-{
6849- if (task_on_runqueue(p))
6850- out_of_line_bug();
6851 write_lock_irq(&tasklist_lock);
6852 nr_threads--;
6853 unhash_pid(p);
6854@@ -917,12 +920,12 @@
6855 }
6856
6857 /* Protects ->fs, ->files, ->mm, and synchronises with wait4(). Nests inside tasklist_lock */
6858-static inline void task_lock(struct task_struct *p)
6859+static inline void task_lock(task_t *p)
6860 {
6861 spin_lock(&p->alloc_lock);
6862 }
6863
6864-static inline void task_unlock(struct task_struct *p)
6865+static inline void task_unlock(task_t *p)
6866 {
6867 spin_unlock(&p->alloc_lock);
6868 }
6869@@ -946,6 +949,26 @@
6870 return res;
6871 }
6872
6873+static inline void set_need_resched(void)
6874+{
6875+ current->need_resched = 1;
6876+}
6877+
6878+static inline void clear_need_resched(void)
6879+{
6880+ current->need_resched = 0;
6881+}
6882+
6883+static inline void set_tsk_need_resched(task_t *tsk)
6884+{
6885+ tsk->need_resched = 1;
6886+}
6887+
6888+static inline void clear_tsk_need_resched(task_t *tsk)
6889+{
6890+ tsk->need_resched = 0;
6891+}
6892+
6893 static inline int need_resched(void)
6894 {
6895 return (unlikely(current->need_resched));
6896@@ -959,4 +982,5 @@
6897 }
6898
6899 #endif /* __KERNEL__ */
6900+
6901 #endif
6902diff -urN linux-2.4.24.org/include/linux/smp_balance.h linux-2.4.24/include/linux/smp_balance.h
6903--- linux-2.4.24.org/include/linux/smp_balance.h 1970-01-01 01:00:00.000000000 +0100
6904+++ linux-2.4.24/include/linux/smp_balance.h	2004-02-04 20:52:54.758735283 +0100
6905@@ -0,0 +1,15 @@
6906+#ifndef _LINUX_SMP_BALANCE_H
6907+#define _LINUX_SMP_BALANCE_H
6908+
6909+/*
6910+ * per-architecture load balancing logic, e.g. for hyperthreading
6911+ */
6912+
6913+#ifdef ARCH_HAS_SMP_BALANCE
6914+#include <asm/smp_balance.h>
6915+#else
6916+#define arch_load_balance(x, y) (0)
6917+#define arch_reschedule_idle_override(x, idle) (idle)
6918+#endif
6919+
6920+#endif /* _LINUX_SMP_BALANCE_H */
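For illustration only (not part of the patch): the scheduler code later in this patch calls arch_load_balance() at the top of load_balance() and arch_reschedule_idle_override() once an idle CPU has been picked, so an architecture that defines ARCH_HAS_SMP_BALANCE only has to supply those two hooks in <asm/smp_balance.h>. A hypothetical, do-nothing override could look like this sketch (everything beyond the two hook names is an assumption):

#define ARCH_HAS_SMP_BALANCE

/* return non-zero if the architecture already rebalanced this CPU */
static inline int arch_load_balance(int this_cpu, int idle)
{
        return 0;
}

/* given the idle CPU picked by reschedule_idle(), optionally redirect the
 * task elsewhere (e.g. to a hyperthreading sibling); returning the argument
 * keeps the generic choice */
static inline int arch_reschedule_idle_override(task_t *p, int idle)
{
        return idle;
}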
6921diff -urN linux-2.4.24.org/include/linux/smp.h linux-2.4.24/include/linux/smp.h
6922--- linux-2.4.24.org/include/linux/smp.h 2004-02-04 20:47:38.184581896 +0100
6923+++ linux-2.4.24/include/linux/smp.h 2004-02-04 20:52:54.806725301 +0100
6924@@ -86,6 +86,14 @@
6925 #define cpu_number_map(cpu) 0
6926 #define smp_call_function(func,info,retry,wait) ({ 0; })
6927 #define cpu_online_map 1
6928+static inline void smp_send_reschedule(int cpu) { }
6929+static inline void smp_send_reschedule_all(void) { }
6930
6931 #endif
6932+
6933+/*
6934+ * Common definitions:
6935+ */
6936+#define cpu() smp_processor_id()
6937+
6938 #endif
6939diff -urN linux-2.4.24.org/include/linux/wait.h linux-2.4.24/include/linux/wait.h
6940--- linux-2.4.24.org/include/linux/wait.h 2004-02-04 20:47:33.472562001 +0100
6941+++ linux-2.4.24/include/linux/wait.h 2004-02-04 20:52:54.861713864 +0100
6942@@ -59,6 +59,7 @@
6943 # define wq_write_lock_irq write_lock_irq
6944 # define wq_write_lock_irqsave write_lock_irqsave
6945 # define wq_write_unlock_irqrestore write_unlock_irqrestore
6946+# define wq_write_unlock_irq write_unlock_irq
6947 # define wq_write_unlock write_unlock
6948 #else
6949 # define wq_lock_t spinlock_t
6950@@ -71,6 +72,7 @@
6951 # define wq_write_lock_irq spin_lock_irq
6952 # define wq_write_lock_irqsave spin_lock_irqsave
6953 # define wq_write_unlock_irqrestore spin_unlock_irqrestore
6954+# define wq_write_unlock_irq spin_unlock_irq
6955 # define wq_write_unlock spin_unlock
6956 #endif
6957
6958diff -urN linux-2.4.24.org/init/main.c linux-2.4.24/init/main.c
6959--- linux-2.4.24.org/init/main.c 2004-02-04 20:47:26.630985058 +0100
6960+++ linux-2.4.24/init/main.c 2004-02-04 20:52:54.909703882 +0100
6961@@ -293,8 +293,6 @@
6962 extern void setup_arch(char **);
6963 extern void cpu_idle(void);
6964
6965-unsigned long wait_init_idle;
6966-
6967 #ifndef CONFIG_SMP
6968
6969 #ifdef CONFIG_X86_LOCAL_APIC
6970@@ -303,34 +301,24 @@
6971 APIC_init_uniprocessor();
6972 }
6973 #else
6974-#define smp_init() do { } while (0)
6975+#define smp_init() do { } while (0)
6976 #endif
6977
6978 #else
6979
6980-
6981 /* Called by boot processor to activate the rest. */
6982 static void __init smp_init(void)
6983 {
6984 /* Get other processors into their bootup holding patterns. */
6985 smp_boot_cpus();
6986- wait_init_idle = cpu_online_map;
6987- clear_bit(current->processor, &wait_init_idle); /* Don't wait on me! */
6988
6989 smp_threads_ready=1;
6990 smp_commence();
6991-
6992- /* Wait for the other cpus to set up their idle processes */
6993- printk("Waiting on wait_init_idle (map = 0x%lx)\n", wait_init_idle);
6994- while (wait_init_idle) {
6995- cpu_relax();
6996- barrier();
6997- }
6998- printk("All processors have done init_idle\n");
6999 }
7000
7001 #endif
7002
7003+
7004 /*
7005 * We need to finalize in a non-__init function or else race conditions
7006 * between the root thread and the init thread may cause start_kernel to
7007@@ -342,9 +330,8 @@
7008 {
7009 kernel_thread(init, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
7010 unlock_kernel();
7011- current->need_resched = 1;
7012- cpu_idle();
7013-}
7014+ cpu_idle();
7015+}
7016
7017 /*
7018 * Activate the first processor.
7019@@ -428,6 +415,7 @@
7020 check_bugs();
7021 printk("POSIX conformance testing by UNIFIX\n");
7022
7023+ init_idle(current, smp_processor_id());
7024 /*
7025 * We count on the initial thread going ok
7026 * Like idlers init is an unlocked kernel thread, which will
7027@@ -465,6 +453,10 @@
7028 */
7029 static void __init do_basic_setup(void)
7030 {
7031+ /* Start the per-CPU migration threads */
7032+#if CONFIG_SMP
7033+ migration_init();
7034+#endif
7035
7036 /*
7037 * Tell the world that we're going to be the grim
7038diff -urN linux-2.4.24.org/kernel/capability.c linux-2.4.24/kernel/capability.c
7039--- linux-2.4.24.org/kernel/capability.c 2004-02-04 20:47:27.302845310 +0100
7040+++ linux-2.4.24/kernel/capability.c 2004-02-04 20:52:54.945696396 +0100
7041@@ -8,6 +8,8 @@
7042 #include <linux/mm.h>
7043 #include <asm/uaccess.h>
7044
7045+unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
7046+
7047 kernel_cap_t cap_bset = CAP_INIT_EFF_SET;
7048
7049 /* Note: never hold tasklist_lock while spinning for this one */
7050diff -urN linux-2.4.24.org/kernel/exit.c linux-2.4.24/kernel/exit.c
7051--- linux-2.4.24.org/kernel/exit.c 2004-02-04 20:47:27.240858204 +0100
7052+++ linux-2.4.24/kernel/exit.c 2004-02-04 20:52:54.951695148 +0100
7053@@ -28,49 +28,22 @@
7054
7055 static void release_task(struct task_struct * p)
7056 {
7057- if (p != current) {
7058+ if (p == current)
7059+ BUG();
7060 #ifdef CONFIG_SMP
7061- /*
7062- * Wait to make sure the process isn't on the
7063- * runqueue (active on some other CPU still)
7064- */
7065- for (;;) {
7066- task_lock(p);
7067- if (!task_has_cpu(p))
7068- break;
7069- task_unlock(p);
7070- do {
7071- cpu_relax();
7072- barrier();
7073- } while (task_has_cpu(p));
7074- }
7075- task_unlock(p);
7076+ wait_task_inactive(p);
7077 #endif
7078- atomic_dec(&p->user->processes);
7079- free_uid(p->user);
7080- unhash_process(p);
7081-
7082- release_thread(p);
7083- current->cmin_flt += p->min_flt + p->cmin_flt;
7084- current->cmaj_flt += p->maj_flt + p->cmaj_flt;
7085- current->cnswap += p->nswap + p->cnswap;
7086- /*
7087- * Potentially available timeslices are retrieved
7088- * here - this way the parent does not get penalized
7089- * for creating too many processes.
7090- *
7091- * (this cannot be used to artificially 'generate'
7092- * timeslices, because any timeslice recovered here
7093- * was given away by the parent in the first place.)
7094- */
7095- current->counter += p->counter;
7096- if (current->counter >= MAX_COUNTER)
7097- current->counter = MAX_COUNTER;
7098- p->pid = 0;
7099- free_task_struct(p);
7100- } else {
7101- printk("task releasing itself\n");
7102- }
7103+ atomic_dec(&p->user->processes);
7104+ free_uid(p->user);
7105+ unhash_process(p);
7106+
7107+ release_thread(p);
7108+ current->cmin_flt += p->min_flt + p->cmin_flt;
7109+ current->cmaj_flt += p->maj_flt + p->cmaj_flt;
7110+ current->cnswap += p->nswap + p->cnswap;
7111+ sched_exit(p);
7112+ p->pid = 0;
7113+ free_task_struct(p);
7114 }
7115
7116 /*
7117@@ -150,6 +123,79 @@
7118 return retval;
7119 }
7120
7121+/**
7122+ * reparent_to_init() - Reparent the calling kernel thread to the init task.
7123+ *
7124+ * If a kernel thread is launched as a result of a system call, or if
7125+ * it ever exits, it should generally reparent itself to init so that
7126+ * it is correctly cleaned up on exit.
7127+ *
7128+ * The various task state such as scheduling policy and priority may have
7129+ * been inherited from a user process, so we reset them to sane values here.
7130+ *
7131+ * NOTE that reparent_to_init() gives the caller full capabilities.
7132+ */
7133+void reparent_to_init(void)
7134+{
7135+ write_lock_irq(&tasklist_lock);
7136+
7137+ /* Reparent to init */
7138+ REMOVE_LINKS(current);
7139+ current->p_pptr = child_reaper;
7140+ current->p_opptr = child_reaper;
7141+ SET_LINKS(current);
7142+
7143+ /* Set the exit signal to SIGCHLD so we signal init on exit */
7144+ current->exit_signal = SIGCHLD;
7145+
7146+ current->ptrace = 0;
7147+ if ((current->policy == SCHED_OTHER) && (task_nice(current) < 0))
7148+ set_user_nice(current, 0);
7149+ /* cpus_allowed? */
7150+ /* rt_priority? */
7151+ /* signals? */
7152+ current->cap_effective = CAP_INIT_EFF_SET;
7153+ current->cap_inheritable = CAP_INIT_INH_SET;
7154+ current->cap_permitted = CAP_FULL_SET;
7155+ current->keep_capabilities = 0;
7156+ memcpy(current->rlim, init_task.rlim, sizeof(*(current->rlim)));
7157+ current->user = INIT_USER;
7158+
7159+ write_unlock_irq(&tasklist_lock);
7160+}
7161+
7162+/*
7163+ * Put all the gunge required to become a kernel thread without
7164+ * attached user resources in one place where it belongs.
7165+ */
7166+
7167+void daemonize(void)
7168+{
7169+ struct fs_struct *fs;
7170+
7171+
7172+ /*
7173+ * If we were started as result of loading a module, close all of the
7174+ * user space pages. We don't need them, and if we didn't close them
7175+ * they would be locked into memory.
7176+ */
7177+ exit_mm(current);
7178+
7179+ current->session = 1;
7180+ current->pgrp = 1;
7181+ current->tty = NULL;
7182+
7183+ /* Become as one with the init task */
7184+
7185+ exit_fs(current); /* current->fs->count--; */
7186+ fs = init_task.fs;
7187+ current->fs = fs;
7188+ atomic_inc(&fs->count);
7189+ exit_files(current);
7190+ current->files = init_task.files;
7191+ atomic_inc(&current->files->count);
7192+}
7193+
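A usage sketch, not taken from this patch: a typical 2.4 kernel thread calls daemonize() first to detach from the user resources it inherited; the thread name, loop and one-second period below are purely illustrative.

static int my_kthread(void *unused)
{
        daemonize();                            /* drop user-space mm, files, fs */
        strcpy(current->comm, "mykthread");     /* illustrative name */

        while (!signal_pending(current)) {
                /* ... periodic work ... */
                set_current_state(TASK_INTERRUPTIBLE);
                schedule_timeout(HZ);           /* sleep roughly one second */
        }
        return 0;
}

/* started elsewhere with kernel_thread(my_kthread, NULL, CLONE_FS | CLONE_FILES) */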
7194 /*
7195 * When we die, we re-parent all our children.
7196 * Try to give them to another thread in our thread
7197@@ -171,6 +217,7 @@
7198 /* Make sure we're not reparenting to ourselves */
7199 p->p_opptr = child_reaper;
7200
7201+ p->first_time_slice = 0;
7202 if (p->pdeath_signal) send_sig(p->pdeath_signal, p, 0);
7203 }
7204 }
7205diff -urN linux-2.4.24.org/kernel/fork.c linux-2.4.24/kernel/fork.c
7206--- linux-2.4.24.org/kernel/fork.c 2004-02-04 20:47:26.750960103 +0100
7207+++ linux-2.4.24/kernel/fork.c 2004-02-04 20:52:54.987687662 +0100
7208@@ -31,7 +31,6 @@
7209
7210 /* The idle threads do not count.. */
7211 int nr_threads;
7212-int nr_running;
7213
7214 int max_threads;
7215 unsigned long total_forks; /* Handle normal Linux uptimes. */
7216@@ -39,6 +38,8 @@
7217
7218 struct task_struct *pidhash[PIDHASH_SZ];
7219
7220+rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */
7f7e7712
KT
7221+
7222 void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
0aa7655b
AM
7223 {
7224 unsigned long flags;
7225@@ -700,9 +701,6 @@
7226 if (p->pid == 0 && current->pid != 0)
7227 goto bad_fork_cleanup;
7228
7229- p->run_list.next = NULL;
7230- p->run_list.prev = NULL;
7231-
7232 p->p_cptr = NULL;
7233 init_waitqueue_head(&p->wait_chldexit);
7234 p->vfork_done = NULL;
7235@@ -711,6 +709,7 @@
7236 init_completion(&vfork);
7237 }
7238 spin_lock_init(&p->alloc_lock);
7239+ spin_lock_init(&p->switch_lock);
7240
7241 p->sigpending = 0;
7242 init_sigpending(&p->pending);
7243@@ -727,11 +726,11 @@
7244 #ifdef CONFIG_SMP
7245 {
7246 int i;
7247- p->cpus_runnable = ~0UL;
7248- p->processor = current->processor;
7249+
7250 /* ?? should we just memset this ?? */
7251 for(i = 0; i < smp_num_cpus; i++)
7252- p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0;
7253+ p->per_cpu_utime[cpu_logical_map(i)] =
7254+ p->per_cpu_stime[cpu_logical_map(i)] = 0;
7255 spin_lock_init(&p->sigmask_lock);
7256 }
7257 #endif
7258@@ -769,15 +768,27 @@
7259 p->pdeath_signal = 0;
7260
7261 /*
7262- * "share" dynamic priority between parent and child, thus the
7263- * total amount of dynamic priorities in the system doesn't change,
7264- * more scheduling fairness. This is only important in the first
7265- * timeslice, on the long run the scheduling behaviour is unchanged.
7266- */
7267- p->counter = (current->counter + 1) >> 1;
7268- current->counter >>= 1;
7269- if (!current->counter)
7270- current->need_resched = 1;
7271+ * Share the timeslice between parent and child, thus the
7272+ * total amount of pending timeslices in the system doesn't change,
7273+ * resulting in more scheduling fairness.
7274+ */
7275+ __cli();
7276+ if (!current->time_slice)
7277+ BUG();
7278+ p->time_slice = (current->time_slice + 1) >> 1;
7279+ current->time_slice >>= 1;
7280+ p->first_time_slice = 1;
7281+ if (!current->time_slice) {
7282+ /*
7283+ * This case is rare, it happens when the parent has only
7284+ * a single jiffy left from its timeslice. Taking the
7285+ * runqueue lock is not a problem.
7286+ */
7287+ current->time_slice = 1;
7288+ scheduler_tick(0,0);
7289+ }
7290+ p->sleep_timestamp = jiffies;
7291+ __sti();
7292
7293 /*
7294 * Ok, add it to the run-queues and make it
7295@@ -813,11 +824,16 @@
7296
7297 if (p->ptrace & PT_PTRACED)
7298 send_sig(SIGSTOP, p, 1);
7299-
7300- wake_up_process(p); /* do this last */
7301+ wake_up_forked_process(p); /* do this last */
7302 ++total_forks;
7303 if (clone_flags & CLONE_VFORK)
7304 wait_for_completion(&vfork);
7305+ else
7306+ /*
7307+ * Let the child process run first, to avoid most of the
7308+ * COW overhead when the child exec()s afterwards.
7309+ */
7310+ current->need_resched = 1;
7311
7312 fork_out:
7313 return retval;
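The split above can be checked in isolation. A standalone userspace sketch of the same arithmetic (illustration only): the child takes half of the parent's remaining timeslice rounded up, the parent keeps the rest, so forking never creates timeslices out of thin air.

#include <stdio.h>

int main(void)
{
        unsigned int parent = 21;                       /* ticks left before the fork */
        unsigned int child  = (parent + 1) >> 1;        /* 11 ticks go to the child */

        parent >>= 1;                                   /* 10 ticks remain for the parent */
        printf("child=%u parent=%u total=%u\n", child, parent, child + parent);
        return 0;
}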
7314diff -urN linux-2.4.24.org/kernel/ksyms.c linux-2.4.24/kernel/ksyms.c
7315--- linux-2.4.24.org/kernel/ksyms.c 2004-02-04 20:47:26.747960727 +0100
7316+++ linux-2.4.24/kernel/ksyms.c 2004-02-04 20:52:54.992686623 +0100
7317@@ -461,7 +461,6 @@
7318 /* process management */
7319 EXPORT_SYMBOL(complete_and_exit);
7320 EXPORT_SYMBOL(__wake_up);
7321-EXPORT_SYMBOL(__wake_up_sync);
7322 EXPORT_SYMBOL(wake_up_process);
7323 EXPORT_SYMBOL(sleep_on);
7324 EXPORT_SYMBOL(sleep_on_timeout);
7325@@ -474,6 +473,8 @@
7326 #endif
7327 EXPORT_SYMBOL(yield);
7328 EXPORT_SYMBOL(__cond_resched);
7329+EXPORT_SYMBOL(set_user_nice);
7330+EXPORT_SYMBOL(nr_context_switches);
7331 EXPORT_SYMBOL(jiffies);
7332 EXPORT_SYMBOL(xtime);
7333 EXPORT_SYMBOL(do_gettimeofday);
7334@@ -484,7 +485,6 @@
7335 #endif
7336
7337 EXPORT_SYMBOL(kstat);
7338-EXPORT_SYMBOL(nr_running);
7339
7340 /* misc */
7341 EXPORT_SYMBOL(panic);
7342diff -urN linux-2.4.24.org/kernel/printk.c linux-2.4.24/kernel/printk.c
7343--- linux-2.4.24.org/kernel/printk.c 2004-02-04 20:47:26.744961351 +0100
7344+++ linux-2.4.24/kernel/printk.c 2004-02-04 20:52:55.015681840 +0100
7345@@ -26,6 +26,7 @@
7346 #include <linux/module.h>
7347 #include <linux/interrupt.h> /* For in_interrupt() */
7348 #include <linux/config.h>
7349+#include <linux/delay.h>
7350
7351 #include <asm/uaccess.h>
7352
7353diff -urN linux-2.4.24.org/kernel/ptrace.c linux-2.4.24/kernel/ptrace.c
7354--- linux-2.4.24.org/kernel/ptrace.c 2004-02-04 20:47:26.776954696 +0100
7355+++ linux-2.4.24/kernel/ptrace.c 2004-02-04 20:52:55.029678928 +0100
7356@@ -32,20 +32,7 @@
7357 if (child->state != TASK_STOPPED)
7358 return -ESRCH;
7359 #ifdef CONFIG_SMP
7360- /* Make sure the child gets off its CPU.. */
7361- for (;;) {
7362- task_lock(child);
7363- if (!task_has_cpu(child))
7364- break;
7365- task_unlock(child);
7366- do {
7367- if (child->state != TASK_STOPPED)
7368- return -ESRCH;
7369- barrier();
7370- cpu_relax();
7371- } while (task_has_cpu(child));
7372- }
7373- task_unlock(child);
7374+ wait_task_inactive(child);
7375 #endif
7376 }
7377
7378diff -urN linux-2.4.24.org/kernel/sched.c linux-2.4.24/kernel/sched.c
7379--- linux-2.4.24.org/kernel/sched.c 2004-02-04 20:47:26.741961975 +0100
7380+++ linux-2.4.24/kernel/sched.c 2004-02-04 20:52:55.076669155 +0100
7381@@ -3,340 +3,333 @@
7382 *
7383 * Kernel scheduler and related syscalls
7384 *
7385- * Copyright (C) 1991, 1992 Linus Torvalds
7386+ * Copyright (C) 1991-2002 Linus Torvalds
7387 *
7388 * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and
7389 * make semaphores SMP safe
7390 * 1998-11-19 Implemented schedule_timeout() and related stuff
7391 * by Andrea Arcangeli
7392- * 1998-12-28 Implemented better SMP scheduling by Ingo Molnar
7393+ * 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar:
7394+ * hybrid priority-list and round-robin design with
7395+ * an array-switch method of distributing timeslices
7396+ * and per-CPU runqueues. Additional code by Davide
7397+ * Libenzi, Robert Love, and Rusty Russell.
7398 */
7399
7400-/*
7401- * 'sched.c' is the main kernel file. It contains scheduling primitives
7402- * (sleep_on, wakeup, schedule etc) as well as a number of simple system
7403- * call functions (type getpid()), which just extract a field from
7404- * current-task
7405- */
7406-
7407-#include <linux/config.h>
7408 #include <linux/mm.h>
7409-#include <linux/init.h>
7410-#include <linux/smp_lock.h>
7411 #include <linux/nmi.h>
7412 #include <linux/interrupt.h>
7413-#include <linux/kernel_stat.h>
7414-#include <linux/completion.h>
7415-#include <linux/prefetch.h>
7416-#include <linux/compiler.h>
7417-
7418+#include <linux/init.h>
7419 #include <asm/uaccess.h>
7420+#include <linux/smp_lock.h>
7421 #include <asm/mmu_context.h>
7422-
7423-extern void timer_bh(void);
7424-extern void tqueue_bh(void);
7425-extern void immediate_bh(void);
7426+#include <linux/kernel_stat.h>
7427+#include <linux/completion.h>
7428
7429 /*
7430- * scheduler variables
7431- */
7432+ * Convert user-nice values [ -20 ... 0 ... 19 ]
7433+ * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
7434+ * and back.
7435+ */
7436+#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20)
7437+#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20)
7438+#define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio)
7439
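A worked example of the mapping above (standalone, illustration only; MAX_RT_PRIO = 100 and MAX_PRIO = 140 are assumptions matching the usual O(1)-scheduler split of 100 realtime levels plus 40 nice levels):

#include <stdio.h>

#define MAX_RT_PRIO             100
#define MAX_PRIO                (MAX_RT_PRIO + 40)
#define NICE_TO_PRIO(nice)      (MAX_RT_PRIO + (nice) + 20)
#define PRIO_TO_NICE(prio)      ((prio) - MAX_RT_PRIO - 20)

int main(void)
{
        int nice;

        /* nice -20 -> 100, nice 0 -> 120, nice 19 -> 139, and back again */
        for (nice = -20; nice <= 19; nice += 13)
                printf("nice %3d -> static_prio %3d -> nice %3d\n",
                       nice, NICE_TO_PRIO(nice), PRIO_TO_NICE(NICE_TO_PRIO(nice)));
        return 0;
}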
7440-unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
7441-
7442-extern void mem_use(void);
7443+/*
7444+ * 'User priority' is the nice value converted to something we
7445+ * can work with better when scaling various scheduler parameters,
7446+ * it's a [ 0 ... 39 ] range.
7447+ */
7448+#define USER_PRIO(p) ((p)-MAX_RT_PRIO)
7449+#define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio)
7450+#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO))
7451
7452 /*
7453- * Scheduling quanta.
7454+ * These are the 'tuning knobs' of the scheduler:
7455 *
7456- * NOTE! The unix "nice" value influences how long a process
7457- * gets. The nice value ranges from -20 to +19, where a -20
7458- * is a "high-priority" task, and a "+10" is a low-priority
7459- * task.
7460- *
7461- * We want the time-slice to be around 50ms or so, so this
7462- * calculation depends on the value of HZ.
7463- */
7464-#if HZ < 200
7465-#define TICK_SCALE(x) ((x) >> 2)
7466-#elif HZ < 400
7467-#define TICK_SCALE(x) ((x) >> 1)
7468-#elif HZ < 800
7469-#define TICK_SCALE(x) (x)
7470-#elif HZ < 1600
7471-#define TICK_SCALE(x) ((x) << 1)
7472-#else
7473-#define TICK_SCALE(x) ((x) << 2)
7474-#endif
7475-
7476-#define NICE_TO_TICKS(nice) (TICK_SCALE(20-(nice))+1)
7477-
7478+ * Minimum timeslice is 10 msecs, default timeslice is 150 msecs,
7479+ * maximum timeslice is 300 msecs. Timeslices get refilled after
7480+ * they expire.
7481+ */
7482+#define MIN_TIMESLICE ( 10 * HZ / 1000)
7483+#define MAX_TIMESLICE (300 * HZ / 1000)
7484+#define CHILD_PENALTY 50
7485+#define PARENT_PENALTY 100
7486+#define PRIO_BONUS_RATIO 25
7487+#define INTERACTIVE_DELTA 2
7488+#define MAX_SLEEP_AVG (2*HZ)
7489+#define STARVATION_LIMIT (2*HZ)
7490
7491 /*
7492- * Init task must be ok at boot for the ix86 as we will check its signals
7493- * via the SMP irq return path.
7494- */
7495-
7496-struct task_struct * init_tasks[NR_CPUS] = {&init_task, };
7497+ * If a task is 'interactive' then we reinsert it in the active
7498+ * array after it has expired its current timeslice. (it will not
7499+ * continue to run immediately, it will still roundrobin with
7500+ * other interactive tasks.)
7501+ *
7502+ * This part scales the interactivity limit depending on niceness.
7503+ *
7504+ * We scale it linearly, offset by the INTERACTIVE_DELTA delta.
7505+ * Here are a few examples of different nice levels:
7506+ *
7507+ * TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0]
7508+ * TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0]
7509+ * TASK_INTERACTIVE( 0): [1,1,1,1,0,0,0,0,0,0,0]
7510+ * TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0]
7511+ * TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0]
7512+ *
7513+ * (the X axis represents the possible -5 ... 0 ... +5 dynamic
7514+ * priority range a task can explore, a value of '1' means the
7515+ * task is rated interactive.)
7516+ *
7517+ * Ie. nice +19 tasks can never get 'interactive' enough to be
7518+ * reinserted into the active array. And only heavily CPU-hog nice -20
7519+ * tasks will be expired. Default nice 0 tasks are somewhere between,
7520+ * it takes some effort for them to get interactive, but it's not
7521+ * too hard.
7522+ */
7523+
7524+#define SCALE(v1,v1_max,v2_max) \
7525+ (v1) * (v2_max) / (v1_max)
7526+
7527+#define DELTA(p) \
7528+ (SCALE(TASK_NICE(p), 40, MAX_USER_PRIO*PRIO_BONUS_RATIO/100) + \
7529+ INTERACTIVE_DELTA)
7530+
7531+#define TASK_INTERACTIVE(p) \
7532+ ((p)->prio <= (p)->static_prio - DELTA(p))
7533
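A standalone check of the DELTA() arithmetic (illustration only; MAX_USER_PRIO = 40 follows from the 100 + 40 priority split assumed above). With these values DELTA(0) = 2, so a default nice 0 task must earn a sleep bonus of at least two priority levels before TASK_INTERACTIVE() holds, which is exactly the [1,1,1,1,0,...] row in the table above.

#include <stdio.h>

#define MAX_USER_PRIO           40
#define PRIO_BONUS_RATIO        25
#define INTERACTIVE_DELTA       2
#define SCALE(v1,v1_max,v2_max) ((v1) * (v2_max) / (v1_max))
#define DELTA(nice)             (SCALE((nice), 40, MAX_USER_PRIO*PRIO_BONUS_RATIO/100) + \
                                 INTERACTIVE_DELTA)

int main(void)
{
        int nice;

        for (nice = -20; nice <= 19; nice += 13)
                printf("nice %3d -> interactive delta %d\n", nice, DELTA(nice));
        return 0;
}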
7534 /*
7535- * The tasklist_lock protects the linked list of processes.
7536- *
7537- * The runqueue_lock locks the parts that actually access
7538- * and change the run-queues, and have to be interrupt-safe.
7539- *
7540- * If both locks are to be concurrently held, the runqueue_lock
7541- * nests inside the tasklist_lock.
7542+ * TASK_TIMESLICE scales user-nice values [ -20 ... 19 ]
7543+ * to time slice values.
7544 *
7545- * task->alloc_lock nests inside tasklist_lock.
7546+ * The higher a process's priority, the bigger timeslices
7547+ * it gets during one round of execution. But even the lowest
7548+ * priority process gets MIN_TIMESLICE worth of execution time.
7549 */
7550-spinlock_t runqueue_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED; /* inner */
7551-rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */
7552
7553-static LIST_HEAD(runqueue_head);
7554+#define TASK_TIMESLICE(p) (MIN_TIMESLICE + \
7555+ ((MAX_TIMESLICE - MIN_TIMESLICE) * (MAX_PRIO-1-(p)->static_prio)/39))
7556
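A worked example of TASK_TIMESLICE() (standalone, illustration only; HZ = 100 and the 100/140 priority split are assumptions). It reproduces the 10/150/300 msec figures quoted in the tuning-knob comment above.

#include <stdio.h>

#define HZ                      100
#define MAX_RT_PRIO             100
#define MAX_PRIO                (MAX_RT_PRIO + 40)
#define NICE_TO_PRIO(nice)      (MAX_RT_PRIO + (nice) + 20)
#define MIN_TIMESLICE           ( 10 * HZ / 1000)
#define MAX_TIMESLICE           (300 * HZ / 1000)
#define TASK_TIMESLICE(sp)      (MIN_TIMESLICE + \
        ((MAX_TIMESLICE - MIN_TIMESLICE) * (MAX_PRIO-1-(sp))/39))

int main(void)
{
        int nices[] = { -20, 0, 19 }, i;

        /* prints 30 ticks (300 ms), 15 ticks (150 ms), 1 tick (10 ms) */
        for (i = 0; i < 3; i++) {
                int ts = TASK_TIMESLICE(NICE_TO_PRIO(nices[i]));

                printf("nice %3d -> %2d ticks (%d ms)\n",
                       nices[i], ts, ts * 1000 / HZ);
        }
        return 0;
}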
7557 /*
7558- * We align per-CPU scheduling data on cacheline boundaries,
7559- * to prevent cacheline ping-pong.
7560+ * These are the runqueue data structures:
7561 */
7562-static union {
7563- struct schedule_data {
7564- struct task_struct * curr;
7565- cycles_t last_schedule;
7566- } schedule_data;
7567- char __pad [SMP_CACHE_BYTES];
7568-} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}};
7569
7570-#define cpu_curr(cpu) aligned_data[(cpu)].schedule_data.curr
7571-#define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule
7572+#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long))
7573
7574-struct kernel_stat kstat;
7575-extern struct task_struct *child_reaper;
7576+typedef struct runqueue runqueue_t;
7577
7578-#ifdef CONFIG_SMP
7579+struct prio_array {
7580+ int nr_active;
7581+ unsigned long bitmap[BITMAP_SIZE];
7582+ struct list_head queue[MAX_PRIO];
7583+};
7584
7585-#define idle_task(cpu) (init_tasks[cpu_number_map(cpu)])
7586-#define can_schedule(p,cpu) \
7587- ((p)->cpus_runnable & (p)->cpus_allowed & (1UL << cpu))
7588+/*
7589+ * This is the main, per-CPU runqueue data structure.
7590+ *
7591+ * Locking rule: code paths that need to lock multiple runqueues
7592+ * (such as the load balancing or the process migration code) must
7593+ * acquire the locks in ascending &runqueue order.
7594+ */
7595+struct runqueue {
7596+ spinlock_t lock;
7597+ unsigned long nr_running, nr_switches, expired_timestamp;
7598+ task_t *curr, *idle;
7599+ prio_array_t *active, *expired, arrays[2];
7600+ long nr_uninterruptible;
7601+#ifdef CONFIG_SMP
7602+ long last_jiffy;
7603+ int prev_nr_running[NR_CPUS];
7604+ task_t *migration_thread;
7605+ struct list_head migration_queue;
7606+#endif
7607+} ____cacheline_aligned;
7608
7609-#else
7610+static struct runqueue runqueues[NR_CPUS] __cacheline_aligned;
7611
7612-#define idle_task(cpu) (&init_task)
7613-#define can_schedule(p,cpu) (1)
7614+#define cpu_rq(cpu) (runqueues + (cpu))
7615+#define this_rq() cpu_rq(smp_processor_id())
7616+#define task_rq(p) cpu_rq((p)->cpu)
7617+#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
7618+#define rt_task(p) ((p)->prio < MAX_RT_PRIO)
7619
7620+/*
7621+ * Default context-switch locking:
7622+ */
7623+#ifndef prepare_arch_switch
7624+# define prepare_arch_switch(rq, next) do { } while(0)
7625+# define finish_arch_switch(rq, prev) spin_unlock_irq(&(rq)->lock)
7626 #endif
7627
7628-void scheduling_functions_start_here(void) { }
7629-
7630 /*
7631- * This is the function that decides how desirable a process is..
7632- * You can weigh different processes against each other depending
7633- * on what CPU they've run on lately etc to try to handle cache
7634- * and TLB miss penalties.
7635- *
7636- * Return values:
7637- * -1000: never select this
7638- * 0: out of time, recalculate counters (but it might still be
7639- * selected)
7640- * +ve: "goodness" value (the larger, the better)
7641- * +1000: realtime process, select this.
7642+ * task_rq_lock - lock the runqueue a given task resides on and disable
7643+ * interrupts. Note the ordering: we can safely lookup the task_rq without
7644+ * explicitly disabling preemption.
7645 */
7646-
7647-static inline int goodness(struct task_struct * p, int this_cpu, struct mm_struct *this_mm)
7648+static inline runqueue_t *task_rq_lock(task_t *p, unsigned long *flags)
7649 {
7650- int weight;
7651-
7652- /*
7653- * select the current process after every other
7654- * runnable process, but before the idle thread.
7655- * Also, dont trigger a counter recalculation.
7656- */
7657- weight = -1;
7658- if (p->policy & SCHED_YIELD)
7659- goto out;
7660+ struct runqueue *rq;
7661
7662- /*
7663- * Non-RT process - normal case first.
7664- */
7665- if (p->policy == SCHED_OTHER) {
7666- /*
7667- * Give the process a first-approximation goodness value
7668- * according to the number of clock-ticks it has left.
7669- *
7670- * Don't do any other calculations if the time slice is
7671- * over..
7672- */
7673- weight = p->counter;
7674- if (!weight)
7675- goto out;
7676-
7677-#ifdef CONFIG_SMP
7678- /* Give a largish advantage to the same processor... */
7679- /* (this is equivalent to penalizing other processors) */
7680- if (p->processor == this_cpu)
7681- weight += PROC_CHANGE_PENALTY;
7682-#endif
7683-
7684- /* .. and a slight advantage to the current MM */
7685- if (p->mm == this_mm || !p->mm)
7686- weight += 1;
7687- weight += 20 - p->nice;
7688- goto out;
7689+repeat_lock_task:
7690+ rq = task_rq(p);
7691+ spin_lock_irqsave(&rq->lock, *flags);
7692+ if (unlikely(rq != task_rq(p))) {
7693+ spin_unlock_irqrestore(&rq->lock, *flags);
7694+ goto repeat_lock_task;
7695 }
7696+ return rq;
7697+}
7698
7699- /*
7700- * Realtime process, select the first one on the
7701- * runqueue (taking priorities within processes
7702- * into account).
7703- */
7704- weight = 1000 + p->rt_priority;
7705-out:
7706- return weight;
7707+static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags)
7708+{
7709+ spin_unlock_irqrestore(&rq->lock, *flags);
7710 }
7711
7712 /*
7713- * the 'goodness value' of replacing a process on a given CPU.
7714- * positive value means 'replace', zero or negative means 'dont'.
7715+ * Adding/removing a task to/from a priority array:
7716 */
7717-static inline int preemption_goodness(struct task_struct * prev, struct task_struct * p, int cpu)
7718+static inline void dequeue_task(struct task_struct *p, prio_array_t *array)
7719 {
7720- return goodness(p, cpu, prev->active_mm) - goodness(prev, cpu, prev->active_mm);
7721+ array->nr_active--;
7722+ list_del(&p->run_list);
7723+ if (list_empty(array->queue + p->prio))
7724+ __clear_bit(p->prio, array->bitmap);
7725 }
7726
7727-/*
7728- * This is ugly, but reschedule_idle() is very timing-critical.
7729- * We are called with the runqueue spinlock held and we must
7730- * not claim the tasklist_lock.
7731- */
7732-static FASTCALL(void reschedule_idle(struct task_struct * p));
7733+#define enqueue_task(p, array) __enqueue_task(p, array, NULL)
7734+static inline void __enqueue_task(struct task_struct *p, prio_array_t *array, task_t * parent)
7735+{
7736+ if (!parent) {
7737+ list_add_tail(&p->run_list, array->queue + p->prio);
7738+ __set_bit(p->prio, array->bitmap);
7739+ p->array = array;
7740+ } else {
7741+ list_add_tail(&p->run_list, &parent->run_list);
7742+ array = p->array = parent->array;
7743+ }
7744+ array->nr_active++;
7745+}
7746
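These two helpers are the core of the O(1) design: every runqueue keeps one list per priority level plus a bitmap of non-empty lists, so insertion, removal and picking the highest-priority runnable task all take a constant number of operations, independent of how many tasks are runnable. A minimal userspace sketch of the same idea (illustration only; GCC's __builtin_ctzl() stands in for the kernel's sched_find_first_bit()):

#include <stdio.h>

#define MAX_PRIO        140
#define BITS_PER_LONG   ((int)(8 * sizeof(unsigned long)))
#define BITMAP_LONGS    ((MAX_PRIO + BITS_PER_LONG - 1) / BITS_PER_LONG)

static unsigned long bitmap[BITMAP_LONGS];
static int nr_active[MAX_PRIO];         /* stand-in for the per-priority lists */

static void enqueue(int prio)
{
        if (!nr_active[prio]++)
                bitmap[prio / BITS_PER_LONG] |= 1UL << (prio % BITS_PER_LONG);
}

static int pick_next(void)              /* O(1): scans a fixed-size bitmap */
{
        int i;

        for (i = 0; i < BITMAP_LONGS; i++)
                if (bitmap[i])
                        return i * BITS_PER_LONG + __builtin_ctzl(bitmap[i]);
        return -1;                      /* runqueue empty */
}

int main(void)
{
        enqueue(120);                   /* a nice 0 task */
        enqueue(50);                    /* an RT task, prio < MAX_RT_PRIO */
        printf("next prio to run: %d\n", pick_next());  /* -> 50 */
        return 0;
}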
7747-static void fastcall reschedule_idle(struct task_struct * p)
7748+static inline int effective_prio(task_t *p)
7749 {
7750-#ifdef CONFIG_SMP
7751- int this_cpu = smp_processor_id();
7752- struct task_struct *tsk, *target_tsk;
7753- int cpu, best_cpu, i, max_prio;
7754- cycles_t oldest_idle;
7755+ int bonus, prio;
7756
7757 /*
7758- * shortcut if the woken up task's last CPU is
7759- * idle now.
7760+ * Here we scale the actual sleep average [0 .... MAX_SLEEP_AVG]
7761+ * into the -5 ... 0 ... +5 bonus/penalty range.
7762+ *
7763+ * We use 25% of the full 0...39 priority range so that:
7764+ *
7765+ * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
7766+ * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
7767+ *
7768+ * Both properties are important to certain workloads.
7769 */
7770- best_cpu = p->processor;
7771- if (can_schedule(p, best_cpu)) {
7772- tsk = idle_task(best_cpu);
7773- if (cpu_curr(best_cpu) == tsk) {
7774- int need_resched;
7775-send_now_idle:
7776- /*
7777- * If need_resched == -1 then we can skip sending
7778- * the IPI altogether, tsk->need_resched is
7779- * actively watched by the idle thread.
7780- */
7781- need_resched = tsk->need_resched;
7782- tsk->need_resched = 1;
7783- if ((best_cpu != this_cpu) && !need_resched)
7784- smp_send_reschedule(best_cpu);
7785- return;
7786- }
7787- }
7788+ bonus = MAX_USER_PRIO*PRIO_BONUS_RATIO*p->sleep_avg/MAX_SLEEP_AVG/100 -
7789+ MAX_USER_PRIO*PRIO_BONUS_RATIO/100/2;
7790
7791- /*
7792- * We know that the preferred CPU has a cache-affine current
7793- * process, lets try to find a new idle CPU for the woken-up
7794- * process. Select the least recently active idle CPU. (that
7795- * one will have the least active cache context.) Also find
7796- * the executing process which has the least priority.
7797- */
7798- oldest_idle = (cycles_t) -1;
7799- target_tsk = NULL;
7800- max_prio = 0;
7801+ prio = p->static_prio - bonus;
7802+ if (prio < MAX_RT_PRIO)
7803+ prio = MAX_RT_PRIO;
7804+ if (prio > MAX_PRIO-1)
7805+ prio = MAX_PRIO-1;
7806+ return prio;
7807+}
7808
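Worked example of the bonus calculation above (using MAX_SLEEP_AVG = 2*HZ and PRIO_BONUS_RATIO = 25 from this file, plus the 100/140 split assumed earlier): MAX_USER_PRIO*PRIO_BONUS_RATIO/100 = 10, so bonus = 10*sleep_avg/(2*HZ) - 5, a value between -5 and +5. A nice 0 task (static_prio 120) therefore runs somewhere in the 115..125 range: 115 after long sleeps, 125 if it never sleeps, always clamped to [MAX_RT_PRIO, MAX_PRIO-1].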
7809- for (i = 0; i < smp_num_cpus; i++) {
7810- cpu = cpu_logical_map(i);
7811- if (!can_schedule(p, cpu))
7812- continue;
7813- tsk = cpu_curr(cpu);
7814+#define activate_task(p, rq) __activate_task(p, rq, NULL)
7815+static inline void __activate_task(task_t *p, runqueue_t *rq, task_t * parent)
7816+{
7817+ unsigned long sleep_time = jiffies - p->sleep_timestamp;
7818+ prio_array_t *array = rq->active;
7819+
7820+ if (!parent && !rt_task(p) && sleep_time) {
7821 /*
7822- * We use the first available idle CPU. This creates
7823- * a priority list between idle CPUs, but this is not
7824- * a problem.
7825+ * This code gives a bonus to interactive tasks. We update
7826+ * an 'average sleep time' value here, based on
7827+ * sleep_timestamp. The more time a task spends sleeping,
7828+ * the higher the average gets - and the higher the priority
7829+ * boost gets as well.
7830 */
7831- if (tsk == idle_task(cpu)) {
7832-#if defined(__i386__) && defined(CONFIG_SMP)
7833- /*
7834- * Check if two siblings are idle in the same
7835- * physical package. Use them if found.
7836- */
7837- if (smp_num_siblings == 2) {
7838- if (cpu_curr(cpu_sibling_map[cpu]) ==
7839- idle_task(cpu_sibling_map[cpu])) {
7840- oldest_idle = last_schedule(cpu);
7841- target_tsk = tsk;
7842- break;
7843- }
7844-
7845- }
7846-#endif
7847- if (last_schedule(cpu) < oldest_idle) {
7848- oldest_idle = last_schedule(cpu);
7849- target_tsk = tsk;
7850- }
7851- } else {
7852- if (oldest_idle == (cycles_t)-1) {
7853- int prio = preemption_goodness(tsk, p, cpu);
7854-
7855- if (prio > max_prio) {
7856- max_prio = prio;
7857- target_tsk = tsk;
7858- }
7859- }
7860- }
7861- }
7862- tsk = target_tsk;
7863- if (tsk) {
7864- if (oldest_idle != (cycles_t)-1) {
7865- best_cpu = tsk->processor;
7866- goto send_now_idle;
7867- }
7868- tsk->need_resched = 1;
7869- if (tsk->processor != this_cpu)
7870- smp_send_reschedule(tsk->processor);
7871+ p->sleep_timestamp = jiffies;
7872+ p->sleep_avg += sleep_time;
7873+ if (p->sleep_avg > MAX_SLEEP_AVG)
7874+ p->sleep_avg = MAX_SLEEP_AVG;
7875+ p->prio = effective_prio(p);
7876 }
7877- return;
7878-
7879+ __enqueue_task(p, array, parent);
7880+ rq->nr_running++;
7881+}
7882
7883-#else /* UP */
7884- int this_cpu = smp_processor_id();
7885- struct task_struct *tsk;
7886+static inline void deactivate_task(struct task_struct *p, runqueue_t *rq)
7887+{
7888+ rq->nr_running--;
7889+ if (p->state == TASK_UNINTERRUPTIBLE)
7890+ rq->nr_uninterruptible++;
7891+ dequeue_task(p, p->array);
7892+ p->array = NULL;
7893+}
7894+
7895+static inline void resched_task(task_t *p)
7896+{
7897+#ifdef CONFIG_SMP
7898+ int need_resched;
7899
7900- tsk = cpu_curr(this_cpu);
7901- if (preemption_goodness(tsk, p, this_cpu) > 0)
7902- tsk->need_resched = 1;
7903+ need_resched = p->need_resched;
7904+ set_tsk_need_resched(p);
7905+ if (!need_resched && (p->cpu != smp_processor_id()))
7906+ smp_send_reschedule(p->cpu);
7907+#else
7908+ set_tsk_need_resched(p);
7909 #endif
7910 }
7911
7912+#ifdef CONFIG_SMP
7913+
7914 /*
7915- * Careful!
7916- *
7917- * This has to add the process to the _end_ of the
7918- * run-queue, not the beginning. The goodness value will
7919- * determine whether this process will run next. This is
7920- * important to get SCHED_FIFO and SCHED_RR right, where
7921- * a process that is either pre-empted or its time slice
7922- * has expired, should be moved to the tail of the run
7923- * queue for its priority - Bhavesh Davda
7924+ * Wait for a process to unschedule. This is used by the exit() and
7925+ * ptrace() code.
7926 */
7927-static inline void add_to_runqueue(struct task_struct * p)
7928+void wait_task_inactive(task_t * p)
7929 {
7930- list_add_tail(&p->run_list, &runqueue_head);
7931- nr_running++;
7932+ unsigned long flags;
7933+ runqueue_t *rq;
7934+
7935+repeat:
7936+ rq = task_rq(p);
7937+ if (unlikely(rq->curr == p)) {
7938+ cpu_relax();
7939+ barrier();
7940+ goto repeat;
7941+ }
7942+ rq = task_rq_lock(p, &flags);
7943+ if (unlikely(rq->curr == p)) {
7944+ task_rq_unlock(rq, &flags);
7945+ goto repeat;
7946+ }
7947+ task_rq_unlock(rq, &flags);
7948 }
7949
7950-static inline void move_last_runqueue(struct task_struct * p)
7951+/*
7952+ * Kick the remote CPU if the task is running currently,
7953+ * this code is used by the signal code to signal tasks
7954+ * which are in user-mode as quickly as possible.
7955+ *
7956+ * (Note that we do this lockless - if the task does anything
7957+ * while the message is in flight then it will notice the
7958+ * sigpending condition anyway.)
7959+ */
7960+void kick_if_running(task_t * p)
7961 {
7962- list_del(&p->run_list);
7963- list_add_tail(&p->run_list, &runqueue_head);
7964+ if (p == task_rq(p)->curr && p->cpu != smp_processor_id())
7965+ resched_task(p);
7966 }
7967+#endif
7968+
7969+#ifdef CONFIG_SMP
7970+static int FASTCALL(reschedule_idle(task_t * p));
7971+static void FASTCALL(load_balance(runqueue_t *this_rq, int idle));
7972+#endif
7973+
7974
7975 /*
7976 * Wake up a process. Put it on the run-queue if it's not
7977@@ -345,429 +338,721 @@
7978 * progress), and as such you're allowed to do the simpler
7979 * "current->state = TASK_RUNNING" to mark yourself runnable
7980 * without the overhead of this.
7981+ *
7982+ * returns failure only if the task is already active.
7983 */
7984-static inline int try_to_wake_up(struct task_struct * p, int synchronous)
7985+static int try_to_wake_up(task_t * p, int sync)
7986 {
7987 unsigned long flags;
7988 int success = 0;
7989+ long old_state;
7990+ runqueue_t *rq;
7991+#ifdef CONFIG_SMP
7992+ int migrated_to_idle = 0;
7993+#endif
7994+
7995+#ifdef CONFIG_SMP
7996+repeat_lock_task:
7997+#endif
7998+ rq = task_rq_lock(p, &flags);
7999+ old_state = p->state;
8000+ if (!p->array) {
8001+#ifdef CONFIG_SMP
8002+ if (likely(rq->curr != p)) {
8003+ /* can migrate */
8004+ if (unlikely(sync)) {
8005+ if (p->cpu != smp_processor_id() &&
8006+ p->cpus_allowed & (1UL << smp_processor_id())) {
8007+ p->cpu = smp_processor_id();
8008+ goto migrated_task;
8009+ }
8010+ } else {
8011+ if (reschedule_idle(p))
8012+ goto migrated_task;
8013+ }
8014+ }
8015+#endif
8016+ if (old_state == TASK_UNINTERRUPTIBLE)
8017+ rq->nr_uninterruptible--;
8018+ activate_task(p, rq);
8019+ if (p->prio < rq->curr->prio)
8020+ resched_task(rq->curr);
8021+ success = 1;
8022+ }
8023+ p->state = TASK_RUNNING;
8024
8025+#ifdef CONFIG_SMP
8026 /*
8027- * We want the common case fall through straight, thus the goto.
8028+ * Subtle: we can load_balance only here (before the unlock),
8029+ * because load_balance() can internally drop the lock. We claim
8030+ * that the cpu is running so this will be a light rebalance;
8031+ * if this cpu goes idle soon, schedule() will trigger the
8032+ * idle rescheduling balancing by itself.
8033 */
8034- spin_lock_irqsave(&runqueue_lock, flags);
8035- p->state = TASK_RUNNING;
8036- if (task_on_runqueue(p))
8037- goto out;
8038- add_to_runqueue(p);
8039- if (!synchronous || !(p->cpus_allowed & (1UL << smp_processor_id())))
8040- reschedule_idle(p);
8041- success = 1;
8042-out:
8043- spin_unlock_irqrestore(&runqueue_lock, flags);
8044+ if (success && migrated_to_idle)
8045+ load_balance(rq, 0);
8046+#endif
8047+
8048+ task_rq_unlock(rq, &flags);
8049+
8050 return success;
8051+
8052+#ifdef CONFIG_SMP
8053+ migrated_task:
8054+ task_rq_unlock(rq, &flags);
8055+ migrated_to_idle = 1;
8056+ goto repeat_lock_task;
8057+#endif
8058 }
8059
8060-inline int fastcall wake_up_process(struct task_struct * p)
8061+int wake_up_process(task_t * p)
8062 {
8063 return try_to_wake_up(p, 0);
8064 }
8065
8066-static void process_timeout(unsigned long __data)
8067+void wake_up_forked_process(task_t * p)
8068 {
8069- struct task_struct * p = (struct task_struct *) __data;
8070+ runqueue_t *rq;
8071+ task_t * parent = current;
8072
8073- wake_up_process(p);
8074-}
8075+ rq = this_rq();
8076+ spin_lock_irq(&rq->lock);
8077
8078-/**
8079- * schedule_timeout - sleep until timeout
8080- * @timeout: timeout value in jiffies
8081- *
8082- * Make the current task sleep until @timeout jiffies have
8083- * elapsed. The routine will return immediately unless
8084- * the current task state has been set (see set_current_state()).
8085- *
8086- * You can set the task state as follows -
8087- *
8088- * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
8089- * pass before the routine returns. The routine will return 0
8090- *
8091- * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
8092- * delivered to the current task. In this case the remaining time
8093- * in jiffies will be returned, or 0 if the timer expired in time
8094- *
8095- * The current task state is guaranteed to be TASK_RUNNING when this
8096- * routine returns.
8097- *
8098- * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
8099- * the CPU away without a bound on the timeout. In this case the return
8100- * value will be %MAX_SCHEDULE_TIMEOUT.
8101- *
8102- * In all cases the return value is guaranteed to be non-negative.
8103- */
8104-signed long fastcall schedule_timeout(signed long timeout)
8105-{
8106- struct timer_list timer;
8107- unsigned long expire;
8108+ p->state = TASK_RUNNING;
8109+ if (likely(!rt_task(p) && parent->array)) {
8110+ /*
8111+ * We decrease the sleep average of forked
8112+ * children, to keep max-interactive tasks
8113+ * from forking tasks that are max-interactive.
8114+ * CHILD_PENALTY is set to 50% since we have
8115+ * no clue if this is still an interactive
8116+ * task like the parent or if this will be a
8117+ * cpu bound task. The parent isn't touched
8118+ * as we don't make assumption about the parent
8119+ * changing behaviour after the child is forked.
8120+ */
8121+ parent->sleep_avg = parent->sleep_avg * PARENT_PENALTY / 100;
8122+ p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100;
8123
8124- switch (timeout)
8125- {
8126- case MAX_SCHEDULE_TIMEOUT:
8127 /*
8128- * These two special cases are useful to be comfortable
8129- * in the caller. Nothing more. We could take
8130- * MAX_SCHEDULE_TIMEOUT from one of the negative value
8131- * but I' d like to return a valid offset (>=0) to allow
8132- * the caller to do everything it want with the retval.
8133+ * For its first schedule keep the child at the same
8134+ * priority (i.e. in the same list) of the parent,
8135+ * activate_forked_task() will take care to put the
8136+ * child in front of the parent (lifo) to guarantee a
8137+ * schedule-child-first behaviour after fork.
8138 */
8139- schedule();
8140- goto out;
8141- default:
8142+ p->prio = parent->prio;
8143+ } else {
8144 /*
8145- * Another bit of PARANOID. Note that the retval will be
8146- * 0 since no piece of kernel is supposed to do a check
8147- * for a negative retval of schedule_timeout() (since it
8148- * should never happens anyway). You just have the printk()
8149- * that will tell you if something is gone wrong and where.
8150+ * Take the usual wakeup path if it's RT or if
8151+ * it's a child of the first idle task (during boot
8152+ * only).
8153 */
8154- if (timeout < 0)
8155- {
8156- printk(KERN_ERR "schedule_timeout: wrong timeout "
8157- "value %lx from %p\n", timeout,
8158- __builtin_return_address(0));
8159- current->state = TASK_RUNNING;
8160- goto out;
8161- }
8162+ p->prio = effective_prio(p);
8163+ parent = NULL;
8164 }
8165
8166- expire = timeout + jiffies;
8167+ p->cpu = smp_processor_id();
8168+ __activate_task(p, rq, parent);
8169+ spin_unlock_irq(&rq->lock);
8170+}
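Worked example of the penalties above: with PARENT_PENALTY = 100 the parent's sleep_avg is left untouched, while CHILD_PENALTY = 50 halves the value the child inherited. A fully interactive parent (sleep_avg = MAX_SLEEP_AVG = 2*HZ) therefore forks a child that starts at sleep_avg = HZ, the neutral point of the effective_prio() bonus, so the child has to earn any interactivity bonus on its own.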
8171
8172- init_timer(&timer);
8173- timer.expires = expire;
8174- timer.data = (unsigned long) current;
8175- timer.function = process_timeout;
8176+/*
8177+ * Potentially available exiting-child timeslices are
8178+ * retrieved here - this way the parent does not get
8179+ * penalized for creating too many processes.
8180+ *
8181+ * (this cannot be used to 'generate' timeslices
8182+ * artificially, because any timeslice recovered here
8183+ * was given away by the parent in the first place.)
8184+ */
8185+void sched_exit(task_t * p)
8186+{
8187+ __cli();
8188+ if (p->first_time_slice) {
8189+ current->time_slice += p->time_slice;
8190+ if (unlikely(current->time_slice > MAX_TIMESLICE))
8191+ current->time_slice = MAX_TIMESLICE;
8192+ }
8193+ __sti();
8194+}
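Continuing the fork.c sketch above (illustrative numbers): if the child exits after using 3 of the 11 ticks it inherited and its first timeslice never expired, first_time_slice is still set and the remaining 8 ticks are handed back to the parent here, capped at MAX_TIMESLICE.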
8195
8196- add_timer(&timer);
8197- schedule();
8198- del_timer_sync(&timer);
8199+#if CONFIG_SMP
8200+asmlinkage void schedule_tail(task_t *prev)
8201+{
8202+ finish_arch_switch(this_rq(), prev);
8203+}
8204+#endif
8205+
8206+static inline task_t * context_switch(task_t *prev, task_t *next)
8207+{
8208+ struct mm_struct *mm = next->mm;
8209+ struct mm_struct *oldmm = prev->active_mm;
8210
8211- timeout = expire - jiffies;
8212+ if (unlikely(!mm)) {
8213+ next->active_mm = oldmm;
8214+ atomic_inc(&oldmm->mm_count);
8215+ enter_lazy_tlb(oldmm, next, smp_processor_id());
8216+ } else
8217+ switch_mm(oldmm, mm, next, smp_processor_id());
8218
8219- out:
8220- return timeout < 0 ? 0 : timeout;
8221+ if (unlikely(!prev->mm)) {
8222+ prev->active_mm = NULL;
8223+ mmdrop(oldmm);
8224+ }
8225+
8226+ /* Here we just switch the register state and the stack. */
8227+ switch_to(prev, next, prev);
8228+
8229+ return prev;
8230 }
8231
8232-/*
8233- * schedule_tail() is getting called from the fork return path. This
8234- * cleans up all remaining scheduler things, without impacting the
8235- * common case.
8236- */
8237-static inline void __schedule_tail(struct task_struct *prev)
8238+unsigned long nr_running(void)
8239 {
8240-#ifdef CONFIG_SMP
8241- int policy;
8242+ unsigned long i, sum = 0;
8243
8244- /*
8245- * prev->policy can be written from here only before `prev'
8246- * can be scheduled (before setting prev->cpus_runnable to ~0UL).
8247- * Of course it must also be read before allowing prev
8248- * to be rescheduled, but since the write depends on the read
8249- * to complete, wmb() is enough. (the spin_lock() acquired
8250- * before setting cpus_runnable is not enough because the spin_lock()
8251- * common code semantics allows code outside the critical section
8252- * to enter inside the critical section)
8253- */
8254- policy = prev->policy;
8255- prev->policy = policy & ~SCHED_YIELD;
8256- wmb();
8257+ for (i = 0; i < smp_num_cpus; i++)
8258+ sum += cpu_rq(cpu_logical_map(i))->nr_running;
8259
8260- /*
8261- * fast path falls through. We have to clear cpus_runnable before
8262- * checking prev->state to avoid a wakeup race. Protect against
8263- * the task exiting early.
8264- */
8265- task_lock(prev);
8266- task_release_cpu(prev);
8267- mb();
8268- if (prev->state == TASK_RUNNING)
8269- goto needs_resched;
8270+ return sum;
8271+}
8272
8273-out_unlock:
8274- task_unlock(prev); /* Synchronise here with release_task() if prev is TASK_ZOMBIE */
8275- return;
8276+/* Note: the per-cpu information is useful only to get the cumulative result */
8277+unsigned long nr_uninterruptible(void)
8278+{
8279+ unsigned long i, sum = 0;
8280
8281- /*
8282- * Slow path - we 'push' the previous process and
8283- * reschedule_idle() will attempt to find a new
8284- * processor for it. (but it might preempt the
8285- * current process as well.) We must take the runqueue
8286- * lock and re-check prev->state to be correct. It might
8287- * still happen that this process has a preemption
8288- * 'in progress' already - but this is not a problem and
8289- * might happen in other circumstances as well.
8290- */
8291-needs_resched:
8292- {
8293- unsigned long flags;
8294+ for (i = 0; i < smp_num_cpus; i++)
8295+ sum += cpu_rq(cpu_logical_map(i))->nr_uninterruptible;
8296
8297- /*
8298- * Avoid taking the runqueue lock in cases where
8299- * no preemption-check is necessery:
8300- */
8301- if ((prev == idle_task(smp_processor_id())) ||
8302- (policy & SCHED_YIELD))
8303- goto out_unlock;
8304+ return sum;
8305+}
8306
8307- spin_lock_irqsave(&runqueue_lock, flags);
8308- if ((prev->state == TASK_RUNNING) && !task_has_cpu(prev))
8309- reschedule_idle(prev);
8310- spin_unlock_irqrestore(&runqueue_lock, flags);
8311- goto out_unlock;
8312- }
8313-#else
8314- prev->policy &= ~SCHED_YIELD;
8315-#endif /* CONFIG_SMP */
8316+unsigned long nr_context_switches(void)
8317+{
8318+ unsigned long i, sum = 0;
8319+
8320+ for (i = 0; i < smp_num_cpus; i++)
8321+ sum += cpu_rq(cpu_logical_map(i))->nr_switches;
8322+
8323+ return sum;
8324 }
8325
8326-asmlinkage void schedule_tail(struct task_struct *prev)
8327+inline int idle_cpu(int cpu)
8328 {
8329- __schedule_tail(prev);
8330+ return cpu_curr(cpu) == cpu_rq(cpu)->idle;
8331 }
8332
8333+#if CONFIG_SMP
8334 /*
8335- * 'schedule()' is the scheduler function. It's a very simple and nice
8336- * scheduler: it's not perfect, but certainly works for most things.
8337- *
8338- * The goto is "interesting".
8339- *
8340- * NOTE!! Task 0 is the 'idle' task, which gets called when no other
8341- * tasks can run. It can not be killed, and it cannot sleep. The 'state'
8342- * information in task[0] is never used.
8343+ * Lock the busiest runqueue as well, this_rq is locked already.
8344+ * Recalculate nr_running if we have to drop the runqueue lock.
8345 */
8346-asmlinkage void schedule(void)
8347+static inline unsigned int double_lock_balance(runqueue_t *this_rq,
8348+ runqueue_t *busiest, int this_cpu, int idle, unsigned int nr_running)
8349 {
8350- struct schedule_data * sched_data;
8351- struct task_struct *prev, *next, *p;
8352- struct list_head *tmp;
8353- int this_cpu, c;
8354+ if (unlikely(!spin_trylock(&busiest->lock))) {
8355+ if (busiest < this_rq) {
8356+ spin_unlock(&this_rq->lock);
8357+ spin_lock(&busiest->lock);
8358+ spin_lock(&this_rq->lock);
8359+ /* Need to recalculate nr_running */
8360+ if (idle || (this_rq->nr_running > this_rq->prev_nr_running[this_cpu]))
8361+ nr_running = this_rq->nr_running;
8362+ else
8363+ nr_running = this_rq->prev_nr_running[this_cpu];
8364+ } else
8365+ spin_lock(&busiest->lock);
8366+ }
8367+ return nr_running;
8368+}
8369
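The trylock-and-reorder dance above enforces one simple rule, already stated in the runqueue comment earlier: when two runqueue locks must be held, they are always acquired in ascending address order, so two CPUs balancing towards each other can never deadlock. A userspace sketch of the same rule (illustration only):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
        /* lower address first, no matter which order the caller passed them in */
        if ((uintptr_t)a < (uintptr_t)b) {
                pthread_mutex_lock(a);
                pthread_mutex_lock(b);
        } else {
                pthread_mutex_lock(b);
                pthread_mutex_lock(a);
        }
}

int main(void)
{
        pthread_mutex_t rq1 = PTHREAD_MUTEX_INITIALIZER;
        pthread_mutex_t rq2 = PTHREAD_MUTEX_INITIALIZER;

        lock_pair(&rq1, &rq2);          /* caller A's argument order */
        pthread_mutex_unlock(&rq2);
        pthread_mutex_unlock(&rq1);

        lock_pair(&rq2, &rq1);          /* caller B's order - same acquisition order */
        pthread_mutex_unlock(&rq2);
        pthread_mutex_unlock(&rq1);

        printf("both argument orders acquire the locks identically\n");
        return 0;
}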
8370+/*
8371+ * Move a task from a remote runqueue to the local runqueue.
8372+ * Both runqueues must be locked.
8373+ */
8374+static inline int pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, runqueue_t *this_rq, int this_cpu)
8375+{
8376+ int resched = 0;
8377
8378- spin_lock_prefetch(&runqueue_lock);
8379+ dequeue_task(p, src_array);
8380+ src_rq->nr_running--;
8381+ p->cpu = this_cpu;
8382+ this_rq->nr_running++;
8383+ enqueue_task(p, this_rq->active);
8384+ /*
8385+ * Note that idle threads have a prio of MAX_PRIO, for this test
8386+ * to be always true for them.
8387+ */
8388+ if (p->prio < this_rq->curr->prio)
8389+ resched = 1;
8390
8391- BUG_ON(!current->active_mm);
8392-need_resched_back:
8393- prev = current;
8394- this_cpu = prev->processor;
8395+ return resched;
8396+}
8397
8398- if (unlikely(in_interrupt())) {
8399- printk("Scheduling in interrupt\n");
8400- BUG();
8401+static inline int idle_cpu_reschedule(task_t * p, int cpu)
8402+{
8403+ if (unlikely(!(p->cpus_allowed & (1UL << cpu))))
8404+ return 0;
8405+ return idle_cpu(cpu);
8406+}
8407+
8408+#include <linux/smp_balance.h>
8409+
8410+static int reschedule_idle(task_t * p)
8411+{
8412+ int p_cpu = p->cpu, i;
8413+
8414+ if (idle_cpu(p_cpu))
8415+ return 0;
8416+
8417+ p_cpu = cpu_number_map(p_cpu);
8418+
8419+ for (i = (p_cpu + 1) % smp_num_cpus;
8420+ i != p_cpu;
8421+ i = (i + 1) % smp_num_cpus) {
8422+ int physical = cpu_logical_map(i);
8423+
8424+ if (idle_cpu_reschedule(p, physical)) {
8425+ physical = arch_reschedule_idle_override(p, physical);
8426+ p->cpu = physical;
8427+ return 1;
8428+ }
8429 }
8430
8431- release_kernel_lock(prev, this_cpu);
8432+ return 0;
8433+}
8434+
8435+/*
8436+ * Current runqueue is empty, or rebalance tick: if there is an
8437+ * imbalance (current runqueue is too short) then pull from
8438+ * busiest runqueue(s).
8439+ *
8440+ * We call this with the current runqueue locked,
8441+ * irqs disabled.
8442+ */
8443+static void load_balance(runqueue_t *this_rq, int idle)
8444+{
8445+ int imbalance, nr_running, load, max_load,
8446+ idx, i, this_cpu = this_rq - runqueues;
8447+ task_t *tmp;
8448+ runqueue_t *busiest, *rq_src;
8449+ prio_array_t *array;
8450+ struct list_head *head, *curr;
8451+ int resched;
8452
8453 /*
8454- * 'sched_data' is protected by the fact that we can run
8455- * only one process per CPU.
8456+ * Handle architecture-specific balancing, such as hyperthreading.
8457 */
8458- sched_data = & aligned_data[this_cpu].schedule_data;
8459+ if (arch_load_balance(this_cpu, idle))
8460+ return;
8461
8462- spin_lock_irq(&runqueue_lock);
8463+ retry:
8464+ /*
8465+ * We search all runqueues to find the most busy one.
8466+ * We do this lockless to reduce cache-bouncing overhead,
8467+ * we re-check the 'best' source CPU later on again, with
8468+ * the lock held.
8469+ *
8470+ * We fend off statistical fluctuations in runqueue lengths by
8471+ * saving the runqueue length during the previous load-balancing
8472+ * operation and using the smaller of the current and saved lengths.
8473+ * If a runqueue is long enough for a longer amount of time then
8474+ * we recognize it and pull tasks from it.
8475+ *
8476+ * The 'current runqueue length' is a statistical maximum variable,
8477+ * for that one we take the longer one - to avoid fluctuations in
8478+ * the other direction. So for a load-balance to happen it needs
8479+ * stable long runqueue on the target CPU and stable short runqueue
8480+ * on the local runqueue.
8481+ *
8482+ * We make an exception if this CPU is about to become idle - in
8483+ * that case we are less picky about moving a task across CPUs and
8484+ * take what can be taken.
8485+ */
8486+ if (idle || (this_rq->nr_running > this_rq->prev_nr_running[this_cpu]))
8487+ nr_running = this_rq->nr_running;
8488+ else
8489+ nr_running = this_rq->prev_nr_running[this_cpu];
8490
8491- /* move an exhausted RR process to be last.. */
8492- if (unlikely(prev->policy == SCHED_RR))
8493- if (!prev->counter) {
8494- prev->counter = NICE_TO_TICKS(prev->nice);
8495- move_last_runqueue(prev);
8496- }
8497+ busiest = NULL;
8498+ max_load = 1;
8499+ for (i = 0; i < smp_num_cpus; i++) {
8500+ int logical = cpu_logical_map(i);
8501
8502- switch (prev->state) {
8503- case TASK_INTERRUPTIBLE:
8504- if (signal_pending(prev)) {
8505- prev->state = TASK_RUNNING;
8506- break;
8507- }
8508- default:
8509- del_from_runqueue(prev);
8510- case TASK_RUNNING:;
8511+ rq_src = cpu_rq(logical);
8512+ if (idle || (rq_src->nr_running < this_rq->prev_nr_running[logical]))
8513+ load = rq_src->nr_running;
8514+ else
8515+ load = this_rq->prev_nr_running[logical];
8516+ this_rq->prev_nr_running[logical] = rq_src->nr_running;
8517+
8518+ if ((load > max_load) && (rq_src != this_rq)) {
8519+ busiest = rq_src;
8520+ max_load = load;
8521+ }
8522 }
8523- prev->need_resched = 0;
8524+
8525+ if (likely(!busiest))
8526+ return;
8527+
8528+ imbalance = (max_load - nr_running) / 2;
8529+
8530+	/* It needs at least a ~25% imbalance to trigger balancing. */
8531+ if (!idle && (imbalance < (max_load + 3)/4))
8532+ return;
8533
8534 /*
8535- * this is the scheduler proper:
8536+ * Make sure nothing significant changed since we checked the
8537+ * runqueue length.
8538 */
8539+ if (double_lock_balance(this_rq, busiest, this_cpu, idle, nr_running) > nr_running ||
8540+ busiest->nr_running < max_load)
8541+ goto out_unlock_retry;
8542
8543-repeat_schedule:
8544 /*
8545- * Default process to select..
8546+ * We first consider expired tasks. Those will likely not be
8547+ * executed in the near future, and they are most likely to
8548+ * be cache-cold, thus switching CPUs has the least effect
8549+ * on them.
8550 */
8551- next = idle_task(this_cpu);
8552- c = -1000;
8553- list_for_each(tmp, &runqueue_head) {
8554- p = list_entry(tmp, struct task_struct, run_list);
8555- if (can_schedule(p, this_cpu)) {
8556- int weight = goodness(p, this_cpu, prev->active_mm);
8557- if (weight > c)
8558- c = weight, next = p;
8559+ if (busiest->expired->nr_active)
8560+ array = busiest->expired;
8561+ else
8562+ array = busiest->active;
8563+
8564+ resched = 0;
8565+new_array:
8566+ /* Start searching at priority 0: */
8567+ idx = 0;
8568+skip_bitmap:
8569+ if (!idx)
8570+ idx = sched_find_first_bit(array->bitmap);
8571+ else
8572+ idx = find_next_bit(array->bitmap, MAX_PRIO, idx);
8573+ if (idx == MAX_PRIO) {
8574+ if (array == busiest->expired) {
8575+ array = busiest->active;
8576+ goto new_array;
8577 }
8578+ goto out_unlock;
8579 }
8580
8581- /* Do we need to re-calculate counters? */
8582- if (unlikely(!c)) {
8583- struct task_struct *p;
8584-
8585- spin_unlock_irq(&runqueue_lock);
8586- read_lock(&tasklist_lock);
8587- for_each_task(p)
8588- p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);
8589- read_unlock(&tasklist_lock);
8590- spin_lock_irq(&runqueue_lock);
8591- goto repeat_schedule;
8592+ head = array->queue + idx;
8593+ curr = head->prev;
8594+skip_queue:
8595+ tmp = list_entry(curr, task_t, run_list);
8596+
8597+ /*
 8598+	 * We do not migrate tasks that:
 8599+	 * 1) are running (obviously), or
8600+ * 2) cannot be migrated to this CPU due to cpus_allowed, or
8601+ * 3) are cache-hot on their current CPU.
8602+ */
8603+
8604+#define CAN_MIGRATE_TASK(p,rq,this_cpu) \
8605+ ((jiffies - (p)->sleep_timestamp > cache_decay_ticks) && \
8606+ ((p) != (rq)->curr) && \
8607+ ((p)->cpus_allowed & (1UL << (this_cpu))))
8608+
8609+ curr = curr->prev;
8610+
8611+ if (!CAN_MIGRATE_TASK(tmp, busiest, this_cpu)) {
8612+ if (curr != head)
8613+ goto skip_queue;
8614+ idx++;
8615+ goto skip_bitmap;
8616+ }
8617+ resched |= pull_task(busiest, array, tmp, this_rq, this_cpu);
8618+ if (--imbalance > 0) {
8619+ if (curr != head)
8620+ goto skip_queue;
8621+ idx++;
8622+ goto skip_bitmap;
8623 }
8624+out_unlock:
8625+ spin_unlock(&busiest->lock);
8626+ if (resched)
8627+ resched_task(this_rq->curr);
8628+ return;
8629+out_unlock_retry:
8630+ spin_unlock(&busiest->lock);
8631+ goto retry;
8632+}
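/*
 * A minimal sketch of the damped busiest-runqueue selection described in
 * the comments above, kept as plain C so the arithmetic is easy to follow.
 * The names pick_busiest(), cur_len[] and prev_len[] are illustrative only;
 * the patch itself works on runqueue_t and its prev_nr_running[] array.
 */
static int pick_busiest(int this_cpu, int ncpus,
			const int *cur_len, int *prev_len, int idle)
{
	int i, load, imbalance, busiest = -1, max_load = 1;
	/* local length is a statistical maximum, unless we are going idle */
	int nr_running = (idle || cur_len[this_cpu] > prev_len[this_cpu]) ?
				cur_len[this_cpu] : prev_len[this_cpu];

	for (i = 0; i < ncpus; i++) {
		/* remote length is the smaller of the two samples */
		load = (idle || cur_len[i] < prev_len[i]) ?
				cur_len[i] : prev_len[i];
		prev_len[i] = cur_len[i];
		if (i != this_cpu && load > max_load) {
			max_load = load;
			busiest = i;
		}
	}
	if (busiest < 0)
		return -1;
	imbalance = (max_load - nr_running) / 2;
	/* a busy CPU only balances on an imbalance of at least ~25% */
	if (!idle && imbalance < (max_load + 3) / 4)
		return -1;
	return busiest;
}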
8633
8634- /*
8635- * from this point on nothing can prevent us from
8636- * switching to the next task, save this fact in
8637- * sched_data.
8638- */
8639- sched_data->curr = next;
8640- task_set_cpu(next, this_cpu);
8641- spin_unlock_irq(&runqueue_lock);
8642-
8643- if (unlikely(prev == next)) {
8644- /* We won't go through the normal tail, so do this by hand */
8645- prev->policy &= ~SCHED_YIELD;
8646- goto same_process;
8647+/*
 8648+ * Either the idle_cpu_tick() or the busy_cpu_tick() function gets
 8649+ * called every timer tick, on every CPU. Our balancing action
 8650+ * frequency and balancing aggressiveness depend on whether the CPU is
 8651+ * idle or not.
8652+ *
8653+ * busy-rebalance every 250 msecs. idle-rebalance every 100 msec.
8654+ */
8655+#define BUSY_REBALANCE_TICK (HZ/4 ?: 1)
8656+#define IDLE_REBALANCE_TICK (HZ/10 ?: 1)
8657+
8658+static inline void idle_tick(void)
8659+{
8660+ if (unlikely(time_before_eq(this_rq()->last_jiffy + IDLE_REBALANCE_TICK, jiffies))) {
8661+ spin_lock(&this_rq()->lock);
8662+ load_balance(this_rq(), 1);
8663+ spin_unlock(&this_rq()->lock);
8664+ this_rq()->last_jiffy = jiffies;
8665 }
8666+}
8667
8668-#ifdef CONFIG_SMP
8669- /*
8670- * maintain the per-process 'last schedule' value.
8671- * (this has to be recalculated even if we reschedule to
8672- * the same process) Currently this is only used on SMP,
8673- * and it's approximate, so we do not have to maintain
8674- * it while holding the runqueue spinlock.
8675- */
8676- sched_data->last_schedule = get_cycles();
8677+#endif
8678
8679- /*
8680- * We drop the scheduler lock early (it's a global spinlock),
8681- * thus we have to lock the previous process from getting
8682- * rescheduled during switch_to().
8683- */
8684+/*
8685+ * We place interactive tasks back into the active array, if possible.
8686+ *
8687+ * To guarantee that this does not starve expired tasks we ignore the
8688+ * interactivity of a task if the first expired task had to wait more
8689+ * than a 'reasonable' amount of time. This deadline timeout is
 8690+ * load-dependent, as the frequency of array switches decreases with
8691+ * increasing number of running tasks:
8692+ */
8693+#define EXPIRED_STARVING(rq) \
8694+ ((rq)->expired_timestamp && \
8695+ (jiffies - (rq)->expired_timestamp >= \
8696+ STARVATION_LIMIT * ((rq)->nr_running) + 1))
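/*
 * The same check as the EXPIRED_STARVING() macro above, expanded into a
 * plain function to make the scaling explicit: the first expired task is
 * allowed to wait STARVATION_LIMIT ticks per runnable task before the
 * interactivity bonus is overridden. STARVATION_LIMIT itself is defined
 * elsewhere in this patch and is only passed in here as a parameter.
 */
static inline int expired_starving_sketch(unsigned long expired_timestamp,
					  unsigned long now,
					  unsigned long nr_running,
					  unsigned long starvation_limit)
{
	if (!expired_timestamp)
		return 0;
	return now - expired_timestamp >= starvation_limit * nr_running + 1;
}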
8697
8698-#endif /* CONFIG_SMP */
8699+/*
8700+ * This function gets called by the timer code, with HZ frequency.
8701+ * We call it with interrupts disabled.
8702+ */
8703+void scheduler_tick(int user_tick, int system)
8704+{
8705+ int cpu = smp_processor_id();
8706+ runqueue_t *rq = this_rq();
8707+ task_t *p = current;
8708
8709- kstat.context_swtch++;
8710- /*
8711- * there are 3 processes which are affected by a context switch:
8712- *
8713- * prev == .... ==> (last => next)
8714- *
8715- * It's the 'much more previous' 'prev' that is on next's stack,
8716- * but prev is set to (the just run) 'last' process by switch_to().
8717- * This might sound slightly confusing but makes tons of sense.
8718- */
8719- prepare_to_switch();
8720- {
8721- struct mm_struct *mm = next->mm;
8722- struct mm_struct *oldmm = prev->active_mm;
8723- if (!mm) {
8724- BUG_ON(next->active_mm);
8725- next->active_mm = oldmm;
8726- atomic_inc(&oldmm->mm_count);
8727- enter_lazy_tlb(oldmm, next, this_cpu);
8728- } else {
8729- BUG_ON(next->active_mm != mm);
8730- switch_mm(oldmm, mm, next, this_cpu);
8731+ if (p == rq->idle) {
8732+ if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
8733+ kstat.per_cpu_system[cpu] += system;
8734+#if CONFIG_SMP
8735+ idle_tick();
8736+#endif
8737+ return;
8738+ }
8739+ if (TASK_NICE(p) > 0)
8740+ kstat.per_cpu_nice[cpu] += user_tick;
8741+ else
8742+ kstat.per_cpu_user[cpu] += user_tick;
8743+ kstat.per_cpu_system[cpu] += system;
8744+
8745+ /* Task might have expired already, but not scheduled off yet */
8746+ if (p->array != rq->active) {
8747+ set_tsk_need_resched(p);
8748+ return;
8749+ }
8750+ spin_lock(&rq->lock);
8751+ if (unlikely(rt_task(p))) {
8752+ /*
8753+ * RR tasks need a special form of timeslice management.
8754+ * FIFO tasks have no timeslices.
8755+ */
8756+ if ((p->policy == SCHED_RR) && !--p->time_slice) {
8757+ p->time_slice = TASK_TIMESLICE(p);
8758+ p->first_time_slice = 0;
8759+ set_tsk_need_resched(p);
8760+
8761+ /* put it at the end of the queue: */
8762+ dequeue_task(p, rq->active);
8763+ enqueue_task(p, rq->active);
8764 }
8765+ goto out;
8766+ }
8767+ /*
8768+ * The task was running during this tick - update the
8769+ * time slice counter and the sleep average. Note: we
8770+ * do not update a process's priority until it either
8771+ * goes to sleep or uses up its timeslice. This makes
8772+ * it possible for interactive tasks to use up their
8773+ * timeslices at their highest priority levels.
8774+ */
8775+ if (p->sleep_avg)
8776+ p->sleep_avg--;
8777+ if (!--p->time_slice) {
8778+ dequeue_task(p, rq->active);
8779+ set_tsk_need_resched(p);
8780+ p->prio = effective_prio(p);
8781+ p->time_slice = TASK_TIMESLICE(p);
8782+ p->first_time_slice = 0;
8783+
8784+ if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
8785+ if (!rq->expired_timestamp)
8786+ rq->expired_timestamp = jiffies;
8787+ enqueue_task(p, rq->expired);
8788+ } else
8789+ enqueue_task(p, rq->active);
8790+ }
8791+out:
8792+#if CONFIG_SMP
8793+ if (unlikely(time_before_eq(this_rq()->last_jiffy + BUSY_REBALANCE_TICK, jiffies))) {
8794+ load_balance(rq, 0);
8795+ rq->last_jiffy = jiffies;
8796+ }
8797+#endif
8798+ spin_unlock(&rq->lock);
8799+}
8800+
8801+void scheduling_functions_start_here(void) { }
8802+
8803+/*
8804+ * 'schedule()' is the main scheduler function.
8805+ */
8806+asmlinkage void schedule(void)
8807+{
8808+ task_t *prev, *next;
8809+ runqueue_t *rq;
8810+ prio_array_t *array;
8811+ struct list_head *queue;
8812+ int idx;
8813+
8814+ if (unlikely(in_interrupt()))
8815+ BUG();
8816
8817- if (!prev->mm) {
8818- prev->active_mm = NULL;
8819- mmdrop(oldmm);
8820+need_resched:
8821+ prev = current;
8822+ rq = this_rq();
8823+
8824+ release_kernel_lock(prev, smp_processor_id());
8825+ prev->sleep_timestamp = jiffies;
8826+ spin_lock_irq(&rq->lock);
8827+
8828+ switch (prev->state) {
8829+ case TASK_INTERRUPTIBLE:
8830+ if (unlikely(signal_pending(prev))) {
8831+ prev->state = TASK_RUNNING;
8832+ break;
8833 }
8834+ default:
8835+ deactivate_task(prev, rq);
8836+ case TASK_RUNNING:
8837+ ;
8838+ }
8839+#if CONFIG_SMP
8840+pick_next_task:
8841+#endif
8842+ if (unlikely(!rq->nr_running)) {
8843+#if CONFIG_SMP
8844+ load_balance(rq, 2);
8845+ rq->last_jiffy = jiffies;
8846+ if (rq->nr_running)
8847+ goto pick_next_task;
8848+#endif
8849+ next = rq->idle;
8850+ rq->expired_timestamp = 0;
8851+ goto switch_tasks;
8852 }
8853
8854- /*
8855- * This just switches the register state and the
8856- * stack.
8857- */
8858- switch_to(prev, next, prev);
8859- __schedule_tail(prev);
8860+ array = rq->active;
8861+ if (unlikely(!array->nr_active)) {
8862+ /*
8863+ * Switch the active and expired arrays.
8864+ */
8865+ rq->active = rq->expired;
8866+ rq->expired = array;
8867+ array = rq->active;
8868+ rq->expired_timestamp = 0;
8869+ }
8870+
8871+ idx = sched_find_first_bit(array->bitmap);
8872+ queue = array->queue + idx;
8873+ next = list_entry(queue->next, task_t, run_list);
8874+
8875+switch_tasks:
8876+ prefetch(next);
8877+ clear_tsk_need_resched(prev);
8878+
8879+ if (likely(prev != next)) {
8880+ rq->nr_switches++;
8881+ rq->curr = next;
8882+
8883+ prepare_arch_switch(rq, next);
8884+ prev = context_switch(prev, next);
8885+ barrier();
8886+ rq = this_rq();
8887+ finish_arch_switch(rq, prev);
8888+ } else
8889+ spin_unlock_irq(&rq->lock);
8890
8891-same_process:
8892 reacquire_kernel_lock(current);
8893- if (current->need_resched)
8894- goto need_resched_back;
8895- return;
8896+ if (need_resched())
8897+ goto need_resched;
8898 }
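/*
 * Why the pick-next step above is O(1): runnable tasks live in MAX_PRIO
 * per-priority lists plus a bitmap of non-empty priorities, so finding the
 * highest-priority task is a find-first-set over the bitmap followed by
 * taking the head of that list. A generic sketch of the bit search (the
 * patch itself uses the hand-optimized sched_find_first_bit() instead):
 */
#define SKETCH_BITS_PER_LONG (8 * (int)sizeof(unsigned long))

static inline int sketch_find_first_bit(const unsigned long *bitmap, int words)
{
	int w;

	for (w = 0; w < words; w++)
		if (bitmap[w])	/* only a handful of words to scan */
			return w * SKETCH_BITS_PER_LONG +
				__builtin_ctzl(bitmap[w]);
	return words * SKETCH_BITS_PER_LONG;	/* no bit set */
}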
8899
8900 /*
8901- * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just wake everything
8902- * up. If it's an exclusive wakeup (nr_exclusive == small +ve number) then we wake all the
8903- * non-exclusive tasks and one exclusive task.
8904+ * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
8905+ * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
8906+ * number) then we wake all the non-exclusive tasks and one exclusive task.
8907 *
8908 * There are circumstances in which we can try to wake a task which has already
8909- * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns zero
8910- * in this (rare) case, and we handle it by contonuing to scan the queue.
8911+ * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
8912+ * zero in this (rare) case, and we handle it by continuing to scan the queue.
8913 */
8914-static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode,
8915- int nr_exclusive, const int sync)
8916+static inline void __wake_up_common(wait_queue_head_t *q, unsigned int mode, int nr_exclusive, int sync)
8917 {
8918 struct list_head *tmp;
8919- struct task_struct *p;
8920-
8921- CHECK_MAGIC_WQHEAD(q);
8922- WQ_CHECK_LIST_HEAD(&q->task_list);
8923-
8924- list_for_each(tmp,&q->task_list) {
8925- unsigned int state;
8926- wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
8927+ unsigned int state;
8928+ wait_queue_t *curr;
8929+ task_t *p;
8930
8931- CHECK_MAGIC(curr->__magic);
8932+ list_for_each(tmp, &q->task_list) {
8933+ curr = list_entry(tmp, wait_queue_t, task_list);
8934 p = curr->task;
8935 state = p->state;
8936- if (state & mode) {
8937- WQ_NOTE_WAKER(curr);
8938- if (try_to_wake_up(p, sync) && (curr->flags&WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
8939+ if ((state & mode) && try_to_wake_up(p, sync) &&
8940+ ((curr->flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive))
8941 break;
8942- }
8943 }
8944 }
8945
7f7e7712
KT
8946-void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode, int nr)
8947+void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
0aa7655b
AM
8948 {
8949- if (q) {
8950- unsigned long flags;
8951- wq_read_lock_irqsave(&q->lock, flags);
8952- __wake_up_common(q, mode, nr, 0);
8953- wq_read_unlock_irqrestore(&q->lock, flags);
8954- }
8955+ unsigned long flags;
8956+
8957+ if (unlikely(!q))
8958+ return;
8959+
8960+ wq_read_lock_irqsave(&q->lock, flags);
8961+ __wake_up_common(q, mode, nr_exclusive, 0);
8962+ wq_read_unlock_irqrestore(&q->lock, flags);
8963 }
8964
7f7e7712 8965-void fastcall __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr)
0aa7655b
AM
8966+#if CONFIG_SMP
8967+
7f7e7712 8968+void fastcall __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
0aa7655b
AM
8969 {
8970- if (q) {
8971- unsigned long flags;
8972- wq_read_lock_irqsave(&q->lock, flags);
8973- __wake_up_common(q, mode, nr, 1);
8974- wq_read_unlock_irqrestore(&q->lock, flags);
8975- }
8976+ unsigned long flags;
8977+
8978+ if (unlikely(!q))
8979+ return;
8980+
8981+ wq_read_lock_irqsave(&q->lock, flags);
8982+ if (likely(nr_exclusive))
8983+ __wake_up_common(q, mode, nr_exclusive, 1);
8984+ else
8985+ __wake_up_common(q, mode, nr_exclusive, 0);
8986+ wq_read_unlock_irqrestore(&q->lock, flags);
8987 }
8988
8989+#endif
8990+
7f7e7712 8991 void fastcall complete(struct completion *x)
0aa7655b
AM
8992 {
8993 unsigned long flags;
8994
8995- spin_lock_irqsave(&x->wait.lock, flags);
8996+ wq_write_lock_irqsave(&x->wait.lock, flags);
8997 x->done++;
8998 __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, 0);
8999- spin_unlock_irqrestore(&x->wait.lock, flags);
9000+ wq_write_unlock_irqrestore(&x->wait.lock, flags);
9001 }
9002
7f7e7712 9003 void fastcall wait_for_completion(struct completion *x)
0aa7655b
AM
9004 {
9005- spin_lock_irq(&x->wait.lock);
9006+ wq_write_lock_irq(&x->wait.lock);
9007 if (!x->done) {
9008 DECLARE_WAITQUEUE(wait, current);
9009
9010@@ -775,14 +1060,14 @@
9011 __add_wait_queue_tail(&x->wait, &wait);
9012 do {
9013 __set_current_state(TASK_UNINTERRUPTIBLE);
9014- spin_unlock_irq(&x->wait.lock);
9015+ wq_write_unlock_irq(&x->wait.lock);
9016 schedule();
9017- spin_lock_irq(&x->wait.lock);
9018+ wq_write_lock_irq(&x->wait.lock);
9019 } while (!x->done);
9020 __remove_wait_queue(&x->wait, &wait);
9021 }
9022 x->done--;
9023- spin_unlock_irq(&x->wait.lock);
9024+ wq_write_unlock_irq(&x->wait.lock);
9025 }
9026
9027 #define SLEEP_ON_VAR \
9028@@ -850,43 +1135,40 @@
9029
9030 void scheduling_functions_end_here(void) { }
9031
9032-#if CONFIG_SMP
9033-/**
9034- * set_cpus_allowed() - change a given task's processor affinity
9035- * @p: task to bind
9036- * @new_mask: bitmask of allowed processors
9037- *
9038- * Upon return, the task is running on a legal processor. Note the caller
9039- * must have a valid reference to the task: it must not exit() prematurely.
9040- * This call can sleep; do not hold locks on call.
9041- */
9042-void set_cpus_allowed(struct task_struct *p, unsigned long new_mask)
9043+void set_user_nice(task_t *p, long nice)
9044 {
9045- new_mask &= cpu_online_map;
9046- BUG_ON(!new_mask);
9047-
9048- p->cpus_allowed = new_mask;
9049+ unsigned long flags;
9050+ prio_array_t *array;
9051+ runqueue_t *rq;
9052
9053+ if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
9054+ return;
9055 /*
9056- * If the task is on a no-longer-allowed processor, we need to move
9057- * it. If the task is not current, then set need_resched and send
9058- * its processor an IPI to reschedule.
 9059+	 * We have to be careful: if called from sys_setpriority(),
9060+ * the task might be in the middle of scheduling on another CPU.
9061 */
9062- if (!(p->cpus_runnable & p->cpus_allowed)) {
9063- if (p != current) {
9064- p->need_resched = 1;
9065- smp_send_reschedule(p->processor);
9066- }
9067+ rq = task_rq_lock(p, &flags);
9068+ if (rt_task(p)) {
9069+ p->static_prio = NICE_TO_PRIO(nice);
9070+ goto out_unlock;
9071+ }
9072+ array = p->array;
9073+ if (array)
9074+ dequeue_task(p, array);
9075+ p->static_prio = NICE_TO_PRIO(nice);
9076+ p->prio = NICE_TO_PRIO(nice);
9077+ if (array) {
9078+ enqueue_task(p, array);
9079 /*
9080- * Wait until we are on a legal processor. If the task is
9081- * current, then we should be on a legal processor the next
9082- * time we reschedule. Otherwise, we need to wait for the IPI.
 9083+		 * If the task is running and lowered its priority,
 9084+		 * or increased its priority, then reschedule its CPU:
9085 */
9086- while (!(p->cpus_runnable & p->cpus_allowed))
9087- schedule();
9088+ if (p == rq->curr)
9089+ resched_task(rq->curr);
9090 }
9091+out_unlock:
9092+ task_rq_unlock(rq, &flags);
9093 }
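/*
 * With the O(1) scheduler a task's nice value can no longer be changed by
 * writing p->nice directly, since the static priority decides which
 * priority list the task is queued on. Callers go through set_user_nice(),
 * as the kernel/softirq.c hunk later in this patch does for ksoftirqd. The
 * wrapper name below is illustrative only.
 */
static void sketch_drop_to_lowest_priority(void)
{
	/* requeues current under its runqueue lock, see set_user_nice() */
	set_user_nice(current, 19);
}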
9094-#endif /* CONFIG_SMP */
9095
9096 #ifndef __alpha__
9097
9098@@ -898,7 +1180,7 @@
9099
9100 asmlinkage long sys_nice(int increment)
9101 {
9102- long newprio;
9103+ long nice;
9104
9105 /*
9106 * Setpriority might change our priority at the same moment.
9107@@ -914,32 +1196,46 @@
9108 if (increment > 40)
9109 increment = 40;
9110
9111- newprio = current->nice + increment;
9112- if (newprio < -20)
9113- newprio = -20;
9114- if (newprio > 19)
9115- newprio = 19;
9116- current->nice = newprio;
9117+ nice = PRIO_TO_NICE(current->static_prio) + increment;
9118+ if (nice < -20)
9119+ nice = -20;
9120+ if (nice > 19)
9121+ nice = 19;
9122+ set_user_nice(current, nice);
9123 return 0;
9124 }
9125
9126 #endif
9127
9128-static inline struct task_struct *find_process_by_pid(pid_t pid)
9129+/*
9130+ * This is the priority value as seen by users in /proc
9131+ *
9132+ * RT tasks are offset by -200. Normal tasks are centered
9133+ * around 0, value goes from -16 to +15.
9134+ */
9135+int task_prio(task_t *p)
9136 {
9137- struct task_struct *tsk = current;
9138+ return p->prio - MAX_USER_RT_PRIO;
9139+}
9140
9141- if (pid)
9142- tsk = find_task_by_pid(pid);
9143- return tsk;
9144+int task_nice(task_t *p)
9145+{
9146+ return TASK_NICE(p);
9147+}
9148+
9149+static inline task_t *find_process_by_pid(pid_t pid)
9150+{
9151+ return pid ? find_task_by_pid(pid) : current;
9152 }
9153
9154-static int setscheduler(pid_t pid, int policy,
9155- struct sched_param *param)
9156+static int setscheduler(pid_t pid, int policy, struct sched_param *param)
9157 {
9158 struct sched_param lp;
9159- struct task_struct *p;
9160+ prio_array_t *array;
9161+ unsigned long flags;
9162+ runqueue_t *rq;
9163 int retval;
9164+ task_t *p;
9165
9166 retval = -EINVAL;
9167 if (!param || pid < 0)
9168@@ -953,14 +1249,19 @@
9169 * We play safe to avoid deadlocks.
9170 */
9171 read_lock_irq(&tasklist_lock);
9172- spin_lock(&runqueue_lock);
9173
9174 p = find_process_by_pid(pid);
9175
9176 retval = -ESRCH;
9177 if (!p)
9178- goto out_unlock;
9179-
9180+ goto out_unlock_tasklist;
9181+
9182+ /*
 9183+	 * To be able to change p->policy safely, the appropriate
9184+ * runqueue lock must be held.
9185+ */
9186+ rq = task_rq_lock(p, &flags);
9187+
9188 if (policy < 0)
9189 policy = p->policy;
9190 else {
9191@@ -969,40 +1270,48 @@
9192 policy != SCHED_OTHER)
9193 goto out_unlock;
9194 }
9195-
9196+
9197 /*
9198- * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid
9199- * priority for SCHED_OTHER is 0.
9200+ * Valid priorities for SCHED_FIFO and SCHED_RR are
9201+ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_OTHER is 0.
9202 */
9203 retval = -EINVAL;
9204- if (lp.sched_priority < 0 || lp.sched_priority > 99)
9205+ if (lp.sched_priority < 0 || lp.sched_priority > MAX_USER_RT_PRIO-1)
9206 goto out_unlock;
9207 if ((policy == SCHED_OTHER) != (lp.sched_priority == 0))
9208 goto out_unlock;
9209
9210 retval = -EPERM;
9211- if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
9212+ if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
9213 !capable(CAP_SYS_NICE))
9214 goto out_unlock;
9215 if ((current->euid != p->euid) && (current->euid != p->uid) &&
9216 !capable(CAP_SYS_NICE))
9217 goto out_unlock;
9218
9219+ array = p->array;
9220+ if (array)
9221+ deactivate_task(p, task_rq(p));
9222 retval = 0;
9223 p->policy = policy;
9224 p->rt_priority = lp.sched_priority;
9225-
9226- current->need_resched = 1;
9227+ if (policy != SCHED_OTHER)
9228+ p->prio = MAX_USER_RT_PRIO-1 - p->rt_priority;
9229+ else
9230+ p->prio = p->static_prio;
9231+ if (array)
9232+ activate_task(p, task_rq(p));
9233
9234 out_unlock:
9235- spin_unlock(&runqueue_lock);
9236+ task_rq_unlock(rq, &flags);
9237+out_unlock_tasklist:
9238 read_unlock_irq(&tasklist_lock);
9239
9240 out_nounlock:
9241 return retval;
9242 }
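/*
 * The user-space counterpart of the checks above: SCHED_FIFO/SCHED_RR
 * accept priorities 1..MAX_USER_RT_PRIO-1 (1..99 here), SCHED_OTHER only
 * accepts 0, and switching to a realtime policy needs CAP_SYS_NICE. A
 * minimal illustrative caller, not part of the patch:
 */
#include <sched.h>
#include <stdio.h>

int sketch_make_self_fifo(int prio)
{
	struct sched_param sp = { .sched_priority = prio };

	if (sched_setscheduler(0, SCHED_FIFO, &sp) < 0) {
		perror("sched_setscheduler");	/* e.g. missing CAP_SYS_NICE */
		return -1;
	}
	return 0;
}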
9243
9244-asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,
9245+asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,
9246 struct sched_param *param)
9247 {
9248 return setscheduler(pid, policy, param);
9249@@ -1015,7 +1324,7 @@
9250
9251 asmlinkage long sys_sched_getscheduler(pid_t pid)
9252 {
9253- struct task_struct *p;
9254+ task_t *p;
9255 int retval;
9256
9257 retval = -EINVAL;
9258@@ -1026,7 +1335,7 @@
9259 read_lock(&tasklist_lock);
9260 p = find_process_by_pid(pid);
9261 if (p)
9262- retval = p->policy & ~SCHED_YIELD;
9263+ retval = p->policy;
9264 read_unlock(&tasklist_lock);
9265
9266 out_nounlock:
9267@@ -1035,7 +1344,7 @@
9268
9269 asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param *param)
9270 {
9271- struct task_struct *p;
9272+ task_t *p;
9273 struct sched_param lp;
9274 int retval;
9275
9276@@ -1066,42 +1375,64 @@
9277
9278 asmlinkage long sys_sched_yield(void)
9279 {
9280- /*
9281- * Trick. sched_yield() first counts the number of truly
9282- * 'pending' runnable processes, then returns if it's
9283- * only the current processes. (This test does not have
9284- * to be atomic.) In threaded applications this optimization
9285- * gets triggered quite often.
9286- */
9287+ runqueue_t *rq = this_rq();
9288+ prio_array_t *array;
9289+ int i;
9290
9291- int nr_pending = nr_running;
9292+ spin_lock_irq(&rq->lock);
9293+
9294+ if (unlikely(rq->nr_running == 1)) {
9295+ spin_unlock_irq(&rq->lock);
9296+ return 0;
9297+ }
9298
9299-#if CONFIG_SMP
9300- int i;
9301+ array = current->array;
9302+ if (unlikely(rt_task(current))) {
9303+ list_del(&current->run_list);
9304+ list_add_tail(&current->run_list, array->queue + current->prio);
9305+ goto out_unlock;
9306+ }
9307
9308- // Subtract non-idle processes running on other CPUs.
9309- for (i = 0; i < smp_num_cpus; i++) {
9310- int cpu = cpu_logical_map(i);
9311- if (aligned_data[cpu].schedule_data.curr != idle_task(cpu))
9312- nr_pending--;
9313+ if (unlikely(array == rq->expired) && rq->active->nr_active)
9314+ goto out_unlock;
9315+
9316+ list_del(&current->run_list);
9317+ if (!list_empty(array->queue + current->prio)) {
9318+ list_add(&current->run_list, array->queue[current->prio].next);
9319+ goto out_unlock;
9320 }
9321-#else
9322- // on UP this process is on the runqueue as well
9323- nr_pending--;
9324-#endif
9325- if (nr_pending) {
9326+
9327+ __clear_bit(current->prio, array->bitmap);
9328+ if (likely(array == rq->active) && array->nr_active == 1) {
9329 /*
9330- * This process can only be rescheduled by us,
9331- * so this is safe without any locking.
 9332+		 * We're the last task in the active queue, so
 9333+		 * we must move ourselves to the expired array
 9334+		 * to avoid running again immediately.
9335 */
9336- if (current->policy == SCHED_OTHER)
9337- current->policy |= SCHED_YIELD;
9338- current->need_resched = 1;
9339-
9340- spin_lock_irq(&runqueue_lock);
9341- move_last_runqueue(current);
9342- spin_unlock_irq(&runqueue_lock);
9343+ array->nr_active--;
9344+ array = rq->expired;
9345+ array->nr_active++;
9346 }
9347+
9348+ i = sched_find_first_bit(array->bitmap);
9349+
9350+ BUG_ON(i == MAX_PRIO);
9351+ BUG_ON(i == current->prio && array == current->array);
9352+
9353+ if (array == current->array && i < current->prio)
9354+ i = current->prio;
9355+ else {
9356+ current->array = array;
9357+ current->prio = i;
9358+ }
9359+ list_add(&current->run_list, array->queue[i].next);
9360+ __set_bit(i, array->bitmap);
9361+
9362+out_unlock:
9363+ spin_unlock_irq(&rq->lock);
9364+
9365+ schedule();
9366+
9367 return 0;
9368 }
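/*
 * The effect of the new sys_sched_yield() for a SCHED_OTHER task is that
 * the yielder is queued behind the other runnable tasks (or moved to the
 * expired array when it was alone in the active one), so a user-space
 * polling loop like this one really gives the CPU away instead of being
 * picked again immediately. Illustration only, not part of the patch:
 */
#include <sched.h>

void sketch_spin_until(volatile int *flag)
{
	while (!*flag)
		sched_yield();	/* let other runnable tasks run first */
}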
9369
9370@@ -1113,14 +1444,13 @@
9371 */
9372 void yield(void)
9373 {
9374- set_current_state(TASK_RUNNING);
9375+ __set_current_state(TASK_RUNNING);
9376 sys_sched_yield();
9377- schedule();
9378 }
9379
9380 void __cond_resched(void)
9381 {
9382- set_current_state(TASK_RUNNING);
9383+ __set_current_state(TASK_RUNNING);
9384 schedule();
9385 }
9386
9387@@ -1131,7 +1461,7 @@
9388 switch (policy) {
9389 case SCHED_FIFO:
9390 case SCHED_RR:
9391- ret = 99;
9392+ ret = MAX_USER_RT_PRIO-1;
9393 break;
9394 case SCHED_OTHER:
9395 ret = 0;
9396@@ -1158,7 +1488,7 @@
9397 asmlinkage long sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
9398 {
9399 struct timespec t;
9400- struct task_struct *p;
9401+ task_t *p;
9402 int retval = -EINVAL;
9403
9404 if (pid < 0)
9405@@ -1168,8 +1498,8 @@
9406 read_lock(&tasklist_lock);
9407 p = find_process_by_pid(pid);
9408 if (p)
9409- jiffies_to_timespec(p->policy & SCHED_FIFO ? 0 : NICE_TO_TICKS(p->nice),
9410- &t);
9411+ jiffies_to_timespec(p->policy & SCHED_FIFO ?
9412+ 0 : TASK_TIMESLICE(p), &t);
9413 read_unlock(&tasklist_lock);
9414 if (p)
9415 retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
9416@@ -1177,14 +1507,14 @@
9417 return retval;
9418 }
9419
9420-static void show_task(struct task_struct * p)
9421+static void show_task(task_t * p)
9422 {
9423 unsigned long free = 0;
9424 int state;
9425 static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };
9426
9427 printk("%-13.13s ", p->comm);
9428- state = p->state ? ffz(~p->state) + 1 : 0;
9429+ state = p->state ? __ffs(p->state) + 1 : 0;
9430 if (((unsigned) state) < sizeof(stat_nam)/sizeof(char *))
9431 printk(stat_nam[state]);
9432 else
9433@@ -1225,7 +1555,7 @@
9434 printk(" (NOTLB)\n");
9435
9436 {
9437- extern void show_trace_task(struct task_struct *tsk);
9438+ extern void show_trace_task(task_t *tsk);
9439 show_trace_task(p);
9440 }
9441 }
9442@@ -1247,7 +1577,7 @@
9443
9444 void show_state(void)
9445 {
9446- struct task_struct *p;
9447+ task_t *p;
9448
9449 #if (BITS_PER_LONG == 32)
9450 printk("\n"
9451@@ -1270,128 +1600,280 @@
9452 read_unlock(&tasklist_lock);
9453 }
9454
9455-/**
9456- * reparent_to_init() - Reparent the calling kernel thread to the init task.
9457- *
9458- * If a kernel thread is launched as a result of a system call, or if
9459- * it ever exits, it should generally reparent itself to init so that
9460- * it is correctly cleaned up on exit.
9461+/*
9462+ * double_rq_lock - safely lock two runqueues
9463 *
9464- * The various task state such as scheduling policy and priority may have
9465- * been inherited fro a user process, so we reset them to sane values here.
 9466+ * Note this does not disable interrupts like task_rq_lock;
9467+ * you need to do so manually before calling.
9468+ */
9469+static inline void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
9470+{
9471+ if (rq1 == rq2)
9472+ spin_lock(&rq1->lock);
9473+ else {
9474+ if (rq1 < rq2) {
9475+ spin_lock(&rq1->lock);
9476+ spin_lock(&rq2->lock);
9477+ } else {
9478+ spin_lock(&rq2->lock);
9479+ spin_lock(&rq1->lock);
9480+ }
9481+ }
9482+}
9483+
9484+/*
9485+ * double_rq_unlock - safely unlock two runqueues
9486 *
9487- * NOTE that reparent_to_init() gives the caller full capabilities.
 9488+ * Note this does not restore interrupts like task_rq_unlock;
9489+ * you need to do so manually after calling.
9490 */
9491-void reparent_to_init(void)
9492+static inline void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2)
9493 {
9494- struct task_struct *this_task = current;
9495+ spin_unlock(&rq1->lock);
9496+ if (rq1 != rq2)
9497+ spin_unlock(&rq2->lock);
9498+}
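/*
 * The address-based ordering in double_rq_lock() above is what prevents an
 * ABBA deadlock: if one CPU took rq1->lock then rq2->lock while another
 * took them in the opposite order, each could spin forever on the lock the
 * other already holds. Ordering every caller by runqueue address rules
 * that interleaving out. The same idiom for two arbitrary spinlocks, as a
 * sketch:
 */
static inline void sketch_lock_pair(spinlock_t *a, spinlock_t *b)
{
	if (a == b) {
		spin_lock(a);
	} else if (a < b) {
		spin_lock(a);
		spin_lock(b);
	} else {
		spin_lock(b);
		spin_lock(a);
	}
}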
9499
9500- write_lock_irq(&tasklist_lock);
9501+void __init init_idle(task_t *idle, int cpu)
9502+{
9503+ runqueue_t *idle_rq = cpu_rq(cpu), *rq = cpu_rq(idle->cpu);
9504+ unsigned long flags;
9505
9506- /* Reparent to init */
9507- REMOVE_LINKS(this_task);
9508- this_task->p_pptr = child_reaper;
9509- this_task->p_opptr = child_reaper;
9510- SET_LINKS(this_task);
9511+ __save_flags(flags);
9512+ __cli();
9513+ double_rq_lock(idle_rq, rq);
9514+
9515+ idle_rq->curr = idle_rq->idle = idle;
9516+ deactivate_task(idle, rq);
9517+ idle->array = NULL;
9518+ idle->prio = MAX_PRIO;
9519+ idle->state = TASK_RUNNING;
9520+ idle->cpu = cpu;
9521+ double_rq_unlock(idle_rq, rq);
9522+ set_tsk_need_resched(idle);
9523+ __restore_flags(flags);
9524+}
9525+
9526+extern void init_timervecs(void);
9527+extern void timer_bh(void);
9528+extern void tqueue_bh(void);
9529+extern void immediate_bh(void);
9530+
9531+void __init sched_init(void)
9532+{
9533+ runqueue_t *rq;
9534+ int i, j, k;
9535+
9536+ for (i = 0; i < NR_CPUS; i++) {
9537+ prio_array_t *array;
9538
9539- /* Set the exit signal to SIGCHLD so we signal init on exit */
9540- this_task->exit_signal = SIGCHLD;
9541+ rq = cpu_rq(i);
9542+ rq->active = rq->arrays;
9543+ rq->expired = rq->arrays + 1;
9544+ spin_lock_init(&rq->lock);
9545+#ifdef CONFIG_SMP
9546+ INIT_LIST_HEAD(&rq->migration_queue);
9547+#endif
9548
9549- /* We also take the runqueue_lock while altering task fields
9550- * which affect scheduling decisions */
9551- spin_lock(&runqueue_lock);
9552+ for (j = 0; j < 2; j++) {
9553+ array = rq->arrays + j;
9554+ for (k = 0; k < MAX_PRIO; k++) {
9555+ INIT_LIST_HEAD(array->queue + k);
9556+ __clear_bit(k, array->bitmap);
9557+ }
9558+ // delimiter for bitsearch
9559+ __set_bit(MAX_PRIO, array->bitmap);
9560+ }
9561+ }
9562+ /*
9563+ * We have to do a little magic to get the first
9564+ * process right in SMP mode.
9565+ */
9566+ rq = this_rq();
9567+ rq->curr = current;
9568+ rq->idle = current;
9569+ current->cpu = smp_processor_id();
9570+ wake_up_process(current);
9571
9572- this_task->ptrace = 0;
9573- this_task->nice = DEF_NICE;
9574- this_task->policy = SCHED_OTHER;
9575- /* cpus_allowed? */
9576- /* rt_priority? */
9577- /* signals? */
9578- this_task->cap_effective = CAP_INIT_EFF_SET;
9579- this_task->cap_inheritable = CAP_INIT_INH_SET;
9580- this_task->cap_permitted = CAP_FULL_SET;
9581- this_task->keep_capabilities = 0;
9582- memcpy(this_task->rlim, init_task.rlim, sizeof(*(this_task->rlim)));
9583- switch_uid(INIT_USER);
9584+ init_timervecs();
9585+ init_bh(TIMER_BH, timer_bh);
9586+ init_bh(TQUEUE_BH, tqueue_bh);
9587+ init_bh(IMMEDIATE_BH, immediate_bh);
9588
9589- spin_unlock(&runqueue_lock);
9590- write_unlock_irq(&tasklist_lock);
9591+ /*
9592+ * The boot idle thread does lazy MMU switching as well:
9593+ */
9594+ atomic_inc(&init_mm.mm_count);
9595+ enter_lazy_tlb(&init_mm, current, smp_processor_id());
9596 }
9597
9598+#if CONFIG_SMP
9599+
9600 /*
9601- * Put all the gunge required to become a kernel thread without
9602- * attached user resources in one place where it belongs.
9603- */
9604+ * This is how migration works:
9605+ *
9606+ * 1) we queue a migration_req_t structure in the source CPU's
9607+ * runqueue and wake up that CPU's migration thread.
 9608+ * 2) we wait on the request's completion => thread blocks.
9609+ * 3) migration thread wakes up (implicitly it forces the migrated
9610+ * thread off the CPU)
9611+ * 4) it gets the migration request and checks whether the migrated
9612+ * task is still in the wrong runqueue.
9613+ * 5) if it's in the wrong runqueue then the migration thread removes
9614+ * it and puts it into the right queue.
 9615+ * 6) migration thread completes the request.
9616+ * 7) we wake up and the migration is done.
9617+ */
9618+
9619+typedef struct {
9620+ struct list_head list;
9621+ task_t *task;
9622+ struct completion done;
9623+} migration_req_t;
9624
9625-void daemonize(void)
9626+/*
9627+ * Change a given task's CPU affinity. Migrate the process to a
9628+ * proper CPU and schedule it away if the CPU it's executing on
9629+ * is removed from the allowed bitmask.
9630+ *
 9631+ * NOTE: the caller must have a valid reference to the task; the
9632+ * task must not exit() & deallocate itself prematurely. The
9633+ * call is not atomic; no spinlocks may be held.
9634+ */
9635+void set_cpus_allowed(task_t *p, unsigned long new_mask)
9636 {
9637- struct fs_struct *fs;
9638+ unsigned long flags;
9639+ migration_req_t req;
9640+ runqueue_t *rq;
9641
9642+ new_mask &= cpu_online_map;
9643+ if (!new_mask)
9644+ BUG();
9645
9646+ rq = task_rq_lock(p, &flags);
9647+ p->cpus_allowed = new_mask;
9648 /*
9649- * If we were started as result of loading a module, close all of the
9650- * user space pages. We don't need them, and if we didn't close them
9651- * they would be locked into memory.
9652+ * Can the task run on the task's current CPU? If not then
9653+ * migrate the process off to a proper CPU.
9654 */
9655- exit_mm(current);
9656+ if (new_mask & (1UL << p->cpu)) {
9657+ task_rq_unlock(rq, &flags);
9658+ return;
9659+ }
9660
9661- current->session = 1;
9662- current->pgrp = 1;
9663- current->tty = NULL;
9664+ /*
9665+ * If the task is not on a runqueue, then it is safe to
9666+ * simply update the task's cpu field.
9667+ */
9668+ if (!p->array && (p != rq->curr)) {
9669+ p->cpu = __ffs(p->cpus_allowed);
9670+ task_rq_unlock(rq, &flags);
9671+ return;
9672+ }
9673
9674- /* Become as one with the init task */
9675+ init_completion(&req.done);
9676+ req.task = p;
9677+ list_add(&req.list, &rq->migration_queue);
9678+ task_rq_unlock(rq, &flags);
9679+ wake_up_process(rq->migration_thread);
9680
9681- exit_fs(current); /* current->fs->count--; */
9682- fs = init_task.fs;
9683- current->fs = fs;
9684- atomic_inc(&fs->count);
9685- exit_files(current);
9686- current->files = init_task.files;
9687- atomic_inc(&current->files->count);
9688+ wait_for_completion(&req.done);
9689 }
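/*
 * Typical caller of set_cpus_allowed(), as in the kernel/softirq.c hunk
 * later in this patch: a kernel thread pins itself to one CPU and relies
 * on the migration thread to move it there before the call returns. The
 * wrapper name is illustrative only.
 */
static void sketch_bind_current_to_cpu(int cpu)
{
	set_cpus_allowed(current, 1UL << cpu);
	/* on return we must already be executing on 'cpu' */
	if (smp_processor_id() != cpu)
		BUG();
}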
9690
9691-extern unsigned long wait_init_idle;
9692+static __initdata int master_migration_thread;
9693
9694-void __init init_idle(void)
9695+static int migration_thread(void * bind_cpu)
9696 {
9697- struct schedule_data * sched_data;
9698- sched_data = &aligned_data[smp_processor_id()].schedule_data;
9699+ int cpu = cpu_logical_map((int) (long) bind_cpu);
9700+ struct sched_param param = { sched_priority: MAX_RT_PRIO-1 };
9701+ runqueue_t *rq;
9702+ int ret;
9703
9704- if (current != &init_task && task_on_runqueue(current)) {
9705- printk("UGH! (%d:%d) was on the runqueue, removing.\n",
9706- smp_processor_id(), current->pid);
9707- del_from_runqueue(current);
9708+ daemonize();
9709+ sigfillset(&current->blocked);
9710+ set_fs(KERNEL_DS);
9711+ /*
 9712+	 * The first migration thread is started on the boot CPU; it
9713+ * migrates the other migration threads to their destination CPUs.
9714+ */
9715+ if (cpu != master_migration_thread) {
9716+ while (!cpu_rq(master_migration_thread)->migration_thread)
9717+ yield();
9718+ set_cpus_allowed(current, 1UL << cpu);
9719 }
9720- sched_data->curr = current;
9721- sched_data->last_schedule = get_cycles();
9722- clear_bit(current->processor, &wait_init_idle);
9723-}
9724+ printk("migration_task %d on cpu=%d\n", cpu, smp_processor_id());
9725+ ret = setscheduler(0, SCHED_FIFO, &param);
9726
9727-extern void init_timervecs (void);
9728+ rq = this_rq();
9729+ rq->migration_thread = current;
9730
9731-void __init sched_init(void)
9732-{
9733- /*
9734- * We have to do a little magic to get the first
9735- * process right in SMP mode.
9736- */
9737- int cpu = smp_processor_id();
9738- int nr;
9739+ sprintf(current->comm, "migration_CPU%d", smp_processor_id());
9740
9741- init_task.processor = cpu;
9742+ for (;;) {
9743+ runqueue_t *rq_src, *rq_dest;
9744+ struct list_head *head;
9745+ int cpu_src, cpu_dest;
9746+ migration_req_t *req;
9747+ unsigned long flags;
9748+ task_t *p;
9749
9750- for(nr = 0; nr < PIDHASH_SZ; nr++)
9751- pidhash[nr] = NULL;
9752+ spin_lock_irqsave(&rq->lock, flags);
9753+ head = &rq->migration_queue;
9754+ current->state = TASK_INTERRUPTIBLE;
9755+ if (list_empty(head)) {
9756+ spin_unlock_irqrestore(&rq->lock, flags);
9757+ schedule();
9758+ continue;
9759+ }
9760+ req = list_entry(head->next, migration_req_t, list);
9761+ list_del_init(head->next);
9762+ spin_unlock_irqrestore(&rq->lock, flags);
9763+
9764+ p = req->task;
9765+ cpu_dest = __ffs(p->cpus_allowed);
9766+ rq_dest = cpu_rq(cpu_dest);
9767+repeat:
9768+ cpu_src = p->cpu;
9769+ rq_src = cpu_rq(cpu_src);
9770+
9771+ local_irq_save(flags);
9772+ double_rq_lock(rq_src, rq_dest);
9773+ if (p->cpu != cpu_src) {
9774+ double_rq_unlock(rq_src, rq_dest);
9775+ local_irq_restore(flags);
9776+ goto repeat;
9777+ }
9778+ if (rq_src == rq) {
9779+ p->cpu = cpu_dest;
9780+ if (p->array) {
9781+ deactivate_task(p, rq_src);
9782+ activate_task(p, rq_dest);
9783+ }
9784+ }
9785+ double_rq_unlock(rq_src, rq_dest);
9786+ local_irq_restore(flags);
9787
9788- init_timervecs();
9789+ complete(&req->done);
9790+ }
9791+}
9792
9793- init_bh(TIMER_BH, timer_bh);
9794- init_bh(TQUEUE_BH, tqueue_bh);
9795- init_bh(IMMEDIATE_BH, immediate_bh);
9796+void __init migration_init(void)
9797+{
9798+ int cpu;
9799
9800- /*
9801- * The boot idle thread does lazy MMU switching as well:
9802- */
9803- atomic_inc(&init_mm.mm_count);
9804- enter_lazy_tlb(&init_mm, current, cpu);
9805+ master_migration_thread = smp_processor_id();
9806+ current->cpus_allowed = 1UL << master_migration_thread;
9807+
9808+ for (cpu = 0; cpu < smp_num_cpus; cpu++) {
9809+ if (kernel_thread(migration_thread, (void *) (long) cpu,
9810+ CLONE_FS | CLONE_FILES | CLONE_SIGNAL) < 0)
9811+ BUG();
9812+ }
9813+ current->cpus_allowed = -1L;
9814+
9815+ for (cpu = 0; cpu < smp_num_cpus; cpu++)
9816+ while (!cpu_rq(cpu_logical_map(cpu))->migration_thread)
9817+ schedule_timeout(2);
9818 }
9819+
9820+#endif /* CONFIG_SMP */
9821diff -urN linux-2.4.24.org/kernel/signal.c linux-2.4.24/kernel/signal.c
5d16fd25
AM
9822--- linux-2.4.24.org/kernel/signal.c 2004-02-04 20:47:26.821945338 +0100
9823+++ linux-2.4.24/kernel/signal.c 2004-02-04 20:52:55.082667907 +0100
0aa7655b
AM
9824@@ -507,12 +507,9 @@
9825 * process of changing - but no harm is done by that
9826 * other than doing an extra (lightweight) IPI interrupt.
9827 */
9828- spin_lock(&runqueue_lock);
9829- if (task_has_cpu(t) && t->processor != smp_processor_id())
9830- smp_send_reschedule(t->processor);
9831- spin_unlock(&runqueue_lock);
9832-#endif /* CONFIG_SMP */
9833-
9834+ if ((t->state == TASK_RUNNING) && (t->cpu != cpu()))
9835+ kick_if_running(t);
9836+#endif
9837 if (t->state & TASK_INTERRUPTIBLE) {
9838 wake_up_process(t);
9839 return;
9840diff -urN linux-2.4.24.org/kernel/softirq.c linux-2.4.24/kernel/softirq.c
5d16fd25
AM
9841--- linux-2.4.24.org/kernel/softirq.c 2004-02-04 20:47:27.211864234 +0100
9842+++ linux-2.4.24/kernel/softirq.c 2004-02-04 20:52:55.110662084 +0100
0aa7655b
AM
9843@@ -364,13 +364,13 @@
9844 int cpu = cpu_logical_map(bind_cpu);
9845
9846 daemonize();
9847- current->nice = 19;
9848+ set_user_nice(current, 19);
9849 sigfillset(&current->blocked);
9850
9851 /* Migrate to the right CPU */
9852- current->cpus_allowed = 1UL << cpu;
9853- while (smp_processor_id() != cpu)
9854- schedule();
9855+ set_cpus_allowed(current, 1UL << cpu);
9856+ if (cpu() != cpu)
9857+ BUG();
9858
9859 sprintf(current->comm, "ksoftirqd_CPU%d", bind_cpu);
9860
9861@@ -395,7 +395,7 @@
9862 }
9863 }
9864
9865-static __init int spawn_ksoftirqd(void)
9866+__init int spawn_ksoftirqd(void)
9867 {
9868 int cpu;
9869
9870diff -urN linux-2.4.24.org/kernel/sys.c linux-2.4.24/kernel/sys.c
5d16fd25
AM
9871--- linux-2.4.24.org/kernel/sys.c 2004-02-04 20:47:26.739962391 +0100
9872+++ linux-2.4.24/kernel/sys.c 2004-02-04 20:52:55.139656054 +0100
0aa7655b
AM
9873@@ -239,10 +239,10 @@
9874 }
9875 if (error == -ESRCH)
9876 error = 0;
9877- if (niceval < p->nice && !capable(CAP_SYS_NICE))
9878+ if (niceval < task_nice(p) && !capable(CAP_SYS_NICE))
9879 error = -EACCES;
9880 else
9881- p->nice = niceval;
9882+ set_user_nice(p, niceval);
9883 }
9884 read_unlock(&tasklist_lock);
9885
9886@@ -268,7 +268,7 @@
9887 long niceval;
9888 if (!proc_sel(p, which, who))
9889 continue;
9890- niceval = 20 - p->nice;
9891+ niceval = 20 - task_nice(p);
9892 if (niceval > retval)
9893 retval = niceval;
9894 }
9895diff -urN linux-2.4.24.org/kernel/timer.c linux-2.4.24/kernel/timer.c
5d16fd25
AM
9896--- linux-2.4.24.org/kernel/timer.c 2004-02-04 20:47:27.115884198 +0100
9897+++ linux-2.4.24/kernel/timer.c 2004-02-04 20:52:55.155652727 +0100
0aa7655b
AM
9898@@ -25,6 +25,8 @@
9899
9900 #include <asm/uaccess.h>
9901
9902+struct kernel_stat kstat;
9903+
9904 /*
9905 * Timekeeping variables
9906 */
9907@@ -598,25 +600,7 @@
9908 int cpu = smp_processor_id(), system = user_tick ^ 1;
9909
9910 update_one_process(p, user_tick, system, cpu);
9911- if (p->pid) {
9912- if (--p->counter <= 0) {
9913- p->counter = 0;
9914- /*
9915- * SCHED_FIFO is priority preemption, so this is
9916- * not the place to decide whether to reschedule a
9917- * SCHED_FIFO task or not - Bhavesh Davda
9918- */
9919- if (p->policy != SCHED_FIFO) {
9920- p->need_resched = 1;
9921- }
9922- }
9923- if (p->nice > 0)
9924- kstat.per_cpu_nice[cpu] += user_tick;
9925- else
9926- kstat.per_cpu_user[cpu] += user_tick;
9927- kstat.per_cpu_system[cpu] += system;
9928- } else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
9929- kstat.per_cpu_system[cpu] += system;
9930+ scheduler_tick(user_tick, system);
9931 }
9932
9933 /*
9934@@ -624,17 +608,7 @@
9935 */
9936 static unsigned long count_active_tasks(void)
9937 {
9938- struct task_struct *p;
9939- unsigned long nr = 0;
9940-
9941- read_lock(&tasklist_lock);
9942- for_each_task(p) {
9943- if ((p->state == TASK_RUNNING ||
9944- (p->state & TASK_UNINTERRUPTIBLE)))
9945- nr += FIXED_1;
9946- }
9947- read_unlock(&tasklist_lock);
9948- return nr;
9949+ return (nr_running() + nr_uninterruptible()) * FIXED_1;
9950 }
9951
9952 /*
9953@@ -827,6 +801,89 @@
9954
9955 #endif
9956
9957+static void process_timeout(unsigned long __data)
9958+{
9959+ wake_up_process((task_t *)__data);
9960+}
9961+
9962+/**
9963+ * schedule_timeout - sleep until timeout
9964+ * @timeout: timeout value in jiffies
9965+ *
9966+ * Make the current task sleep until @timeout jiffies have
9967+ * elapsed. The routine will return immediately unless
9968+ * the current task state has been set (see set_current_state()).
9969+ *
9970+ * You can set the task state as follows -
9971+ *
9972+ * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
9973+ * pass before the routine returns. The routine will return 0
9974+ *
9975+ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
9976+ * delivered to the current task. In this case the remaining time
9977+ * in jiffies will be returned, or 0 if the timer expired in time
9978+ *
9979+ * The current task state is guaranteed to be TASK_RUNNING when this
9980+ * routine returns.
9981+ *
9982+ * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
9983+ * the CPU away without a bound on the timeout. In this case the return
9984+ * value will be %MAX_SCHEDULE_TIMEOUT.
9985+ *
9986+ * In all cases the return value is guaranteed to be non-negative.
9987+ */
9988+signed long schedule_timeout(signed long timeout)
9989+{
9990+ struct timer_list timer;
9991+ unsigned long expire;
9992+
9993+ switch (timeout)
9994+ {
9995+ case MAX_SCHEDULE_TIMEOUT:
9996+ /*
 9997+		/*
 9998+		 * These two special cases are useful for the caller's
 9999+		 * convenience. Nothing more. We could take
 10000+		 * MAX_SCHEDULE_TIMEOUT from one of the negative values,
 10001+		 * but I'd like to return a valid offset (>=0) to allow
10002+ */
10003+ schedule();
10004+ goto out;
10005+ default:
10006+ /*
 10007+		 * Another bit of paranoia. Note that the retval will be
 10008+		 * 0 since no piece of the kernel is supposed to check
 10009+		 * for a negative retval of schedule_timeout() (since it
 10010+		 * should never happen anyway). You just have the printk()
 10011+		 * that will tell you if something went wrong and where.
10012+ */
10013+ if (timeout < 0)
10014+ {
10015+ printk(KERN_ERR "schedule_timeout: wrong timeout "
10016+ "value %lx from %p\n", timeout,
10017+ __builtin_return_address(0));
10018+ current->state = TASK_RUNNING;
10019+ goto out;
10020+ }
10021+ }
10022+
10023+ expire = timeout + jiffies;
10024+
10025+ init_timer(&timer);
10026+ timer.expires = expire;
10027+ timer.data = (unsigned long) current;
10028+ timer.function = process_timeout;
10029+
10030+ add_timer(&timer);
10031+ schedule();
10032+ del_timer_sync(&timer);
10033+
10034+ timeout = expire - jiffies;
10035+
10036+ out:
10037+ return timeout < 0 ? 0 : timeout;
10038+}
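/*
 * Typical use of schedule_timeout() as documented above: set the task
 * state first, then sleep. With TASK_INTERRUPTIBLE the return value is the
 * remaining time if a signal woke us early, 0 otherwise. The one second
 * delay is just an example value.
 */
static signed long sketch_sleep_one_second(void)
{
	set_current_state(TASK_INTERRUPTIBLE);
	return schedule_timeout(HZ);	/* ~1 second unless interrupted */
}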
10039+
10040 /* Thread ID - the internal kernel "pid" */
10041 asmlinkage long sys_gettid(void)
10042 {
10043@@ -873,4 +930,3 @@
10044 }
10045 return 0;
10046 }
10047-
0aa7655b 10048diff -urN linux-2.4.24.org/mm/oom_kill.c linux-2.4.24/mm/oom_kill.c
5d16fd25
AM
10049--- linux-2.4.24.org/mm/oom_kill.c 2004-02-04 20:47:28.626569974 +0100
10050+++ linux-2.4.24/mm/oom_kill.c 2004-02-04 20:57:30.567369583 +0100
0aa7655b
AM
10051@@ -86,7 +86,7 @@
10052 * Niced processes are most likely less important, so double
10053 * their badness points.
10054 */
10055- if (p->nice > 0)
10056+ if (task_nice(p) > 0)
10057 points *= 2;
10058
10059 /*
10060@@ -150,7 +150,7 @@
10061 * all the memory it needs. That way it should be able to
10062 * exit() and clear out its resources quickly...
10063 */
10064- p->counter = 5 * HZ;
10065+ p->time_slice = HZ;
10066 p->flags |= PF_MEMALLOC | PF_MEMDIE;
10067
10068 /* This process has hardware access, be more careful. */
10069diff -urN linux-2.4.24.org/net/bluetooth/bnep/core.c linux-2.4.24/net/bluetooth/bnep/core.c
5d16fd25
AM
10070--- linux-2.4.24.org/net/bluetooth/bnep/core.c 2004-02-04 20:48:41.535404904 +0100
10071+++ linux-2.4.24/net/bluetooth/bnep/core.c 2004-02-04 20:52:55.199643577 +0100
0aa7655b
AM
10072@@ -460,7 +460,7 @@
10073 sigfillset(&current->blocked);
10074 flush_signals(current);
10075
10076- current->nice = -15;
10077+ set_user_nice(current, -15);
10078
10079 set_fs(KERNEL_DS);
10080
10081diff -urN linux-2.4.24.org/net/bluetooth/cmtp/core.c linux-2.4.24/net/bluetooth/cmtp/core.c
5d16fd25
AM
10082--- linux-2.4.24.org/net/bluetooth/cmtp/core.c 2004-02-04 20:48:41.311451486 +0100
10083+++ linux-2.4.24/net/bluetooth/cmtp/core.c 2004-02-04 20:52:55.224638378 +0100
0aa7655b
AM
10084@@ -298,7 +298,7 @@
10085 sigfillset(&current->blocked);
10086 flush_signals(current);
10087
10088- current->nice = -15;
10089+ set_user_nice(current, -15);
10090
10091 set_fs(KERNEL_DS);
10092