1diff -urN linux-2.4.20/CREDITS linux-2.4.20-o1-preempt/CREDITS
2--- linux-2.4.20/CREDITS Fri Nov 29 00:53:08 2002
3+++ linux-2.4.20-o1-preempt/CREDITS Tue Feb 18 03:52:06 2003
4@@ -1001,8 +1001,8 @@
5
6 N: Nigel Gamble
7 E: nigel@nrg.org
8-E: nigel@sgi.com
9 D: Interrupt-driven printer driver
10+D: Preemptible kernel
11 S: 120 Alley Way
12 S: Mountain View, California 94040
13 S: USA
14diff -urN linux-2.4.20/Documentation/Configure.help linux-2.4.20-o1-preempt/Documentation/Configure.help
15--- linux-2.4.20/Documentation/Configure.help Fri Nov 29 00:53:08 2002
16+++ linux-2.4.20-o1-preempt/Documentation/Configure.help Tue Feb 18 03:52:06 2003
17@@ -279,6 +279,17 @@
18 If you have a system with several CPUs, you do not need to say Y
19 here: the local APIC will be used automatically.
20
21+Preemptible Kernel
22+CONFIG_PREEMPT
23+ This option reduces the latency of the kernel when reacting to
24+ real-time or interactive events by allowing a low priority process to
25+ be preempted even if it is in kernel mode executing a system call.
26+ This allows applications to run more reliably even when the system is
27+ under load.
28+
29+ Say Y here if you are building a kernel for a desktop, embedded or
30+ real-time system. Say N if you are unsure.
31+
32 Kernel math emulation
33 CONFIG_MATH_EMULATION
34 Linux can emulate a math coprocessor (used for floating point
35@@ -4094,6 +4105,38 @@
36 You may say M here for module support and later load the module when
37 you have use for it; the module is called binfmt_misc.o. If you
38 don't know what to answer at this point, say Y.
39+
40+Maximum User Real-Time Priority
41+CONFIG_MAX_USER_RT_PRIO
42+ The maximum user real-time priority. Tasks with priorities from
43+ zero through one less than this value are scheduled as real-time.
44+ To the application, a higher priority value implies a higher
45+ priority task.
46+
47+ The minimum allowed value is 100 and the maximum allowed value
48+  is 1000 (an arbitrary cap). Values specified outside this range
49+  are clamped to it at compile time. The default is 100.
50+ Setting this higher than 100 is safe but will result in slightly
51+ more processing overhead in the scheduler.
52+
53+ Unless you are doing specialized real-time computing and require
54+ a much larger range than usual, the default is fine.
55+
56+Maximum Kernel Real-Time Priority
57+CONFIG_MAX_RT_PRIO
58+ The difference between the maximum real-time priority and the
59+ maximum user real-time priority. Usually this value is zero,
60+ which sets the maximum real-time priority to the same as the
61+ maximum user real-time priority. Setting this higher,
62+ however, will allow kernel threads to set their priority to a
63+ value higher than any user task. This is safe, but will result
64+ in slightly more processing overhead in the scheduler.
65+
66+ This value can be at most 200. The default is zero, i.e. the
67+ maximum priority and maximum user priority are the same.
68+
69+ Unless you are doing specialized real-time programming with
70+ kernel threads, the default is fine.
71
72 Kernel support for JAVA binaries
73 CONFIG_BINFMT_JAVA
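The help text added above for CONFIG_MAX_USER_RT_PRIO and CONFIG_MAX_RT_PRIO describes how the user-visible real-time priority range is derived. As a rough illustration only (this snippet is not part of the patch), a user-space program would discover and use that range as follows; with the default CONFIG_MAX_USER_RT_PRIO of 100, sched_get_priority_max(SCHED_FIFO) is expected to report 99:

    /* Hypothetical user-space example, not part of the patch: request the
     * highest real-time priority exported to user space.  With the default
     * CONFIG_MAX_USER_RT_PRIO of 100 this is expected to be priority 99.
     */
    #include <sched.h>
    #include <stdio.h>

    int main(void)
    {
        struct sched_param sp;
        int max = sched_get_priority_max(SCHED_FIFO);

        if (max < 0) {
            perror("sched_get_priority_max");
            return 1;
        }
        sp.sched_priority = max;
        if (sched_setscheduler(0, SCHED_FIFO, &sp) < 0) {
            perror("sched_setscheduler");   /* typically requires root */
            return 1;
        }
        printf("running SCHED_FIFO at priority %d\n", sp.sched_priority);
        return 0;
    }

Raising CONFIG_MAX_USER_RT_PRIO only widens this range; the user-space API stays the same.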
74diff -urN linux-2.4.20/Documentation/preempt-locking.txt linux-2.4.20-o1-preempt/Documentation/preempt-locking.txt
75--- linux-2.4.20/Documentation/preempt-locking.txt Thu Jan 1 01:00:00 1970
76+++ linux-2.4.20-o1-preempt/Documentation/preempt-locking.txt Tue Feb 18 03:52:06 2003
77@@ -0,0 +1,104 @@
78+ Proper Locking Under a Preemptible Kernel:
79+ Keeping Kernel Code Preempt-Safe
80+ Robert Love <rml@tech9.net>
81+ Last Updated: 22 Jan 2002
82+
83+
84+INTRODUCTION
85+
86+
87+A preemptible kernel creates new locking issues. The issues are the same as
88+those under SMP: concurrency and reentrancy. Thankfully, the Linux preemptible
89+kernel model leverages existing SMP locking mechanisms. Thus, the kernel
90+requires explicit additional locking for very few additional situations.
91+
92+This document is for all kernel hackers. Developing code in the kernel
93+requires protecting these situations.
94+
95+
96+RULE #1: Per-CPU data structures need explicit protection
97+
98+
99+Two similar problems arise. An example code snippet:
100+
101+ struct this_needs_locking tux[NR_CPUS];
102+ tux[smp_processor_id()] = some_value;
103+ /* task is preempted here... */
104+ something = tux[smp_processor_id()];
105+
106+First, since the data is per-CPU, it may not have explicit SMP locking, but
107+require it otherwise. Second, when a preempted task is finally rescheduled,
108+the previous value of smp_processor_id may not equal the current. You must
109+protect these situations by disabling preemption around them.
110+
111+
112+RULE #2: CPU state must be protected.
113+
114+
115+Under preemption, the state of the CPU must be protected. This is arch-
116+dependent, but includes CPU structures and state not preserved over a context
117+switch. For example, on x86, entering and exiting FPU mode is now a critical
118+section that must occur while preemption is disabled. Think what would happen
119+if the kernel is executing a floating-point instruction and is then preempted.
120+Remember, the kernel does not save FPU state except for user tasks. Therefore,
121+upon preemption, the FPU registers will be sold to the lowest bidder. Thus,
122+preemption must be disabled around such regions.
123+
124+Note, some FPU functions are already explicitly preempt safe. For example,
125+kernel_fpu_begin and kernel_fpu_end will disable and enable preemption.
126+However, math_state_restore must be called with preemption disabled.
127+
128+
129+RULE #3: Lock acquire and release must be performed by same task
130+
131+
132+A lock acquired in one task must be released by the same task. This
133+means you can't do oddball things like acquire a lock and go off to
134+play while another task releases it. If you want to do something
135+like this, acquire and release the lock in the same code path and
136+have the caller wait on an event by the other task.
137+
138+
139+SOLUTION
140+
141+
142+Data protection under preemption is achieved by disabling preemption for the
143+duration of the critical region.
144+
145+preempt_enable() decrement the preempt counter
146+preempt_disable() increment the preempt counter
147+preempt_enable_no_resched() decrement, but do not immediately preempt
148+preempt_get_count() return the preempt counter
149+
150+The functions are nestable. In other words, you can call preempt_disable
151+n times in a code path, and preemption will not be reenabled until the n-th
152+call to preempt_enable. The preempt statements compile to nothing if
153+preemption is not enabled.
154+
155+Note that you do not need to explicitly prevent preemption if you are holding
156+any locks or interrupts are disabled, since preemption is implicitly disabled
157+in those cases.
158+
159+Example:
160+
161+ cpucache_t *cc; /* this is per-CPU */
162+ preempt_disable();
163+ cc = cc_data(searchp);
164+ if (cc && cc->avail) {
165+ __free_block(searchp, cc_entry(cc), cc->avail);
166+ cc->avail = 0;
167+ }
168+ preempt_enable();
169+ return 0;
170+
171+Notice how the preemption statements must encompass every reference of the
172+critical variables. Another example:
173+
174+ int buf[NR_CPUS];
175+ set_cpu_val(buf);
176+	if (buf[smp_processor_id()] == -1) printk(KERN_INFO "wee!\n");
177+ spin_lock(&buf_lock);
178+ /* ... */
179+
180+This code is not preempt-safe, but see how easily we can fix it by simply
181+moving the spin_lock up two lines.
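To make Rule #1 concrete, the following sketch shows the per-CPU pattern described above. It is an illustration written for this document rather than code from the patch, and the names stat_count and bump_local_stat are invented; the point is that both uses of smp_processor_id() sit inside a single preempt_disable()/preempt_enable() pair, so the task cannot migrate between them:

    /* Illustration only, not from the patch.  The preempt_*() macros come
     * from the core headers this patch adds; <linux/sched.h> is assumed to
     * pull them in here.
     */
    #include <linux/sched.h>
    #include <linux/smp.h>
    #include <linux/threads.h>          /* NR_CPUS */

    static unsigned long stat_count[NR_CPUS];   /* invented per-CPU counter */

    static void bump_local_stat(void)
    {
        int cpu;

        preempt_disable();              /* pin this task to the current CPU */
        cpu = smp_processor_id();
        stat_count[cpu]++;              /* safe: no migration can happen here */
        preempt_enable();               /* a pending reschedule may run now */
    }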
182diff -urN linux-2.4.20/Documentation/sched-coding.txt linux-2.4.20-o1-preempt/Documentation/sched-coding.txt
183--- linux-2.4.20/Documentation/sched-coding.txt Thu Jan 1 01:00:00 1970
184+++ linux-2.4.20-o1-preempt/Documentation/sched-coding.txt Tue Feb 18 03:51:29 2003
185@@ -0,0 +1,126 @@
186+ Reference for various scheduler-related methods in the O(1) scheduler
187+ Robert Love <rml@tech9.net>, MontaVista Software
188+
189+
190+Note most of these methods are local to kernel/sched.c - this is by design.
191+The scheduler is meant to be self-contained and abstracted away. This document
192+is primarily for understanding the scheduler, not interfacing to it. Some of
193+the discussed interfaces, however, are general process/scheduling methods.
194+They are typically defined in include/linux/sched.h.
195+
196+
197+Main Scheduling Methods
198+-----------------------
199+
200+void load_balance(runqueue_t *this_rq, int idle)
201+ Attempts to pull tasks from one cpu to another to balance cpu usage,
202+ if needed. This method is called explicitly if the runqueues are
203+	imbalanced or periodically by the timer tick. Prior to calling,
204+ the current runqueue must be locked and interrupts disabled.
205+
206+void schedule()
207+ The main scheduling function. Upon return, the highest priority
208+ process will be active.
209+
210+
211+Locking
212+-------
213+
214+Each runqueue has its own lock, rq->lock. When multiple runqueues need
215+to be locked, lock acquires must be ordered by ascending &runqueue value.
216+
217+A specific runqueue is locked via
218+
219+ task_rq_lock(task_t pid, unsigned long *flags)
220+
221+which disables preemption, disables interrupts, and locks the runqueue pid is
222+running on. Likewise,
223+
224+ task_rq_unlock(task_t pid, unsigned long *flags)
225+
226+unlocks the runqueue pid is running on, restores interrupts to their previous
227+state, and reenables preemption.
228+
229+The routines
230+
231+ double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
232+
233+and
234+
235+	double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2)
236+
237+safely lock and unlock, respectively, the two specified runqueues. They do
238+not, however, disable and restore interrupts. Users are required to do so
239+manually before and after calls.
240+
241+
242+Values
243+------
244+
245+MAX_PRIO
246+ The maximum priority of the system, stored in the task as task->prio.
247+ Lower priorities are higher. Normal (non-RT) priorities range from
248+ MAX_RT_PRIO to (MAX_PRIO - 1).
249+MAX_RT_PRIO
250+ The maximum real-time priority of the system. Valid RT priorities
251+ range from 0 to (MAX_RT_PRIO - 1).
252+MAX_USER_RT_PRIO
253+ The maximum real-time priority that is exported to user-space. Should
254+ always be equal to or less than MAX_RT_PRIO. Setting it less allows
255+ kernel threads to have higher priorities than any user-space task.
256+MIN_TIMESLICE
257+MAX_TIMESLICE
258+ Respectively, the minimum and maximum timeslices (quanta) of a process.
259+
260+Data
261+----
262+
263+struct runqueue
264+ The main per-CPU runqueue data structure.
265+struct task_struct
266+ The main per-process data structure.
267+
268+
269+General Methods
270+---------------
271+
272+cpu_rq(cpu)
273+ Returns the runqueue of the specified cpu.
274+this_rq()
275+ Returns the runqueue of the current cpu.
276+task_rq(pid)
277+ Returns the runqueue which holds the specified pid.
278+cpu_curr(cpu)
279+ Returns the task currently running on the given cpu.
280+rt_task(pid)
281+ Returns true if pid is real-time, false if not.
282+
283+
284+Process Control Methods
285+-----------------------
286+
287+void set_user_nice(task_t *p, long nice)
288+ Sets the "nice" value of task p to the given value.
289+int setscheduler(pid_t pid, int policy, struct sched_param *param)
290+ Sets the scheduling policy and parameters for the given pid.
291+void set_cpus_allowed(task_t *p, unsigned long new_mask)
292+ Sets a given task's CPU affinity and migrates it to a proper cpu.
293+	Callers must have a valid reference to the task and must ensure the
294+	task does not exit prematurely. No locks can be held during the call.
295+set_task_state(tsk, state_value)
296+ Sets the given task's state to the given value.
297+set_current_state(state_value)
298+ Sets the current task's state to the given value.
299+void set_tsk_need_resched(struct task_struct *tsk)
300+ Sets need_resched in the given task.
301+void clear_tsk_need_resched(struct task_struct *tsk)
302+ Clears need_resched in the given task.
303+void set_need_resched()
304+ Sets need_resched in the current task.
305+void clear_need_resched()
306+ Clears need_resched in the current task.
307+int need_resched()
308+ Returns true if need_resched is set in the current task, false
309+ otherwise.
310+yield()
311+ Place the current process at the end of the runqueue and call schedule.
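The lock-ordering rule stated above (multiple runqueue locks must be taken in ascending &runqueue order) is the part most easily gotten wrong, so here is a sketch of the discipline. It is an illustration for this document, not code from the patch; lock_two_runqueues is an invented name standing in for what double_rq_lock() is described as doing, and interrupts are assumed to already be disabled by the caller, as required above:

    /* Illustration only, not from the patch: take two runqueue locks in a
     * deadlock-free order by always locking the lower address first.
     * Interrupts must already be disabled by the caller.
     */
    static void lock_two_runqueues(runqueue_t *rq1, runqueue_t *rq2)
    {
        if (rq1 == rq2) {
            spin_lock(&rq1->lock);
        } else if (rq1 < rq2) {
            spin_lock(&rq1->lock);
            spin_lock(&rq2->lock);
        } else {
            spin_lock(&rq2->lock);
            spin_lock(&rq1->lock);
        }
    }

Unlock order does not matter; only the acquisition order has to be consistent to avoid an AB-BA deadlock between two CPUs.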
312diff -urN linux-2.4.20/Documentation/sched-design.txt linux-2.4.20-o1-preempt/Documentation/sched-design.txt
313--- linux-2.4.20/Documentation/sched-design.txt Thu Jan 1 01:00:00 1970
314+++ linux-2.4.20-o1-preempt/Documentation/sched-design.txt Tue Feb 18 03:51:29 2003
315@@ -0,0 +1,165 @@
316+ Goals, Design and Implementation of the
317+ new ultra-scalable O(1) scheduler
318+
319+
320+ This is an edited version of an email Ingo Molnar sent to
321+ lkml on 4 Jan 2002. It describes the goals, design, and
322+ implementation of Ingo's new ultra-scalable O(1) scheduler.
323+ Last Updated: 18 April 2002.
324+
325+
326+Goal
327+====
328+
329+The main goal of the new scheduler is to keep all the good things we know
330+and love about the current Linux scheduler:
331+
332+ - good interactive performance even during high load: if the user
333+ types or clicks then the system must react instantly and must execute
334+ the user tasks smoothly, even during considerable background load.
335+
336+ - good scheduling/wakeup performance with 1-2 runnable processes.
337+
338+ - fairness: no process should stay without any timeslice for any
339+ unreasonable amount of time. No process should get an unjustly high
340+ amount of CPU time.
341+
342+ - priorities: less important tasks can be started with lower priority,
343+ more important tasks with higher priority.
344+
345+ - SMP efficiency: no CPU should stay idle if there is work to do.
346+
347+ - SMP affinity: processes which run on one CPU should stay affine to
348+ that CPU. Processes should not bounce between CPUs too frequently.
349+
350+ - plus additional scheduler features: RT scheduling, CPU binding.
351+
352+and the goal is also to add a few new things:
353+
354+ - fully O(1) scheduling. Are you tired of the recalculation loop
355+ blowing the L1 cache away every now and then? Do you think the goodness
356+ loop is taking a bit too long to finish if there are lots of runnable
357+ processes? This new scheduler takes no prisoners: wakeup(), schedule(),
358+ the timer interrupt are all O(1) algorithms. There is no recalculation
359+ loop. There is no goodness loop either.
360+
361+ - 'perfect' SMP scalability. With the new scheduler there is no 'big'
362+ runqueue_lock anymore - it's all per-CPU runqueues and locks - two
363+ tasks on two separate CPUs can wake up, schedule and context-switch
364+ completely in parallel, without any interlocking. All
365+ scheduling-relevant data is structured for maximum scalability.
366+
367+ - better SMP affinity. The old scheduler has a particular weakness that
368+    causes tasks to bounce randomly between CPUs if/when there are higher
369+    priority/interactive tasks; this was observed and reported by many
370+ people. The reason is that the timeslice recalculation loop first needs
371+ every currently running task to consume its timeslice. But when this
372+ happens on eg. an 8-way system, then this property starves an
373+ increasing number of CPUs from executing any process. Once the last
374+ task that has a timeslice left has finished using up that timeslice,
375+ the recalculation loop is triggered and other CPUs can start executing
376+ tasks again - after having idled around for a number of timer ticks.
377+ The more CPUs, the worse this effect.
378+
379+ Furthermore, this same effect causes the bouncing effect as well:
380+ whenever there is such a 'timeslice squeeze' of the global runqueue,
381+ idle processors start executing tasks which are not affine to that CPU.
382+ (because the affine tasks have finished off their timeslices already.)
383+
384+ The new scheduler solves this problem by distributing timeslices on a
385+ per-CPU basis, without having any global synchronization or
386+ recalculation.
387+
388+ - batch scheduling. A significant proportion of computing-intensive tasks
389+ benefit from batch-scheduling, where timeslices are long and processes
390+ are roundrobin scheduled. The new scheduler does such batch-scheduling
391+ of the lowest priority tasks - so nice +19 jobs will get
392+ 'batch-scheduled' automatically. With this scheduler, nice +19 jobs are
393+ in essence SCHED_IDLE, from an interactiveness point of view.
394+
395+ - handle extreme loads more smoothly, without breakdown and scheduling
396+ storms.
397+
398+ - O(1) RT scheduling. For those RT folks who are paranoid about the
399+ O(nr_running) property of the goodness loop and the recalculation loop.
400+
401+ - run fork()ed children before the parent. Andrea has pointed out the
402+ advantages of this a few months ago, but patches for this feature
403+ do not work with the old scheduler as well as they should,
404+ because idle processes often steal the new child before the fork()ing
405+ CPU gets to execute it.
406+
407+
408+Design
409+======
410+
411+the core of the new scheduler consists of the following mechanisms:
412+
413+ - *two*, priority-ordered 'priority arrays' per CPU. There is an 'active'
414+ array and an 'expired' array. The active array contains all tasks that
415+ are affine to this CPU and have timeslices left. The expired array
416+ contains all tasks which have used up their timeslices - but this array
417+   is kept sorted as well. The active and expired arrays are not accessed
418+   directly, they are accessed through two pointers in the per-CPU runqueue
419+ structure. If all active tasks are used up then we 'switch' the two
420+ pointers and from now on the ready-to-go (former-) expired array is the
421+ active array - and the empty active array serves as the new collector
422+ for expired tasks.
423+
424+ - there is a 64-bit bitmap cache for array indices. Finding the highest
425+ priority task is thus a matter of two x86 BSFL bit-search instructions.
426+
427+the split-array solution enables us to have an arbitrary number of active
428+and expired tasks, and the recalculation of timeslices can be done
429+immediately when the timeslice expires. Because the arrays are always
430+accessed through the pointers in the runqueue, switching the two arrays can
431+be done very quickly.
432+
433+this is a hybrid priority-list approach coupled with roundrobin
434+scheduling and the array-switch method of distributing timeslices.
435+
436+ - there is a per-task 'load estimator'.
437+
438+one of the toughest things to get right is good interactive feel during
439+heavy system load. While playing with various scheduler variants i found
440+that the best interactive feel is achieved not by 'boosting' interactive
441+tasks, but by 'punishing' tasks that want to use more CPU time than there
442+is available. This method is also much easier to do in an O(1) fashion.
443+
444+to establish the actual 'load' the task contributes to the system, a
445+complex-looking but pretty accurate method is used: there is a 4-entry
446+'history' ringbuffer of the task's activities during the last 4 seconds.
447+This ringbuffer is operated without much overhead. The entries tell the
448+scheduler a pretty accurate load-history of the task: has it used up more
449+CPU time or less during the past N seconds. [the size '4' and the interval
450+of 4x 1 seconds was found by lots of experimentation - this part is
451+flexible and can be changed in both directions.]
452+
453+the penalty a task gets for generating more load than the CPU can handle
454+is a priority decrease - there is a maximum amount to this penalty
455+relative to their static priority, so even fully CPU-bound tasks will
456+observe each other's priorities, and will share the CPU accordingly.
457+
458+the SMP load-balancer can be extended/switched with additional parallel
459+computing and cache hierarchy concepts: NUMA scheduling, multi-core CPUs
460+can be supported easily by changing the load-balancer. Right now it's
461+tuned for my SMP systems.
462+
463+i skipped the prev->mm == next->mm advantage - no workload i know of shows
464+any sensitivity to this. It can be added back by sacrificing O(1)
465+schedule() [the current and one-lower priority list can be searched for a
466+that->mm == current->mm condition], but costs a fair number of cycles
467+during a number of important workloads, so i wanted to avoid this as much
468+as possible.
469+
470+- the SMP idle-task startup code was still racy and the new scheduler
471+triggered this. So i streamlined the idle-setup code a bit. We do not call
472+into schedule() before all processors have started up fully and all idle
473+threads are in place.
474+
475+- the patch also cleans up a number of aspects of sched.c - moves code
476+into other areas of the kernel where it's appropriate, and simplifies
477+certain code paths and data constructs. As a result, the new scheduler's
478+code is smaller than the old one.
479+
480+ Ingo
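The two mechanisms described above (per-priority task lists plus a bitmap of non-empty priorities) are what make the "two BSFL instructions" claim work. The following sketch is an illustration written for this document, not code from the patch; prio_array and pick_next are simplified stand-ins for the structures and helpers in kernel/sched.c, and sched_find_first_bit() is assumed to be the bit-search helper this patch introduces:

    /* Illustration only: find the highest-priority runnable task in O(1) by
     * consulting a small bitmap instead of walking every runnable task.
     * Sizes and field names are schematic; see kernel/sched.c for the real
     * prio_array.
     */
    #include <linux/sched.h>
    #include <linux/list.h>
    #include <asm/bitops.h>         /* sched_find_first_bit(), added by this patch */

    struct prio_array {
        int nr_active;
        unsigned long bitmap[(MAX_PRIO + BITS_PER_LONG - 1) / BITS_PER_LONG];
        struct list_head queue[MAX_PRIO];
    };

    static struct task_struct *pick_next(struct prio_array *array)
    {
        int idx = sched_find_first_bit(array->bitmap);

        if (idx >= MAX_PRIO)
            return NULL;            /* no runnable task on this CPU */
        /* run_list is the list hook the O(1) scheduler adds to task_struct */
        return list_entry(array->queue[idx].next, struct task_struct, run_list);
    }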
481diff -urN linux-2.4.20/MAINTAINERS linux-2.4.20-o1-preempt/MAINTAINERS
482--- linux-2.4.20/MAINTAINERS Fri Nov 29 00:53:08 2002
483+++ linux-2.4.20-o1-preempt/MAINTAINERS Tue Feb 18 03:52:07 2003
484@@ -1310,6 +1310,14 @@
485 M: mostrows@styx.uwaterloo.ca
486 S: Maintained
487
488+PREEMPTIBLE KERNEL
489+P: Robert M. Love
490+M: rml@tech9.net
491+L: linux-kernel@vger.kernel.org
492+L: kpreempt-tech@lists.sourceforge.net
493+W: http://tech9.net/rml/linux
494+S: Supported
495+
496 PROMISE DC4030 CACHING DISK CONTROLLER DRIVER
497 P: Peter Denison
498 M: promise@pnd-pc.demon.co.uk
499diff -urN linux-2.4.20/arch/alpha/config.in linux-2.4.20-o1-preempt/arch/alpha/config.in
500--- linux-2.4.20/arch/alpha/config.in Fri Nov 29 00:53:08 2002
501+++ linux-2.4.20-o1-preempt/arch/alpha/config.in Tue Feb 18 03:51:29 2003
502@@ -273,6 +273,8 @@
503 bool 'System V IPC' CONFIG_SYSVIPC
504 bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
505 bool 'Sysctl support' CONFIG_SYSCTL
506+int 'Maximum User Real-Time Priority' CONFIG_MAX_USER_RT_PRIO 100
507+int 'Maximum Kernel Real-time Priority' CONFIG_MAX_RT_PRIO 0
508 if [ "$CONFIG_PROC_FS" = "y" ]; then
509 choice 'Kernel core (/proc/kcore) format' \
510 "ELF CONFIG_KCORE_ELF \
511--- linux-2.4.20/arch/alpha/kernel/entry.S.orig Fri Mar 7 16:13:50 2003
512+++ linux-2.4.20/arch/alpha/kernel/entry.S Fri Mar 7 16:43:09 2003
513@@ -695,7 +695,9 @@
514 ret_from_fork:
515 lda $26,ret_from_sys_call
516 mov $17,$16
517+#if CONFIG_SMP
518 jsr $31,schedule_tail
519+#endif
520 .end ret_from_fork
521
522 .align 3
523diff -urN linux-2.4.20/arch/alpha/kernel/process.c linux-2.4.20-o1-preempt/arch/alpha/kernel/process.c
524--- linux-2.4.20/arch/alpha/kernel/process.c Sun Sep 30 21:26:08 2001
525+++ linux-2.4.20-o1-preempt/arch/alpha/kernel/process.c Tue Feb 18 03:51:29 2003
526@@ -74,9 +74,6 @@
527 cpu_idle(void)
528 {
529 /* An endless idle loop with no priority at all. */
530- current->nice = 20;
531- current->counter = -100;
532-
533 while (1) {
534 /* FIXME -- EV6 and LCA45 know how to power down
535 the CPU. */
536diff -urN linux-2.4.20/arch/alpha/kernel/smp.c linux-2.4.20-o1-preempt/arch/alpha/kernel/smp.c
537--- linux-2.4.20/arch/alpha/kernel/smp.c Sat Aug 3 02:39:42 2002
538+++ linux-2.4.20-o1-preempt/arch/alpha/kernel/smp.c Tue Feb 18 03:51:29 2003
539@@ -82,6 +82,7 @@
540 int smp_num_cpus = 1; /* Number that came online. */
541 int smp_threads_ready; /* True once the per process idle is forked. */
542 cycles_t cacheflush_time;
543+unsigned long cache_decay_ticks;
544
545 int __cpu_number_map[NR_CPUS];
546 int __cpu_logical_map[NR_CPUS];
547@@ -156,11 +157,6 @@
548 {
549 int cpuid = hard_smp_processor_id();
550
551- if (current != init_tasks[cpu_number_map(cpuid)]) {
552- printk("BUG: smp_calling: cpu %d current %p init_tasks[cpu_number_map(cpuid)] %p\n",
553- cpuid, current, init_tasks[cpu_number_map(cpuid)]);
554- }
555-
556 DBGS(("CALLIN %d state 0x%lx\n", cpuid, current->state));
557
558 /* Turn on machine checks. */
559@@ -215,9 +211,6 @@
560 DBGS(("smp_callin: commencing CPU %d current %p\n",
561 cpuid, current));
562
563- /* Setup the scheduler for this processor. */
564- init_idle();
565-
566 /* ??? This should be in init_idle. */
567 atomic_inc(&init_mm.mm_count);
568 current->active_mm = &init_mm;
569@@ -236,8 +229,9 @@
570 smp_tune_scheduling (int cpuid)
571 {
572 struct percpu_struct *cpu;
573- unsigned long on_chip_cache;
574- unsigned long freq;
575+ unsigned long on_chip_cache; /* kB */
576+ unsigned long freq; /* Hz */
577+ unsigned long bandwidth = 350; /* MB/s */
578
579 cpu = (struct percpu_struct*)((char*)hwrpb + hwrpb->processor_offset
580 + cpuid * hwrpb->processor_size);
581@@ -258,29 +252,21 @@
582
583 case EV6_CPU:
584 case EV67_CPU:
585- on_chip_cache = 64 + 64;
586- break;
587-
588 default:
589- on_chip_cache = 8 + 8;
590+ on_chip_cache = 64 + 64;
591 break;
592 }
593
594 freq = hwrpb->cycle_freq ? : est_cycle_freq;
595
596-#if 0
597- /* Magic estimation stolen from x86 port. */
598- cacheflush_time = freq / 1024L * on_chip_cache / 5000L;
599-
600- printk("Using heuristic of %d cycles.\n",
601- cacheflush_time);
602-#else
603- /* Magic value to force potential preemption of other CPUs. */
604- cacheflush_time = INT_MAX;
605+ cacheflush_time = (freq / 1000000) * (on_chip_cache << 10) / bandwidth;
606+ cache_decay_ticks = cacheflush_time / (freq / 1000) * HZ / 1000;
607
608- printk("Using heuristic of %d cycles.\n",
609- cacheflush_time);
610-#endif
611+ printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
612+ cacheflush_time/(freq/1000000),
613+ (cacheflush_time*100/(freq/1000000)) % 100);
614+ printk("task migration cache decay timeout: %ld msecs.\n",
615+ (cache_decay_ticks + 1) * 1000 / HZ);
616 }
617
618 /*
619@@ -505,14 +491,11 @@
620 if (idle == &init_task)
621 panic("idle process is init_task for CPU %d", cpuid);
622
623- idle->processor = cpuid;
624- idle->cpus_runnable = 1 << cpuid; /* we schedule the first task manually */
625+ init_idle(idle, cpuid);
626+ unhash_process(idle);
627+
628 __cpu_logical_map[cpunum] = cpuid;
629 __cpu_number_map[cpuid] = cpunum;
630-
631- del_from_runqueue(idle);
632- unhash_process(idle);
633- init_tasks[cpunum] = idle;
634
635 DBGS(("smp_boot_one_cpu: CPU %d state 0x%lx flags 0x%lx\n",
636 cpuid, idle->state, idle->flags));
637@@ -619,13 +602,10 @@
638
639 __cpu_number_map[boot_cpuid] = 0;
640 __cpu_logical_map[0] = boot_cpuid;
641- current->processor = boot_cpuid;
642
643 smp_store_cpu_info(boot_cpuid);
644 smp_tune_scheduling(boot_cpuid);
645 smp_setup_percpu_timer(boot_cpuid);
646-
647- init_idle();
648
649 /* ??? This should be in init_idle. */
650 atomic_inc(&init_mm.mm_count);
651diff -urN linux-2.4.20/arch/arm/config.in linux-2.4.20-o1-preempt/arch/arm/config.in
652--- linux-2.4.20/arch/arm/config.in Fri Nov 29 00:53:09 2002
653+++ linux-2.4.20-o1-preempt/arch/arm/config.in Tue Feb 18 03:52:06 2003
654@@ -372,7 +372,7 @@
655 else
656 define_bool CONFIG_DISCONTIGMEM n
657 fi
658-
659+dep_bool 'Preemptible Kernel' CONFIG_PREEMPT $CONFIG_CPU_32
660 endmenu
661
662 mainmenu_option next_comment
663@@ -427,6 +427,8 @@
664 bool 'System V IPC' CONFIG_SYSVIPC
665 bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
666 bool 'Sysctl support' CONFIG_SYSCTL
667+int 'Maximum User Real-Time Priority' CONFIG_MAX_USER_RT_PRIO 100
668+int 'Maximum Kernel Real-time Priority' CONFIG_MAX_RT_PRIO 0
669 comment 'At least one math emulation must be selected'
670 tristate 'NWFPE math emulation' CONFIG_FPE_NWFPE
671 dep_tristate 'FastFPE math emulation (experimental)' CONFIG_FPE_FASTFPE $CONFIG_EXPERIMENTAL
672diff -urN linux-2.4.20/arch/arm/kernel/entry-armv.S linux-2.4.20-o1-preempt/arch/arm/kernel/entry-armv.S
673--- linux-2.4.20/arch/arm/kernel/entry-armv.S Sat Aug 3 02:39:42 2002
674+++ linux-2.4.20-o1-preempt/arch/arm/kernel/entry-armv.S Tue Feb 18 03:52:07 2003
675@@ -697,6 +697,12 @@
676 add r4, sp, #S_SP
677 mov r6, lr
678 stmia r4, {r5, r6, r7, r8, r9} @ save sp_SVC, lr_SVC, pc, cpsr, old_ro
679+#ifdef CONFIG_PREEMPT
680+ get_current_task r9
681+ ldr r8, [r9, #TSK_PREEMPT]
682+ add r8, r8, #1
683+ str r8, [r9, #TSK_PREEMPT]
684+#endif
685 1: get_irqnr_and_base r0, r6, r5, lr
686 movne r1, sp
687 @
688@@ -704,6 +710,25 @@
689 @
690 adrsvc ne, lr, 1b
691 bne do_IRQ
692+#ifdef CONFIG_PREEMPT
693+2: ldr r8, [r9, #TSK_PREEMPT]
694+ subs r8, r8, #1
695+ bne 3f
696+ ldr r7, [r9, #TSK_NEED_RESCHED]
697+ teq r7, #0
698+ beq 3f
699+ ldr r6, .LCirqstat
700+ ldr r0, [r6, #IRQSTAT_BH_COUNT]
701+ teq r0, #0
702+ bne 3f
703+ mov r0, #MODE_SVC
704+ msr cpsr_c, r0 @ enable interrupts
705+ bl SYMBOL_NAME(preempt_schedule)
706+ mov r0, #I_BIT | MODE_SVC
707+ msr cpsr_c, r0 @ disable interrupts
708+ b 2b
709+3: str r8, [r9, #TSK_PREEMPT]
710+#endif
711 ldr r0, [sp, #S_PSR] @ irqs are already disabled
712 msr spsr, r0
713 ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr
714@@ -761,6 +786,9 @@
715 .LCprocfns: .word SYMBOL_NAME(processor)
716 #endif
717 .LCfp: .word SYMBOL_NAME(fp_enter)
718+#ifdef CONFIG_PREEMPT
719+.LCirqstat: .word SYMBOL_NAME(irq_stat)
720+#endif
721
722 irq_prio_table
723
724@@ -801,6 +829,12 @@
725 stmdb r8, {sp, lr}^
726 alignment_trap r4, r7, __temp_irq
727 zero_fp
728+ get_current_task tsk
729+#ifdef CONFIG_PREEMPT
730+ ldr r0, [tsk, #TSK_PREEMPT]
731+ add r0, r0, #1
732+ str r0, [tsk, #TSK_PREEMPT]
733+#endif
734 1: get_irqnr_and_base r0, r6, r5, lr
735 movne r1, sp
736 adrsvc ne, lr, 1b
737@@ -808,8 +842,12 @@
738 @ routine called with r0 = irq number, r1 = struct pt_regs *
739 @
740 bne do_IRQ
741+#ifdef CONFIG_PREEMPT
742+ ldr r0, [tsk, #TSK_PREEMPT]
743+ sub r0, r0, #1
744+ str r0, [tsk, #TSK_PREEMPT]
745+#endif
746 mov why, #0
747- get_current_task tsk
748 b ret_to_user
749
750 .align 5
751diff -urN linux-2.4.20/arch/arm/kernel/process.c linux-2.4.20-o1-preempt/arch/arm/kernel/process.c
752--- linux-2.4.20/arch/arm/kernel/process.c Sat Aug 3 02:39:42 2002
753+++ linux-2.4.20-o1-preempt/arch/arm/kernel/process.c Tue Feb 18 03:51:29 2003
754@@ -83,8 +83,6 @@
755 {
756 /* endless idle loop with no priority at all */
757 init_idle();
758- current->nice = 20;
759- current->counter = -100;
760
761 while (1) {
762 void (*idle)(void) = pm_idle;
763diff -urN linux-2.4.20/arch/arm/tools/getconstants.c linux-2.4.20-o1-preempt/arch/arm/tools/getconstants.c
764--- linux-2.4.20/arch/arm/tools/getconstants.c Thu Oct 11 18:04:57 2001
765+++ linux-2.4.20-o1-preempt/arch/arm/tools/getconstants.c Tue Feb 18 03:52:07 2003
766@@ -13,6 +13,7 @@
767
768 #include <asm/pgtable.h>
769 #include <asm/uaccess.h>
770+#include <asm/hardirq.h>
771
772 /*
773 * Make sure that the compiler and target are compatible.
774@@ -38,6 +39,11 @@
775
776 DEFN("TSS_SAVE", OFF_TSK(thread.save));
777 DEFN("TSS_FPESAVE", OFF_TSK(thread.fpstate.soft.save));
778+
779+#ifdef CONFIG_PREEMPT
780+DEFN("TSK_PREEMPT", OFF_TSK(preempt_count));
781+DEFN("IRQSTAT_BH_COUNT", (unsigned long)&(((irq_cpustat_t *)0)->__local_bh_count));
782+#endif
783
784 #ifdef CONFIG_CPU_32
785 DEFN("TSS_DOMAIN", OFF_TSK(thread.domain));
786diff -urN linux-2.4.20/arch/cris/config.in linux-2.4.20-o1-preempt/arch/cris/config.in
787--- linux-2.4.20/arch/cris/config.in Fri Nov 29 00:53:09 2002
788+++ linux-2.4.20-o1-preempt/arch/cris/config.in Tue Feb 18 03:51:29 2003
789@@ -29,6 +29,8 @@
790 bool 'System V IPC' CONFIG_SYSVIPC
791 bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
792 bool 'Sysctl support' CONFIG_SYSCTL
793+int 'Maximum User Real-Time Priority' CONFIG_MAX_USER_RT_PRIO 100
794+int 'Maximum Kernel Real-time Priority' CONFIG_MAX_RT_PRIO 0
795
796 tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF
797
798diff -urN linux-2.4.20/arch/cris/kernel/process.c linux-2.4.20-o1-preempt/arch/cris/kernel/process.c
799--- linux-2.4.20/arch/cris/kernel/process.c Mon Feb 25 20:37:52 2002
800+++ linux-2.4.20-o1-preempt/arch/cris/kernel/process.c Tue Feb 18 03:51:29 2003
801@@ -124,10 +124,10 @@
802
803 int cpu_idle(void *unused)
804 {
805- while(1) {
806- current->counter = -100;
807+ init_idle();
808+
809+ while(1)
810 schedule();
811- }
812 }
813
814 /* if the watchdog is enabled, we can simply disable interrupts and go
815diff -urN linux-2.4.20/arch/i386/config.in linux-2.4.20-o1-preempt/arch/i386/config.in
816--- linux-2.4.20/arch/i386/config.in Fri Nov 29 00:53:09 2002
817+++ linux-2.4.20-o1-preempt/arch/i386/config.in Tue Feb 18 03:52:06 2003
818@@ -206,6 +206,7 @@
819 bool 'Math emulation' CONFIG_MATH_EMULATION
820 bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
821 bool 'Symmetric multi-processing support' CONFIG_SMP
822+bool 'Preemptible Kernel' CONFIG_PREEMPT
823 if [ "$CONFIG_SMP" != "y" ]; then
824 bool 'Local APIC support on uniprocessors' CONFIG_X86_UP_APIC
825 dep_bool 'IO-APIC support on uniprocessors' CONFIG_X86_UP_IOAPIC $CONFIG_X86_UP_APIC
826@@ -224,9 +225,12 @@
827 define_bool CONFIG_X86_TSC y
828 fi
829
830-if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
831- define_bool CONFIG_HAVE_DEC_LOCK y
832+if [ "$CONFIG_SMP" = "y" -o "$CONFIG_PREEMPT" = "y" ]; then
833+ if [ "$CONFIG_X86_CMPXCHG" = "y" ]; then
834+ define_bool CONFIG_HAVE_DEC_LOCK y
835+ fi
836 fi
837+
838 endmenu
839
840 mainmenu_option next_comment
841@@ -286,6 +290,8 @@
842 bool 'System V IPC' CONFIG_SYSVIPC
843 bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
844 bool 'Sysctl support' CONFIG_SYSCTL
845+int 'Maximum User Real-Time Priority' CONFIG_MAX_USER_RT_PRIO 100
846+int 'Maximum Kernel Real-time Priority' CONFIG_MAX_RT_PRIO 0
847 if [ "$CONFIG_PROC_FS" = "y" ]; then
848 choice 'Kernel core (/proc/kcore) format' \
849 "ELF CONFIG_KCORE_ELF \
850diff -urN linux-2.4.20/arch/i386/kernel/entry.S linux-2.4.20-o1-preempt/arch/i386/kernel/entry.S
851--- linux-2.4.20/arch/i386/kernel/entry.S Fri Nov 29 00:53:09 2002
852+++ linux-2.4.20-o1-preempt/arch/i386/kernel/entry.S Tue Feb 18 03:52:06 2003
853@@ -73,16 +73,36 @@
854 * these are offsets into the task-struct.
855 */
856 state = 0
857-flags = 4
858+preempt_count = 4
859 sigpending = 8
860 addr_limit = 12
861 exec_domain = 16
862 need_resched = 20
863 tsk_ptrace = 24
864-processor = 52
865+cpu = 32
866+
867+/* These are offsets into the irq_stat structure
868+ * There is one per cpu and it is aligned to 32
869+ * byte boundry (we put that here as a shift count)
870+ */
871+irq_array_shift = CONFIG_X86_L1_CACHE_SHIFT
872+
873+irq_stat_local_irq_count = 4
874+irq_stat_local_bh_count = 8
875
876 ENOSYS = 38
877
878+#ifdef CONFIG_SMP
879+#define GET_CPU_INDX movl cpu(%ebx),%eax; \
880+ shll $irq_array_shift,%eax
881+#define GET_CURRENT_CPU_INDX GET_CURRENT(%ebx); \
882+ GET_CPU_INDX
883+#define CPU_INDX (,%eax)
884+#else
885+#define GET_CPU_INDX
886+#define GET_CURRENT_CPU_INDX GET_CURRENT(%ebx)
887+#define CPU_INDX
888+#endif
889
890 #define SAVE_ALL \
891 cld; \
892@@ -184,9 +204,11 @@
893
894
895 ENTRY(ret_from_fork)
896+#if CONFIG_SMP
897 pushl %ebx
898 call SYMBOL_NAME(schedule_tail)
899 addl $4, %esp
900+#endif
901 GET_CURRENT(%ebx)
902 testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS
903 jne tracesys_exit
904@@ -255,12 +277,30 @@
905 ALIGN
906 ENTRY(ret_from_intr)
907 GET_CURRENT(%ebx)
908+#ifdef CONFIG_PREEMPT
909+ cli
910+ decl preempt_count(%ebx)
911+#endif
912 ret_from_exception:
913 movl EFLAGS(%esp),%eax # mix EFLAGS and CS
914 movb CS(%esp),%al
915 testl $(VM_MASK | 3),%eax # return to VM86 mode or non-supervisor?
916 jne ret_from_sys_call
917+#ifdef CONFIG_PREEMPT
918+ cmpl $0,preempt_count(%ebx)
919+ jnz restore_all
920+ cmpl $0,need_resched(%ebx)
921+ jz restore_all
922+ movl SYMBOL_NAME(irq_stat)+irq_stat_local_bh_count CPU_INDX,%ecx
923+ addl SYMBOL_NAME(irq_stat)+irq_stat_local_irq_count CPU_INDX,%ecx
924+ jnz restore_all
925+ incl preempt_count(%ebx)
926+ sti
927+ call SYMBOL_NAME(preempt_schedule)
928+ jmp ret_from_intr
929+#else
930 jmp restore_all
931+#endif
932
933 ALIGN
934 reschedule:
935@@ -297,6 +337,9 @@
936 GET_CURRENT(%ebx)
937 call *%edi
938 addl $8,%esp
939+#ifdef CONFIG_PREEMPT
940+ cli
941+#endif
942 jmp ret_from_exception
943
944 ENTRY(coprocessor_error)
945@@ -316,12 +359,18 @@
946 movl %cr0,%eax
947 testl $0x4,%eax # EM (math emulation bit)
948 jne device_not_available_emulate
949+#ifdef CONFIG_PREEMPT
950+ cli
951+#endif
952 call SYMBOL_NAME(math_state_restore)
953 jmp ret_from_exception
954 device_not_available_emulate:
955 pushl $0 # temporary storage for ORIG_EIP
956 call SYMBOL_NAME(math_emulate)
957 addl $4,%esp
958+#ifdef CONFIG_PREEMPT
959+ cli
960+#endif
961 jmp ret_from_exception
962
963 ENTRY(debug)
964@@ -645,8 +694,8 @@
965 .long SYMBOL_NAME(sys_tkill)
966 .long SYMBOL_NAME(sys_ni_syscall) /* reserved for sendfile64 */
967 .long SYMBOL_NAME(sys_ni_syscall) /* 240 reserved for futex */
968- .long SYMBOL_NAME(sys_ni_syscall) /* reserved for sched_setaffinity */
969- .long SYMBOL_NAME(sys_ni_syscall) /* reserved for sched_getaffinity */
970+ .long SYMBOL_NAME(sys_sched_setaffinity)
971+ .long SYMBOL_NAME(sys_sched_getaffinity)
972 .long SYMBOL_NAME(sys_ni_syscall) /* sys_set_thread_area */
973 .long SYMBOL_NAME(sys_ni_syscall) /* sys_get_thread_area */
974 .long SYMBOL_NAME(sys_ni_syscall) /* 245 sys_io_setup */
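The CONFIG_PREEMPT hook added to ret_from_intr/ret_from_exception above is easier to follow in C. The function below is a rough C rendering written for this document, not code from the patch (which does this in assembly and then jumps back to ret_from_intr, where the count is dropped again); it shows the conditions under which the interrupt-return path calls preempt_schedule():

    /* Rough C rendering of the assembly check above; illustration only. */
    #include <linux/sched.h>
    #include <asm/hardirq.h>        /* local_irq_count(), local_bh_count() */
    #include <asm/system.h>         /* __sti() */

    static void maybe_preempt_on_irq_return(struct task_struct *curr, int cpu,
                                            int returning_to_user_mode)
    {
        if (returning_to_user_mode)
            return;                         /* handled via ret_from_sys_call */
        if (curr->preempt_count != 0)       /* inside a non-preemptible region */
            return;
        if (!curr->need_resched)            /* nobody is asking for the CPU */
            return;
        if (local_irq_count(cpu) || local_bh_count(cpu))
            return;                         /* still in hard- or soft-irq context */

        curr->preempt_count++;              /* mirrors 'incl preempt_count(%ebx)' */
        __sti();                            /* the assembly executes 'sti' here */
        preempt_schedule();
        /* the assembly then jumps back to ret_from_intr, which disables
         * interrupts again and decrements preempt_count before returning */
    }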
975diff -urN linux-2.4.20/arch/i386/kernel/i387.c linux-2.4.20-o1-preempt/arch/i386/kernel/i387.c
976--- linux-2.4.20/arch/i386/kernel/i387.c Sat Aug 3 02:39:42 2002
977+++ linux-2.4.20-o1-preempt/arch/i386/kernel/i387.c Tue Feb 18 03:52:07 2003
978@@ -10,6 +10,7 @@
979
980 #include <linux/config.h>
981 #include <linux/sched.h>
982+#include <linux/spinlock.h>
983 #include <linux/init.h>
984 #include <asm/processor.h>
985 #include <asm/i387.h>
986@@ -89,6 +90,8 @@
987 {
988 struct task_struct *tsk = current;
989
990+ preempt_disable();
991+
992 if (tsk->flags & PF_USEDFPU) {
993 __save_init_fpu(tsk);
994 return;
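The hunk above makes the in-kernel FPU save path (kernel_fpu_begin() in i387.c) disable preemption, which is Rule #2 of Documentation/preempt-locking.txt in action. As a hedged illustration only (not code from the patch; xor_block_sse is an invented name), an in-kernel FPU/SSE user is expected to bracket its work like this:

    /* Illustration only: in-kernel FPU use under a preemptible kernel.
     * xor_block_sse is an invented name; real users include the RAID
     * xor routines.
     */
    #include <asm/i387.h>

    static void xor_block_sse(unsigned long *dst, unsigned long *src, int words)
    {
        int i;

        kernel_fpu_begin();     /* saves any user FPU state and, with this
                                 * patch, leaves preemption disabled */
        /* SSE/MMX-assisted work would go here; a plain C loop as stand-in */
        for (i = 0; i < words; i++)
            dst[i] ^= src[i];
        kernel_fpu_end();       /* sets TS again and re-enables preemption */
    }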
995diff -urN linux-2.4.20/arch/i386/kernel/process.c linux-2.4.20-o1-preempt/arch/i386/kernel/process.c
996--- linux-2.4.20/arch/i386/kernel/process.c Sat Aug 3 02:39:42 2002
997+++ linux-2.4.20-o1-preempt/arch/i386/kernel/process.c Tue Feb 18 03:51:29 2003
998@@ -82,7 +82,7 @@
999 {
1000 if (current_cpu_data.hlt_works_ok && !hlt_counter) {
1001 __cli();
1002- if (!current->need_resched)
1003+ if (!need_resched())
1004 safe_halt();
1005 else
1006 __sti();
1007@@ -124,15 +124,12 @@
1008 void cpu_idle (void)
1009 {
1010 /* endless idle loop with no priority at all */
1011- init_idle();
1012- current->nice = 20;
1013- current->counter = -100;
1014
1015 while (1) {
1016 void (*idle)(void) = pm_idle;
1017 if (!idle)
1018 idle = default_idle;
1019- while (!current->need_resched)
1020+ if (!current->need_resched)
1021 idle();
1022 schedule();
1023 check_pgt_cache();
1024@@ -697,15 +694,17 @@
1025 asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs));
1026
1027 /*
1028- * Restore %fs and %gs.
1029+ * Restore %fs and %gs if needed.
1030 */
1031- loadsegment(fs, next->fs);
1032- loadsegment(gs, next->gs);
1033+ if (unlikely(prev->fs | prev->gs | next->fs | next->gs)) {
1034+ loadsegment(fs, next->fs);
1035+ loadsegment(gs, next->gs);
1036+ }
1037
1038 /*
1039 * Now maybe reload the debug registers
1040 */
1041- if (next->debugreg[7]){
1042+ if (unlikely(next->debugreg[7])) {
1043 loaddebug(next, 0);
1044 loaddebug(next, 1);
1045 loaddebug(next, 2);
1046@@ -715,7 +714,7 @@
1047 loaddebug(next, 7);
1048 }
1049
1050- if (prev->ioperm || next->ioperm) {
1051+ if (unlikely(prev->ioperm || next->ioperm)) {
1052 if (next->ioperm) {
1053 /*
1054 * 4 cachelines copy ... not good, but not that
1055diff -urN linux-2.4.20/arch/i386/kernel/setup.c linux-2.4.20-o1-preempt/arch/i386/kernel/setup.c
1056--- linux-2.4.20/arch/i386/kernel/setup.c Fri Nov 29 00:53:09 2002
1057+++ linux-2.4.20-o1-preempt/arch/i386/kernel/setup.c Tue Feb 18 03:51:29 2003
1058@@ -3046,9 +3046,10 @@
1059 load_TR(nr);
1060 load_LDT(&init_mm);
1061
1062- /*
1063- * Clear all 6 debug registers:
1064- */
1065+ /* Clear %fs and %gs. */
1066+ asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
1067+
1068+ /* Clear all 6 debug registers: */
1069
1070 #define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) );
1071
1072diff -urN linux-2.4.20/arch/i386/kernel/smp.c linux-2.4.20-o1-preempt/arch/i386/kernel/smp.c
1073--- linux-2.4.20/arch/i386/kernel/smp.c Fri Nov 29 00:53:09 2002
1074+++ linux-2.4.20-o1-preempt/arch/i386/kernel/smp.c Tue Feb 18 03:52:06 2003
1075@@ -357,10 +357,13 @@
1076
1077 asmlinkage void smp_invalidate_interrupt (void)
1078 {
1079- unsigned long cpu = smp_processor_id();
1080+ unsigned long cpu;
1081+
1082+ preempt_disable();
1083
1084+ cpu = smp_processor_id();
1085 if (!test_bit(cpu, &flush_cpumask))
1086- return;
1087+ goto out;
1088 /*
1089 * This was a BUG() but until someone can quote me the
1090 * line from the intel manual that guarantees an IPI to
1091@@ -381,6 +384,8 @@
1092 }
1093 ack_APIC_irq();
1094 clear_bit(cpu, &flush_cpumask);
1095+out:
1096+ preempt_enable();
1097 }
1098
1099 static void flush_tlb_others (unsigned long cpumask, struct mm_struct *mm,
1100@@ -430,17 +435,22 @@
1101 void flush_tlb_current_task(void)
1102 {
1103 struct mm_struct *mm = current->mm;
1104- unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
1105+ unsigned long cpu_mask;
1106
1107+ preempt_disable();
1108+ cpu_mask = mm->cpu_vm_mask & ~(1UL << smp_processor_id());
1109 local_flush_tlb();
1110 if (cpu_mask)
1111 flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
1112+ preempt_enable();
1113 }
1114
1115 void flush_tlb_mm (struct mm_struct * mm)
1116 {
1117- unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
1118+ unsigned long cpu_mask;
1119
1120+ preempt_disable();
1121+ cpu_mask = mm->cpu_vm_mask & ~(1UL << smp_processor_id());
1122 if (current->active_mm == mm) {
1123 if (current->mm)
1124 local_flush_tlb();
1125@@ -449,13 +459,16 @@
1126 }
1127 if (cpu_mask)
1128 flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
1129+ preempt_enable();
1130 }
1131
1132 void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
1133 {
1134 struct mm_struct *mm = vma->vm_mm;
1135- unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
1136+ unsigned long cpu_mask;
1137
1138+ preempt_disable();
1139+ cpu_mask = mm->cpu_vm_mask & ~(1UL << smp_processor_id());
1140 if (current->active_mm == mm) {
1141 if(current->mm)
1142 __flush_tlb_one(va);
1143@@ -465,6 +478,7 @@
1144
1145 if (cpu_mask)
1146 flush_tlb_others(cpu_mask, mm, va);
1147+ preempt_enable();
1148 }
1149
1150 static inline void do_flush_tlb_all_local(void)
1151@@ -493,10 +507,20 @@
1152 * it goes straight through and wastes no time serializing
1153 * anything. Worst case is that we lose a reschedule ...
1154 */
1155-
1156 void smp_send_reschedule(int cpu)
1157 {
1158 send_IPI_mask(1 << cpu, RESCHEDULE_VECTOR);
1159+}
1160+
1161+/*
1162+ * this function sends a reschedule IPI to all (other) CPUs.
1163+ * This should only be used if some 'global' task became runnable,
1164+ * such as a RT task, that must be handled now. The first CPU
1165+ * that manages to grab the task will run it.
1166+ */
1167+void smp_send_reschedule_all(void)
1168+{
1169+ send_IPI_allbutself(RESCHEDULE_VECTOR);
1170 }
1171
1172 /*
1173diff -urN linux-2.4.20/arch/i386/kernel/smpboot.c linux-2.4.20-o1-preempt/arch/i386/kernel/smpboot.c
1174--- linux-2.4.20/arch/i386/kernel/smpboot.c Fri Nov 29 00:53:09 2002
1175+++ linux-2.4.20-o1-preempt/arch/i386/kernel/smpboot.c Tue Feb 18 03:51:29 2003
1176@@ -308,14 +308,14 @@
1177 if (tsc_values[i] < avg)
1178 realdelta = -realdelta;
1179
1180- printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
1181- i, realdelta);
1182+ printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n", i, realdelta);
1183 }
1184
1185 sum += delta;
1186 }
1187 if (!buggy)
1188 printk("passed.\n");
1189+ ;
1190 }
1191
1192 static void __init synchronize_tsc_ap (void)
1193@@ -365,7 +365,7 @@
1194 * (This works even if the APIC is not enabled.)
1195 */
1196 phys_id = GET_APIC_ID(apic_read(APIC_ID));
1197- cpuid = current->processor;
1198+ cpuid = cpu();
1199 if (test_and_set_bit(cpuid, &cpu_online_map)) {
1200 printk("huh, phys CPU#%d, CPU#%d already present??\n",
1201 phys_id, cpuid);
1202@@ -435,6 +435,7 @@
1203 */
1204 smp_store_cpu_info(cpuid);
1205
1206+ disable_APIC_timer();
1207 /*
1208 * Allow the master to continue.
1209 */
1210@@ -465,6 +466,7 @@
1211 smp_callin();
1212 while (!atomic_read(&smp_commenced))
1213 rep_nop();
1214+ enable_APIC_timer();
1215 /*
1216 * low-memory mappings have been cleared, flush them from
1217 * the local TLBs too.
1218@@ -803,16 +805,13 @@
1219 if (!idle)
1220 panic("No idle process for CPU %d", cpu);
1221
1222- idle->processor = cpu;
1223- idle->cpus_runnable = 1 << cpu; /* we schedule the first task manually */
1224+ init_idle(idle, cpu);
1225
1226 map_cpu_to_boot_apicid(cpu, apicid);
1227
1228 idle->thread.eip = (unsigned long) start_secondary;
1229
1230- del_from_runqueue(idle);
1231 unhash_process(idle);
1232- init_tasks[cpu] = idle;
1233
1234 /* start_eip had better be page-aligned! */
1235 start_eip = setup_trampoline();
1236@@ -925,6 +924,7 @@
1237 }
1238
1239 cycles_t cacheflush_time;
1240+unsigned long cache_decay_ticks;
1241
1242 static void smp_tune_scheduling (void)
1243 {
1244@@ -958,9 +958,13 @@
1245 cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
1246 }
1247
1248+ cache_decay_ticks = (long)cacheflush_time/cpu_khz * HZ / 1000;
1249+
1250 printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
1251 (long)cacheflush_time/(cpu_khz/1000),
1252 ((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
1253+ printk("task migration cache decay timeout: %ld msecs.\n",
1254+ (cache_decay_ticks + 1) * 1000 / HZ);
1255 }
1256
1257 /*
1258@@ -1023,8 +1027,7 @@
1259 map_cpu_to_boot_apicid(0, boot_cpu_apicid);
1260
1261 global_irq_holder = 0;
1262- current->processor = 0;
1263- init_idle();
1264+ current->cpu = 0;
1265 smp_tune_scheduling();
1266
1267 /*
1268diff -urN linux-2.4.20/arch/i386/kernel/traps.c linux-2.4.20-o1-preempt/arch/i386/kernel/traps.c
1269--- linux-2.4.20/arch/i386/kernel/traps.c Fri Nov 29 00:53:09 2002
1270+++ linux-2.4.20-o1-preempt/arch/i386/kernel/traps.c Tue Feb 18 03:52:07 2003
1271@@ -751,6 +751,8 @@
1272 *
1273 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
1274 * Don't touch unless you *really* know how it works.
1275+ *
1276+ * Must be called with kernel preemption disabled.
1277 */
1278 asmlinkage void math_state_restore(struct pt_regs regs)
1279 {
1280diff -urN linux-2.4.20/arch/i386/lib/dec_and_lock.c linux-2.4.20-o1-preempt/arch/i386/lib/dec_and_lock.c
1281--- linux-2.4.20/arch/i386/lib/dec_and_lock.c Sat Jul 8 03:20:16 2000
1282+++ linux-2.4.20-o1-preempt/arch/i386/lib/dec_and_lock.c Tue Feb 18 03:52:07 2003
1283@@ -8,6 +8,7 @@
1284 */
1285
1286 #include <linux/spinlock.h>
1287+#include <linux/sched.h>
1288 #include <asm/atomic.h>
1289
1290 int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
1291diff -urN linux-2.4.20/arch/ia64/config.in linux-2.4.20-o1-preempt/arch/ia64/config.in
1292--- linux-2.4.20/arch/ia64/config.in Fri Nov 29 00:53:09 2002
1293+++ linux-2.4.20-o1-preempt/arch/ia64/config.in Tue Feb 18 03:51:29 2003
1294@@ -102,6 +102,8 @@
1295 bool 'System V IPC' CONFIG_SYSVIPC
1296 bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
1297 bool 'Sysctl support' CONFIG_SYSCTL
1298+int 'Maximum User Real-Time Priority' CONFIG_MAX_USER_RT_PRIO 100
1299+int 'Maximum Kernel Real-time Priority' CONFIG_MAX_RT_PRIO 0
1300 tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF
1301 tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC
1302
1303diff -urN linux-2.4.20/arch/m68k/config.in linux-2.4.20-o1-preempt/arch/m68k/config.in
1304--- linux-2.4.20/arch/m68k/config.in Fri Nov 29 00:53:09 2002
1305+++ linux-2.4.20-o1-preempt/arch/m68k/config.in Tue Feb 18 03:51:29 2003
1306@@ -92,6 +92,8 @@
1307 bool 'System V IPC' CONFIG_SYSVIPC
1308 bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
1309 bool 'Sysctl support' CONFIG_SYSCTL
1310+int 'Maximum User Real-Time Priority' CONFIG_MAX_USER_RT_PRIO 100
1311+int 'Maximum Kernel Real-time Priority' CONFIG_MAX_RT_PRIO 0
1312 if [ "$CONFIG_PROC_FS" = "y" ]; then
1313 choice 'Kernel core (/proc/kcore) format' \
1314 "ELF CONFIG_KCORE_ELF \
1315diff -urN linux-2.4.20/arch/mips/config-shared.in linux-2.4.20-o1-preempt/arch/mips/config-shared.in
1316--- linux-2.4.20/arch/mips/config-shared.in Fri Nov 29 00:53:09 2002
1317+++ linux-2.4.20-o1-preempt/arch/mips/config-shared.in Tue Feb 18 03:52:06 2003
1318@@ -615,9 +615,12 @@
1319 define_bool CONFIG_HOTPLUG_PCI n
1320 fi
1321
1322+dep_bool 'Preemptible Kernel' CONFIG_PREEMPT $CONFIG_NEW_IRQ
1323 bool 'System V IPC' CONFIG_SYSVIPC
1324 bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
1325 bool 'Sysctl support' CONFIG_SYSCTL
1326+int 'Maximum User Real-Time Priority' CONFIG_MAX_USER_RT_PRIO 100
1327+int 'Maximum Kernel Real-time Priority' CONFIG_MAX_RT_PRIO 0
1328 define_bool CONFIG_KCORE_ELF y
1329 define_bool CONFIG_KCORE_AOUT n
1330 define_bool CONFIG_BINFMT_AOUT n
1331diff -urN linux-2.4.20/arch/mips/kernel/i8259.c linux-2.4.20-o1-preempt/arch/mips/kernel/i8259.c
1332--- linux-2.4.20/arch/mips/kernel/i8259.c Fri Nov 29 00:53:10 2002
1333+++ linux-2.4.20-o1-preempt/arch/mips/kernel/i8259.c Tue Feb 18 03:52:07 2003
1334@@ -8,6 +8,7 @@
1335 * Copyright (C) 1992 Linus Torvalds
1336 * Copyright (C) 1994 - 2000 Ralf Baechle
1337 */
1338+#include <linux/sched.h>
1339 #include <linux/delay.h>
1340 #include <linux/init.h>
1341 #include <linux/ioport.h>
1342diff -urN linux-2.4.20/arch/mips/kernel/irq.c linux-2.4.20-o1-preempt/arch/mips/kernel/irq.c
1343--- linux-2.4.20/arch/mips/kernel/irq.c Fri Nov 29 00:53:10 2002
1344+++ linux-2.4.20-o1-preempt/arch/mips/kernel/irq.c Tue Feb 18 03:52:07 2003
1345@@ -8,6 +8,8 @@
1346 * Copyright (C) 1992 Linus Torvalds
1347 * Copyright (C) 1994 - 2000 Ralf Baechle
1348 */
1349+
1350+#include <linux/sched.h>
1351 #include <linux/config.h>
1352 #include <linux/kernel.h>
1353 #include <linux/delay.h>
1354@@ -19,11 +21,13 @@
1355 #include <linux/slab.h>
1356 #include <linux/mm.h>
1357 #include <linux/random.h>
1358-#include <linux/sched.h>
1359+#include <linux/spinlock.h>
1360+#include <linux/ptrace.h>
1361
1362 #include <asm/atomic.h>
1363 #include <asm/system.h>
1364 #include <asm/uaccess.h>
1365+#include <asm/debug.h>
1366
1367 /*
1368 * Controller mappings for all interrupt sources:
1369@@ -429,6 +433,8 @@
1370 struct irqaction * action;
1371 unsigned int status;
1372
1373+ preempt_disable();
1374+
1375 kstat.irqs[cpu][irq]++;
1376 spin_lock(&desc->lock);
1377 desc->handler->ack(irq);
1378@@ -490,6 +496,27 @@
1379
1380 if (softirq_pending(cpu))
1381 do_softirq();
1382+
1383+#if defined(CONFIG_PREEMPT)
1384+ while (--current->preempt_count == 0) {
1385+ db_assert(intr_off());
1386+ db_assert(!in_interrupt());
1387+
1388+ if (current->need_resched == 0) {
1389+ break;
1390+ }
1391+
1392+ current->preempt_count ++;
1393+ sti();
1394+ if (user_mode(regs)) {
1395+ schedule();
1396+ } else {
1397+ preempt_schedule();
1398+ }
1399+ cli();
1400+ }
1401+#endif
1402+
1403 return 1;
1404 }
1405
1406diff -urN linux-2.4.20/arch/mips/mm/extable.c linux-2.4.20-o1-preempt/arch/mips/mm/extable.c
1407--- linux-2.4.20/arch/mips/mm/extable.c Fri Nov 29 00:53:10 2002
1408+++ linux-2.4.20-o1-preempt/arch/mips/mm/extable.c Tue Feb 18 03:52:07 2003
1409@@ -3,6 +3,7 @@
1410 */
1411 #include <linux/config.h>
1412 #include <linux/module.h>
1413+#include <linux/sched.h>
1414 #include <linux/spinlock.h>
1415 #include <asm/uaccess.h>
1416
1417diff -urN linux-2.4.20/arch/mips64/kernel/process.c linux-2.4.20-o1-preempt/arch/mips64/kernel/process.c
1418--- linux-2.4.20/arch/mips64/kernel/process.c Fri Nov 29 00:53:10 2002
1419+++ linux-2.4.20-o1-preempt/arch/mips64/kernel/process.c Tue Feb 18 03:51:29 2003
1420@@ -35,8 +35,7 @@
1421 {
1422 /* endless idle loop with no priority at all */
1423 init_idle();
1424- current->nice = 20;
1425- current->counter = -100;
1426+
1427 while (1) {
1428 while (!current->need_resched)
1429 if (cpu_wait)
1430diff -urN linux-2.4.20/arch/parisc/config.in linux-2.4.20-o1-preempt/arch/parisc/config.in
1431--- linux-2.4.20/arch/parisc/config.in Fri Nov 29 00:53:10 2002
1432+++ linux-2.4.20-o1-preempt/arch/parisc/config.in Tue Feb 18 03:51:29 2003
1433@@ -83,6 +83,8 @@
1434 bool 'System V IPC' CONFIG_SYSVIPC
1435 bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
1436 bool 'Sysctl support' CONFIG_SYSCTL
1437+int 'Maximum User Real-Time Priority' CONFIG_MAX_USER_RT_PRIO 100
1438+int 'Maximum Kernel Real-time Priority' CONFIG_MAX_RT_PRIO 0
1439 define_bool CONFIG_KCORE_ELF y
1440 tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF
1441 tristate 'Kernel support for SOM binaries' CONFIG_BINFMT_SOM
1442diff -urN linux-2.4.20/arch/parisc/kernel/process.c linux-2.4.20-o1-preempt/arch/parisc/kernel/process.c
1443--- linux-2.4.20/arch/parisc/kernel/process.c Fri Nov 29 00:53:10 2002
1444+++ linux-2.4.20-o1-preempt/arch/parisc/kernel/process.c Tue Feb 18 03:51:29 2003
1445@@ -64,8 +64,6 @@
1446 {
1447 /* endless idle loop with no priority at all */
1448 init_idle();
1449- current->nice = 20;
1450- current->counter = -100;
1451
1452 while (1) {
1453 while (!current->need_resched) {
1454diff -urN linux-2.4.20/arch/ppc/8260_io/uart.c linux-2.4.20-o1-preempt/arch/ppc/8260_io/uart.c
1455--- linux-2.4.20/arch/ppc/8260_io/uart.c Sat Aug 3 02:39:43 2002
1456+++ linux-2.4.20-o1-preempt/arch/ppc/8260_io/uart.c Tue Feb 18 03:51:29 2003
1457@@ -1732,7 +1732,6 @@
1458 printk("lsr = %d (jiff=%lu)...", lsr, jiffies);
1459 #endif
1460 current->state = TASK_INTERRUPTIBLE;
1461-/* current->counter = 0; make us low-priority */
1462 schedule_timeout(char_time);
1463 if (signal_pending(current))
1464 break;
1465diff -urN linux-2.4.20/arch/ppc/8xx_io/uart.c linux-2.4.20-o1-preempt/arch/ppc/8xx_io/uart.c
1466--- linux-2.4.20/arch/ppc/8xx_io/uart.c Sat Aug 3 02:39:43 2002
1467+++ linux-2.4.20-o1-preempt/arch/ppc/8xx_io/uart.c Tue Feb 18 03:51:29 2003
1468@@ -1796,7 +1796,6 @@
1469 printk("lsr = %d (jiff=%lu)...", lsr, jiffies);
1470 #endif
1471 current->state = TASK_INTERRUPTIBLE;
1472-/* current->counter = 0; make us low-priority */
1473 schedule_timeout(char_time);
1474 if (signal_pending(current))
1475 break;
1476diff -urN linux-2.4.20/arch/ppc/config.in linux-2.4.20-o1-preempt/arch/ppc/config.in
1477--- linux-2.4.20/arch/ppc/config.in Fri Nov 29 00:53:11 2002
1478+++ linux-2.4.20-o1-preempt/arch/ppc/config.in Tue Feb 18 03:52:06 2003
1479@@ -112,6 +112,8 @@
1480 bool ' Distribute interrupts on all CPUs by default' CONFIG_IRQ_ALL_CPUS
1481 fi
1482
1483+bool 'Preemptible kernel support' CONFIG_PREEMPT
1484+
1485 if [ "$CONFIG_6xx" = "y" -a "$CONFIG_8260" = "n" ];then
1486 bool 'AltiVec Support' CONFIG_ALTIVEC
1487 bool 'Thermal Management Support' CONFIG_TAU
1488@@ -163,6 +165,8 @@
1489 bool 'Sysctl support' CONFIG_SYSCTL
1490 bool 'System V IPC' CONFIG_SYSVIPC
1491 bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
1492+int 'Maximum User Real-Time Priority' CONFIG_MAX_USER_RT_PRIO 100
1493+int 'Maximum Kernel Real-time Priority' CONFIG_MAX_RT_PRIO 0
1494
1495 # only elf supported, a.out is not -- Cort
1496 if [ "$CONFIG_PROC_FS" = "y" ]; then
1497diff -urN linux-2.4.20/arch/ppc/kernel/entry.S linux-2.4.20-o1-preempt/arch/ppc/kernel/entry.S
1498--- linux-2.4.20/arch/ppc/kernel/entry.S Fri Nov 29 00:53:11 2002
1499+++ linux-2.4.20-o1-preempt/arch/ppc/kernel/entry.S Tue Feb 18 03:52:07 2003
1500@@ -264,7 +264,9 @@
1501
1502 .globl ret_from_fork
1503 ret_from_fork:
1504+#if CONFIG_SMP
1505 bl schedule_tail
1506+#endif
1507 lwz r0,TASK_PTRACE(r2)
1508 andi. r0,r0,PT_TRACESYS
1509 bnel- syscall_trace
1510@@ -278,6 +278,41 @@
1511 */
1512 cmpi 0,r3,0
1513 beq restore
1514+#ifdef CONFIG_PREEMPT
1515+ lwz r3,PREEMPT_COUNT(r2)
1516+ cmpi 0,r3,1
1517+ bge ret_from_except
1518+ lwz r5,_MSR(r1)
1519+ andi. r5,r5,MSR_PR
1520+ bne do_signal_ret
1521+ lwz r5,NEED_RESCHED(r2)
1522+ cmpi 0,r5,0
1523+ beq ret_from_except
1524+ lis r3,irq_stat@h
1525+ ori r3,r3,irq_stat@l
1526+ lwz r5,4(r3)
1527+ lwz r3,8(r3)
1528+ add r3,r3,r5
1529+ cmpi 0,r3,0
1530+ bne ret_from_except
1531+ lwz r3,PREEMPT_COUNT(r2)
1532+ addi r3,r3,1
1533+ stw r3,PREEMPT_COUNT(r2)
1534+ mfmsr r0
1535+ ori r0,r0,MSR_EE
1536+ mtmsr r0
1537+ sync
1538+ bl preempt_schedule
1539+ mfmsr r0
1540+ rlwinm r0,r0,0,17,15
1541+ mtmsr r0
1542+ sync
1543+ lwz r3,PREEMPT_COUNT(r2)
1544+ subi r3,r3,1
1545+ stw r3,PREEMPT_COUNT(r2)
1546+ li r3,1
1547+ b ret_from_intercept
1548+#endif /* CONFIG_PREEMPT */
1549 .globl ret_from_except
1550 ret_from_except:
1551 lwz r3,_MSR(r1) /* Returning to user mode? */
1552diff -urN linux-2.4.20/arch/ppc/kernel/idle.c linux-2.4.20-o1-preempt/arch/ppc/kernel/idle.c
1553--- linux-2.4.20/arch/ppc/kernel/idle.c Fri Nov 29 00:53:11 2002
1554+++ linux-2.4.20-o1-preempt/arch/ppc/kernel/idle.c Tue Feb 18 03:51:29 2003
1555@@ -51,9 +51,7 @@
1556 do_power_save = 1;
1557
1558 /* endless loop with no priority at all */
1559- current->nice = 20;
1560- current->counter = -100;
1561- init_idle();
1562+
1563 for (;;) {
1564 #ifdef CONFIG_SMP
1565 if (!do_power_save) {
1566diff -urN linux-2.4.20/arch/ppc/kernel/irq.c linux-2.4.20-o1-preempt/arch/ppc/kernel/irq.c
1567--- linux-2.4.20/arch/ppc/kernel/irq.c Fri Nov 29 00:53:11 2002
1568+++ linux-2.4.20-o1-preempt/arch/ppc/kernel/irq.c Tue Feb 18 03:52:07 2003
1569@@ -556,6 +556,34 @@
1570 return 1; /* lets ret_from_int know we can do checks */
1571 }
1572
1573+#ifdef CONFIG_PREEMPT
1574+int
1575+preempt_intercept(struct pt_regs *regs)
1576+{
1577+ int ret;
1578+
1579+ preempt_disable();
1580+
1581+ switch(regs->trap) {
1582+ case 0x500:
1583+ ret = do_IRQ(regs);
1584+ break;
1585+#ifndef CONFIG_4xx
1586+ case 0x900:
1587+#else
1588+ case 0x1000:
1589+#endif
1590+ ret = timer_interrupt(regs);
1591+ break;
1592+ default:
1593+ BUG();
1594+ }
1595+
1596+ preempt_enable();
1597+ return ret;
1598+}
1599+#endif /* CONFIG_PREEMPT */
1600+
1601 unsigned long probe_irq_on (void)
1602 {
1603 return 0;
1604diff -urN linux-2.4.20/arch/ppc/kernel/misc.S linux-2.4.20-o1-preempt/arch/ppc/kernel/misc.S
1605--- linux-2.4.20/arch/ppc/kernel/misc.S Fri Nov 29 00:53:11 2002
1606+++ linux-2.4.20-o1-preempt/arch/ppc/kernel/misc.S Tue Feb 18 03:51:29 2003
1607@@ -1174,8 +1174,8 @@
1608 .long sys_lremovexattr
1609 .long sys_fremovexattr /* 220 */
1610 .long sys_ni_syscall /* reserved for sys_futex */
1611- .long sys_ni_syscall /* reserved for sys_sched_setaffinity */
1612- .long sys_ni_syscall /* reserved for sys_sched_getaffinity */
1613+ .long sys_sched_setaffinity
1614+ .long sys_sched_getaffinity
1615 .long sys_ni_syscall /* reserved for sys_security */
1616 .long sys_ni_syscall /* 225 reserved for Tux */
1617 .long sys_ni_syscall /* reserved for sys_sendfile64 */
1618diff -urN linux-2.4.20/arch/ppc/kernel/mk_defs.c linux-2.4.20-o1-preempt/arch/ppc/kernel/mk_defs.c
1619--- linux-2.4.20/arch/ppc/kernel/mk_defs.c Tue Aug 28 15:58:33 2001
1620+++ linux-2.4.20-o1-preempt/arch/ppc/kernel/mk_defs.c Tue Feb 18 03:52:06 2003
1621@@ -37,11 +37,14 @@
1622 /*DEFINE(KERNELBASE, KERNELBASE);*/
1623 DEFINE(STATE, offsetof(struct task_struct, state));
1624 DEFINE(NEXT_TASK, offsetof(struct task_struct, next_task));
1625- DEFINE(COUNTER, offsetof(struct task_struct, counter));
1626- DEFINE(PROCESSOR, offsetof(struct task_struct, processor));
1627+ DEFINE(COUNTER, offsetof(struct task_struct, time_slice));
1628+ DEFINE(PROCESSOR, offsetof(struct task_struct, cpu));
1629 DEFINE(SIGPENDING, offsetof(struct task_struct, sigpending));
1630 DEFINE(THREAD, offsetof(struct task_struct, thread));
1631 DEFINE(MM, offsetof(struct task_struct, mm));
1632+#ifdef CONFIG_PREEMPT
1633+ DEFINE(PREEMPT_COUNT, offsetof(struct task_struct, preempt_count));
1634+#endif
1635 DEFINE(ACTIVE_MM, offsetof(struct task_struct, active_mm));
1636 DEFINE(TASK_STRUCT_SIZE, sizeof(struct task_struct));
1637 DEFINE(KSP, offsetof(struct thread_struct, ksp));
1638diff -urN linux-2.4.20/arch/ppc/kernel/ppc_ksyms.c linux-2.4.20-o1-preempt/arch/ppc/kernel/ppc_ksyms.c
1639--- linux-2.4.20/arch/ppc/kernel/ppc_ksyms.c Fri Nov 29 00:53:11 2002
1640+++ linux-2.4.20-o1-preempt/arch/ppc/kernel/ppc_ksyms.c Tue Feb 18 03:51:29 2003
1641@@ -366,3 +366,4 @@
1642 EXPORT_SYMBOL_NOVERS(agp_special_page);
1643 #endif /* defined(CONFIG_ALL_PPC) */
1644
1645+EXPORT_SYMBOL(ioremap_bot);
1646diff -urN linux-2.4.20/arch/ppc/kernel/process.c linux-2.4.20-o1-preempt/arch/ppc/kernel/process.c
1647--- linux-2.4.20/arch/ppc/kernel/process.c Mon Nov 26 14:29:17 2001
1648+++ linux-2.4.20-o1-preempt/arch/ppc/kernel/process.c Tue Feb 18 03:51:29 2003
1649@@ -270,7 +270,7 @@
1650 #endif
1651
1652 #ifdef CONFIG_SMP
1653- printk(" CPU: %d", current->processor);
1654+ printk(" CPU: %d", current->cpu);
1655 #endif /* CONFIG_SMP */
1656
1657 printk("\n");
1658diff -urN linux-2.4.20/arch/ppc/kernel/setup.c linux-2.4.20-o1-preempt/arch/ppc/kernel/setup.c
1659--- linux-2.4.20/arch/ppc/kernel/setup.c Fri Nov 29 00:53:11 2002
1660+++ linux-2.4.20-o1-preempt/arch/ppc/kernel/setup.c Tue Feb 18 03:52:07 2003
1661@@ -498,6 +498,20 @@
1662 strcpy(cmd_line, CONFIG_CMDLINE);
1663 #endif /* CONFIG_CMDLINE */
1664
1665+#ifdef CONFIG_PREEMPT
1666+ /* Override the irq routines for external & timer interrupts here,
1667+ * as the MMU has only been minimally setup at this point and
1668+ * there are no protections on page zero.
1669+ */
1670+ {
1671+ extern int preempt_intercept(struct pt_regs *);
1672+
1673+ do_IRQ_intercept = (unsigned long) &preempt_intercept;
1674+ timer_interrupt_intercept = (unsigned long) &preempt_intercept;
1675+
1676+ }
1677+#endif /* CONFIG_PREEMPT */
1678+
1679 platform_init(r3, r4, r5, r6, r7);
1680
1681 if (ppc_md.progress)
1682diff -urN linux-2.4.20/arch/ppc/kernel/smp.c linux-2.4.20-o1-preempt/arch/ppc/kernel/smp.c
1683--- linux-2.4.20/arch/ppc/kernel/smp.c Sat Aug 3 02:39:43 2002
1684+++ linux-2.4.20-o1-preempt/arch/ppc/kernel/smp.c Tue Feb 18 03:51:29 2003
1685@@ -54,6 +54,7 @@
1686 unsigned long cpu_online_map;
1687 int smp_hw_index[NR_CPUS];
1688 static struct smp_ops_t *smp_ops;
1689+unsigned long cache_decay_ticks;
1690
1691 /* all cpu mappings are 1-1 -- Cort */
1692 volatile unsigned long cpu_callin_map[NR_CPUS];
1693@@ -292,9 +293,7 @@
1694 * cpu 0, the master -- Cort
1695 */
1696 cpu_callin_map[0] = 1;
1697- current->processor = 0;
1698-
1699- init_idle();
1700+ current->cpu = 0;
1701
1702 for (i = 0; i < NR_CPUS; i++) {
1703 prof_counter[i] = 1;
1704@@ -306,6 +305,7 @@
1705 * timebase increments every 4 bus cycles, 32kB L1 data cache.
1706 */
1707 cacheflush_time = 5 * 1024;
1708+ cache_decay_ticks = cacheflush_time/5 * HZ / 1000;
1709
1710 smp_ops = ppc_md.smp_ops;
1711 if (smp_ops == NULL) {
1712@@ -348,12 +348,9 @@
1713 p = init_task.prev_task;
1714 if (!p)
1715 panic("No idle task for CPU %d", i);
1716- del_from_runqueue(p);
1717+ init_idle(p, i);
1718 unhash_process(p);
1719- init_tasks[i] = p;
1720
1721- p->processor = i;
1722- p->cpus_runnable = 1 << i; /* we schedule the first task manually */
1723 current_set[i] = p;
1724
1725 /*
1726@@ -502,7 +499,7 @@
1727
1728 void __init smp_callin(void)
1729 {
1730- int cpu = current->processor;
1731+ int cpu = current->cpu;
1732
1733 smp_store_cpu_info(cpu);
1734 set_dec(tb_ticks_per_jiffy);
1735diff -urN linux-2.4.20/arch/ppc/lib/dec_and_lock.c linux-2.4.20-o1-preempt/arch/ppc/lib/dec_and_lock.c
1736--- linux-2.4.20/arch/ppc/lib/dec_and_lock.c Fri Nov 16 19:10:08 2001
1737+++ linux-2.4.20-o1-preempt/arch/ppc/lib/dec_and_lock.c Tue Feb 18 03:52:07 2003
1738@@ -1,4 +1,5 @@
1739 #include <linux/module.h>
1740+#include <linux/sched.h>
1741 #include <linux/spinlock.h>
1742 #include <asm/atomic.h>
1743 #include <asm/system.h>
1744diff -urN linux-2.4.20/arch/ppc/mm/init.c linux-2.4.20-o1-preempt/arch/ppc/mm/init.c
1745--- linux-2.4.20/arch/ppc/mm/init.c Sat Aug 3 02:39:43 2002
1746+++ linux-2.4.20-o1-preempt/arch/ppc/mm/init.c Tue Feb 18 03:51:29 2003
1747@@ -168,9 +168,9 @@
1748 {
1749 int iscur = 0;
1750 #ifdef CONFIG_SMP
1751- printk("%3d ", p->processor);
1752- if ( (p->processor != NO_PROC_ID) &&
1753- (p == current_set[p->processor]) )
1754+ printk("%3d ", p->cpu);
1755+ if ( (p->cpu != NO_PROC_ID) &&
1756+ (p == current_set[p->cpu]) )
1757 {
1758 iscur = 1;
1759 printk("current");
1760--- linux-2.4.20/arch/ppc64/kernel/entry.S.orig Fri Nov 29 00:53:11 2002
1761+++ linux-2.4.20/arch/ppc64/kernel/entry.S Fri Mar 7 23:47:29 2003
1762@@ -292,7 +292,9 @@
1763 blr
1764
1765 _GLOBAL(ret_from_fork)
1766+#if CONFIG_SMP
1767 bl .schedule_tail
1768+#endif
1769 ld r0,TASK_PTRACE(r13)
1770 andi. r0,r0,PT_TRACESYS
1771 beq+ .ret_from_except
1772diff -urN linux-2.4.20/arch/ppc64/kernel/idle.c linux-2.4.20-o1-preempt/arch/ppc64/kernel/idle.c
1773--- linux-2.4.20/arch/ppc64/kernel/idle.c Sat Aug 3 02:39:43 2002
1774+++ linux-2.4.20-o1-preempt/arch/ppc64/kernel/idle.c Tue Feb 18 03:51:29 2003
1775@@ -76,9 +76,6 @@
1776 unsigned long CTRL;
1777 #endif
1778
1779- /* endless loop with no priority at all */
1780- current->nice = 20;
1781- current->counter = -100;
1782 #ifdef CONFIG_PPC_ISERIES
1783 /* ensure iSeries run light will be out when idle */
1784 current->thread.flags &= ~PPC_FLAG_RUN_LIGHT;
1785@@ -86,7 +83,7 @@
1786 CTRL &= ~RUNLATCH;
1787 mtspr(CTRLT, CTRL);
1788 #endif
1789- init_idle();
1790+ /* endless loop with no priority at all */
1791
1792 lpaca = get_paca();
1793
1794diff -urN linux-2.4.20/arch/ppc64/kernel/process.c linux-2.4.20-o1-preempt/arch/ppc64/kernel/process.c
1795--- linux-2.4.20/arch/ppc64/kernel/process.c Fri Nov 29 00:53:11 2002
1796+++ linux-2.4.20-o1-preempt/arch/ppc64/kernel/process.c Tue Feb 18 03:51:29 2003
1797@@ -105,7 +105,7 @@
1798 #ifdef SHOW_TASK_SWITCHES
1799 printk("%s/%d -> %s/%d NIP %08lx cpu %d root %x/%x\n",
1800 prev->comm,prev->pid,
1801- new->comm,new->pid,new->thread.regs->nip,new->processor,
1802+ new->comm,new->pid,new->thread.regs->nip,new->cpu,
1803 new->fs->root,prev->fs->root);
1804 #endif
1805 #ifdef CONFIG_SMP
1806diff -urN linux-2.4.20/arch/ppc64/kernel/smp.c linux-2.4.20-o1-preempt/arch/ppc64/kernel/smp.c
1807--- linux-2.4.20/arch/ppc64/kernel/smp.c Fri Nov 29 00:53:11 2002
1808+++ linux-2.4.20-o1-preempt/arch/ppc64/kernel/smp.c Tue Feb 18 03:51:29 2003
1809@@ -69,6 +69,7 @@
1810 extern atomic_t ipi_sent;
1811 spinlock_t kernel_flag __cacheline_aligned = SPIN_LOCK_UNLOCKED;
1812 cycles_t cacheflush_time;
1813+unsigned long cache_decay_ticks;
1814 static int max_cpus __initdata = NR_CPUS;
1815
1816 unsigned long cpu_online_map;
1817@@ -611,9 +612,7 @@
1818 * cpu 0, the master -- Cort
1819 */
1820 cpu_callin_map[0] = 1;
1821- current->processor = 0;
1822-
1823- init_idle();
1824+ current->cpu = 0;
1825
1826 for (i = 0; i < NR_CPUS; i++) {
1827 paca[i].prof_counter = 1;
1828@@ -637,6 +636,7 @@
1829 * timebase increments every 4 bus cycles, 32kB L1 data cache.
1830 */
1831 cacheflush_time = 5 * 1024;
1832+ cache_decay_ticks = cacheflush_time/5 * HZ / 1000;
1833
1834 /* Probe arch for CPUs */
1835 cpu_nr = ppc_md.smp_probe();
1836@@ -684,12 +684,9 @@
1837
1838 PPCDBG(PPCDBG_SMP,"\tProcessor %d, task = 0x%lx\n", i, p);
1839
1840- del_from_runqueue(p);
1841+ init_idle(p, i);
1842 unhash_process(p);
1843- init_tasks[i] = p;
1844
1845- p->processor = i;
1846- p->cpus_runnable = 1 << i; /* we schedule the first task manually */
1847 current_set[i].task = p;
1848 sp = ((unsigned long)p) + sizeof(union task_union)
1849 - STACK_FRAME_OVERHEAD;
1850@@ -740,15 +737,13 @@
1851
1852 void __init smp_callin(void)
1853 {
1854- int cpu = current->processor;
1855+ int cpu = current->cpu;
1856
1857 smp_store_cpu_info(cpu);
1858 set_dec(paca[cpu].default_decr);
1859 cpu_callin_map[cpu] = 1;
1860
1861 ppc_md.smp_setup_cpu(cpu);
1862-
1863- init_idle();
1864
1865 set_bit(smp_processor_id(), &cpu_online_map);
1866
1867diff -urN linux-2.4.20/arch/s390/config.in linux-2.4.20-o1-preempt/arch/s390/config.in
1868--- linux-2.4.20/arch/s390/config.in Fri Nov 29 00:53:11 2002
1869+++ linux-2.4.20-o1-preempt/arch/s390/config.in Tue Feb 18 03:51:29 2003
1870@@ -49,6 +49,8 @@
1871 bool 'System V IPC' CONFIG_SYSVIPC
1872 bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
1873 bool 'Sysctl support' CONFIG_SYSCTL
1874+int 'Maximum User Real-Time Priority' CONFIG_MAX_USER_RT_PRIO 100
1875+int 'Maximum Kernel Real-time Priority' CONFIG_MAX_RT_PRIO 0
1876 define_bool CONFIG_KCORE_ELF y
1877 tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF
1878 tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC
1879diff -urN linux-2.4.20/arch/s390/kernel/process.c linux-2.4.20-o1-preempt/arch/s390/kernel/process.c
1880--- linux-2.4.20/arch/s390/kernel/process.c Sat Aug 3 02:39:43 2002
1881+++ linux-2.4.20-o1-preempt/arch/s390/kernel/process.c Tue Feb 18 03:51:29 2003
1882@@ -57,8 +57,7 @@
1883
1884 /* endless idle loop with no priority at all */
1885 init_idle();
1886- current->nice = 20;
1887- current->counter = -100;
1888+
1889 while (1) {
1890 if (current->need_resched) {
1891 schedule();
1892diff -urN linux-2.4.20/arch/s390x/config.in linux-2.4.20-o1-preempt/arch/s390x/config.in
1893--- linux-2.4.20/arch/s390x/config.in Fri Nov 29 00:53:11 2002
1894+++ linux-2.4.20-o1-preempt/arch/s390x/config.in Tue Feb 18 03:51:29 2003
1895@@ -52,6 +52,8 @@
1896 bool 'System V IPC' CONFIG_SYSVIPC
1897 bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
1898 bool 'Sysctl support' CONFIG_SYSCTL
1899+int 'Maximum User Real-Time Priority' CONFIG_MAX_USER_RT_PRIO 100
1900+int 'Maximum Kernel Real-time Priority' CONFIG_MAX_RT_PRIO 0
1901 define_bool CONFIG_KCORE_ELF y
1902 tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF
1903 tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC
1904diff -urN linux-2.4.20/arch/s390x/kernel/process.c linux-2.4.20-o1-preempt/arch/s390x/kernel/process.c
1905--- linux-2.4.20/arch/s390x/kernel/process.c Fri Nov 29 00:53:11 2002
1906+++ linux-2.4.20-o1-preempt/arch/s390x/kernel/process.c Tue Feb 18 03:51:29 2003
1907@@ -57,8 +57,7 @@
1908
1909 /* endless idle loop with no priority at all */
1910 init_idle();
1911- current->nice = 20;
1912- current->counter = -100;
1913+
1914 while (1) {
1915 if (current->need_resched) {
1916 schedule();
1917diff -urN linux-2.4.20/arch/sh/config.in linux-2.4.20-o1-preempt/arch/sh/config.in
1918--- linux-2.4.20/arch/sh/config.in Fri Nov 29 00:53:11 2002
1919+++ linux-2.4.20-o1-preempt/arch/sh/config.in Tue Feb 18 03:52:06 2003
1920@@ -124,6 +124,7 @@
1921 hex 'Physical memory start address' CONFIG_MEMORY_START 08000000
1922 hex 'Physical memory size' CONFIG_MEMORY_SIZE 00400000
1923 fi
1924+bool 'Preemptible Kernel' CONFIG_PREEMPT
1925 endmenu
1926
1927 if [ "$CONFIG_SH_HP690" = "y" ]; then
1928@@ -205,6 +206,8 @@
1929 bool 'System V IPC' CONFIG_SYSVIPC
1930 bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
1931 bool 'Sysctl support' CONFIG_SYSCTL
1932+int 'Maximum User Real-Time Priority' CONFIG_MAX_USER_RT_PRIO 100
1933+int 'Maximum Kernel Real-time Priority' CONFIG_MAX_RT_PRIO 0
1934 if [ "$CONFIG_PROC_FS" = "y" ]; then
1935 choice 'Kernel core (/proc/kcore) format' \
1936 "ELF CONFIG_KCORE_ELF \
1937diff -urN linux-2.4.20/arch/sh/kernel/entry.S linux-2.4.20-o1-preempt/arch/sh/kernel/entry.S
1938--- linux-2.4.20/arch/sh/kernel/entry.S Sat Aug 3 02:39:43 2002
1939+++ linux-2.4.20-o1-preempt/arch/sh/kernel/entry.S Tue Feb 18 03:52:07 2003
1940@@ -60,10 +60,18 @@
1941 /*
1942 * These are offsets into the task-struct.
1943 */
1944-flags = 4
1945+preempt_count = 4
1946 sigpending = 8
1947 need_resched = 20
1948 tsk_ptrace = 24
1949+flags = 84
1950+
1951+/*
1952+ * These offsets are into irq_stat.
1953+ * (Find irq_cpustat_t in asm-sh/hardirq.h)
1954+ */
1955+local_irq_count = 8
1956+local_bh_count = 12
1957
1958 PT_TRACESYS = 0x00000002
1959 PF_USEDFPU = 0x00100000
1960@@ -143,7 +151,7 @@
1961 mov.l __INV_IMASK, r11; \
1962 stc sr, r10; \
1963 and r11, r10; \
1964- stc k_g_imask, r11; \
1965+ stc k_g_imask, r11; \
1966 or r11, r10; \
1967 ldc r10, sr
1968
1969@@ -304,8 +312,8 @@
1970 mov.l @(tsk_ptrace,r0), r0 ! Is current PTRACE_SYSCALL'd?
1971 mov #PT_TRACESYS, r1
1972 tst r1, r0
1973- bt ret_from_syscall
1974- bra syscall_ret_trace
1975+ bf syscall_ret_trace
1976+ bra ret_from_syscall
1977 nop
1978
1979 .align 2
1980@@ -505,8 +513,6 @@
1981 .long syscall_ret_trace
1982 __syscall_ret:
1983 .long syscall_ret
1984-__INV_IMASK:
1985- .long 0xffffff0f ! ~(IMASK)
1986
1987
1988 .align 2
1989@@ -518,7 +524,84 @@
1990 .align 2
1991 1: .long SYMBOL_NAME(schedule)
1992
1993+#ifdef CONFIG_PREEMPT
1994+ !
1995+ ! Returning from interrupt during kernel mode: check if
1996+ ! preempt_schedule should be called. If need_resched flag
1997+ ! is set, preempt_count is zero, and we're not currently
1998+ ! in an interrupt handler (local irq or bottom half) then
1999+ ! call preempt_schedule.
2000+ !
2001+ ! Increment preempt_count to prevent a nested interrupt
2002+ ! from reentering preempt_schedule, then decrement after
2003+ ! and drop through to regular interrupt return which will
2004+ ! jump back and check again in case such an interrupt did
2005+ ! come in (and didn't preempt due to preempt_count).
2006+ !
2007+ ! NOTE: because we just checked that preempt_count was
2008+ ! zero before getting to the call, can't we use immediate
2009+ ! values (1 and 0) rather than inc/dec? Also, rather than
2010+ ! drop through to ret_from_irq, we already know this thread
2011+ ! is kernel mode, can't we go direct to ret_from_kirq? In
2012+ ! fact, with proper interrupt nesting and so forth could
2013+ ! the loop simply be on the need_resched w/o checking the
2014+ ! other stuff again? Optimize later...
2015+ !
2016+ .align 2
2017+ret_from_kirq:
2018+ ! Nonzero preempt_count prevents scheduling
2019+ stc k_current, r1
2020+ mov.l @(preempt_count,r1), r0
2021+ cmp/eq #0, r0
2022+ bf restore_all
2023+ ! Zero need_resched prevents scheduling
2024+ mov.l @(need_resched,r1), r0
2025+ cmp/eq #0, r0
2026+ bt restore_all
2027+ ! If in_interrupt(), don't schedule
2028+ mov.l __irq_stat, r1
2029+ mov.l @(local_irq_count,r1), r0
2030+ mov.l @(local_bh_count,r1), r1
2031+ or r1, r0
2032+ cmp/eq #0, r0
2033+ bf restore_all
2034+ ! Allow scheduling using preempt_schedule
2035+ ! Adjust preempt_count and SR as needed.
2036+ stc k_current, r1
2037+ mov.l @(preempt_count,r1), r0 ! Could replace this ...
2038+ add #1, r0 ! ... and this w/mov #1?
2039+ mov.l r0, @(preempt_count,r1)
2040+ STI()
2041+ mov.l __preempt_schedule, r0
2042+ jsr @r0
2043+ nop
2044+ /* CLI */
2045+ stc sr, r0
2046+ or #0xf0, r0
2047+ ldc r0, sr
2048+ !
2049+ stc k_current, r1
2050+ mov.l @(preempt_count,r1), r0 ! Could replace this ...
2051+ add #-1, r0 ! ... and this w/mov #0?
2052+ mov.l r0, @(preempt_count,r1)
2053+ ! Maybe should bra ret_from_kirq, or loop over need_resched?
2054+ ! For now, fall through to ret_from_irq again...
2055+#endif /* CONFIG_PREEMPT */
2056+
2057 ret_from_irq:
2058+ mov #OFF_SR, r0
2059+ mov.l @(r0,r15), r0 ! get status register
2060+ shll r0
2061+ shll r0 ! kernel space?
2062+#ifndef CONFIG_PREEMPT
2063+ bt restore_all ! Yes, it's from kernel, go back soon
2064+#else /* CONFIG_PREEMPT */
2065+ bt ret_from_kirq ! From kernel: maybe preempt_schedule
2066+#endif /* CONFIG_PREEMPT */
2067+ !
2068+ bra ret_from_syscall
2069+ nop
2070+
2071 ret_from_exception:
2072 mov #OFF_SR, r0
2073 mov.l @(r0,r15), r0 ! get status register
2074@@ -564,6 +647,13 @@
2075 .long SYMBOL_NAME(do_signal)
2076 __irq_stat:
2077 .long SYMBOL_NAME(irq_stat)
2078+#ifdef CONFIG_PREEMPT
2079+__preempt_schedule:
2080+ .long SYMBOL_NAME(preempt_schedule)
2081+#endif /* CONFIG_PREEMPT */
2082+__INV_IMASK:
2083+ .long 0xffffff0f ! ~(IMASK)
2084+
2085
2086 .align 2
2087 restore_all:
2088@@ -679,7 +769,7 @@
2089 __fpu_prepare_fd:
2090 .long SYMBOL_NAME(fpu_prepare_fd)
2091 __init_task_flags:
2092- .long SYMBOL_NAME(init_task_union)+4
2093+ .long SYMBOL_NAME(init_task_union)+flags
2094 __PF_USEDFPU:
2095 .long PF_USEDFPU
2096 #endif
2097diff -urN linux-2.4.20/arch/sh/kernel/irq.c linux-2.4.20-o1-preempt/arch/sh/kernel/irq.c
2098--- linux-2.4.20/arch/sh/kernel/irq.c Sat Sep 8 21:29:09 2001
2099+++ linux-2.4.20-o1-preempt/arch/sh/kernel/irq.c Tue Feb 18 03:52:07 2003
2100@@ -229,6 +229,14 @@
2101 struct irqaction * action;
2102 unsigned int status;
2103
2104+ /*
2105+ * At this point we're now about to actually call handlers,
2106+ * and interrupts might get reenabled during them... bump
2107+ * preempt_count to prevent any preemption while the handler
2108+ * called here is pending...
2109+ */
2110+ preempt_disable();
2111+
2112 /* Get IRQ number */
2113 asm volatile("stc r2_bank, %0\n\t"
2114 "shlr2 %0\n\t"
2115@@ -298,8 +306,17 @@
2116 desc->handler->end(irq);
2117 spin_unlock(&desc->lock);
2118
2119+
2120 if (softirq_pending(cpu))
2121 do_softirq();
2122+
2123+ /*
2124+ * We're done with the handlers, interrupts should be
2125+ * currently disabled; decrement preempt_count now so
2126+ * as we return preemption may be allowed...
2127+ */
2128+ preempt_enable_no_resched();
2129+
2130 return 1;
2131 }
2132
2133diff -urN linux-2.4.20/arch/sh/kernel/process.c linux-2.4.20-o1-preempt/arch/sh/kernel/process.c
2134--- linux-2.4.20/arch/sh/kernel/process.c Mon Oct 15 22:36:48 2001
2135+++ linux-2.4.20-o1-preempt/arch/sh/kernel/process.c Tue Feb 18 03:51:29 2003
2136@@ -40,8 +40,6 @@
2137 {
2138 /* endless idle loop with no priority at all */
2139 init_idle();
2140- current->nice = 20;
2141- current->counter = -100;
2142
2143 while (1) {
2144 if (hlt_counter) {
2145diff -urN linux-2.4.20/arch/sparc/config.in linux-2.4.20-o1-preempt/arch/sparc/config.in
2146--- linux-2.4.20/arch/sparc/config.in Fri Nov 29 00:53:12 2002
2147+++ linux-2.4.20-o1-preempt/arch/sparc/config.in Tue Feb 18 03:51:29 2003
2148@@ -65,6 +65,8 @@
2149 bool 'System V IPC' CONFIG_SYSVIPC
2150 bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
2151 bool 'Sysctl support' CONFIG_SYSCTL
2152+int 'Maximum User Real-Time Priority' CONFIG_MAX_USER_RT_PRIO 100
2153+int 'Maximum Kernel Real-time Priority' CONFIG_MAX_RT_PRIO 0
2154 if [ "$CONFIG_PROC_FS" = "y" ]; then
2155 define_bool CONFIG_KCORE_ELF y
2156 fi
2157diff -urN linux-2.4.20/arch/sparc/kernel/process.c linux-2.4.20-o1-preempt/arch/sparc/kernel/process.c
2158--- linux-2.4.20/arch/sparc/kernel/process.c Sat Aug 3 02:39:43 2002
2159+++ linux-2.4.20-o1-preempt/arch/sparc/kernel/process.c Tue Feb 18 03:51:29 2003
2160@@ -74,9 +74,6 @@
2161 goto out;
2162
2163 /* endless idle loop with no priority at all */
2164- current->nice = 20;
2165- current->counter = -100;
2166- init_idle();
2167
2168 for (;;) {
2169 if (ARCH_SUN4C_SUN4) {
2170@@ -128,9 +125,6 @@
2171 int cpu_idle(void)
2172 {
2173 /* endless idle loop with no priority at all */
2174- current->nice = 20;
2175- current->counter = -100;
2176- init_idle();
2177
2178 while(1) {
2179 if(current->need_resched) {
2180diff -urN linux-2.4.20/arch/sparc/kernel/smp.c linux-2.4.20-o1-preempt/arch/sparc/kernel/smp.c
2181--- linux-2.4.20/arch/sparc/kernel/smp.c Fri Dec 21 18:41:53 2001
2182+++ linux-2.4.20-o1-preempt/arch/sparc/kernel/smp.c Tue Feb 18 03:51:29 2003
2183@@ -57,6 +57,7 @@
2184 volatile int __cpu_number_map[NR_CPUS];
2185 volatile int __cpu_logical_map[NR_CPUS];
2186 cycles_t cacheflush_time = 0; /* XXX */
2187+unsigned long cache_decay_ticks = 0; /* XXX */
2188
2189 /* The only guaranteed locking primitive available on all Sparc
2190 * processors is 'ldstub [%reg + immediate], %dest_reg' which atomically
2191diff -urN linux-2.4.20/arch/sparc/kernel/sun4d_smp.c linux-2.4.20-o1-preempt/arch/sparc/kernel/sun4d_smp.c
2192--- linux-2.4.20/arch/sparc/kernel/sun4d_smp.c Sat Aug 3 02:39:43 2002
2193+++ linux-2.4.20-o1-preempt/arch/sparc/kernel/sun4d_smp.c Tue Feb 18 03:51:29 2003
2194@@ -107,7 +107,6 @@
2195 * the SMP initialization the master will be just allowed
2196 * to call the scheduler code.
2197 */
2198- init_idle();
2199
2200 /* Get our local ticker going. */
2201 smp_setup_percpu_timer();
2202@@ -127,7 +126,7 @@
2203 while((unsigned long)current_set[cpuid] < PAGE_OFFSET)
2204 barrier();
2205
2206- while(current_set[cpuid]->processor != cpuid)
2207+ while(current_set[cpuid]->cpu != cpuid)
2208 barrier();
2209
2210 /* Fix idle thread fields. */
2211@@ -197,10 +196,8 @@
2212 mid_xlate[i] = i;
2213 __cpu_number_map[boot_cpu_id] = 0;
2214 __cpu_logical_map[0] = boot_cpu_id;
2215- current->processor = boot_cpu_id;
2216 smp_store_cpu_info(boot_cpu_id);
2217 smp_setup_percpu_timer();
2218- init_idle();
2219 local_flush_cache_all();
2220 if(linux_num_cpus == 1)
2221 return; /* Not an MP box. */
2222@@ -222,15 +219,11 @@
2223 cpucount++;
2224
2225 p = init_task.prev_task;
2226- init_tasks[i] = p;
2227
2228- p->processor = i;
2229- p->cpus_runnable = 1 << i; /* we schedule the first task manually */
2230+ init_idle(p, i);
2231+ unhash_process(p);
2232
2233 current_set[i] = p;
2234-
2235- del_from_runqueue(p);
2236- unhash_process(p);
2237
2238 for (no = 0; no < linux_num_cpus; no++)
2239 if (linux_cpus[no].mid == i)
2240diff -urN linux-2.4.20/arch/sparc/kernel/sun4m_smp.c linux-2.4.20-o1-preempt/arch/sparc/kernel/sun4m_smp.c
2241--- linux-2.4.20/arch/sparc/kernel/sun4m_smp.c Wed Nov 21 19:31:09 2001
2242+++ linux-2.4.20-o1-preempt/arch/sparc/kernel/sun4m_smp.c Tue Feb 18 03:51:29 2003
2243@@ -104,7 +104,6 @@
2244 * the SMP initialization the master will be just allowed
2245 * to call the scheduler code.
2246 */
2247- init_idle();
2248
2249 /* Allow master to continue. */
2250 swap((unsigned long *)&cpu_callin_map[cpuid], 1);
2251@@ -170,12 +169,10 @@
2252 mid_xlate[boot_cpu_id] = (linux_cpus[boot_cpu_id].mid & ~8);
2253 __cpu_number_map[boot_cpu_id] = 0;
2254 __cpu_logical_map[0] = boot_cpu_id;
2255- current->processor = boot_cpu_id;
2256
2257 smp_store_cpu_info(boot_cpu_id);
2258 set_irq_udt(mid_xlate[boot_cpu_id]);
2259 smp_setup_percpu_timer();
2260- init_idle();
2261 local_flush_cache_all();
2262 if(linux_num_cpus == 1)
2263 return; /* Not an MP box. */
2264@@ -195,15 +192,11 @@
2265 cpucount++;
2266
2267 p = init_task.prev_task;
2268- init_tasks[i] = p;
2269
2270- p->processor = i;
2271- p->cpus_runnable = 1 << i; /* we schedule the first task manually */
2272+ init_idle(p, i);
2273+ unhash_process(p);
2274
2275 current_set[i] = p;
2276-
2277- del_from_runqueue(p);
2278- unhash_process(p);
2279
2280 /* See trampoline.S for details... */
2281 entry += ((i-1) * 3);
2282diff -urN linux-2.4.20/arch/sparc64/config.in linux-2.4.20-o1-preempt/arch/sparc64/config.in
2283--- linux-2.4.20/arch/sparc64/config.in Fri Nov 29 00:53:12 2002
2284+++ linux-2.4.20-o1-preempt/arch/sparc64/config.in Tue Feb 18 03:51:29 2003
2285@@ -64,6 +64,8 @@
2286 bool 'System V IPC' CONFIG_SYSVIPC
2287 bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
2288 bool 'Sysctl support' CONFIG_SYSCTL
2289+int 'Maximum User Real-Time Priority' CONFIG_MAX_USER_RT_PRIO 100
2290+int 'Maximum Kernel Real-time Priority' CONFIG_MAX_RT_PRIO 0
2291 if [ "$CONFIG_PROC_FS" = "y" ]; then
2292 define_bool CONFIG_KCORE_ELF y
2293 fi
2294diff -urN linux-2.4.20/arch/sparc64/kernel/irq.c linux-2.4.20-o1-preempt/arch/sparc64/kernel/irq.c
2295--- linux-2.4.20/arch/sparc64/kernel/irq.c Fri Nov 29 00:53:12 2002
2296+++ linux-2.4.20-o1-preempt/arch/sparc64/kernel/irq.c Tue Feb 18 03:51:29 2003
2297@@ -162,7 +162,7 @@
2298 tid = ((tid & UPA_CONFIG_MID) << 9);
2299 tid &= IMAP_TID_UPA;
2300 } else {
2301- tid = (starfire_translate(imap, current->processor) << 26);
2302+ tid = (starfire_translate(imap, current->cpu) << 26);
2303 tid &= IMAP_TID_UPA;
2304 }
2305
2306diff -urN linux-2.4.20/arch/sparc64/kernel/process.c linux-2.4.20-o1-preempt/arch/sparc64/kernel/process.c
2307--- linux-2.4.20/arch/sparc64/kernel/process.c Fri Nov 29 00:53:12 2002
2308+++ linux-2.4.20-o1-preempt/arch/sparc64/kernel/process.c Tue Feb 18 03:51:29 2003
2309@@ -53,9 +53,6 @@
2310 return -EPERM;
2311
2312 /* endless idle loop with no priority at all */
2313- current->nice = 20;
2314- current->counter = -100;
2315- init_idle();
2316
2317 for (;;) {
2318 /* If current->need_resched is zero we should really
2319@@ -79,14 +76,10 @@
2320 /*
2321 * the idle loop on a UltraMultiPenguin...
2322 */
2323-#define idle_me_harder() (cpu_data[current->processor].idle_volume += 1)
2324-#define unidle_me() (cpu_data[current->processor].idle_volume = 0)
2325+#define idle_me_harder() (cpu_data[current->cpu].idle_volume += 1)
2326+#define unidle_me() (cpu_data[current->cpu].idle_volume = 0)
2327 int cpu_idle(void)
2328 {
2329- current->nice = 20;
2330- current->counter = -100;
2331- init_idle();
2332-
2333 while(1) {
2334 if (current->need_resched != 0) {
2335 unidle_me();
2336diff -urN linux-2.4.20/arch/sparc64/kernel/smp.c linux-2.4.20-o1-preempt/arch/sparc64/kernel/smp.c
2337--- linux-2.4.20/arch/sparc64/kernel/smp.c Fri Nov 29 00:53:12 2002
2338+++ linux-2.4.20-o1-preempt/arch/sparc64/kernel/smp.c Tue Feb 18 03:51:29 2003
2339@@ -49,6 +49,8 @@
2340 static unsigned char boot_cpu_id;
2341 static int smp_activated;
2342
2343+unsigned long cache_decay_ticks = 0; /* XXX */
2344+
2345 /* Kernel spinlock */
2346 spinlock_t kernel_flag __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
2347
2348@@ -259,7 +261,6 @@
2349 printk("Entering UltraSMPenguin Mode...\n");
2350 __sti();
2351 smp_store_cpu_info(boot_cpu_id);
2352- init_idle();
2353
2354 if (linux_num_cpus == 1)
2355 return;
2356@@ -282,12 +283,8 @@
2357 cpucount++;
2358
2359 p = init_task.prev_task;
2360- init_tasks[cpucount] = p;
2361-
2362- p->processor = i;
2363- p->cpus_runnable = 1UL << i; /* we schedule the first task manually */
2364
2365- del_from_runqueue(p);
2366+ init_idle(p, i);
2367 unhash_process(p);
2368
2369 callin_flag = 0;
2370@@ -1154,7 +1151,6 @@
2371 __cpu_number_map[boot_cpu_id] = 0;
2372 prom_cpu_nodes[boot_cpu_id] = linux_cpus[0].prom_node;
2373 __cpu_logical_map[0] = boot_cpu_id;
2374- current->processor = boot_cpu_id;
2375 prof_counter(boot_cpu_id) = prof_multiplier(boot_cpu_id) = 1;
2376 }
2377
2378diff -urN linux-2.4.20/drivers/block/loop.c linux-2.4.20-o1-preempt/drivers/block/loop.c
2379--- linux-2.4.20/drivers/block/loop.c Fri Nov 29 00:53:12 2002
2380+++ linux-2.4.20-o1-preempt/drivers/block/loop.c Tue Feb 18 03:51:29 2003
2381@@ -571,9 +571,6 @@
2382 flush_signals(current);
2383 spin_unlock_irq(&current->sigmask_lock);
2384
2385- current->policy = SCHED_OTHER;
2386- current->nice = -20;
2387-
2388 spin_lock_irq(&lo->lo_lock);
2389 lo->lo_state = Lo_bound;
2390 atomic_inc(&lo->lo_pending);
2391diff -urN linux-2.4.20/drivers/char/drm-4.0/tdfx_drv.c linux-2.4.20-o1-preempt/drivers/char/drm-4.0/tdfx_drv.c
2392--- linux-2.4.20/drivers/char/drm-4.0/tdfx_drv.c Fri Nov 29 00:53:12 2002
2393+++ linux-2.4.20-o1-preempt/drivers/char/drm-4.0/tdfx_drv.c Tue Feb 18 03:51:29 2003
2394@@ -554,7 +554,6 @@
2395 lock.context, current->pid, j,
2396 dev->lock.lock_time, jiffies);
2397 current->state = TASK_INTERRUPTIBLE;
2398- current->policy |= SCHED_YIELD;
2399 schedule_timeout(DRM_LOCK_SLICE-j);
2400 DRM_DEBUG("jiffies=%d\n", jiffies);
2401 }
2402diff -urN linux-2.4.20/drivers/char/mwave/mwavedd.c linux-2.4.20-o1-preempt/drivers/char/mwave/mwavedd.c
2403--- linux-2.4.20/drivers/char/mwave/mwavedd.c Mon Feb 25 20:37:57 2002
2404+++ linux-2.4.20-o1-preempt/drivers/char/mwave/mwavedd.c Tue Feb 18 03:51:29 2003
2405@@ -279,7 +279,6 @@
2406 pDrvData->IPCs[ipcnum].bIsHere = FALSE;
2407 pDrvData->IPCs[ipcnum].bIsEnabled = TRUE;
2408 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0)
2409- current->nice = -20; /* boost to provide priority timing */
2410 #else
2411 current->priority = 0x28; /* boost to provide priority timing */
2412 #endif
2413diff -urN linux-2.4.20/drivers/char/serial_txx927.c linux-2.4.20-o1-preempt/drivers/char/serial_txx927.c
2414--- linux-2.4.20/drivers/char/serial_txx927.c Sat Aug 3 02:39:43 2002
2415+++ linux-2.4.20-o1-preempt/drivers/char/serial_txx927.c Tue Feb 18 03:51:29 2003
2416@@ -1533,7 +1533,6 @@
2417 printk("cisr = %d (jiff=%lu)...", cisr, jiffies);
2418 #endif
2419 current->state = TASK_INTERRUPTIBLE;
2420- current->counter = 0; /* make us low-priority */
2421 schedule_timeout(char_time);
2422 if (signal_pending(current))
2423 break;
2424diff -urN linux-2.4.20/drivers/ieee1394/csr.c linux-2.4.20-o1-preempt/drivers/ieee1394/csr.c
2425--- linux-2.4.20/drivers/ieee1394/csr.c Fri Nov 29 00:53:13 2002
2426+++ linux-2.4.20-o1-preempt/drivers/ieee1394/csr.c Tue Feb 18 03:52:07 2003
2427@@ -10,6 +10,7 @@
2428 */
2429
2430 #include <linux/string.h>
2431+#include <linux/sched.h>
2432
2433 #include "ieee1394_types.h"
2434 #include "hosts.h"
2435diff -urN linux-2.4.20/drivers/md/md.c linux-2.4.20-o1-preempt/drivers/md/md.c
2436--- linux-2.4.20/drivers/md/md.c Fri Nov 29 00:53:13 2002
2437+++ linux-2.4.20-o1-preempt/drivers/md/md.c Tue Feb 18 03:51:29 2003
2438@@ -2936,8 +2936,6 @@
2439 * bdflush, otherwise bdflush will deadlock if there are too
2440 * many dirty RAID5 blocks.
2441 */
2442- current->policy = SCHED_OTHER;
2443- current->nice = -20;
2444 md_unlock_kernel();
2445
2446 complete(thread->event);
2447@@ -3391,11 +3389,6 @@
2448 "(but not more than %d KB/sec) for reconstruction.\n",
2449 sysctl_speed_limit_max);
2450
2451- /*
2452- * Resync has low priority.
2453- */
2454- current->nice = 19;
2455-
2456 is_mddev_idle(mddev); /* this also initializes IO event counters */
2457 for (m = 0; m < SYNC_MARKS; m++) {
2458 mark[m] = jiffies;
2459@@ -3473,16 +3466,13 @@
2460 currspeed = (j-mddev->resync_mark_cnt)/2/((jiffies-mddev->resync_mark)/HZ +1) +1;
2461
2462 if (currspeed > sysctl_speed_limit_min) {
2463- current->nice = 19;
2464-
2465 if ((currspeed > sysctl_speed_limit_max) ||
2466 !is_mddev_idle(mddev)) {
2467 current->state = TASK_INTERRUPTIBLE;
2468 md_schedule_timeout(HZ/4);
2469 goto repeat;
2470 }
2471- } else
2472- current->nice = -20;
2473+ }
2474 }
2475 printk(KERN_INFO "md: md%d: sync done.\n",mdidx(mddev));
2476 err = 0;
2477diff -urN linux-2.4.20/drivers/sound/sound_core.c linux-2.4.20-o1-preempt/drivers/sound/sound_core.c
2478--- linux-2.4.20/drivers/sound/sound_core.c Sun Sep 30 21:26:08 2001
2479+++ linux-2.4.20-o1-preempt/drivers/sound/sound_core.c Tue Feb 18 03:52:07 2003
2480@@ -37,6 +37,7 @@
2481 #include <linux/config.h>
2482 #include <linux/module.h>
2483 #include <linux/init.h>
2484+#include <linux/sched.h>
2485 #include <linux/slab.h>
2486 #include <linux/types.h>
2487 #include <linux/kernel.h>
2488diff -urN linux-2.4.20/fs/adfs/map.c linux-2.4.20-o1-preempt/fs/adfs/map.c
2489--- linux-2.4.20/fs/adfs/map.c Thu Oct 25 22:53:53 2001
2490+++ linux-2.4.20-o1-preempt/fs/adfs/map.c Tue Feb 18 03:52:07 2003
2491@@ -12,6 +12,7 @@
2492 #include <linux/fs.h>
2493 #include <linux/adfs_fs.h>
2494 #include <linux/spinlock.h>
2495+#include <linux/sched.h>
2496
2497 #include "adfs.h"
2498
2499diff -urN linux-2.4.20/fs/binfmt_elf.c linux-2.4.20-o1-preempt/fs/binfmt_elf.c
2500--- linux-2.4.20/fs/binfmt_elf.c Sat Aug 3 02:39:45 2002
2501+++ linux-2.4.20-o1-preempt/fs/binfmt_elf.c Tue Feb 18 03:51:29 2003
2502@@ -1143,7 +1143,7 @@
2503 psinfo.pr_state = i;
2504 psinfo.pr_sname = (i < 0 || i > 5) ? '.' : "RSDZTD"[i];
2505 psinfo.pr_zomb = psinfo.pr_sname == 'Z';
2506- psinfo.pr_nice = current->nice;
2507+ psinfo.pr_nice = task_nice(current);
2508 psinfo.pr_flag = current->flags;
2509 psinfo.pr_uid = NEW_TO_OLD_UID(current->uid);
2510 psinfo.pr_gid = NEW_TO_OLD_GID(current->gid);
2511diff -urN linux-2.4.20/fs/exec.c linux-2.4.20-o1-preempt/fs/exec.c
2512--- linux-2.4.20/fs/exec.c Fri Nov 29 00:53:15 2002
2513+++ linux-2.4.20-o1-preempt/fs/exec.c Tue Feb 18 03:52:07 2003
2514@@ -440,8 +440,8 @@
2515 active_mm = current->active_mm;
2516 current->mm = mm;
2517 current->active_mm = mm;
2518- task_unlock(current);
2519 activate_mm(active_mm, mm);
2520+ task_unlock(current);
2521 mm_release();
2522 if (old_mm) {
2523 if (active_mm != old_mm) BUG();
2524diff -urN linux-2.4.20/fs/fat/cache.c linux-2.4.20-o1-preempt/fs/fat/cache.c
2525--- linux-2.4.20/fs/fat/cache.c Fri Oct 12 22:48:42 2001
2526+++ linux-2.4.20-o1-preempt/fs/fat/cache.c Tue Feb 18 03:52:07 2003
2527@@ -14,6 +14,7 @@
2528 #include <linux/string.h>
2529 #include <linux/stat.h>
2530 #include <linux/fat_cvf.h>
2531+#include <linux/sched.h>
2532
2533 #if 0
2534 # define PRINTK(x) printk x
2535diff -urN linux-2.4.20/fs/jffs2/background.c linux-2.4.20-o1-preempt/fs/jffs2/background.c
2536--- linux-2.4.20/fs/jffs2/background.c Thu Oct 25 09:07:09 2001
2537+++ linux-2.4.20-o1-preempt/fs/jffs2/background.c Tue Feb 18 03:51:29 2003
2538@@ -106,9 +106,6 @@
2539
2540 sprintf(current->comm, "jffs2_gcd_mtd%d", c->mtd->index);
2541
2542- /* FIXME in the 2.2 backport */
2543- current->nice = 10;
2544-
2545 for (;;) {
2546 spin_lock_irq(&current->sigmask_lock);
2547 siginitsetinv (&current->blocked, sigmask(SIGHUP) | sigmask(SIGKILL) | sigmask(SIGSTOP) | sigmask(SIGCONT));
2548diff -urN linux-2.4.20/fs/nls/nls_base.c linux-2.4.20-o1-preempt/fs/nls/nls_base.c
2549--- linux-2.4.20/fs/nls/nls_base.c Sat Aug 3 02:39:45 2002
2550+++ linux-2.4.20-o1-preempt/fs/nls/nls_base.c Tue Feb 18 03:52:07 2003
2551@@ -18,6 +18,7 @@
2552 #ifdef CONFIG_KMOD
2553 #include <linux/kmod.h>
2554 #endif
2555+#include <linux/sched.h>
2556 #include <linux/spinlock.h>
2557
2558 static struct nls_table *tables;
2559diff -urN linux-2.4.20/fs/pipe.c linux-2.4.20-o1-preempt/fs/pipe.c
2560--- linux-2.4.20/fs/pipe.c Fri Nov 29 00:53:15 2002
2561+++ linux-2.4.20-o1-preempt/fs/pipe.c Tue Feb 18 03:51:29 2003
2562@@ -115,7 +115,7 @@
2563 * writers synchronously that there is more
2564 * room.
2565 */
2566- wake_up_interruptible_sync(PIPE_WAIT(*inode));
2567+ wake_up_interruptible(PIPE_WAIT(*inode));
2568 if (!PIPE_EMPTY(*inode))
2569 BUG();
2570 goto do_more_read;
2571diff -urN linux-2.4.20/fs/proc/array.c linux-2.4.20-o1-preempt/fs/proc/array.c
2572--- linux-2.4.20/fs/proc/array.c Sat Aug 3 02:39:45 2002
2573+++ linux-2.4.20-o1-preempt/fs/proc/array.c Tue Feb 18 03:51:29 2003
2574@@ -338,9 +338,8 @@
2575
2576 /* scale priority and nice values from timeslices to -20..20 */
2577 /* to make it look like a "normal" Unix priority/nice value */
2578- priority = task->counter;
2579- priority = 20 - (priority * 10 + DEF_COUNTER / 2) / DEF_COUNTER;
2580- nice = task->nice;
2581+ priority = task_prio(task);
2582+ nice = task_nice(task);
2583
2584 read_lock(&tasklist_lock);
2585 ppid = task->pid ? task->p_opptr->pid : 0;
2586@@ -390,7 +389,7 @@
2587 task->nswap,
2588 task->cnswap,
2589 task->exit_signal,
2590- task->processor);
2591+ task->cpu);
2592 if(mm)
2593 mmput(mm);
2594 return res;
2595diff -urN linux-2.4.20/fs/proc/proc_misc.c linux-2.4.20-o1-preempt/fs/proc/proc_misc.c
2596--- linux-2.4.20/fs/proc/proc_misc.c Fri Nov 29 00:53:15 2002
2597+++ linux-2.4.20-o1-preempt/fs/proc/proc_misc.c Tue Feb 18 03:51:29 2003
2598@@ -106,11 +106,11 @@
2599 a = avenrun[0] + (FIXED_1/200);
2600 b = avenrun[1] + (FIXED_1/200);
2601 c = avenrun[2] + (FIXED_1/200);
2602- len = sprintf(page,"%d.%02d %d.%02d %d.%02d %d/%d %d\n",
2603+ len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
2604 LOAD_INT(a), LOAD_FRAC(a),
2605 LOAD_INT(b), LOAD_FRAC(b),
2606 LOAD_INT(c), LOAD_FRAC(c),
2607- nr_running, nr_threads, last_pid);
2608+ nr_running(), nr_threads, last_pid);
2609 return proc_calc_metrics(page, start, off, count, eof, len);
2610 }
2611
2612@@ -122,7 +122,7 @@
2613 int len;
2614
2615 uptime = jiffies;
2616- idle = init_tasks[0]->times.tms_utime + init_tasks[0]->times.tms_stime;
2617+ idle = init_task.times.tms_utime + init_task.times.tms_stime;
2618
2619 /* The formula for the fraction parts really is ((t * 100) / HZ) % 100, but
2620 that would overflow about every five days at HZ == 100.
2621@@ -371,10 +371,10 @@
2622 }
2623
2624 proc_sprintf(page, &off, &len,
2625- "\nctxt %u\n"
2626+ "\nctxt %lu\n"
2627 "btime %lu\n"
2628 "processes %lu\n",
2629- kstat.context_swtch,
2630+ nr_context_switches(),
2631 xtime.tv_sec - jif / HZ,
2632 total_forks);
2633
2634diff -urN linux-2.4.20/fs/reiserfs/buffer2.c linux-2.4.20-o1-preempt/fs/reiserfs/buffer2.c
2635--- linux-2.4.20/fs/reiserfs/buffer2.c Fri Nov 29 00:53:15 2002
2636+++ linux-2.4.20-o1-preempt/fs/reiserfs/buffer2.c Tue Feb 18 03:51:29 2003
2637@@ -51,11 +51,11 @@
2638 struct buffer_head * reiserfs_bread (struct super_block *super, int n_block, int n_size)
2639 {
2640 struct buffer_head *result;
2641- PROC_EXP( unsigned int ctx_switches = kstat.context_swtch );
2642+ PROC_EXP( unsigned int ctx_switches = nr_context_switches(); );
2643
2644 result = bread (super -> s_dev, n_block, n_size);
2645 PROC_INFO_INC( super, breads );
2646- PROC_EXP( if( kstat.context_swtch != ctx_switches )
2647+ PROC_EXP( if( nr_context_switches() != ctx_switches )
2648 PROC_INFO_INC( super, bread_miss ) );
2649 return result;
2650 }
2651diff -urN linux-2.4.20/include/asm-alpha/bitops.h linux-2.4.20-o1-preempt/include/asm-alpha/bitops.h
2652--- linux-2.4.20/include/asm-alpha/bitops.h Sat Oct 13 00:35:54 2001
2653+++ linux-2.4.20-o1-preempt/include/asm-alpha/bitops.h Tue Feb 18 03:51:29 2003
2654@@ -3,6 +3,7 @@
2655
2656 #include <linux/config.h>
2657 #include <linux/kernel.h>
2658+#include <asm/compiler.h>
2659
2660 /*
2661 * Copyright 1994, Linus Torvalds.
2662@@ -60,25 +61,25 @@
2663
2664 __asm__ __volatile__(
2665 "1: ldl_l %0,%3\n"
2666- " and %0,%2,%0\n"
2667+ " bic %0,%2,%0\n"
2668 " stl_c %0,%1\n"
2669 " beq %0,2f\n"
2670 ".subsection 2\n"
2671 "2: br 1b\n"
2672 ".previous"
2673 :"=&r" (temp), "=m" (*m)
2674- :"Ir" (~(1UL << (nr & 31))), "m" (*m));
2675+ :"Ir" (1UL << (nr & 31)), "m" (*m));
2676 }
2677
2678 /*
2679 * WARNING: non atomic version.
2680 */
2681 static __inline__ void
2682-__change_bit(unsigned long nr, volatile void * addr)
2683+__clear_bit(unsigned long nr, volatile void * addr)
2684 {
2685 int *m = ((int *) addr) + (nr >> 5);
2686
2687- *m ^= 1 << (nr & 31);
2688+ *m &= ~(1 << (nr & 31));
2689 }
2690
2691 static inline void
2692@@ -99,6 +100,17 @@
2693 :"Ir" (1UL << (nr & 31)), "m" (*m));
2694 }
2695
2696+/*
2697+ * WARNING: non atomic version.
2698+ */
2699+static __inline__ void
2700+__change_bit(unsigned long nr, volatile void * addr)
2701+{
2702+ int *m = ((int *) addr) + (nr >> 5);
2703+
2704+ *m ^= 1 << (nr & 31);
2705+}
2706+
2707 static inline int
2708 test_and_set_bit(unsigned long nr, volatile void *addr)
2709 {
2710@@ -181,20 +193,6 @@
2711 return (old & mask) != 0;
2712 }
2713
2714-/*
2715- * WARNING: non atomic version.
2716- */
2717-static __inline__ int
2718-__test_and_change_bit(unsigned long nr, volatile void * addr)
2719-{
2720- unsigned long mask = 1 << (nr & 0x1f);
2721- int *m = ((int *) addr) + (nr >> 5);
2722- int old = *m;
2723-
2724- *m = old ^ mask;
2725- return (old & mask) != 0;
2726-}
2727-
2728 static inline int
2729 test_and_change_bit(unsigned long nr, volatile void * addr)
2730 {
2731@@ -220,6 +218,20 @@
2732 return oldbit != 0;
2733 }
2734
2735+/*
2736+ * WARNING: non atomic version.
2737+ */
2738+static __inline__ int
2739+__test_and_change_bit(unsigned long nr, volatile void * addr)
2740+{
2741+ unsigned long mask = 1 << (nr & 0x1f);
2742+ int *m = ((int *) addr) + (nr >> 5);
2743+ int old = *m;
2744+
2745+ *m = old ^ mask;
2746+ return (old & mask) != 0;
2747+}
2748+
2749 static inline int
2750 test_bit(int nr, volatile void * addr)
2751 {
2752@@ -235,12 +247,15 @@
2753 */
2754 static inline unsigned long ffz_b(unsigned long x)
2755 {
2756- unsigned long sum = 0;
2757+ unsigned long sum, x1, x2, x4;
2758
2759 x = ~x & -~x; /* set first 0 bit, clear others */
2760- if (x & 0xF0) sum += 4;
2761- if (x & 0xCC) sum += 2;
2762- if (x & 0xAA) sum += 1;
2763+ x1 = x & 0xAA;
2764+ x2 = x & 0xCC;
2765+ x4 = x & 0xF0;
2766+ sum = x2 ? 2 : 0;
2767+ sum += (x4 != 0) * 4;
2768+ sum += (x1 != 0);
2769
2770 return sum;
2771 }
2772@@ -257,24 +272,46 @@
2773
2774 __asm__("cmpbge %1,%2,%0" : "=r"(bits) : "r"(word), "r"(~0UL));
2775 qofs = ffz_b(bits);
2776- __asm__("extbl %1,%2,%0" : "=r"(bits) : "r"(word), "r"(qofs));
2777+ bits = __kernel_extbl(word, qofs);
2778 bofs = ffz_b(bits);
2779
2780 return qofs*8 + bofs;
2781 #endif
2782 }
2783
2784+/*
2785+ * __ffs = Find First set bit in word. Undefined if no set bit exists.
2786+ */
2787+static inline unsigned long __ffs(unsigned long word)
2788+{
2789+#if defined(__alpha_cix__) && defined(__alpha_fix__)
2790+ /* Whee. EV67 can calculate it directly. */
2791+ unsigned long result;
2792+ __asm__("cttz %1,%0" : "=r"(result) : "r"(word));
2793+ return result;
2794+#else
2795+ unsigned long bits, qofs, bofs;
2796+
2797+ __asm__("cmpbge $31,%1,%0" : "=r"(bits) : "r"(word));
2798+ qofs = ffz_b(bits);
2799+ bits = __kernel_extbl(word, qofs);
2800+ bofs = ffz_b(~bits);
2801+
2802+ return qofs*8 + bofs;
2803+#endif
2804+}
2805+
2806 #ifdef __KERNEL__
2807
2808 /*
2809 * ffs: find first bit set. This is defined the same way as
2810 * the libc and compiler builtin ffs routines, therefore
2811- * differs in spirit from the above ffz (man ffs).
2812+ * differs in spirit from the above __ffs.
2813 */
2814
2815 static inline int ffs(int word)
2816 {
2817- int result = ffz(~word);
2818+ int result = __ffs(word);
2819 return word ? result+1 : 0;
2820 }
2821
2822@@ -316,6 +353,14 @@
2823 #define hweight16(x) hweight64((x) & 0xfffful)
2824 #define hweight8(x) hweight64((x) & 0xfful)
2825 #else
2826+static inline unsigned long hweight64(unsigned long w)
2827+{
2828+ unsigned long result;
2829+ for (result = 0; w ; w >>= 1)
2830+ result += (w & 1);
2831+ return result;
2832+}
2833+
2834 #define hweight32(x) generic_hweight32(x)
2835 #define hweight16(x) generic_hweight16(x)
2836 #define hweight8(x) generic_hweight8(x)
2837@@ -365,12 +410,76 @@
2838 }
2839
2840 /*
2841- * The optimizer actually does good code for this case..
2842+ * Find next one bit in a bitmap reasonably efficiently.
2843+ */
2844+static inline unsigned long
2845+find_next_bit(void * addr, unsigned long size, unsigned long offset)
2846+{
2847+ unsigned long * p = ((unsigned long *) addr) + (offset >> 6);
2848+ unsigned long result = offset & ~63UL;
2849+ unsigned long tmp;
2850+
2851+ if (offset >= size)
2852+ return size;
2853+ size -= result;
2854+ offset &= 63UL;
2855+ if (offset) {
2856+ tmp = *(p++);
2857+ tmp &= ~0UL << offset;
2858+ if (size < 64)
2859+ goto found_first;
2860+ if (tmp)
2861+ goto found_middle;
2862+ size -= 64;
2863+ result += 64;
2864+ }
2865+ while (size & ~63UL) {
2866+ if ((tmp = *(p++)))
2867+ goto found_middle;
2868+ result += 64;
2869+ size -= 64;
2870+ }
2871+ if (!size)
2872+ return result;
2873+ tmp = *p;
2874+found_first:
2875+ tmp &= ~0UL >> (64 - size);
2876+ if (!tmp)
2877+ return result + size;
2878+found_middle:
2879+ return result + __ffs(tmp);
2880+}
2881+
2882+/*
2883+ * The optimizer actually does good code for this case.
2884 */
2885 #define find_first_zero_bit(addr, size) \
2886 find_next_zero_bit((addr), (size), 0)
2887+#define find_first_bit(addr, size) \
2888+ find_next_bit((addr), (size), 0)
2889
2890 #ifdef __KERNEL__
2891+
2892+/*
2893+ * Every architecture must define this function. It's the fastest
2894+ * way of searching a 140-bit bitmap where the first 100 bits are
2895+ * unlikely to be set. It's guaranteed that at least one of the 140
2896+ * bits is set.
2897+ */
2898+static inline unsigned long
ddc40141 2899+_sched_find_first_bit(unsigned long b[3])
54aa170e
JR
2900+{
2901+ unsigned long b0 = b[0], b1 = b[1], b2 = b[2];
2902+ unsigned long ofs;
2903+
2904+ ofs = (b1 ? 64 : 128);
2905+ b1 = (b1 ? b1 : b2);
2906+ ofs = (b0 ? 0 : ofs);
2907+ b0 = (b0 ? b0 : b1);
2908+
2909+ return __ffs(b0) + ofs;
2910+}
2911+
2912
2913 #define ext2_set_bit __test_and_set_bit
2914 #define ext2_clear_bit __test_and_clear_bit
2915diff -urN linux-2.4.20/include/asm-alpha/smp.h linux-2.4.20-o1-preempt/include/asm-alpha/smp.h
2916--- linux-2.4.20/include/asm-alpha/smp.h Fri Sep 14 00:21:32 2001
2917+++ linux-2.4.20-o1-preempt/include/asm-alpha/smp.h Tue Feb 18 03:51:29 2003
2918@@ -55,7 +55,7 @@
2919 #define cpu_logical_map(cpu) __cpu_logical_map[cpu]
2920
2921 #define hard_smp_processor_id() __hard_smp_processor_id()
2922-#define smp_processor_id() (current->processor)
2923+#define smp_processor_id() (current->cpu)
2924
2925 extern unsigned long cpu_present_mask;
2926 #define cpu_online_map cpu_present_mask
2927--- linux-2.4.20/include/asm-alpha/spinlock.h.orig Wed Nov 21 00:49:31 2001
2928+++ linux-2.4.20/include/asm-alpha/spinlock.h Sun Mar 9 13:33:43 2003
2929@@ -38,12 +38,12 @@
2930 #define spin_unlock_wait(x) ({ do { barrier(); } while ((x)->lock); })
2931
2932 #if CONFIG_DEBUG_SPINLOCK
2933-extern void spin_unlock(spinlock_t * lock);
2934+extern void _raw_spin_unlock(spinlock_t * lock);
2935 extern void debug_spin_lock(spinlock_t * lock, const char *, int);
2936 extern int debug_spin_trylock(spinlock_t * lock, const char *, int);
2937
2938-#define spin_lock(LOCK) debug_spin_lock(LOCK, __BASE_FILE__, __LINE__)
2939-#define spin_trylock(LOCK) debug_spin_trylock(LOCK, __BASE_FILE__, __LINE__)
2940+#define _raw_spin_lock(LOCK) debug_spin_lock(LOCK, __BASE_FILE__, __LINE__)
2941+#define _raw_spin_trylock(LOCK) debug_spin_trylock(LOCK, __BASE_FILE__, __LINE__)
2942
2943 #define spin_lock_own(LOCK, LOCATION) \
2944 do { \
2945@@ -54,13 +54,13 @@
2946 (LOCK)->lock ? "taken" : "freed", (LOCK)->on_cpu); \
2947 } while (0)
2948 #else
2949-static inline void spin_unlock(spinlock_t * lock)
2950+static inline void _raw_spin_unlock(spinlock_t * lock)
2951 {
2952 mb();
2953 lock->lock = 0;
2954 }
2955
2956-static inline void spin_lock(spinlock_t * lock)
2957+static inline void _raw_spin_lock(spinlock_t * lock)
2958 {
2959 long tmp;
2960
2961@@ -83,7 +83,7 @@
2962 : "m"(lock->lock) : "memory");
2963 }
2964
2965-#define spin_trylock(lock) (!test_and_set_bit(0,(lock)))
2966+#define _raw_spin_trylock(lock) (!test_and_set_bit(0,(lock)))
2967 #define spin_lock_own(LOCK, LOCATION) ((void)0)
2968 #endif /* CONFIG_DEBUG_SPINLOCK */
2969
2970@@ -98,10 +98,10 @@
2971 #define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
2972
2973 #if CONFIG_DEBUG_RWLOCK
2974-extern void write_lock(rwlock_t * lock);
2975-extern void read_lock(rwlock_t * lock);
2976+extern void _raw_write_lock(rwlock_t * lock);
2977+extern void _raw_read_lock(rwlock_t * lock);
2978 #else
2979-static inline void write_lock(rwlock_t * lock)
2980+static inline void _raw_write_lock(rwlock_t * lock)
2981 {
2982 long regx;
2983
2984@@ -121,7 +121,7 @@
2985 : "0" (*(volatile int *)lock) : "memory");
2986 }
2987
2988-static inline void read_lock(rwlock_t * lock)
2989+static inline void _raw_read_lock(rwlock_t * lock)
2990 {
2991 long regx;
2992
2993@@ -142,13 +142,13 @@
2994 }
2995 #endif /* CONFIG_DEBUG_RWLOCK */
2996
2997-static inline void write_unlock(rwlock_t * lock)
2998+static inline void _raw_write_unlock(rwlock_t * lock)
2999 {
3000 mb();
3001 *(volatile int *)lock = 0;
3002 }
3003
3004-static inline void read_unlock(rwlock_t * lock)
3005+static inline void _raw_read_unlock(rwlock_t * lock)
3006 {
3007 long regx;
3008 __asm__ __volatile__(
3009diff -urN linux-2.4.20/include/asm-alpha/system.h linux-2.4.20-o1-preempt/include/asm-alpha/system.h
3010--- linux-2.4.20/include/asm-alpha/system.h Fri Oct 5 03:47:08 2001
3011+++ linux-2.4.20-o1-preempt/include/asm-alpha/system.h Tue Feb 18 03:51:29 2003
3012@@ -130,7 +130,6 @@
3013 extern void halt(void) __attribute__((noreturn));
3014 #define __halt() __asm__ __volatile__ ("call_pal %0 #halt" : : "i" (PAL_halt))
3015
3016-#define prepare_to_switch() do { } while(0)
3017 #define switch_to(prev,next,last) \
3018 do { \
3019 unsigned long pcbb; \
3020diff -urN linux-2.4.20/include/asm-arm/bitops.h linux-2.4.20-o1-preempt/include/asm-arm/bitops.h
3021--- linux-2.4.20/include/asm-arm/bitops.h Sun Aug 12 20:14:00 2001
3022+++ linux-2.4.20-o1-preempt/include/asm-arm/bitops.h Tue Feb 18 03:51:29 2003
3023@@ -2,6 +2,8 @@
3024 * Copyright 1995, Russell King.
3025 * Various bits and pieces copyrights include:
3026 * Linus Torvalds (test_bit).
3027+ * Big endian support: Copyright 2001, Nicolas Pitre
3028+ * reworked by rmk.
3029 *
3030 * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
3031 *
3032@@ -17,81 +19,271 @@
3033
3034 #ifdef __KERNEL__
3035
3036+#include <asm/system.h>
3037+
3038 #define smp_mb__before_clear_bit() do { } while (0)
3039 #define smp_mb__after_clear_bit() do { } while (0)
3040
3041 /*
3042- * Function prototypes to keep gcc -Wall happy.
3043+ * These functions are the basis of our bit ops.
3044+ * First, the atomic bitops.
3045+ *
3046+ * The endian issue for these functions is handled by the macros below.
3047 */
3048-extern void set_bit(int nr, volatile void * addr);
3049+static inline void
3050+____atomic_set_bit_mask(unsigned int mask, volatile unsigned char *p)
3051+{
3052+ unsigned long flags;
3053+
3054+ local_irq_save(flags);
3055+ *p |= mask;
3056+ local_irq_restore(flags);
3057+}
3058+
3059+static inline void
3060+____atomic_clear_bit_mask(unsigned int mask, volatile unsigned char *p)
3061+{
3062+ unsigned long flags;
3063+
3064+ local_irq_save(flags);
3065+ *p &= ~mask;
3066+ local_irq_restore(flags);
3067+}
3068+
3069+static inline void
3070+____atomic_change_bit_mask(unsigned int mask, volatile unsigned char *p)
3071+{
3072+ unsigned long flags;
3073+
3074+ local_irq_save(flags);
3075+ *p ^= mask;
3076+ local_irq_restore(flags);
3077+}
3078
3079-static inline void __set_bit(int nr, volatile void *addr)
3080+static inline int
3081+____atomic_test_and_set_bit_mask(unsigned int mask, volatile unsigned char *p)
3082 {
3083- ((unsigned char *) addr)[nr >> 3] |= (1U << (nr & 7));
3084+ unsigned long flags;
3085+ unsigned int res;
3086+
3087+ local_irq_save(flags);
3088+ res = *p;
3089+ *p = res | mask;
3090+ local_irq_restore(flags);
3091+
3092+ return res & mask;
3093 }
3094
3095-extern void clear_bit(int nr, volatile void * addr);
3096+static inline int
3097+____atomic_test_and_clear_bit_mask(unsigned int mask, volatile unsigned char *p)
3098+{
3099+ unsigned long flags;
3100+ unsigned int res;
3101+
3102+ local_irq_save(flags);
3103+ res = *p;
3104+ *p = res & ~mask;
3105+ local_irq_restore(flags);
3106+
3107+ return res & mask;
3108+}
3109
3110-static inline void __clear_bit(int nr, volatile void *addr)
3111+static inline int
3112+____atomic_test_and_change_bit_mask(unsigned int mask, volatile unsigned char *p)
3113 {
3114- ((unsigned char *) addr)[nr >> 3] &= ~(1U << (nr & 7));
3115+ unsigned long flags;
3116+ unsigned int res;
3117+
3118+ local_irq_save(flags);
3119+ res = *p;
3120+ *p = res ^ mask;
3121+ local_irq_restore(flags);
3122+
3123+ return res & mask;
3124 }
3125
3126-extern void change_bit(int nr, volatile void * addr);
3127+/*
3128+ * Now the non-atomic variants. We let the compiler handle all optimisations
3129+ * for these.
3130+ */
3131+static inline void ____nonatomic_set_bit(int nr, volatile void *p)
3132+{
3133+ ((unsigned char *) p)[nr >> 3] |= (1U << (nr & 7));
3134+}
3135
3136-static inline void __change_bit(int nr, volatile void *addr)
3137+static inline void ____nonatomic_clear_bit(int nr, volatile void *p)
3138 {
3139- ((unsigned char *) addr)[nr >> 3] ^= (1U << (nr & 7));
3140+ ((unsigned char *) p)[nr >> 3] &= ~(1U << (nr & 7));
3141 }
3142
3143-extern int test_and_set_bit(int nr, volatile void * addr);
3144+static inline void ____nonatomic_change_bit(int nr, volatile void *p)
3145+{
3146+ ((unsigned char *) p)[nr >> 3] ^= (1U << (nr & 7));
3147+}
3148
3149-static inline int __test_and_set_bit(int nr, volatile void *addr)
3150+static inline int ____nonatomic_test_and_set_bit(int nr, volatile void *p)
3151 {
3152 unsigned int mask = 1 << (nr & 7);
3153 unsigned int oldval;
3154
3155- oldval = ((unsigned char *) addr)[nr >> 3];
3156- ((unsigned char *) addr)[nr >> 3] = oldval | mask;
3157+ oldval = ((unsigned char *) p)[nr >> 3];
3158+ ((unsigned char *) p)[nr >> 3] = oldval | mask;
3159 return oldval & mask;
3160 }
3161
3162-extern int test_and_clear_bit(int nr, volatile void * addr);
3163-
3164-static inline int __test_and_clear_bit(int nr, volatile void *addr)
3165+static inline int ____nonatomic_test_and_clear_bit(int nr, volatile void *p)
3166 {
3167 unsigned int mask = 1 << (nr & 7);
3168 unsigned int oldval;
3169
3170- oldval = ((unsigned char *) addr)[nr >> 3];
3171- ((unsigned char *) addr)[nr >> 3] = oldval & ~mask;
3172+ oldval = ((unsigned char *) p)[nr >> 3];
3173+ ((unsigned char *) p)[nr >> 3] = oldval & ~mask;
3174 return oldval & mask;
3175 }
3176
3177-extern int test_and_change_bit(int nr, volatile void * addr);
3178-
3179-static inline int __test_and_change_bit(int nr, volatile void *addr)
3180+static inline int ____nonatomic_test_and_change_bit(int nr, volatile void *p)
3181 {
3182 unsigned int mask = 1 << (nr & 7);
3183 unsigned int oldval;
3184
3185- oldval = ((unsigned char *) addr)[nr >> 3];
3186- ((unsigned char *) addr)[nr >> 3] = oldval ^ mask;
3187+ oldval = ((unsigned char *) p)[nr >> 3];
3188+ ((unsigned char *) p)[nr >> 3] = oldval ^ mask;
3189 return oldval & mask;
3190 }
3191
3192-extern int find_first_zero_bit(void * addr, unsigned size);
3193-extern int find_next_zero_bit(void * addr, int size, int offset);
3194-
3195 /*
3196 * This routine doesn't need to be atomic.
3197 */
3198-static inline int test_bit(int nr, const void * addr)
3199+static inline int ____test_bit(int nr, const void * p)
3200 {
3201- return ((unsigned char *) addr)[nr >> 3] & (1U << (nr & 7));
3202+ return ((volatile unsigned char *) p)[nr >> 3] & (1U << (nr & 7));
3203 }
3204
3205 /*
3206+ * A note about Endian-ness.
3207+ * -------------------------
3208+ *
3209+ * When the ARM is put into big endian mode via CR15, the processor
3210+ * merely swaps the order of bytes within words, thus:
3211+ *
3212+ * ------------ physical data bus bits -----------
3213+ * D31 ... D24 D23 ... D16 D15 ... D8 D7 ... D0
3214+ * little byte 3 byte 2 byte 1 byte 0
3215+ * big byte 0 byte 1 byte 2 byte 3
3216+ *
3217+ * This means that reading a 32-bit word at address 0 returns the same
3218+ * value irrespective of the endian mode bit.
3219+ *
3220+ * Peripheral devices should be connected with the data bus reversed in
3221+ * "Big Endian" mode. ARM Application Note 61 is applicable, and is
3222+ * available from http://www.arm.com/.
3223+ *
3224+ * The following assumes that the data bus connectivity for big endian
3225+ * mode has been followed.
3226+ *
3227+ * Note that bit 0 is defined to be 32-bit word bit 0, not byte 0 bit 0.
3228+ */
3229+
3230+/*
3231+ * Little endian assembly bitops. nr = 0 -> byte 0 bit 0.
3232+ */
3233+extern void _set_bit_le(int nr, volatile void * p);
3234+extern void _clear_bit_le(int nr, volatile void * p);
3235+extern void _change_bit_le(int nr, volatile void * p);
3236+extern int _test_and_set_bit_le(int nr, volatile void * p);
3237+extern int _test_and_clear_bit_le(int nr, volatile void * p);
3238+extern int _test_and_change_bit_le(int nr, volatile void * p);
3239+extern int _find_first_zero_bit_le(void * p, unsigned size);
3240+extern int _find_next_zero_bit_le(void * p, int size, int offset);
3241+
3242+/*
3243+ * Big endian assembly bitops. nr = 0 -> byte 3 bit 0.
3244+ */
3245+extern void _set_bit_be(int nr, volatile void * p);
3246+extern void _clear_bit_be(int nr, volatile void * p);
3247+extern void _change_bit_be(int nr, volatile void * p);
3248+extern int _test_and_set_bit_be(int nr, volatile void * p);
3249+extern int _test_and_clear_bit_be(int nr, volatile void * p);
3250+extern int _test_and_change_bit_be(int nr, volatile void * p);
3251+extern int _find_first_zero_bit_be(void * p, unsigned size);
3252+extern int _find_next_zero_bit_be(void * p, int size, int offset);
3253+
3254+
3255+/*
3256+ * The __* form of bitops are non-atomic and may be reordered.
3257+ */
3258+#define ATOMIC_BITOP_LE(name,nr,p) \
3259+ (__builtin_constant_p(nr) ? \
3260+ ____atomic_##name##_mask(1 << ((nr) & 7), \
3261+ ((unsigned char *)(p)) + ((nr) >> 3)) : \
3262+ _##name##_le(nr,p))
3263+
3264+#define ATOMIC_BITOP_BE(name,nr,p) \
3265+ (__builtin_constant_p(nr) ? \
3266+ ____atomic_##name##_mask(1 << ((nr) & 7), \
3267+ ((unsigned char *)(p)) + (((nr) >> 3) ^ 3)) : \
3268+ _##name##_be(nr,p))
3269+
3270+#define NONATOMIC_BITOP_LE(name,nr,p) \
3271+ (____nonatomic_##name(nr, p))
3272+
3273+#define NONATOMIC_BITOP_BE(name,nr,p) \
3274+ (____nonatomic_##name(nr ^ 0x18, p))
3275+
3276+#ifndef __ARMEB__
3277+/*
3278+ * These are the little endian, atomic definitions.
3279+ */
3280+#define set_bit(nr,p) ATOMIC_BITOP_LE(set_bit,nr,p)
3281+#define clear_bit(nr,p) ATOMIC_BITOP_LE(clear_bit,nr,p)
3282+#define change_bit(nr,p) ATOMIC_BITOP_LE(change_bit,nr,p)
3283+#define test_and_set_bit(nr,p) ATOMIC_BITOP_LE(test_and_set_bit,nr,p)
3284+#define test_and_clear_bit(nr,p) ATOMIC_BITOP_LE(test_and_clear_bit,nr,p)
3285+#define test_and_change_bit(nr,p) ATOMIC_BITOP_LE(test_and_change_bit,nr,p)
3286+#define test_bit(nr,p) ____test_bit(nr,p)
3287+#define find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz)
3288+#define find_next_zero_bit(p,sz,off) _find_next_zero_bit_le(p,sz,off)
3289+
3290+/*
3291+ * These are the little endian, non-atomic definitions.
3292+ */
3293+#define __set_bit(nr,p) NONATOMIC_BITOP_LE(set_bit,nr,p)
3294+#define __clear_bit(nr,p) NONATOMIC_BITOP_LE(clear_bit,nr,p)
3295+#define __change_bit(nr,p) NONATOMIC_BITOP_LE(change_bit,nr,p)
3296+#define __test_and_set_bit(nr,p) NONATOMIC_BITOP_LE(test_and_set_bit,nr,p)
3297+#define __test_and_clear_bit(nr,p) NONATOMIC_BITOP_LE(test_and_clear_bit,nr,p)
3298+#define __test_and_change_bit(nr,p) NONATOMIC_BITOP_LE(test_and_change_bit,nr,p)
3299+#define __test_bit(nr,p) ____test_bit(nr,p)
3300+
3301+#else
3302+
3303+/*
3304+ * These are the big endian, atomic definitions.
3305+ */
3306+#define set_bit(nr,p) ATOMIC_BITOP_BE(set_bit,nr,p)
3307+#define clear_bit(nr,p) ATOMIC_BITOP_BE(clear_bit,nr,p)
3308+#define change_bit(nr,p) ATOMIC_BITOP_BE(change_bit,nr,p)
3309+#define test_and_set_bit(nr,p) ATOMIC_BITOP_BE(test_and_set_bit,nr,p)
3310+#define test_and_clear_bit(nr,p) ATOMIC_BITOP_BE(test_and_clear_bit,nr,p)
3311+#define test_and_change_bit(nr,p) ATOMIC_BITOP_BE(test_and_change_bit,nr,p)
3312+#define test_bit(nr,p) ____test_bit((nr) ^ 0x18, p)
3313+#define find_first_zero_bit(p,sz) _find_first_zero_bit_be(p,sz)
3314+#define find_next_zero_bit(p,sz,off) _find_next_zero_bit_be(p,sz,off)
3315+
3316+/*
3317+ * These are the big endian, non-atomic definitions.
3318+ */
3319+#define __set_bit(nr,p) NONATOMIC_BITOP_BE(set_bit,nr,p)
3320+#define __clear_bit(nr,p) NONATOMIC_BITOP_BE(clear_bit,nr,p)
3321+#define __change_bit(nr,p) NONATOMIC_BITOP_BE(change_bit,nr,p)
3322+#define __test_and_set_bit(nr,p) NONATOMIC_BITOP_BE(test_and_set_bit,nr,p)
3323+#define __test_and_clear_bit(nr,p) NONATOMIC_BITOP_BE(test_and_clear_bit,nr,p)
3324+#define __test_and_change_bit(nr,p) NONATOMIC_BITOP_BE(test_and_change_bit,nr,p)
3325+#define __test_bit(nr,p) ____test_bit((nr) ^ 0x18, p)
3326+
3327+#endif
3328+
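As an aside (not part of the patch), the endianness note and the BE macros above boil down to one index transformation: on a data-bus-reversed big-endian ARM, word bit nr lives in byte (nr >> 3) ^ 3, which is why ATOMIC_BITOP_BE() XORs the byte offset with 3 and NONATOMIC_BITOP_BE() XORs nr with 0x18. A small user-space sketch checking that both forms describe the same mapping:

#include <assert.h>

/* Byte offset of 32-bit word bit `nr` under the little-endian convention. */
static unsigned le_byte(unsigned nr) { return nr >> 3; }

/* The same bit under the bus-reversed big-endian convention: byte index
 * XOR 3, equivalently bit number XOR 0x18 for the byte-wide helpers. */
static unsigned be_byte(unsigned nr) { return (nr >> 3) ^ 3; }

int main(void)
{
	unsigned nr;

	/* Word bit 0 is byte 0 bit 0 in LE mode, byte 3 bit 0 in BE mode. */
	assert(le_byte(0) == 0 && be_byte(0) == 3);
	/* Word bit 31 is byte 3 bit 7 in LE mode, byte 0 bit 7 in BE mode. */
	assert(le_byte(31) == 3 && be_byte(31) == 0);
	/* XOR-ing nr with 0x18 is the same as XOR-ing the byte index with 3. */
	for (nr = 0; nr < 32; nr++)
		assert(((nr ^ 0x18) >> 3) == be_byte(nr));
	return 0;
}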
3329+/*
3330 * ffz = Find First Zero in word. Undefined if no zero exists,
3331 * so code should check against ~0UL first..
3332 */
3333@@ -110,6 +302,29 @@
3334 }
3335
3336 /*
3337+ * __ffs = Find First Set bit in word. Undefined if no bit is set,
3338+ * so code should check against 0 first..
3339+ */
3340+static inline unsigned long __ffs(unsigned long word)
3341+{
3342+ int k;
3343+
3344+ k = 31;
3345+ if (word & 0x0000ffff) { k -= 16; word <<= 16; }
3346+ if (word & 0x00ff0000) { k -= 8; word <<= 8; }
3347+ if (word & 0x0f000000) { k -= 4; word <<= 4; }
3348+ if (word & 0x30000000) { k -= 2; word <<= 2; }
3349+ if (word & 0x40000000) { k -= 1; }
3350+ return k;
3351+}
3352+
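A quick sanity sketch, not part of the patch: the generic __ffs() above halves the search window at each step instead of scanning bit by bit. The snippet below carries its own copy of the same algorithm purely so it compiles standalone, and checks it against a naive scan.

#include <assert.h>

static unsigned long ffs_model(unsigned long word)	/* mirrors __ffs() above */
{
	int k = 31;
	if (word & 0x0000ffff) { k -= 16; word <<= 16; }
	if (word & 0x00ff0000) { k -= 8;  word <<= 8;  }
	if (word & 0x0f000000) { k -= 4;  word <<= 4;  }
	if (word & 0x30000000) { k -= 2;  word <<= 2;  }
	if (word & 0x40000000) { k -= 1; }
	return k;
}

static unsigned long naive_ffs(unsigned long word)	/* reference: scan from bit 0 */
{
	unsigned long bit = 0;
	while (!(word & 1)) { word >>= 1; bit++; }
	return bit;
}

int main(void)
{
	unsigned long bit;

	for (bit = 0; bit < 32; bit++)
		assert(ffs_model(1UL << bit) == naive_ffs(1UL << bit));
	assert(ffs_model(0x00010000UL) == 16);	/* lowest set bit is bit 16 */
	return 0;
}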
3353+/*
3354+ * fls: find last bit set.
3355+ */
3356+
3357+#define fls(x) generic_fls(x)
3358+
3359+/*
3360 * ffs: find first bit set. This is defined the same way as
3361 * the libc and compiler builtin ffs routines, therefore
3362 * differs in spirit from the above ffz (man ffs).
3363@@ -118,6 +333,22 @@
3364 #define ffs(x) generic_ffs(x)
3365
3366 /*
3367+ * Find first bit set in a 168-bit bitmap, where the first
3368+ * 128 bits are unlikely to be set.
3369+ */
3370+static inline int _sched_find_first_bit(unsigned long *b)
3371+{
3372+ unsigned long v;
3373+ unsigned int off;
3374+
3375+ for (off = 0; v = b[off], off < 4; off++) {
3376+ if (unlikely(v))
3377+ break;
3378+ }
3379+ return __ffs(v) + off * 32;
3380+}
3381+
3382+/*
3383 * hweightN: returns the hamming weight (i.e. the number
3384 * of bits set) of a N-bit word
3385 */
3386@@ -126,18 +357,25 @@
3387 #define hweight16(x) generic_hweight16(x)
3388 #define hweight8(x) generic_hweight8(x)
3389
3390-#define ext2_set_bit test_and_set_bit
3391-#define ext2_clear_bit test_and_clear_bit
3392-#define ext2_test_bit test_bit
3393-#define ext2_find_first_zero_bit find_first_zero_bit
3394-#define ext2_find_next_zero_bit find_next_zero_bit
3395-
3396-/* Bitmap functions for the minix filesystem. */
3397-#define minix_test_and_set_bit(nr,addr) test_and_set_bit(nr,addr)
3398-#define minix_set_bit(nr,addr) set_bit(nr,addr)
3399-#define minix_test_and_clear_bit(nr,addr) test_and_clear_bit(nr,addr)
3400-#define minix_test_bit(nr,addr) test_bit(nr,addr)
3401-#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
3402+/*
3403+ * Ext2 is defined to use little-endian byte ordering.
3404+ * These do not need to be atomic.
3405+ */
3406+#define ext2_set_bit(nr,p) NONATOMIC_BITOP_LE(test_and_set_bit,nr,p)
3407+#define ext2_clear_bit(nr,p) NONATOMIC_BITOP_LE(test_and_clear_bit,nr,p)
3408+#define ext2_test_bit(nr,p) __test_bit(nr,p)
3409+#define ext2_find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz)
3410+#define ext2_find_next_zero_bit(p,sz,off) _find_next_zero_bit_le(p,sz,off)
3411+
3412+/*
3413+ * Minix is defined to use little-endian byte ordering.
3414+ * These do not need to be atomic.
3415+ */
3416+#define minix_set_bit(nr,p) NONATOMIC_BITOP_LE(set_bit,nr,p)
3417+#define minix_test_bit(nr,p) __test_bit(nr,p)
3418+#define minix_test_and_set_bit(nr,p) NONATOMIC_BITOP_LE(test_and_set_bit,nr,p)
3419+#define minix_test_and_clear_bit(nr,p) NONATOMIC_BITOP_LE(test_and_clear_bit,nr,p)
3420+#define minix_find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz)
3421
3422 #endif /* __KERNEL__ */
3423
3424diff -urN linux-2.4.20/include/asm-arm/dma.h linux-2.4.20-o1-preempt/include/asm-arm/dma.h
3425--- linux-2.4.20/include/asm-arm/dma.h Sun Aug 12 20:14:00 2001
3426+++ linux-2.4.20-o1-preempt/include/asm-arm/dma.h Tue Feb 18 03:52:07 2003
3427@@ -5,6 +5,7 @@
3428
3429 #include <linux/config.h>
3430 #include <linux/spinlock.h>
3431+#include <linux/sched.h>
3432 #include <asm/system.h>
3433 #include <asm/memory.h>
3434 #include <asm/scatterlist.h>
3435diff -urN linux-2.4.20/include/asm-arm/hardirq.h linux-2.4.20-o1-preempt/include/asm-arm/hardirq.h
3436--- linux-2.4.20/include/asm-arm/hardirq.h Thu Oct 11 18:04:57 2001
3437+++ linux-2.4.20-o1-preempt/include/asm-arm/hardirq.h Tue Feb 18 03:52:07 2003
3438@@ -34,6 +34,7 @@
3439 #define irq_exit(cpu,irq) (local_irq_count(cpu)--)
3440
3441 #define synchronize_irq() do { } while (0)
3442+#define release_irqlock(cpu) do { } while (0)
3443
3444 #else
3445 #error SMP not supported
3446diff -urN linux-2.4.20/include/asm-arm/pgalloc.h linux-2.4.20-o1-preempt/include/asm-arm/pgalloc.h
3447--- linux-2.4.20/include/asm-arm/pgalloc.h Sun Aug 12 20:14:00 2001
3448+++ linux-2.4.20-o1-preempt/include/asm-arm/pgalloc.h Tue Feb 18 03:52:07 2003
3449@@ -57,40 +57,48 @@
3450 {
3451 unsigned long *ret;
3452
3453+ preempt_disable();
3454 if ((ret = pgd_quicklist) != NULL) {
3455 pgd_quicklist = (unsigned long *)__pgd_next(ret);
3456 ret[1] = ret[2];
3457 clean_dcache_entry(ret + 1);
3458 pgtable_cache_size--;
3459 }
3460+ preempt_enable();
3461 return (pgd_t *)ret;
3462 }
3463
3464 static inline void free_pgd_fast(pgd_t *pgd)
3465 {
3466+ preempt_disable();
3467 __pgd_next(pgd) = (unsigned long) pgd_quicklist;
3468 pgd_quicklist = (unsigned long *) pgd;
3469 pgtable_cache_size++;
3470+ preempt_enable();
3471 }
3472
3473 static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address)
3474 {
3475 unsigned long *ret;
3476
3477+ preempt_disable();
3478 if((ret = pte_quicklist) != NULL) {
3479 pte_quicklist = (unsigned long *)__pte_next(ret);
3480 ret[0] = 0;
3481 clean_dcache_entry(ret);
3482 pgtable_cache_size--;
3483 }
3484+ preempt_enable();
3485 return (pte_t *)ret;
3486 }
3487
3488 static inline void free_pte_fast(pte_t *pte)
3489 {
3490+ preempt_disable();
3491 __pte_next(pte) = (unsigned long) pte_quicklist;
3492 pte_quicklist = (unsigned long *) pte;
3493 pgtable_cache_size++;
3494+ preempt_enable();
3495 }
3496
3497 #else /* CONFIG_NO_PGT_CACHE */
3498diff -urN linux-2.4.20/include/asm-arm/smplock.h linux-2.4.20-o1-preempt/include/asm-arm/smplock.h
3499--- linux-2.4.20/include/asm-arm/smplock.h Sun Aug 12 20:14:00 2001
3500+++ linux-2.4.20-o1-preempt/include/asm-arm/smplock.h Tue Feb 18 03:52:07 2003
3501@@ -3,12 +3,17 @@
3502 *
3503 * Default SMP lock implementation
3504 */
3505+#include <linux/config.h>
3506 #include <linux/interrupt.h>
3507 #include <linux/spinlock.h>
3508
3509 extern spinlock_t kernel_flag;
3510
3511+#ifdef CONFIG_PREEMPT
3512+#define kernel_locked() preempt_get_count()
3513+#else
3514 #define kernel_locked() spin_is_locked(&kernel_flag)
3515+#endif
3516
3517 /*
3518 * Release global kernel lock and global interrupt lock
3519@@ -40,8 +45,14 @@
3520 */
3521 static inline void lock_kernel(void)
3522 {
3523+#ifdef CONFIG_PREEMPT
3524+ if (current->lock_depth == -1)
3525+ spin_lock(&kernel_flag);
3526+ ++current->lock_depth;
3527+#else
3528 if (!++current->lock_depth)
3529 spin_lock(&kernel_flag);
3530+#endif
3531 }
3532
3533 static inline void unlock_kernel(void)
3534diff -urN linux-2.4.20/include/asm-arm/softirq.h linux-2.4.20-o1-preempt/include/asm-arm/softirq.h
3535--- linux-2.4.20/include/asm-arm/softirq.h Sat Sep 8 21:02:31 2001
3536+++ linux-2.4.20-o1-preempt/include/asm-arm/softirq.h Tue Feb 18 03:52:07 2003
3537@@ -5,20 +5,22 @@
3538 #include <asm/hardirq.h>
3539
3540 #define __cpu_bh_enable(cpu) \
3541- do { barrier(); local_bh_count(cpu)--; } while (0)
3542+ do { barrier(); local_bh_count(cpu)--; preempt_enable(); } while (0)
3543 #define cpu_bh_disable(cpu) \
3544- do { local_bh_count(cpu)++; barrier(); } while (0)
3545+ do { preempt_disable(); local_bh_count(cpu)++; barrier(); } while (0)
3546
3547 #define local_bh_disable() cpu_bh_disable(smp_processor_id())
3548 #define __local_bh_enable() __cpu_bh_enable(smp_processor_id())
3549
3550 #define in_softirq() (local_bh_count(smp_processor_id()) != 0)
3551
3552-#define local_bh_enable() \
3553+#define _local_bh_enable() \
3554 do { \
3555 unsigned int *ptr = &local_bh_count(smp_processor_id()); \
3556 if (!--*ptr && ptr[-2]) \
3557 __asm__("bl%? __do_softirq": : : "lr");/* out of line */\
3558 } while (0)
3559+
3560+#define local_bh_enable() do { _local_bh_enable(); preempt_enable(); } while (0)
3561
3562 #endif /* __ASM_SOFTIRQ_H */
3563diff -urN linux-2.4.20/include/asm-arm/system.h linux-2.4.20-o1-preempt/include/asm-arm/system.h
3564--- linux-2.4.20/include/asm-arm/system.h Tue Nov 28 02:07:59 2000
3565+++ linux-2.4.20-o1-preempt/include/asm-arm/system.h Tue Feb 18 03:52:07 2003
3566@@ -62,6 +62,13 @@
3567 #define local_irq_disable() __cli()
3568 #define local_irq_enable() __sti()
3569
3570+#define irqs_disabled() \
3571+({ \
3572+ unsigned long cpsr_val; \
3573+ asm ("mrs %0, cpsr" : "=r" (cpsr_val)); \
3574+ cpsr_val & 128; \
3575+})
3576+
3577 #ifdef CONFIG_SMP
3578 #error SMP not supported
3579
3580diff -urN linux-2.4.20/include/asm-cris/bitops.h linux-2.4.20-o1-preempt/include/asm-cris/bitops.h
3581--- linux-2.4.20/include/asm-cris/bitops.h Mon Feb 25 20:38:10 2002
3582+++ linux-2.4.20-o1-preempt/include/asm-cris/bitops.h Tue Feb 18 03:51:29 2003
3583@@ -22,6 +22,7 @@
3584 /* We use generic_ffs so get it; include guards resolve the possible
3585 mutually inclusion. */
3586 #include <linux/bitops.h>
3587+#include <linux/compiler.h>
3588
3589 /*
3590 * Some hacks to defeat gcc over-optimizations..
3591@@ -43,6 +44,8 @@
3592
3593 #define set_bit(nr, addr) (void)test_and_set_bit(nr, addr)
3594
3595+#define __set_bit(nr, addr) (void)__test_and_set_bit(nr, addr)
3596+
3597 /*
3598 * clear_bit - Clears a bit in memory
3599 * @nr: Bit to clear
3600@@ -56,6 +59,8 @@
3601
3602 #define clear_bit(nr, addr) (void)test_and_clear_bit(nr, addr)
3603
3604+#define __clear_bit(nr, addr) (void)__test_and_clear_bit(nr, addr)
3605+
3606 /*
3607 * change_bit - Toggle a bit in memory
3608 * @nr: Bit to clear
3609@@ -89,7 +94,7 @@
3610 * It also implies a memory barrier.
3611 */
3612
3613-static __inline__ int test_and_set_bit(int nr, void *addr)
3614+static inline int test_and_set_bit(int nr, void *addr)
3615 {
3616 unsigned int mask, retval;
3617 unsigned long flags;
3618@@ -105,6 +110,18 @@
3619 return retval;
3620 }
3621
3622+static inline int __test_and_set_bit(int nr, void *addr)
3623+{
3624+ unsigned int mask, retval;
3625+ unsigned int *adr = (unsigned int *)addr;
3626+
3627+ adr += nr >> 5;
3628+ mask = 1 << (nr & 0x1f);
3629+ retval = (mask & *adr) != 0;
3630+ *adr |= mask;
3631+ return retval;
3632+}
3633+
3634 /*
3635 * clear_bit() doesn't provide any barrier for the compiler.
3636 */
3637@@ -120,7 +137,7 @@
3638 * It also implies a memory barrier.
3639 */
3640
3641-static __inline__ int test_and_clear_bit(int nr, void *addr)
3642+static inline int test_and_clear_bit(int nr, void *addr)
3643 {
3644 unsigned int mask, retval;
3645 unsigned long flags;
3646@@ -146,7 +163,7 @@
3647 * but actually fail. You must protect multiple accesses with a lock.
3648 */
3649
3650-static __inline__ int __test_and_clear_bit(int nr, void *addr)
3651+static inline int __test_and_clear_bit(int nr, void *addr)
3652 {
3653 unsigned int mask, retval;
3654 unsigned int *adr = (unsigned int *)addr;
3655@@ -166,7 +183,7 @@
3656 * It also implies a memory barrier.
3657 */
3658
3659-static __inline__ int test_and_change_bit(int nr, void *addr)
3660+static inline int test_and_change_bit(int nr, void *addr)
3661 {
3662 unsigned int mask, retval;
3663 unsigned long flags;
3664@@ -183,7 +200,7 @@
3665
3666 /* WARNING: non atomic and it can be reordered! */
3667
3668-static __inline__ int __test_and_change_bit(int nr, void *addr)
3669+static inline int __test_and_change_bit(int nr, void *addr)
3670 {
3671 unsigned int mask, retval;
3672 unsigned int *adr = (unsigned int *)addr;
3673@@ -204,7 +221,7 @@
3674 * This routine doesn't need to be atomic.
3675 */
3676
3677-static __inline__ int test_bit(int nr, const void *addr)
3678+static inline int test_bit(int nr, const void *addr)
3679 {
3680 unsigned int mask;
3681 unsigned int *adr = (unsigned int *)addr;
3682@@ -225,7 +242,7 @@
3683 * number. They differ in that the first function also inverts all bits
3684 * in the input.
3685 */
3686-static __inline__ unsigned long cris_swapnwbrlz(unsigned long w)
3687+static inline unsigned long cris_swapnwbrlz(unsigned long w)
3688 {
3689 /* Let's just say we return the result in the same register as the
3690 input. Saying we clobber the input but can return the result
3691@@ -241,7 +258,7 @@
3692 return res;
3693 }
3694
3695-static __inline__ unsigned long cris_swapwbrlz(unsigned long w)
3696+static inline unsigned long cris_swapwbrlz(unsigned long w)
3697 {
3698 unsigned res;
3699 __asm__ ("swapwbr %0 \n\t"
3700@@ -255,7 +272,7 @@
3701 * ffz = Find First Zero in word. Undefined if no zero exists,
3702 * so code should check against ~0UL first..
3703 */
3704-static __inline__ unsigned long ffz(unsigned long w)
3705+static inline unsigned long ffz(unsigned long w)
3706 {
3707 /* The generic_ffs function is used to avoid the asm when the
3708 argument is a constant. */
3709@@ -268,7 +285,7 @@
3710 * Somewhat like ffz but the equivalent of generic_ffs: in contrast to
3711 * ffz we return the first one-bit *plus one*.
3712 */
3713-static __inline__ unsigned long ffs(unsigned long w)
3714+static inline unsigned long ffs(unsigned long w)
3715 {
3716 /* The generic_ffs function is used to avoid the asm when the
3717 argument is a constant. */
3718@@ -283,7 +300,7 @@
3719 * @offset: The bitnumber to start searching at
3720 * @size: The maximum size to search
3721 */
3722-static __inline__ int find_next_zero_bit (void * addr, int size, int offset)
3723+static inline int find_next_zero_bit (void * addr, int size, int offset)
3724 {
3725 unsigned long *p = ((unsigned long *) addr) + (offset >> 5);
3726 unsigned long result = offset & ~31UL;
3727@@ -354,7 +371,45 @@
3728 #define minix_test_bit(nr,addr) test_bit(nr,addr)
3729 #define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
3730
3731-#endif /* __KERNEL__ */
3732+#if 0
3733+/* TODO: see below */
3734+#define sched_find_first_zero_bit(addr) find_first_zero_bit(addr, 168)
3735+
3736+#else
3737+/* TODO: left out pending where to put it.. (there are .h dependencies) */
3738+
3739+ /*
3740+ * Every architecture must define this function. It's the fastest
3741+ * way of searching a 168-bit bitmap where the first 128 bits are
3742+ * unlikely to be set. It's guaranteed that at least one of the 168
3743+ * bits is cleared.
3744+ */
3745+#if 0
3746+#if MAX_RT_PRIO != 128 || MAX_PRIO != 168
3747+# error update this function.
3748+#endif
3749+#else
3750+#define MAX_RT_PRIO 128
3751+#define MAX_PRIO 168
3752+#endif
3753+
3754+static inline int sched_find_first_zero_bit(char *bitmap)
3755+{
3756+ unsigned int *b = (unsigned int *)bitmap;
3757+ unsigned int rt;
3758+
3759+ rt = b[0] & b[1] & b[2] & b[3];
3760+ if (unlikely(rt != 0xffffffff))
3761+ return find_first_zero_bit(bitmap, MAX_RT_PRIO);
3762+
3763+ if (b[4] != ~0)
3764+ return ffz(b[4]) + MAX_RT_PRIO;
3765+ return ffz(b[5]) + 32 + MAX_RT_PRIO;
3766+}
3767+#undef MAX_PRIO
3768+#undef MAX_RT_PRIO
3769+#endif
3770
3771+#endif /* __KERNEL__ */
3772
3773 #endif /* _CRIS_BITOPS_H */
3774diff -urN linux-2.4.20/include/asm-generic/bitops.h linux-2.4.20-o1-preempt/include/asm-generic/bitops.h
3775--- linux-2.4.20/include/asm-generic/bitops.h Tue Nov 28 02:47:38 2000
3776+++ linux-2.4.20-o1-preempt/include/asm-generic/bitops.h Tue Feb 18 03:51:29 2003
3777@@ -51,6 +51,12 @@
3778 return ((mask & *addr) != 0);
3779 }
3780
3781+/*
3782+ * fls: find last bit set.
3783+ */
3784+
3785+#define fls(x) generic_fls(x)
3786+
3787 #ifdef __KERNEL__
3788
3789 /*
3790diff -urN linux-2.4.20/include/asm-i386/bitops.h linux-2.4.20-o1-preempt/include/asm-i386/bitops.h
3791--- linux-2.4.20/include/asm-i386/bitops.h Fri Nov 29 00:53:15 2002
3792+++ linux-2.4.20-o1-preempt/include/asm-i386/bitops.h Tue Feb 18 03:51:29 2003
3793@@ -6,6 +6,7 @@
3794 */
3795
3796 #include <linux/config.h>
3797+#include <linux/compiler.h>
3798
3799 /*
3800 * These have to be done with inline assembly: that way the bit-setting
3801@@ -75,6 +76,14 @@
3802 :"=m" (ADDR)
3803 :"Ir" (nr));
3804 }
3805+
3806+static __inline__ void __clear_bit(int nr, volatile void * addr)
3807+{
3808+ __asm__ __volatile__(
3809+ "btrl %1,%0"
3810+ :"=m" (ADDR)
3811+ :"Ir" (nr));
3812+}
3813 #define smp_mb__before_clear_bit() barrier()
3814 #define smp_mb__after_clear_bit() barrier()
3815
3816@@ -284,6 +293,34 @@
3817 }
3818
3819 /**
3820+ * find_first_bit - find the first set bit in a memory region
3821+ * @addr: The address to start the search at
3822+ * @size: The maximum size to search
3823+ *
3824+ * Returns the bit-number of the first set bit, not the number of the byte
3825+ * containing a bit.
3826+ */
3827+static __inline__ int find_first_bit(void * addr, unsigned size)
3828+{
3829+ int d0, d1;
3830+ int res;
3831+
3832+ /* This looks at memory. Mark it volatile to tell gcc not to move it around */
3833+ __asm__ __volatile__(
3834+ "xorl %%eax,%%eax\n\t"
3835+ "repe; scasl\n\t"
3836+ "jz 1f\n\t"
3837+ "leal -4(%%edi),%%edi\n\t"
3838+ "bsfl (%%edi),%%eax\n"
3839+ "1:\tsubl %%ebx,%%edi\n\t"
3840+ "shll $3,%%edi\n\t"
3841+ "addl %%edi,%%eax"
3842+ :"=a" (res), "=&c" (d0), "=&D" (d1)
3843+ :"1" ((size + 31) >> 5), "2" (addr), "b" (addr));
3844+ return res;
3845+}
3846+
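For reference, a portable C model (an illustration of what the repe/scasl plus bsfl sequence above computes, not code from the patch): scan 32-bit words for the first non-zero one, then add the index of its lowest set bit. Like the assembly, it rounds size up to a whole word and returns a value >= size when nothing is set.

#include <assert.h>

static int find_first_bit_model(const unsigned int *addr, unsigned size)
{
	unsigned words = (size + 31) >> 5;	/* whole 32-bit words scanned */
	unsigned i, bit;

	for (i = 0; i < words; i++) {
		if (!addr[i])
			continue;
		for (bit = 0; !(addr[i] & (1U << bit)); bit++)
			;			/* lowest set bit of that word */
		return i * 32 + bit;
	}
	return words * 32;			/* no bit set in the region */
}

int main(void)
{
	unsigned int map[4] = { 0, 0, 0x00040000, 0 };	/* only bit 82 set */
	assert(find_first_bit_model(map, 128) == 82);
	return 0;
}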
3847+/**
3848 * find_next_zero_bit - find the first zero bit in a memory region
3849 * @addr: The address to base the search on
3850 * @offset: The bitnumber to start searching at
3851@@ -296,7 +333,7 @@
3852
3853 if (bit) {
3854 /*
3855- * Look for zero in first byte
3856+ * Look for zero in the first 32 bits.
3857 */
3858 __asm__("bsfl %1,%0\n\t"
3859 "jne 1f\n\t"
3860@@ -317,6 +354,39 @@
3861 }
3862
3863 /**
3864+ * find_next_bit - find the first set bit in a memory region
3865+ * @addr: The address to base the search on
3866+ * @offset: The bitnumber to start searching at
3867+ * @size: The maximum size to search
3868+ */
3869+static __inline__ int find_next_bit (void * addr, int size, int offset)
3870+{
3871+ unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
3872+ int set = 0, bit = offset & 31, res;
3873+
3874+ if (bit) {
3875+ /*
3876+ * Look for nonzero in the first 32 bits:
3877+ */
3878+ __asm__("bsfl %1,%0\n\t"
3879+ "jne 1f\n\t"
3880+ "movl $32, %0\n"
3881+ "1:"
3882+ : "=r" (set)
3883+ : "r" (*p >> bit));
3884+ if (set < (32 - bit))
3885+ return set + offset;
3886+ set = 32 - bit;
3887+ p++;
3888+ }
3889+ /*
3890+ * No set bit yet, search remaining full words for a bit
3891+ */
3892+ res = find_first_bit (p, size - 32 * (p - (unsigned long *) addr));
3893+ return (offset + set + res);
3894+}
3895+
3896+/**
3897 * ffz - find first zero in word.
3898 * @word: The word to search
3899 *
3900@@ -330,7 +400,40 @@
3901 return word;
3902 }
3903
3904+/**
3905+ * __ffs - find first bit in word.
3906+ * @word: The word to search
3907+ *
3908+ * Undefined if no bit exists, so code should check against 0 first.
3909+ */
3910+static __inline__ unsigned long __ffs(unsigned long word)
3911+{
3912+ __asm__("bsfl %1,%0"
3913+ :"=r" (word)
3914+ :"rm" (word));
3915+ return word;
3916+}
3917+
3918 #ifdef __KERNEL__
3919+
3920+/*
3921+ * Every architecture must define this function. It's the fastest
3922+ * way of searching a 140-bit bitmap where the first 100 bits are
3923+ * unlikely to be set. It's guaranteed that at least one of the 140
3924+ * bits is set.
3925+ */
3926+static inline int _sched_find_first_bit(unsigned long *b)
3927+{
3928+ if (unlikely(b[0]))
3929+ return __ffs(b[0]);
3930+ if (unlikely(b[1]))
3931+ return __ffs(b[1]) + 32;
3932+ if (unlikely(b[2]))
3933+ return __ffs(b[2]) + 64;
3934+ if (b[3])
3935+ return __ffs(b[3]) + 96;
3936+ return __ffs(b[4]) + 128;
3937+}
3938
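A minimal sketch of the intended caller, the scheduler half of this patch, which is outside this excerpt (structure and field names are approximations of that code, not definitions from this hunk): the runqueue keeps a 140-bit priority bitmap plus one run list per priority, so picking the next task is a bitmap lookup and a list head read, independent of how many tasks are queued.

struct prio_array_sketch {
	unsigned long bitmap[5];		/* 140 priority bits, rounded up to 5 words */
	struct list_head queue[140];		/* one run list per priority level */
};

static struct task_struct *pick_next_sketch(struct prio_array_sketch *array)
{
	/* At most five word tests thanks to _sched_find_first_bit() above. */
	int idx = _sched_find_first_bit(array->bitmap);

	/* run_list is assumed to be the per-task list node used by the scheduler. */
	return list_entry(array->queue[idx].next, struct task_struct, run_list);
}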
3939 /**
3940 * ffs - find first bit set
3941diff -urN linux-2.4.20/include/asm-i386/hardirq.h linux-2.4.20-o1-preempt/include/asm-i386/hardirq.h
3942--- linux-2.4.20/include/asm-i386/hardirq.h Thu Nov 22 20:46:19 2001
3943+++ linux-2.4.20-o1-preempt/include/asm-i386/hardirq.h Tue Feb 18 03:52:07 2003
3944@@ -19,12 +19,16 @@
3945
3946 /*
3947 * Are we in an interrupt context? Either doing bottom half
3948- * or hardware interrupt processing?
3949+ * or hardware interrupt processing? Note the preempt check,
3950+ * this is both a bugfix and an optimization. If we are
3951+ * preemptible, we cannot be in an interrupt.
3952 */
3953-#define in_interrupt() ({ int __cpu = smp_processor_id(); \
3954- (local_irq_count(__cpu) + local_bh_count(__cpu) != 0); })
3955+#define in_interrupt() (preempt_is_disabled() && \
3956+ ({unsigned long __cpu = smp_processor_id(); \
3957+ (local_irq_count(__cpu) + local_bh_count(__cpu) != 0); }))
3958
3959-#define in_irq() (local_irq_count(smp_processor_id()) != 0)
3960+#define in_irq() (preempt_is_disabled() && \
3961+ (local_irq_count(smp_processor_id()) != 0))
3962
3963 #ifndef CONFIG_SMP
3964
3965@@ -35,6 +39,8 @@
3966 #define irq_exit(cpu, irq) (local_irq_count(cpu)--)
3967
3968 #define synchronize_irq() barrier()
3969+
3970+#define release_irqlock(cpu) do { } while (0)
3971
3972 #else
3973
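A toy user-space model of the reasoning behind the new in_interrupt() gate above (names and structure are mine, not the patch's): every path in this patch that raises local_irq_count() or local_bh_count() also raises the preempt count (interrupt entry and cpu_bh_disable() both bump it), so a zero preempt count proves plain process context and the per-CPU counters need not be read at all.

#include <assert.h>

struct cpu_model {
	int preempt_count;			/* bumped by irq entry, bh disable, preempt_disable */
	int irq_count;
	int bh_count;
};

static void model_irq_enter(struct cpu_model *c) { c->preempt_count++; c->irq_count++; }
static void model_irq_exit(struct cpu_model *c)  { c->irq_count--; c->preempt_count--; }

static int model_in_interrupt(struct cpu_model *c)
{
	/* Preempt check first: if preemptible, skip the counter reads entirely. */
	return c->preempt_count && (c->irq_count + c->bh_count != 0);
}

int main(void)
{
	struct cpu_model c = { 0, 0, 0 };

	assert(!model_in_interrupt(&c));	/* process context */
	model_irq_enter(&c);
	assert(model_in_interrupt(&c));		/* inside an interrupt handler */
	model_irq_exit(&c);
	assert(!model_in_interrupt(&c));
	return 0;
}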
3974diff -urN linux-2.4.20/include/asm-i386/highmem.h linux-2.4.20-o1-preempt/include/asm-i386/highmem.h
3975--- linux-2.4.20/include/asm-i386/highmem.h Sat Aug 3 02:39:45 2002
3976+++ linux-2.4.20-o1-preempt/include/asm-i386/highmem.h Tue Feb 18 03:52:07 2003
3977@@ -88,6 +88,7 @@
3978 enum fixed_addresses idx;
3979 unsigned long vaddr;
3980
3981+ preempt_disable();
3982 if (page < highmem_start_page)
3983 return page_address(page);
3984
3985@@ -109,8 +110,10 @@
3986 unsigned long vaddr = (unsigned long) kvaddr;
3987 enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
3988
3989- if (vaddr < FIXADDR_START) // FIXME
3990+ if (vaddr < FIXADDR_START) { // FIXME
3991+ preempt_enable();
3992 return;
3993+ }
3994
3995 if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
3996 out_of_line_bug();
3997@@ -122,6 +125,8 @@
3998 pte_clear(kmap_pte-idx);
3999 __flush_tlb_one(vaddr);
4000 #endif
4001+
4002+ preempt_enable();
4003 }
4004
4005 #endif /* __KERNEL__ */
4006diff -urN linux-2.4.20/include/asm-i386/hw_irq.h linux-2.4.20-o1-preempt/include/asm-i386/hw_irq.h
4007--- linux-2.4.20/include/asm-i386/hw_irq.h Thu Nov 22 20:46:18 2001
4008+++ linux-2.4.20-o1-preempt/include/asm-i386/hw_irq.h Tue Feb 18 03:52:07 2003
4009@@ -95,6 +95,18 @@
4010 #define __STR(x) #x
4011 #define STR(x) __STR(x)
4012
4013+#define GET_CURRENT \
4014+ "movl %esp, %ebx\n\t" \
4015+ "andl $-8192, %ebx\n\t"
4016+
4017+#ifdef CONFIG_PREEMPT
4018+#define BUMP_LOCK_COUNT \
4019+ GET_CURRENT \
4020+ "incl 4(%ebx)\n\t"
4021+#else
4022+#define BUMP_LOCK_COUNT
4023+#endif
4024+
4025 #define SAVE_ALL \
4026 "cld\n\t" \
4027 "pushl %es\n\t" \
4028@@ -108,14 +120,11 @@
4029 "pushl %ebx\n\t" \
4030 "movl $" STR(__KERNEL_DS) ",%edx\n\t" \
4031 "movl %edx,%ds\n\t" \
4032- "movl %edx,%es\n\t"
4033+ "movl %edx,%es\n\t" \
4034+ BUMP_LOCK_COUNT
4035
4036 #define IRQ_NAME2(nr) nr##_interrupt(void)
4037 #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
4038-
4039-#define GET_CURRENT \
4040- "movl %esp, %ebx\n\t" \
4041- "andl $-8192, %ebx\n\t"
4042
4043 /*
4044 * SMP has a few special interrupts for IPI messages
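The GET_CURRENT/BUMP_LOCK_COUNT pair added above relies on the 2.4 convention that the task_struct sits at the bottom of the 8 KB kernel stack: masking the stack pointer with -8192 recovers `current`, and the incl on offset 4 then bumps the preempt count this patch keeps near the start of task_struct. A standalone sketch of just the masking arithmetic (addresses below are made up for illustration):

#include <assert.h>

#define STACK_SIZE 8192UL	/* 8 KB kernel stack, task_struct at its base */

static unsigned long stack_base(unsigned long sp)
{
	return sp & ~(STACK_SIZE - 1);		/* same effect as "andl $-8192, %ebx" */
}

int main(void)
{
	unsigned long base = 0xc7654000UL;	/* hypothetical, 8 KB aligned */

	assert(stack_base(base + 100) == base);	/* any esp inside the stack... */
	assert(stack_base(base + 8191) == base);/* ...maps back to the base */
	return 0;
}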
4045diff -urN linux-2.4.20/include/asm-i386/i387.h linux-2.4.20-o1-preempt/include/asm-i386/i387.h
4046--- linux-2.4.20/include/asm-i386/i387.h Sat Aug 3 02:39:45 2002
4047+++ linux-2.4.20-o1-preempt/include/asm-i386/i387.h Tue Feb 18 03:52:07 2003
4048@@ -12,6 +12,7 @@
4049 #define __ASM_I386_I387_H
4050
4051 #include <linux/sched.h>
4052+#include <linux/spinlock.h>
4053 #include <asm/processor.h>
4054 #include <asm/sigcontext.h>
4055 #include <asm/user.h>
4056@@ -24,7 +25,7 @@
4057 extern void restore_fpu( struct task_struct *tsk );
4058
4059 extern void kernel_fpu_begin(void);
4060-#define kernel_fpu_end() stts()
4061+#define kernel_fpu_end() do { stts(); preempt_enable(); } while(0)
4062
4063
4064 #define unlazy_fpu( tsk ) do { \
4065diff -urN linux-2.4.20/include/asm-i386/mmu_context.h linux-2.4.20-o1-preempt/include/asm-i386/mmu_context.h
4066--- linux-2.4.20/include/asm-i386/mmu_context.h Sat Aug 3 02:39:45 2002
4067+++ linux-2.4.20-o1-preempt/include/asm-i386/mmu_context.h Tue Feb 18 03:51:29 2003
4068@@ -27,13 +27,13 @@
4069
4070 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu)
4071 {
4072- if (prev != next) {
4073+ if (likely(prev != next)) {
4074 /* stop flush ipis for the previous mm */
4075 clear_bit(cpu, &prev->cpu_vm_mask);
4076 /*
4077 * Re-load LDT if necessary
4078 */
4079- if (prev->context.segments != next->context.segments)
4080+ if (unlikely(prev->context.segments != next->context.segments))
4081 load_LDT(next);
4082 #ifdef CONFIG_SMP
4083 cpu_tlbstate[cpu].state = TLBSTATE_OK;
4084diff -urN linux-2.4.20/include/asm-i386/pgalloc.h linux-2.4.20-o1-preempt/include/asm-i386/pgalloc.h
4085--- linux-2.4.20/include/asm-i386/pgalloc.h Sat Aug 3 02:39:45 2002
4086+++ linux-2.4.20-o1-preempt/include/asm-i386/pgalloc.h Tue Feb 18 03:52:06 2003
4087@@ -75,20 +75,26 @@
4088 {
4089 unsigned long *ret;
4090
4091+ preempt_disable();
4092 if ((ret = pgd_quicklist) != NULL) {
4093 pgd_quicklist = (unsigned long *)(*ret);
4094 ret[0] = 0;
4095 pgtable_cache_size--;
4096- } else
4097+ preempt_enable();
4098+ } else {
4099+ preempt_enable();
4100 ret = (unsigned long *)get_pgd_slow();
4101+ }
4102 return (pgd_t *)ret;
4103 }
4104
4105 static inline void free_pgd_fast(pgd_t *pgd)
4106 {
4107+ preempt_disable();
4108 *(unsigned long *)pgd = (unsigned long) pgd_quicklist;
4109 pgd_quicklist = (unsigned long *) pgd;
4110 pgtable_cache_size++;
4111+ preempt_enable();
4112 }
4113
4114 static inline void free_pgd_slow(pgd_t *pgd)
4115@@ -119,19 +125,23 @@
4116 {
4117 unsigned long *ret;
4118
4119+ preempt_disable();
4120 if ((ret = (unsigned long *)pte_quicklist) != NULL) {
4121 pte_quicklist = (unsigned long *)(*ret);
4122 ret[0] = ret[1];
4123 pgtable_cache_size--;
4124 }
4125+ preempt_enable();
4126 return (pte_t *)ret;
4127 }
4128
4129 static inline void pte_free_fast(pte_t *pte)
4130 {
4131+ preempt_disable();
4132 *(unsigned long *)pte = (unsigned long) pte_quicklist;
4133 pte_quicklist = (unsigned long *) pte;
4134 pgtable_cache_size++;
4135+ preempt_enable();
4136 }
4137
4138 static __inline__ void pte_free_slow(pte_t *pte)
4139@@ -224,6 +234,7 @@
4140 {
4141 struct mm_struct *active_mm;
4142 int state;
4143+ char __cacheline_padding[24];
4144 };
4145 extern struct tlb_state cpu_tlbstate[NR_CPUS];
4146
4147diff -urN linux-2.4.20/include/asm-i386/smp.h linux-2.4.20-o1-preempt/include/asm-i386/smp.h
4148--- linux-2.4.20/include/asm-i386/smp.h Fri Nov 29 00:53:15 2002
4149+++ linux-2.4.20-o1-preempt/include/asm-i386/smp.h Tue Feb 18 03:51:29 2003
4150@@ -40,6 +40,7 @@
4151 extern void smp_flush_tlb(void);
4152 extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
4153 extern void smp_send_reschedule(int cpu);
4154+extern void smp_send_reschedule_all(void);
4155 extern void smp_invalidate_rcv(void); /* Process an NMI */
4156 extern void (*mtrr_hook) (void);
4157 extern void zap_low_mappings (void);
4158@@ -81,7 +82,7 @@
4159 * so this is correct in the x86 case.
4160 */
4161
4162-#define smp_processor_id() (current->processor)
4163+#define smp_processor_id() (current->cpu)
4164
4165 static __inline int hard_smp_processor_id(void)
4166 {
4167@@ -98,18 +99,6 @@
4168 #endif /* !__ASSEMBLY__ */
4169
4170 #define NO_PROC_ID 0xFF /* No processor magic marker */
4171-
4172-/*
4173- * This magic constant controls our willingness to transfer
4174- * a process across CPUs. Such a transfer incurs misses on the L1
4175- * cache, and on a P6 or P5 with multiple L2 caches L2 hits. My
4176- * gut feeling is this will vary by board in value. For a board
4177- * with separate L2 cache it probably depends also on the RSS, and
4178- * for a board with shared L2 cache it ought to decay fast as other
4179- * processes are run.
4180- */
4181-
4182-#define PROC_CHANGE_PENALTY 15 /* Schedule penalty */
4183
4184 #endif
4185 #endif
4186diff -urN linux-2.4.20/include/asm-i386/smplock.h linux-2.4.20-o1-preempt/include/asm-i386/smplock.h
4187--- linux-2.4.20/include/asm-i386/smplock.h Sat Aug 3 02:39:45 2002
4188+++ linux-2.4.20-o1-preempt/include/asm-i386/smplock.h Tue Feb 18 03:52:07 2003
4189@@ -11,7 +11,15 @@
4190 extern spinlock_cacheline_t kernel_flag_cacheline;
4191 #define kernel_flag kernel_flag_cacheline.lock
4192
4193+#ifdef CONFIG_SMP
4194 #define kernel_locked() spin_is_locked(&kernel_flag)
4195+#else
4196+#ifdef CONFIG_PREEMPT
4197+#define kernel_locked() preempt_get_count()
4198+#else
4199+#define kernel_locked() 1
4200+#endif
4201+#endif
4202
4203 /*
4204 * Release global kernel lock and global interrupt lock
4205@@ -43,6 +51,11 @@
4206 */
4207 static __inline__ void lock_kernel(void)
4208 {
4209+#ifdef CONFIG_PREEMPT
4210+ if (current->lock_depth == -1)
4211+ spin_lock(&kernel_flag);
4212+ ++current->lock_depth;
4213+#else
4214 #if 1
4215 if (!++current->lock_depth)
4216 spin_lock(&kernel_flag);
4217@@ -54,6 +67,7 @@
4218 "\n9:"
4219 :"=m" (__dummy_lock(&kernel_flag)),
4220 "=m" (current->lock_depth));
4221+#endif
4222 #endif
4223 }
4224
4225diff -urN linux-2.4.20/include/asm-i386/softirq.h linux-2.4.20-o1-preempt/include/asm-i386/softirq.h
4226--- linux-2.4.20/include/asm-i386/softirq.h Sat Aug 3 02:39:45 2002
4227+++ linux-2.4.20-o1-preempt/include/asm-i386/softirq.h Tue Feb 18 03:52:07 2003
4228@@ -5,9 +5,9 @@
4229 #include <asm/hardirq.h>
4230
4231 #define __cpu_bh_enable(cpu) \
4232- do { barrier(); local_bh_count(cpu)--; } while (0)
4233+ do { barrier(); local_bh_count(cpu)--; preempt_enable(); } while (0)
4234 #define cpu_bh_disable(cpu) \
4235- do { local_bh_count(cpu)++; barrier(); } while (0)
4236+ do { preempt_disable(); local_bh_count(cpu)++; barrier(); } while (0)
4237
4238 #define local_bh_disable() cpu_bh_disable(smp_processor_id())
4239 #define __local_bh_enable() __cpu_bh_enable(smp_processor_id())
4240@@ -22,7 +22,7 @@
4241 * If you change the offsets in irq_stat then you have to
4242 * update this code as well.
4243 */
4244-#define local_bh_enable() \
4245+#define _local_bh_enable() \
4246 do { \
4247 unsigned int *ptr = &local_bh_count(smp_processor_id()); \
4248 \
4249@@ -44,5 +44,7 @@
4250 : "r" (ptr), "i" (do_softirq) \
4251 /* no registers clobbered */ ); \
4252 } while (0)
4253+
4254+#define local_bh_enable() do { _local_bh_enable(); preempt_enable(); } while (0)
4255
4256 #endif /* __ASM_SOFTIRQ_H */
4257diff -urN linux-2.4.20/include/asm-i386/spinlock.h linux-2.4.20-o1-preempt/include/asm-i386/spinlock.h
4258--- linux-2.4.20/include/asm-i386/spinlock.h Fri Nov 29 00:53:15 2002
4259+++ linux-2.4.20-o1-preempt/include/asm-i386/spinlock.h Tue Feb 18 03:52:07 2003
4260@@ -77,7 +77,7 @@
4261 :"=m" (lock->lock) : : "memory"
4262
4263
4264-static inline void spin_unlock(spinlock_t *lock)
4265+static inline void _raw_spin_unlock(spinlock_t *lock)
4266 {
4267 #if SPINLOCK_DEBUG
4268 if (lock->magic != SPINLOCK_MAGIC)
4269@@ -97,7 +97,7 @@
4270 :"=q" (oldval), "=m" (lock->lock) \
4271 :"0" (oldval) : "memory"
4272
4273-static inline void spin_unlock(spinlock_t *lock)
4274+static inline void _raw_spin_unlock(spinlock_t *lock)
4275 {
4276 char oldval = 1;
4277 #if SPINLOCK_DEBUG
4278@@ -113,7 +113,7 @@
4279
4280 #endif
4281
4282-static inline int spin_trylock(spinlock_t *lock)
4283+static inline int _raw_spin_trylock(spinlock_t *lock)
4284 {
4285 char oldval;
4286 __asm__ __volatile__(
4287@@ -123,7 +123,7 @@
4288 return oldval > 0;
4289 }
4290
4291-static inline void spin_lock(spinlock_t *lock)
4292+static inline void _raw_spin_lock(spinlock_t *lock)
4293 {
4294 #if SPINLOCK_DEBUG
4295 __label__ here;
4296@@ -179,7 +179,7 @@
4297 */
4298 /* the spinlock helpers are in arch/i386/kernel/semaphore.c */
4299
4300-static inline void read_lock(rwlock_t *rw)
4301+static inline void _raw_read_lock(rwlock_t *rw)
4302 {
4303 #if SPINLOCK_DEBUG
4304 if (rw->magic != RWLOCK_MAGIC)
4305@@ -188,7 +188,7 @@
4306 __build_read_lock(rw, "__read_lock_failed");
4307 }
4308
4309-static inline void write_lock(rwlock_t *rw)
4310+static inline void _raw_write_lock(rwlock_t *rw)
4311 {
4312 #if SPINLOCK_DEBUG
4313 if (rw->magic != RWLOCK_MAGIC)
4314@@ -197,10 +197,10 @@
4315 __build_write_lock(rw, "__write_lock_failed");
4316 }
4317
4318-#define read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
4319-#define write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
4320+#define _raw_read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
4321+#define _raw_write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
4322
4323-static inline int write_trylock(rwlock_t *lock)
4324+static inline int _raw_write_trylock(rwlock_t *lock)
4325 {
4326 atomic_t *count = (atomic_t *)lock;
4327 if (atomic_sub_and_test(RW_LOCK_BIAS, count))
4328diff -urN linux-2.4.20/include/asm-i386/system.h linux-2.4.20-o1-preempt/include/asm-i386/system.h
4329--- linux-2.4.20/include/asm-i386/system.h Fri Nov 29 00:53:15 2002
4330+++ linux-2.4.20-o1-preempt/include/asm-i386/system.h Tue Feb 18 03:52:06 2003
4331@@ -12,25 +12,22 @@
4332 struct task_struct; /* one of the stranger aspects of C forward declarations.. */
4333 extern void FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next));
4334
4335-#define prepare_to_switch() do { } while(0)
4336 #define switch_to(prev,next,last) do { \
4337 asm volatile("pushl %%esi\n\t" \
4338 "pushl %%edi\n\t" \
4339 "pushl %%ebp\n\t" \
4340 "movl %%esp,%0\n\t" /* save ESP */ \
4341- "movl %3,%%esp\n\t" /* restore ESP */ \
4342+ "movl %2,%%esp\n\t" /* restore ESP */ \
4343 "movl $1f,%1\n\t" /* save EIP */ \
4344- "pushl %4\n\t" /* restore EIP */ \
4345+ "pushl %3\n\t" /* restore EIP */ \
4346 "jmp __switch_to\n" \
4347 "1:\t" \
4348 "popl %%ebp\n\t" \
4349 "popl %%edi\n\t" \
4350 "popl %%esi\n\t" \
4351- :"=m" (prev->thread.esp),"=m" (prev->thread.eip), \
4352- "=b" (last) \
4353+ :"=m" (prev->thread.esp),"=m" (prev->thread.eip) \
4354 :"m" (next->thread.esp),"m" (next->thread.eip), \
4355- "a" (prev), "d" (next), \
4356- "b" (prev)); \
4357+ "a" (prev), "d" (next)); \
4358 } while (0)
4359
4360 #define _set_base(addr,base) do { unsigned long __pr; \
4361@@ -321,6 +318,13 @@
4362 #define __sti() __asm__ __volatile__("sti": : :"memory")
4363 /* used in the idle loop; sti takes one instruction cycle to complete */
4364 #define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory")
4365+
4366+#define irqs_disabled() \
4367+({ \
4368+ unsigned long flags; \
4369+ __save_flags(flags); \
4370+ !(flags & (1<<9)); \
4371+})
4372
4373 /* For spinlocks etc */
4374 #define local_irq_save(x) __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory")
4375diff -urN linux-2.4.20/include/asm-ia64/bitops.h linux-2.4.20-o1-preempt/include/asm-ia64/bitops.h
4376--- linux-2.4.20/include/asm-ia64/bitops.h Fri Nov 29 00:53:15 2002
4377+++ linux-2.4.20-o1-preempt/include/asm-ia64/bitops.h Tue Feb 18 03:51:30 2003
4378@@ -2,10 +2,15 @@
4379 #define _ASM_IA64_BITOPS_H
4380
4381 /*
4382- * Copyright (C) 1998-2001 Hewlett-Packard Co
4383- * Copyright (C) 1998-2001 David Mosberger-Tang <davidm@hpl.hp.com>
4384+ * Copyright (C) 1998-2002 Hewlett-Packard Co
4385+ * David Mosberger-Tang <davidm@hpl.hp.com>
4386+ *
4387+ * 02/06/02 find_next_bit() and find_first_bit() added from Erich Focht's ia64 O(1)
4388+ * scheduler patch
4389 */
4390
4391+#include <linux/types.h>
4392+
4393 #include <asm/system.h>
4394
4395 /**
4396@@ -89,6 +94,17 @@
4397 }
4398
4399 /**
4400+ * __clear_bit - Clears a bit in memory (non-atomic version)
4401+ */
4402+static __inline__ void
4403+__clear_bit (int nr, volatile void *addr)
4404+{
4405+ volatile __u32 *p = (__u32 *) addr + (nr >> 5);
4406+ __u32 m = 1 << (nr & 31);
4407+ *p &= ~m;
4408+}
4409+
4410+/**
4411 * change_bit - Toggle a bit in memory
4412 * @nr: Bit to clear
4413 * @addr: Address to start counting from
4414@@ -264,12 +280,11 @@
4415 }
4416
4417 /**
4418- * ffz - find the first zero bit in a memory region
4419- * @x: The address to start the search at
4420+ * ffz - find the first zero bit in a long word
4421+ * @x: The long word to find the bit in
4422 *
4423- * Returns the bit-number (0..63) of the first (least significant) zero bit, not
4424- * the number of the byte containing a bit. Undefined if no zero exists, so
4425- * code should check against ~0UL first...
4426+ * Returns the bit-number (0..63) of the first (least significant) zero bit. Undefined if
4427+ * no zero exists, so code should check against ~0UL first...
4428 */
4429 static inline unsigned long
4430 ffz (unsigned long x)
4431@@ -280,6 +295,21 @@
4432 return result;
4433 }
4434
4435+/**
4436+ * __ffs - find first bit in word.
4437+ * @x: The word to search
4438+ *
4439+ * Undefined if no bit exists, so code should check against 0 first.
4440+ */
4441+static __inline__ unsigned long
4442+__ffs (unsigned long x)
4443+{
4444+ unsigned long result;
4445+
4446+ __asm__ ("popcnt %0=%1" : "=r" (result) : "r" ((x - 1) & ~x));
4447+ return result;
4448+}
4449+
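A portable model of the popcnt trick used by the ia64 __ffs() above (a sketch, not the patch's code): for non-zero x, (x - 1) & ~x is a mask of exactly the bits below the lowest set bit, so counting its ones gives that bit's index.

#include <assert.h>

static unsigned popcount64(unsigned long long v)
{
	unsigned n = 0;
	while (v) { v &= v - 1; n++; }		/* clear one set bit per step */
	return n;
}

static unsigned ffs_model(unsigned long long x)
{
	return popcount64((x - 1) & ~x);	/* ones below the lowest set bit */
}

int main(void)
{
	assert(ffs_model(0x0000000000000001ULL) == 0);
	assert(ffs_model(0x0000000000001200ULL) == 9);	/* lowest set bit of 0x1200 */
	assert(ffs_model(0x8000000000000000ULL) == 63);
	return 0;
}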
4450 #ifdef __KERNEL__
4451
4452 /*
4453@@ -296,6 +326,12 @@
4454 return exp - 0xffff;
4455 }
4456
4457+static inline int
4458+fls (int x)
4459+{
4460+ return ia64_fls((unsigned int) x);
4461+}
4462+
4463 /*
4464 * ffs: find first bit set. This is defined the same way as the libc and compiler builtin
4465 * ffs routines, therefore differs in spirit from the above ffz (man ffs): it operates on
4466@@ -368,8 +404,53 @@
4467 */
4468 #define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
4469
4470+/*
4471+ * Find next bit in a bitmap reasonably efficiently..
4472+ */
4473+static inline int
4474+find_next_bit (void *addr, unsigned long size, unsigned long offset)
4475+{
4476+ unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
4477+ unsigned long result = offset & ~63UL;
4478+ unsigned long tmp;
4479+
4480+ if (offset >= size)
4481+ return size;
4482+ size -= result;
4483+ offset &= 63UL;
4484+ if (offset) {
4485+ tmp = *(p++);
4486+ tmp &= ~0UL << offset;
4487+ if (size < 64)
4488+ goto found_first;
4489+ if (tmp)
4490+ goto found_middle;
4491+ size -= 64;
4492+ result += 64;
4493+ }
4494+ while (size & ~63UL) {
4495+ if ((tmp = *(p++)))
4496+ goto found_middle;
4497+ result += 64;
4498+ size -= 64;
4499+ }
4500+ if (!size)
4501+ return result;
4502+ tmp = *p;
4503+ found_first:
4504+ tmp &= ~0UL >> (64-size);
4505+ if (tmp == 0UL) /* Are any bits set? */
4506+ return result + size; /* Nope. */
4507+ found_middle:
4508+ return result + __ffs(tmp);
4509+}
4510+
4511+#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
4512+
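A usage sketch with a hypothetical caller (not from the patch): once find_first_bit()/find_next_bit() above are available, walking every set bit in a bitmap is the usual "find, handle, resume one past it" loop, terminating when the return value reaches the bitmap size.

static void for_each_set_bit_sketch(void *bitmap, unsigned long nbits,
				    void (*handle)(unsigned long bit))
{
	unsigned long bit = find_first_bit(bitmap, nbits);

	while (bit < nbits) {
		handle(bit);
		bit = find_next_bit(bitmap, nbits, bit + 1);	/* resume after it */
	}
}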
4513 #ifdef __KERNEL__
4514
4515+#define __clear_bit(nr, addr) clear_bit(nr, addr)
4516+
4517 #define ext2_set_bit test_and_set_bit
4518 #define ext2_clear_bit test_and_clear_bit
4519 #define ext2_test_bit test_bit
4520@@ -382,6 +463,16 @@
4521 #define minix_test_and_clear_bit(nr,addr) test_and_clear_bit(nr,addr)
4522 #define minix_test_bit(nr,addr) test_bit(nr,addr)
4523 #define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
4524+
4525+static inline int
4526+_sched_find_first_bit (unsigned long *b)
4527+{
4528+ if (unlikely(b[0]))
4529+ return __ffs(b[0]);
4530+ if (unlikely(b[1]))
4531+ return 64 + __ffs(b[1]);
4532+ return __ffs(b[2]) + 128;
4533+}
4534
4535 #endif /* __KERNEL__ */
4536
4537diff -urN linux-2.4.20/include/asm-m68k/bitops.h linux-2.4.20-o1-preempt/include/asm-m68k/bitops.h
4538--- linux-2.4.20/include/asm-m68k/bitops.h Thu Oct 25 22:53:55 2001
4539+++ linux-2.4.20-o1-preempt/include/asm-m68k/bitops.h Tue Feb 18 03:51:30 2003
4540@@ -97,6 +97,7 @@
4541 (__builtin_constant_p(nr) ? \
4542 __constant_clear_bit(nr, vaddr) : \
4543 __generic_clear_bit(nr, vaddr))
4544+#define __clear_bit(nr,vaddr) clear_bit(nr,vaddr)
4545
4546 extern __inline__ void __constant_clear_bit(int nr, volatile void * vaddr)
4547 {
4548@@ -239,6 +240,28 @@
4549
4550 return 32 - cnt;
4551 }
4552+#define __ffs(x) (ffs(x) - 1)
4553+
4554+
4555+/*
4556+ * Every architecture must define this function. It's the fastest
4557+ * way of searching a 140-bit bitmap where the first 100 bits are
4558+ * unlikely to be set. It's guaranteed that at least one of the 140
4559+ * bits is set.
4560+ */
4561+static inline int _sched_find_first_bit(unsigned long *b)
4562+{
4563+ if (unlikely(b[0]))
4564+ return __ffs(b[0]);
4565+ if (unlikely(b[1]))
4566+ return __ffs(b[1]) + 32;
4567+ if (unlikely(b[2]))
4568+ return __ffs(b[2]) + 64;
4569+ if (b[3])
4570+ return __ffs(b[3]) + 96;
4571+ return __ffs(b[4]) + 128;
4572+}
4573+
4574
4575 /*
4576 * hweightN: returns the hamming weight (i.e. the number
4577diff -urN linux-2.4.20/include/asm-mips/bitops.h linux-2.4.20-o1-preempt/include/asm-mips/bitops.h
4578--- linux-2.4.20/include/asm-mips/bitops.h Fri Nov 29 00:53:15 2002
4579+++ linux-2.4.20-o1-preempt/include/asm-mips/bitops.h Tue Feb 18 03:51:30 2003
4580@@ -43,6 +43,8 @@
4581
4582 #ifdef CONFIG_CPU_HAS_LLSC
4583
4584+#include <asm/mipsregs.h>
4585+
4586 /*
4587 * These functions for MIPS ISA > 1 are interrupt and SMP proof and
4588 * interrupt friendly
4589@@ -628,7 +630,8 @@
4590 "2:"
4591 : "=r" (res), "=r" (dummy), "=r" (addr)
4592 : "0" ((signed int) 0), "1" ((unsigned int) 0xffffffff),
4593- "2" (addr), "r" (size));
4594+ "2" (addr), "r" (size)
4595+ : "$1");
4596
4597 return res;
4598 }
4599@@ -663,7 +666,8 @@
4600 ".set\treorder\n"
4601 "1:"
4602 : "=r" (set), "=r" (dummy)
4603- : "0" (0), "1" (1 << bit), "r" (*p));
4604+ : "0" (0), "1" (1 << bit), "r" (*p)
4605+ : "$1");
4606 if (set < (32 - bit))
4607 return set + offset;
4608 set = 32 - bit;
4609@@ -684,20 +688,29 @@
4610 *
4611 * Undefined if no zero exists, so code should check against ~0UL first.
4612 */
4613-static __inline__ unsigned long ffz(unsigned long word)
4614+extern __inline__ unsigned long ffz(unsigned long word)
4615 {
4616- int b = 0, s;
4617+ unsigned int __res;
4618+ unsigned int mask = 1;
4619
4620- word = ~word;
4621- s = 16; if (word << 16 != 0) s = 0; b += s; word >>= s;
4622- s = 8; if (word << 24 != 0) s = 0; b += s; word >>= s;
4623- s = 4; if (word << 28 != 0) s = 0; b += s; word >>= s;
4624- s = 2; if (word << 30 != 0) s = 0; b += s; word >>= s;
4625- s = 1; if (word << 31 != 0) s = 0; b += s;
4626+ __asm__ (
4627+ ".set\tnoreorder\n\t"
4628+ ".set\tnoat\n\t"
4629+ "move\t%0,$0\n"
4630+ "1:\tand\t$1,%2,%1\n\t"
4631+ "beqz\t$1,2f\n\t"
4632+ "sll\t%1,1\n\t"
4633+ "bnez\t%1,1b\n\t"
4634+ "addiu\t%0,1\n\t"
4635+ ".set\tat\n\t"
4636+ ".set\treorder\n"
4637+ "2:\n\t"
4638+ : "=&r" (__res), "=r" (mask)
4639+ : "r" (word), "1" (mask)
4640+ : "$1");
4641
4642- return b;
4643+ return __res;
4644 }
4645-
4646
4647 #ifdef __KERNEL__
4648
4649diff -urN linux-2.4.20/include/asm-mips/smplock.h linux-2.4.20-o1-preempt/include/asm-mips/smplock.h
4650--- linux-2.4.20/include/asm-mips/smplock.h Sat Aug 3 02:39:45 2002
4651+++ linux-2.4.20-o1-preempt/include/asm-mips/smplock.h Tue Feb 18 03:52:06 2003
4652@@ -5,12 +5,21 @@
4653 *
4654 * Default SMP lock implementation
4655 */
4656+#include <linux/config.h>
4657 #include <linux/interrupt.h>
4658 #include <linux/spinlock.h>
4659
4660 extern spinlock_t kernel_flag;
4661
4662+#ifdef CONFIG_SMP
4663 #define kernel_locked() spin_is_locked(&kernel_flag)
4664+#else
4665+#ifdef CONFIG_PREEMPT
4666+#define kernel_locked() preempt_get_count()
4667+#else
4668+#define kernel_locked() 1
4669+#endif
4670+#endif
4671
4672 /*
4673 * Release global kernel lock and global interrupt lock
4674@@ -42,8 +51,14 @@
4675 */
4676 extern __inline__ void lock_kernel(void)
4677 {
4678+#ifdef CONFIG_PREEMPT
4679+ if (current->lock_depth == -1)
4680+ spin_lock(&kernel_flag);
4681+ ++current->lock_depth;
4682+#else
4683 if (!++current->lock_depth)
4684 spin_lock(&kernel_flag);
4685+#endif
4686 }
4687
4688 extern __inline__ void unlock_kernel(void)
4689diff -urN linux-2.4.20/include/asm-mips/softirq.h linux-2.4.20-o1-preempt/include/asm-mips/softirq.h
4690--- linux-2.4.20/include/asm-mips/softirq.h Fri Nov 29 00:53:15 2002
4691+++ linux-2.4.20-o1-preempt/include/asm-mips/softirq.h Tue Feb 18 03:52:06 2003
4692@@ -15,6 +15,7 @@
4693
4694 static inline void cpu_bh_disable(int cpu)
4695 {
4696+ preempt_disable();
4697 local_bh_count(cpu)++;
4698 barrier();
4699 }
4700@@ -23,6 +24,7 @@
4701 {
4702 barrier();
4703 local_bh_count(cpu)--;
4704+ preempt_enable();
4705 }
4706
4707
4708@@ -36,6 +38,7 @@
4709 cpu = smp_processor_id(); \
4710 if (!--local_bh_count(cpu) && softirq_pending(cpu)) \
4711 do_softirq(); \
4712+ preempt_enable(); \
4713 } while (0)
4714
4715 #define in_softirq() (local_bh_count(smp_processor_id()) != 0)
4716diff -urN linux-2.4.20/include/asm-mips/system.h linux-2.4.20-o1-preempt/include/asm-mips/system.h
4717--- linux-2.4.20/include/asm-mips/system.h Fri Nov 29 00:53:15 2002
4718+++ linux-2.4.20-o1-preempt/include/asm-mips/system.h Tue Feb 18 03:52:06 2003
4719@@ -322,4 +322,18 @@
4720 #define die_if_kernel(msg, regs) \
4721 __die_if_kernel(msg, regs, __FILE__ ":", __FUNCTION__, __LINE__)
4722
4723+extern __inline__ int intr_on(void)
4724+{
4725+ unsigned long flags;
4726+ save_flags(flags);
4727+ return flags & 1;
4728+}
4729+
4730+extern __inline__ int intr_off(void)
4731+{
4732+ return ! intr_on();
4733+}
4734+
4735+#define irqs_disabled() intr_off()
4736+
4737 #endif /* _ASM_SYSTEM_H */
4738diff -urN linux-2.4.20/include/asm-mips64/bitops.h linux-2.4.20-o1-preempt/include/asm-mips64/bitops.h
4739--- linux-2.4.20/include/asm-mips64/bitops.h Fri Nov 29 00:53:15 2002
4740+++ linux-2.4.20-o1-preempt/include/asm-mips64/bitops.h Tue Feb 18 03:51:30 2003
4741@@ -19,6 +19,7 @@
4742
4743 #include <asm/system.h>
4744 #include <asm/sgidefs.h>
4745+#include <asm/mipsregs.h>
4746
4747 /*
4748 * set_bit - Atomically set a bit in memory
4749@@ -30,7 +31,8 @@
4750 * Note that @nr may be almost arbitrarily large; this function is not
4751 * restricted to acting on a single-word quantity.
4752 */
4753-static inline void set_bit(unsigned long nr, volatile void *addr)
4754+extern __inline__ void
4755+set_bit(unsigned long nr, volatile void *addr)
4756 {
4757 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
4758 unsigned long temp;
4759@@ -54,7 +56,7 @@
4760 * If it's called on the same region of memory simultaneously, the effect
4761 * may be that only one operation succeeds.
4762 */
4763-static inline void __set_bit(int nr, volatile void * addr)
4764+extern __inline__ void __set_bit(int nr, volatile void * addr)
4765 {
4766 unsigned long * m = ((unsigned long *) addr) + (nr >> 6);
4767
4768@@ -71,7 +73,8 @@
4769 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
4770 * in order to ensure changes are visible on other processors.
4771 */
4772-static inline void clear_bit(unsigned long nr, volatile void *addr)
4773+extern __inline__ void
4774+clear_bit(unsigned long nr, volatile void *addr)
4775 {
4776 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
4777 unsigned long temp;
4778@@ -97,7 +100,8 @@
4779 * Note that @nr may be almost arbitrarily large; this function is not
4780 * restricted to acting on a single-word quantity.
4781 */
4782-static inline void change_bit(unsigned long nr, volatile void *addr)
4783+extern __inline__ void
4784+change_bit(unsigned long nr, volatile void *addr)
4785 {
4786 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
4787 unsigned long temp;
4788@@ -120,7 +124,7 @@
4789 * If it's called on the same region of memory simultaneously, the effect
4790 * may be that only one operation succeeds.
4791 */
4792-static inline void __change_bit(int nr, volatile void * addr)
4793+extern __inline__ void __change_bit(int nr, volatile void * addr)
4794 {
4795 unsigned long * m = ((unsigned long *) addr) + (nr >> 6);
4796
4797@@ -135,8 +139,8 @@
4798 * This operation is atomic and cannot be reordered.
4799 * It also implies a memory barrier.
4800 */
4801-static inline unsigned long test_and_set_bit(unsigned long nr,
4802- volatile void *addr)
4803+extern __inline__ unsigned long
4804+test_and_set_bit(unsigned long nr, volatile void *addr)
4805 {
4806 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
4807 unsigned long temp, res;
4808@@ -168,7 +172,8 @@
4809 * If two examples of this operation race, one can appear to succeed
4810 * but actually fail. You must protect multiple accesses with a lock.
4811 */
4812-static inline int __test_and_set_bit(int nr, volatile void *addr)
4813+extern __inline__ int
4814+__test_and_set_bit(int nr, volatile void * addr)
4815 {
4816 unsigned long mask, retval;
4817 long *a = (unsigned long *) addr;
4818@@ -189,8 +194,8 @@
4819 * This operation is atomic and cannot be reordered.
4820 * It also implies a memory barrier.
4821 */
4822-static inline unsigned long test_and_clear_bit(unsigned long nr,
4823- volatile void *addr)
4824+extern __inline__ unsigned long
4825+test_and_clear_bit(unsigned long nr, volatile void *addr)
4826 {
4827 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
4828 unsigned long temp, res;
4829@@ -223,7 +228,8 @@
4830 * If two examples of this operation race, one can appear to succeed
4831 * but actually fail. You must protect multiple accesses with a lock.
4832 */
4833-static inline int __test_and_clear_bit(int nr, volatile void * addr)
4834+extern __inline__ int
4835+__test_and_clear_bit(int nr, volatile void * addr)
4836 {
4837 unsigned long mask, retval;
4838 unsigned long *a = (unsigned long *) addr;
4839@@ -244,8 +250,8 @@
4840 * This operation is atomic and cannot be reordered.
4841 * It also implies a memory barrier.
4842 */
4843-static inline unsigned long test_and_change_bit(unsigned long nr,
4844- volatile void *addr)
4845+extern __inline__ unsigned long
4846+test_and_change_bit(unsigned long nr, volatile void *addr)
4847 {
4848 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
4849 unsigned long temp, res;
4850@@ -277,7 +283,8 @@
4851 * If two examples of this operation race, one can appear to succeed
4852 * but actually fail. You must protect multiple accesses with a lock.
4853 */
4854-static inline int __test_and_change_bit(int nr, volatile void *addr)
4855+extern __inline__ int
4856+__test_and_change_bit(int nr, volatile void * addr)
4857 {
4858 unsigned long mask, retval;
4859 unsigned long *a = (unsigned long *) addr;
4860@@ -294,7 +301,8 @@
4861 * @nr: bit number to test
4862 * @addr: Address to start counting from
4863 */
4864-static inline unsigned long test_bit(int nr, volatile void * addr)
4865+extern __inline__ unsigned long
4866+test_bit(int nr, volatile void * addr)
4867 {
4868 return 1UL & (((volatile unsigned long *) addr)[nr >> 6] >> (nr & 0x3f));
4869 }
4870@@ -311,7 +319,8 @@
4871 * Returns the bit-number of the first zero bit, not the number of the byte
4872 * containing a bit.
4873 */
4874-static inline int find_first_zero_bit (void *addr, unsigned size)
4875+extern __inline__ int
4876+find_first_zero_bit (void *addr, unsigned size)
4877 {
4878 unsigned long dummy;
4879 int res;
4880@@ -347,7 +356,8 @@
4881 "2:"
4882 : "=r" (res), "=r" (dummy), "=r" (addr)
4883 : "0" ((signed int) 0), "1" ((unsigned int) 0xffffffff),
4884- "2" (addr), "r" (size));
4885+ "2" (addr), "r" (size)
4886+ : "$1");
4887
4888 return res;
4889 }
4890@@ -358,7 +368,8 @@
4891 * @offset: The bitnumber to start searching at
4892 * @size: The maximum size to search
4893 */
4894-static inline int find_next_zero_bit (void * addr, int size, int offset)
4895+extern __inline__ int
4896+find_next_zero_bit (void * addr, int size, int offset)
4897 {
4898 unsigned int *p = ((unsigned int *) addr) + (offset >> 5);
4899 int set = 0, bit = offset & 31, res;
4900@@ -379,7 +390,8 @@
4901 ".set\treorder\n"
4902 "1:"
4903 : "=r" (set), "=r" (dummy)
4904- : "0" (0), "1" (1 << bit), "r" (*p));
4905+ : "0" (0), "1" (1 << bit), "r" (*p)
4906+ : "$1");
4907 if (set < (32 - bit))
4908 return set + offset;
4909 set = 32 - bit;
4910@@ -400,19 +412,20 @@
4911 *
4912 * Undefined if no zero exists, so code should check against ~0UL first.
4913 */
4914-static __inline__ unsigned long ffz(unsigned long word)
4915+extern __inline__ unsigned long ffz(unsigned long word)
4916 {
4917- int b = 0, s;
4918+ unsigned long k;
4919
4920 word = ~word;
4921- s = 32; if (word << 32 != 0) s = 0; b += s; word >>= s;
4922- s = 16; if (word << 48 != 0) s = 0; b += s; word >>= s;
4923- s = 8; if (word << 56 != 0) s = 0; b += s; word >>= s;
4924- s = 4; if (word << 60 != 0) s = 0; b += s; word >>= s;
4925- s = 2; if (word << 62 != 0) s = 0; b += s; word >>= s;
4926- s = 1; if (word << 63 != 0) s = 0; b += s;
4927+ k = 63;
4928+ if (word & 0x00000000ffffffffUL) { k -= 32; word <<= 32; }
4929+ if (word & 0x0000ffff00000000UL) { k -= 16; word <<= 16; }
4930+ if (word & 0x00ff000000000000UL) { k -= 8; word <<= 8; }
4931+ if (word & 0x0f00000000000000UL) { k -= 4; word <<= 4; }
4932+ if (word & 0x3000000000000000UL) { k -= 2; word <<= 2; }
4933+ if (word & 0x4000000000000000UL) { k -= 1; }
4934
4935- return b;
4936+ return k;
4937 }
4938
4939 #ifdef __KERNEL__
4940@@ -450,8 +463,8 @@
4941 * @offset: The bitnumber to start searching at
4942 * @size: The maximum size to search
4943 */
4944-static inline unsigned long find_next_zero_bit(void *addr, unsigned long size,
4945- unsigned long offset)
4946+extern __inline__ unsigned long
4947+find_next_zero_bit(void *addr, unsigned long size, unsigned long offset)
4948 {
4949 unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
4950 unsigned long result = offset & ~63UL;
4951@@ -498,7 +511,8 @@
4952
4953 #ifdef __MIPSEB__
4954
4955-static inline int ext2_set_bit(int nr,void * addr)
4956+extern inline int
4957+ext2_set_bit(int nr,void * addr)
4958 {
4959 int mask, retval, flags;
4960 unsigned char *ADDR = (unsigned char *) addr;
4961@@ -512,7 +526,8 @@
4962 return retval;
4963 }
4964
4965-static inline int ext2_clear_bit(int nr, void * addr)
4966+extern inline int
4967+ext2_clear_bit(int nr, void * addr)
4968 {
4969 int mask, retval, flags;
4970 unsigned char *ADDR = (unsigned char *) addr;
4971@@ -526,7 +541,8 @@
4972 return retval;
4973 }
4974
4975-static inline int ext2_test_bit(int nr, const void * addr)
4976+extern inline int
4977+ext2_test_bit(int nr, const void * addr)
4978 {
4979 int mask;
4980 const unsigned char *ADDR = (const unsigned char *) addr;
4981@@ -539,9 +555,8 @@
4982 #define ext2_find_first_zero_bit(addr, size) \
4983 ext2_find_next_zero_bit((addr), (size), 0)
4984
4985-static inline unsigned int ext2_find_next_zero_bit(void *addr,
4986- unsigned long size,
4987- unsigned long offset)
4988+extern inline unsigned int
4989+ext2_find_next_zero_bit(void *addr, unsigned long size, unsigned long offset)
4990 {
4991 unsigned int *p = ((unsigned int *) addr) + (offset >> 5);
4992 unsigned int result = offset & ~31UL;
4993diff -urN linux-2.4.20/include/asm-ppc/bitops.h linux-2.4.20-o1-preempt/include/asm-ppc/bitops.h
4994--- linux-2.4.20/include/asm-ppc/bitops.h Tue Jun 12 04:15:27 2001
4995+++ linux-2.4.20-o1-preempt/include/asm-ppc/bitops.h Tue Feb 18 03:51:30 2003
4996@@ -10,7 +10,9 @@
4997 #define _PPC_BITOPS_H
4998
4999 #include <linux/config.h>
5000+#include <linux/compiler.h>
5001 #include <asm/byteorder.h>
5002+#include <asm/atomic.h>
5003
5004 /*
5005 * The test_and_*_bit operations are taken to imply a memory barrier
5006@@ -28,7 +30,7 @@
5007 * These used to be if'd out here because using : "cc" as a constraint
5008 * resulted in errors from egcs. Things appear to be OK with gcc-2.95.
5009 */
5010-static __inline__ void set_bit(int nr, volatile void * addr)
5011+static __inline__ void set_bit(int nr, volatile unsigned long * addr)
5012 {
5013 unsigned long old;
5014 unsigned long mask = 1 << (nr & 0x1f);
5015@@ -37,6 +39,7 @@
5016 __asm__ __volatile__("\n\
5017 1: lwarx %0,0,%3 \n\
5018 or %0,%0,%2 \n\
5019+ dcbt 0,%3 \n\
5020 stwcx. %0,0,%3 \n\
5021 bne- 1b"
5022 : "=&r" (old), "=m" (*p)
5023@@ -47,7 +50,7 @@
5024 /*
5025 * non-atomic version
5026 */
5027-static __inline__ void __set_bit(int nr, volatile void *addr)
5028+static __inline__ void __set_bit(int nr, volatile unsigned long *addr)
5029 {
5030 unsigned long mask = 1 << (nr & 0x1f);
5031 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
5032@@ -61,7 +64,7 @@
5033 #define smp_mb__before_clear_bit() smp_mb()
5034 #define smp_mb__after_clear_bit() smp_mb()
5035
5036-static __inline__ void clear_bit(int nr, volatile void *addr)
5037+static __inline__ void clear_bit(int nr, volatile unsigned long *addr)
5038 {
5039 unsigned long old;
5040 unsigned long mask = 1 << (nr & 0x1f);
5041@@ -70,6 +73,7 @@
5042 __asm__ __volatile__("\n\
5043 1: lwarx %0,0,%3 \n\
5044 andc %0,%0,%2 \n\
5045+ dcbt 0,%3 \n\
5046 stwcx. %0,0,%3 \n\
5047 bne- 1b"
5048 : "=&r" (old), "=m" (*p)
5049@@ -80,7 +84,7 @@
5050 /*
5051 * non-atomic version
5052 */
5053-static __inline__ void __clear_bit(int nr, volatile void *addr)
5054+static __inline__ void __clear_bit(int nr, volatile unsigned long *addr)
5055 {
5056 unsigned long mask = 1 << (nr & 0x1f);
5057 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
5058@@ -88,7 +92,7 @@
5059 *p &= ~mask;
5060 }
5061
5062-static __inline__ void change_bit(int nr, volatile void *addr)
5063+static __inline__ void change_bit(int nr, volatile unsigned long *addr)
5064 {
5065 unsigned long old;
5066 unsigned long mask = 1 << (nr & 0x1f);
5067@@ -97,6 +101,7 @@
5068 __asm__ __volatile__("\n\
5069 1: lwarx %0,0,%3 \n\
5070 xor %0,%0,%2 \n\
5071+ dcbt 0,%3 \n\
5072 stwcx. %0,0,%3 \n\
5073 bne- 1b"
5074 : "=&r" (old), "=m" (*p)
5075@@ -107,7 +112,7 @@
5076 /*
5077 * non-atomic version
5078 */
5079-static __inline__ void __change_bit(int nr, volatile void *addr)
5080+static __inline__ void __change_bit(int nr, volatile unsigned long *addr)
5081 {
5082 unsigned long mask = 1 << (nr & 0x1f);
5083 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
5084@@ -118,7 +123,7 @@
5085 /*
5086 * test_and_*_bit do imply a memory barrier (?)
5087 */
5088-static __inline__ int test_and_set_bit(int nr, volatile void *addr)
5089+static __inline__ int test_and_set_bit(int nr, volatile unsigned long *addr)
5090 {
5091 unsigned int old, t;
5092 unsigned int mask = 1 << (nr & 0x1f);
5093@@ -127,6 +132,7 @@
5094 __asm__ __volatile__(SMP_WMB "\n\
5095 1: lwarx %0,0,%4 \n\
5096 or %1,%0,%3 \n\
5097+ dcbt 0,%4 \n\
5098 stwcx. %1,0,%4 \n\
5099 bne 1b"
5100 SMP_MB
5101@@ -140,7 +146,7 @@
5102 /*
5103 * non-atomic version
5104 */
5105-static __inline__ int __test_and_set_bit(int nr, volatile void *addr)
5106+static __inline__ int __test_and_set_bit(int nr, volatile unsigned long *addr)
5107 {
5108 unsigned long mask = 1 << (nr & 0x1f);
5109 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
5110@@ -150,7 +156,7 @@
5111 return (old & mask) != 0;
5112 }
5113
5114-static __inline__ int test_and_clear_bit(int nr, volatile void *addr)
5115+static __inline__ int test_and_clear_bit(int nr, volatile unsigned long *addr)
5116 {
5117 unsigned int old, t;
5118 unsigned int mask = 1 << (nr & 0x1f);
5119@@ -159,6 +165,7 @@
5120 __asm__ __volatile__(SMP_WMB "\n\
5121 1: lwarx %0,0,%4 \n\
5122 andc %1,%0,%3 \n\
5123+ dcbt 0,%4 \n\
5124 stwcx. %1,0,%4 \n\
5125 bne 1b"
5126 SMP_MB
5127@@ -172,7 +179,7 @@
5128 /*
5129 * non-atomic version
5130 */
5131-static __inline__ int __test_and_clear_bit(int nr, volatile void *addr)
5132+static __inline__ int __test_and_clear_bit(int nr, volatile unsigned long *addr)
5133 {
5134 unsigned long mask = 1 << (nr & 0x1f);
5135 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
5136@@ -182,7 +189,7 @@
5137 return (old & mask) != 0;
5138 }
5139
5140-static __inline__ int test_and_change_bit(int nr, volatile void *addr)
5141+static __inline__ int test_and_change_bit(int nr, volatile unsigned long *addr)
5142 {
5143 unsigned int old, t;
5144 unsigned int mask = 1 << (nr & 0x1f);
5145@@ -191,6 +198,7 @@
5146 __asm__ __volatile__(SMP_WMB "\n\
5147 1: lwarx %0,0,%4 \n\
5148 xor %1,%0,%3 \n\
5149+ dcbt 0,%4 \n\
5150 stwcx. %1,0,%4 \n\
5151 bne 1b"
5152 SMP_MB
5153@@ -204,7 +212,7 @@
5154 /*
5155 * non-atomic version
5156 */
5157-static __inline__ int __test_and_change_bit(int nr, volatile void *addr)
5158+static __inline__ int __test_and_change_bit(int nr, volatile unsigned long *addr)
5159 {
5160 unsigned long mask = 1 << (nr & 0x1f);
5161 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
5162@@ -214,7 +222,7 @@
5163 return (old & mask) != 0;
5164 }
5165
5166-static __inline__ int test_bit(int nr, __const__ volatile void *addr)
5167+static __inline__ int test_bit(int nr, __const__ volatile unsigned long *addr)
5168 {
5169 __const__ unsigned int *p = (__const__ unsigned int *) addr;
5170
5171@@ -222,7 +230,7 @@
5172 }
5173
5174 /* Return the bit position of the most significant 1 bit in a word */
5175-static __inline__ int __ilog2(unsigned int x)
5176+static __inline__ int __ilog2(unsigned long x)
5177 {
5178 int lz;
5179
5180@@ -230,7 +238,7 @@
5181 return 31 - lz;
5182 }
5183
5184-static __inline__ int ffz(unsigned int x)
5185+static __inline__ int ffz(unsigned long x)
5186 {
5187 if ((x = ~x) == 0)
5188 return 32;
5189@@ -239,6 +247,11 @@
5190
5191 #ifdef __KERNEL__
5192
5193+static inline int __ffs(unsigned long x)
5194+{
5195+ return __ilog2(x & -x);
5196+}
5197+
5198 /*
5199 * ffs: find first bit set. This is defined the same way as
5200 * the libc and compiler builtin ffs routines, therefore
5201@@ -250,6 +263,18 @@
5202 }
5203
5204 /*
5205+ * fls: find last (most-significant) bit set.
5206+ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
5207+ */
5208+static __inline__ int fls(unsigned int x)
5209+{
5210+ int lz;
5211+
5212+ asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x));
5213+ return 32 - lz;
5214+}
5215+
5216+/*
5217 * hweightN: returns the hamming weight (i.e. the number
5218 * of bits set) of a N-bit word
5219 */
5220@@ -261,13 +286,86 @@
5221 #endif /* __KERNEL__ */
5222
5223 /*
5224+ * Find the first bit set in a 140-bit bitmap.
5225+ * The first 100 bits are unlikely to be set.
5226+ */
5227+static inline int _sched_find_first_bit(unsigned long *b)
5228+{
5229+ if (unlikely(b[0]))
5230+ return __ffs(b[0]);
5231+ if (unlikely(b[1]))
5232+ return __ffs(b[1]) + 32;
5233+ if (unlikely(b[2]))
5234+ return __ffs(b[2]) + 64;
5235+ if (b[3])
5236+ return __ffs(b[3]) + 96;
5237+ return __ffs(b[4]) + 128;
5238+}
5239+
5240+/**
5241+ * find_next_bit - find the next set bit in a memory region
5242+ * @addr: The address to base the search on
5243+ * @offset: The bitnumber to start searching at
5244+ * @size: The maximum size to search
5245+ */
5246+static __inline__ unsigned long find_next_bit(unsigned long *addr,
5247+ unsigned long size, unsigned long offset)
5248+{
5249+ unsigned int *p = ((unsigned int *) addr) + (offset >> 5);
5250+ unsigned int result = offset & ~31UL;
5251+ unsigned int tmp;
5252+
5253+ if (offset >= size)
5254+ return size;
5255+ size -= result;
5256+ offset &= 31UL;
5257+ if (offset) {
5258+ tmp = *p++;
5259+ tmp &= ~0UL << offset;
5260+ if (size < 32)
5261+ goto found_first;
5262+ if (tmp)
5263+ goto found_middle;
5264+ size -= 32;
5265+ result += 32;
5266+ }
5267+ while (size >= 32) {
5268+ if ((tmp = *p++) != 0)
5269+ goto found_middle;
5270+ result += 32;
5271+ size -= 32;
5272+ }
5273+ if (!size)
5274+ return result;
5275+ tmp = *p;
5276+
5277+found_first:
5278+ tmp &= ~0UL >> (32 - size);
5279+ if (tmp == 0UL) /* Are any bits set? */
5280+ return result + size; /* Nope. */
5281+found_middle:
5282+ return result + __ffs(tmp);
5283+}
5284+
5285+/**
5286+ * find_first_bit - find the first set bit in a memory region
5287+ * @addr: The address to start the search at
5288+ * @size: The maximum size to search
5289+ *
5290+ * Returns the bit-number of the first set bit, not the number of the byte
5291+ * containing a bit.
5292+ */
5293+#define find_first_bit(addr, size) \
5294+ find_next_bit((addr), (size), 0)
5295+
5296+/*
5297 * This implementation of find_{first,next}_zero_bit was stolen from
5298 * Linus' asm-alpha/bitops.h.
5299 */
5300 #define find_first_zero_bit(addr, size) \
5301 find_next_zero_bit((addr), (size), 0)
5302
5303-static __inline__ unsigned long find_next_zero_bit(void * addr,
5304+static __inline__ unsigned long find_next_zero_bit(unsigned long * addr,
5305 unsigned long size, unsigned long offset)
5306 {
5307 unsigned int * p = ((unsigned int *) addr) + (offset >> 5);
5308@@ -308,8 +406,8 @@
5309
5310 #ifdef __KERNEL__
5311
5312-#define ext2_set_bit(nr, addr) __test_and_set_bit((nr) ^ 0x18, addr)
5313-#define ext2_clear_bit(nr, addr) __test_and_clear_bit((nr) ^ 0x18, addr)
5314+#define ext2_set_bit(nr, addr) __test_and_set_bit((nr) ^ 0x18, (unsigned long *)(addr))
5315+#define ext2_clear_bit(nr, addr) __test_and_clear_bit((nr) ^ 0x18, (unsigned long *)(addr))
5316
5317 static __inline__ int ext2_test_bit(int nr, __const__ void * addr)
5318 {
5319diff -urN linux-2.4.20/include/asm-ppc/dma.h linux-2.4.20-o1-preempt/include/asm-ppc/dma.h
5320--- linux-2.4.20/include/asm-ppc/dma.h Tue May 22 00:02:06 2001
5321+++ linux-2.4.20-o1-preempt/include/asm-ppc/dma.h Tue Feb 18 03:52:06 2003
5322@@ -14,6 +14,7 @@
5323 #include <linux/config.h>
5324 #include <asm/io.h>
5325 #include <linux/spinlock.h>
5326+#include <linux/sched.h>
5327 #include <asm/system.h>
5328
5329 /*
5330diff -urN linux-2.4.20/include/asm-ppc/hardirq.h linux-2.4.20-o1-preempt/include/asm-ppc/hardirq.h
5331--- linux-2.4.20/include/asm-ppc/hardirq.h Fri Nov 29 00:53:15 2002
5332+++ linux-2.4.20-o1-preempt/include/asm-ppc/hardirq.h Tue Feb 18 03:52:06 2003
5333@@ -48,6 +48,7 @@
5334 #define hardirq_exit(cpu) (local_irq_count(cpu)--)
5335
5336 #define synchronize_irq() do { } while (0)
5337+#define release_irqlock(cpu) do { } while (0)
5338
5339 #else /* CONFIG_SMP */
5340
5341diff -urN linux-2.4.20/include/asm-ppc/highmem.h linux-2.4.20-o1-preempt/include/asm-ppc/highmem.h
5342--- linux-2.4.20/include/asm-ppc/highmem.h Mon Jul 2 23:34:57 2001
5343+++ linux-2.4.20-o1-preempt/include/asm-ppc/highmem.h Tue Feb 18 03:52:06 2003
5344@@ -84,6 +84,7 @@
5345 unsigned int idx;
5346 unsigned long vaddr;
5347
5348+ preempt_disable();
5349 if (page < highmem_start_page)
5350 return page_address(page);
5351
5352@@ -105,8 +106,10 @@
5353 unsigned long vaddr = (unsigned long) kvaddr;
5354 unsigned int idx = type + KM_TYPE_NR*smp_processor_id();
5355
5356- if (vaddr < KMAP_FIX_BEGIN) // FIXME
5357+ if (vaddr < KMAP_FIX_BEGIN) { // FIXME
5358+ preempt_enable();
5359 return;
5360+ }
5361
5362 if (vaddr != KMAP_FIX_BEGIN + idx * PAGE_SIZE)
5363 BUG();
5364@@ -118,6 +121,7 @@
5365 pte_clear(kmap_pte+idx);
5366 flush_tlb_page(0, vaddr);
5367 #endif
5368+ preempt_enable();
5369 }
5370
5371 #endif /* __KERNEL__ */
5372diff -urN linux-2.4.20/include/asm-ppc/hw_irq.h linux-2.4.20-o1-preempt/include/asm-ppc/hw_irq.h
5373--- linux-2.4.20/include/asm-ppc/hw_irq.h Fri Nov 29 00:53:15 2002
5374+++ linux-2.4.20-o1-preempt/include/asm-ppc/hw_irq.h Tue Feb 18 03:52:06 2003
5375@@ -22,6 +22,12 @@
5376 #define __save_flags(flags) __save_flags_ptr((unsigned long *)&flags)
5377 #define __save_and_cli(flags) ({__save_flags(flags);__cli();})
5378
5379+#define mfmsr() ({unsigned int rval; \
5380+ asm volatile("mfmsr %0" : "=r" (rval)); rval;})
5381+#define mtmsr(v) asm volatile("mtmsr %0" : : "r" (v))
5382+
5383+#define irqs_disabled() ((mfmsr() & MSR_EE) == 0)
5384+
5385 extern void do_lost_interrupts(unsigned long);
5386
5387 #define mask_irq(irq) ({if (irq_desc[irq].handler && irq_desc[irq].handler->disable) irq_desc[irq].handler->disable(irq);})
5388diff -urN linux-2.4.20/include/asm-ppc/mmu_context.h linux-2.4.20-o1-preempt/include/asm-ppc/mmu_context.h
5389--- linux-2.4.20/include/asm-ppc/mmu_context.h Tue Oct 2 18:12:44 2001
5390+++ linux-2.4.20-o1-preempt/include/asm-ppc/mmu_context.h Tue Feb 18 03:52:06 2003
5391@@ -158,6 +158,10 @@
5392 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
5393 struct task_struct *tsk, int cpu)
5394 {
5395+#ifdef CONFIG_PREEMPT
5396+ if (preempt_get_count() == 0)
5397+ BUG();
5398+#endif
5399 tsk->thread.pgdir = next->pgd;
5400 get_mmu_context(next);
5401 set_context(next->context, next->pgd);
5402diff -urN linux-2.4.20/include/asm-ppc/pgalloc.h linux-2.4.20-o1-preempt/include/asm-ppc/pgalloc.h
5403--- linux-2.4.20/include/asm-ppc/pgalloc.h Tue May 22 00:02:06 2001
5404+++ linux-2.4.20-o1-preempt/include/asm-ppc/pgalloc.h Tue Feb 18 03:52:06 2003
5405@@ -68,20 +68,25 @@
5406 {
5407 unsigned long *ret;
5408
5409+ preempt_disable();
5410 if ((ret = pgd_quicklist) != NULL) {
5411 pgd_quicklist = (unsigned long *)(*ret);
5412 ret[0] = 0;
5413 pgtable_cache_size--;
5414+ preempt_enable();
5415 } else
5416+ preempt_enable();
5417 ret = (unsigned long *)get_pgd_slow();
5418 return (pgd_t *)ret;
5419 }
5420
5421 extern __inline__ void free_pgd_fast(pgd_t *pgd)
5422 {
5423+ preempt_disable();
5424 *(unsigned long **)pgd = pgd_quicklist;
5425 pgd_quicklist = (unsigned long *) pgd;
5426 pgtable_cache_size++;
5427+ preempt_enable();
5428 }
5429
5430 extern __inline__ void free_pgd_slow(pgd_t *pgd)
5431@@ -120,19 +125,23 @@
5432 {
5433 unsigned long *ret;
5434
5435+ preempt_disable();
5436 if ((ret = pte_quicklist) != NULL) {
5437 pte_quicklist = (unsigned long *)(*ret);
5438 ret[0] = 0;
5439 pgtable_cache_size--;
5440 }
5441+ preempt_enable();
5442 return (pte_t *)ret;
5443 }
5444
5445 extern __inline__ void pte_free_fast(pte_t *pte)
5446 {
5447+ preempt_disable();
5448 *(unsigned long **)pte = pte_quicklist;
5449 pte_quicklist = (unsigned long *) pte;
5450 pgtable_cache_size++;
5451+ preempt_enable();
5452 }
5453
5454 extern __inline__ void pte_free_slow(pte_t *pte)
5455diff -urN linux-2.4.20/include/asm-ppc/smp.h linux-2.4.20-o1-preempt/include/asm-ppc/smp.h
5456--- linux-2.4.20/include/asm-ppc/smp.h Sat Aug 3 02:39:45 2002
5457+++ linux-2.4.20-o1-preempt/include/asm-ppc/smp.h Tue Feb 18 03:51:30 2003
5458@@ -48,7 +48,7 @@
5459 #define cpu_logical_map(cpu) (cpu)
5460 #define cpu_number_map(x) (x)
5461
5462-#define smp_processor_id() (current->processor)
5463+#define smp_processor_id() (current->cpu)
5464
5465 extern int smp_hw_index[NR_CPUS];
5466 #define hard_smp_processor_id() (smp_hw_index[smp_processor_id()])
5467diff -urN linux-2.4.20/include/asm-ppc/smplock.h linux-2.4.20-o1-preempt/include/asm-ppc/smplock.h
5468--- linux-2.4.20/include/asm-ppc/smplock.h Sat Nov 3 02:43:54 2001
5469+++ linux-2.4.20-o1-preempt/include/asm-ppc/smplock.h Tue Feb 18 03:52:06 2003
5470@@ -15,7 +15,15 @@
5471
5472 extern spinlock_t kernel_flag;
5473
5474+#ifdef CONFIG_SMP
5475 #define kernel_locked() spin_is_locked(&kernel_flag)
5476+#else
5477+#ifdef CONFIG_PREEMPT
5478+#define kernel_locked() preempt_get_count()
5479+#else
5480+#define kernel_locked() 1
5481+#endif
5482+#endif
5483
5484 /*
5485 * Release global kernel lock and global interrupt lock
5486@@ -47,8 +55,14 @@
5487 */
5488 static __inline__ void lock_kernel(void)
5489 {
5490+#ifdef CONFIG_PREEMPT
5491+ if (current->lock_depth == -1)
5492+ spin_lock(&kernel_flag);
5493+ ++current->lock_depth;
5494+#else
5495 if (!++current->lock_depth)
5496 spin_lock(&kernel_flag);
5497+#endif
5498 }
5499
5500 static __inline__ void unlock_kernel(void)
5501diff -urN linux-2.4.20/include/asm-ppc/softirq.h linux-2.4.20-o1-preempt/include/asm-ppc/softirq.h
5502--- linux-2.4.20/include/asm-ppc/softirq.h Sat Sep 8 21:02:31 2001
5503+++ linux-2.4.20-o1-preempt/include/asm-ppc/softirq.h Tue Feb 18 03:52:06 2003
5504@@ -10,6 +10,7 @@
5505
5506 #define local_bh_disable() \
5507 do { \
5508+ preempt_disable(); \
5509 local_bh_count(smp_processor_id())++; \
5510 barrier(); \
5511 } while (0)
5512@@ -18,14 +19,21 @@
5513 do { \
5514 barrier(); \
5515 local_bh_count(smp_processor_id())--; \
5516+ preempt_enable(); \
5517 } while (0)
5518
5519-#define local_bh_enable() \
5520+#define _local_bh_enable() \
5521 do { \
5522 if (!--local_bh_count(smp_processor_id()) \
5523 && softirq_pending(smp_processor_id())) { \
5524 do_softirq(); \
5525 } \
5526+} while (0)
5527+
5528+#define local_bh_enable() \
5529+do { \
5530+ _local_bh_enable(); \
5531+ preempt_enable(); \
5532 } while (0)
5533
5534 #define in_softirq() (local_bh_count(smp_processor_id()) != 0)
5535--- linux-2.4.20/include/asm-ppc/spinlock.h.orig Fri Nov 29 00:53:15 2002
5536+++ linux-2.4.20/include/asm-ppc/spinlock.h Sun Mar 9 13:15:14 2003
5537@@ -44,17 +44,17 @@
5538 extern int spin_trylock(spinlock_t *lock);
5539 extern unsigned long __spin_trylock(volatile unsigned long *lock);
5540
5541-#define spin_lock(lp) _spin_lock(lp)
5542-#define spin_unlock(lp) _spin_unlock(lp)
5543+#define _raw_spin_lock(lp) _spin_lock(lp)
5544+#define _raw_spin_unlock(lp) _spin_unlock(lp)
5545
5546 #else /* ! SPINLOCK_DEBUG */
5547
5548-static inline void spin_lock(spinlock_t *lock)
5549+static inline void _raw_spin_lock(spinlock_t *lock)
5550 {
5551 unsigned long tmp;
5552
5553 __asm__ __volatile__(
5554- "b 1f # spin_lock\n\
5555+ "b 1f # _raw_spin_lock\n\
5556 2: lwzx %0,0,%1\n\
5557 cmpwi 0,%0,0\n\
5558 bne+ 2b\n\
5559@@ -69,13 +69,13 @@
5560 : "cr0", "memory");
5561 }
5562
5563-static inline void spin_unlock(spinlock_t *lock)
5564+static inline void _raw_spin_unlock(spinlock_t *lock)
5565 {
5566- __asm__ __volatile__("eieio # spin_unlock": : :"memory");
5567+ __asm__ __volatile__("eieio # _raw_spin_unlock": : :"memory");
5568 lock->lock = 0;
5569 }
5570
5571-#define spin_trylock(lock) (!test_and_set_bit(0,(lock)))
5572+#define _raw_spin_trylock(lock) (!test_and_set_bit(0,(lock)))
5573
5574 #endif
5575
5576@@ -112,19 +112,19 @@
5577 extern void _write_lock(rwlock_t *rw);
5578 extern void _write_unlock(rwlock_t *rw);
5579
5580-#define read_lock(rw) _read_lock(rw)
5581-#define write_lock(rw) _write_lock(rw)
5582-#define write_unlock(rw) _write_unlock(rw)
5583-#define read_unlock(rw) _read_unlock(rw)
5584+#define _raw_read_lock(rw) _read_lock(rw)
5585+#define _raw_write_lock(rw) _write_lock(rw)
5586+#define _raw_write_unlock(rw) _write_unlock(rw)
5587+#define _raw_read_unlock(rw) _read_unlock(rw)
5588
5589 #else /* ! SPINLOCK_DEBUG */
5590
5591-static __inline__ void read_lock(rwlock_t *rw)
5592+static __inline__ void _raw_read_lock(rwlock_t *rw)
5593 {
5594 unsigned int tmp;
5595
5596 __asm__ __volatile__(
5597- "b 2f # read_lock\n\
5598+ "b 2f # _raw_read_lock\n\
5599 1: lwzx %0,0,%1\n\
5600 cmpwi 0,%0,0\n\
5601 blt+ 1b\n\
5602@@ -139,12 +139,12 @@
5603 : "cr0", "memory");
5604 }
5605
5606-static __inline__ void read_unlock(rwlock_t *rw)
5607+static __inline__ void _raw_read_unlock(rwlock_t *rw)
5608 {
5609 unsigned int tmp;
5610
5611 __asm__ __volatile__(
5612- "eieio # read_unlock\n\
5613+ "eieio # _raw_read_unlock\n\
5614 1: lwarx %0,0,%1\n\
5615 addic %0,%0,-1\n\
5616 stwcx. %0,0,%1\n\
5617@@ -154,7 +154,7 @@
5618 : "cr0", "memory");
5619 }
5620
5621-static __inline__ void write_lock(rwlock_t *rw)
5622+static __inline__ void _raw_write_lock(rwlock_t *rw)
5623 {
5624 unsigned int tmp;
5625
5626@@ -174,9 +174,9 @@
5627 : "cr0", "memory");
5628 }
5629
5630-static __inline__ void write_unlock(rwlock_t *rw)
5631+static __inline__ void _raw_write_unlock(rwlock_t *rw)
5632 {
5633- __asm__ __volatile__("eieio # write_unlock": : :"memory");
5634+ __asm__ __volatile__("eieio # raw_write_unlock": : :"memory");
5635 rw->lock = 0;
5636 }
5637
5638diff -urN linux-2.4.20/include/asm-ppc/unistd.h linux-2.4.20-o1-preempt/include/asm-ppc/unistd.h
5639--- linux-2.4.20/include/asm-ppc/unistd.h Fri Nov 29 00:53:15 2002
5640+++ linux-2.4.20-o1-preempt/include/asm-ppc/unistd.h Tue Feb 18 03:51:30 2003
5641@@ -228,7 +228,6 @@
5642 #define __NR_removexattr 218
5643 #define __NR_lremovexattr 219
5644 #define __NR_fremovexattr 220
5645-#if 0
5646 #define __NR_futex 221
5647 #define __NR_sched_setaffinity 222
5648 #define __NR_sched_getaffinity 223
5649@@ -240,7 +239,6 @@
5650 #define __NR_io_getevents 229
5651 #define __NR_io_submit 230
5652 #define __NR_io_cancel 231
5653-#endif
5654
5655 #define __NR(n) #n
5656
5657diff -urN linux-2.4.20/include/asm-ppc64/bitops.h linux-2.4.20-o1-preempt/include/asm-ppc64/bitops.h
5658--- linux-2.4.20/include/asm-ppc64/bitops.h Sat Aug 3 02:39:45 2002
5659+++ linux-2.4.20-o1-preempt/include/asm-ppc64/bitops.h Tue Feb 18 03:51:30 2003
5660@@ -33,7 +33,6 @@
5661
5662 #ifdef __KERNEL__
5663
5664-#include <asm/byteorder.h>
5665 #include <asm/memory.h>
5666
5667 /*
5668@@ -42,12 +41,12 @@
5669 #define smp_mb__before_clear_bit() smp_mb()
5670 #define smp_mb__after_clear_bit() smp_mb()
5671
5672-static __inline__ int test_bit(unsigned long nr, __const__ volatile void *addr)
5673+static __inline__ int test_bit(unsigned long nr, __const__ volatile unsigned long *addr)
5674 {
5675 return (1UL & (((__const__ long *) addr)[nr >> 6] >> (nr & 63)));
5676 }
5677
5678-static __inline__ void set_bit(unsigned long nr, volatile void *addr)
5679+static __inline__ void set_bit(unsigned long nr, volatile unsigned long *addr)
5680 {
5681 unsigned long old;
5682 unsigned long mask = 1UL << (nr & 0x3f);
5683@@ -63,7 +62,7 @@
5684 : "cc");
5685 }
5686
5687-static __inline__ void clear_bit(unsigned long nr, volatile void *addr)
5688+static __inline__ void clear_bit(unsigned long nr, volatile unsigned long *addr)
5689 {
5690 unsigned long old;
5691 unsigned long mask = 1UL << (nr & 0x3f);
5692@@ -79,7 +78,7 @@
5693 : "cc");
5694 }
5695
5696-static __inline__ void change_bit(unsigned long nr, volatile void *addr)
5697+static __inline__ void change_bit(unsigned long nr, volatile unsigned long *addr)
5698 {
5699 unsigned long old;
5700 unsigned long mask = 1UL << (nr & 0x3f);
5701@@ -95,7 +94,7 @@
5702 : "cc");
5703 }
5704
5705-static __inline__ int test_and_set_bit(unsigned long nr, volatile void *addr)
5706+static __inline__ int test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
5707 {
5708 unsigned long old, t;
5709 unsigned long mask = 1UL << (nr & 0x3f);
5710@@ -115,7 +114,7 @@
5711 return (old & mask) != 0;
5712 }
5713
5714-static __inline__ int test_and_clear_bit(unsigned long nr, volatile void *addr)
5715+static __inline__ int test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
5716 {
5717 unsigned long old, t;
5718 unsigned long mask = 1UL << (nr & 0x3f);
5719@@ -135,7 +134,7 @@
5720 return (old & mask) != 0;
5721 }
5722
5723-static __inline__ int test_and_change_bit(unsigned long nr, volatile void *addr)
5724+static __inline__ int test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
5725 {
5726 unsigned long old, t;
5727 unsigned long mask = 1UL << (nr & 0x3f);
5728@@ -158,7 +157,7 @@
5729 /*
5730 * non-atomic versions
5731 */
5732-static __inline__ void __set_bit(unsigned long nr, volatile void *addr)
5733+static __inline__ void __set_bit(unsigned long nr, volatile unsigned long *addr)
5734 {
5735 unsigned long mask = 1UL << (nr & 0x3f);
5736 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
5737@@ -166,7 +165,7 @@
5738 *p |= mask;
5739 }
5740
5741-static __inline__ void __clear_bit(unsigned long nr, volatile void *addr)
5742+static __inline__ void __clear_bit(unsigned long nr, volatile unsigned long *addr)
5743 {
5744 unsigned long mask = 1UL << (nr & 0x3f);
5745 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
5746@@ -174,7 +173,7 @@
5747 *p &= ~mask;
5748 }
5749
5750-static __inline__ void __change_bit(unsigned long nr, volatile void *addr)
5751+static __inline__ void __change_bit(unsigned long nr, volatile unsigned long *addr)
5752 {
5753 unsigned long mask = 1UL << (nr & 0x3f);
5754 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
5755@@ -182,7 +181,7 @@
5756 *p ^= mask;
5757 }
5758
5759-static __inline__ int __test_and_set_bit(unsigned long nr, volatile void *addr)
5760+static __inline__ int __test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
5761 {
5762 unsigned long mask = 1UL << (nr & 0x3f);
5763 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
5764@@ -192,7 +191,7 @@
5765 return (old & mask) != 0;
5766 }
5767
5768-static __inline__ int __test_and_clear_bit(unsigned long nr, volatile void *addr)
5769+static __inline__ int __test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
5770 {
5771 unsigned long mask = 1UL << (nr & 0x3f);
5772 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
5773@@ -202,7 +201,7 @@
5774 return (old & mask) != 0;
5775 }
5776
5777-static __inline__ int __test_and_change_bit(unsigned long nr, volatile void *addr)
5778+static __inline__ int __test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
5779 {
5780 unsigned long mask = 1UL << (nr & 0x3f);
5781 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
5782@@ -224,54 +223,46 @@
5783 return 63 - lz;
5784 }
5785
5786-/* Return the zero-based bit position
5787- * from RIGHT TO LEFT 63 --> 0
5788- * of the most significant (left-most) 1-bit in an 8-byte area.
5789- */
5790-static __inline__ long cnt_trailing_zeros(unsigned long mask)
5791-{
5792- long cnt;
5793-
5794- asm(
5795-" addi %0,%1,-1 \n\
5796- andc %0,%0,%1 \n\
5797- cntlzd %0,%0 \n\
5798- subfic %0,%0,64"
5799- : "=r" (cnt)
5800- : "r" (mask));
5801- return cnt;
5802-}
5803-
5804-
5805-
5806 /*
5807- * ffz = Find First Zero in word. Undefined if no zero exists,
5808- * Determines the bit position of the LEAST significant
5809- * (rightmost) 0 bit in the specified DOUBLE-WORD.
5810- * The returned bit position will be zero-based, starting
5811- * from the right side (63 - 0).
5812- * the code should check against ~0UL first..
5813+ * Determines the bit position of the least significant (rightmost) 0 bit
5814+ * in the specified double word. The returned bit position will be zero-based,
5815+ * starting from the right side (63 - 0).
5816 */
5817 static __inline__ unsigned long ffz(unsigned long x)
5818 {
5819- u32 tempRC;
5820-
5821- /* Change all of x's 1s to 0s and 0s to 1s in x.
5822- * And insure at least 1 zero exists in the 8 byte area.
5823- */
5824+ /* no zero exists anywhere in the 8 byte area. */
5825 if ((x = ~x) == 0)
5826- /* no zero exists anywhere in the 8 byte area. */
5827 return 64;
5828
5829- /* Calculate the bit position of the least significant '1' bit in x
5830- * (since x has been changed this will actually be the least
5831- * significant '0' bit in the original x).
5832- * Note: (x & -x) gives us a mask that is the LEAST significant
5833- * (RIGHT-most) 1-bit of the value in x.
5834+ /*
5835+ * Calculate the bit position of the least signficant '1' bit in x
5836+ * (since x has been changed this will actually be the least signficant
5837+ * '0' bit in * the original x). Note: (x & -x) gives us a mask that
5838+ * is the least significant * (RIGHT-most) 1-bit of the value in x.
5839 */
5840- tempRC = __ilog2(x & -x);
5841+ return __ilog2(x & -x);
5842+}
5843+
5844+static __inline__ int __ffs(unsigned long x)
5845+{
5846+ return __ilog2(x & -x);
5847+}
5848
5849- return tempRC;
5850+/*
5851+ * Every architecture must define this function. It's the fastest
5852+ * way of searching a 140-bit bitmap where the first 100 bits are
5853+ * unlikely to be set. It's guaranteed that at least one of the 140
5854+ * bits is cleared.
5855+ */
5856+static inline int _sched_find_first_bit(unsigned long *b)
5857+{
5858+ if (unlikely(b[0]))
5859+ return __ffs(b[0]);
5860+ if (unlikely(((unsigned int)b[1])))
5861+ return __ffs(b[1]) + 64;
5862+ if (b[1] >> 32)
5863+ return __ffs(b[1] >> 32) + 96;
5864+ return __ffs(b[2]) + 128;
5865 }
5866
5867 /*
5868@@ -281,8 +272,8 @@
5869 */
5870 static __inline__ int ffs(int x)
5871 {
5872- int result = ffz(~x);
5873- return x ? result+1 : 0;
5874+ unsigned long i = (unsigned long)x;
5875+ return __ilog2(i & -i) + 1;
5876 }
5877
5878 /*
5879@@ -293,139 +284,82 @@
5880 #define hweight16(x) generic_hweight16(x)
5881 #define hweight8(x) generic_hweight8(x)
5882
5883-extern unsigned long find_next_zero_bit(void * addr, unsigned long size,
5884- unsigned long offset);
5885-/*
5886- * The optimizer actually does good code for this case..
5887- */
5888-#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
5889-
5890-/* Bitmap functions for the ext2 filesystem. */
5891-#define _EXT2_HAVE_ASM_BITOPS_
5892+extern unsigned long find_next_zero_bit(unsigned long *addr, unsigned long size, unsigned long offset);
5893+#define find_first_zero_bit(addr, size) \
5894+ find_next_zero_bit((addr), (size), 0)
5895+
5896+extern unsigned long find_next_bit(unsigned long *addr, unsigned long size, unsigned long offset);
5897+#define find_first_bit(addr, size) \
5898+ find_next_bit((addr), (size), 0)
5899+
5900+extern unsigned long find_next_zero_le_bit(unsigned long *addr, unsigned long size, unsigned long offset);
5901+#define find_first_zero_le_bit(addr, size) \
5902+ find_next_zero_le_bit((addr), (size), 0)
5903
5904-static __inline__ int ext2_set_bit(int nr, void* addr)
5905+static __inline__ int test_le_bit(unsigned long nr, __const__ unsigned long * addr)
5906 {
5907- /* This method needs to take into account the fact that the ext2 file system represents
5908- * it's bitmaps as "little endian" unsigned integers.
5909- * Note: this method is not atomic, but ext2 does not need it to be.
5910- */
5911- int mask;
5912- int oldbit;
5913- unsigned char* ADDR = (unsigned char*) addr;
5914-
5915- /* Determine the BYTE containing the specified bit
5916- * (nr) - important as if we go to a byte there are no
5917- * little endian concerns.
5918- */
5919- ADDR += nr >> 3;
5920- mask = 1 << (nr & 0x07); /* Create a mask to the bit within this byte. */
5921- oldbit = *ADDR & mask; /* Save the bit's previous value. */
5922- *ADDR |= mask; /* Turn the bit on. */
5923- return oldbit; /* Return the bit's previous value. */
5924+ __const__ unsigned char *ADDR = (__const__ unsigned char *) addr;
5925+ return (ADDR[nr >> 3] >> (nr & 7)) & 1;
5926 }
5927
5928-static __inline__ int ext2_clear_bit(int nr, void* addr)
5929+/*
5930+ * non-atomic versions
5931+ */
5932+static __inline__ void __set_le_bit(unsigned long nr, unsigned long *addr)
5933 {
5934- /* This method needs to take into account the fact that the ext2 file system represents
5935- * | it's bitmaps as "little endian" unsigned integers.
5936- * Note: this method is not atomic, but ext2 does not need it to be.
5937- */
5938- int mask;
5939- int oldbit;
5940- unsigned char* ADDR = (unsigned char*) addr;
5941+ unsigned char *ADDR = (unsigned char *)addr;
5942
5943- /* Determine the BYTE containing the specified bit (nr)
5944- * - important as if we go to a byte there are no little endian concerns.
5945- */
5946- ADDR += nr >> 3;
5947- mask = 1 << (nr & 0x07); /* Create a mask to the bit within this byte. */
5948- oldbit = *ADDR & mask; /* Save the bit's previous value. */
5949- *ADDR = *ADDR & ~mask; /* Turn the bit off. */
5950- return oldbit; /* Return the bit's previous value. */
5951-}
5952-
5953-static __inline__ int ext2_test_bit(int nr, __const__ void * addr)
5954-{
5955- /* This method needs to take into account the fact that the ext2 file system represents
5956- * | it's bitmaps as "little endian" unsigned integers.
5957- * Determine the BYTE containing the specified bit (nr),
5958- * then shift to the right the correct number of bits and return that bit's value.
5959- */
5960- __const__ unsigned char *ADDR = (__const__ unsigned char *) addr;
5961- return (ADDR[nr >> 3] >> (nr & 7)) & 1;
5962+ ADDR += nr >> 3;
5963+ *ADDR |= 1 << (nr & 0x07);
5964 }
5965
5966-/* Returns the bit position of the most significant 1 bit in a WORD. */
5967-static __inline__ int ext2_ilog2(unsigned int x)
5968+static __inline__ void __clear_le_bit(unsigned long nr, unsigned long *addr)
5969 {
5970- int lz;
5971+ unsigned char *ADDR = (unsigned char *)addr;
5972
5973- asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x));
5974- return 31 - lz;
5975+ ADDR += nr >> 3;
5976+ *ADDR &= ~(1 << (nr & 0x07));
5977 }
5978
5979-/* ext2_ffz = ext2's Find First Zero.
5980- * Determines the bit position of the LEAST significant (rightmost) 0 bit in the specified WORD.
5981- * The returned bit position will be zero-based, starting from the right side (31 - 0).
5982- */
5983-static __inline__ int ext2_ffz(unsigned int x)
5984+static __inline__ int __test_and_set_le_bit(unsigned long nr, unsigned long *addr)
5985 {
5986- u32 tempRC;
5987- /* Change all of x's 1s to 0s and 0s to 1s in x. And insure at least 1 zero exists in the word. */
5988- if ((x = ~x) == 0)
5989- /* no zero exists anywhere in the 4 byte area. */
5990- return 32;
5991- /* Calculate the bit position of the least significant '1' bit in x
5992- * (since x has been changed this will actually be the least
5993- * significant '0' bit in the original x).
5994- * Note: (x & -x) gives us a mask that is the LEAST significant
5995- * (RIGHT-most) 1-bit of the value in x.
5996- */
5997- tempRC = ext2_ilog2(x & -x);
5998- return tempRC;
5999+ int mask, retval;
6000+ unsigned char *ADDR = (unsigned char *)addr;
6001+
6002+ ADDR += nr >> 3;
6003+ mask = 1 << (nr & 0x07);
6004+ retval = (mask & *ADDR) != 0;
6005+ *ADDR |= mask;
6006+ return retval;
6007 }
6008
6009-static __inline__ u32 ext2_find_next_zero_bit(void* addr, u32 size, u32 offset)
6010+static __inline__ int __test_and_clear_le_bit(unsigned long nr, unsigned long *addr)
6011 {
6012- /* This method needs to take into account the fact that the ext2 file system represents
6013- * | it's bitmaps as "little endian" unsigned integers.
6014- */
6015- unsigned int *p = ((unsigned int *) addr) + (offset >> 5);
6016- unsigned int result = offset & ~31;
6017- unsigned int tmp;
6018-
6019- if (offset >= size)
6020- return size;
6021- size -= result;
6022- offset &= 31;
6023- if (offset) {
6024- tmp = cpu_to_le32p(p++);
6025- tmp |= ~0U >> (32-offset); /* bug or feature ? */
6026- if (size < 32)
6027- goto found_first;
6028- if (tmp != ~0)
6029- goto found_middle;
6030- size -= 32;
6031- result += 32;
6032- }
6033- while (size >= 32) {
6034- if ((tmp = cpu_to_le32p(p++)) != ~0)
6035- goto found_middle;
6036- result += 32;
6037- size -= 32;
6038- }
6039- if (!size)
6040- return result;
6041- tmp = cpu_to_le32p(p);
6042-found_first:
6043- tmp |= ~0 << size;
6044- if (tmp == ~0) /* Are any bits zero? */
6045- return result + size; /* Nope. */
6046-found_middle:
6047- return result + ext2_ffz(tmp);
6048-}
6049+ int mask, retval;
6050+ unsigned char *ADDR = (unsigned char *)addr;
6051
6052-#define ext2_find_first_zero_bit(addr, size) ext2_find_next_zero_bit((addr), (size), 0)
6053+ ADDR += nr >> 3;
6054+ mask = 1 << (nr & 0x07);
6055+ retval = (mask & *ADDR) != 0;
6056+ *ADDR &= ~mask;
6057+ return retval;
6058+}
6059+
6060+#define ext2_set_bit(nr,addr) \
6061+ __test_and_set_le_bit((nr),(unsigned long*)addr)
6062+#define ext2_clear_bit(nr, addr) \
6063+ __test_and_clear_le_bit((nr),(unsigned long*)addr)
6064+#define ext2_test_bit(nr, addr) test_le_bit((nr),(unsigned long*)addr)
6065+#define ext2_find_first_zero_bit(addr, size) \
6066+ find_first_zero_le_bit((unsigned long*)addr, size)
6067+#define ext2_find_next_zero_bit(addr, size, off) \
6068+ find_next_zero_le_bit((unsigned long*)addr, size, off)
6069+
6070+#define minix_test_and_set_bit(nr,addr) test_and_set_bit(nr,addr)
6071+#define minix_set_bit(nr,addr) set_bit(nr,addr)
6072+#define minix_test_and_clear_bit(nr,addr) test_and_clear_bit(nr,addr)
6073+#define minix_test_bit(nr,addr) test_bit(nr,addr)
6074+#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
6075
6076 #endif /* __KERNEL__ */
6077 #endif /* _PPC64_BITOPS_H */
6078--- linux-2.4.20/include/asm-ppc64/spinlock.h.orig Sat Aug 3 02:39:45 2002
6079+++ linux-2.4.20/include/asm-ppc64/spinlock.h Sun Mar 9 13:36:03 2003
6080@@ -23,12 +23,12 @@
6081
6082 #define spin_is_locked(x) ((x)->lock != 0)
6083
6084-static __inline__ int spin_trylock(spinlock_t *lock)
6085+static __inline__ int _raw_spin_trylock(spinlock_t *lock)
6086 {
6087 unsigned int tmp;
6088
6089 __asm__ __volatile__(
6090-"1: lwarx %0,0,%1 # spin_trylock\n\
6091+"1: lwarx %0,0,%1 # _raw_spin_trylock\n\
6092 cmpwi 0,%0,0\n\
6093 li %0,0\n\
6094 bne- 2f\n\
6095@@ -43,12 +43,12 @@
6096 return tmp;
6097 }
6098
6099-static __inline__ void spin_lock(spinlock_t *lock)
6100+static __inline__ void _raw_spin_lock(spinlock_t *lock)
6101 {
6102 unsigned int tmp;
6103
6104 __asm__ __volatile__(
6105- "b 2f # spin_lock\n\
6106+ "b 2f # _raw_spin_lock\n\
6107 1: or 1,1,1 # spin at low priority\n\
6108 lwzx %0,0,%1\n\
6109 cmpwi 0,%0,0\n\
6110@@ -65,9 +65,9 @@
6111 : "cr0", "memory");
6112 }
6113
6114-static __inline__ void spin_unlock(spinlock_t *lock)
6115+static __inline__ void _raw_spin_unlock(spinlock_t *lock)
6116 {
6117- __asm__ __volatile__("lwsync # spin_unlock": : :"memory");
6118+ __asm__ __volatile__("lwsync # _raw_spin_unlock": : :"memory");
6119 lock->lock = 0;
6120 }
6121
6122@@ -109,12 +109,12 @@
6123 return ret;
6124 }
6125
6126-static __inline__ void read_lock(rwlock_t *rw)
6127+static __inline__ void _raw_read_lock(rwlock_t *rw)
6128 {
6129 unsigned int tmp;
6130
6131 __asm__ __volatile__(
6132- "b 2f # read_lock\n\
6133+ "b 2f # _raw_read_lock\n\
6134 1: or 1,1,1 # spin at low priority\n\
6135 lwax %0,0,%1\n\
6136 cmpwi 0,%0,0\n\
6137@@ -132,12 +132,12 @@
6138 : "cr0", "memory");
6139 }
6140
6141-static __inline__ void read_unlock(rwlock_t *rw)
6142+static __inline__ void _raw_read_unlock(rwlock_t *rw)
6143 {
6144 unsigned int tmp;
6145
6146 __asm__ __volatile__(
6147- "lwsync # read_unlock\n\
6148+ "lwsync # _raw_read_unlock\n\
6149 1: lwarx %0,0,%1\n\
6150 addic %0,%0,-1\n\
6151 stwcx. %0,0,%1\n\
6152@@ -168,12 +168,12 @@
6153 return ret;
6154 }
6155
6156-static __inline__ void write_lock(rwlock_t *rw)
6157+static __inline__ void _raw_write_lock(rwlock_t *rw)
6158 {
6159 unsigned int tmp;
6160
6161 __asm__ __volatile__(
6162- "b 2f # write_lock\n\
6163+ "b 2f # _raw_write_lock\n\
6164 1: or 1,1,1 # spin at low priority\n\
6165 lwax %0,0,%1\n\
6166 cmpwi 0,%0,0\n\
6167@@ -190,9 +190,9 @@
6168 : "cr0", "memory");
6169 }
6170
6171-static __inline__ void write_unlock(rwlock_t *rw)
6172+static __inline__ void _raw_write_unlock(rwlock_t *rw)
6173 {
6174- __asm__ __volatile__("lwsync # write_unlock": : :"memory");
6175+ __asm__ __volatile__("lwsync # _raw_write_unlock": : :"memory");
6176 rw->lock = 0;
6177 }
6178
6179diff -urN linux-2.4.20/include/asm-s390/bitops.h linux-2.4.20-o1-preempt/include/asm-s390/bitops.h
6180--- linux-2.4.20/include/asm-s390/bitops.h Sat Aug 3 02:39:45 2002
6181+++ linux-2.4.20-o1-preempt/include/asm-s390/bitops.h Tue Feb 18 03:51:30 2003
6182@@ -47,272 +47,217 @@
6183 extern const char _oi_bitmap[];
6184 extern const char _ni_bitmap[];
6185 extern const char _zb_findmap[];
6186+extern const char _sb_findmap[];
6187
6188 #ifdef CONFIG_SMP
6189 /*
6190 * SMP save set_bit routine based on compare and swap (CS)
6191 */
6192-static __inline__ void set_bit_cs(int nr, volatile void * addr)
6193+static inline void set_bit_cs(int nr, volatile void *ptr)
6194 {
6195- unsigned long bits, mask;
6196- __asm__ __volatile__(
6197+ unsigned long addr, old, new, mask;
6198+
6199+ addr = (unsigned long) ptr;
6200 #if ALIGN_CS == 1
6201- " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
6202- " nr %2,%1\n" /* isolate last 2 bits of address */
6203- " xr %1,%2\n" /* make addr % 4 == 0 */
6204- " sll %2,3\n"
6205- " ar %0,%2\n" /* add alignement to bitnr */
6206+ addr ^= addr & 3; /* align address to 4 */
6207+ nr += (addr & 3) << 3; /* add alignment to bit number */
6208 #endif
6209- " lhi %2,31\n"
6210- " nr %2,%0\n" /* make shift value */
6211- " xr %0,%2\n"
6212- " srl %0,3\n"
6213- " lhi %3,1\n"
6214- " la %1,0(%0,%1)\n" /* calc. address for CS */
6215- " sll %3,0(%2)\n" /* make OR mask */
6216- " l %0,0(%1)\n"
6217- "0: lr %2,%0\n" /* CS loop starts here */
6218- " or %2,%3\n" /* set bit */
6219- " cs %0,%2,0(%1)\n"
6220- " jl 0b"
6221- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
6222- : "cc", "memory" );
6223+ addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
6224+ mask = 1UL << (nr & 31); /* make OR mask */
6225+ asm volatile(
6226+ " l %0,0(%4)\n"
6227+ "0: lr %1,%0\n"
6228+ " or %1,%3\n"
6229+ " cs %0,%1,0(%4)\n"
6230+ " jl 0b"
6231+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
6232+ : "d" (mask), "a" (addr)
6233+ : "cc" );
6234 }
6235
6236 /*
6237 * SMP save clear_bit routine based on compare and swap (CS)
6238 */
6239-static __inline__ void clear_bit_cs(int nr, volatile void * addr)
6240+static inline void clear_bit_cs(int nr, volatile void *ptr)
6241 {
6242- static const int minusone = -1;
6243- unsigned long bits, mask;
6244- __asm__ __volatile__(
6245+ unsigned long addr, old, new, mask;
6246+
6247+ addr = (unsigned long) ptr;
6248 #if ALIGN_CS == 1
6249- " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
6250- " nr %2,%1\n" /* isolate last 2 bits of address */
6251- " xr %1,%2\n" /* make addr % 4 == 0 */
6252- " sll %2,3\n"
6253- " ar %0,%2\n" /* add alignement to bitnr */
6254+ addr ^= addr & 3; /* align address to 4 */
6255+ nr += (addr & 3) << 3; /* add alignment to bit number */
6256 #endif
6257- " lhi %2,31\n"
6258- " nr %2,%0\n" /* make shift value */
6259- " xr %0,%2\n"
6260- " srl %0,3\n"
6261- " lhi %3,1\n"
6262- " la %1,0(%0,%1)\n" /* calc. address for CS */
6263- " sll %3,0(%2)\n"
6264- " x %3,%4\n" /* make AND mask */
6265- " l %0,0(%1)\n"
6266- "0: lr %2,%0\n" /* CS loop starts here */
6267- " nr %2,%3\n" /* clear bit */
6268- " cs %0,%2,0(%1)\n"
6269- " jl 0b"
6270- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask)
6271- : "m" (minusone) : "cc", "memory" );
6272+ addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
6273+ mask = ~(1UL << (nr & 31)); /* make AND mask */
6274+ asm volatile(
6275+ " l %0,0(%4)\n"
6276+ "0: lr %1,%0\n"
6277+ " nr %1,%3\n"
6278+ " cs %0,%1,0(%4)\n"
6279+ " jl 0b"
6280+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
6281+ : "d" (mask), "a" (addr)
6282+ : "cc" );
6283 }
6284
6285 /*
6286 * SMP save change_bit routine based on compare and swap (CS)
6287 */
6288-static __inline__ void change_bit_cs(int nr, volatile void * addr)
6289+static inline void change_bit_cs(int nr, volatile void *ptr)
6290 {
6291- unsigned long bits, mask;
6292- __asm__ __volatile__(
6293+ unsigned long addr, old, new, mask;
6294+
6295+ addr = (unsigned long) ptr;
6296 #if ALIGN_CS == 1
6297- " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
6298- " nr %2,%1\n" /* isolate last 2 bits of address */
6299- " xr %1,%2\n" /* make addr % 4 == 0 */
6300- " sll %2,3\n"
6301- " ar %0,%2\n" /* add alignement to bitnr */
6302+ addr ^= addr & 3; /* align address to 4 */
6303+ nr += (addr & 3) << 3; /* add alignment to bit number */
6304 #endif
6305- " lhi %2,31\n"
6306- " nr %2,%0\n" /* make shift value */
6307- " xr %0,%2\n"
6308- " srl %0,3\n"
6309- " lhi %3,1\n"
6310- " la %1,0(%0,%1)\n" /* calc. address for CS */
6311- " sll %3,0(%2)\n" /* make XR mask */
6312- " l %0,0(%1)\n"
6313- "0: lr %2,%0\n" /* CS loop starts here */
6314- " xr %2,%3\n" /* change bit */
6315- " cs %0,%2,0(%1)\n"
6316- " jl 0b"
6317- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
6318- : "cc", "memory" );
6319+ addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
6320+ mask = 1UL << (nr & 31); /* make XOR mask */
6321+ asm volatile(
6322+ " l %0,0(%4)\n"
6323+ "0: lr %1,%0\n"
6324+ " xr %1,%3\n"
6325+ " cs %0,%1,0(%4)\n"
6326+ " jl 0b"
6327+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
6328+ : "d" (mask), "a" (addr)
6329+ : "cc" );
6330 }
6331
6332 /*
6333 * SMP save test_and_set_bit routine based on compare and swap (CS)
6334 */
6335-static __inline__ int test_and_set_bit_cs(int nr, volatile void * addr)
6336+static inline int test_and_set_bit_cs(int nr, volatile void *ptr)
6337 {
6338- unsigned long bits, mask;
6339- __asm__ __volatile__(
6340+ unsigned long addr, old, new, mask;
6341+
6342+ addr = (unsigned long) ptr;
6343 #if ALIGN_CS == 1
6344- " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
6345- " nr %2,%1\n" /* isolate last 2 bits of address */
6346- " xr %1,%2\n" /* make addr % 4 == 0 */
6347- " sll %2,3\n"
6348- " ar %0,%2\n" /* add alignement to bitnr */
6349+ addr ^= addr & 3; /* align address to 4 */
6350+ nr += (addr & 3) << 3; /* add alignment to bit number */
6351 #endif
6352- " lhi %2,31\n"
6353- " nr %2,%0\n" /* make shift value */
6354- " xr %0,%2\n"
6355- " srl %0,3\n"
6356- " lhi %3,1\n"
6357- " la %1,0(%0,%1)\n" /* calc. address for CS */
6358- " sll %3,0(%2)\n" /* make OR mask */
6359- " l %0,0(%1)\n"
6360- "0: lr %2,%0\n" /* CS loop starts here */
6361- " or %2,%3\n" /* set bit */
6362- " cs %0,%2,0(%1)\n"
6363- " jl 0b\n"
6364- " nr %0,%3\n" /* isolate old bit */
6365- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
6366- : "cc", "memory" );
6367- return nr != 0;
6368+ addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
6369+ mask = 1UL << (nr & 31); /* make OR/test mask */
6370+ asm volatile(
6371+ " l %0,0(%4)\n"
6372+ "0: lr %1,%0\n"
6373+ " or %1,%3\n"
6374+ " cs %0,%1,0(%4)\n"
6375+ " jl 0b"
6376+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
6377+ : "d" (mask), "a" (addr)
6378+ : "cc" );
6379+ return (old & mask) != 0;
6380 }
6381
6382 /*
6383 * SMP save test_and_clear_bit routine based on compare and swap (CS)
6384 */
6385-static __inline__ int test_and_clear_bit_cs(int nr, volatile void * addr)
6386+static inline int test_and_clear_bit_cs(int nr, volatile void *ptr)
6387 {
6388- static const int minusone = -1;
6389- unsigned long bits, mask;
6390- __asm__ __volatile__(
6391+ unsigned long addr, old, new, mask;
6392+
6393+ addr = (unsigned long) ptr;
6394 #if ALIGN_CS == 1
6395- " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
6396- " nr %2,%1\n" /* isolate last 2 bits of address */
6397- " xr %1,%2\n" /* make addr % 4 == 0 */
6398- " sll %2,3\n"
6399- " ar %0,%2\n" /* add alignement to bitnr */
6400+ addr ^= addr & 3; /* align address to 4 */
6401+ nr += (addr & 3) << 3; /* add alignment to bit number */
6402 #endif
6403- " lhi %2,31\n"
6404- " nr %2,%0\n" /* make shift value */
6405- " xr %0,%2\n"
6406- " srl %0,3\n"
6407- " lhi %3,1\n"
6408- " la %1,0(%0,%1)\n" /* calc. address for CS */
6409- " sll %3,0(%2)\n"
6410- " l %0,0(%1)\n"
6411- " x %3,%4\n" /* make AND mask */
6412- "0: lr %2,%0\n" /* CS loop starts here */
6413- " nr %2,%3\n" /* clear bit */
6414- " cs %0,%2,0(%1)\n"
6415- " jl 0b\n"
6416- " x %3,%4\n"
6417- " nr %0,%3\n" /* isolate old bit */
6418- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask)
6419- : "m" (minusone) : "cc", "memory" );
6420- return nr;
6421+ addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
6422+ mask = ~(1UL << (nr & 31)); /* make AND mask */
6423+ asm volatile(
6424+ " l %0,0(%4)\n"
6425+ "0: lr %1,%0\n"
6426+ " nr %1,%3\n"
6427+ " cs %0,%1,0(%4)\n"
6428+ " jl 0b"
6429+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
6430+ : "d" (mask), "a" (addr)
6431+ : "cc" );
6432+ return (old ^ new) != 0;
6433 }
6434
6435 /*
6436 * SMP save test_and_change_bit routine based on compare and swap (CS)
6437 */
6438-static __inline__ int test_and_change_bit_cs(int nr, volatile void * addr)
6439+static inline int test_and_change_bit_cs(int nr, volatile void *ptr)
6440 {
6441- unsigned long bits, mask;
6442- __asm__ __volatile__(
6443+ unsigned long addr, old, new, mask;
6444+
6445+ addr = (unsigned long) ptr;
6446 #if ALIGN_CS == 1
6447- " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
6448- " nr %2,%1\n" /* isolate last 2 bits of address */
6449- " xr %1,%2\n" /* make addr % 4 == 0 */
6450- " sll %2,3\n"
6451- " ar %0,%2\n" /* add alignement to bitnr */
6452+ addr ^= addr & 3; /* align address to 4 */
6453+ nr += (addr & 3) << 3; /* add alignment to bit number */
6454 #endif
6455- " lhi %2,31\n"
6456- " nr %2,%0\n" /* make shift value */
6457- " xr %0,%2\n"
6458- " srl %0,3\n"
6459- " lhi %3,1\n"
6460- " la %1,0(%0,%1)\n" /* calc. address for CS */
6461- " sll %3,0(%2)\n" /* make OR mask */
6462- " l %0,0(%1)\n"
6463- "0: lr %2,%0\n" /* CS loop starts here */
6464- " xr %2,%3\n" /* change bit */
6465- " cs %0,%2,0(%1)\n"
6466- " jl 0b\n"
6467- " nr %0,%3\n" /* isolate old bit */
6468- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
6469- : "cc", "memory" );
6470- return nr != 0;
6471+ addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
6472+ mask = 1UL << (nr & 31); /* make XOR mask */
6473+ asm volatile(
6474+ " l %0,0(%4)\n"
6475+ "0: lr %1,%0\n"
6476+ " xr %1,%3\n"
6477+ " cs %0,%1,0(%4)\n"
6478+ " jl 0b"
6479+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
6480+ : "d" (mask), "a" (addr)
6481+ : "cc" );
6482+ return (old & mask) != 0;
6483 }
6484 #endif /* CONFIG_SMP */
6485
6486 /*
6487 * fast, non-SMP set_bit routine
6488 */
6489-static __inline__ void __set_bit(int nr, volatile void * addr)
6490+static inline void __set_bit(int nr, volatile void *ptr)
6491 {
6492- unsigned long reg1, reg2;
6493- __asm__ __volatile__(
6494- " lhi %1,24\n"
6495- " lhi %0,7\n"
6496- " xr %1,%2\n"
6497- " nr %0,%2\n"
6498- " srl %1,3\n"
6499- " la %1,0(%1,%3)\n"
6500- " la %0,0(%0,%4)\n"
6501- " oc 0(1,%1),0(%0)"
6502- : "=&a" (reg1), "=&a" (reg2)
6503- : "r" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
6504-}
6505-
6506-static __inline__ void
6507-__constant_set_bit(const int nr, volatile void * addr)
6508-{
6509- switch (nr&7) {
6510- case 0:
6511- __asm__ __volatile__ ("la 1,%0\n\t"
6512- "oi 0(1),0x01"
6513- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6514- : : "1", "cc", "memory");
6515- break;
6516- case 1:
6517- __asm__ __volatile__ ("la 1,%0\n\t"
6518- "oi 0(1),0x02"
6519- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6520- : : "1", "cc", "memory" );
6521- break;
6522- case 2:
6523- __asm__ __volatile__ ("la 1,%0\n\t"
6524- "oi 0(1),0x04"
6525- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6526- : : "1", "cc", "memory" );
6527- break;
6528- case 3:
6529- __asm__ __volatile__ ("la 1,%0\n\t"
6530- "oi 0(1),0x08"
6531- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6532- : : "1", "cc", "memory" );
6533- break;
6534- case 4:
6535- __asm__ __volatile__ ("la 1,%0\n\t"
6536- "oi 0(1),0x10"
6537- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6538- : : "1", "cc", "memory" );
6539- break;
6540- case 5:
6541- __asm__ __volatile__ ("la 1,%0\n\t"
6542- "oi 0(1),0x20"
6543- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6544- : : "1", "cc", "memory" );
6545- break;
6546- case 6:
6547- __asm__ __volatile__ ("la 1,%0\n\t"
6548- "oi 0(1),0x40"
6549- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6550- : : "1", "cc", "memory" );
6551- break;
6552- case 7:
6553- __asm__ __volatile__ ("la 1,%0\n\t"
6554- "oi 0(1),0x80"
6555- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6556- : : "1", "cc", "memory" );
6557- break;
6558- }
6559+ unsigned long addr;
6560+
6561+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
6562+ asm volatile("oc 0(1,%1),0(%2)"
6563+ : "+m" (*(char *) addr)
6564+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
6565+ : "cc" );
6566+}
6567+
6568+static inline void
6569+__constant_set_bit(const int nr, volatile void *ptr)
6570+{
6571+ unsigned long addr;
6572+
6573+ addr = ((unsigned long) ptr) + ((nr >> 3) ^ 3);
6574+ switch (nr&7) {
6575+ case 0:
6576+ asm volatile ("oi 0(%1),0x01"
6577+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6578+ break;
6579+ case 1:
6580+ asm volatile ("oi 0(%1),0x02"
6581+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6582+ break;
6583+ case 2:
6584+ asm volatile ("oi 0(%1),0x04"
6585+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6586+ break;
6587+ case 3:
6588+ asm volatile ("oi 0(%1),0x08"
6589+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6590+ break;
6591+ case 4:
6592+ asm volatile ("oi 0(%1),0x10"
6593+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6594+ break;
6595+ case 5:
6596+ asm volatile ("oi 0(%1),0x20"
6597+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6598+ break;
6599+ case 6:
6600+ asm volatile ("oi 0(%1),0x40"
6601+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6602+ break;
6603+ case 7:
6604+ asm volatile ("oi 0(%1),0x80"
6605+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6606+ break;
6607+ }
6608 }
6609
6610 #define set_bit_simple(nr,addr) \
6611@@ -323,76 +268,58 @@
6612 /*
6613 * fast, non-SMP clear_bit routine
6614 */
6615-static __inline__ void
6616-__clear_bit(int nr, volatile void * addr)
6617+static inline void
6618+__clear_bit(int nr, volatile void *ptr)
6619 {
6620- unsigned long reg1, reg2;
6621- __asm__ __volatile__(
6622- " lhi %1,24\n"
6623- " lhi %0,7\n"
6624- " xr %1,%2\n"
6625- " nr %0,%2\n"
6626- " srl %1,3\n"
6627- " la %1,0(%1,%3)\n"
6628- " la %0,0(%0,%4)\n"
6629- " nc 0(1,%1),0(%0)"
6630- : "=&a" (reg1), "=&a" (reg2)
6631- : "r" (nr), "a" (addr), "a" (&_ni_bitmap) : "cc", "memory" );
6632-}
6633-
6634-static __inline__ void
6635-__constant_clear_bit(const int nr, volatile void * addr)
6636-{
6637- switch (nr&7) {
6638- case 0:
6639- __asm__ __volatile__ ("la 1,%0\n\t"
6640- "ni 0(1),0xFE"
6641- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6642- : : "1", "cc", "memory" );
6643- break;
6644- case 1:
6645- __asm__ __volatile__ ("la 1,%0\n\t"
6646- "ni 0(1),0xFD"
6647- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6648- : : "1", "cc", "memory" );
6649- break;
6650- case 2:
6651- __asm__ __volatile__ ("la 1,%0\n\t"
6652- "ni 0(1),0xFB"
6653- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6654- : : "1", "cc", "memory" );
6655- break;
6656- case 3:
6657- __asm__ __volatile__ ("la 1,%0\n\t"
6658- "ni 0(1),0xF7"
6659- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6660- : : "1", "cc", "memory" );
6661- break;
6662- case 4:
6663- __asm__ __volatile__ ("la 1,%0\n\t"
6664- "ni 0(1),0xEF"
6665- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6666- : : "cc", "memory" );
6667- break;
6668- case 5:
6669- __asm__ __volatile__ ("la 1,%0\n\t"
6670- "ni 0(1),0xDF"
6671- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6672- : : "1", "cc", "memory" );
6673- break;
6674- case 6:
6675- __asm__ __volatile__ ("la 1,%0\n\t"
6676- "ni 0(1),0xBF"
6677- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6678- : : "1", "cc", "memory" );
6679- break;
6680- case 7:
6681- __asm__ __volatile__ ("la 1,%0\n\t"
6682- "ni 0(1),0x7F"
6683- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6684- : : "1", "cc", "memory" );
6685- break;
6686- }
6687+ unsigned long addr;
6688+
6689+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
6690+ asm volatile("nc 0(1,%1),0(%2)"
6691+ : "+m" (*(char *) addr)
6692+ : "a" (addr), "a" (_ni_bitmap + (nr & 7))
6693+ : "cc" );
6694+}
6695+
6696+static inline void
6697+__constant_clear_bit(const int nr, volatile void *ptr)
6698+{
6699+ unsigned long addr;
6700+
6701+ addr = ((unsigned long) ptr) + ((nr >> 3) ^ 3);
6702+ switch (nr&7) {
6703+ case 0:
6704+ asm volatile ("ni 0(%1),0xFE"
6705+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6706+ break;
6707+ case 1:
6708+ asm volatile ("ni 0(%1),0xFD"
6709+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6710+ break;
6711+ case 2:
6712+ asm volatile ("ni 0(%1),0xFB"
6713+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6714+ break;
6715+ case 3:
6716+ asm volatile ("ni 0(%1),0xF7"
6717+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6718+ break;
6719+ case 4:
6720+ asm volatile ("ni 0(%1),0xEF"
6721+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6722+ break;
6723+ case 5:
6724+ asm volatile ("ni 0(%1),0xDF"
6725+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6726+ break;
6727+ case 6:
6728+ asm volatile ("ni 0(%1),0xBF"
6729+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6730+ break;
6731+ case 7:
6732+ asm volatile ("ni 0(%1),0x7F"
6733+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6734+ break;
6735+ }
6736 }
6737
6738 #define clear_bit_simple(nr,addr) \
6739@@ -403,75 +330,57 @@
6740 /*
6741 * fast, non-SMP change_bit routine
6742 */
6743-static __inline__ void __change_bit(int nr, volatile void * addr)
6744+static inline void __change_bit(int nr, volatile void *ptr)
6745 {
6746- unsigned long reg1, reg2;
6747- __asm__ __volatile__(
6748- " lhi %1,24\n"
6749- " lhi %0,7\n"
6750- " xr %1,%2\n"
6751- " nr %0,%2\n"
6752- " srl %1,3\n"
6753- " la %1,0(%1,%3)\n"
6754- " la %0,0(%0,%4)\n"
6755- " xc 0(1,%1),0(%0)"
6756- : "=&a" (reg1), "=&a" (reg2)
6757- : "r" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
6758-}
6759-
6760-static __inline__ void
6761-__constant_change_bit(const int nr, volatile void * addr)
6762-{
6763- switch (nr&7) {
6764- case 0:
6765- __asm__ __volatile__ ("la 1,%0\n\t"
6766- "xi 0(1),0x01"
6767- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6768- : : "cc", "memory" );
6769- break;
6770- case 1:
6771- __asm__ __volatile__ ("la 1,%0\n\t"
6772- "xi 0(1),0x02"
6773- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6774- : : "cc", "memory" );
6775- break;
6776- case 2:
6777- __asm__ __volatile__ ("la 1,%0\n\t"
6778- "xi 0(1),0x04"
6779- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6780- : : "cc", "memory" );
6781- break;
6782- case 3:
6783- __asm__ __volatile__ ("la 1,%0\n\t"
6784- "xi 0(1),0x08"
6785- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6786- : : "cc", "memory" );
6787- break;
6788- case 4:
6789- __asm__ __volatile__ ("la 1,%0\n\t"
6790- "xi 0(1),0x10"
6791- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6792- : : "cc", "memory" );
6793- break;
6794- case 5:
6795- __asm__ __volatile__ ("la 1,%0\n\t"
6796- "xi 0(1),0x20"
6797- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6798- : : "1", "cc", "memory" );
6799- break;
6800- case 6:
6801- __asm__ __volatile__ ("la 1,%0\n\t"
6802- "xi 0(1),0x40"
6803- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6804- : : "1", "cc", "memory" );
6805- break;
6806- case 7:
6807- __asm__ __volatile__ ("la 1,%0\n\t"
6808- "xi 0(1),0x80"
6809- : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6810- : : "1", "cc", "memory" );
6811- break;
6812- }
6813+ unsigned long addr;
6814+
6815+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
6816+ asm volatile("xc 0(1,%1),0(%2)"
6817+ : "+m" (*(char *) addr)
6818+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
6819+ : "cc" );
6820+}
6821+
6822+static inline void
6823+__constant_change_bit(const int nr, volatile void *ptr)
6824+{
6825+ unsigned long addr;
6826+
6827+ addr = ((unsigned long) ptr) + ((nr >> 3) ^ 3);
6828+ switch (nr&7) {
6829+ case 0:
6830+ asm volatile ("xi 0(%1),0x01"
6831+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6832+ break;
6833+ case 1:
6834+ asm volatile ("xi 0(%1),0x02"
6835+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6836+ break;
6837+ case 2:
6838+ asm volatile ("xi 0(%1),0x04"
6839+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6840+ break;
6841+ case 3:
6842+ asm volatile ("xi 0(%1),0x08"
6843+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6844+ break;
6845+ case 4:
6846+ asm volatile ("xi 0(%1),0x10"
6847+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6848+ break;
6849+ case 5:
6850+ asm volatile ("xi 0(%1),0x20"
6851+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6852+ break;
6853+ case 6:
6854+ asm volatile ("xi 0(%1),0x40"
6855+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6856+ break;
6857+ case 7:
6858+ asm volatile ("xi 0(%1),0x80"
6859+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6860+ break;
6861+ }
6862 }
6863
6864 #define change_bit_simple(nr,addr) \
6865@@ -482,74 +391,54 @@
6866 /*
6867 * fast, non-SMP test_and_set_bit routine
6868 */
6869-static __inline__ int test_and_set_bit_simple(int nr, volatile void * addr)
6870+static inline int test_and_set_bit_simple(int nr, volatile void *ptr)
6871 {
6872- unsigned long reg1, reg2;
6873- int oldbit;
6874- __asm__ __volatile__(
6875- " lhi %1,24\n"
6876- " lhi %2,7\n"
6877- " xr %1,%3\n"
6878- " nr %2,%3\n"
6879- " srl %1,3\n"
6880- " la %1,0(%1,%4)\n"
6881- " ic %0,0(%1)\n"
6882- " srl %0,0(%2)\n"
6883- " la %2,0(%2,%5)\n"
6884- " oc 0(1,%1),0(%2)"
6885- : "=d&" (oldbit), "=&a" (reg1), "=&a" (reg2)
6886- : "r" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
6887- return oldbit & 1;
6888+ unsigned long addr;
6889+ unsigned char ch;
6890+
6891+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
6892+ ch = *(unsigned char *) addr;
6893+ asm volatile("oc 0(1,%1),0(%2)"
6894+ : "+m" (*(char *) addr)
6895+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
6896+ : "cc" );
6897+ return (ch >> (nr & 7)) & 1;
6898 }
6899 #define __test_and_set_bit(X,Y) test_and_set_bit_simple(X,Y)
6900
6901 /*
6902 * fast, non-SMP test_and_clear_bit routine
6903 */
6904-static __inline__ int test_and_clear_bit_simple(int nr, volatile void * addr)
6905+static inline int test_and_clear_bit_simple(int nr, volatile void *ptr)
6906 {
6907- unsigned long reg1, reg2;
6908- int oldbit;
6909+ unsigned long addr;
6910+ unsigned char ch;
6911
6912- __asm__ __volatile__(
6913- " lhi %1,24\n"
6914- " lhi %2,7\n"
6915- " xr %1,%3\n"
6916- " nr %2,%3\n"
6917- " srl %1,3\n"
6918- " la %1,0(%1,%4)\n"
6919- " ic %0,0(%1)\n"
6920- " srl %0,0(%2)\n"
6921- " la %2,0(%2,%5)\n"
6922- " nc 0(1,%1),0(%2)"
6923- : "=d&" (oldbit), "=&a" (reg1), "=&a" (reg2)
6924- : "r" (nr), "a" (addr), "a" (&_ni_bitmap) : "cc", "memory" );
6925- return oldbit & 1;
6926+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
6927+ ch = *(unsigned char *) addr;
6928+ asm volatile("nc 0(1,%1),0(%2)"
6929+ : "+m" (*(char *) addr)
6930+ : "a" (addr), "a" (_ni_bitmap + (nr & 7))
6931+ : "cc" );
6932+ return (ch >> (nr & 7)) & 1;
6933 }
6934 #define __test_and_clear_bit(X,Y) test_and_clear_bit_simple(X,Y)
6935
6936 /*
6937 * fast, non-SMP test_and_change_bit routine
6938 */
6939-static __inline__ int test_and_change_bit_simple(int nr, volatile void * addr)
6940+static inline int test_and_change_bit_simple(int nr, volatile void *ptr)
6941 {
6942- unsigned long reg1, reg2;
6943- int oldbit;
6944+ unsigned long addr;
6945+ unsigned char ch;
6946
6947- __asm__ __volatile__(
6948- " lhi %1,24\n"
6949- " lhi %2,7\n"
6950- " xr %1,%3\n"
6951- " nr %2,%1\n"
6952- " srl %1,3\n"
6953- " la %1,0(%1,%4)\n"
6954- " ic %0,0(%1)\n"
6955- " srl %0,0(%2)\n"
6956- " la %2,0(%2,%5)\n"
6957- " xc 0(1,%1),0(%2)"
6958- : "=d&" (oldbit), "=&a" (reg1), "=&a" (reg2)
6959- : "r" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
6960- return oldbit & 1;
6961+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
6962+ ch = *(unsigned char *) addr;
6963+ asm volatile("xc 0(1,%1),0(%2)"
6964+ : "+m" (*(char *) addr)
6965+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
6966+ : "cc" );
6967+ return (ch >> (nr & 7)) & 1;
6968 }
6969 #define __test_and_change_bit(X,Y) test_and_change_bit_simple(X,Y)
6970
6971@@ -574,25 +463,17 @@
6972 * This routine doesn't need to be atomic.
6973 */
6974
6975-static __inline__ int __test_bit(int nr, volatile void * addr)
6976+static inline int __test_bit(int nr, volatile void *ptr)
6977 {
6978- unsigned long reg1, reg2;
6979- int oldbit;
6980+ unsigned long addr;
6981+ unsigned char ch;
6982
6983- __asm__ __volatile__(
6984- " lhi %2,24\n"
6985- " lhi %1,7\n"
6986- " xr %2,%3\n"
6987- " nr %1,%3\n"
6988- " srl %2,3\n"
6989- " ic %0,0(%2,%4)\n"
6990- " srl %0,0(%1)"
6991- : "=d&" (oldbit), "=&a" (reg1), "=&a" (reg2)
6992- : "r" (nr), "a" (addr) : "cc" );
6993- return oldbit & 1;
6994+ addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
6995+ ch = *(unsigned char *) addr;
6996+ return (ch >> (nr & 7)) & 1;
6997 }
6998
6999-static __inline__ int __constant_test_bit(int nr, volatile void * addr) {
7000+static inline int __constant_test_bit(int nr, volatile void * addr) {
7001 return (((volatile char *) addr)[(nr>>3)^3] & (1<<(nr&7))) != 0;
7002 }
7003
7004@@ -604,7 +485,7 @@
7005 /*
7006 * Find-bit routines..
7007 */
7008-static __inline__ int find_first_zero_bit(void * addr, unsigned size)
7009+static inline int find_first_zero_bit(void * addr, unsigned size)
7010 {
7011 unsigned long cmp, count;
7012 int res;
7013@@ -642,7 +523,45 @@
7014 return (res < size) ? res : size;
7015 }
7016
7017-static __inline__ int find_next_zero_bit (void * addr, int size, int offset)
7018+static inline int find_first_bit(void * addr, unsigned size)
7019+{
7020+ unsigned long cmp, count;
7021+ int res;
7022+
7023+ if (!size)
7024+ return 0;
7025+ __asm__(" slr %1,%1\n"
7026+ " lr %2,%3\n"
7027+ " slr %0,%0\n"
7028+ " ahi %2,31\n"
7029+ " srl %2,5\n"
7030+ "0: c %1,0(%0,%4)\n"
7031+ " jne 1f\n"
7032+ " ahi %0,4\n"
7033+ " brct %2,0b\n"
7034+ " lr %0,%3\n"
7035+ " j 4f\n"
7036+ "1: l %2,0(%0,%4)\n"
7037+ " sll %0,3\n"
7038+ " lhi %1,0xff\n"
7039+ " tml %2,0xffff\n"
7040+ " jnz 2f\n"
7041+ " ahi %0,16\n"
7042+ " srl %2,16\n"
7043+ "2: tml %2,0x00ff\n"
7044+ " jnz 3f\n"
7045+ " ahi %0,8\n"
7046+ " srl %2,8\n"
7047+ "3: nr %2,%1\n"
7048+ " ic %2,0(%2,%5)\n"
7049+ " alr %0,%2\n"
7050+ "4:"
7051+ : "=&a" (res), "=&d" (cmp), "=&a" (count)
7052+ : "a" (size), "a" (addr), "a" (&_sb_findmap) : "cc" );
7053+ return (res < size) ? res : size;
7054+}
7055+
7056+static inline int find_next_zero_bit (void * addr, int size, int offset)
7057 {
7058 unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
7059 unsigned long bitvec, reg;
7060@@ -680,11 +599,49 @@
7061 return (offset + res);
7062 }
7063
7064+static inline int find_next_bit (void * addr, int size, int offset)
7065+{
7066+ unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
7067+ unsigned long bitvec, reg;
7068+ int set, bit = offset & 31, res;
7069+
7070+ if (bit) {
7071+ /*
7072+ * Look for set bit in first word
7073+ */
7074+ bitvec = (*p) >> bit;
7075+ __asm__(" slr %0,%0\n"
7076+ " lhi %2,0xff\n"
7077+ " tml %1,0xffff\n"
7078+ " jnz 0f\n"
7079+ " ahi %0,16\n"
7080+ " srl %1,16\n"
7081+ "0: tml %1,0x00ff\n"
7082+ " jnz 1f\n"
7083+ " ahi %0,8\n"
7084+ " srl %1,8\n"
7085+ "1: nr %1,%2\n"
7086+ " ic %1,0(%1,%3)\n"
7087+ " alr %0,%1"
7088+ : "=&d" (set), "+a" (bitvec), "=&d" (reg)
7089+ : "a" (&_sb_findmap) : "cc" );
7090+ if (set < (32 - bit))
7091+ return set + offset;
7092+ offset += 32 - bit;
7093+ p++;
7094+ }
7095+ /*
7096+ * No set bit yet, search remaining full words for a bit
7097+ */
7098+ res = find_first_bit (p, size - 32 * (p - (unsigned long *) addr));
7099+ return (offset + res);
7100+}
7101+
7102 /*
7103 * ffz = Find First Zero in word. Undefined if no zero exists,
7104 * so code should check against ~0UL first..
7105 */
7106-static __inline__ unsigned long ffz(unsigned long word)
7107+static inline unsigned long ffz(unsigned long word)
7108 {
7109 unsigned long reg;
7110 int result;
7111@@ -708,40 +665,109 @@
7112 }
7113
7114 /*
7115+ * __ffs = find first bit in word. Undefined if no bit exists,
7116+ * so code should check against 0UL first..
7117+ */
7118+static inline unsigned long __ffs(unsigned long word)
7119+{
7120+ unsigned long reg, result;
7121+
7122+ __asm__(" slr %0,%0\n"
7123+ " lhi %2,0xff\n"
7124+ " tml %1,0xffff\n"
7125+ " jnz 0f\n"
7126+ " ahi %0,16\n"
7127+ " srl %1,16\n"
7128+ "0: tml %1,0x00ff\n"
7129+ " jnz 1f\n"
7130+ " ahi %0,8\n"
7131+ " srl %1,8\n"
7132+ "1: nr %1,%2\n"
7133+ " ic %1,0(%1,%3)\n"
7134+ " alr %0,%1"
7135+ : "=&d" (result), "+a" (word), "=&d" (reg)
7136+ : "a" (&_sb_findmap) : "cc" );
7137+ return result;
7138+}
7139+
7140+/*
7141+ * Every architecture must define this function. It's the fastest
7142+ * way of searching a 140-bit bitmap where the first 100 bits are
7143+ * unlikely to be set. It's guaranteed that at least one of the 140
7144+ * bits is cleared.
7145+ */
 7146+static inline int _sched_find_first_bit(unsigned long *b)
7147+{
7148+ return find_first_bit(b, 140);
7149+}
7150+
7151+/*
7152 * ffs: find first bit set. This is defined the same way as
7153 * the libc and compiler builtin ffs routines, therefore
7154 * differs in spirit from the above ffz (man ffs).
7155 */
7156
7157-extern int __inline__ ffs (int x)
7158+extern int inline ffs (int x)
7159 {
7160- int r;
7161+ int r = 1;
7162
7163 if (x == 0)
7164- return 0;
7165- __asm__(" slr %0,%0\n"
7166- " tml %1,0xffff\n"
7167+ return 0;
7168+ __asm__(" tml %1,0xffff\n"
7169 " jnz 0f\n"
7170- " ahi %0,16\n"
7171 " srl %1,16\n"
7172+ " ahi %0,16\n"
7173 "0: tml %1,0x00ff\n"
7174 " jnz 1f\n"
7175- " ahi %0,8\n"
7176 " srl %1,8\n"
7177+ " ahi %0,8\n"
7178 "1: tml %1,0x000f\n"
7179 " jnz 2f\n"
7180- " ahi %0,4\n"
7181 " srl %1,4\n"
7182+ " ahi %0,4\n"
7183 "2: tml %1,0x0003\n"
7184 " jnz 3f\n"
7185- " ahi %0,2\n"
7186 " srl %1,2\n"
7187+ " ahi %0,2\n"
7188 "3: tml %1,0x0001\n"
7189 " jnz 4f\n"
7190 " ahi %0,1\n"
7191 "4:"
7192 : "=&d" (r), "+d" (x) : : "cc" );
7193- return r+1;
7194+ return r;
7195+}
7196+
7197+/*
7198+ * fls: find last bit set.
7199+ */
7200+extern __inline__ int fls(int x)
7201+{
7202+ int r = 32;
7203+
7204+ if (x == 0)
7205+ return 0;
7206+ __asm__(" tmh %1,0xffff\n"
7207+ " jz 0f\n"
7208+ " sll %1,16\n"
7209+ " ahi %0,-16\n"
7210+ "0: tmh %1,0xff00\n"
7211+ " jz 1f\n"
7212+ " sll %1,8\n"
7213+ " ahi %0,-8\n"
7214+ "1: tmh %1,0xf000\n"
7215+ " jz 2f\n"
7216+ " sll %1,4\n"
7217+ " ahi %0,-4\n"
7218+ "2: tmh %1,0xc000\n"
7219+ " jz 3f\n"
7220+ " sll %1,2\n"
7221+ " ahi %0,-2\n"
7222+ "3: tmh %1,0x8000\n"
7223+ " jz 4f\n"
7224+ " ahi %0,-1\n"
7225+ "4:"
7226+ : "+d" (r), "+d" (x) : : "cc" );
7227+ return r;
7228 }
7229
7230 /*
7231@@ -769,7 +795,7 @@
7232 #define ext2_set_bit(nr, addr) test_and_set_bit((nr)^24, addr)
7233 #define ext2_clear_bit(nr, addr) test_and_clear_bit((nr)^24, addr)
7234 #define ext2_test_bit(nr, addr) test_bit((nr)^24, addr)
7235-static __inline__ int ext2_find_first_zero_bit(void *vaddr, unsigned size)
7236+static inline int ext2_find_first_zero_bit(void *vaddr, unsigned size)
7237 {
7238 unsigned long cmp, count;
7239 int res;
7240@@ -808,7 +834,7 @@
7241 return (res < size) ? res : size;
7242 }
7243
7244-static __inline__ int
7245+static inline int
7246 ext2_find_next_zero_bit(void *vaddr, unsigned size, unsigned offset)
7247 {
7248 unsigned long *addr = vaddr;
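For reference, the 31-bit s390 routines above number bits big-endian within each 32-bit word: the byte offset (nr ^ 24) >> 3 is the same as (nr >> 3) ^ 3, and _oi_bitmap/_ni_bitmap are lookup tables of the single-bit OR masks and their complements. A portable C sketch of what __set_bit and __test_bit compute (illustration only, the *_sketch names are not part of the patch):

static inline void __set_bit_sketch(int nr, volatile void *ptr)
{
	volatile unsigned char *p = ptr;

	/* same byte as (nr ^ 24) >> 3, same mask as _oi_bitmap[nr & 7] */
	p[(nr >> 3) ^ 3] |= 1 << (nr & 7);
}

static inline int __test_bit_sketch(int nr, volatile void *ptr)
{
	const volatile unsigned char *p = ptr;

	return (p[(nr >> 3) ^ 3] >> (nr & 7)) & 1;
}
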
7249diff -urN linux-2.4.20/include/asm-s390x/bitops.h linux-2.4.20-o1-preempt/include/asm-s390x/bitops.h
7250--- linux-2.4.20/include/asm-s390x/bitops.h Sat Aug 3 02:39:45 2002
7251+++ linux-2.4.20-o1-preempt/include/asm-s390x/bitops.h Tue Feb 18 03:51:30 2003
7252@@ -51,271 +51,220 @@
7253 extern const char _oi_bitmap[];
7254 extern const char _ni_bitmap[];
7255 extern const char _zb_findmap[];
7256+extern const char _sb_findmap[];
7257
7258 #ifdef CONFIG_SMP
7259 /*
7260 * SMP save set_bit routine based on compare and swap (CS)
7261 */
7262-static __inline__ void set_bit_cs(unsigned long nr, volatile void * addr)
7263+static inline void set_bit_cs(unsigned long nr, volatile void *ptr)
7264 {
7265- unsigned long bits, mask;
7266- __asm__ __volatile__(
7267+ unsigned long addr, old, new, mask;
7268+
7269+ addr = (unsigned long) ptr;
7270 #if ALIGN_CS == 1
7271- " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
7272- " ngr %2,%1\n" /* isolate last 2 bits of address */
7273- " xgr %1,%2\n" /* make addr % 4 == 0 */
7274- " sllg %2,%2,3\n"
7275- " agr %0,%2\n" /* add alignement to bitnr */
7276+ addr ^= addr & 7; /* align address to 8 */
7277+ nr += (addr & 7) << 3; /* add alignment to bit number */
7278 #endif
7279- " lghi %2,63\n"
7280- " nr %2,%0\n" /* make shift value */
7281- " xr %0,%2\n"
7282- " srlg %0,%0,3\n"
7283- " lghi %3,1\n"
7284- " la %1,0(%0,%1)\n" /* calc. address for CS */
7285- " sllg %3,%3,0(%2)\n" /* make OR mask */
7286- " lg %0,0(%1)\n"
7287- "0: lgr %2,%0\n" /* CS loop starts here */
7288- " ogr %2,%3\n" /* set bit */
7289- " csg %0,%2,0(%1)\n"
7290- " jl 0b"
7291- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
7292- : "cc", "memory" );
7293+ addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
7294+ mask = 1UL << (nr & 63); /* make OR mask */
7295+ asm volatile(
7296+ " lg %0,0(%4)\n"
7297+ "0: lgr %1,%0\n"
7298+ " ogr %1,%3\n"
7299+ " csg %0,%1,0(%4)\n"
7300+ " jl 0b"
7301+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
7302+ : "d" (mask), "a" (addr)
7303+ : "cc" );
7304 }
7305
7306 /*
7307 * SMP save clear_bit routine based on compare and swap (CS)
7308 */
7309-static __inline__ void clear_bit_cs(unsigned long nr, volatile void * addr)
7310+static inline void clear_bit_cs(unsigned long nr, volatile void *ptr)
7311 {
7312- unsigned long bits, mask;
7313- __asm__ __volatile__(
7314+ unsigned long addr, old, new, mask;
7315+
7316+ addr = (unsigned long) ptr;
7317 #if ALIGN_CS == 1
7318- " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
7319- " ngr %2,%1\n" /* isolate last 2 bits of address */
7320- " xgr %1,%2\n" /* make addr % 4 == 0 */
7321- " sllg %2,%2,3\n"
7322- " agr %0,%2\n" /* add alignement to bitnr */
7323+ addr ^= addr & 7; /* align address to 8 */
7324+ nr += (addr & 7) << 3; /* add alignment to bit number */
7325 #endif
7326- " lghi %2,63\n"
7327- " nr %2,%0\n" /* make shift value */
7328- " xr %0,%2\n"
7329- " srlg %0,%0,3\n"
7330- " lghi %3,-2\n"
7331- " la %1,0(%0,%1)\n" /* calc. address for CS */
7332- " lghi %3,-2\n"
7333- " rllg %3,%3,0(%2)\n" /* make AND mask */
7334- " lg %0,0(%1)\n"
7335- "0: lgr %2,%0\n" /* CS loop starts here */
7336- " ngr %2,%3\n" /* clear bit */
7337- " csg %0,%2,0(%1)\n"
7338- " jl 0b"
7339- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
7340- : "cc", "memory" );
7341+ addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
7342+ mask = ~(1UL << (nr & 63)); /* make AND mask */
7343+ asm volatile(
7344+ " lg %0,0(%4)\n"
7345+ "0: lgr %1,%0\n"
7346+ " ngr %1,%3\n"
7347+ " csg %0,%1,0(%4)\n"
7348+ " jl 0b"
7349+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
7350+ : "d" (mask), "a" (addr)
7351+ : "cc" );
7352 }
7353
7354 /*
7355 * SMP save change_bit routine based on compare and swap (CS)
7356 */
7357-static __inline__ void change_bit_cs(unsigned long nr, volatile void * addr)
7358+static inline void change_bit_cs(unsigned long nr, volatile void *ptr)
7359 {
7360- unsigned long bits, mask;
7361- __asm__ __volatile__(
7362+ unsigned long addr, old, new, mask;
7363+
7364+ addr = (unsigned long) ptr;
7365 #if ALIGN_CS == 1
7366- " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
7367- " ngr %2,%1\n" /* isolate last 2 bits of address */
7368- " xgr %1,%2\n" /* make addr % 4 == 0 */
7369- " sllg %2,%2,3\n"
7370- " agr %0,%2\n" /* add alignement to bitnr */
7371+ addr ^= addr & 7; /* align address to 8 */
7372+ nr += (addr & 7) << 3; /* add alignment to bit number */
7373 #endif
7374- " lghi %2,63\n"
7375- " nr %2,%0\n" /* make shift value */
7376- " xr %0,%2\n"
7377- " srlg %0,%0,3\n"
7378- " lghi %3,1\n"
7379- " la %1,0(%0,%1)\n" /* calc. address for CS */
7380- " sllg %3,%3,0(%2)\n" /* make XR mask */
7381- " lg %0,0(%1)\n"
7382- "0: lgr %2,%0\n" /* CS loop starts here */
7383- " xgr %2,%3\n" /* change bit */
7384- " csg %0,%2,0(%1)\n"
7385- " jl 0b"
7386- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
7387- : "cc", "memory" );
7388+ addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
7389+ mask = 1UL << (nr & 63); /* make XOR mask */
7390+ asm volatile(
7391+ " lg %0,0(%4)\n"
7392+ "0: lgr %1,%0\n"
7393+ " xgr %1,%3\n"
7394+ " csg %0,%1,0(%4)\n"
7395+ " jl 0b"
7396+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
7397+ : "d" (mask), "a" (addr)
7398+ : "cc" );
7399 }
7400
7401 /*
7402 * SMP save test_and_set_bit routine based on compare and swap (CS)
7403 */
7404-static __inline__ int
7405-test_and_set_bit_cs(unsigned long nr, volatile void * addr)
7406+static inline int
7407+test_and_set_bit_cs(unsigned long nr, volatile void *ptr)
7408 {
7409- unsigned long bits, mask;
7410- __asm__ __volatile__(
7411+ unsigned long addr, old, new, mask;
7412+
7413+ addr = (unsigned long) ptr;
7414 #if ALIGN_CS == 1
7415- " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
7416- " ngr %2,%1\n" /* isolate last 2 bits of address */
7417- " xgr %1,%2\n" /* make addr % 4 == 0 */
7418- " sllg %2,%2,3\n"
7419- " agr %0,%2\n" /* add alignement to bitnr */
7420+ addr ^= addr & 7; /* align address to 8 */
7421+ nr += (addr & 7) << 3; /* add alignment to bit number */
7422 #endif
7423- " lghi %2,63\n"
7424- " nr %2,%0\n" /* make shift value */
7425- " xr %0,%2\n"
7426- " srlg %0,%0,3\n"
7427- " lghi %3,1\n"
7428- " la %1,0(%0,%1)\n" /* calc. address for CS */
7429- " sllg %3,%3,0(%2)\n" /* make OR mask */
7430- " lg %0,0(%1)\n"
7431- "0: lgr %2,%0\n" /* CS loop starts here */
7432- " ogr %2,%3\n" /* set bit */
7433- " csg %0,%2,0(%1)\n"
7434- " jl 0b\n"
7435- " ngr %0,%3\n" /* isolate old bit */
7436- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
7437- : "cc", "memory" );
7438- return nr != 0;
7439+ addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
7440+ mask = 1UL << (nr & 63); /* make OR/test mask */
7441+ asm volatile(
7442+ " lg %0,0(%4)\n"
7443+ "0: lgr %1,%0\n"
7444+ " ogr %1,%3\n"
7445+ " csg %0,%1,0(%4)\n"
7446+ " jl 0b"
7447+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
7448+ : "d" (mask), "a" (addr)
7449+ : "cc" );
7450+ return (old & mask) != 0;
7451 }
7452
7453 /*
7454 * SMP save test_and_clear_bit routine based on compare and swap (CS)
7455 */
7456-static __inline__ int
7457-test_and_clear_bit_cs(unsigned long nr, volatile void * addr)
7458+static inline int
7459+test_and_clear_bit_cs(unsigned long nr, volatile void *ptr)
7460 {
7461- unsigned long bits, mask;
7462- __asm__ __volatile__(
7463+ unsigned long addr, old, new, mask;
7464+
7465+ addr = (unsigned long) ptr;
7466 #if ALIGN_CS == 1
7467- " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
7468- " ngr %2,%1\n" /* isolate last 2 bits of address */
7469- " xgr %1,%2\n" /* make addr % 4 == 0 */
7470- " sllg %2,%2,3\n"
7471- " agr %0,%2\n" /* add alignement to bitnr */
7472+ addr ^= addr & 7; /* align address to 8 */
7473+ nr += (addr & 7) << 3; /* add alignment to bit number */
7474 #endif
7475- " lghi %2,63\n"
7476- " nr %2,%0\n" /* make shift value */
7477- " xr %0,%2\n"
7478- " srlg %0,%0,3\n"
7479- " lghi %3,-2\n"
7480- " la %1,0(%0,%1)\n" /* calc. address for CS */
7481- " rllg %3,%3,0(%2)\n" /* make AND mask */
7482- " lg %0,0(%1)\n"
7483- "0: lgr %2,%0\n" /* CS loop starts here */
7484- " ngr %2,%3\n" /* clear bit */
7485- " csg %0,%2,0(%1)\n"
7486- " jl 0b\n"
7487- " xgr %0,%2\n" /* isolate old bit */
7488- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
7489- : "cc", "memory" );
7490- return nr != 0;
7491+ addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
7492+ mask = ~(1UL << (nr & 63)); /* make AND mask */
7493+ asm volatile(
7494+ " lg %0,0(%4)\n"
7495+ "0: lgr %1,%0\n"
7496+ " ngr %1,%3\n"
7497+ " csg %0,%1,0(%4)\n"
7498+ " jl 0b"
7499+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
7500+ : "d" (mask), "a" (addr)
7501+ : "cc" );
7502+ return (old ^ new) != 0;
7503 }
7504
7505 /*
7506 * SMP save test_and_change_bit routine based on compare and swap (CS)
7507 */
7508-static __inline__ int
7509-test_and_change_bit_cs(unsigned long nr, volatile void * addr)
7510+static inline int
7511+test_and_change_bit_cs(unsigned long nr, volatile void *ptr)
7512 {
7513- unsigned long bits, mask;
7514- __asm__ __volatile__(
7515+ unsigned long addr, old, new, mask;
7516+
7517+ addr = (unsigned long) ptr;
7518 #if ALIGN_CS == 1
7519- " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
7520- " ngr %2,%1\n" /* isolate last 2 bits of address */
7521- " xgr %1,%2\n" /* make addr % 4 == 0 */
7522- " sllg %2,%2,3\n"
7523- " agr %0,%2\n" /* add alignement to bitnr */
7524+ addr ^= addr & 7; /* align address to 8 */
7525+ nr += (addr & 7) << 3; /* add alignment to bit number */
7526 #endif
7527- " lghi %2,63\n"
7528- " nr %2,%0\n" /* make shift value */
7529- " xr %0,%2\n"
7530- " srlg %0,%0,3\n"
7531- " lghi %3,1\n"
7532- " la %1,0(%0,%1)\n" /* calc. address for CS */
7533- " sllg %3,%3,0(%2)\n" /* make OR mask */
7534- " lg %0,0(%1)\n"
7535- "0: lgr %2,%0\n" /* CS loop starts here */
7536- " xgr %2,%3\n" /* change bit */
7537- " csg %0,%2,0(%1)\n"
7538- " jl 0b\n"
7539- " ngr %0,%3\n" /* isolate old bit */
7540- : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
7541- : "cc", "memory" );
7542- return nr != 0;
7543+ addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
7544+ mask = 1UL << (nr & 63); /* make XOR mask */
7545+ asm volatile(
7546+ " lg %0,0(%4)\n"
7547+ "0: lgr %1,%0\n"
7548+ " xgr %1,%3\n"
7549+ " csg %0,%1,0(%4)\n"
7550+ " jl 0b"
7551+ : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
7552+ : "d" (mask), "a" (addr)
7553+ : "cc" );
7554+ return (old & mask) != 0;
7555 }
7556 #endif /* CONFIG_SMP */
7557
7558 /*
7559 * fast, non-SMP set_bit routine
7560 */
7561-static __inline__ void __set_bit(unsigned long nr, volatile void * addr)
7562+static inline void __set_bit(unsigned long nr, volatile void *ptr)
7563 {
7564- unsigned long reg1, reg2;
7565- __asm__ __volatile__(
7566- " lghi %1,56\n"
7567- " lghi %0,7\n"
7568- " xgr %1,%2\n"
7569- " nr %0,%2\n"
7570- " srlg %1,%1,3\n"
7571- " la %1,0(%1,%3)\n"
7572- " la %0,0(%0,%4)\n"
7573- " oc 0(1,%1),0(%0)"
7574- : "=&a" (reg1), "=&a" (reg2)
7575- : "a" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
7576-}
7577-
7578-static __inline__ void
7579-__constant_set_bit(const unsigned long nr, volatile void * addr)
7580-{
7581- switch (nr&7) {
7582- case 0:
7583- __asm__ __volatile__ ("la 1,%0\n\t"
7584- "oi 0(1),0x01"
7585- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7586- : : "1", "cc", "memory");
7587- break;
7588- case 1:
7589- __asm__ __volatile__ ("la 1,%0\n\t"
7590- "oi 0(1),0x02"
7591- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7592- : : "1", "cc", "memory" );
7593- break;
7594- case 2:
7595- __asm__ __volatile__ ("la 1,%0\n\t"
7596- "oi 0(1),0x04"
7597- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7598- : : "1", "cc", "memory" );
7599- break;
7600- case 3:
7601- __asm__ __volatile__ ("la 1,%0\n\t"
7602- "oi 0(1),0x08"
7603- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7604- : : "1", "cc", "memory" );
7605- break;
7606- case 4:
7607- __asm__ __volatile__ ("la 1,%0\n\t"
7608- "oi 0(1),0x10"
7609- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7610- : : "1", "cc", "memory" );
7611- break;
7612- case 5:
7613- __asm__ __volatile__ ("la 1,%0\n\t"
7614- "oi 0(1),0x20"
7615- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7616- : : "1", "cc", "memory" );
7617- break;
7618- case 6:
7619- __asm__ __volatile__ ("la 1,%0\n\t"
7620- "oi 0(1),0x40"
7621- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7622- : : "1", "cc", "memory" );
7623- break;
7624- case 7:
7625- __asm__ __volatile__ ("la 1,%0\n\t"
7626- "oi 0(1),0x80"
7627- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7628- : : "1", "cc", "memory" );
7629- break;
7630- }
7631+ unsigned long addr;
7632+
7633+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
7634+ asm volatile("oc 0(1,%1),0(%2)"
7635+ : "+m" (*(char *) addr)
7636+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
7637+ : "cc" );
7638+}
7639+
7640+static inline void
7641+__constant_set_bit(const unsigned long nr, volatile void *ptr)
7642+{
7643+ unsigned long addr;
7644+
7645+ addr = ((unsigned long) ptr) + ((nr >> 3) ^ 7);
7646+ switch (nr&7) {
7647+ case 0:
7648+ asm volatile ("oi 0(%1),0x01"
7649+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7650+ break;
7651+ case 1:
7652+ asm volatile ("oi 0(%1),0x02"
7653+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7654+ break;
7655+ case 2:
7656+ asm volatile ("oi 0(%1),0x04"
7657+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7658+ break;
7659+ case 3:
7660+ asm volatile ("oi 0(%1),0x08"
7661+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7662+ break;
7663+ case 4:
7664+ asm volatile ("oi 0(%1),0x10"
7665+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7666+ break;
7667+ case 5:
7668+ asm volatile ("oi 0(%1),0x20"
7669+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7670+ break;
7671+ case 6:
7672+ asm volatile ("oi 0(%1),0x40"
7673+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7674+ break;
7675+ case 7:
7676+ asm volatile ("oi 0(%1),0x80"
7677+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7678+ break;
7679+ }
7680 }
7681
7682 #define set_bit_simple(nr,addr) \
7683@@ -326,76 +275,58 @@
7684 /*
7685 * fast, non-SMP clear_bit routine
7686 */
7687-static __inline__ void
7688-__clear_bit(unsigned long nr, volatile void * addr)
7689+static inline void
7690+__clear_bit(unsigned long nr, volatile void *ptr)
7691 {
7692- unsigned long reg1, reg2;
7693- __asm__ __volatile__(
7694- " lghi %1,56\n"
7695- " lghi %0,7\n"
7696- " xgr %1,%2\n"
7697- " nr %0,%2\n"
7698- " srlg %1,%1,3\n"
7699- " la %1,0(%1,%3)\n"
7700- " la %0,0(%0,%4)\n"
7701- " nc 0(1,%1),0(%0)"
7702- : "=&a" (reg1), "=&a" (reg2)
7703- : "d" (nr), "a" (addr), "a" (&_ni_bitmap) : "cc", "memory" );
7704-}
7705-
7706-static __inline__ void
7707-__constant_clear_bit(const unsigned long nr, volatile void * addr)
7708-{
7709- switch (nr&7) {
7710- case 0:
7711- __asm__ __volatile__ ("la 1,%0\n\t"
7712- "ni 0(1),0xFE"
7713- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7714- : : "1", "cc", "memory" );
7715- break;
7716- case 1:
7717- __asm__ __volatile__ ("la 1,%0\n\t"
7718- "ni 0(1),0xFD"
7719- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7720- : : "1", "cc", "memory" );
7721- break;
7722- case 2:
7723- __asm__ __volatile__ ("la 1,%0\n\t"
7724- "ni 0(1),0xFB"
7725- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7726- : : "1", "cc", "memory" );
7727- break;
7728- case 3:
7729- __asm__ __volatile__ ("la 1,%0\n\t"
7730- "ni 0(1),0xF7"
7731- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7732- : : "1", "cc", "memory" );
7733- break;
7734- case 4:
7735- __asm__ __volatile__ ("la 1,%0\n\t"
7736- "ni 0(1),0xEF"
7737- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7738- : : "cc", "memory" );
7739- break;
7740- case 5:
7741- __asm__ __volatile__ ("la 1,%0\n\t"
7742- "ni 0(1),0xDF"
7743- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7744- : : "1", "cc", "memory" );
7745- break;
7746- case 6:
7747- __asm__ __volatile__ ("la 1,%0\n\t"
7748- "ni 0(1),0xBF"
7749- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7750- : : "1", "cc", "memory" );
7751- break;
7752- case 7:
7753- __asm__ __volatile__ ("la 1,%0\n\t"
7754- "ni 0(1),0x7F"
7755- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7756- : : "1", "cc", "memory" );
7757- break;
7758- }
7759+ unsigned long addr;
7760+
7761+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
7762+ asm volatile("nc 0(1,%1),0(%2)"
7763+ : "+m" (*(char *) addr)
7764+ : "a" (addr), "a" (_ni_bitmap + (nr & 7))
7765+ : "cc" );
7766+}
7767+
7768+static inline void
7769+__constant_clear_bit(const unsigned long nr, volatile void *ptr)
7770+{
7771+ unsigned long addr;
7772+
7773+ addr = ((unsigned long) ptr) + ((nr >> 3) ^ 7);
7774+ switch (nr&7) {
7775+ case 0:
7776+ asm volatile ("ni 0(%1),0xFE"
7777+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7778+ break;
7779+ case 1:
7780+ asm volatile ("ni 0(%1),0xFD"
7781+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7782+ break;
7783+ case 2:
7784+ asm volatile ("ni 0(%1),0xFB"
7785+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7786+ break;
7787+ case 3:
7788+ asm volatile ("ni 0(%1),0xF7"
7789+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7790+ break;
7791+ case 4:
7792+ asm volatile ("ni 0(%1),0xEF"
7793+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7794+ break;
7795+ case 5:
7796+ asm volatile ("ni 0(%1),0xDF"
7797+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7798+ break;
7799+ case 6:
7800+ asm volatile ("ni 0(%1),0xBF"
7801+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7802+ break;
7803+ case 7:
7804+ asm volatile ("ni 0(%1),0x7F"
7805+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7806+ break;
7807+ }
7808 }
7809
7810 #define clear_bit_simple(nr,addr) \
7811@@ -406,75 +337,57 @@
7812 /*
7813 * fast, non-SMP change_bit routine
7814 */
7815-static __inline__ void __change_bit(unsigned long nr, volatile void * addr)
7816+static inline void __change_bit(unsigned long nr, volatile void *ptr)
7817 {
7818- unsigned long reg1, reg2;
7819- __asm__ __volatile__(
7820- " lghi %1,56\n"
7821- " lghi %0,7\n"
7822- " xgr %1,%2\n"
7823- " nr %0,%2\n"
7824- " srlg %1,%1,3\n"
7825- " la %1,0(%1,%3)\n"
7826- " la %0,0(%0,%4)\n"
7827- " xc 0(1,%1),0(%0)"
7828- : "=&a" (reg1), "=&a" (reg2)
7829- : "d" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
7830-}
7831-
7832-static __inline__ void
7833-__constant_change_bit(const unsigned long nr, volatile void * addr)
7834-{
7835- switch (nr&7) {
7836- case 0:
7837- __asm__ __volatile__ ("la 1,%0\n\t"
7838- "xi 0(1),0x01"
7839- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7840- : : "cc", "memory" );
7841- break;
7842- case 1:
7843- __asm__ __volatile__ ("la 1,%0\n\t"
7844- "xi 0(1),0x02"
7845- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7846- : : "cc", "memory" );
7847- break;
7848- case 2:
7849- __asm__ __volatile__ ("la 1,%0\n\t"
7850- "xi 0(1),0x04"
7851- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7852- : : "cc", "memory" );
7853- break;
7854- case 3:
7855- __asm__ __volatile__ ("la 1,%0\n\t"
7856- "xi 0(1),0x08"
7857- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7858- : : "cc", "memory" );
7859- break;
7860- case 4:
7861- __asm__ __volatile__ ("la 1,%0\n\t"
7862- "xi 0(1),0x10"
7863- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7864- : : "cc", "memory" );
7865- break;
7866- case 5:
7867- __asm__ __volatile__ ("la 1,%0\n\t"
7868- "xi 0(1),0x20"
7869- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7870- : : "1", "cc", "memory" );
7871- break;
7872- case 6:
7873- __asm__ __volatile__ ("la 1,%0\n\t"
7874- "xi 0(1),0x40"
7875- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7876- : : "1", "cc", "memory" );
7877- break;
7878- case 7:
7879- __asm__ __volatile__ ("la 1,%0\n\t"
7880- "xi 0(1),0x80"
7881- : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7882- : : "1", "cc", "memory" );
7883- break;
7884- }
7885+ unsigned long addr;
7886+
7887+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
7888+ asm volatile("xc 0(1,%1),0(%2)"
7889+ : "+m" (*(char *) addr)
7890+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
7891+ : "cc" );
7892+}
7893+
7894+static inline void
7895+__constant_change_bit(const unsigned long nr, volatile void *ptr)
7896+{
7897+ unsigned long addr;
7898+
7899+ addr = ((unsigned long) ptr) + ((nr >> 3) ^ 7);
7900+ switch (nr&7) {
7901+ case 0:
7902+ asm volatile ("xi 0(%1),0x01"
7903+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7904+ break;
7905+ case 1:
7906+ asm volatile ("xi 0(%1),0x02"
7907+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7908+ break;
7909+ case 2:
7910+ asm volatile ("xi 0(%1),0x04"
7911+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7912+ break;
7913+ case 3:
7914+ asm volatile ("xi 0(%1),0x08"
7915+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7916+ break;
7917+ case 4:
7918+ asm volatile ("xi 0(%1),0x10"
7919+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7920+ break;
7921+ case 5:
7922+ asm volatile ("xi 0(%1),0x20"
7923+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7924+ break;
7925+ case 6:
7926+ asm volatile ("xi 0(%1),0x40"
7927+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7928+ break;
7929+ case 7:
7930+ asm volatile ("xi 0(%1),0x80"
7931+ : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7932+ break;
7933+ }
7934 }
7935
7936 #define change_bit_simple(nr,addr) \
7937@@ -485,77 +398,57 @@
7938 /*
7939 * fast, non-SMP test_and_set_bit routine
7940 */
7941-static __inline__ int
7942-test_and_set_bit_simple(unsigned long nr, volatile void * addr)
7943+static inline int
7944+test_and_set_bit_simple(unsigned long nr, volatile void *ptr)
7945 {
7946- unsigned long reg1, reg2;
7947- int oldbit;
7948- __asm__ __volatile__(
7949- " lghi %1,56\n"
7950- " lghi %2,7\n"
7951- " xgr %1,%3\n"
7952- " nr %2,%3\n"
7953- " srlg %1,%1,3\n"
7954- " la %1,0(%1,%4)\n"
7955- " ic %0,0(%1)\n"
7956- " srl %0,0(%2)\n"
7957- " la %2,0(%2,%5)\n"
7958- " oc 0(1,%1),0(%2)"
7959- : "=&d" (oldbit), "=&a" (reg1), "=&a" (reg2)
7960- : "d" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
7961- return oldbit & 1;
7962+ unsigned long addr;
7963+ unsigned char ch;
7964+
7965+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
7966+ ch = *(unsigned char *) addr;
7967+ asm volatile("oc 0(1,%1),0(%2)"
7968+ : "+m" (*(char *) addr)
7969+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
7970+ : "cc" );
7971+ return (ch >> (nr & 7)) & 1;
7972 }
7973 #define __test_and_set_bit(X,Y) test_and_set_bit_simple(X,Y)
7974
7975 /*
7976 * fast, non-SMP test_and_clear_bit routine
7977 */
7978-static __inline__ int
7979-test_and_clear_bit_simple(unsigned long nr, volatile void * addr)
7980+static inline int
7981+test_and_clear_bit_simple(unsigned long nr, volatile void *ptr)
7982 {
7983- unsigned long reg1, reg2;
7984- int oldbit;
7985+ unsigned long addr;
7986+ unsigned char ch;
7987
7988- __asm__ __volatile__(
7989- " lghi %1,56\n"
7990- " lghi %2,7\n"
7991- " xgr %1,%3\n"
7992- " nr %2,%3\n"
7993- " srlg %1,%1,3\n"
7994- " la %1,0(%1,%4)\n"
7995- " ic %0,0(%1)\n"
7996- " srl %0,0(%2)\n"
7997- " la %2,0(%2,%5)\n"
7998- " nc 0(1,%1),0(%2)"
7999- : "=&d" (oldbit), "=&a" (reg1), "=&a" (reg2)
8000- : "d" (nr), "a" (addr), "a" (&_ni_bitmap) : "cc", "memory" );
8001- return oldbit & 1;
8002+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
8003+ ch = *(unsigned char *) addr;
8004+ asm volatile("nc 0(1,%1),0(%2)"
8005+ : "+m" (*(char *) addr)
8006+ : "a" (addr), "a" (_ni_bitmap + (nr & 7))
8007+ : "cc" );
8008+ return (ch >> (nr & 7)) & 1;
8009 }
8010 #define __test_and_clear_bit(X,Y) test_and_clear_bit_simple(X,Y)
8011
8012 /*
8013 * fast, non-SMP test_and_change_bit routine
8014 */
8015-static __inline__ int
8016-test_and_change_bit_simple(unsigned long nr, volatile void * addr)
8017+static inline int
8018+test_and_change_bit_simple(unsigned long nr, volatile void *ptr)
8019 {
8020- unsigned long reg1, reg2;
8021- int oldbit;
8022+ unsigned long addr;
8023+ unsigned char ch;
8024
8025- __asm__ __volatile__(
8026- " lghi %1,56\n"
8027- " lghi %2,7\n"
8028- " xgr %1,%3\n"
8029- " nr %2,%3\n"
8030- " srlg %1,%1,3\n"
8031- " la %1,0(%1,%4)\n"
8032- " ic %0,0(%1)\n"
8033- " srl %0,0(%2)\n"
8034- " la %2,0(%2,%5)\n"
8035- " xc 0(1,%1),0(%2)"
8036- : "=&d" (oldbit), "=&a" (reg1), "=&a" (reg2)
8037- : "d" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
8038- return oldbit & 1;
8039+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
8040+ ch = *(unsigned char *) addr;
8041+ asm volatile("xc 0(1,%1),0(%2)"
8042+ : "+m" (*(char *) addr)
8043+ : "a" (addr), "a" (_oi_bitmap + (nr & 7))
8044+ : "cc" );
8045+ return (ch >> (nr & 7)) & 1;
8046 }
8047 #define __test_and_change_bit(X,Y) test_and_change_bit_simple(X,Y)
8048
8049@@ -580,26 +473,18 @@
8050 * This routine doesn't need to be atomic.
8051 */
8052
8053-static __inline__ int __test_bit(unsigned long nr, volatile void * addr)
8054+static inline int __test_bit(unsigned long nr, volatile void *ptr)
8055 {
8056- unsigned long reg1, reg2;
8057- int oldbit;
8058+ unsigned long addr;
8059+ unsigned char ch;
8060
8061- __asm__ __volatile__(
8062- " lghi %2,56\n"
8063- " lghi %1,7\n"
8064- " xgr %2,%3\n"
8065- " nr %1,%3\n"
8066- " srlg %2,%2,3\n"
8067- " ic %0,0(%2,%4)\n"
8068- " srl %0,0(%1)\n"
8069- : "=&d" (oldbit), "=&a" (reg1), "=&a" (reg2)
8070- : "d" (nr), "a" (addr) : "cc" );
8071- return oldbit & 1;
8072+ addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
8073+ ch = *(unsigned char *) addr;
8074+ return (ch >> (nr & 7)) & 1;
8075 }
8076
8077-static __inline__ int
8078-__constant_test_bit(unsigned long nr, volatile void * addr) {
8079+static inline int
8080+__constant_test_bit(unsigned long nr, volatile void *addr) {
8081 return (((volatile char *) addr)[(nr>>3)^7] & (1<<(nr&7))) != 0;
8082 }
8083
8084@@ -611,7 +496,7 @@
8085 /*
8086 * Find-bit routines..
8087 */
8088-static __inline__ unsigned long
8089+static inline unsigned long
8090 find_first_zero_bit(void * addr, unsigned long size)
8091 {
8092 unsigned long res, cmp, count;
8093@@ -653,7 +538,49 @@
8094 return (res < size) ? res : size;
8095 }
8096
8097-static __inline__ unsigned long
8098+static inline unsigned long
8099+find_first_bit(void * addr, unsigned long size)
8100+{
8101+ unsigned long res, cmp, count;
8102+
8103+ if (!size)
8104+ return 0;
8105+ __asm__(" slgr %1,%1\n"
8106+ " lgr %2,%3\n"
8107+ " slgr %0,%0\n"
8108+ " aghi %2,63\n"
8109+ " srlg %2,%2,6\n"
8110+ "0: cg %1,0(%0,%4)\n"
8111+ " jne 1f\n"
8112+ " aghi %0,8\n"
8113+ " brct %2,0b\n"
8114+ " lgr %0,%3\n"
8115+ " j 5f\n"
8116+ "1: lg %2,0(%0,%4)\n"
8117+ " sllg %0,%0,3\n"
8118+ " clr %2,%1\n"
8119+ " jne 2f\n"
8120+ " aghi %0,32\n"
8121+ " srlg %2,%2,32\n"
8122+ "2: lghi %1,0xff\n"
8123+ " tmll %2,0xffff\n"
8124+ " jnz 3f\n"
8125+ " aghi %0,16\n"
8126+ " srl %2,16\n"
8127+ "3: tmll %2,0x00ff\n"
8128+ " jnz 4f\n"
8129+ " aghi %0,8\n"
8130+ " srl %2,8\n"
8131+ "4: ngr %2,%1\n"
8132+ " ic %2,0(%2,%5)\n"
8133+ " algr %0,%2\n"
8134+ "5:"
8135+ : "=&a" (res), "=&d" (cmp), "=&a" (count)
8136+ : "a" (size), "a" (addr), "a" (&_sb_findmap) : "cc" );
8137+ return (res < size) ? res : size;
8138+}
8139+
8140+static inline unsigned long
8141 find_next_zero_bit (void * addr, unsigned long size, unsigned long offset)
8142 {
8143 unsigned long * p = ((unsigned long *) addr) + (offset >> 6);
8144@@ -697,14 +624,56 @@
8145 return (offset + res);
8146 }
8147
8148+static inline unsigned long
8149+find_next_bit (void * addr, unsigned long size, unsigned long offset)
8150+{
8151+ unsigned long * p = ((unsigned long *) addr) + (offset >> 6);
8152+ unsigned long bitvec, reg;
8153+ unsigned long set, bit = offset & 63, res;
8154+
8155+ if (bit) {
8156+ /*
 8157+		 * Look for set bit in first word
8158+ */
8159+ bitvec = (*p) >> bit;
8160+ __asm__(" slgr %0,%0\n"
8161+ " ltr %1,%1\n"
8162+ " jnz 0f\n"
8163+ " aghi %0,32\n"
8164+ " srlg %1,%1,32\n"
8165+ "0: lghi %2,0xff\n"
8166+ " tmll %1,0xffff\n"
8167+ " jnz 1f\n"
8168+ " aghi %0,16\n"
8169+ " srlg %1,%1,16\n"
8170+ "1: tmll %1,0x00ff\n"
8171+ " jnz 2f\n"
8172+ " aghi %0,8\n"
8173+ " srlg %1,%1,8\n"
8174+ "2: ngr %1,%2\n"
8175+ " ic %1,0(%1,%3)\n"
8176+ " algr %0,%1"
8177+ : "=&d" (set), "+a" (bitvec), "=&d" (reg)
8178+ : "a" (&_sb_findmap) : "cc" );
8179+ if (set < (64 - bit))
8180+ return set + offset;
8181+ offset += 64 - bit;
8182+ p++;
8183+ }
8184+ /*
8185+ * No set bit yet, search remaining full words for a bit
8186+ */
8187+ res = find_first_bit (p, size - 64 * (p - (unsigned long *) addr));
8188+ return (offset + res);
8189+}
8190+
8191 /*
8192 * ffz = Find First Zero in word. Undefined if no zero exists,
8193 * so code should check against ~0UL first..
8194 */
8195-static __inline__ unsigned long ffz(unsigned long word)
8196+static inline unsigned long ffz(unsigned long word)
8197 {
8198- unsigned long reg;
8199- int result;
8200+ unsigned long reg, result;
8201
8202 __asm__(" lhi %2,-1\n"
8203 " slgr %0,%0\n"
8204@@ -730,40 +699,112 @@
8205 }
8206
8207 /*
8208+ * __ffs = find first bit in word. Undefined if no bit exists,
8209+ * so code should check against 0UL first..
8210+ */
8211+static inline unsigned long __ffs (unsigned long word)
8212+{
8213+ unsigned long reg, result;
8214+
8215+ __asm__(" slgr %0,%0\n"
8216+ " ltr %1,%1\n"
8217+ " jnz 0f\n"
8218+ " aghi %0,32\n"
8219+ " srlg %1,%1,32\n"
8220+ "0: lghi %2,0xff\n"
8221+ " tmll %1,0xffff\n"
8222+ " jnz 1f\n"
8223+ " aghi %0,16\n"
8224+ " srlg %1,%1,16\n"
8225+ "1: tmll %1,0x00ff\n"
8226+ " jnz 2f\n"
8227+ " aghi %0,8\n"
8228+ " srlg %1,%1,8\n"
8229+ "2: ngr %1,%2\n"
8230+ " ic %1,0(%1,%3)\n"
8231+ " algr %0,%1"
8232+ : "=&d" (result), "+a" (word), "=&d" (reg)
8233+ : "a" (&_sb_findmap) : "cc" );
8234+ return result;
8235+}
8236+
8237+/*
8238+ * Every architecture must define this function. It's the fastest
8239+ * way of searching a 140-bit bitmap where the first 100 bits are
8240+ * unlikely to be set. It's guaranteed that at least one of the 140
8241+ * bits is cleared.
8242+ */
 8243+static inline int _sched_find_first_bit(unsigned long *b)
8244+{
8245+ return find_first_bit(b, 140);
8246+}
8247+
8248+/*
8249 * ffs: find first bit set. This is defined the same way as
8250 * the libc and compiler builtin ffs routines, therefore
8251 * differs in spirit from the above ffz (man ffs).
8252 */
8253-
8254-extern int __inline__ ffs (int x)
8255+extern int inline ffs (int x)
8256 {
8257- int r;
8258+ int r = 1;
8259
8260 if (x == 0)
8261- return 0;
8262- __asm__(" slr %0,%0\n"
8263- " tml %1,0xffff\n"
8264+ return 0;
8265+ __asm__(" tml %1,0xffff\n"
8266 " jnz 0f\n"
8267- " ahi %0,16\n"
8268 " srl %1,16\n"
8269+ " ahi %0,16\n"
8270 "0: tml %1,0x00ff\n"
8271 " jnz 1f\n"
8272- " ahi %0,8\n"
8273 " srl %1,8\n"
8274+ " ahi %0,8\n"
8275 "1: tml %1,0x000f\n"
8276 " jnz 2f\n"
8277- " ahi %0,4\n"
8278 " srl %1,4\n"
8279+ " ahi %0,4\n"
8280 "2: tml %1,0x0003\n"
8281 " jnz 3f\n"
8282- " ahi %0,2\n"
8283 " srl %1,2\n"
8284+ " ahi %0,2\n"
8285 "3: tml %1,0x0001\n"
8286 " jnz 4f\n"
8287 " ahi %0,1\n"
8288 "4:"
8289 : "=&d" (r), "+d" (x) : : "cc" );
8290- return r+1;
8291+ return r;
8292+}
8293+
8294+/*
8295+ * fls: find last bit set.
8296+ */
8297+extern __inline__ int fls(int x)
8298+{
8299+ int r = 32;
8300+
8301+ if (x == 0)
8302+ return 0;
8303+ __asm__(" tmh %1,0xffff\n"
8304+ " jz 0f\n"
8305+ " sll %1,16\n"
8306+ " ahi %0,-16\n"
8307+ "0: tmh %1,0xff00\n"
8308+ " jz 1f\n"
8309+ " sll %1,8\n"
8310+ " ahi %0,-8\n"
8311+ "1: tmh %1,0xf000\n"
8312+ " jz 2f\n"
8313+ " sll %1,4\n"
8314+ " ahi %0,-4\n"
8315+ "2: tmh %1,0xc000\n"
8316+ " jz 3f\n"
8317+ " sll %1,2\n"
8318+ " ahi %0,-2\n"
8319+ "3: tmh %1,0x8000\n"
8320+ " jz 4f\n"
8321+ " ahi %0,-1\n"
8322+ "4:"
8323+ : "+d" (r), "+d" (x) : : "cc" );
8324+ return r;
8325 }
8326
8327 /*
8328@@ -791,7 +832,7 @@
8329 #define ext2_set_bit(nr, addr) test_and_set_bit((nr)^56, addr)
8330 #define ext2_clear_bit(nr, addr) test_and_clear_bit((nr)^56, addr)
8331 #define ext2_test_bit(nr, addr) test_bit((nr)^56, addr)
8332-static __inline__ unsigned long
8333+static inline unsigned long
8334 ext2_find_first_zero_bit(void *vaddr, unsigned long size)
8335 {
8336 unsigned long res, cmp, count;
8337@@ -833,7 +874,7 @@
8338 return (res < size) ? res : size;
8339 }
8340
8341-static __inline__ unsigned long
8342+static inline unsigned long
8343 ext2_find_next_zero_bit(void *vaddr, unsigned long size, unsigned long offset)
8344 {
8345 unsigned long *addr = vaddr;
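The SMP variants in the 64-bit header above all follow one pattern: compute the doubleword that holds the bit and a mask, then loop on compare-and-swap (CSG) until the update wins the race. A hedged portable sketch of the set_bit_cs() loop, with GCC's __sync builtin standing in for the CSG instruction and the ALIGN_CS fixup left out (illustration only, not code from the patch):

static inline void set_bit_cs_sketch(unsigned long nr, volatile void *ptr)
{
	unsigned long *word = (unsigned long *) ptr + (nr >> 6);	/* doubleword holding the bit */
	unsigned long mask = 1UL << (nr & 63);				/* "make OR mask" */
	unsigned long old;

	do {
		old = *word;						/* lg / lgr */
	} while (__sync_val_compare_and_swap(word, old, old | mask) != old);	/* ogr + csg, jl 0b */
}
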
8346diff -urN linux-2.4.20/include/asm-sh/hardirq.h linux-2.4.20-o1-preempt/include/asm-sh/hardirq.h
8347--- linux-2.4.20/include/asm-sh/hardirq.h Sat Sep 8 21:29:09 2001
8348+++ linux-2.4.20-o1-preempt/include/asm-sh/hardirq.h Tue Feb 18 03:52:06 2003
8349@@ -34,6 +34,8 @@
8350
8351 #define synchronize_irq() barrier()
8352
8353+#define release_irqlock(cpu) do { } while (0)
8354+
8355 #else
8356
8357 #error Super-H SMP is not available
8358diff -urN linux-2.4.20/include/asm-sh/smplock.h linux-2.4.20-o1-preempt/include/asm-sh/smplock.h
8359--- linux-2.4.20/include/asm-sh/smplock.h Sat Sep 8 21:29:09 2001
8360+++ linux-2.4.20-o1-preempt/include/asm-sh/smplock.h Tue Feb 18 03:52:06 2003
8361@@ -9,15 +9,88 @@
8362
8363 #include <linux/config.h>
8364
8365-#ifndef CONFIG_SMP
8366-
8367+#if !defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT)
8368+/*
8369+ * Should never happen, since linux/smp_lock.h catches this case;
8370+ * but in case this file is included directly with neither SMP nor
8371+ * PREEMPT configuration, provide same dummys as linux/smp_lock.h
8372+ */
8373 #define lock_kernel() do { } while(0)
8374 #define unlock_kernel() do { } while(0)
8375-#define release_kernel_lock(task, cpu, depth) ((depth) = 1)
8376-#define reacquire_kernel_lock(task, cpu, depth) do { } while(0)
8377+#define release_kernel_lock(task, cpu) do { } while(0)
8378+#define reacquire_kernel_lock(task) do { } while(0)
8379+#define kernel_locked() 1
8380+
8381+#else /* CONFIG_SMP || CONFIG_PREEMPT */
8382+
8383+#if CONFIG_SMP
8384+#error "We do not support SMP on SH yet"
8385+#endif
8386+/*
8387+ * Default SMP lock implementation (i.e. the i386 version)
8388+ */
8389+
8390+#include <linux/interrupt.h>
8391+#include <linux/spinlock.h>
8392+
8393+extern spinlock_t kernel_flag;
8394+#define lock_bkl() spin_lock(&kernel_flag)
8395+#define unlock_bkl() spin_unlock(&kernel_flag)
8396
8397+#ifdef CONFIG_SMP
8398+#define kernel_locked() spin_is_locked(&kernel_flag)
8399+#elif CONFIG_PREEMPT
8400+#define kernel_locked() preempt_get_count()
8401+#else /* neither */
8402+#define kernel_locked() 1
8403+#endif
8404+
8405+/*
8406+ * Release global kernel lock and global interrupt lock
8407+ */
8408+#define release_kernel_lock(task, cpu) \
8409+do { \
8410+ if (task->lock_depth >= 0) \
8411+ spin_unlock(&kernel_flag); \
8412+ release_irqlock(cpu); \
8413+ __sti(); \
8414+} while (0)
8415+
8416+/*
8417+ * Re-acquire the kernel lock
8418+ */
8419+#define reacquire_kernel_lock(task) \
8420+do { \
8421+ if (task->lock_depth >= 0) \
8422+ spin_lock(&kernel_flag); \
8423+} while (0)
8424+
8425+/*
8426+ * Getting the big kernel lock.
8427+ *
8428+ * This cannot happen asynchronously,
8429+ * so we only need to worry about other
8430+ * CPU's.
8431+ */
8432+static __inline__ void lock_kernel(void)
8433+{
8434+#ifdef CONFIG_PREEMPT
8435+ if (current->lock_depth == -1)
8436+ spin_lock(&kernel_flag);
8437+ ++current->lock_depth;
8438 #else
8439-#error "We do not support SMP on SH"
8440-#endif /* CONFIG_SMP */
8441+ if (!++current->lock_depth)
8442+ spin_lock(&kernel_flag);
8443+#endif
8444+}
8445+
8446+static __inline__ void unlock_kernel(void)
8447+{
8448+ if (current->lock_depth < 0)
8449+ BUG();
8450+ if (--current->lock_depth < 0)
8451+ spin_unlock(&kernel_flag);
8452+}
8453+#endif /* CONFIG_SMP || CONFIG_PREEMPT */
8454
8455 #endif /* __ASM_SH_SMPLOCK_H */
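Under CONFIG_PREEMPT the big kernel lock above is a counted, nestable lock: current->lock_depth starts at -1, kernel_flag is taken only by the outermost lock_kernel() and released by the matching outermost unlock_kernel(). A short illustration of the nesting (hypothetical caller, not code from the patch):

void example_bkl_user(void)		/* hypothetical function */
{
	lock_kernel();		/* lock_depth -1 -> 0: kernel_flag taken */
	lock_kernel();		/* lock_depth  0 -> 1: no extra locking */

	/* ... work done under the big kernel lock ... */

	unlock_kernel();	/* lock_depth  1 -> 0: still held */
	unlock_kernel();	/* lock_depth  0 -> -1: kernel_flag released */
}
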
8456diff -urN linux-2.4.20/include/asm-sh/softirq.h linux-2.4.20-o1-preempt/include/asm-sh/softirq.h
8457--- linux-2.4.20/include/asm-sh/softirq.h Sat Sep 8 21:29:09 2001
8458+++ linux-2.4.20-o1-preempt/include/asm-sh/softirq.h Tue Feb 18 03:52:06 2003
8459@@ -6,6 +6,7 @@
8460
8461 #define local_bh_disable() \
8462 do { \
8463+ preempt_disable(); \
8464 local_bh_count(smp_processor_id())++; \
8465 barrier(); \
8466 } while (0)
8467@@ -14,6 +15,7 @@
8468 do { \
8469 barrier(); \
8470 local_bh_count(smp_processor_id())--; \
8471+ preempt_enable(); \
8472 } while (0)
8473
8474 #define local_bh_enable() \
8475@@ -23,6 +25,7 @@
8476 && softirq_pending(smp_processor_id())) { \
8477 do_softirq(); \
8478 } \
8479+ preempt_enable(); \
8480 } while (0)
8481
8482 #define in_softirq() (local_bh_count(smp_processor_id()) != 0)
8483diff -urN linux-2.4.20/include/asm-sh/system.h linux-2.4.20-o1-preempt/include/asm-sh/system.h
8484--- linux-2.4.20/include/asm-sh/system.h Sat Sep 8 21:29:09 2001
8485+++ linux-2.4.20-o1-preempt/include/asm-sh/system.h Tue Feb 18 03:52:06 2003
8486@@ -285,4 +285,17 @@
8487 void disable_hlt(void);
8488 void enable_hlt(void);
8489
8490+/*
8491+ * irqs_disabled - are interrupts disabled?
8492+ */
8493+static inline int irqs_disabled(void)
8494+{
8495+ unsigned long flags;
8496+
8497+ __save_flags(flags);
8498+ if (flags & 0x000000f0)
8499+ return 1;
8500+ return 0;
8501+}
8502+
8503 #endif
8504diff -urN linux-2.4.20/include/asm-sparc/bitops.h linux-2.4.20-o1-preempt/include/asm-sparc/bitops.h
8505--- linux-2.4.20/include/asm-sparc/bitops.h Fri Dec 21 18:42:03 2001
8506+++ linux-2.4.20-o1-preempt/include/asm-sparc/bitops.h Tue Feb 18 03:51:30 2003
8507@@ -13,6 +13,23 @@
8508 #include <asm/byteorder.h>
8509 #include <asm/system.h>
8510
8511+/**
8512+ * __ffs - find first bit in word.
8513+ * @word: The word to search
8514+ *
8515+ * Undefined if no bit exists, so code should check against 0 first.
8516+ */
8517+static __inline__ unsigned long __ffs(unsigned long word)
8518+{
8519+ unsigned long result = 0;
8520+
8521+ while (!(word & 1UL)) {
8522+ result++;
8523+ word >>= 1;
8524+ }
8525+ return result;
8526+}
8527+
8528 #ifdef __KERNEL__
8529
8530 /*
8531@@ -205,6 +222,25 @@
8532 word >>= 1;
8533 }
8534 return result;
8535+}
8536+
8537+/*
8538+ * Every architecture must define this function. It's the fastest
8539+ * way of searching a 140-bit bitmap where the first 100 bits are
8540+ * unlikely to be set. It's guaranteed that at least one of the 140
8541+ * bits is cleared.
8542+ */
8543+static inline int _sched_find_first_bit(unsigned long *b)
8544+{
8545+ if (unlikely(b[0]))
8546+ return __ffs(b[0]);
8547+ if (unlikely(b[1]))
8548+ return __ffs(b[1]) + 32;
8549+ if (unlikely(b[2]))
8550+ return __ffs(b[2]) + 64;
8551+ if (b[3])
8552+ return __ffs(b[3]) + 96;
8553+ return __ffs(b[4]) + 128;
8554 }
8555
8556 /*
8557--- linux-2.4.20/include/asm-sparc/spinlock.h.orig Wed Oct 31 00:08:11 2001
8558+++ linux-2.4.20/include/asm-sparc/spinlock.h Sun Mar 9 13:41:06 2003
8559@@ -36,9 +36,9 @@
8560 extern int _spin_trylock(spinlock_t *lock);
8561 extern void _do_spin_unlock(spinlock_t *lock);
8562
8563-#define spin_trylock(lp) _spin_trylock(lp)
8564-#define spin_lock(lock) _do_spin_lock(lock, "spin_lock")
8565-#define spin_unlock(lock) _do_spin_unlock(lock)
8566+#define _raw_spin_trylock(lp) _spin_trylock(lp)
8567+#define _raw_spin_lock(lock) _do_spin_lock(lock, "_raw_spin_lock")
8568+#define _raw_spin_unlock(lock) _do_spin_unlock(lock)
8569
8570 struct _rwlock_debug {
8571 volatile unsigned int lock;
8572@@ -56,28 +56,28 @@
8573 extern void _do_write_lock(rwlock_t *rw, char *str);
8574 extern void _do_write_unlock(rwlock_t *rw);
8575
8576-#define read_lock(lock) \
8577+#define _raw_read_lock(lock) \
8578 do { unsigned long flags; \
8579 __save_and_cli(flags); \
8580- _do_read_lock(lock, "read_lock"); \
8581+ _do_read_lock(lock, "_raw_read_lock"); \
8582 __restore_flags(flags); \
8583 } while(0)
8584
8585-#define read_unlock(lock) \
8586+#define _raw_read_unlock(lock) \
8587 do { unsigned long flags; \
8588 __save_and_cli(flags); \
8589- _do_read_unlock(lock, "read_unlock"); \
8590+ _do_read_unlock(lock, "_raw_read_unlock"); \
8591 __restore_flags(flags); \
8592 } while(0)
8593
8594-#define write_lock(lock) \
8595+#define _raw_write_lock(lock) \
8596 do { unsigned long flags; \
8597 __save_and_cli(flags); \
8598- _do_write_lock(lock, "write_lock"); \
8599+ _do_write_lock(lock, "_raw_write_lock"); \
8600 __restore_flags(flags); \
8601 } while(0)
8602
8603-#define write_unlock(lock) \
8604+#define _raw_write_unlock(lock) \
8605 do { unsigned long flags; \
8606 __save_and_cli(flags); \
8607 _do_write_unlock(lock); \
8608@@ -97,7 +97,7 @@
8609 barrier(); \
8610 } while(*((volatile unsigned char *)lock))
8611
8612-extern __inline__ void spin_lock(spinlock_t *lock)
8613+extern __inline__ void _raw_spin_lock(spinlock_t *lock)
8614 {
8615 __asm__ __volatile__(
8616 "\n1:\n\t"
8617@@ -117,7 +117,7 @@
8618 : "g2", "memory", "cc");
8619 }
8620
8621-extern __inline__ int spin_trylock(spinlock_t *lock)
8622+extern __inline__ int _raw_spin_trylock(spinlock_t *lock)
8623 {
8624 unsigned int result;
8625 __asm__ __volatile__("ldstub [%1], %0"
8626@@ -127,7 +127,7 @@
8627 return (result == 0);
8628 }
8629
8630-extern __inline__ void spin_unlock(spinlock_t *lock)
8631+extern __inline__ void _raw_spin_unlock(spinlock_t *lock)
8632 {
8633 __asm__ __volatile__("stb %%g0, [%0]" : : "r" (lock) : "memory");
8634 }
8635@@ -178,7 +178,7 @@
8636 : "g2", "g4", "memory", "cc");
8637 }
8638
8639-#define read_lock(lock) \
8640+#define _raw_read_lock(lock) \
8641 do { unsigned long flags; \
8642 __save_and_cli(flags); \
8643 _read_lock(lock); \
8644@@ -198,14 +198,14 @@
8645 : "g2", "g4", "memory", "cc");
8646 }
8647
8648-#define read_unlock(lock) \
8649+#define _raw_read_unlock(lock) \
8650 do { unsigned long flags; \
8651 __save_and_cli(flags); \
8652 _read_unlock(lock); \
8653 __restore_flags(flags); \
8654 } while(0)
8655
8656-extern __inline__ void write_lock(rwlock_t *rw)
8657+extern __inline__ void _raw_write_lock(rwlock_t *rw)
8658 {
8659 register rwlock_t *lp asm("g1");
8660 lp = rw;
8661@@ -218,7 +218,7 @@
8662 : "g2", "g4", "memory", "cc");
8663 }
8664
8665-#define write_unlock(rw) do { (rw)->lock = 0; } while(0)
8666+#define _raw_write_unlock(rw) do { (rw)->lock = 0; } while(0)
8667
8668 #endif /* SPIN_LOCK_DEBUG */
8669
8670diff -urN linux-2.4.20/include/asm-sparc/system.h linux-2.4.20-o1-preempt/include/asm-sparc/system.h
8671--- linux-2.4.20/include/asm-sparc/system.h Wed Oct 31 00:08:11 2001
8672+++ linux-2.4.20-o1-preempt/include/asm-sparc/system.h Tue Feb 18 03:51:30 2003
8673@@ -88,7 +88,7 @@
8674 *
8675 * SWITCH_ENTER and SWITH_DO_LAZY_FPU do not work yet (e.g. SMP does not work)
8676 */
8677-#define prepare_to_switch() do { \
8678+#define prepare_arch_switch(rq, next) do { \
8679 __asm__ __volatile__( \
8680 ".globl\tflush_patch_switch\nflush_patch_switch:\n\t" \
8681 "save %sp, -0x40, %sp; save %sp, -0x40, %sp; save %sp, -0x40, %sp\n\t" \
8682@@ -96,6 +96,8 @@
8683 "save %sp, -0x40, %sp\n\t" \
8684 "restore; restore; restore; restore; restore; restore; restore"); \
8685 } while(0)
8686+#define finish_arch_switch(rq, next) do{ }while(0)
8687+#define task_running(rq, p) ((rq)->curr == (p))
8688
8689 /* Much care has gone into this code, do not touch it.
8690 *
8691diff -urN linux-2.4.20/include/asm-sparc64/bitops.h linux-2.4.20-o1-preempt/include/asm-sparc64/bitops.h
8692--- linux-2.4.20/include/asm-sparc64/bitops.h Fri Dec 21 18:42:03 2001
8693+++ linux-2.4.20-o1-preempt/include/asm-sparc64/bitops.h Tue Feb 18 03:51:30 2003
8694@@ -7,11 +7,12 @@
8695 #ifndef _SPARC64_BITOPS_H
8696 #define _SPARC64_BITOPS_H
8697
8698+#include <linux/compiler.h>
8699 #include <asm/byteorder.h>
8700
8701-extern long ___test_and_set_bit(unsigned long nr, volatile void *addr);
8702-extern long ___test_and_clear_bit(unsigned long nr, volatile void *addr);
8703-extern long ___test_and_change_bit(unsigned long nr, volatile void *addr);
8704+extern long ___test_and_set_bit(unsigned long nr, volatile unsigned long *addr);
8705+extern long ___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr);
8706+extern long ___test_and_change_bit(unsigned long nr, volatile unsigned long *addr);
8707
8708 #define test_and_set_bit(nr,addr) ({___test_and_set_bit(nr,addr)!=0;})
8709 #define test_and_clear_bit(nr,addr) ({___test_and_clear_bit(nr,addr)!=0;})
8710@@ -21,109 +22,132 @@
8711 #define change_bit(nr,addr) ((void)___test_and_change_bit(nr,addr))
8712
8713 /* "non-atomic" versions... */
8714-#define __set_bit(X,Y) \
8715-do { unsigned long __nr = (X); \
8716- long *__m = ((long *) (Y)) + (__nr >> 6); \
8717- *__m |= (1UL << (__nr & 63)); \
8718-} while (0)
8719-#define __clear_bit(X,Y) \
8720-do { unsigned long __nr = (X); \
8721- long *__m = ((long *) (Y)) + (__nr >> 6); \
8722- *__m &= ~(1UL << (__nr & 63)); \
8723-} while (0)
8724-#define __change_bit(X,Y) \
8725-do { unsigned long __nr = (X); \
8726- long *__m = ((long *) (Y)) + (__nr >> 6); \
8727- *__m ^= (1UL << (__nr & 63)); \
8728-} while (0)
8729-#define __test_and_set_bit(X,Y) \
8730-({ unsigned long __nr = (X); \
8731- long *__m = ((long *) (Y)) + (__nr >> 6); \
8732- long __old = *__m; \
8733- long __mask = (1UL << (__nr & 63)); \
8734- *__m = (__old | __mask); \
8735- ((__old & __mask) != 0); \
8736-})
8737-#define __test_and_clear_bit(X,Y) \
8738-({ unsigned long __nr = (X); \
8739- long *__m = ((long *) (Y)) + (__nr >> 6); \
8740- long __old = *__m; \
8741- long __mask = (1UL << (__nr & 63)); \
8742- *__m = (__old & ~__mask); \
8743- ((__old & __mask) != 0); \
8744-})
8745-#define __test_and_change_bit(X,Y) \
8746-({ unsigned long __nr = (X); \
8747- long *__m = ((long *) (Y)) + (__nr >> 6); \
8748- long __old = *__m; \
8749- long __mask = (1UL << (__nr & 63)); \
8750- *__m = (__old ^ __mask); \
8751- ((__old & __mask) != 0); \
8752-})
8753+
8754+static __inline__ void __set_bit(int nr, volatile unsigned long *addr)
8755+{
8756+ volatile unsigned long *m = addr + (nr >> 6);
8757+
8758+ *m |= (1UL << (nr & 63));
8759+}
8760+
8761+static __inline__ void __clear_bit(int nr, volatile unsigned long *addr)
8762+{
8763+ volatile unsigned long *m = addr + (nr >> 6);
8764+
8765+ *m &= ~(1UL << (nr & 63));
8766+}
8767+
8768+static __inline__ void __change_bit(int nr, volatile unsigned long *addr)
8769+{
8770+ volatile unsigned long *m = addr + (nr >> 6);
8771+
8772+ *m ^= (1UL << (nr & 63));
8773+}
8774+
8775+static __inline__ int __test_and_set_bit(int nr, volatile unsigned long *addr)
8776+{
8777+ volatile unsigned long *m = addr + (nr >> 6);
8778+ long old = *m;
8779+ long mask = (1UL << (nr & 63));
8780+
8781+ *m = (old | mask);
8782+ return ((old & mask) != 0);
8783+}
8784+
8785+static __inline__ int __test_and_clear_bit(int nr, volatile unsigned long *addr)
8786+{
8787+ volatile unsigned long *m = addr + (nr >> 6);
8788+ long old = *m;
8789+ long mask = (1UL << (nr & 63));
8790+
8791+ *m = (old & ~mask);
8792+ return ((old & mask) != 0);
8793+}
8794+
8795+static __inline__ int __test_and_change_bit(int nr, volatile unsigned long *addr)
8796+{
8797+ volatile unsigned long *m = addr + (nr >> 6);
8798+ long old = *m;
8799+ long mask = (1UL << (nr & 63));
8800+
8801+ *m = (old ^ mask);
8802+ return ((old & mask) != 0);
8803+}
8804
8805 #define smp_mb__before_clear_bit() do { } while(0)
8806 #define smp_mb__after_clear_bit() do { } while(0)
8807
8808-extern __inline__ int test_bit(int nr, __const__ void *addr)
8809+static __inline__ int test_bit(int nr, __const__ volatile unsigned long *addr)
8810 {
8811- return (1UL & (((__const__ long *) addr)[nr >> 6] >> (nr & 63))) != 0UL;
8812+ return (1UL & ((addr)[nr >> 6] >> (nr & 63))) != 0UL;
8813 }
8814
8815 /* The easy/cheese version for now. */
8816-extern __inline__ unsigned long ffz(unsigned long word)
8817+static __inline__ unsigned long ffz(unsigned long word)
8818 {
8819 unsigned long result;
8820
8821-#ifdef ULTRA_HAS_POPULATION_COUNT /* Thanks for nothing Sun... */
8822- __asm__ __volatile__(
8823-" brz,pn %0, 1f\n"
8824-" neg %0, %%g1\n"
8825-" xnor %0, %%g1, %%g2\n"
8826-" popc %%g2, %0\n"
8827-"1: " : "=&r" (result)
8828- : "0" (word)
8829- : "g1", "g2");
8830-#else
8831-#if 1 /* def EASY_CHEESE_VERSION */
8832 result = 0;
8833 while(word & 1) {
8834 result++;
8835 word >>= 1;
8836 }
8837-#else
8838- unsigned long tmp;
8839+ return result;
8840+}
8841
8842- result = 0;
8843- tmp = ~word & -~word;
8844- if (!(unsigned)tmp) {
8845- tmp >>= 32;
8846- result = 32;
8847- }
8848- if (!(unsigned short)tmp) {
8849- tmp >>= 16;
8850- result += 16;
8851- }
8852- if (!(unsigned char)tmp) {
8853- tmp >>= 8;
8854- result += 8;
8855+/**
8856+ * __ffs - find first bit in word.
8857+ * @word: The word to search
8858+ *
8859+ * Undefined if no bit exists, so code should check against 0 first.
8860+ */
8861+static __inline__ unsigned long __ffs(unsigned long word)
8862+{
8863+ unsigned long result = 0;
8864+
8865+ while (!(word & 1UL)) {
8866+ result++;
8867+ word >>= 1;
8868 }
8869- if (tmp & 0xf0) result += 4;
8870- if (tmp & 0xcc) result += 2;
8871- if (tmp & 0xaa) result ++;
8872-#endif
8873-#endif
8874 return result;
8875 }
8876
8877+/*
8878+ * fls: find last bit set.
8879+ */
8880+
8881+#define fls(x) generic_fls(x)
8882+
8883 #ifdef __KERNEL__
8884
8885 /*
8886+ * Every architecture must define this function. It's the fastest
8887+ * way of searching a 140-bit bitmap where the first 100 bits are
8888+ * unlikely to be set. It's guaranteed that at least one of the 140
8889+ * bits is cleared.
8890+ */
8891+static inline int _sched_find_first_bit(unsigned long *b)
8892+{
8893+ if (unlikely(b[0]))
8894+ return __ffs(b[0]);
8895+ if (unlikely(((unsigned int)b[1])))
8896+ return __ffs(b[1]) + 64;
8897+ if (b[1] >> 32)
8898+ return __ffs(b[1] >> 32) + 96;
8899+ return __ffs(b[2]) + 128;
8900+}
8901+
8902+/*
8903 * ffs: find first bit set. This is defined the same way as
8904 * the libc and compiler builtin ffs routines, therefore
8905 * differs in spirit from the above ffz (man ffs).
8906 */
8907-
8908-#define ffs(x) generic_ffs(x)
8909+static __inline__ int ffs(int x)
8910+{
8911+ if (!x)
8912+ return 0;
8913+ return __ffs((unsigned long)x);
8914+}
8915
8916 /*
8917 * hweightN: returns the hamming weight (i.e. the number
8918@@ -132,7 +156,7 @@
8919
8920 #ifdef ULTRA_HAS_POPULATION_COUNT
8921
8922-extern __inline__ unsigned int hweight32(unsigned int w)
8923+static __inline__ unsigned int hweight32(unsigned int w)
8924 {
8925 unsigned int res;
8926
8927@@ -140,7 +164,7 @@
8928 return res;
8929 }
8930
8931-extern __inline__ unsigned int hweight16(unsigned int w)
8932+static __inline__ unsigned int hweight16(unsigned int w)
8933 {
8934 unsigned int res;
8935
8936@@ -148,7 +172,7 @@
8937 return res;
8938 }
8939
8940-extern __inline__ unsigned int hweight8(unsigned int w)
8941+static __inline__ unsigned int hweight8(unsigned int w)
8942 {
8943 unsigned int res;
8944
8945@@ -165,14 +189,69 @@
8946 #endif
8947 #endif /* __KERNEL__ */
8948
8949+/**
8950+ * find_next_bit - find the next set bit in a memory region
8951+ * @addr: The address to base the search on
8952+ * @offset: The bitnumber to start searching at
8953+ * @size: The maximum size to search
8954+ */
8955+static __inline__ unsigned long find_next_bit(unsigned long *addr, unsigned long size, unsigned long offset)
8956+{
8957+ unsigned long *p = addr + (offset >> 6);
8958+ unsigned long result = offset & ~63UL;
8959+ unsigned long tmp;
8960+
8961+ if (offset >= size)
8962+ return size;
8963+ size -= result;
8964+ offset &= 63UL;
8965+ if (offset) {
8966+ tmp = *(p++);
8967+ tmp &= (~0UL << offset);
8968+ if (size < 64)
8969+ goto found_first;
8970+ if (tmp)
8971+ goto found_middle;
8972+ size -= 64;
8973+ result += 64;
8974+ }
8975+ while (size & ~63UL) {
8976+ if ((tmp = *(p++)))
8977+ goto found_middle;
8978+ result += 64;
8979+ size -= 64;
8980+ }
8981+ if (!size)
8982+ return result;
8983+ tmp = *p;
8984+
8985+found_first:
8986+ tmp &= (~0UL >> (64 - size));
8987+ if (tmp == 0UL) /* Are any bits set? */
8988+ return result + size; /* Nope. */
8989+found_middle:
8990+ return result + __ffs(tmp);
8991+}
8992+
8993+/**
8994+ * find_first_bit - find the first set bit in a memory region
8995+ * @addr: The address to start the search at
8996+ * @size: The maximum size to search
8997+ *
8998+ * Returns the bit-number of the first set bit, not the number of the byte
8999+ * containing a bit.
9000+ */
9001+#define find_first_bit(addr, size) \
9002+ find_next_bit((addr), (size), 0)
9003+
9004 /* find_next_zero_bit() finds the first zero bit in a bit string of length
9005 * 'size' bits, starting the search at bit 'offset'. This is largely based
9006 * on Linus's ALPHA routines, which are pretty portable BTW.
9007 */
9008
9009-extern __inline__ unsigned long find_next_zero_bit(void *addr, unsigned long size, unsigned long offset)
9010+static __inline__ unsigned long find_next_zero_bit(unsigned long *addr, unsigned long size, unsigned long offset)
9011 {
9012- unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
9013+ unsigned long *p = addr + (offset >> 6);
9014 unsigned long result = offset & ~63UL;
9015 unsigned long tmp;
9016
9017@@ -211,15 +290,15 @@
9018 #define find_first_zero_bit(addr, size) \
9019 find_next_zero_bit((addr), (size), 0)
9020
9021-extern long ___test_and_set_le_bit(int nr, volatile void *addr);
9022-extern long ___test_and_clear_le_bit(int nr, volatile void *addr);
9023+extern long ___test_and_set_le_bit(int nr, volatile unsigned long *addr);
9024+extern long ___test_and_clear_le_bit(int nr, volatile unsigned long *addr);
9025
9026 #define test_and_set_le_bit(nr,addr) ({___test_and_set_le_bit(nr,addr)!=0;})
9027 #define test_and_clear_le_bit(nr,addr) ({___test_and_clear_le_bit(nr,addr)!=0;})
9028 #define set_le_bit(nr,addr) ((void)___test_and_set_le_bit(nr,addr))
9029 #define clear_le_bit(nr,addr) ((void)___test_and_clear_le_bit(nr,addr))
9030
9031-extern __inline__ int test_le_bit(int nr, __const__ void * addr)
9032+static __inline__ int test_le_bit(int nr, __const__ unsigned long * addr)
9033 {
9034 int mask;
9035 __const__ unsigned char *ADDR = (__const__ unsigned char *) addr;
9036@@ -232,9 +311,9 @@
9037 #define find_first_zero_le_bit(addr, size) \
9038 find_next_zero_le_bit((addr), (size), 0)
9039
9040-extern __inline__ unsigned long find_next_zero_le_bit(void *addr, unsigned long size, unsigned long offset)
9041+static __inline__ unsigned long find_next_zero_le_bit(unsigned long *addr, unsigned long size, unsigned long offset)
9042 {
9043- unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
9044+ unsigned long *p = addr + (offset >> 6);
9045 unsigned long result = offset & ~63UL;
9046 unsigned long tmp;
9047
9048@@ -271,18 +350,22 @@
9049
9050 #ifdef __KERNEL__
9051
9052-#define ext2_set_bit test_and_set_le_bit
9053-#define ext2_clear_bit test_and_clear_le_bit
9054-#define ext2_test_bit test_le_bit
9055-#define ext2_find_first_zero_bit find_first_zero_le_bit
9056-#define ext2_find_next_zero_bit find_next_zero_le_bit
9057+#define ext2_set_bit(nr,addr) test_and_set_le_bit((nr),(unsigned long *)(addr))
9058+#define ext2_clear_bit(nr,addr) test_and_clear_le_bit((nr),(unsigned long *)(addr))
9059+#define ext2_test_bit(nr,addr) test_le_bit((nr),(unsigned long *)(addr))
9060+#define ext2_find_first_zero_bit(addr, size) \
9061+ find_first_zero_le_bit((unsigned long *)(addr), (size))
9062+#define ext2_find_next_zero_bit(addr, size, off) \
9063+ find_next_zero_le_bit((unsigned long *)(addr), (size), (off))
9064
9065 /* Bitmap functions for the minix filesystem. */
9066-#define minix_test_and_set_bit(nr,addr) test_and_set_bit(nr,addr)
9067-#define minix_set_bit(nr,addr) set_bit(nr,addr)
9068-#define minix_test_and_clear_bit(nr,addr) test_and_clear_bit(nr,addr)
9069-#define minix_test_bit(nr,addr) test_bit(nr,addr)
9070-#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
9071+#define minix_test_and_set_bit(nr,addr) test_and_set_bit((nr),(unsigned long *)(addr))
9072+#define minix_set_bit(nr,addr) set_bit((nr),(unsigned long *)(addr))
9073+#define minix_test_and_clear_bit(nr,addr) \
9074+ test_and_clear_bit((nr),(unsigned long *)(addr))
9075+#define minix_test_bit(nr,addr) test_bit((nr),(unsigned long *)(addr))
9076+#define minix_find_first_zero_bit(addr,size) \
9077+ find_first_zero_bit((unsigned long *)(addr),(size))
9078
9079 #endif /* __KERNEL__ */
9080
9081diff -urN linux-2.4.20/include/asm-sparc64/smp.h linux-2.4.20-o1-preempt/include/asm-sparc64/smp.h
9082--- linux-2.4.20/include/asm-sparc64/smp.h Fri Nov 29 00:53:15 2002
9083+++ linux-2.4.20-o1-preempt/include/asm-sparc64/smp.h Tue Feb 18 03:51:30 2003
9084@@ -103,7 +103,7 @@
9085 }
9086 }
9087
9088-#define smp_processor_id() (current->processor)
9089+#define smp_processor_id() (current->cpu)
9090
9091 /* This needn't do anything as we do not sleep the cpu
9092 * inside of the idler task, so an interrupt is not needed
9093--- linux-2.4.20/include/asm-sparc64/spinlock.h~ Fri Dec 21 18:42:03 2001
9094+++ linux-2.4.20/include/asm-sparc64/spinlock.h Sun Mar 9 13:43:48 2003
9095@@ -40,7 +40,7 @@
9096 do { membar("#LoadLoad"); \
9097 } while(*((volatile unsigned char *)lock))
9098
9099-extern __inline__ void spin_lock(spinlock_t *lock)
9100+extern __inline__ void _raw_spin_lock(spinlock_t *lock)
9101 {
9102 __asm__ __volatile__(
9103 "1: ldstub [%0], %%g7\n"
9104@@ -57,7 +57,7 @@
9105 : "g7", "memory");
9106 }
9107
9108-extern __inline__ int spin_trylock(spinlock_t *lock)
9109+extern __inline__ int _raw_spin_trylock(spinlock_t *lock)
9110 {
9111 unsigned int result;
9112 __asm__ __volatile__("ldstub [%1], %0\n\t"
9113@@ -68,7 +68,7 @@
9114 return (result == 0);
9115 }
9116
9117-extern __inline__ void spin_unlock(spinlock_t *lock)
9118+extern __inline__ void _raw_spin_unlock(spinlock_t *lock)
9119 {
9120 __asm__ __volatile__("membar #StoreStore | #LoadStore\n\t"
9121 "stb %%g0, [%0]"
9122@@ -99,9 +99,9 @@
9123 extern void _do_spin_unlock (spinlock_t *lock);
9124 extern int _spin_trylock (spinlock_t *lock);
9125
9126-#define spin_trylock(lp) _spin_trylock(lp)
9127-#define spin_lock(lock) _do_spin_lock(lock, "spin_lock")
9128-#define spin_unlock(lock) _do_spin_unlock(lock)
9129+#define _raw_spin_trylock(lp) _spin_trylock(lp)
9130+#define _raw_spin_lock(lock) _do_spin_lock(lock, "_raw_spin_lock")
9131+#define _raw_spin_unlock(lock) _do_spin_unlock(lock)
9132
9133 #endif /* CONFIG_DEBUG_SPINLOCK */
9134
9135@@ -118,10 +118,10 @@
9136 extern void __write_lock(rwlock_t *);
9137 extern void __write_unlock(rwlock_t *);
9138
9139-#define read_lock(p) __read_lock(p)
9140-#define read_unlock(p) __read_unlock(p)
9141-#define write_lock(p) __write_lock(p)
9142-#define write_unlock(p) __write_unlock(p)
9143+#define _raw_read_lock(p) __read_lock(p)
9144+#define _raw_read_unlock(p) __read_unlock(p)
9145+#define _raw_write_lock(p) __write_lock(p)
9146+#define _raw_write_unlock(p) __write_unlock(p)
9147
9148 #else /* !(CONFIG_DEBUG_SPINLOCK) */
9149
9150@@ -138,28 +138,28 @@
9151 extern void _do_write_lock(rwlock_t *rw, char *str);
9152 extern void _do_write_unlock(rwlock_t *rw);
9153
9154-#define read_lock(lock) \
9155+#define _raw_read_lock(lock) \
9156 do { unsigned long flags; \
9157 __save_and_cli(flags); \
9158- _do_read_lock(lock, "read_lock"); \
9159+ _do_read_lock(lock, "_raw_read_lock"); \
9160 __restore_flags(flags); \
9161 } while(0)
9162
9163-#define read_unlock(lock) \
9164+#define _raw_read_unlock(lock) \
9165 do { unsigned long flags; \
9166 __save_and_cli(flags); \
9167- _do_read_unlock(lock, "read_unlock"); \
9168+ _do_read_unlock(lock, "_raw_read_unlock"); \
9169 __restore_flags(flags); \
9170 } while(0)
9171
9172-#define write_lock(lock) \
9173+#define _raw_write_lock(lock) \
9174 do { unsigned long flags; \
9175 __save_and_cli(flags); \
9176- _do_write_lock(lock, "write_lock"); \
9177+ _do_write_lock(lock, "_raw_write_lock"); \
9178 __restore_flags(flags); \
9179 } while(0)
9180
9181-#define write_unlock(lock) \
9182+#define _raw_write_unlock(lock) \
9183 do { unsigned long flags; \
9184 __save_and_cli(flags); \
9185 _do_write_unlock(lock); \
9186diff -urN linux-2.4.20/include/asm-sparc64/system.h linux-2.4.20-o1-preempt/include/asm-sparc64/system.h
9187--- linux-2.4.20/include/asm-sparc64/system.h Sat Aug 3 02:39:45 2002
9188+++ linux-2.4.20-o1-preempt/include/asm-sparc64/system.h Tue Feb 18 03:51:30 2003
9189@@ -143,7 +143,18 @@
9190
9191 #define flush_user_windows flushw_user
9192 #define flush_register_windows flushw_all
9193-#define prepare_to_switch flushw_all
9194+
9195+#define prepare_arch_schedule(prev) task_lock(prev)
9196+#define finish_arch_schedule(prev) task_unlock(prev)
9197+#define prepare_arch_switch(rq, next) \
9198+do { spin_lock(&(next)->switch_lock); \
9199+ spin_unlock(&(rq)->lock); \
9200+ flushw_all(); \
9201+} while (0)
9202+
9203+#define finish_arch_switch(rq, prev) \
9204+do { spin_unlock_irq(&(prev)->switch_lock); \
9205+} while (0)
9206
9207 #ifndef CONFIG_DEBUG_SPINLOCK
9208 #define CHECK_LOCKS(PREV) do { } while(0)
9209diff -urN linux-2.4.20/include/linux/brlock.h linux-2.4.20-o1-preempt/include/linux/brlock.h
9210--- linux-2.4.20/include/linux/brlock.h Fri Nov 29 00:53:15 2002
9211+++ linux-2.4.20-o1-preempt/include/linux/brlock.h Tue Feb 18 03:52:06 2003
9212@@ -171,11 +171,11 @@
9213 }
9214
9215 #else
9216-# define br_read_lock(idx) ((void)(idx))
9217-# define br_read_unlock(idx) ((void)(idx))
9218-# define br_write_lock(idx) ((void)(idx))
9219-# define br_write_unlock(idx) ((void)(idx))
9220-#endif
9221+# define br_read_lock(idx) ({ (void)(idx); preempt_disable(); })
9222+# define br_read_unlock(idx) ({ (void)(idx); preempt_enable(); })
9223+# define br_write_lock(idx) ({ (void)(idx); preempt_disable(); })
9224+# define br_write_unlock(idx) ({ (void)(idx); preempt_enable(); })
9225+#endif /* CONFIG_SMP */
9226
9227 /*
9228 * Now enumerate all of the possible sw/hw IRQ protected
9229diff -urN linux-2.4.20/include/linux/capability.h linux-2.4.20-o1-preempt/include/linux/capability.h
9230--- linux-2.4.20/include/linux/capability.h Thu Nov 22 20:46:19 2001
9231+++ linux-2.4.20-o1-preempt/include/linux/capability.h Tue Feb 18 03:51:30 2003
9232@@ -243,6 +243,7 @@
9233 /* Allow use of FIFO and round-robin (realtime) scheduling on own
9234 processes and setting the scheduling algorithm used by another
9235 process. */
9236+/* Allow setting cpu affinity on other processes */
9237
9238 #define CAP_SYS_NICE 23
9239
9240diff -urN linux-2.4.20/include/linux/dcache.h linux-2.4.20-o1-preempt/include/linux/dcache.h
9241--- linux-2.4.20/include/linux/dcache.h Fri Nov 29 00:53:15 2002
9242+++ linux-2.4.20-o1-preempt/include/linux/dcache.h Tue Feb 18 03:52:06 2003
9243@@ -127,31 +127,6 @@
9244
9245 extern spinlock_t dcache_lock;
9246
9247-/**
9248- * d_drop - drop a dentry
9249- * @dentry: dentry to drop
9250- *
9251- * d_drop() unhashes the entry from the parent
9252- * dentry hashes, so that it won't be found through
9253- * a VFS lookup any more. Note that this is different
9254- * from deleting the dentry - d_delete will try to
9255- * mark the dentry negative if possible, giving a
9256- * successful _negative_ lookup, while d_drop will
9257- * just make the cache lookup fail.
9258- *
9259- * d_drop() is used mainly for stuff that wants
9260- * to invalidate a dentry for some reason (NFS
9261- * timeouts or autofs deletes).
9262- */
9263-
9264-static __inline__ void d_drop(struct dentry * dentry)
9265-{
9266- spin_lock(&dcache_lock);
9267- list_del(&dentry->d_hash);
9268- INIT_LIST_HEAD(&dentry->d_hash);
9269- spin_unlock(&dcache_lock);
9270-}
9271-
9272 static __inline__ int dname_external(struct dentry *d)
9273 {
9274 return d->d_name.name != d->d_iname;
9275@@ -276,3 +251,34 @@
9276 #endif /* __KERNEL__ */
9277
9278 #endif /* __LINUX_DCACHE_H */
9279+
9280+#if !defined(__LINUX_DCACHE_H_INLINES) && defined(_TASK_STRUCT_DEFINED)
9281+#define __LINUX_DCACHE_H_INLINES
9282+
9283+#ifdef __KERNEL__
9284+/**
9285+ * d_drop - drop a dentry
9286+ * @dentry: dentry to drop
9287+ *
9288+ * d_drop() unhashes the entry from the parent
9289+ * dentry hashes, so that it won't be found through
9290+ * a VFS lookup any more. Note that this is different
9291+ * from deleting the dentry - d_delete will try to
9292+ * mark the dentry negative if possible, giving a
9293+ * successful _negative_ lookup, while d_drop will
9294+ * just make the cache lookup fail.
9295+ *
9296+ * d_drop() is used mainly for stuff that wants
9297+ * to invalidate a dentry for some reason (NFS
9298+ * timeouts or autofs deletes).
9299+ */
9300+
9301+static __inline__ void d_drop(struct dentry * dentry)
9302+{
9303+ spin_lock(&dcache_lock);
9304+ list_del(&dentry->d_hash);
9305+ INIT_LIST_HEAD(&dentry->d_hash);
9306+ spin_unlock(&dcache_lock);
9307+}
9308+#endif
9309+#endif
9310diff -urN linux-2.4.20/include/linux/fs_struct.h linux-2.4.20-o1-preempt/include/linux/fs_struct.h
9311--- linux-2.4.20/include/linux/fs_struct.h Sat Jul 14 00:10:44 2001
9312+++ linux-2.4.20-o1-preempt/include/linux/fs_struct.h Tue Feb 18 03:52:06 2003
9313@@ -20,6 +20,15 @@
9314 extern void exit_fs(struct task_struct *);
9315 extern void set_fs_altroot(void);
9316
9317+struct fs_struct *copy_fs_struct(struct fs_struct *old);
9318+void put_fs_struct(struct fs_struct *fs);
9319+
9320+#endif
9321+#endif
9322+
9323+#if !defined(_LINUX_FS_STRUCT_H_INLINES) && defined(_TASK_STRUCT_DEFINED)
9324+#define _LINUX_FS_STRUCT_H_INLINES
9325+#ifdef __KERNEL__
9326 /*
9327 * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
9328 * It can block. Requires the big lock held.
9329@@ -65,9 +74,5 @@
9330 mntput(old_pwdmnt);
9331 }
9332 }
9333-
9334-struct fs_struct *copy_fs_struct(struct fs_struct *old);
9335-void put_fs_struct(struct fs_struct *fs);
9336-
9337 #endif
9338 #endif
9339diff -urN linux-2.4.20/include/linux/kernel_stat.h linux-2.4.20-o1-preempt/include/linux/kernel_stat.h
9340--- linux-2.4.20/include/linux/kernel_stat.h Fri Nov 29 00:53:15 2002
9341+++ linux-2.4.20-o1-preempt/include/linux/kernel_stat.h Tue Feb 18 03:51:30 2003
9342@@ -31,7 +31,6 @@
9343 #elif !defined(CONFIG_ARCH_S390)
9344 unsigned int irqs[NR_CPUS][NR_IRQS];
9345 #endif
9346- unsigned int context_swtch;
9347 };
9348
9349 extern struct kernel_stat kstat;
9350diff -urN linux-2.4.20/include/linux/sched.h linux-2.4.20-o1-preempt/include/linux/sched.h
9351--- linux-2.4.20/include/linux/sched.h Fri Nov 29 00:53:15 2002
9352+++ linux-2.4.20-o1-preempt/include/linux/sched.h Tue Feb 18 03:52:06 2003
9353@@ -6,6 +6,7 @@
9354 extern unsigned long event;
9355
9356 #include <linux/config.h>
9357+#include <linux/compiler.h>
9358 #include <linux/binfmts.h>
9359 #include <linux/threads.h>
9360 #include <linux/kernel.h>
9361@@ -21,7 +22,7 @@
9362 #include <asm/mmu.h>
9363
9364 #include <linux/smp.h>
9365-#include <linux/tty.h>
9366+//#include <linux/tty.h>
9367 #include <linux/sem.h>
9368 #include <linux/signal.h>
9369 #include <linux/securebits.h>
9370@@ -73,10 +74,12 @@
9371 #define CT_TO_SECS(x) ((x) / HZ)
9372 #define CT_TO_USECS(x) (((x) % HZ) * 1000000/HZ)
9373
9374-extern int nr_running, nr_threads;
9375+extern int nr_threads;
9376 extern int last_pid;
9377+extern unsigned long nr_running(void);
9378+extern unsigned long nr_uninterruptible(void);
9379
9380-#include <linux/fs.h>
9381+//#include <linux/fs.h>
9382 #include <linux/time.h>
9383 #include <linux/param.h>
9384 #include <linux/resource.h>
9385@@ -91,6 +94,7 @@
9386 #define TASK_UNINTERRUPTIBLE 2
9387 #define TASK_ZOMBIE 4
9388 #define TASK_STOPPED 8
9389+#define PREEMPT_ACTIVE 0x4000000
9390
9391 #define __set_task_state(tsk, state_value) \
9392 do { (tsk)->state = (state_value); } while (0)
9393@@ -119,12 +123,6 @@
9394 #define SCHED_FIFO 1
9395 #define SCHED_RR 2
9396
9397-/*
9398- * This is an additional bit set when we want to
9399- * yield the CPU for one re-schedule..
9400- */
9401-#define SCHED_YIELD 0x10
9402-
9403 struct sched_param {
9404 int sched_priority;
9405 };
9406@@ -142,21 +140,28 @@
9407 * a separate lock).
9408 */
9409 extern rwlock_t tasklist_lock;
9410-extern spinlock_t runqueue_lock;
9411 extern spinlock_t mmlist_lock;
9412
9413+typedef struct task_struct task_t;
9414+
9415 extern void sched_init(void);
9416-extern void init_idle(void);
9417+extern void init_idle(task_t *idle, int cpu);
9418 extern void show_state(void);
9419 extern void cpu_init (void);
9420 extern void trap_init(void);
9421 extern void update_process_times(int user);
9422-extern void update_one_process(struct task_struct *p, unsigned long user,
9423+extern void update_one_process(task_t *p, unsigned long user,
9424 unsigned long system, int cpu);
9425+extern void scheduler_tick(int user_tick, int system);
9426+extern void migration_init(void);
9427+extern unsigned long cache_decay_ticks;
9428
9429 #define MAX_SCHEDULE_TIMEOUT LONG_MAX
9430 extern signed long FASTCALL(schedule_timeout(signed long timeout));
9431 asmlinkage void schedule(void);
9432+#ifdef CONFIG_PREEMPT
9433+asmlinkage void preempt_schedule(void);
9434+#endif
9435
9436 extern int schedule_task(struct tq_struct *task);
9437 extern void flush_scheduled_tasks(void);
9438@@ -164,6 +169,51 @@
9439 extern int current_is_keventd(void);
9440
9441 /*
9442+ * Priority of a process goes from 0..MAX_PRIO-1, valid RT
9443+ * priority is 0..MAX_RT_PRIO-1, and SCHED_OTHER tasks are
9444+ * in the range MAX_RT_PRIO..MAX_PRIO-1. Priority values
9445+ * are inverted: lower p->prio value means higher priority.
9446+ *
9447+ * The MAX_RT_USER_PRIO value allows the actual maximum
9448+ * RT priority to be separate from the value exported to
9449+ * user-space. This allows kernel threads to set their
9450+ * priority to a value higher than any user task. Note:
9451+ * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
9452+ *
9453+ * Both values are configurable at compile-time.
9454+ */
9455+
9456+#if CONFIG_MAX_USER_RT_PRIO < 100
9457+#define MAX_USER_RT_PRIO 100
9458+#elif CONFIG_MAX_USER_RT_PRIO > 1000
9459+#define MAX_USER_RT_PRIO 1000
9460+#else
9461+#define MAX_USER_RT_PRIO CONFIG_MAX_USER_RT_PRIO
9462+#endif
9463+
9464+#if CONFIG_MAX_RT_PRIO < 0
9465+#define MAX_RT_PRIO MAX_USER_RT_PRIO
9466+#elif CONFIG_MAX_RT_PRIO > 200
9467+#define MAX_RT_PRIO (MAX_USER_RT_PRIO + 200)
9468+#else
9469+#define MAX_RT_PRIO (MAX_USER_RT_PRIO + CONFIG_MAX_RT_PRIO)
9470+#endif
9471+
9472+#define MAX_PRIO (MAX_RT_PRIO + 40)
9473+
9474+/*
9475+ * The maximum RT priority is configurable. If the resulting
9476+ * bitmap is 160-bits , we can use a hand-coded routine which
9477+ * is optimal. Otherwise, we fall back on a generic routine for
9478+ * finding the first set bit from an arbitrarily-sized bitmap.
9479+ */
9480+#if MAX_PRIO < 160 && MAX_PRIO > 127
9481+#define sched_find_first_bit(map) _sched_find_first_bit(map)
9482+#else
9483+#define sched_find_first_bit(map) find_first_bit(map, MAX_PRIO)
9484+#endif
9485+
9486+/*
9487 * The default fd array needs to be at least BITS_PER_LONG,
9488 * as this is the granularity returned by copy_fdset().
9489 */
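With the default configuration the clamping above yields a 140-bit priority bitmap and therefore selects the hand-coded _sched_find_first_bit() path. A small stand-alone sketch of that arithmetic; the CONFIG_* values are assumed defaults for illustration, not taken from a real .config.

    /* Worked example of the priority-range arithmetic in the hunk above. */
    #include <stdio.h>

    #define CONFIG_MAX_USER_RT_PRIO 100   /* assumed default, already within [100,1000] */
    #define CONFIG_MAX_RT_PRIO      0     /* assumed default, already within [0,200]    */

    int main(void)
    {
            int max_user_rt_prio = CONFIG_MAX_USER_RT_PRIO;
            int max_rt_prio      = max_user_rt_prio + CONFIG_MAX_RT_PRIO;
            int max_prio         = max_rt_prio + 40;      /* 40 nice levels for SCHED_OTHER */

            printf("RT priorities:     0..%d\n", max_rt_prio - 1);            /* 0..99   */
            printf("SCHED_OTHER range: %d..%d\n", max_rt_prio, max_prio - 1); /* 100..139 */
            printf("bitmap width:      %d bits -> %s lookup\n", max_prio,
                   (max_prio > 127 && max_prio < 160) ? "hand-coded" : "generic");
            return 0;
    }
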
9490@@ -284,12 +334,14 @@
9491 extern struct user_struct root_user;
9492 #define INIT_USER (&root_user)
9493
9494+typedef struct prio_array prio_array_t;
9495+
9496 struct task_struct {
9497 /*
9498 * offsets of these are hardcoded elsewhere - touch with care
9499 */
9500 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
9501- unsigned long flags; /* per process flags, defined below */
9502+ int preempt_count; /* 0 => preemptable, <0 => BUG */
9503 int sigpending;
9504 mm_segment_t addr_limit; /* thread address space:
9505 0-0xBFFFFFFF for user-thead
9506@@ -301,36 +353,28 @@
9507
9508 int lock_depth; /* Lock depth */
9509
9510-/*
9511- * offset 32 begins here on 32-bit platforms. We keep
9512- * all fields in a single cacheline that are needed for
9513- * the goodness() loop in schedule().
9514- */
9515- long counter;
9516- long nice;
9517- unsigned long policy;
9518- struct mm_struct *mm;
9519- int processor;
9520- /*
9521- * cpus_runnable is ~0 if the process is not running on any
9522- * CPU. It's (1 << cpu) if it's running on a CPU. This mask
9523- * is updated under the runqueue lock.
9524- *
9525- * To determine whether a process might run on a CPU, this
9526- * mask is AND-ed with cpus_allowed.
9527- */
9528- unsigned long cpus_runnable, cpus_allowed;
9529 /*
9530- * (only the 'next' pointer fits into the cacheline, but
9531- * that's just fine.)
9532+ * offset 32 begins here on 32-bit platforms.
9533 */
9534- struct list_head run_list;
9535- unsigned long sleep_time;
9536+ unsigned int cpu;
9537+ int prio, static_prio;
9538+ list_t run_list;
9539+ prio_array_t *array;
9540+
9541+ unsigned long sleep_avg;
9542+ unsigned long sleep_timestamp;
9543+
9544+ unsigned long policy;
9545+ unsigned long cpus_allowed;
9546+ unsigned int time_slice, first_time_slice;
9547+
9548+ task_t *next_task, *prev_task;
9549
9550- struct task_struct *next_task, *prev_task;
9551- struct mm_struct *active_mm;
9552+ struct mm_struct *mm, *active_mm;
9553 struct list_head local_pages;
9554+
9555 unsigned int allocation_order, nr_local_pages;
9556+ unsigned long flags;
9557
9558 /* task state */
9559 struct linux_binfmt *binfmt;
9560@@ -351,12 +395,12 @@
9561 * older sibling, respectively. (p->father can be replaced with
9562 * p->p_pptr->pid)
9563 */
9564- struct task_struct *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr;
9565+ task_t *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr;
9566 struct list_head thread_group;
9567
9568 /* PID hash table linkage. */
9569- struct task_struct *pidhash_next;
9570- struct task_struct **pidhash_pprev;
9571+ task_t *pidhash_next;
9572+ task_t **pidhash_pprev;
9573
9574 wait_queue_head_t wait_chldexit; /* for wait4() */
9575 struct completion *vfork_done; /* for vfork() */
9576@@ -415,6 +459,8 @@
9577 u32 self_exec_id;
9578 /* Protection of (de-)allocation: mm, files, fs, tty */
9579 spinlock_t alloc_lock;
9580+/* context-switch lock */
9581+ spinlock_t switch_lock;
9582
9583 /* journalling filesystem info */
9584 void *journal_info;
9585@@ -454,9 +500,15 @@
9586 */
9587 #define _STK_LIM (8*1024*1024)
9588
9589-#define DEF_COUNTER (10*HZ/100) /* 100 ms time slice */
9590-#define MAX_COUNTER (20*HZ/100)
9591-#define DEF_NICE (0)
9592+#if CONFIG_SMP
9593+extern void set_cpus_allowed(task_t *p, unsigned long new_mask);
9594+#else
9595+#define set_cpus_allowed(p, new_mask) do { } while (0)
9596+#endif
9597+
9598+extern void set_user_nice(task_t *p, long nice);
9599+extern int task_prio(task_t *p);
9600+extern int task_nice(task_t *p);
9601
9602 extern void yield(void);
9603
9604@@ -477,14 +529,14 @@
9605 addr_limit: KERNEL_DS, \
9606 exec_domain: &default_exec_domain, \
9607 lock_depth: -1, \
9608- counter: DEF_COUNTER, \
9609- nice: DEF_NICE, \
9610+ prio: MAX_PRIO-20, \
9611+ static_prio: MAX_PRIO-20, \
9612 policy: SCHED_OTHER, \
9613+ cpus_allowed: -1, \
9614 mm: NULL, \
9615 active_mm: &init_mm, \
9616- cpus_runnable: -1, \
9617- cpus_allowed: -1, \
9618 run_list: LIST_HEAD_INIT(tsk.run_list), \
9619+ time_slice: HZ, \
9620 next_task: &tsk, \
9621 prev_task: &tsk, \
9622 p_opptr: &tsk, \
9623@@ -509,6 +561,7 @@
9624 pending: { NULL, &tsk.pending.head, {{0}}}, \
9625 blocked: {{0}}, \
9626 alloc_lock: SPIN_LOCK_UNLOCKED, \
9627+ switch_lock: SPIN_LOCK_UNLOCKED, \
9628 journal_info: NULL, \
9629 }
9630
9631@@ -518,24 +571,23 @@
9632 #endif
9633
9634 union task_union {
9635- struct task_struct task;
9636+ task_t task;
9637 unsigned long stack[INIT_TASK_SIZE/sizeof(long)];
9638 };
9639
9640 extern union task_union init_task_union;
9641
9642 extern struct mm_struct init_mm;
9643-extern struct task_struct *init_tasks[NR_CPUS];
9644
9645 /* PID hashing. (shouldnt this be dynamic?) */
9646 #define PIDHASH_SZ (4096 >> 2)
9647-extern struct task_struct *pidhash[PIDHASH_SZ];
9648+extern task_t *pidhash[PIDHASH_SZ];
9649
9650 #define pid_hashfn(x) ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1))
9651
9652-static inline void hash_pid(struct task_struct *p)
9653+static inline void hash_pid(task_t *p)
9654 {
9655- struct task_struct **htable = &pidhash[pid_hashfn(p->pid)];
9656+ task_t **htable = &pidhash[pid_hashfn(p->pid)];
9657
9658 if((p->pidhash_next = *htable) != NULL)
9659 (*htable)->pidhash_pprev = &p->pidhash_next;
9660@@ -543,16 +595,16 @@
9661 p->pidhash_pprev = htable;
9662 }
9663
9664-static inline void unhash_pid(struct task_struct *p)
9665+static inline void unhash_pid(task_t *p)
9666 {
9667 if(p->pidhash_next)
9668 p->pidhash_next->pidhash_pprev = p->pidhash_pprev;
9669 *p->pidhash_pprev = p->pidhash_next;
9670 }
9671
9672-static inline struct task_struct *find_task_by_pid(int pid)
9673+static inline task_t *find_task_by_pid(int pid)
9674 {
9675- struct task_struct *p, **htable = &pidhash[pid_hashfn(pid)];
9676+ task_t *p, **htable = &pidhash[pid_hashfn(pid)];
9677
9678 for(p = *htable; p && p->pid != pid; p = p->pidhash_next)
9679 ;
9680@@ -560,19 +612,6 @@
9681 return p;
9682 }
9683
9684-#define task_has_cpu(tsk) ((tsk)->cpus_runnable != ~0UL)
9685-
9686-static inline void task_set_cpu(struct task_struct *tsk, unsigned int cpu)
9687-{
9688- tsk->processor = cpu;
9689- tsk->cpus_runnable = 1UL << cpu;
9690-}
9691-
9692-static inline void task_release_cpu(struct task_struct *tsk)
9693-{
9694- tsk->cpus_runnable = ~0UL;
9695-}
9696-
9697 /* per-UID process charging. */
9698 extern struct user_struct * alloc_uid(uid_t);
9699 extern void free_uid(struct user_struct *);
9700@@ -599,47 +638,50 @@
9701 extern void FASTCALL(interruptible_sleep_on(wait_queue_head_t *q));
9702 extern long FASTCALL(interruptible_sleep_on_timeout(wait_queue_head_t *q,
9703 signed long timeout));
9704-extern int FASTCALL(wake_up_process(struct task_struct * tsk));
9705+extern int FASTCALL(wake_up_process(task_t * p));
9706+extern void FASTCALL(wake_up_forked_process(task_t * p));
9707
9708 #define wake_up(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1)
9709 #define wake_up_nr(x, nr) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr)
9710 #define wake_up_all(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0)
9711-#define wake_up_sync(x) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1)
9712-#define wake_up_sync_nr(x, nr) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr)
9713 #define wake_up_interruptible(x) __wake_up((x),TASK_INTERRUPTIBLE, 1)
9714 #define wake_up_interruptible_nr(x, nr) __wake_up((x),TASK_INTERRUPTIBLE, nr)
9715 #define wake_up_interruptible_all(x) __wake_up((x),TASK_INTERRUPTIBLE, 0)
9716-#define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, 1)
9717-#define wake_up_interruptible_sync_nr(x, nr) __wake_up_sync((x),TASK_INTERRUPTIBLE, nr)
9718+#ifdef CONFIG_SMP
9719+#define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, 1)
9720+#else
9721+#define wake_up_interruptible_sync(x) __wake_up((x),TASK_INTERRUPTIBLE, 1)
9722+#endif
9723+
9724 asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru);
9725
9726 extern int in_group_p(gid_t);
9727 extern int in_egroup_p(gid_t);
9728
9729 extern void proc_caches_init(void);
9730-extern void flush_signals(struct task_struct *);
9731-extern void flush_signal_handlers(struct task_struct *);
9732+extern void flush_signals(task_t *);
9733+extern void flush_signal_handlers(task_t *);
9734 extern void sig_exit(int, int, struct siginfo *);
9735 extern int dequeue_signal(sigset_t *, siginfo_t *);
9736 extern void block_all_signals(int (*notifier)(void *priv), void *priv,
9737 sigset_t *mask);
9738 extern void unblock_all_signals(void);
9739-extern int send_sig_info(int, struct siginfo *, struct task_struct *);
9740-extern int force_sig_info(int, struct siginfo *, struct task_struct *);
9741+extern int send_sig_info(int, struct siginfo *, task_t *);
9742+extern int force_sig_info(int, struct siginfo *, task_t *);
9743 extern int kill_pg_info(int, struct siginfo *, pid_t);
9744 extern int kill_sl_info(int, struct siginfo *, pid_t);
9745 extern int kill_proc_info(int, struct siginfo *, pid_t);
9746-extern void notify_parent(struct task_struct *, int);
9747-extern void do_notify_parent(struct task_struct *, int);
9748-extern void force_sig(int, struct task_struct *);
9749-extern int send_sig(int, struct task_struct *, int);
9750+extern void notify_parent(task_t *, int);
9751+extern void do_notify_parent(task_t *, int);
9752+extern void force_sig(int, task_t *);
9753+extern int send_sig(int, task_t *, int);
9754 extern int kill_pg(pid_t, int, int);
9755 extern int kill_sl(pid_t, int, int);
9756 extern int kill_proc(pid_t, int, int);
9757 extern int do_sigaction(int, const struct k_sigaction *, struct k_sigaction *);
9758 extern int do_sigaltstack(const stack_t *, stack_t *, unsigned long);
9759
9760-static inline int signal_pending(struct task_struct *p)
9761+static inline int signal_pending(task_t *p)
9762 {
9763 return (p->sigpending != 0);
9764 }
9765@@ -678,7 +720,7 @@
9766 This is required every time the blocked sigset_t changes.
9767 All callers should have t->sigmask_lock. */
9768
9769-static inline void recalc_sigpending(struct task_struct *t)
9770+static inline void recalc_sigpending(task_t *t)
9771 {
9772 t->sigpending = has_pending_signals(&t->pending.signal, &t->blocked);
9773 }
9774@@ -785,16 +827,17 @@
9775 extern int expand_fdset(struct files_struct *, int nr);
9776 extern void free_fdset(fd_set *, int);
9777
9778-extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
9779+extern int copy_thread(int, unsigned long, unsigned long, unsigned long, task_t *, struct pt_regs *);
9780 extern void flush_thread(void);
9781 extern void exit_thread(void);
9782
9783-extern void exit_mm(struct task_struct *);
9784-extern void exit_files(struct task_struct *);
9785-extern void exit_sighand(struct task_struct *);
9786+extern void exit_mm(task_t *);
9787+extern void exit_files(task_t *);
9788+extern void exit_sighand(task_t *);
9789
9790 extern void reparent_to_init(void);
9791 extern void daemonize(void);
9792+extern task_t *child_reaper;
9793
9794 extern int do_execve(char *, char **, char **, struct pt_regs *);
9795 extern int do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long);
9796@@ -803,6 +846,9 @@
9797 extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait));
9798 extern void FASTCALL(remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait));
9799
9800+extern void wait_task_inactive(task_t * p);
9801+extern void kick_if_running(task_t * p);
9802+
9803 #define __wait_event(wq, condition) \
9804 do { \
9805 wait_queue_t __wait; \
9806@@ -884,27 +930,12 @@
9807 for (task = next_thread(current) ; task != current ; task = next_thread(task))
9808
9809 #define next_thread(p) \
9810- list_entry((p)->thread_group.next, struct task_struct, thread_group)
9811+ list_entry((p)->thread_group.next, task_t, thread_group)
9812
9813 #define thread_group_leader(p) (p->pid == p->tgid)
9814
9815-static inline void del_from_runqueue(struct task_struct * p)
9816+static inline void unhash_process(task_t *p)
9817 {
9818- nr_running--;
9819- p->sleep_time = jiffies;
9820- list_del(&p->run_list);
9821- p->run_list.next = NULL;
9822-}
9823-
9824-static inline int task_on_runqueue(struct task_struct *p)
9825-{
9826- return (p->run_list.next != NULL);
9827-}
9828-
9829-static inline void unhash_process(struct task_struct *p)
9830-{
9831- if (task_on_runqueue(p))
9832- out_of_line_bug();
9833 write_lock_irq(&tasklist_lock);
9834 nr_threads--;
9835 unhash_pid(p);
9836@@ -914,12 +945,12 @@
9837 }
9838
9839 /* Protects ->fs, ->files, ->mm, and synchronises with wait4(). Nests inside tasklist_lock */
9840-static inline void task_lock(struct task_struct *p)
9841+static inline void task_lock(task_t *p)
9842 {
9843 spin_lock(&p->alloc_lock);
9844 }
9845
9846-static inline void task_unlock(struct task_struct *p)
9847+static inline void task_unlock(task_t *p)
9848 {
9849 spin_unlock(&p->alloc_lock);
9850 }
9851@@ -943,6 +974,26 @@
9852 return res;
9853 }
9854
9855+static inline void set_need_resched(void)
9856+{
9857+ current->need_resched = 1;
9858+}
9859+
9860+static inline void clear_need_resched(void)
9861+{
9862+ current->need_resched = 0;
9863+}
9864+
9865+static inline void set_tsk_need_resched(task_t *tsk)
9866+{
9867+ tsk->need_resched = 1;
9868+}
9869+
9870+static inline void clear_tsk_need_resched(task_t *tsk)
9871+{
9872+ tsk->need_resched = 0;
9873+}
9874+
9875 static inline int need_resched(void)
9876 {
9877 return (unlikely(current->need_resched));
9878@@ -955,5 +1006,11 @@
9879 __cond_resched();
9880 }
9881
9882+#define _TASK_STRUCT_DEFINED
9883+#include <linux/dcache.h>
9884+#include <linux/tqueue.h>
9885+#include <linux/fs_struct.h>
9886+
9887 #endif /* __KERNEL__ */
9888+
9889 #endif
9890diff -urN linux-2.4.20/include/linux/smp.h linux-2.4.20-o1-preempt/include/linux/smp.h
9891--- linux-2.4.20/include/linux/smp.h Thu Nov 22 20:46:19 2001
9892+++ linux-2.4.20-o1-preempt/include/linux/smp.h Tue Feb 18 03:51:30 2003
9893@@ -86,6 +86,14 @@
9894 #define cpu_number_map(cpu) 0
9895 #define smp_call_function(func,info,retry,wait) ({ 0; })
9896 #define cpu_online_map 1
9897+static inline void smp_send_reschedule(int cpu) { }
9898+static inline void smp_send_reschedule_all(void) { }
9899
9900 #endif
9901+
9902+/*
9903+ * Common definitions:
9904+ */
9905+#define cpu() smp_processor_id()
9906+
9907 #endif
9908diff -urN linux-2.4.20/include/linux/smp_lock.h linux-2.4.20-o1-preempt/include/linux/smp_lock.h
9909--- linux-2.4.20/include/linux/smp_lock.h Thu Nov 22 20:46:27 2001
9910+++ linux-2.4.20-o1-preempt/include/linux/smp_lock.h Tue Feb 18 03:52:06 2003
9911@@ -3,7 +3,7 @@
9912
9913 #include <linux/config.h>
9914
9915-#ifndef CONFIG_SMP
9916+#if !defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT)
9917
9918 #define lock_kernel() do { } while(0)
9919 #define unlock_kernel() do { } while(0)
9920diff -urN linux-2.4.20/include/linux/spinlock.h linux-2.4.20-o1-preempt/include/linux/spinlock.h
9921--- linux-2.4.20/include/linux/spinlock.h Fri Nov 29 00:53:15 2002
9922+++ linux-2.4.20-o1-preempt/include/linux/spinlock.h Tue Feb 18 03:52:06 2003
9923@@ -2,6 +2,7 @@
9924 #define __LINUX_SPINLOCK_H
9925
9926 #include <linux/config.h>
9927+#include <linux/compiler.h>
9928
9929 /*
9930 * These are the generic versions of the spinlocks and read-write
9931@@ -62,8 +63,10 @@
9932
9933 #if (DEBUG_SPINLOCKS < 1)
9934
9935+#ifndef CONFIG_PREEMPT
9936 #define atomic_dec_and_lock(atomic,lock) atomic_dec_and_test(atomic)
9937 #define ATOMIC_DEC_AND_LOCK
9938+#endif
9939
9940 /*
9941 * Your basic spinlocks, allowing only a single CPU anywhere
9942@@ -80,11 +83,11 @@
9943 #endif
9944
9945 #define spin_lock_init(lock) do { } while(0)
9946-#define spin_lock(lock) (void)(lock) /* Not "unused variable". */
9947+#define _raw_spin_lock(lock) (void)(lock) /* Not "unused variable". */
9948 #define spin_is_locked(lock) (0)
9949-#define spin_trylock(lock) ({1; })
9950+#define _raw_spin_trylock(lock) ({1; })
9951 #define spin_unlock_wait(lock) do { } while(0)
9952-#define spin_unlock(lock) do { } while(0)
9953+#define _raw_spin_unlock(lock) do { } while(0)
9954
9955 #elif (DEBUG_SPINLOCKS < 2)
9956
9957@@ -144,12 +147,77 @@
9958 #endif
9959
9960 #define rwlock_init(lock) do { } while(0)
9961-#define read_lock(lock) (void)(lock) /* Not "unused variable". */
9962-#define read_unlock(lock) do { } while(0)
9963-#define write_lock(lock) (void)(lock) /* Not "unused variable". */
9964-#define write_unlock(lock) do { } while(0)
9965+#define _raw_read_lock(lock) (void)(lock) /* Not "unused variable". */
9966+#define _raw_read_unlock(lock) do { } while(0)
9967+#define _raw_write_lock(lock) (void)(lock) /* Not "unused variable". */
9968+#define _raw_write_unlock(lock) do { } while(0)
9969
9970 #endif /* !SMP */
9971+
9972+#ifdef CONFIG_PREEMPT
9973+
9974+#define preempt_get_count() (current->preempt_count)
9975+#define preempt_is_disabled() (preempt_get_count() != 0)
9976+
9977+#define preempt_disable() \
9978+do { \
9979+ ++current->preempt_count; \
9980+ barrier(); \
9981+} while (0)
9982+
9983+#define preempt_enable_no_resched() \
9984+do { \
9985+ --current->preempt_count; \
9986+ barrier(); \
9987+} while (0)
9988+
9989+#define preempt_enable() \
9990+do { \
9991+ --current->preempt_count; \
9992+ barrier(); \
9993+ if (unlikely(current->preempt_count < current->need_resched)) \
9994+ preempt_schedule(); \
9995+} while (0)
9996+
9997+#define spin_lock(lock) \
9998+do { \
9999+ preempt_disable(); \
10000+ _raw_spin_lock(lock); \
10001+} while(0)
10002+
10003+#define spin_trylock(lock) ({preempt_disable(); _raw_spin_trylock(lock) ? \
10004+ 1 : ({preempt_enable(); 0;});})
10005+#define spin_unlock(lock) \
10006+do { \
10007+ _raw_spin_unlock(lock); \
10008+ preempt_enable(); \
10009+} while (0)
10010+
10011+#define read_lock(lock) ({preempt_disable(); _raw_read_lock(lock);})
10012+#define read_unlock(lock) ({_raw_read_unlock(lock); preempt_enable();})
10013+#define write_lock(lock) ({preempt_disable(); _raw_write_lock(lock);})
10014+#define write_unlock(lock) ({_raw_write_unlock(lock); preempt_enable();})
10015+#define write_trylock(lock) ({preempt_disable();_raw_write_trylock(lock) ? \
10016+ 1 : ({preempt_enable(); 0;});})
10017+
10018+#else
10019+
10020+#define preempt_get_count() (0)
10021+#define preempt_is_disabled() (1)
10022+#define preempt_disable() do { } while (0)
10023+#define preempt_enable_no_resched() do {} while(0)
10024+#define preempt_enable() do { } while (0)
10025+
10026+#define spin_lock(lock) _raw_spin_lock(lock)
10027+#define spin_trylock(lock) _raw_spin_trylock(lock)
10028+#define spin_unlock(lock) _raw_spin_unlock(lock)
10029+
10030+#define read_lock(lock) _raw_read_lock(lock)
10031+#define read_unlock(lock) _raw_read_unlock(lock)
10032+#define write_lock(lock) _raw_write_lock(lock)
10033+#define write_unlock(lock) _raw_write_unlock(lock)
10034+#define write_trylock(lock) _raw_write_trylock(lock)
10035+#endif
10036
10037 /* "lock on reference count zero" */
10038 #ifndef ATOMIC_DEC_AND_LOCK
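The wrappers above turn every spinlock-protected region into an implicit preemption-disabled region: preempt_disable() bumps a per-task nesting count, and preempt_enable() calls preempt_schedule() only once the count drops back to zero while need_resched is set. A minimal user-space model of that bookkeeping; the task structure and the main() harness are illustrative assumptions, not kernel code.

    /* Stand-alone model of the preempt_count nesting added above. */
    #include <stdio.h>

    struct task { int preempt_count; int need_resched; };

    static struct task current_task = { 0, 0 };
    #define current (&current_task)

    static void preempt_schedule(void) { printf("  -> preempt_schedule()\n"); }

    static void preempt_disable(void) { ++current->preempt_count; }

    static void preempt_enable(void)
    {
            --current->preempt_count;
            /* Same test as the patch: with both fields 0/1, this fires only
             * when the count has reached 0 and need_resched is set. */
            if (current->preempt_count < current->need_resched)
                    preempt_schedule();
    }

    int main(void)
    {
            preempt_disable();           /* e.g. outer spin_lock()          */
            preempt_disable();           /* nested spin_lock()              */
            current->need_resched = 1;   /* a wakeup arrives while locked   */
            preempt_enable();            /* count 2 -> 1: no reschedule     */
            preempt_enable();            /* count 1 -> 0: reschedules now   */
            return 0;
    }
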
10039diff -urN linux-2.4.20/include/linux/tqueue.h linux-2.4.20-o1-preempt/include/linux/tqueue.h
10040--- linux-2.4.20/include/linux/tqueue.h Thu Nov 22 20:46:19 2001
10041+++ linux-2.4.20-o1-preempt/include/linux/tqueue.h Tue Feb 18 03:52:06 2003
10042@@ -94,6 +94,22 @@
10043 extern spinlock_t tqueue_lock;
10044
10045 /*
10046+ * Call all "bottom halfs" on a given list.
10047+ */
10048+
10049+extern void __run_task_queue(task_queue *list);
10050+
10051+static inline void run_task_queue(task_queue *list)
10052+{
10053+ if (TQ_ACTIVE(*list))
10054+ __run_task_queue(list);
10055+}
10056+
10057+#endif /* _LINUX_TQUEUE_H */
10058+
10059+#if !defined(_LINUX_TQUEUE_H_INLINES) && defined(_TASK_STRUCT_DEFINED)
10060+#define _LINUX_TQUEUE_H_INLINES
10061+/*
10062 * Queue a task on a tq. Return non-zero if it was successfully
10063 * added.
10064 */
10065@@ -109,17 +125,4 @@
10066 }
10067 return ret;
10068 }
10069-
10070-/*
10071- * Call all "bottom halfs" on a given list.
10072- */
10073-
10074-extern void __run_task_queue(task_queue *list);
10075-
10076-static inline void run_task_queue(task_queue *list)
10077-{
10078- if (TQ_ACTIVE(*list))
10079- __run_task_queue(list);
10080-}
10081-
10082-#endif /* _LINUX_TQUEUE_H */
10083+#endif
10084diff -urN linux-2.4.20/include/linux/wait.h linux-2.4.20-o1-preempt/include/linux/wait.h
10085--- linux-2.4.20/include/linux/wait.h Thu Nov 22 20:46:19 2001
10086+++ linux-2.4.20-o1-preempt/include/linux/wait.h Tue Feb 18 03:51:30 2003
10087@@ -59,6 +59,7 @@
10088 # define wq_write_lock_irq write_lock_irq
10089 # define wq_write_lock_irqsave write_lock_irqsave
10090 # define wq_write_unlock_irqrestore write_unlock_irqrestore
10091+# define wq_write_unlock_irq write_unlock_irq
10092 # define wq_write_unlock write_unlock
10093 #else
10094 # define wq_lock_t spinlock_t
10095@@ -71,6 +72,7 @@
10096 # define wq_write_lock_irq spin_lock_irq
10097 # define wq_write_lock_irqsave spin_lock_irqsave
10098 # define wq_write_unlock_irqrestore spin_unlock_irqrestore
10099+# define wq_write_unlock_irq spin_unlock_irq
10100 # define wq_write_unlock spin_unlock
10101 #endif
10102
10103diff -urN linux-2.4.20/init/main.c linux-2.4.20-o1-preempt/init/main.c
10104--- linux-2.4.20/init/main.c Sat Aug 3 02:39:46 2002
10105+++ linux-2.4.20-o1-preempt/init/main.c Tue Feb 18 03:51:30 2003
10106@@ -288,8 +288,6 @@
10107 extern void setup_arch(char **);
10108 extern void cpu_idle(void);
10109
10110-unsigned long wait_init_idle;
10111-
10112 #ifndef CONFIG_SMP
10113
10114 #ifdef CONFIG_X86_LOCAL_APIC
10115@@ -298,34 +296,24 @@
10116 APIC_init_uniprocessor();
10117 }
10118 #else
10119-#define smp_init() do { } while (0)
10120+#define smp_init() do { } while (0)
10121 #endif
10122
10123 #else
10124
10125-
10126 /* Called by boot processor to activate the rest. */
10127 static void __init smp_init(void)
10128 {
10129 /* Get other processors into their bootup holding patterns. */
10130 smp_boot_cpus();
10131- wait_init_idle = cpu_online_map;
10132- clear_bit(current->processor, &wait_init_idle); /* Don't wait on me! */
10133
10134 smp_threads_ready=1;
10135 smp_commence();
10136-
10137- /* Wait for the other cpus to set up their idle processes */
10138- printk("Waiting on wait_init_idle (map = 0x%lx)\n", wait_init_idle);
10139- while (wait_init_idle) {
10140- cpu_relax();
10141- barrier();
10142- }
10143- printk("All processors have done init_idle\n");
10144 }
10145
10146 #endif
10147
10148+
10149 /*
10150 * We need to finalize in a non-__init function or else race conditions
10151 * between the root thread and the init thread may cause start_kernel to
10152@@ -337,9 +325,8 @@
10153 {
10154 kernel_thread(init, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
10155 unlock_kernel();
10156- current->need_resched = 1;
10157- cpu_idle();
10158-}
10159+ cpu_idle();
10160+}
10161
10162 /*
10163 * Activate the first processor.
10164@@ -424,14 +411,18 @@
10165 ipc_init();
10166 #endif
10167 check_bugs();
10168+
10169 printk("POSIX conformance testing by UNIFIX\n");
10170
10171- /*
10172- * We count on the initial thread going ok
10173- * Like idlers init is an unlocked kernel thread, which will
10174- * make syscalls (and thus be locked).
10175+ init_idle(current, smp_processor_id());
10176+ /*
10177+ * We count on the initial thread going ok
10178+ * Like idlers init is an unlocked kernel thread, which will
10179+ * make syscalls (and thus be locked).
10180 */
10181 smp_init();
10182+
10183+ /* Do the rest non-__init'ed, we're now alive */
10184 rest_init();
10185 }
10186
10187@@ -460,6 +451,10 @@
10188 */
10189 static void __init do_basic_setup(void)
10190 {
10191+ /* Start the per-CPU migration threads */
10192+#if CONFIG_SMP
10193+ migration_init();
10194+#endif
10195
10196 /*
10197 * Tell the world that we're going to be the grim
10198diff -urN linux-2.4.20/kernel/capability.c linux-2.4.20-o1-preempt/kernel/capability.c
10199--- linux-2.4.20/kernel/capability.c Sat Jun 24 06:06:37 2000
10200+++ linux-2.4.20-o1-preempt/kernel/capability.c Tue Feb 18 03:51:30 2003
10201@@ -8,6 +8,8 @@
10202 #include <linux/mm.h>
10203 #include <asm/uaccess.h>
10204
10205+unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
10206+
10207 kernel_cap_t cap_bset = CAP_INIT_EFF_SET;
10208
10209 /* Note: never hold tasklist_lock while spinning for this one */
10210diff -urN linux-2.4.20/kernel/exit.c linux-2.4.20-o1-preempt/kernel/exit.c
10211--- linux-2.4.20/kernel/exit.c Fri Nov 29 00:53:15 2002
10212+++ linux-2.4.20-o1-preempt/kernel/exit.c Tue Feb 18 03:52:06 2003
10213@@ -28,49 +28,22 @@
10214
10215 static void release_task(struct task_struct * p)
10216 {
10217- if (p != current) {
10218+ if (p == current)
10219+ BUG();
10220 #ifdef CONFIG_SMP
10221- /*
10222- * Wait to make sure the process isn't on the
10223- * runqueue (active on some other CPU still)
10224- */
10225- for (;;) {
10226- task_lock(p);
10227- if (!task_has_cpu(p))
10228- break;
10229- task_unlock(p);
10230- do {
10231- cpu_relax();
10232- barrier();
10233- } while (task_has_cpu(p));
10234- }
10235- task_unlock(p);
10236+ wait_task_inactive(p);
10237 #endif
10238- atomic_dec(&p->user->processes);
10239- free_uid(p->user);
10240- unhash_process(p);
10241-
10242- release_thread(p);
10243- current->cmin_flt += p->min_flt + p->cmin_flt;
10244- current->cmaj_flt += p->maj_flt + p->cmaj_flt;
10245- current->cnswap += p->nswap + p->cnswap;
10246- /*
10247- * Potentially available timeslices are retrieved
10248- * here - this way the parent does not get penalized
10249- * for creating too many processes.
10250- *
10251- * (this cannot be used to artificially 'generate'
10252- * timeslices, because any timeslice recovered here
10253- * was given away by the parent in the first place.)
10254- */
10255- current->counter += p->counter;
10256- if (current->counter >= MAX_COUNTER)
10257- current->counter = MAX_COUNTER;
10258- p->pid = 0;
10259- free_task_struct(p);
10260- } else {
10261- printk("task releasing itself\n");
10262- }
10263+ atomic_dec(&p->user->processes);
10264+ free_uid(p->user);
10265+ unhash_process(p);
10266+
10267+ release_thread(p);
10268+ current->cmin_flt += p->min_flt + p->cmin_flt;
10269+ current->cmaj_flt += p->maj_flt + p->cmaj_flt;
10270+ current->cnswap += p->nswap + p->cnswap;
10271+ sched_exit(p);
10272+ p->pid = 0;
10273+ free_task_struct(p);
10274 }
10275
10276 /*
10277@@ -150,6 +123,79 @@
10278 return retval;
10279 }
10280
10281+/**
10282+ * reparent_to_init() - Reparent the calling kernel thread to the init task.
10283+ *
10284+ * If a kernel thread is launched as a result of a system call, or if
10285+ * it ever exits, it should generally reparent itself to init so that
10286+ * it is correctly cleaned up on exit.
10287+ *
10288+ * Various task state fields such as scheduling policy and priority may
10289+ * have been inherited from a user process, so we reset them to sane values here.
10290+ *
10291+ * NOTE that reparent_to_init() gives the caller full capabilities.
10292+ */
10293+void reparent_to_init(void)
10294+{
10295+ write_lock_irq(&tasklist_lock);
10296+
10297+ /* Reparent to init */
10298+ REMOVE_LINKS(current);
10299+ current->p_pptr = child_reaper;
10300+ current->p_opptr = child_reaper;
10301+ SET_LINKS(current);
10302+
10303+ /* Set the exit signal to SIGCHLD so we signal init on exit */
10304+ current->exit_signal = SIGCHLD;
10305+
10306+ current->ptrace = 0;
10307+ if ((current->policy == SCHED_OTHER) && (task_nice(current) < 0))
10308+ set_user_nice(current, 0);
10309+ /* cpus_allowed? */
10310+ /* rt_priority? */
10311+ /* signals? */
10312+ current->cap_effective = CAP_INIT_EFF_SET;
10313+ current->cap_inheritable = CAP_INIT_INH_SET;
10314+ current->cap_permitted = CAP_FULL_SET;
10315+ current->keep_capabilities = 0;
10316+ memcpy(current->rlim, init_task.rlim, sizeof(*(current->rlim)));
10317+ current->user = INIT_USER;
10318+
10319+ write_unlock_irq(&tasklist_lock);
10320+}
10321+
10322+/*
10323+ * Put all the gunge required to become a kernel thread without
10324+ * attached user resources in one place where it belongs.
10325+ */
10326+
10327+void daemonize(void)
10328+{
10329+ struct fs_struct *fs;
10330+
10331+
10332+ /*
10333+ * If we were started as a result of loading a module, close all of the
10334+ * user space pages. We don't need them, and if we didn't close them
10335+ * they would be locked into memory.
10336+ */
10337+ exit_mm(current);
10338+
10339+ current->session = 1;
10340+ current->pgrp = 1;
10341+ current->tty = NULL;
10342+
10343+ /* Become as one with the init task */
10344+
10345+ exit_fs(current); /* current->fs->count--; */
10346+ fs = init_task.fs;
10347+ current->fs = fs;
10348+ atomic_inc(&fs->count);
10349+ exit_files(current);
10350+ current->files = init_task.files;
10351+ atomic_inc(&current->files->count);
10352+}
10353+
10354 /*
10355 * When we die, we re-parent all our children.
10356 * Try to give them to another thread in our thread
10357@@ -171,6 +217,7 @@
10358 /* Make sure we're not reparenting to ourselves */
10359 p->p_opptr = child_reaper;
10360
10361+ p->first_time_slice = 0;
10362 if (p->pdeath_signal) send_sig(p->pdeath_signal, p, 0);
10363 }
10364 }
10365@@ -313,8 +360,8 @@
10366 /* more a memory barrier than a real lock */
10367 task_lock(tsk);
10368 tsk->mm = NULL;
10369- task_unlock(tsk);
10370 enter_lazy_tlb(mm, current, smp_processor_id());
10371+ task_unlock(tsk);
10372 mmput(mm);
10373 }
10374 }
10375@@ -434,6 +481,11 @@
10376 panic("Attempted to kill init!");
10377 tsk->flags |= PF_EXITING;
10378 del_timer_sync(&tsk->real_timer);
10379+
10380+ if (unlikely(preempt_get_count()))
10381+ printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
10382+ current->comm, current->pid,
10383+ preempt_get_count());
10384
10385 fake_volatile:
10386 #ifdef CONFIG_BSD_PROCESS_ACCT
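
The reparent_to_init() and daemonize() helpers added above are meant to be called from kernel threads that were spawned on behalf of user processes. A minimal sketch of the intended 2.4-style usage follows; the thread function, its name and its loop body are illustrative only and not part of this patch, and the includes assume both prototypes are visible via <linux/sched.h>.

    /* Hypothetical kernel thread using the two helpers added above. */
    #include <linux/kernel.h>
    #include <linux/sched.h>
    #include <linux/string.h>

    static int my_kthread(void *unused)
    {
            daemonize();                    /* drop inherited mm, files, fs, tty */
            reparent_to_init();             /* let init reap us, reset prio/caps */
            strcpy(current->comm, "my_kthread");

            for (;;) {
                    /* ... real work would go here ... */
                    set_current_state(TASK_INTERRUPTIBLE);
                    schedule_timeout(HZ);   /* sleep roughly one second */
            }
            return 0;                       /* never reached in this sketch */
    }

    /* started from process context, e.g. at module init time:
     *      kernel_thread(my_kthread, NULL, CLONE_FS | CLONE_FILES);
     */
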
10387diff -urN linux-2.4.20/kernel/fork.c linux-2.4.20-o1-preempt/kernel/fork.c
10388--- linux-2.4.20/kernel/fork.c Fri Nov 29 00:53:15 2002
10389+++ linux-2.4.20-o1-preempt/kernel/fork.c Tue Feb 18 03:52:06 2003
10390@@ -30,7 +30,6 @@
10391
10392 /* The idle threads do not count.. */
10393 int nr_threads;
10394-int nr_running;
10395
10396 int max_threads;
10397 unsigned long total_forks; /* Handle normal Linux uptimes. */
10398@@ -38,6 +37,8 @@
10399
10400 struct task_struct *pidhash[PIDHASH_SZ];
10401
10402+rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */
10403+
10404 void add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
10405 {
10406 unsigned long flags;
10407@@ -629,6 +630,13 @@
10408 if (p->binfmt && p->binfmt->module)
10409 __MOD_INC_USE_COUNT(p->binfmt->module);
10410
10411+#ifdef CONFIG_PREEMPT
10412+ /*
10413+ * Continue with preemption disabled as part of the context
10414+ * switch, so start with preempt_count set to 1.
10415+ */
10416+ p->preempt_count = 1;
10417+#endif
10418 p->did_exec = 0;
10419 p->swappable = 0;
10420 p->state = TASK_UNINTERRUPTIBLE;
10421@@ -638,8 +646,7 @@
10422 if (p->pid == 0 && current->pid != 0)
10423 goto bad_fork_cleanup;
10424
10425- p->run_list.next = NULL;
10426- p->run_list.prev = NULL;
10427+ INIT_LIST_HEAD(&p->run_list);
10428
10429 p->p_cptr = NULL;
10430 init_waitqueue_head(&p->wait_chldexit);
10431@@ -649,6 +656,7 @@
10432 init_completion(&vfork);
10433 }
10434 spin_lock_init(&p->alloc_lock);
10435+ spin_lock_init(&p->switch_lock);
10436
10437 p->sigpending = 0;
10438 init_sigpending(&p->pending);
10439@@ -665,14 +673,15 @@
10440 #ifdef CONFIG_SMP
10441 {
10442 int i;
10443- p->cpus_runnable = ~0UL;
10444- p->processor = current->processor;
10445+
10446 /* ?? should we just memset this ?? */
10447 for(i = 0; i < smp_num_cpus; i++)
10448- p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0;
10449+ p->per_cpu_utime[cpu_logical_map(i)] =
10450+ p->per_cpu_stime[cpu_logical_map(i)] = 0;
10451 spin_lock_init(&p->sigmask_lock);
10452 }
10453 #endif
10454+ p->array = NULL;
10455 p->lock_depth = -1; /* -1 = no lock */
10456 p->start_time = jiffies;
10457
10458@@ -706,15 +715,27 @@
10459 p->pdeath_signal = 0;
10460
10461 /*
10462- * "share" dynamic priority between parent and child, thus the
10463- * total amount of dynamic priorities in the system doesn't change,
10464- * more scheduling fairness. This is only important in the first
10465- * timeslice, on the long run the scheduling behaviour is unchanged.
10466- */
10467- p->counter = (current->counter + 1) >> 1;
10468- current->counter >>= 1;
10469- if (!current->counter)
10470- current->need_resched = 1;
10471+ * Share the timeslice between parent and child, thus the
10472+ * total amount of pending timeslices in the system doesn't change,
10473+ * resulting in more scheduling fairness.
10474+ */
10475+ __cli();
10476+ if (!current->time_slice)
10477+ BUG();
10478+ p->time_slice = (current->time_slice + 1) >> 1;
10479+ current->time_slice >>= 1;
10480+ p->first_time_slice = 1;
10481+ if (!current->time_slice) {
10482+ /*
10483+ * This case is rare; it happens when the parent has only
10484+ * a single jiffy left from its timeslice. Taking the
10485+ * runqueue lock is not a problem.
10486+ */
10487+ current->time_slice = 1;
10488+ scheduler_tick(0,0);
10489+ }
10490+ p->sleep_timestamp = jiffies;
10491+ __sti();
10492
10493 /*
10494 * Ok, add it to the run-queues and make it
10495@@ -750,11 +771,16 @@
10496
10497 if (p->ptrace & PT_PTRACED)
10498 send_sig(SIGSTOP, p, 1);
10499-
10500- wake_up_process(p); /* do this last */
10501+ wake_up_forked_process(p); /* do this last */
10502 ++total_forks;
10503 if (clone_flags & CLONE_VFORK)
10504 wait_for_completion(&vfork);
10505+ else
10506+ /*
10507+ * Let the child process run first, to avoid most of the
10508+ * COW overhead when the child exec()s afterwards.
10509+ */
10510+ current->need_resched = 1;
10511
10512 fork_out:
10513 return retval;
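
The timeslice split in the hunk above hands the child the rounded-up half of the parent's remaining timeslice and leaves the parent the rounded-down half, so no ticks are created or lost at fork time. A standalone re-run of the same arithmetic, assuming for illustration that the parent reaches do_fork() with 7 ticks left:

    /* Userspace illustration of the fork-time timeslice split above. */
    #include <stdio.h>

    int main(void)
    {
            int parent = 7, child;          /* assumed starting value */

            child   = (parent + 1) >> 1;    /* child gets 4 ticks  */
            parent >>= 1;                   /* parent keeps 3 ticks */

            printf("child=%d parent=%d total=%d\n", child, parent, child + parent);
            return 0;                       /* prints: child=4 parent=3 total=7 */
    }

If the parent is down to a single tick it would be left with 0 here, which is why the code above puts one tick back and immediately charges it through scheduler_tick().
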
10514diff -urN linux-2.4.20/kernel/ksyms.c linux-2.4.20-o1-preempt/kernel/ksyms.c
10515--- linux-2.4.20/kernel/ksyms.c Fri Nov 29 00:53:15 2002
10516+++ linux-2.4.20-o1-preempt/kernel/ksyms.c Tue Feb 18 03:52:06 2003
10517@@ -443,16 +443,23 @@
10518 /* process management */
10519 EXPORT_SYMBOL(complete_and_exit);
10520 EXPORT_SYMBOL(__wake_up);
10521-EXPORT_SYMBOL(__wake_up_sync);
10522 EXPORT_SYMBOL(wake_up_process);
10523 EXPORT_SYMBOL(sleep_on);
10524 EXPORT_SYMBOL(sleep_on_timeout);
10525 EXPORT_SYMBOL(interruptible_sleep_on);
10526 EXPORT_SYMBOL(interruptible_sleep_on_timeout);
10527 EXPORT_SYMBOL(schedule);
10528+#ifdef CONFIG_PREEMPT
10529+EXPORT_SYMBOL(preempt_schedule);
10530+#endif
10531 EXPORT_SYMBOL(schedule_timeout);
10532 EXPORT_SYMBOL(yield);
10533 EXPORT_SYMBOL(__cond_resched);
10534+EXPORT_SYMBOL(set_user_nice);
10535+#ifdef CONFIG_SMP
10536+EXPORT_SYMBOL_GPL(set_cpus_allowed);
10537+#endif
10538+EXPORT_SYMBOL(nr_context_switches);
10539 EXPORT_SYMBOL(jiffies);
10540 EXPORT_SYMBOL(xtime);
10541 EXPORT_SYMBOL(do_gettimeofday);
10542@@ -463,7 +470,6 @@
10543 #endif
10544
10545 EXPORT_SYMBOL(kstat);
10546-EXPORT_SYMBOL(nr_running);
10547
10548 /* misc */
10549 EXPORT_SYMBOL(panic);
10550diff -urN linux-2.4.20/kernel/printk.c linux-2.4.20-o1-preempt/kernel/printk.c
10551--- linux-2.4.20/kernel/printk.c Sat Aug 3 02:39:46 2002
10552+++ linux-2.4.20-o1-preempt/kernel/printk.c Tue Feb 18 03:51:30 2003
10553@@ -26,6 +26,7 @@
10554 #include <linux/module.h>
10555 #include <linux/interrupt.h> /* For in_interrupt() */
10556 #include <linux/config.h>
10557+#include <linux/delay.h>
10558
10559 #include <asm/uaccess.h>
10560
10561diff -urN linux-2.4.20/kernel/ptrace.c linux-2.4.20-o1-preempt/kernel/ptrace.c
10562--- linux-2.4.20/kernel/ptrace.c Sat Aug 3 02:39:46 2002
10563+++ linux-2.4.20-o1-preempt/kernel/ptrace.c Tue Feb 18 03:51:30 2003
10564@@ -31,20 +31,7 @@
10565 if (child->state != TASK_STOPPED)
10566 return -ESRCH;
10567 #ifdef CONFIG_SMP
10568- /* Make sure the child gets off its CPU.. */
10569- for (;;) {
10570- task_lock(child);
10571- if (!task_has_cpu(child))
10572- break;
10573- task_unlock(child);
10574- do {
10575- if (child->state != TASK_STOPPED)
10576- return -ESRCH;
10577- barrier();
10578- cpu_relax();
10579- } while (task_has_cpu(child));
10580- }
10581- task_unlock(child);
10582+ wait_task_inactive(child);
10583 #endif
10584 }
10585
10586diff -urN linux-2.4.20/kernel/sched.c linux-2.4.20-o1-preempt/kernel/sched.c
10587--- linux-2.4.20/kernel/sched.c Fri Nov 29 00:53:15 2002
10588+++ linux-2.4.20-o1-preempt/kernel/sched.c Tue Feb 18 03:52:06 2003
10589@@ -3,340 +3,332 @@
10590 *
10591 * Kernel scheduler and related syscalls
10592 *
10593- * Copyright (C) 1991, 1992 Linus Torvalds
10594+ * Copyright (C) 1991-2002 Linus Torvalds
10595 *
10596 * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and
10597 * make semaphores SMP safe
10598 * 1998-11-19 Implemented schedule_timeout() and related stuff
10599 * by Andrea Arcangeli
10600- * 1998-12-28 Implemented better SMP scheduling by Ingo Molnar
10601+ * 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar:
10602+ * hybrid priority-list and round-robin design with
10603+ * an array-switch method of distributing timeslices
10604+ * and per-CPU runqueues. Additional code by Davide
10605+ * Libenzi, Robert Love, and Rusty Russell.
10606 */
10607
10608-/*
10609- * 'sched.c' is the main kernel file. It contains scheduling primitives
10610- * (sleep_on, wakeup, schedule etc) as well as a number of simple system
10611- * call functions (type getpid()), which just extract a field from
10612- * current-task
10613- */
10614-
10615-#include <linux/config.h>
10616 #include <linux/mm.h>
10617-#include <linux/init.h>
10618-#include <linux/smp_lock.h>
10619 #include <linux/nmi.h>
10620 #include <linux/interrupt.h>
10621-#include <linux/kernel_stat.h>
10622-#include <linux/completion.h>
10623-#include <linux/prefetch.h>
10624-#include <linux/compiler.h>
10625-
10626+#include <linux/init.h>
10627 #include <asm/uaccess.h>
10628+#include <linux/smp_lock.h>
10629 #include <asm/mmu_context.h>
10630-
10631-extern void timer_bh(void);
10632-extern void tqueue_bh(void);
10633-extern void immediate_bh(void);
10634+#include <linux/kernel_stat.h>
10635+#include <linux/completion.h>
10636
10637 /*
10638- * scheduler variables
10639+ * Convert user-nice values [ -20 ... 0 ... 19 ]
10640+ * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
10641+ * and back.
10642 */
10643+#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20)
10644+#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20)
10645+#define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio)
10646
10647-unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
10648-
10649-extern void mem_use(void);
10650+/*
10651+ * 'User priority' is the nice value converted to something we
10652+ * can work with better when scaling various scheduler parameters;
10653+ * it's a [ 0 ... 39 ] range.
10654+ */
10655+#define USER_PRIO(p) ((p)-MAX_RT_PRIO)
10656+#define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio)
10657+#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO))
10658
10659 /*
10660- * Scheduling quanta.
10661- *
10662- * NOTE! The unix "nice" value influences how long a process
10663- * gets. The nice value ranges from -20 to +19, where a -20
10664- * is a "high-priority" task, and a "+10" is a low-priority
10665- * task.
10666+ * These are the 'tuning knobs' of the scheduler:
10667 *
10668- * We want the time-slice to be around 50ms or so, so this
10669- * calculation depends on the value of HZ.
10670+ * Minimum timeslice is 10 msecs, default timeslice is 150 msecs,
10671+ * maximum timeslice is 300 msecs. Timeslices get refilled after
10672+ * they expire.
10673 */
10674-#if HZ < 200
10675-#define TICK_SCALE(x) ((x) >> 2)
10676-#elif HZ < 400
10677-#define TICK_SCALE(x) ((x) >> 1)
10678-#elif HZ < 800
10679-#define TICK_SCALE(x) (x)
10680-#elif HZ < 1600
10681-#define TICK_SCALE(x) ((x) << 1)
10682-#else
10683-#define TICK_SCALE(x) ((x) << 2)
10684-#endif
10685-
10686-#define NICE_TO_TICKS(nice) (TICK_SCALE(20-(nice))+1)
10687-
10688+#define MIN_TIMESLICE ( 10 * HZ / 1000)
10689+#define MAX_TIMESLICE (300 * HZ / 1000)
10690+#define CHILD_PENALTY 95
10691+#define PARENT_PENALTY 100
10692+#define EXIT_WEIGHT 3
10693+#define PRIO_BONUS_RATIO 25
10694+#define INTERACTIVE_DELTA 2
10695+#define MAX_SLEEP_AVG (2*HZ)
10696+#define STARVATION_LIMIT (2*HZ)
10697
10698 /*
10699- * Init task must be ok at boot for the ix86 as we will check its signals
10700- * via the SMP irq return path.
10701+ * If a task is 'interactive' then we reinsert it in the active
10702+ * array after it has expired its current timeslice. (it will not
10703+ * continue to run immediately, it will still round-robin with
10704+ * other interactive tasks.)
10705+ *
10706+ * This part scales the interactivity limit depending on niceness.
10707+ *
10708+ * We scale it linearly, offset by the INTERACTIVE_DELTA delta.
10709+ * Here are a few examples of different nice levels:
10710+ *
10711+ * TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0]
10712+ * TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0]
10713+ * TASK_INTERACTIVE( 0): [1,1,1,1,0,0,0,0,0,0,0]
10714+ * TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0]
10715+ * TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0]
10716+ *
10717+ * (the X axis represents the possible -5 ... 0 ... +5 dynamic
10718+ * priority range a task can explore, a value of '1' means the
10719+ * task is rated interactive.)
10720+ *
10721+ * I.e. nice +19 tasks can never get 'interactive' enough to be
10722+ * reinserted into the active array, and only hard CPU-hog nice -20
10723+ * tasks will be expired. Default nice 0 tasks are somewhere in
10724+ * between: it takes some effort for them to become interactive,
10725+ * but it's not too hard.
10726 */
10727-
10728-struct task_struct * init_tasks[NR_CPUS] = {&init_task, };
10729+
10730+#define SCALE(v1,v1_max,v2_max) \
10731+ (v1) * (v2_max) / (v1_max)
10732+
10733+#define DELTA(p) \
10734+ (SCALE(TASK_NICE(p), 40, MAX_USER_PRIO*PRIO_BONUS_RATIO/100) + \
10735+ INTERACTIVE_DELTA)
10736+
10737+#define TASK_INTERACTIVE(p) \
10738+ ((p)->prio <= (p)->static_prio - DELTA(p))
10739
10740 /*
10741- * The tasklist_lock protects the linked list of processes.
10742+ * TASK_TIMESLICE scales user-nice values [ -20 ... 19 ]
10743+ * to time slice values.
10744 *
10745- * The runqueue_lock locks the parts that actually access
10746- * and change the run-queues, and have to be interrupt-safe.
10747- *
10748- * If both locks are to be concurrently held, the runqueue_lock
10749- * nests inside the tasklist_lock.
10750- *
10751- * task->alloc_lock nests inside tasklist_lock.
10752+ * The higher a process's priority, the bigger timeslices
10753+ * it gets during one round of execution. But even the lowest
10754+ * priority process gets MIN_TIMESLICE worth of execution time.
10755 */
10756-spinlock_t runqueue_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED; /* inner */
10757-rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */
10758
10759-static LIST_HEAD(runqueue_head);
10760+#define TASK_TIMESLICE(p) (MIN_TIMESLICE + \
10761+ ((MAX_TIMESLICE - MIN_TIMESLICE) * (MAX_PRIO-1-(p)->static_prio)/39))
10762
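
Assuming the default priority layout (MAX_RT_PRIO = 100, MAX_PRIO = 140, i.e. 40 nice levels) and HZ = 100, the two macros above yield the 10 ms / 150 ms / 300 ms figures quoted in the 'tuning knobs' comment. The small standalone program below simply re-evaluates the same expressions; the configuration values are assumptions, not taken from this hunk.

    /* Re-derivation of NICE_TO_PRIO()/TASK_TIMESLICE() for a few nice levels. */
    #include <stdio.h>

    #define HZ              100     /* assumed */
    #define MAX_RT_PRIO     100     /* assumed default */
    #define MAX_PRIO        (MAX_RT_PRIO + 40)
    #define MIN_TIMESLICE   ( 10 * HZ / 1000)
    #define MAX_TIMESLICE   (300 * HZ / 1000)
    #define NICE_TO_PRIO(n) (MAX_RT_PRIO + (n) + 20)
    #define TIMESLICE(sp)   (MIN_TIMESLICE + \
            ((MAX_TIMESLICE - MIN_TIMESLICE) * (MAX_PRIO - 1 - (sp)) / 39))

    int main(void)
    {
            int nices[] = { -20, -10, 0, 10, 19 };
            int i;

            for (i = 0; i < 5; i++) {
                    int sp = NICE_TO_PRIO(nices[i]);
                    printf("nice %3d -> static_prio %3d -> %2d ticks (%3d ms)\n",
                           nices[i], sp, TIMESLICE(sp), TIMESLICE(sp) * 1000 / HZ);
            }
            return 0;
    }

    /* nice -20 -> 30 ticks (300 ms), nice 0 -> 15 ticks (150 ms),
     * nice +19 -> 1 tick (10 ms). */
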
10763 /*
10764- * We align per-CPU scheduling data on cacheline boundaries,
10765- * to prevent cacheline ping-pong.
10766+ * These are the runqueue data structures:
10767 */
10768-static union {
10769- struct schedule_data {
10770- struct task_struct * curr;
10771- cycles_t last_schedule;
10772- } schedule_data;
10773- char __pad [SMP_CACHE_BYTES];
10774-} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}};
10775
10776-#define cpu_curr(cpu) aligned_data[(cpu)].schedule_data.curr
10777-#define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule
10778+#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long))
10779
10780-struct kernel_stat kstat;
10781-extern struct task_struct *child_reaper;
10782+typedef struct runqueue runqueue_t;
10783
10784-#ifdef CONFIG_SMP
10785+struct prio_array {
10786+ int nr_active;
10787+ unsigned long bitmap[BITMAP_SIZE];
10788+ list_t queue[MAX_PRIO];
10789+};
10790
10791-#define idle_task(cpu) (init_tasks[cpu_number_map(cpu)])
10792-#define can_schedule(p,cpu) \
10793- ((p)->cpus_runnable & (p)->cpus_allowed & (1 << cpu))
10794+/*
10795+ * This is the main, per-CPU runqueue data structure.
10796+ *
10797+ * Locking rule: in code paths that need to lock multiple runqueues
10798+ * (such as the load balancing or the process migration code), lock
10799+ * acquire operations must be ordered by ascending &runqueue address.
10800+ */
10801+struct runqueue {
10802+ spinlock_t lock;
10803+ unsigned long nr_running, nr_switches, expired_timestamp;
10804+ task_t *curr, *idle;
10805+ prio_array_t *active, *expired, arrays[2];
10806+ long nr_uninterruptible;
10807+ int prev_nr_running[NR_CPUS];
10808+ task_t *migration_thread;
10809+ list_t migration_queue;
10810+} ____cacheline_aligned;
10811+
10812+static struct runqueue runqueues[NR_CPUS] __cacheline_aligned;
10813+
10814+#define cpu_rq(cpu) (runqueues + (cpu))
10815+#define this_rq() cpu_rq(smp_processor_id())
10816+#define task_rq(p) cpu_rq((p)->cpu)
10817+#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
10818+#define rt_task(p) ((p)->prio < MAX_RT_PRIO)
10819
10820-#else
10821+/*
10822+ * Default context-switch locking:
10823+ */
10824+#ifndef prepare_arch_switch
10825+# define prepare_arch_switch(rq, next) do { } while(0)
10826+# define finish_arch_switch(rq, next) spin_unlock_irq(&(rq)->lock)
10827+#endif
10828
10829-#define idle_task(cpu) (&init_task)
10830-#define can_schedule(p,cpu) (1)
10831+/*
10832+ * task_rq_lock - lock the runqueue a given task resides on and disable
10833+ * interrupts. Note the ordering: we can safely lookup the task_rq without
10834+ * explicitly disabling preemption.
10835+ */
10836+static inline runqueue_t *task_rq_lock(task_t *p, unsigned long *flags)
10837+{
10838+ struct runqueue *rq;
10839
10840-#endif
10841+repeat_lock_task:
10842+ preempt_disable();
10843+ rq = task_rq(p);
10844+ spin_lock_irqsave(&rq->lock, *flags);
10845+ if (unlikely(rq != task_rq(p))) {
10846+ spin_unlock_irqrestore(&rq->lock, *flags);
10847+ preempt_enable();
10848+ goto repeat_lock_task;
10849+ }
10850+ return rq;
10851+}
10852
10853-void scheduling_functions_start_here(void) { }
10854+static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags)
10855+{
10856+ spin_unlock_irqrestore(&rq->lock, *flags);
10857+ preempt_enable();
10858+}
10859
10860 /*
10861- * This is the function that decides how desirable a process is..
10862- * You can weigh different processes against each other depending
10863- * on what CPU they've run on lately etc to try to handle cache
10864- * and TLB miss penalties.
10865- *
10866- * Return values:
10867- * -1000: never select this
10868- * 0: out of time, recalculate counters (but it might still be
10869- * selected)
10870- * +ve: "goodness" value (the larger, the better)
10871- * +1000: realtime process, select this.
10872+ * Adding/removing a task to/from a priority array:
10873 */
10874+static inline void dequeue_task(struct task_struct *p, prio_array_t *array)
10875+{
10876+ array->nr_active--;
10877+ list_del(&p->run_list);
10878+ if (list_empty(array->queue + p->prio))
10879+ __clear_bit(p->prio, array->bitmap);
10880+}
10881
10882-static inline int goodness(struct task_struct * p, int this_cpu, struct mm_struct *this_mm)
10883+static inline void enqueue_task(struct task_struct *p, prio_array_t *array)
10884 {
10885- int weight;
10886+ list_add_tail(&p->run_list, array->queue + p->prio);
10887+ __set_bit(p->prio, array->bitmap);
10888+ array->nr_active++;
10889+ p->array = array;
10890+}
10891
10892- /*
10893- * select the current process after every other
10894- * runnable process, but before the idle thread.
10895- * Also, dont trigger a counter recalculation.
10896- */
10897- weight = -1;
10898- if (p->policy & SCHED_YIELD)
10899- goto out;
10900+static inline int effective_prio(task_t *p)
10901+{
10902+ int bonus, prio;
10903
10904 /*
10905- * Non-RT process - normal case first.
10906+ * Here we scale the actual sleep average [0 .... MAX_SLEEP_AVG]
10907+ * into the -5 ... 0 ... +5 bonus/penalty range.
10908+ *
10909+ * We use 25% of the full 0...39 priority range so that:
10910+ *
10911+ * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
10912+ * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
10913+ *
10914+ * Both properties are important to certain workloads.
10915 */
10916- if (p->policy == SCHED_OTHER) {
10917+ bonus = MAX_USER_PRIO*PRIO_BONUS_RATIO*p->sleep_avg/MAX_SLEEP_AVG/100 -
10918+ MAX_USER_PRIO*PRIO_BONUS_RATIO/100/2;
10919+
10920+ prio = p->static_prio - bonus;
10921+ if (prio < MAX_RT_PRIO)
10922+ prio = MAX_RT_PRIO;
10923+ if (prio > MAX_PRIO-1)
10924+ prio = MAX_PRIO-1;
10925+ return prio;
10926+}
10927+
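
With the default 40-entry nice range (MAX_USER_PRIO = 40, an assumption about the configuration) and the PRIO_BONUS_RATIO and MAX_SLEEP_AVG values defined above, the bonus expression in effective_prio() reduces to

    bonus = 40*25*sleep_avg / MAX_SLEEP_AVG / 100  -  40*25/100/2
          = 10*sleep_avg / MAX_SLEEP_AVG  -  5

so a task that never sleeps runs at static_prio + 5 (bonus -5) and a task whose sleep_avg has saturated runs at static_prio - 5 (bonus +5), which is exactly the -5 ... +5 dynamic range described in the comment; the two clamps keep the result inside [MAX_RT_PRIO, MAX_PRIO-1].
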
10928+static inline void activate_task(task_t *p, runqueue_t *rq)
10929+{
10930+ unsigned long sleep_time = jiffies - p->sleep_timestamp;
10931+ prio_array_t *array = rq->active;
10932+
10933+ if (!rt_task(p) && sleep_time) {
10934 /*
10935- * Give the process a first-approximation goodness value
10936- * according to the number of clock-ticks it has left.
10937- *
10938- * Don't do any other calculations if the time slice is
10939- * over..
10940+ * This code gives a bonus to interactive tasks. We update
10941+ * an 'average sleep time' value here, based on
10942+ * sleep_timestamp. The more time a task spends sleeping,
10943+ * the higher the average gets - and the higher the priority
10944+ * boost gets as well.
10945 */
10946- weight = p->counter;
10947- if (!weight)
10948- goto out;
10949-
10950-#ifdef CONFIG_SMP
10951- /* Give a largish advantage to the same processor... */
10952- /* (this is equivalent to penalizing other processors) */
10953- if (p->processor == this_cpu)
10954- weight += PROC_CHANGE_PENALTY;
10955-#endif
10956-
10957- /* .. and a slight advantage to the current MM */
10958- if (p->mm == this_mm || !p->mm)
10959- weight += 1;
10960- weight += 20 - p->nice;
10961- goto out;
10962+ p->sleep_avg += sleep_time;
10963+ if (p->sleep_avg > MAX_SLEEP_AVG)
10964+ p->sleep_avg = MAX_SLEEP_AVG;
10965+ p->prio = effective_prio(p);
10966 }
10967+ enqueue_task(p, array);
10968+ rq->nr_running++;
10969+}
10970
10971- /*
10972- * Realtime process, select the first one on the
10973- * runqueue (taking priorities within processes
10974- * into account).
10975- */
10976- weight = 1000 + p->rt_priority;
10977-out:
10978- return weight;
10979+static inline void deactivate_task(struct task_struct *p, runqueue_t *rq)
10980+{
10981+ rq->nr_running--;
10982+ if (p->state == TASK_UNINTERRUPTIBLE)
10983+ rq->nr_uninterruptible++;
10984+ dequeue_task(p, p->array);
10985+ p->array = NULL;
10986 }
10987
10988-/*
10989- * the 'goodness value' of replacing a process on a given CPU.
10990- * positive value means 'replace', zero or negative means 'dont'.
10991- */
10992-static inline int preemption_goodness(struct task_struct * prev, struct task_struct * p, int cpu)
10993+static inline void resched_task(task_t *p)
10994 {
10995- return goodness(p, cpu, prev->active_mm) - goodness(prev, cpu, prev->active_mm);
10996+#ifdef CONFIG_SMP
10997+ int need_resched;
10998+
10999+ preempt_disable();
11000+ need_resched = p->need_resched;
11001+ set_tsk_need_resched(p);
11002+ if (!need_resched && (p->cpu != smp_processor_id()))
11003+ smp_send_reschedule(p->cpu);
11004+#else
11005+ preempt_disable();
11006+ set_tsk_need_resched(p);
11007+#endif
11008+ preempt_enable();
11009 }
11010
11011+#ifdef CONFIG_SMP
11012+
11013 /*
11014- * This is ugly, but reschedule_idle() is very timing-critical.
11015- * We are called with the runqueue spinlock held and we must
11016- * not claim the tasklist_lock.
11017+ * Wait for a process to unschedule. This is used by the exit() and
11018+ * ptrace() code.
11019 */
11020-static FASTCALL(void reschedule_idle(struct task_struct * p));
11021-
11022-static void reschedule_idle(struct task_struct * p)
11023+void wait_task_inactive(task_t * p)
11024 {
11025-#ifdef CONFIG_SMP
11026- int this_cpu = smp_processor_id();
11027- struct task_struct *tsk, *target_tsk;
11028- int cpu, best_cpu, i, max_prio;
11029- cycles_t oldest_idle;
11030-
11031- /*
11032- * shortcut if the woken up task's last CPU is
11033- * idle now.
11034- */
11035- best_cpu = p->processor;
11036- if (can_schedule(p, best_cpu)) {
11037- tsk = idle_task(best_cpu);
11038- if (cpu_curr(best_cpu) == tsk) {
11039- int need_resched;
11040-send_now_idle:
11041- /*
11042- * If need_resched == -1 then we can skip sending
11043- * the IPI altogether, tsk->need_resched is
11044- * actively watched by the idle thread.
11045- */
11046- need_resched = tsk->need_resched;
11047- tsk->need_resched = 1;
11048- if ((best_cpu != this_cpu) && !need_resched)
11049- smp_send_reschedule(best_cpu);
11050- return;
11051- }
11052- }
11053-
11054- /*
11055- * We know that the preferred CPU has a cache-affine current
11056- * process, lets try to find a new idle CPU for the woken-up
11057- * process. Select the least recently active idle CPU. (that
11058- * one will have the least active cache context.) Also find
11059- * the executing process which has the least priority.
11060- */
11061- oldest_idle = (cycles_t) -1;
11062- target_tsk = NULL;
11063- max_prio = 0;
11064+ unsigned long flags;
11065+ runqueue_t *rq;
11066
11067- for (i = 0; i < smp_num_cpus; i++) {
11068- cpu = cpu_logical_map(i);
11069- if (!can_schedule(p, cpu))
11070- continue;
11071- tsk = cpu_curr(cpu);
11072+repeat:
11073+ preempt_disable();
11074+ rq = task_rq(p);
11075+ if (unlikely(rq->curr == p)) {
11076+ cpu_relax();
11077+ barrier();
11078 /*
11079- * We use the first available idle CPU. This creates
11080- * a priority list between idle CPUs, but this is not
11081- * a problem.
11082+ * enable/disable preemption just to make this
11083+ * a preemption point - we are busy-waiting
11084+ * anyway.
11085 */
11086- if (tsk == idle_task(cpu)) {
11087-#if defined(__i386__) && defined(CONFIG_SMP)
11088- /*
11089- * Check if two siblings are idle in the same
11090- * physical package. Use them if found.
11091- */
11092- if (smp_num_siblings == 2) {
11093- if (cpu_curr(cpu_sibling_map[cpu]) ==
11094- idle_task(cpu_sibling_map[cpu])) {
11095- oldest_idle = last_schedule(cpu);
11096- target_tsk = tsk;
11097- break;
11098- }
11099-
11100- }
11101-#endif
11102- if (last_schedule(cpu) < oldest_idle) {
11103- oldest_idle = last_schedule(cpu);
11104- target_tsk = tsk;
11105- }
11106- } else {
11107- if (oldest_idle == -1ULL) {
11108- int prio = preemption_goodness(tsk, p, cpu);
11109-
11110- if (prio > max_prio) {
11111- max_prio = prio;
11112- target_tsk = tsk;
11113- }
11114- }
11115- }
11116+ preempt_enable();
11117+ goto repeat;
11118 }
11119- tsk = target_tsk;
11120- if (tsk) {
11121- if (oldest_idle != -1ULL) {
11122- best_cpu = tsk->processor;
11123- goto send_now_idle;
11124- }
11125- tsk->need_resched = 1;
11126- if (tsk->processor != this_cpu)
11127- smp_send_reschedule(tsk->processor);
11128- }
11129- return;
11130-
11131-
11132-#else /* UP */
11133- int this_cpu = smp_processor_id();
11134- struct task_struct *tsk;
11135-
11136- tsk = cpu_curr(this_cpu);
11137- if (preemption_goodness(tsk, p, this_cpu) > 0)
11138- tsk->need_resched = 1;
11139-#endif
11140+ rq = task_rq_lock(p, &flags);
11141+ if (unlikely(rq->curr == p)) {
11142+ task_rq_unlock(rq, &flags);
11143+ preempt_enable();
11144+ goto repeat;
11145+ }
11146+ task_rq_unlock(rq, &flags);
11147+ preempt_enable();
11148 }
11149
11150 /*
11151- * Careful!
11152- *
11153- * This has to add the process to the _end_ of the
11154- * run-queue, not the beginning. The goodness value will
11155- * determine whether this process will run next. This is
11156- * important to get SCHED_FIFO and SCHED_RR right, where
11157- * a process that is either pre-empted or its time slice
11158- * has expired, should be moved to the tail of the run
11159- * queue for its priority - Bhavesh Davda
11160+ * Kick the remote CPU if the task is currently running;
11161+ * this code is used by the signal code to signal tasks
11162+ * which are in user-mode as quickly as possible.
11163+ *
11164+ * (Note that we do this lockless - if the task does anything
11165+ * while the message is in flight then it will notice the
11166+ * sigpending condition anyway.)
11167 */
11168-static inline void add_to_runqueue(struct task_struct * p)
11169+void kick_if_running(task_t * p)
11170 {
11171- list_add_tail(&p->run_list, &runqueue_head);
11172- nr_running++;
11173-}
11174-
11175-static inline void move_last_runqueue(struct task_struct * p)
11176-{
11177- list_del(&p->run_list);
11178- list_add_tail(&p->run_list, &runqueue_head);
11179+ if (p == task_rq(p)->curr && p->cpu != smp_processor_id())
11180+ resched_task(p);
11181 }
11182+#endif
11183
11184 /*
11185 * Wake up a process. Put it on the run-queue if it's not
11186@@ -345,429 +337,648 @@
11187 * progress), and as such you're allowed to do the simpler
11188 * "current->state = TASK_RUNNING" to mark yourself runnable
11189 * without the overhead of this.
11190+ *
11191+ * returns failure only if the task is already active.
11192 */
11193-static inline int try_to_wake_up(struct task_struct * p, int synchronous)
11194+static int try_to_wake_up(task_t * p, int sync)
11195 {
11196 unsigned long flags;
11197 int success = 0;
11198+ long old_state;
11199+ runqueue_t *rq;
11200
11201- /*
11202- * We want the common case fall through straight, thus the goto.
11203- */
11204- spin_lock_irqsave(&runqueue_lock, flags);
11205+repeat_lock_task:
11206+ rq = task_rq_lock(p, &flags);
11207+ old_state = p->state;
11208+ if (!p->array) {
11209+ if (unlikely(sync) &&
11210+ rq->curr != p &&
11211+ p->cpu != smp_processor_id() &&
11212+ p->cpus_allowed & (1UL << smp_processor_id())) {
11213+ p->cpu = smp_processor_id();
11214+ task_rq_unlock(rq, &flags);
11215+ goto repeat_lock_task;
11216+ }
11217+ if (old_state == TASK_UNINTERRUPTIBLE)
11218+ rq->nr_uninterruptible--;
11219+ activate_task(p, rq);
11220+ if (p->prio < rq->curr->prio)
11221+ resched_task(rq->curr);
11222+ success = 1;
11223+ }
11224 p->state = TASK_RUNNING;
11225- if (task_on_runqueue(p))
11226- goto out;
11227- add_to_runqueue(p);
11228- if (!synchronous || !(p->cpus_allowed & (1 << smp_processor_id())))
11229- reschedule_idle(p);
11230- success = 1;
11231-out:
11232- spin_unlock_irqrestore(&runqueue_lock, flags);
11233+ task_rq_unlock(rq, &flags);
11234+
11235 return success;
11236 }
11237
11238-inline int wake_up_process(struct task_struct * p)
11239+int wake_up_process(task_t * p)
11240 {
11241 return try_to_wake_up(p, 0);
11242 }
11243
11244-static void process_timeout(unsigned long __data)
11245+void wake_up_forked_process(task_t * p)
11246 {
11247- struct task_struct * p = (struct task_struct *) __data;
11248+ runqueue_t *rq;
11249
11250- wake_up_process(p);
11251+ preempt_disable();
11252+ rq = this_rq();
11253+ spin_lock_irq(&rq->lock);
11254+
11255+ p->state = TASK_RUNNING;
11256+ if (!rt_task(p)) {
11257+ /*
11258+ * We decrease the sleep average of forking parents
11259+ * and children as well, to keep max-interactive tasks
11260+ * from forking tasks that are max-interactive.
11261+ */
11262+ current->sleep_avg = current->sleep_avg * PARENT_PENALTY / 100;
11263+ p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100;
11264+ p->prio = effective_prio(p);
11265+ }
11266+ p->cpu = smp_processor_id();
11267+ activate_task(p, rq);
11268+ spin_unlock_irq(&rq->lock);
11269+ preempt_enable();
11270 }
11271
11272-/**
11273- * schedule_timeout - sleep until timeout
11274- * @timeout: timeout value in jiffies
11275- *
11276- * Make the current task sleep until @timeout jiffies have
11277- * elapsed. The routine will return immediately unless
11278- * the current task state has been set (see set_current_state()).
11279- *
11280- * You can set the task state as follows -
11281- *
11282- * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
11283- * pass before the routine returns. The routine will return 0
11284- *
11285- * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
11286- * delivered to the current task. In this case the remaining time
11287- * in jiffies will be returned, or 0 if the timer expired in time
11288- *
11289- * The current task state is guaranteed to be TASK_RUNNING when this
11290- * routine returns.
11291- *
11292- * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
11293- * the CPU away without a bound on the timeout. In this case the return
11294- * value will be %MAX_SCHEDULE_TIMEOUT.
11295- *
11296- * In all cases the return value is guaranteed to be non-negative.
11297+/*
11298+ * Potentially available exiting-child timeslices are
11299+ * retrieved here - this way the parent does not get
11300+ * penalized for creating too many processes.
11301+ *
11302+ * (this cannot be used to 'generate' timeslices
11303+ * artificially, because any timeslice recovered here
11304+ * was given away by the parent in the first place.)
11305 */
11306-signed long schedule_timeout(signed long timeout)
11307+void sched_exit(task_t * p)
11308 {
11309- struct timer_list timer;
11310- unsigned long expire;
11311+ __cli();
11312+ if (p->first_time_slice) {
11313+ current->time_slice += p->time_slice;
11314+ if (unlikely(current->time_slice > MAX_TIMESLICE))
11315+ current->time_slice = MAX_TIMESLICE;
11316+ }
11317+ __sti();
11318+ /*
11319+ * If the child was a (relative-) CPU hog then decrease
11320+ * the sleep_avg of the parent as well.
11321+ */
11322+ if (p->sleep_avg < current->sleep_avg)
11323+ current->sleep_avg = (current->sleep_avg * EXIT_WEIGHT +
11324+ p->sleep_avg) / (EXIT_WEIGHT + 1);
11325+}
11326
11327- switch (timeout)
11328- {
11329- case MAX_SCHEDULE_TIMEOUT:
11330- /*
11331- * These two special cases are useful to be comfortable
11332- * in the caller. Nothing more. We could take
11333- * MAX_SCHEDULE_TIMEOUT from one of the negative value
11334- * but I' d like to return a valid offset (>=0) to allow
11335- * the caller to do everything it want with the retval.
11336- */
11337- schedule();
11338- goto out;
11339- default:
11340- /*
11341- * Another bit of PARANOID. Note that the retval will be
11342- * 0 since no piece of kernel is supposed to do a check
11343- * for a negative retval of schedule_timeout() (since it
11344- * should never happens anyway). You just have the printk()
11345- * that will tell you if something is gone wrong and where.
11346- */
11347- if (timeout < 0)
11348- {
11349- printk(KERN_ERR "schedule_timeout: wrong timeout "
11350- "value %lx from %p\n", timeout,
11351- __builtin_return_address(0));
11352- current->state = TASK_RUNNING;
11353- goto out;
11354- }
11355+#if CONFIG_SMP || CONFIG_PREEMPT
11356+asmlinkage void schedule_tail(task_t *prev)
11357+{
11358+ finish_arch_switch(this_rq(), prev);
11359+}
11360+#endif
11361+
11362+static inline task_t * context_switch(task_t *prev, task_t *next)
11363+{
11364+ struct mm_struct *mm = next->mm;
11365+ struct mm_struct *oldmm = prev->active_mm;
11366+
11367+ if (unlikely(!mm)) {
11368+ next->active_mm = oldmm;
11369+ atomic_inc(&oldmm->mm_count);
11370+ enter_lazy_tlb(oldmm, next, smp_processor_id());
11371+ } else
11372+ switch_mm(oldmm, mm, next, smp_processor_id());
11373+
11374+ if (unlikely(!prev->mm)) {
11375+ prev->active_mm = NULL;
11376+ mmdrop(oldmm);
11377 }
11378
11379- expire = timeout + jiffies;
11380+ /* Here we just switch the register state and the stack. */
11381+ switch_to(prev, next, prev);
11382
11383- init_timer(&timer);
11384- timer.expires = expire;
11385- timer.data = (unsigned long) current;
11386- timer.function = process_timeout;
11387+ return prev;
11388+}
11389
11390- add_timer(&timer);
11391- schedule();
11392- del_timer_sync(&timer);
11393+unsigned long nr_running(void)
11394+{
11395+ unsigned long i, sum = 0;
11396
11397- timeout = expire - jiffies;
11398+ for (i = 0; i < smp_num_cpus; i++)
11399+ sum += cpu_rq(cpu_logical_map(i))->nr_running;
11400
11401- out:
11402- return timeout < 0 ? 0 : timeout;
11403+ return sum;
11404 }
11405
11406-/*
11407- * schedule_tail() is getting called from the fork return path. This
11408- * cleans up all remaining scheduler things, without impacting the
11409- * common case.
11410- */
11411-static inline void __schedule_tail(struct task_struct *prev)
11412+/* Note: the per-cpu information is useful only to get the cumulative result */
11413+unsigned long nr_uninterruptible(void)
11414 {
11415-#ifdef CONFIG_SMP
11416- int policy;
11417+ unsigned long i, sum = 0;
11418
11419- /*
11420- * prev->policy can be written from here only before `prev'
11421- * can be scheduled (before setting prev->cpus_runnable to ~0UL).
11422- * Of course it must also be read before allowing prev
11423- * to be rescheduled, but since the write depends on the read
11424- * to complete, wmb() is enough. (the spin_lock() acquired
11425- * before setting cpus_runnable is not enough because the spin_lock()
11426- * common code semantics allows code outside the critical section
11427- * to enter inside the critical section)
11428- */
11429- policy = prev->policy;
11430- prev->policy = policy & ~SCHED_YIELD;
11431- wmb();
11432-
11433- /*
11434- * fast path falls through. We have to clear cpus_runnable before
11435- * checking prev->state to avoid a wakeup race. Protect against
11436- * the task exiting early.
11437- */
11438- task_lock(prev);
11439- task_release_cpu(prev);
11440- mb();
11441- if (prev->state == TASK_RUNNING)
11442- goto needs_resched;
11443+ for (i = 0; i < smp_num_cpus; i++)
11444+ sum += cpu_rq(cpu_logical_map(i))->nr_uninterruptible;
11445
11446-out_unlock:
11447- task_unlock(prev); /* Synchronise here with release_task() if prev is TASK_ZOMBIE */
11448- return;
11449+ return sum;
11450+}
11451
11452- /*
11453- * Slow path - we 'push' the previous process and
11454- * reschedule_idle() will attempt to find a new
11455- * processor for it. (but it might preempt the
11456- * current process as well.) We must take the runqueue
11457- * lock and re-check prev->state to be correct. It might
11458- * still happen that this process has a preemption
11459- * 'in progress' already - but this is not a problem and
11460- * might happen in other circumstances as well.
11461- */
11462-needs_resched:
11463- {
11464- unsigned long flags;
11465+unsigned long nr_context_switches(void)
11466+{
11467+ unsigned long i, sum = 0;
11468
11469- /*
11470- * Avoid taking the runqueue lock in cases where
11471- * no preemption-check is necessery:
11472- */
11473- if ((prev == idle_task(smp_processor_id())) ||
11474- (policy & SCHED_YIELD))
11475- goto out_unlock;
11476+ for (i = 0; i < smp_num_cpus; i++)
11477+ sum += cpu_rq(cpu_logical_map(i))->nr_switches;
11478
11479- spin_lock_irqsave(&runqueue_lock, flags);
11480- if ((prev->state == TASK_RUNNING) && !task_has_cpu(prev))
11481- reschedule_idle(prev);
11482- spin_unlock_irqrestore(&runqueue_lock, flags);
11483- goto out_unlock;
11484+ return sum;
11485+}
11486+
11487+#if CONFIG_SMP
11488+/*
11489+ * Lock the busiest runqueue as well; this_rq is locked already.
11490+ * Recalculate nr_running if we have to drop the runqueue lock.
11491+ */
11492+static inline unsigned int double_lock_balance(runqueue_t *this_rq,
11493+ runqueue_t *busiest, int this_cpu, int idle, unsigned int nr_running)
11494+{
11495+ if (unlikely(!spin_trylock(&busiest->lock))) {
11496+ if (busiest < this_rq) {
11497+ spin_unlock(&this_rq->lock);
11498+ spin_lock(&busiest->lock);
11499+ spin_lock(&this_rq->lock);
11500+ /* Need to recalculate nr_running */
11501+ if (idle || (this_rq->nr_running > this_rq->prev_nr_running[this_cpu]))
11502+ nr_running = this_rq->nr_running;
11503+ else
11504+ nr_running = this_rq->prev_nr_running[this_cpu];
11505+ } else
11506+ spin_lock(&busiest->lock);
11507 }
11508-#else
11509- prev->policy &= ~SCHED_YIELD;
11510-#endif /* CONFIG_SMP */
11511+ return nr_running;
11512 }
11513
11514-asmlinkage void schedule_tail(struct task_struct *prev)
11515+/*
11516+ * Move a task from a remote runqueue to the local runqueue.
11517+ * Both runqueues must be locked.
11518+ */
11519+static inline void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, runqueue_t *this_rq, int this_cpu)
11520 {
11521- __schedule_tail(prev);
11522+ dequeue_task(p, src_array);
11523+ src_rq->nr_running--;
11524+ p->cpu = this_cpu;
11525+ this_rq->nr_running++;
11526+ enqueue_task(p, this_rq->active);
11527+ /*
11528+ * Note that idle threads have a prio of MAX_PRIO, so this test
11529+ * is always true for them.
11530+ */
11531+ if (p->prio < this_rq->curr->prio)
11532+ set_need_resched();
11533 }
11534
11535 /*
11536- * 'schedule()' is the scheduler function. It's a very simple and nice
11537- * scheduler: it's not perfect, but certainly works for most things.
11538+ * Current runqueue is empty, or rebalance tick: if there is an
11539+ * imbalance (current runqueue is too short) then pull from
11540+ * busiest runqueue(s).
11541 *
11542- * The goto is "interesting".
11543- *
11544- * NOTE!! Task 0 is the 'idle' task, which gets called when no other
11545- * tasks can run. It can not be killed, and it cannot sleep. The 'state'
11546- * information in task[0] is never used.
11547+ * We call this with the current runqueue locked,
11548+ * irqs disabled.
11549 */
11550-asmlinkage void schedule(void)
11551+static void load_balance(runqueue_t *this_rq, int idle)
11552 {
11553- struct schedule_data * sched_data;
11554- struct task_struct *prev, *next, *p;
11555- struct list_head *tmp;
11556- int this_cpu, c;
11557-
11558-
11559- spin_lock_prefetch(&runqueue_lock);
11560-
11561- BUG_ON(!current->active_mm);
11562-need_resched_back:
11563- prev = current;
11564- this_cpu = prev->processor;
11565-
11566- if (unlikely(in_interrupt())) {
11567- printk("Scheduling in interrupt\n");
11568- BUG();
11569- }
11570-
11571- release_kernel_lock(prev, this_cpu);
11572+ int imbalance, nr_running, load, max_load,
11573+ idx, i, this_cpu = smp_processor_id();
11574+ task_t *tmp;
11575+ runqueue_t *busiest, *rq_src;
11576+ prio_array_t *array;
11577+ list_t *head, *curr;
11578
11579 /*
11580- * 'sched_data' is protected by the fact that we can run
11581- * only one process per CPU.
11582+ * We search all runqueues to find the most busy one.
11583+ * We do this lockless to reduce cache-bouncing overhead;
11584+ * we re-check the 'best' source CPU later on again, with
11585+ * the lock held.
11586+ *
11587+ * We fend off statistical fluctuations in runqueue lengths by
11588+ * saving the runqueue length during the previous load-balancing
11589+ * operation and using the smaller of the current and saved lengths.
11590+ * Only if a runqueue stays long for a sustained amount of time do
11591+ * we recognize it and pull tasks from it.
11592+ *
11593+ * The 'current runqueue length' is a statistical maximum variable;
11594+ * for that one we take the longer length, to avoid fluctuations in
11595+ * the other direction. So for a load-balance to happen there needs
11596+ * to be a stably long runqueue on the target CPU and a stably short
11597+ * runqueue on the local CPU.
11598+ *
11599+ * We make an exception if this CPU is about to become idle - in
11600+ * that case we are less picky about moving a task across CPUs and
11601+ * take what can be taken.
11602 */
11603- sched_data = & aligned_data[this_cpu].schedule_data;
11604+ if (idle || (this_rq->nr_running > this_rq->prev_nr_running[this_cpu]))
11605+ nr_running = this_rq->nr_running;
11606+ else
11607+ nr_running = this_rq->prev_nr_running[this_cpu];
11608
11609- spin_lock_irq(&runqueue_lock);
11610+ busiest = NULL;
11611+ max_load = 1;
11612+ for (i = 0; i < smp_num_cpus; i++) {
11613+ int logical = cpu_logical_map(i);
11614
11615- /* move an exhausted RR process to be last.. */
11616- if (unlikely(prev->policy == SCHED_RR))
11617- if (!prev->counter) {
11618- prev->counter = NICE_TO_TICKS(prev->nice);
11619- move_last_runqueue(prev);
11620+ rq_src = cpu_rq(logical);
11621+ if (idle || (rq_src->nr_running < this_rq->prev_nr_running[logical]))
11622+ load = rq_src->nr_running;
11623+ else
11624+ load = this_rq->prev_nr_running[logical];
11625+ this_rq->prev_nr_running[logical] = rq_src->nr_running;
11626+
11627+ if ((load > max_load) && (rq_src != this_rq)) {
11628+ busiest = rq_src;
11629+ max_load = load;
11630 }
11631-
11632- switch (prev->state) {
11633- case TASK_INTERRUPTIBLE:
11634- if (signal_pending(prev)) {
11635- prev->state = TASK_RUNNING;
11636- break;
11637- }
11638- default:
11639- del_from_runqueue(prev);
11640- case TASK_RUNNING:;
11641 }
11642- prev->need_resched = 0;
11643
11644+ if (likely(!busiest))
11645+ return;
11646+
11647+ imbalance = (max_load - nr_running) / 2;
11648+
11649+ /* It needs an at least ~25% imbalance to trigger balancing. */
11650+ if (!idle && (imbalance < (max_load + 3)/4))
11651+ return;
11652+
11653+ nr_running = double_lock_balance(this_rq, busiest, this_cpu, idle, nr_running);
11654 /*
11655- * this is the scheduler proper:
11656+ * Make sure nothing changed since we checked the
11657+ * runqueue length.
11658 */
11659+ if (busiest->nr_running <= nr_running + 1)
11660+ goto out_unlock;
11661
11662-repeat_schedule:
11663 /*
11664- * Default process to select..
11665+ * We first consider expired tasks. Those will likely not be
11666+ * executed in the near future, and they are most likely to
11667+ * be cache-cold, thus switching CPUs has the least effect
11668+ * on them.
11669 */
11670- next = idle_task(this_cpu);
11671- c = -1000;
11672- list_for_each(tmp, &runqueue_head) {
11673- p = list_entry(tmp, struct task_struct, run_list);
11674- if (can_schedule(p, this_cpu)) {
11675- int weight = goodness(p, this_cpu, prev->active_mm);
11676- if (weight > c)
11677- c = weight, next = p;
11678+ if (busiest->expired->nr_active)
11679+ array = busiest->expired;
11680+ else
11681+ array = busiest->active;
11682+
11683+new_array:
11684+ /* Start searching at priority 0: */
11685+ idx = 0;
11686+skip_bitmap:
11687+ if (!idx)
11688+ idx = sched_find_first_bit(array->bitmap);
11689+ else
11690+ idx = find_next_bit(array->bitmap, MAX_PRIO, idx);
11691+ if (idx == MAX_PRIO) {
11692+ if (array == busiest->expired) {
11693+ array = busiest->active;
11694+ goto new_array;
11695 }
11696+ goto out_unlock;
11697 }
11698
11699- /* Do we need to re-calculate counters? */
11700- if (unlikely(!c)) {
11701- struct task_struct *p;
11702-
11703- spin_unlock_irq(&runqueue_lock);
11704- read_lock(&tasklist_lock);
11705- for_each_task(p)
11706- p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);
11707- read_unlock(&tasklist_lock);
11708- spin_lock_irq(&runqueue_lock);
11709- goto repeat_schedule;
11710+ head = array->queue + idx;
11711+ curr = head->prev;
11712+skip_queue:
11713+ tmp = list_entry(curr, task_t, run_list);
11714+
11715+ /*
11716+ * We do not migrate tasks that are:
11717+ * 1) running (obviously), or
11718+ * 2) cannot be migrated to this CPU due to cpus_allowed, or
11719+ * 3) are cache-hot on their current CPU.
11720+ */
11721+
11722+#define CAN_MIGRATE_TASK(p,rq,this_cpu) \
11723+ ((jiffies - (p)->sleep_timestamp > cache_decay_ticks) && \
11724+ ((p) != (rq)->curr) && \
11725+ ((p)->cpus_allowed & (1UL << (this_cpu))))
11726+
11727+ curr = curr->prev;
11728+
11729+ if (!CAN_MIGRATE_TASK(tmp, busiest, this_cpu)) {
11730+ if (curr != head)
11731+ goto skip_queue;
11732+ idx++;
11733+ goto skip_bitmap;
11734+ }
11735+ pull_task(busiest, array, tmp, this_rq, this_cpu);
11736+ if (!idle && --imbalance) {
11737+ if (curr != head)
11738+ goto skip_queue;
11739+ idx++;
11740+ goto skip_bitmap;
11741 }
11742+out_unlock:
11743+ spin_unlock(&busiest->lock);
11744+}
11745
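
Two worked examples of the thresholds in load_balance() above (the runqueue lengths are made up for illustration):

    max_load = 4, nr_running = 3:  imbalance = (4-3)/2 = 0, trigger = (4+3)/4 = 1  -> 0 < 1, nothing is moved
    max_load = 6, nr_running = 2:  imbalance = (6-2)/2 = 2, trigger = (6+3)/4 = 2  -> 2 is not < 2, pull up to 2 tasks

When the CPU is about to go idle the trigger check is skipped entirely and the pull loop stops after moving a single task.
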
11746- /*
11747- * from this point on nothing can prevent us from
11748- * switching to the next task, save this fact in
11749- * sched_data.
11750- */
11751- sched_data->curr = next;
11752- task_set_cpu(next, this_cpu);
11753- spin_unlock_irq(&runqueue_lock);
11754+/*
11755+ * Either the idle-tick or the busy-tick balancing path below gets
11756+ * called every timer tick, on every CPU. Our balancing action
11757+ * frequency and balancing aggressiveness depend on whether the CPU is
11758+ * idle or not.
11759+ *
11760+ * busy-rebalance every 250 msecs. idle-rebalance every 1 msec. (or on
11761+ * systems with HZ=100, every 10 msecs.)
11762+ */
11763+#define BUSY_REBALANCE_TICK (HZ/4 ?: 1)
11764+#define IDLE_REBALANCE_TICK (HZ/1000 ?: 1)
11765
11766- if (unlikely(prev == next)) {
11767- /* We won't go through the normal tail, so do this by hand */
11768- prev->policy &= ~SCHED_YIELD;
11769- goto same_process;
11770- }
11771+static inline void idle_tick(void)
11772+{
11773+ if (jiffies % IDLE_REBALANCE_TICK)
11774+ return;
11775+ spin_lock(&this_rq()->lock);
11776+ load_balance(this_rq(), 1);
11777+ spin_unlock(&this_rq()->lock);
11778+}
11779
11780-#ifdef CONFIG_SMP
11781- /*
11782- * maintain the per-process 'last schedule' value.
11783- * (this has to be recalculated even if we reschedule to
11784- * the same process) Currently this is only used on SMP,
11785- * and it's approximate, so we do not have to maintain
11786- * it while holding the runqueue spinlock.
11787- */
11788- sched_data->last_schedule = get_cycles();
11789+#endif
11790+
11791+/*
11792+ * We place interactive tasks back into the active array, if possible.
11793+ *
11794+ * To guarantee that this does not starve expired tasks we ignore the
11795+ * interactivity of a task if the first expired task had to wait more
11796+ * than a 'reasonable' amount of time. This deadline timeout is
11797+ * load-dependent, as the frequency of array switches decreases with
11798+ * increasing number of running tasks:
11799+ */
11800+#define EXPIRED_STARVING(rq) \
11801+ ((rq)->expired_timestamp && \
11802+ (jiffies - (rq)->expired_timestamp >= \
11803+ STARVATION_LIMIT * ((rq)->nr_running) + 1))
11804+
11805+/*
11806+ * This function gets called by the timer code, with HZ frequency.
11807+ * We call it with interrupts disabled.
11808+ */
11809+void scheduler_tick(int user_tick, int system)
11810+{
11811+ int cpu = smp_processor_id();
11812+ runqueue_t *rq = this_rq();
11813+ task_t *p = current;
11814+
11815+ if (p == rq->idle) {
11816+ if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
11817+ kstat.per_cpu_system[cpu] += system;
11818+#if CONFIG_SMP
11819+ idle_tick();
11820+#endif
11821+ return;
11822+ }
11823+ if (TASK_NICE(p) > 0)
11824+ kstat.per_cpu_nice[cpu] += user_tick;
11825+ else
11826+ kstat.per_cpu_user[cpu] += user_tick;
11827+ kstat.per_cpu_system[cpu] += system;
11828
11829+ /* Task might have expired already, but not scheduled off yet */
11830+ if (p->array != rq->active) {
11831+ set_tsk_need_resched(p);
11832+ return;
11833+ }
11834+ spin_lock(&rq->lock);
11835+ if (unlikely(rt_task(p))) {
11836+ /*
11837+ * RR tasks need a special form of timeslice management.
11838+ * FIFO tasks have no timeslices.
11839+ */
11840+ if ((p->policy == SCHED_RR) && !--p->time_slice) {
11841+ p->time_slice = TASK_TIMESLICE(p);
11842+ p->first_time_slice = 0;
11843+ set_tsk_need_resched(p);
11844+
11845+ /* put it at the end of the queue: */
11846+ dequeue_task(p, rq->active);
11847+ enqueue_task(p, rq->active);
11848+ }
11849+ goto out;
11850+ }
11851 /*
11852- * We drop the scheduler lock early (it's a global spinlock),
11853- * thus we have to lock the previous process from getting
11854- * rescheduled during switch_to().
11855- */
11856+ * The task was running during this tick - update the
11857+ * time slice counter and the sleep average. Note: we
11858+ * do not update a process's priority until it either
11859+ * goes to sleep or uses up its timeslice. This makes
11860+ * it possible for interactive tasks to use up their
11861+ * timeslices at their highest priority levels.
11862+ */
11863+ if (p->sleep_avg)
11864+ p->sleep_avg--;
11865+ if (!--p->time_slice) {
11866+ dequeue_task(p, rq->active);
11867+ set_tsk_need_resched(p);
11868+ p->prio = effective_prio(p);
11869+ p->time_slice = TASK_TIMESLICE(p);
11870+ p->first_time_slice = 0;
11871+
11872+ if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
11873+ if (!rq->expired_timestamp)
11874+ rq->expired_timestamp = jiffies;
11875+ enqueue_task(p, rq->expired);
11876+ } else
11877+ enqueue_task(p, rq->active);
11878+ }
11879+out:
11880+#if CONFIG_SMP
11881+ if (!(jiffies % BUSY_REBALANCE_TICK))
11882+ load_balance(rq, 0);
11883+#endif
11884+ spin_unlock(&rq->lock);
11885+}
11886
11887-#endif /* CONFIG_SMP */
11888+void scheduling_functions_start_here(void) { }
11889
11890- kstat.context_swtch++;
11891+/*
11892+ * 'schedule()' is the main scheduler function.
11893+ */
11894+asmlinkage void schedule(void)
11895+{
11896+ task_t *prev, *next;
11897+ runqueue_t *rq;
11898+ prio_array_t *array;
11899+ list_t *queue;
11900+ int idx;
11901+
11902+ if (unlikely(in_interrupt()))
11903+ BUG();
11904+
11905+need_resched:
11906+ preempt_disable();
11907+ prev = current;
11908+ rq = this_rq();
11909+
11910+ release_kernel_lock(prev, smp_processor_id());
11911+ prev->sleep_timestamp = jiffies;
11912+ spin_lock_irq(&rq->lock);
11913+
11914+#ifdef CONFIG_PREEMPT
11915 /*
11916- * there are 3 processes which are affected by a context switch:
11917- *
11918- * prev == .... ==> (last => next)
11919- *
11920- * It's the 'much more previous' 'prev' that is on next's stack,
11921- * but prev is set to (the just run) 'last' process by switch_to().
11922- * This might sound slightly confusing but makes tons of sense.
11923+ * entering from preempt_schedule, off a kernel preemption,
11924+ * go straight to picking the next task.
11925 */
11926- prepare_to_switch();
11927- {
11928- struct mm_struct *mm = next->mm;
11929- struct mm_struct *oldmm = prev->active_mm;
11930- if (!mm) {
11931- BUG_ON(next->active_mm);
11932- next->active_mm = oldmm;
11933- atomic_inc(&oldmm->mm_count);
11934- enter_lazy_tlb(oldmm, next, this_cpu);
11935- } else {
11936- BUG_ON(next->active_mm != mm);
11937- switch_mm(oldmm, mm, next, this_cpu);
11938- }
11939-
11940- if (!prev->mm) {
11941- prev->active_mm = NULL;
11942- mmdrop(oldmm);
11943+ if (unlikely(preempt_get_count() & PREEMPT_ACTIVE))
11944+ goto pick_next_task;
11945+#endif
11946+ switch (prev->state) {
11947+ case TASK_INTERRUPTIBLE:
11948+ if (unlikely(signal_pending(prev))) {
11949+ prev->state = TASK_RUNNING;
11950+ break;
11951 }
11952+ default:
11953+ deactivate_task(prev, rq);
11954+ case TASK_RUNNING:
11955+ ;
11956+ }
11957+#if CONFIG_SMP || CONFIG_PREEMPT
11958+pick_next_task:
11959+#endif
11960+ if (unlikely(!rq->nr_running)) {
11961+#if CONFIG_SMP
11962+ load_balance(rq, 1);
11963+ if (rq->nr_running)
11964+ goto pick_next_task;
11965+#endif
11966+ next = rq->idle;
11967+ rq->expired_timestamp = 0;
11968+ goto switch_tasks;
11969 }
11970
11971+ array = rq->active;
11972+ if (unlikely(!array->nr_active)) {
11973+ /*
11974+ * Switch the active and expired arrays.
11975+ */
11976+ rq->active = rq->expired;
11977+ rq->expired = array;
11978+ array = rq->active;
11979+ rq->expired_timestamp = 0;
11980+ }
11981+
11982+ idx = sched_find_first_bit(array->bitmap);
11983+ queue = array->queue + idx;
11984+ next = list_entry(queue->next, task_t, run_list);
11985+
11986+switch_tasks:
11987+ prefetch(next);
11988+ clear_tsk_need_resched(prev);
11989+
11990+ if (likely(prev != next)) {
11991+ rq->nr_switches++;
11992+ rq->curr = next;
11993+
11994+ prepare_arch_switch(rq, next);
11995+ prev = context_switch(prev, next);
11996+ barrier();
11997+ rq = this_rq();
11998+ finish_arch_switch(rq, prev);
11999+ } else
12000+ spin_unlock_irq(&rq->lock);
12001+
12002+ reacquire_kernel_lock(current);
12003+ preempt_enable_no_resched();
12004+ if (need_resched())
12005+ goto need_resched;
12006+}
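
The pick above is what makes this scheduler O(1): each runqueue keeps an active and an expired priority array, each array has one list per priority plus a bitmap of non-empty lists, so the next task is found with a find-first-bit and a list-head lookup, and when the active array drains the two arrays are simply swapped. A compilable user-space model of that cycle follows; toy_task, toy_rq and the use of GCC's __builtin_ctz in place of sched_find_first_bit() are illustrative assumptions, not code from the patch.

#include <stdio.h>
#include <string.h>

#define MAX_PRIO 8      /* the real patch uses MAX_RT_PRIO + 40 nice levels */

struct toy_task { const char *name; int prio; int time_slice; struct toy_task *next; };
struct prio_array { unsigned int bitmap; struct toy_task *queue[MAX_PRIO]; };
struct toy_rq { struct prio_array arrays[2]; struct prio_array *active, *expired; };

/* singly-linked LIFO stands in for the kernel's per-priority list_t */
static void enqueue(struct prio_array *a, struct toy_task *t)
{
        t->next = a->queue[t->prio];
        a->queue[t->prio] = t;
        a->bitmap |= 1u << t->prio;
}

static struct toy_task *dequeue_highest(struct prio_array *a)
{
        int idx;
        struct toy_task *t;

        if (!a->bitmap)
                return NULL;
        idx = __builtin_ctz(a->bitmap);        /* stands in for sched_find_first_bit() */
        t = a->queue[idx];
        a->queue[idx] = t->next;
        if (!a->queue[idx])
                a->bitmap &= ~(1u << idx);
        return t;
}

int main(void)
{
        struct toy_rq rq;
        struct toy_task a = { "editor", 2, 2 }, b = { "compile", 5, 3 };
        int tick;

        memset(&rq, 0, sizeof(rq));
        rq.active = &rq.arrays[0];
        rq.expired = &rq.arrays[1];
        enqueue(rq.active, &a);
        enqueue(rq.active, &b);

        for (tick = 0; tick < 12; tick++) {
                struct toy_task *t = dequeue_highest(rq.active);

                if (!t) {                       /* active empty: swap the arrays */
                        struct prio_array *tmp = rq.active;
                        rq.active = rq.expired;
                        rq.expired = tmp;
                        t = dequeue_highest(rq.active);
                }
                printf("tick %2d: run %s (prio %d)\n", tick, t->name, t->prio);
                if (--t->time_slice == 0) {     /* timeslice used up: refill, expire */
                        t->time_slice = 3;
                        enqueue(rq.expired, t);
                } else
                        enqueue(rq.active, t);
        }
        return 0;
}

Nothing in the pick depends on how many tasks are runnable, only on the number of priority levels, which is where the "O(1)" in the patch name comes from.
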
12007+
12008+#ifdef CONFIG_PREEMPT
12009+/*
12010+ * this is the entry point to schedule() from in-kernel preemption.
12011+ */
12012+asmlinkage void preempt_schedule(void)
12013+{
12014 /*
12015- * This just switches the register state and the
12016- * stack.
12017+ * Interrupts disabled implies no kernel preemption. Just return.
12018 */
12019- switch_to(prev, next, prev);
12020- __schedule_tail(prev);
12021+ if (unlikely(irqs_disabled()))
12022+ return;
12023
12024-same_process:
12025- reacquire_kernel_lock(current);
12026- if (current->need_resched)
12027- goto need_resched_back;
12028- return;
12029+need_resched:
12030+ current->preempt_count += PREEMPT_ACTIVE;
12031+ schedule();
12032+ current->preempt_count -= PREEMPT_ACTIVE;
12033+
12034+ /* we can miss a preemption between schedule() and now */
12035+ barrier();
12036+ if (unlikely((current->need_resched)))
12037+ goto need_resched;
12038 }
12039+#endif /* CONFIG_PREEMPT */
12040
12041 /*
12042- * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just wake everything
12043- * up. If it's an exclusive wakeup (nr_exclusive == small +ve number) then we wake all the
12044- * non-exclusive tasks and one exclusive task.
12045+ * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
12046+ * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
12047+ * number) then we wake all the non-exclusive tasks and one exclusive task.
12048 *
12049 * There are circumstances in which we can try to wake a task which has already
12050- * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns zero
12051- * in this (rare) case, and we handle it by contonuing to scan the queue.
12052+ * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
12053+ * zero in this (rare) case, and we handle it by continuing to scan the queue.
12054 */
12055-static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode,
12056- int nr_exclusive, const int sync)
12057+static inline void __wake_up_common(wait_queue_head_t *q, unsigned int mode, int nr_exclusive, int sync)
12058 {
12059 struct list_head *tmp;
12060- struct task_struct *p;
12061-
12062- CHECK_MAGIC_WQHEAD(q);
12063- WQ_CHECK_LIST_HEAD(&q->task_list);
12064-
12065- list_for_each(tmp,&q->task_list) {
12066- unsigned int state;
12067- wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
12068+ unsigned int state;
12069+ wait_queue_t *curr;
12070+ task_t *p;
12071
12072- CHECK_MAGIC(curr->__magic);
12073+ list_for_each(tmp, &q->task_list) {
12074+ curr = list_entry(tmp, wait_queue_t, task_list);
12075 p = curr->task;
12076 state = p->state;
12077- if (state & mode) {
12078- WQ_NOTE_WAKER(curr);
12079- if (try_to_wake_up(p, sync) && (curr->flags&WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
12080+ if ((state & mode) && try_to_wake_up(p, sync) &&
12081+ ((curr->flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive))
12082 break;
12083- }
12084 }
12085 }
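
As the rewritten comment says, a wakeup scans the queue, wakes every matching non-exclusive waiter, and stops after waking nr_exclusive of the WQ_FLAG_EXCLUSIVE ones (nr_exclusive == 0 wakes everybody). A stand-alone sketch of just that counting rule; struct waiter and the wake_up_common() name here are illustrative stand-ins, not kernel types.

#include <stdio.h>

struct waiter { const char *name; int exclusive; int asleep; };

/* wake matching waiters; stop once nr_exclusive exclusive waiters were woken */
static void wake_up_common(struct waiter *w, int n, int nr_exclusive)
{
        int i;

        for (i = 0; i < n; i++) {
                if (!w[i].asleep)
                        continue;       /* like try_to_wake_up() returning 0 */
                w[i].asleep = 0;
                printf("woke %s\n", w[i].name);
                if (w[i].exclusive && !--nr_exclusive)
                        break;
        }
}

int main(void)
{
        struct waiter q[] = {
                { "poll-er",  0, 1 },
                { "accept-1", 1, 1 },
                { "accept-2", 1, 1 },
        };

        wake_up_common(q, 3, 1);        /* wakes poll-er and accept-1 only */
        return 0;
}
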
12086
12087-void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr)
12088+void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
12089 {
12090- if (q) {
12091- unsigned long flags;
12092- wq_read_lock_irqsave(&q->lock, flags);
12093- __wake_up_common(q, mode, nr, 0);
12094- wq_read_unlock_irqrestore(&q->lock, flags);
12095- }
12096+ unsigned long flags;
12097+
12098+ if (unlikely(!q))
12099+ return;
12100+
12101+ wq_read_lock_irqsave(&q->lock, flags);
12102+ __wake_up_common(q, mode, nr_exclusive, 0);
12103+ wq_read_unlock_irqrestore(&q->lock, flags);
12104 }
12105
12106-void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr)
12107+#if CONFIG_SMP
12108+
12109+void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
12110 {
12111- if (q) {
12112- unsigned long flags;
12113- wq_read_lock_irqsave(&q->lock, flags);
12114- __wake_up_common(q, mode, nr, 1);
12115- wq_read_unlock_irqrestore(&q->lock, flags);
12116- }
12117+ unsigned long flags;
12118+
12119+ if (unlikely(!q))
12120+ return;
12121+
12122+ wq_read_lock_irqsave(&q->lock, flags);
12123+ if (likely(nr_exclusive))
12124+ __wake_up_common(q, mode, nr_exclusive, 1);
12125+ else
12126+ __wake_up_common(q, mode, nr_exclusive, 0);
12127+ wq_read_unlock_irqrestore(&q->lock, flags);
12128 }
12129
12130+#endif
12131+
12132 void complete(struct completion *x)
12133 {
12134 unsigned long flags;
12135
12136- spin_lock_irqsave(&x->wait.lock, flags);
12137+ wq_write_lock_irqsave(&x->wait.lock, flags);
12138 x->done++;
12139 __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, 0);
12140- spin_unlock_irqrestore(&x->wait.lock, flags);
12141+ wq_write_unlock_irqrestore(&x->wait.lock, flags);
12142 }
12143
12144 void wait_for_completion(struct completion *x)
12145 {
12146- spin_lock_irq(&x->wait.lock);
12147+ wq_write_lock_irq(&x->wait.lock);
12148 if (!x->done) {
12149 DECLARE_WAITQUEUE(wait, current);
12150
12151@@ -775,14 +986,14 @@
12152 __add_wait_queue_tail(&x->wait, &wait);
12153 do {
12154 __set_current_state(TASK_UNINTERRUPTIBLE);
12155- spin_unlock_irq(&x->wait.lock);
12156+ wq_write_unlock_irq(&x->wait.lock);
12157 schedule();
12158- spin_lock_irq(&x->wait.lock);
12159+ wq_write_lock_irq(&x->wait.lock);
12160 } while (!x->done);
12161 __remove_wait_queue(&x->wait, &wait);
12162 }
12163 x->done--;
12164- spin_unlock_irq(&x->wait.lock);
12165+ wq_write_unlock_irq(&x->wait.lock);
12166 }
12167
12168 #define SLEEP_ON_VAR \
12169@@ -850,6 +1061,41 @@
12170
12171 void scheduling_functions_end_here(void) { }
12172
12173+void set_user_nice(task_t *p, long nice)
12174+{
12175+ unsigned long flags;
12176+ prio_array_t *array;
12177+ runqueue_t *rq;
12178+
12179+ if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
12180+ return;
12181+ /*
12182+ * We have to be careful, if called from sys_setpriority(),
12183+ * the task might be in the middle of scheduling on another CPU.
12184+ */
12185+ rq = task_rq_lock(p, &flags);
12186+ if (rt_task(p)) {
12187+ p->static_prio = NICE_TO_PRIO(nice);
12188+ goto out_unlock;
12189+ }
12190+ array = p->array;
12191+ if (array)
12192+ dequeue_task(p, array);
12193+ p->static_prio = NICE_TO_PRIO(nice);
12194+ p->prio = NICE_TO_PRIO(nice);
12195+ if (array) {
12196+ enqueue_task(p, array);
12197+ /*
12198+ * If the task is running and lowered its priority,
12199+ * or increased its priority then reschedule its CPU:
12200+ */
12201+ if ((NICE_TO_PRIO(nice) < p->static_prio) || (p == rq->curr))
12202+ resched_task(rq->curr);
12203+ }
12204+out_unlock:
12205+ task_rq_unlock(rq, &flags);
12206+}
12207+
12208 #ifndef __alpha__
12209
12210 /*
12211@@ -860,7 +1106,7 @@
12212
12213 asmlinkage long sys_nice(int increment)
12214 {
12215- long newprio;
12216+ long nice;
12217
12218 /*
12219 * Setpriority might change our priority at the same moment.
12220@@ -876,32 +1122,51 @@
12221 if (increment > 40)
12222 increment = 40;
12223
12224- newprio = current->nice + increment;
12225- if (newprio < -20)
12226- newprio = -20;
12227- if (newprio > 19)
12228- newprio = 19;
12229- current->nice = newprio;
12230+ nice = PRIO_TO_NICE(current->static_prio) + increment;
12231+ if (nice < -20)
12232+ nice = -20;
12233+ if (nice > 19)
12234+ nice = 19;
12235+ set_user_nice(current, nice);
12236 return 0;
12237 }
12238
12239 #endif
12240
12241-static inline struct task_struct *find_process_by_pid(pid_t pid)
12242+/*
12243+ * This is the priority value as seen by users in /proc
12244+ *
12245+ * RT tasks are offset by -200. Normal tasks are centered
12246+ * around 0, value goes from -16 to +15.
12247+ */
12248+int task_prio(task_t *p)
12249+{
12250+ return p->prio - MAX_USER_RT_PRIO;
12251+}
12252+
12253+int task_nice(task_t *p)
12254 {
12255- struct task_struct *tsk = current;
12256+ return TASK_NICE(p);
12257+}
12258
12259- if (pid)
12260- tsk = find_task_by_pid(pid);
12261- return tsk;
12262+int idle_cpu(int cpu)
12263+{
12264+ return cpu_curr(cpu) == cpu_rq(cpu)->idle;
12265 }
12266
12267-static int setscheduler(pid_t pid, int policy,
12268- struct sched_param *param)
12269+static inline task_t *find_process_by_pid(pid_t pid)
12270+{
12271+ return pid ? find_task_by_pid(pid) : current;
12272+}
12273+
12274+static int setscheduler(pid_t pid, int policy, struct sched_param *param)
12275 {
12276 struct sched_param lp;
12277- struct task_struct *p;
12278+ prio_array_t *array;
12279+ unsigned long flags;
12280+ runqueue_t *rq;
12281 int retval;
12282+ task_t *p;
12283
12284 retval = -EINVAL;
12285 if (!param || pid < 0)
12286@@ -915,14 +1180,19 @@
12287 * We play safe to avoid deadlocks.
12288 */
12289 read_lock_irq(&tasklist_lock);
12290- spin_lock(&runqueue_lock);
12291
12292 p = find_process_by_pid(pid);
12293
12294 retval = -ESRCH;
12295 if (!p)
12296- goto out_unlock;
12297-
12298+ goto out_unlock_tasklist;
12299+
12300+ /*
12301+ * To be able to change p->policy safely, the appropriate
12302+ * runqueue lock must be held.
12303+ */
12304+ rq = task_rq_lock(p, &flags);
12305+
12306 if (policy < 0)
12307 policy = p->policy;
12308 else {
12309@@ -931,40 +1201,48 @@
12310 policy != SCHED_OTHER)
12311 goto out_unlock;
12312 }
12313-
12314+
12315 /*
12316- * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid
12317- * priority for SCHED_OTHER is 0.
12318+ * Valid priorities for SCHED_FIFO and SCHED_RR are
12319+ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_OTHER is 0.
12320 */
12321 retval = -EINVAL;
12322- if (lp.sched_priority < 0 || lp.sched_priority > 99)
12323+ if (lp.sched_priority < 0 || lp.sched_priority > MAX_USER_RT_PRIO-1)
12324 goto out_unlock;
12325 if ((policy == SCHED_OTHER) != (lp.sched_priority == 0))
12326 goto out_unlock;
12327
12328 retval = -EPERM;
12329- if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
12330+ if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
12331 !capable(CAP_SYS_NICE))
12332 goto out_unlock;
12333 if ((current->euid != p->euid) && (current->euid != p->uid) &&
12334 !capable(CAP_SYS_NICE))
12335 goto out_unlock;
12336
12337+ array = p->array;
12338+ if (array)
12339+ deactivate_task(p, task_rq(p));
12340 retval = 0;
12341 p->policy = policy;
12342 p->rt_priority = lp.sched_priority;
12343-
12344- current->need_resched = 1;
12345+ if (policy != SCHED_OTHER)
12346+ p->prio = MAX_USER_RT_PRIO-1 - p->rt_priority;
12347+ else
12348+ p->prio = p->static_prio;
12349+ if (array)
12350+ activate_task(p, task_rq(p));
12351
12352 out_unlock:
12353- spin_unlock(&runqueue_lock);
12354+ task_rq_unlock(rq, &flags);
12355+out_unlock_tasklist:
12356 read_unlock_irq(&tasklist_lock);
12357
12358 out_nounlock:
12359 return retval;
12360 }
12361
12362-asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,
12363+asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,
12364 struct sched_param *param)
12365 {
12366 return setscheduler(pid, policy, param);
12367@@ -977,7 +1255,7 @@
12368
12369 asmlinkage long sys_sched_getscheduler(pid_t pid)
12370 {
12371- struct task_struct *p;
12372+ task_t *p;
12373 int retval;
12374
12375 retval = -EINVAL;
12376@@ -988,16 +1266,107 @@
12377 read_lock(&tasklist_lock);
12378 p = find_process_by_pid(pid);
12379 if (p)
12380- retval = p->policy & ~SCHED_YIELD;
12381+ retval = p->policy;
12382 read_unlock(&tasklist_lock);
12383
12384 out_nounlock:
12385 return retval;
12386 }
12387
12388+/**
12389+ * sys_sched_setaffinity - set the cpu affinity of a process
12390+ * @pid: pid of the process
12391+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr
12392+ * @user_mask_ptr: user-space pointer to the new cpu mask
12393+ */
12394+asmlinkage int sys_sched_setaffinity(pid_t pid, unsigned int len,
12395+ unsigned long *user_mask_ptr)
12396+{
12397+ unsigned long new_mask;
12398+ task_t *p;
12399+ int retval;
12400+
12401+ if (len < sizeof(new_mask))
12402+ return -EINVAL;
12403+
12404+ if (copy_from_user(&new_mask, user_mask_ptr, sizeof(new_mask)))
12405+ return -EFAULT;
12406+
12407+ new_mask &= cpu_online_map;
12408+ if (!new_mask)
12409+ return -EINVAL;
12410+
12411+ /*
12412+ * We cannot hold a lock across a call to set_cpus_allowed, however
12413+ * we need to ensure our task does not slip out from under us. Since
12414+ * we are only concerned that its task_struct remains, we can pin it
12415+ * here and decrement the usage count when we are done.
12416+ */
12417+ read_lock(&tasklist_lock);
12418+
12419+ p = find_process_by_pid(pid);
12420+ if (!p) {
12421+ read_unlock(&tasklist_lock);
12422+ return -ESRCH;
12423+ }
12424+
12425+ get_task_struct(p);
12426+ read_unlock(&tasklist_lock);
12427+
12428+ retval = -EPERM;
12429+ if ((current->euid != p->euid) && (current->euid != p->uid) &&
12430+ !capable(CAP_SYS_NICE))
12431+ goto out_unlock;
12432+
12433+ retval = 0;
12434+ set_cpus_allowed(p, new_mask);
12435+
12436+out_unlock:
12437+ free_task_struct(p);
12438+ return retval;
12439+}
12440+
12441+/**
12442+ * sys_sched_getaffinity - get the cpu affinity of a process
12443+ * @pid: pid of the process
12444+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr
12445+ * @user_mask_ptr: user-space pointer to hold the current cpu mask
12446+ */
12447+asmlinkage int sys_sched_getaffinity(pid_t pid, unsigned int len,
12448+ unsigned long *user_mask_ptr)
12449+{
12450+ unsigned long mask;
12451+ unsigned int real_len;
12452+ task_t *p;
12453+ int retval;
12454+
12455+ real_len = sizeof(mask);
12456+
12457+ if (len < real_len)
12458+ return -EINVAL;
12459+
12460+ read_lock(&tasklist_lock);
12461+
12462+ retval = -ESRCH;
12463+ p = find_process_by_pid(pid);
12464+ if (!p)
12465+ goto out_unlock;
12466+
12467+ retval = 0;
12468+ mask = p->cpus_allowed & cpu_online_map;
12469+
12470+out_unlock:
12471+ read_unlock(&tasklist_lock);
12472+ if (retval)
12473+ return retval;
12474+ if (copy_to_user(user_mask_ptr, &mask, real_len))
12475+ return -EFAULT;
12476+ return real_len;
12477+}
12478+
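
Both new syscalls above take a raw unsigned long bitmask and a byte length. A hedged user-space sketch of calling them through syscall(2); it assumes <sys/syscall.h> defines SYS_sched_setaffinity and SYS_sched_getaffinity (the syscall numbers are assigned elsewhere, not in this hunk).

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
        unsigned long mask = 1UL << 0;          /* run on CPU 0 only */
        unsigned long cur = 0;
        long ret;

        ret = syscall(SYS_sched_setaffinity, 0, sizeof(mask), &mask);
        if (ret < 0) {
                perror("sched_setaffinity");
                return 1;
        }

        /* the kernel copies its mask back and returns the number of bytes used */
        ret = syscall(SYS_sched_getaffinity, 0, sizeof(cur), &cur);
        if (ret < 0) {
                perror("sched_getaffinity");
                return 1;
        }
        printf("affinity mask: 0x%lx (%ld bytes)\n", cur, ret);
        return 0;
}

Modern C libraries wrap these calls with a cpu_set_t API, but the underlying kernel ABI is still a bitmask pointer plus a length, as in the hunk above.
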
12479 asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param *param)
12480 {
12481- struct task_struct *p;
12482+ task_t *p;
12483 struct sched_param lp;
12484 int retval;
12485
12486@@ -1028,42 +1397,43 @@
12487
12488 asmlinkage long sys_sched_yield(void)
12489 {
12490- /*
12491- * Trick. sched_yield() first counts the number of truly
12492- * 'pending' runnable processes, then returns if it's
12493- * only the current processes. (This test does not have
12494- * to be atomic.) In threaded applications this optimization
12495- * gets triggered quite often.
12496- */
12497-
12498- int nr_pending = nr_running;
12499-
12500-#if CONFIG_SMP
12501+ runqueue_t *rq;
12502+ prio_array_t *array = current->array;
12503 int i;
12504
12505- // Subtract non-idle processes running on other CPUs.
12506- for (i = 0; i < smp_num_cpus; i++) {
12507- int cpu = cpu_logical_map(i);
12508- if (aligned_data[cpu].schedule_data.curr != idle_task(cpu))
12509- nr_pending--;
12510+ preempt_disable();
12511+ rq = this_rq();
12512+ spin_lock_irq(&rq->lock);
12513+
12514+ if (unlikely(rt_task(current))) {
12515+ list_del(&current->run_list);
12516+ list_add_tail(&current->run_list, array->queue + current->prio);
12517+ goto out_unlock;
12518 }
12519-#else
12520- // on UP this process is on the runqueue as well
12521- nr_pending--;
12522-#endif
12523- if (nr_pending) {
12524- /*
12525- * This process can only be rescheduled by us,
12526- * so this is safe without any locking.
12527- */
12528- if (current->policy == SCHED_OTHER)
12529- current->policy |= SCHED_YIELD;
12530- current->need_resched = 1;
12531-
12532- spin_lock_irq(&runqueue_lock);
12533- move_last_runqueue(current);
12534- spin_unlock_irq(&runqueue_lock);
12535+
12536+ list_del(&current->run_list);
12537+ if (!list_empty(array->queue + current->prio)) {
12538+ list_add(&current->run_list, array->queue[current->prio].next);
12539+ goto out_unlock;
12540 }
12541+ __clear_bit(current->prio, array->bitmap);
12542+
12543+ i = sched_find_first_bit(array->bitmap);
12544+
12545+ if (i == MAX_PRIO || i <= current->prio)
12546+ i = current->prio;
12547+ else
12548+ current->prio = i;
12549+
12550+ list_add(&current->run_list, array->queue[i].next);
12551+ __set_bit(i, array->bitmap);
12552+
12553+out_unlock:
12554+ spin_unlock_irq(&rq->lock);
12555+ preempt_enable_no_resched();
12556+
12557+ schedule();
12558+
12559 return 0;
12560 }
12561
12562@@ -1075,14 +1445,13 @@
12563 */
12564 void yield(void)
12565 {
12566- set_current_state(TASK_RUNNING);
12567+ __set_current_state(TASK_RUNNING);
12568 sys_sched_yield();
12569- schedule();
12570 }
12571
12572 void __cond_resched(void)
12573 {
12574- set_current_state(TASK_RUNNING);
12575+ __set_current_state(TASK_RUNNING);
12576 schedule();
12577 }
12578
12579@@ -1093,7 +1462,7 @@
12580 switch (policy) {
12581 case SCHED_FIFO:
12582 case SCHED_RR:
12583- ret = 99;
12584+ ret = MAX_USER_RT_PRIO-1;
12585 break;
12586 case SCHED_OTHER:
12587 ret = 0;
12588@@ -1120,7 +1489,7 @@
12589 asmlinkage long sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
12590 {
12591 struct timespec t;
12592- struct task_struct *p;
12593+ task_t *p;
12594 int retval = -EINVAL;
12595
12596 if (pid < 0)
12597@@ -1130,8 +1499,8 @@
12598 read_lock(&tasklist_lock);
12599 p = find_process_by_pid(pid);
12600 if (p)
12601- jiffies_to_timespec(p->policy & SCHED_FIFO ? 0 : NICE_TO_TICKS(p->nice),
12602- &t);
12603+ jiffies_to_timespec(p->policy & SCHED_FIFO ?
12604+ 0 : TASK_TIMESLICE(p), &t);
12605 read_unlock(&tasklist_lock);
12606 if (p)
12607 retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
12608@@ -1139,14 +1508,14 @@
12609 return retval;
12610 }
12611
12612-static void show_task(struct task_struct * p)
12613+static void show_task(task_t * p)
12614 {
12615 unsigned long free = 0;
12616 int state;
12617 static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };
12618
12619 printk("%-13.13s ", p->comm);
12620- state = p->state ? ffz(~p->state) + 1 : 0;
12621+ state = p->state ? __ffs(p->state) + 1 : 0;
12622 if (((unsigned) state) < sizeof(stat_nam)/sizeof(char *))
12623 printk(stat_nam[state]);
12624 else
12625@@ -1187,7 +1556,7 @@
12626 printk(" (NOTLB)\n");
12627
12628 {
12629- extern void show_trace_task(struct task_struct *tsk);
12630+ extern void show_trace_task(task_t *tsk);
12631 show_trace_task(p);
12632 }
12633 }
12634@@ -1209,7 +1578,7 @@
12635
12636 void show_state(void)
12637 {
12638- struct task_struct *p;
12639+ task_t *p;
12640
12641 #if (BITS_PER_LONG == 32)
12642 printk("\n"
12643@@ -1232,128 +1601,283 @@
12644 read_unlock(&tasklist_lock);
12645 }
12646
12647-/**
12648- * reparent_to_init() - Reparent the calling kernel thread to the init task.
12649- *
12650- * If a kernel thread is launched as a result of a system call, or if
12651- * it ever exits, it should generally reparent itself to init so that
12652- * it is correctly cleaned up on exit.
12653+/*
12654+ * double_rq_lock - safely lock two runqueues
12655 *
12656- * The various task state such as scheduling policy and priority may have
12657- * been inherited fro a user process, so we reset them to sane values here.
12658+ * Note this does not disable interrupts like task_rq_lock,
12659+ * you need to do so manually before calling.
12660+ */
12661+static inline void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
12662+{
12663+ if (rq1 == rq2)
12664+ spin_lock(&rq1->lock);
12665+ else {
12666+ if (rq1 < rq2) {
12667+ spin_lock(&rq1->lock);
12668+ spin_lock(&rq2->lock);
12669+ } else {
12670+ spin_lock(&rq2->lock);
12671+ spin_lock(&rq1->lock);
12672+ }
12673+ }
12674+}
12675+
12676+/*
12677+ * double_rq_unlock - safely unlock two runqueues
12678 *
12679- * NOTE that reparent_to_init() gives the caller full capabilities.
12680+ * Note this does not restore interrupts like task_rq_unlock,
12681+ * you need to do so manually after calling.
12682 */
12683-void reparent_to_init(void)
12684+static inline void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2)
12685 {
12686- struct task_struct *this_task = current;
12687+ spin_unlock(&rq1->lock);
12688+ if (rq1 != rq2)
12689+ spin_unlock(&rq2->lock);
12690+}
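
double_rq_lock() takes the two runqueue locks in a fixed order (lowest address first) so that two CPUs locking the same pair from opposite ends can never deadlock. The same idiom with POSIX mutexes, runnable stand-alone; double_lock()/double_unlock() are made-up names mirroring the functions above.

#include <pthread.h>
#include <stdio.h>

/* always lock the lower-addressed mutex first, exactly like double_rq_lock() */
static void double_lock(pthread_mutex_t *a, pthread_mutex_t *b)
{
        if (a == b) {
                pthread_mutex_lock(a);
        } else if (a < b) {
                pthread_mutex_lock(a);
                pthread_mutex_lock(b);
        } else {
                pthread_mutex_lock(b);
                pthread_mutex_lock(a);
        }
}

static void double_unlock(pthread_mutex_t *a, pthread_mutex_t *b)
{
        pthread_mutex_unlock(a);
        if (a != b)
                pthread_mutex_unlock(b);
}

static pthread_mutex_t rq_a = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t rq_b = PTHREAD_MUTEX_INITIALIZER;

static void *worker(void *arg)
{
        /* the two threads name the locks in opposite order; no deadlock results */
        double_lock(arg ? &rq_b : &rq_a, arg ? &rq_a : &rq_b);
        printf("thread %ld holds both locks\n", (long) arg);
        double_unlock(arg ? &rq_b : &rq_a, arg ? &rq_a : &rq_b);
        return NULL;
}

int main(void)
{
        pthread_t t1, t2;

        pthread_create(&t1, NULL, worker, (void *) 0L);
        pthread_create(&t2, NULL, worker, (void *) 1L);
        pthread_join(t1, NULL);
        pthread_join(t2, NULL);
        return 0;
}

Build with cc -pthread. Ordering unrelated pointers is, strictly speaking, outside ISO C, but it is the same trick the kernel relies on here.
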
12691+
12692+void __init init_idle(task_t *idle, int cpu)
12693+{
12694+ runqueue_t *idle_rq = cpu_rq(cpu), *rq = cpu_rq(idle->cpu);
12695+ unsigned long flags;
12696
12697- write_lock_irq(&tasklist_lock);
12698+ __save_flags(flags);
12699+ __cli();
12700+ double_rq_lock(idle_rq, rq);
12701+
12702+ idle_rq->curr = idle_rq->idle = idle;
12703+ deactivate_task(idle, rq);
12704+ idle->array = NULL;
12705+ idle->prio = MAX_PRIO;
12706+ idle->state = TASK_RUNNING;
12707+ idle->cpu = cpu;
12708+ double_rq_unlock(idle_rq, rq);
12709+ set_tsk_need_resched(idle);
12710+ __restore_flags(flags);
12711
12712- /* Reparent to init */
12713- REMOVE_LINKS(this_task);
12714- this_task->p_pptr = child_reaper;
12715- this_task->p_opptr = child_reaper;
12716- SET_LINKS(this_task);
12717+ /* Set the preempt count _outside_ the spinlocks! */
12718+ idle->preempt_count = (idle->lock_depth >= 0);
12719+}
12720+
12721+extern void init_timervecs(void);
12722+extern void timer_bh(void);
12723+extern void tqueue_bh(void);
12724+extern void immediate_bh(void);
12725+
12726+void __init sched_init(void)
12727+{
12728+ runqueue_t *rq;
12729+ int i, j, k;
12730
12731- /* Set the exit signal to SIGCHLD so we signal init on exit */
12732- this_task->exit_signal = SIGCHLD;
12733+ for (i = 0; i < NR_CPUS; i++) {
12734+ prio_array_t *array;
12735
12736- /* We also take the runqueue_lock while altering task fields
12737- * which affect scheduling decisions */
12738- spin_lock(&runqueue_lock);
12739+ rq = cpu_rq(i);
12740+ rq->active = rq->arrays;
12741+ rq->expired = rq->arrays + 1;
12742+ spin_lock_init(&rq->lock);
12743+ INIT_LIST_HEAD(&rq->migration_queue);
12744+
12745+ for (j = 0; j < 2; j++) {
12746+ array = rq->arrays + j;
12747+ for (k = 0; k < MAX_PRIO; k++) {
12748+ INIT_LIST_HEAD(array->queue + k);
12749+ __clear_bit(k, array->bitmap);
12750+ }
12751+ // delimiter for bitsearch
12752+ __set_bit(MAX_PRIO, array->bitmap);
12753+ }
12754+ }
12755+ /*
12756+ * We have to do a little magic to get the first
12757+ * process right in SMP mode.
12758+ */
12759+ rq = this_rq();
12760+ rq->curr = current;
12761+ rq->idle = current;
12762+ current->cpu = smp_processor_id();
12763+ wake_up_process(current);
12764
12765- this_task->ptrace = 0;
12766- this_task->nice = DEF_NICE;
12767- this_task->policy = SCHED_OTHER;
12768- /* cpus_allowed? */
12769- /* rt_priority? */
12770- /* signals? */
12771- this_task->cap_effective = CAP_INIT_EFF_SET;
12772- this_task->cap_inheritable = CAP_INIT_INH_SET;
12773- this_task->cap_permitted = CAP_FULL_SET;
12774- this_task->keep_capabilities = 0;
12775- memcpy(this_task->rlim, init_task.rlim, sizeof(*(this_task->rlim)));
12776- this_task->user = INIT_USER;
12777+ init_timervecs();
12778+ init_bh(TIMER_BH, timer_bh);
12779+ init_bh(TQUEUE_BH, tqueue_bh);
12780+ init_bh(IMMEDIATE_BH, immediate_bh);
12781
12782- spin_unlock(&runqueue_lock);
12783- write_unlock_irq(&tasklist_lock);
12784+ /*
12785+ * The boot idle thread does lazy MMU switching as well:
12786+ */
12787+ atomic_inc(&init_mm.mm_count);
12788+ enter_lazy_tlb(&init_mm, current, smp_processor_id());
12789 }
12790
12791+#if CONFIG_SMP
12792+
12793 /*
12794- * Put all the gunge required to become a kernel thread without
12795- * attached user resources in one place where it belongs.
12796+ * This is how migration works:
12797+ *
12798+ * 1) we queue a migration_req_t structure in the source CPU's
12799+ * runqueue and wake up that CPU's migration thread.
12800+ * 2) we down() the locked semaphore => thread blocks.
12801+ * 3) migration thread wakes up (implicitly it forces the migrated
12802+ * thread off the CPU)
12803+ * 4) it gets the migration request and checks whether the migrated
12804+ * task is still in the wrong runqueue.
12805+ * 5) if it's in the wrong runqueue then the migration thread removes
12806+ * it and puts it into the right queue.
12807+ * 6) migration thread up()s the semaphore.
12808+ * 7) we wake up and the migration is done.
12809 */
12810
12811-void daemonize(void)
12812+typedef struct {
12813+ list_t list;
12814+ task_t *task;
12815+ struct completion done;
12816+} migration_req_t;
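
The seven steps above describe a queue-and-complete handshake: the caller posts a request, kicks the per-CPU migration thread, and sleeps until that thread marks the request done (this backport uses struct completion where the comment speaks of a semaphore). A rough user-space analogue with pthreads; req_t, worker() and the submission logic are invented for illustration, not taken from the patch.

#include <pthread.h>
#include <stdio.h>

typedef struct {
        int payload;                    /* stands in for the task to migrate */
        int done;
        pthread_mutex_t lock;
        pthread_cond_t  completion;     /* plays the role of struct completion */
} req_t;

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  queue_wake = PTHREAD_COND_INITIALIZER;
static req_t *pending;

/* the "migration thread": sleeps until a request is queued, services it, completes it */
static void *worker(void *unused)
{
        req_t *req;

        (void) unused;
        pthread_mutex_lock(&queue_lock);
        while (!pending)
                pthread_cond_wait(&queue_wake, &queue_lock);    /* steps 2-3 */
        req = pending;
        pending = NULL;
        pthread_mutex_unlock(&queue_lock);

        printf("worker: migrating payload %d\n", req->payload); /* steps 4-5 */

        pthread_mutex_lock(&req->lock);
        req->done = 1;                                          /* step 6 */
        pthread_cond_signal(&req->completion);
        pthread_mutex_unlock(&req->lock);
        return NULL;
}

int main(void)
{
        pthread_t tid;
        req_t req = { 42, 0, PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER };

        pthread_create(&tid, NULL, worker, NULL);

        pthread_mutex_lock(&queue_lock);                        /* step 1 */
        pending = &req;
        pthread_cond_signal(&queue_wake);
        pthread_mutex_unlock(&queue_lock);

        pthread_mutex_lock(&req.lock);                          /* step 7 */
        while (!req.done)
                pthread_cond_wait(&req.completion, &req.lock);
        pthread_mutex_unlock(&req.lock);

        pthread_join(tid, NULL);
        return 0;
}
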
12817+
12818+/*
12819+ * Change a given task's CPU affinity. Migrate the process to a
12820+ * proper CPU and schedule it away if the CPU it's executing on
12821+ * is removed from the allowed bitmask.
12822+ *
12823+ * NOTE: the caller must have a valid reference to the task, the
12824+ * task must not exit() & deallocate itself prematurely. The
12825+ * call is not atomic; no spinlocks may be held.
12826+ */
12827+void set_cpus_allowed(task_t *p, unsigned long new_mask)
12828 {
12829- struct fs_struct *fs;
12830+ unsigned long flags;
12831+ migration_req_t req;
12832+ runqueue_t *rq;
12833
12834+ new_mask &= cpu_online_map;
12835+ if (!new_mask)
12836+ BUG();
12837
12838+ preempt_disable();
12839+ rq = task_rq_lock(p, &flags);
12840+ p->cpus_allowed = new_mask;
12841 /*
12842- * If we were started as result of loading a module, close all of the
12843- * user space pages. We don't need them, and if we didn't close them
12844- * they would be locked into memory.
12845+ * Can the task run on the task's current CPU? If not then
12846+ * migrate the process off to a proper CPU.
12847 */
12848- exit_mm(current);
12849+ if (new_mask & (1UL << p->cpu)) {
12850+ task_rq_unlock(rq, &flags);
12851+ return;
12852+ }
12853
12854- current->session = 1;
12855- current->pgrp = 1;
12856- current->tty = NULL;
12857+ /*
12858+ * If the task is not on a runqueue, then it is safe to
12859+ * simply update the task's cpu field.
12860+ */
12861+ if (!p->array && (p != rq->curr)) {
12862+ p->cpu = __ffs(p->cpus_allowed);
12863+ task_rq_unlock(rq, &flags);
12864+ return;
12865+ }
12866
12867- /* Become as one with the init task */
12868+ init_completion(&req.done);
12869+ req.task = p;
12870+ list_add(&req.list, &rq->migration_queue);
12871+ task_rq_unlock(rq, &flags);
12872+ wake_up_process(rq->migration_thread);
12873
12874- exit_fs(current); /* current->fs->count--; */
12875- fs = init_task.fs;
12876- current->fs = fs;
12877- atomic_inc(&fs->count);
12878- exit_files(current);
12879- current->files = init_task.files;
12880- atomic_inc(&current->files->count);
12881+ wait_for_completion(&req.done);
12882+ preempt_enable();
12883 }
12884
12885-extern unsigned long wait_init_idle;
12886+static __initdata int master_migration_thread;
12887
12888-void __init init_idle(void)
12889+static int migration_thread(void * bind_cpu)
12890 {
12891- struct schedule_data * sched_data;
12892- sched_data = &aligned_data[smp_processor_id()].schedule_data;
12893+ int cpu = cpu_logical_map((int) (long) bind_cpu);
12894+ struct sched_param param = { sched_priority: MAX_RT_PRIO-1 };
12895+ runqueue_t *rq;
12896+ int ret;
12897
12898- if (current != &init_task && task_on_runqueue(current)) {
12899- printk("UGH! (%d:%d) was on the runqueue, removing.\n",
12900- smp_processor_id(), current->pid);
12901- del_from_runqueue(current);
12902+ daemonize();
12903+ sigfillset(&current->blocked);
12904+ set_fs(KERNEL_DS);
12905+ /*
12906+ * The first migration thread is started on the boot CPU, it
12907+ * migrates the other migration threads to their destination CPUs.
12908+ */
12909+ if (cpu != master_migration_thread) {
12910+ while (!cpu_rq(master_migration_thread)->migration_thread)
12911+ yield();
12912+ set_cpus_allowed(current, 1UL << cpu);
12913 }
12914- sched_data->curr = current;
12915- sched_data->last_schedule = get_cycles();
12916- clear_bit(current->processor, &wait_init_idle);
12917-}
12918+ printk("migration_task %d on cpu=%d\n", cpu, smp_processor_id());
12919+ ret = setscheduler(0, SCHED_FIFO, &param);
12920
12921-extern void init_timervecs (void);
12922+ rq = this_rq();
12923+ rq->migration_thread = current;
12924
12925-void __init sched_init(void)
12926-{
12927- /*
12928- * We have to do a little magic to get the first
12929- * process right in SMP mode.
12930- */
12931- int cpu = smp_processor_id();
12932- int nr;
12933+ sprintf(current->comm, "migration_CPU%d", smp_processor_id());
12934
12935- init_task.processor = cpu;
12936+ for (;;) {
12937+ runqueue_t *rq_src, *rq_dest;
12938+ struct list_head *head;
12939+ int cpu_src, cpu_dest;
12940+ migration_req_t *req;
12941+ unsigned long flags;
12942+ task_t *p;
12943
12944- for(nr = 0; nr < PIDHASH_SZ; nr++)
12945- pidhash[nr] = NULL;
12946+ spin_lock_irqsave(&rq->lock, flags);
12947+ head = &rq->migration_queue;
12948+ current->state = TASK_INTERRUPTIBLE;
12949+ if (list_empty(head)) {
12950+ spin_unlock_irqrestore(&rq->lock, flags);
12951+ schedule();
12952+ continue;
12953+ }
12954+ req = list_entry(head->next, migration_req_t, list);
12955+ list_del_init(head->next);
12956+ spin_unlock_irqrestore(&rq->lock, flags);
12957+
12958+ p = req->task;
12959+ cpu_dest = __ffs(p->cpus_allowed);
12960+ rq_dest = cpu_rq(cpu_dest);
12961+repeat:
12962+ cpu_src = p->cpu;
12963+ rq_src = cpu_rq(cpu_src);
12964+
12965+ local_irq_save(flags);
12966+ double_rq_lock(rq_src, rq_dest);
12967+ if (p->cpu != cpu_src) {
12968+ double_rq_unlock(rq_src, rq_dest);
12969+ local_irq_restore(flags);
12970+ goto repeat;
12971+ }
12972+ if (rq_src == rq) {
12973+ p->cpu = cpu_dest;
12974+ if (p->array) {
12975+ deactivate_task(p, rq_src);
12976+ activate_task(p, rq_dest);
12977+ }
12978+ }
12979+ double_rq_unlock(rq_src, rq_dest);
12980+ local_irq_restore(flags);
12981
12982- init_timervecs();
12983+ complete(&req->done);
12984+ }
12985+}
12986
12987- init_bh(TIMER_BH, timer_bh);
12988- init_bh(TQUEUE_BH, tqueue_bh);
12989- init_bh(IMMEDIATE_BH, immediate_bh);
12990+void __init migration_init(void)
12991+{
12992+ int cpu;
12993
12994- /*
12995- * The boot idle thread does lazy MMU switching as well:
12996- */
12997- atomic_inc(&init_mm.mm_count);
12998- enter_lazy_tlb(&init_mm, current, cpu);
12999+ master_migration_thread = smp_processor_id();
13000+ current->cpus_allowed = 1UL << master_migration_thread;
13001+
13002+ for (cpu = 0; cpu < smp_num_cpus; cpu++) {
13003+ if (kernel_thread(migration_thread, (void *) (long) cpu,
13004+ CLONE_FS | CLONE_FILES | CLONE_SIGNAL) < 0)
13005+ BUG();
13006+ }
13007+ current->cpus_allowed = -1L;
13008+
13009+ for (cpu = 0; cpu < smp_num_cpus; cpu++)
13010+ while (!cpu_rq(cpu_logical_map(cpu))->migration_thread)
13011+ schedule_timeout(2);
13012 }
13013+
13014+#endif /* CONFIG_SMP */
13015diff -urN linux-2.4.20/kernel/signal.c linux-2.4.20-o1-preempt/kernel/signal.c
13016--- linux-2.4.20/kernel/signal.c Fri Nov 29 00:53:15 2002
13017+++ linux-2.4.20-o1-preempt/kernel/signal.c Tue Feb 18 03:51:30 2003
13018@@ -490,12 +490,9 @@
13019 * process of changing - but no harm is done by that
13020 * other than doing an extra (lightweight) IPI interrupt.
13021 */
13022- spin_lock(&runqueue_lock);
13023- if (task_has_cpu(t) && t->processor != smp_processor_id())
13024- smp_send_reschedule(t->processor);
13025- spin_unlock(&runqueue_lock);
13026-#endif /* CONFIG_SMP */
13027-
13028+ if ((t->state == TASK_RUNNING) && (t->cpu != cpu()))
13029+ kick_if_running(t);
13030+#endif
13031 if (t->state & TASK_INTERRUPTIBLE) {
13032 wake_up_process(t);
13033 return;
13034diff -urN linux-2.4.20/kernel/softirq.c linux-2.4.20-o1-preempt/kernel/softirq.c
13035--- linux-2.4.20/kernel/softirq.c Fri Nov 29 00:53:15 2002
13036+++ linux-2.4.20-o1-preempt/kernel/softirq.c Tue Feb 18 03:51:30 2003
13037@@ -364,13 +364,13 @@
13038 int cpu = cpu_logical_map(bind_cpu);
13039
13040 daemonize();
13041- current->nice = 19;
13042+ set_user_nice(current, 19);
13043 sigfillset(&current->blocked);
13044
13045 /* Migrate to the right CPU */
13046- current->cpus_allowed = 1UL << cpu;
13047- while (smp_processor_id() != cpu)
13048- schedule();
13049+ set_cpus_allowed(current, 1UL << cpu);
13050+ if (cpu() != cpu)
13051+ BUG();
13052
13053 sprintf(current->comm, "ksoftirqd_CPU%d", bind_cpu);
13054
13055@@ -395,7 +395,7 @@
13056 }
13057 }
13058
13059-static __init int spawn_ksoftirqd(void)
13060+__init int spawn_ksoftirqd(void)
13061 {
13062 int cpu;
13063
13064diff -urN linux-2.4.20/kernel/sys.c linux-2.4.20-o1-preempt/kernel/sys.c
13065--- linux-2.4.20/kernel/sys.c Sat Aug 3 02:39:46 2002
13066+++ linux-2.4.20-o1-preempt/kernel/sys.c Tue Feb 18 03:51:30 2003
13067@@ -220,10 +220,10 @@
13068 }
13069 if (error == -ESRCH)
13070 error = 0;
13071- if (niceval < p->nice && !capable(CAP_SYS_NICE))
13072+ if (niceval < task_nice(p) && !capable(CAP_SYS_NICE))
13073 error = -EACCES;
13074 else
13075- p->nice = niceval;
13076+ set_user_nice(p, niceval);
13077 }
13078 read_unlock(&tasklist_lock);
13079
13080@@ -249,7 +249,7 @@
13081 long niceval;
13082 if (!proc_sel(p, which, who))
13083 continue;
13084- niceval = 20 - p->nice;
13085+ niceval = 20 - task_nice(p);
13086 if (niceval > retval)
13087 retval = niceval;
13088 }
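
The 20 - task_nice(p) above preserves getpriority(2)'s long-standing convention: the syscall returns the priority biased to 1..40 so that -1 stays reserved for errors, and the C library maps it back to the usual -20..19 nice range. A small user-space check of that round trip; nothing here is specific to this patch.

#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <sys/resource.h>

int main(void)
{
        int before, after;

        errno = 0;
        before = getpriority(PRIO_PROCESS, 0);  /* glibc already unbiases the value */
        if (before == -1 && errno)
                perror("getpriority");

        errno = 0;
        if (nice(5) == -1 && errno)             /* raising our nice value is always allowed */
                perror("nice");

        errno = 0;
        after = getpriority(PRIO_PROCESS, 0);
        printf("nice before=%d after=%d\n", before, after);
        return 0;
}
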
13089diff -urN linux-2.4.20/kernel/timer.c linux-2.4.20-o1-preempt/kernel/timer.c
13090--- linux-2.4.20/kernel/timer.c Fri Nov 29 00:53:15 2002
13091+++ linux-2.4.20-o1-preempt/kernel/timer.c Tue Feb 18 03:51:30 2003
13092@@ -25,6 +25,8 @@
13093
13094 #include <asm/uaccess.h>
13095
13096+struct kernel_stat kstat;
13097+
13098 /*
13099 * Timekeeping variables
13100 */
13101@@ -598,25 +600,7 @@
13102 int cpu = smp_processor_id(), system = user_tick ^ 1;
13103
13104 update_one_process(p, user_tick, system, cpu);
13105- if (p->pid) {
13106- if (--p->counter <= 0) {
13107- p->counter = 0;
13108- /*
13109- * SCHED_FIFO is priority preemption, so this is
13110- * not the place to decide whether to reschedule a
13111- * SCHED_FIFO task or not - Bhavesh Davda
13112- */
13113- if (p->policy != SCHED_FIFO) {
13114- p->need_resched = 1;
13115- }
13116- }
13117- if (p->nice > 0)
13118- kstat.per_cpu_nice[cpu] += user_tick;
13119- else
13120- kstat.per_cpu_user[cpu] += user_tick;
13121- kstat.per_cpu_system[cpu] += system;
13122- } else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
13123- kstat.per_cpu_system[cpu] += system;
13124+ scheduler_tick(user_tick, system);
13125 }
13126
13127 /*
13128@@ -624,17 +608,7 @@
13129 */
13130 static unsigned long count_active_tasks(void)
13131 {
13132- struct task_struct *p;
13133- unsigned long nr = 0;
13134-
13135- read_lock(&tasklist_lock);
13136- for_each_task(p) {
13137- if ((p->state == TASK_RUNNING ||
13138- (p->state & TASK_UNINTERRUPTIBLE)))
13139- nr += FIXED_1;
13140- }
13141- read_unlock(&tasklist_lock);
13142- return nr;
13143+ return (nr_running() + nr_uninterruptible()) * FIXED_1;
13144 }
13145
13146 /*
13147@@ -827,6 +801,89 @@
13148
13149 #endif
13150
13151+static void process_timeout(unsigned long __data)
13152+{
13153+ wake_up_process((task_t *)__data);
13154+}
13155+
13156+/**
13157+ * schedule_timeout - sleep until timeout
13158+ * @timeout: timeout value in jiffies
13159+ *
13160+ * Make the current task sleep until @timeout jiffies have
13161+ * elapsed. The routine will return immediately unless
13162+ * the current task state has been set (see set_current_state()).
13163+ *
13164+ * You can set the task state as follows -
13165+ *
13166+ * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
13167+ * pass before the routine returns. The routine will return 0
13168+ *
13169+ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
13170+ * delivered to the current task. In this case the remaining time
13171+ * in jiffies will be returned, or 0 if the timer expired in time
13172+ *
13173+ * The current task state is guaranteed to be TASK_RUNNING when this
13174+ * routine returns.
13175+ *
13176+ * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
13177+ * the CPU away without a bound on the timeout. In this case the return
13178+ * value will be %MAX_SCHEDULE_TIMEOUT.
13179+ *
13180+ * In all cases the return value is guaranteed to be non-negative.
13181+ */
13182+signed long schedule_timeout(signed long timeout)
13183+{
13184+ struct timer_list timer;
13185+ unsigned long expire;
13186+
13187+ switch (timeout)
13188+ {
13189+ case MAX_SCHEDULE_TIMEOUT:
13190+ /*
13191+ * These two special cases are there to make life easier
13192+ * for the caller. Nothing more. We could take
13193+ * MAX_SCHEDULE_TIMEOUT from one of the negative values,
13194+ * but I'd like to return a valid offset (>=0) to allow
13195+ * the caller to do everything it wants with the retval.
13196+ */
13197+ schedule();
13198+ goto out;
13199+ default:
13200+ /*
13201+ * Another bit of PARANOID. Note that the retval will be
13202+ * 0 since no piece of kernel is supposed to do a check
13203+ * for a negative retval of schedule_timeout() (since it
13204+ * should never happen anyway). You just have the printk()
13205+ * that will tell you if something has gone wrong and where.
13206+ */
13207+ if (timeout < 0)
13208+ {
13209+ printk(KERN_ERR "schedule_timeout: wrong timeout "
13210+ "value %lx from %p\n", timeout,
13211+ __builtin_return_address(0));
13212+ current->state = TASK_RUNNING;
13213+ goto out;
13214+ }
13215+ }
13216+
13217+ expire = timeout + jiffies;
13218+
13219+ init_timer(&timer);
13220+ timer.expires = expire;
13221+ timer.data = (unsigned long) current;
13222+ timer.function = process_timeout;
13223+
13224+ add_timer(&timer);
13225+ schedule();
13226+ del_timer_sync(&timer);
13227+
13228+ timeout = expire - jiffies;
13229+
13230+ out:
13231+ return timeout < 0 ? 0 : timeout;
13232+}
13233+
13234 /* Thread ID - the internal kernel "pid" */
13235 asmlinkage long sys_gettid(void)
13236 {
13237@@ -873,4 +930,3 @@
13238 }
13239 return 0;
13240 }
13241-
13242diff -urN linux-2.4.20/lib/dec_and_lock.c linux-2.4.20-o1-preempt/lib/dec_and_lock.c
13243--- linux-2.4.20/lib/dec_and_lock.c Wed Oct 3 18:11:26 2001
13244+++ linux-2.4.20-o1-preempt/lib/dec_and_lock.c Tue Feb 18 03:52:06 2003
13245@@ -1,5 +1,6 @@
13246 #include <linux/module.h>
13247 #include <linux/spinlock.h>
13248+#include <linux/sched.h>
13249 #include <asm/atomic.h>
13250
13251 /*
13252diff -urN linux-2.4.20/mm/oom_kill.c linux-2.4.20-o1-preempt/mm/oom_kill.c
13253--- linux-2.4.20/mm/oom_kill.c Fri Nov 29 00:53:15 2002
13254+++ linux-2.4.20-o1-preempt/mm/oom_kill.c Tue Feb 18 03:51:30 2003
13255@@ -82,7 +82,7 @@
13256 * Niced processes are most likely less important, so double
13257 * their badness points.
13258 */
13259- if (p->nice > 0)
13260+ if (task_nice(p) > 0)
13261 points *= 2;
13262
13263 /*
13264@@ -146,7 +146,7 @@
13265 * all the memory it needs. That way it should be able to
13266 * exit() and clear out its resources quickly...
13267 */
13268- p->counter = 5 * HZ;
13269+ p->time_slice = HZ;
13270 p->flags |= PF_MEMALLOC | PF_MEMDIE;
13271
13272 /* This process has hardware access, be more careful. */
13273diff -urN linux-2.4.20/mm/slab.c linux-2.4.20-o1-preempt/mm/slab.c
13274--- linux-2.4.20/mm/slab.c Fri Nov 29 00:53:15 2002
13275+++ linux-2.4.20-o1-preempt/mm/slab.c Tue Feb 18 03:52:06 2003
13276@@ -49,7 +49,8 @@
13277 * constructors and destructors are called without any locking.
13278 * Several members in kmem_cache_t and slab_t never change, they
13279 * are accessed without any locking.
13280- * The per-cpu arrays are never accessed from the wrong cpu, no locking.
13281+ * The per-cpu arrays are never accessed from the wrong cpu, no locking,
13282+ * and local interrupts are disabled so slab code is preempt-safe.
13283 * The non-constant members are protected with a per-cache irq spinlock.
13284 *
13285 * Further notes from the original documentation:
13286diff -urN linux-2.4.20/net/core/dev.c linux-2.4.20-o1-preempt/net/core/dev.c
13287--- linux-2.4.20/net/core/dev.c Fri Nov 29 00:53:15 2002
13288+++ linux-2.4.20-o1-preempt/net/core/dev.c Tue Feb 18 03:52:06 2003
13289@@ -1049,9 +1049,15 @@
13290 int cpu = smp_processor_id();
13291
13292 if (dev->xmit_lock_owner != cpu) {
13293+ /*
13294+ * The spin_lock effectively does a preempt lock, but
13295+ * we are about to drop that...
13296+ */
13297+ preempt_disable();
13298 spin_unlock(&dev->queue_lock);
13299 spin_lock(&dev->xmit_lock);
13300 dev->xmit_lock_owner = cpu;
13301+ preempt_enable();
13302
13303 if (!netif_queue_stopped(dev)) {
13304 if (netdev_nit)
13305diff -urN linux-2.4.20/net/core/skbuff.c linux-2.4.20-o1-preempt/net/core/skbuff.c
13306--- linux-2.4.20/net/core/skbuff.c Sat Aug 3 02:39:46 2002
13307+++ linux-2.4.20-o1-preempt/net/core/skbuff.c Tue Feb 18 03:52:06 2003
13308@@ -111,33 +111,37 @@
13309
13310 static __inline__ struct sk_buff *skb_head_from_pool(void)
13311 {
13312- struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
13313+ struct sk_buff_head *list;
13314+ struct sk_buff *skb = NULL;
13315+ unsigned long flags;
13316
13317- if (skb_queue_len(list)) {
13318- struct sk_buff *skb;
13319- unsigned long flags;
13320+ local_irq_save(flags);
13321
13322- local_irq_save(flags);
13323+ list = &skb_head_pool[smp_processor_id()].list;
13324+
13325+ if (skb_queue_len(list))
13326 skb = __skb_dequeue(list);
13327- local_irq_restore(flags);
13328- return skb;
13329- }
13330- return NULL;
13331+
13332+ local_irq_restore(flags);
13333+ return skb;
13334 }
13335
13336 static __inline__ void skb_head_to_pool(struct sk_buff *skb)
13337 {
13338- struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
13339+ struct sk_buff_head *list;
13340+ unsigned long flags;
13341
13342- if (skb_queue_len(list) < sysctl_hot_list_len) {
13343- unsigned long flags;
13344+ local_irq_save(flags);
13345+ list = &skb_head_pool[smp_processor_id()].list;
13346
13347- local_irq_save(flags);
13348+ if (skb_queue_len(list) < sysctl_hot_list_len) {
13349 __skb_queue_head(list, skb);
13350 local_irq_restore(flags);
13351
13352 return;
13353 }
13354+
13355+ local_irq_restore(flags);
13356 kmem_cache_free(skbuff_head_cache, skb);
13357 }
13358
13359diff -urN linux-2.4.20/net/socket.c linux-2.4.20-o1-preempt/net/socket.c
13360--- linux-2.4.20/net/socket.c Fri Nov 29 00:53:16 2002
13361+++ linux-2.4.20-o1-preempt/net/socket.c Tue Feb 18 03:52:07 2003
13362@@ -132,7 +132,7 @@
13363
13364 static struct net_proto_family *net_families[NPROTO];
13365
13366-#ifdef CONFIG_SMP
13367+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
13368 static atomic_t net_family_lockct = ATOMIC_INIT(0);
13369 static spinlock_t net_family_lock = SPIN_LOCK_UNLOCKED;
13370
13371diff -urN linux-2.4.20/net/sunrpc/pmap_clnt.c linux-2.4.20-o1-preempt/net/sunrpc/pmap_clnt.c
13372--- linux-2.4.20/net/sunrpc/pmap_clnt.c Sat Aug 3 02:39:46 2002
13373+++ linux-2.4.20-o1-preempt/net/sunrpc/pmap_clnt.c Tue Feb 18 03:52:07 2003
13374@@ -12,6 +12,7 @@
13375 #include <linux/config.h>
13376 #include <linux/types.h>
13377 #include <linux/socket.h>
13378+#include <linux/sched.h>
13379 #include <linux/kernel.h>
13380 #include <linux/errno.h>
13381 #include <linux/uio.h>