1 --- linux/fs/proc/array.c.orig Thu Jul 18 15:22:23 2002
2 +++ linux/fs/proc/array.c Thu Jul 18 15:22:33 2002
5 /* scale priority and nice values from timeslices to -20..20 */
6 /* to make it look like a "normal" Unix priority/nice value */
7 - priority = task->counter;
8 - priority = 20 - (priority * 10 + DEF_COUNTER / 2) / DEF_COUNTER;
10 + priority = task_prio(task);
11 + nice = task_nice(task);
13 read_lock(&tasklist_lock);
14 ppid = task->pid ? task->p_opptr->pid : 0;
24 --- linux/fs/proc/proc_misc.c.orig Thu Jul 18 15:22:23 2002
25 +++ linux/fs/proc/proc_misc.c Thu Jul 18 15:22:33 2002
27 a = avenrun[0] + (FIXED_1/200);
28 b = avenrun[1] + (FIXED_1/200);
29 c = avenrun[2] + (FIXED_1/200);
30 - len = sprintf(page,"%d.%02d %d.%02d %d.%02d %d/%d %d\n",
31 + len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
32 LOAD_INT(a), LOAD_FRAC(a),
33 LOAD_INT(b), LOAD_FRAC(b),
34 LOAD_INT(c), LOAD_FRAC(c),
35 - nr_running, nr_threads, last_pid);
36 + nr_running(), nr_threads, last_pid);
37 return proc_calc_metrics(page, start, off, count, eof, len);
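
For reference, a user-space sketch of the fixed-point arithmetic behind the lines above, assuming the stock FSHIFT == 11 definitions from <linux/sched.h>; the FIXED_1/200 term added before formatting rounds the displayed load to the nearest hundredth:

    #include <stdio.h>

    #define FSHIFT  11                     /* bits of fractional precision */
    #define FIXED_1 (1 << FSHIFT)          /* 1.0 in fixed point */
    #define LOAD_INT(x)  ((x) >> FSHIFT)
    #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100)

    int main(void)
    {
        unsigned long avenrun = (unsigned long)(0.999 * FIXED_1); /* sample load */
        unsigned long a = avenrun + FIXED_1 / 200;  /* round to nearest 1/100 */

        /* prints 1.00; without the rounding term it would print 0.99 */
        printf("%lu.%02lu\n", LOAD_INT(a), LOAD_FRAC(a));
        return 0;
    }
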
44 - idle = init_tasks[0]->times.tms_utime + init_tasks[0]->times.tms_stime;
45 + idle = init_task.times.tms_utime + init_task.times.tms_stime;
47 /* The formula for the fraction parts really is ((t * 100) / HZ) % 100, but
48 that would overflow about every five days at HZ == 100.
52 proc_sprintf(page, &off, &len,
57 - kstat.context_swtch,
58 + nr_context_switches(),
59 xtime.tv_sec - jif / HZ,
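
A small demonstration of the overflow the comment above warns about, together with the usual reduce-first workaround (a sketch; the values are illustrative):

    #include <stdio.h>
    #include <stdint.h>

    #define HZ 100

    int main(void)
    {
        uint32_t t = 4294000000U;            /* ~497 days of jiffies at HZ == 100 */
        uint32_t naive = t * 100 / HZ % 100; /* t*100 wraps at 2^32: wrong */
        uint32_t safe  = t % HZ * 100 / HZ;  /* reduce mod HZ first: exact */

        printf("naive=%u safe=%u\n", naive, safe); /* naive=76 safe=0 */
        return 0;
    }
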
62 --- linux/fs/nfs/pagelist.c.orig Thu Jul 18 15:22:23 2002
63 +++ linux/fs/nfs/pagelist.c Thu Jul 18 15:22:33 2002
66 if (signalled() && (server->flags & NFS_MOUNT_INTR))
67 return ERR_PTR(-ERESTARTSYS);
68 - current->policy |= SCHED_YIELD;
73 /* Initialize the request struct. Initially, we assume a
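
The same two-line replacement recurs throughout this patch: the old 2.4 idiom of OR-ing SCHED_YIELD into current->policy and then calling schedule() is collapsed into a single yield() call. A minimal user-space sketch of the intent, using the POSIX equivalent (the helper name and flag are illustrative):

    #include <sched.h>

    /* Busy-wait politely: give the CPU away on each iteration instead of
     * spinning hot - this is what the SCHED_YIELD+schedule() pair emulated. */
    static void wait_for(volatile int *flag)
    {
        while (!*flag)
            sched_yield();
    }
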
74 --- linux/fs/ufs/truncate.c.orig Thu Jul 18 15:22:16 2002
75 +++ linux/fs/ufs/truncate.c Thu Jul 18 15:22:33 2002
77 if (IS_SYNC(inode) && (inode->i_state & I_DIRTY))
78 ufs_sync_inode (inode);
79 run_task_queue(&tq_disk);
80 - current->policy |= SCHED_YIELD;
86 offset = inode->i_size & uspi->s_fshift;
88 --- linux/fs/reiserfs/buffer2.c.orig Thu Jul 18 15:22:11 2002
89 +++ linux/fs/reiserfs/buffer2.c Thu Jul 18 15:22:33 2002
91 buffer_journal_dirty(bh) ? ' ' : '!');
93 run_task_queue(&tq_disk);
94 - current->policy |= SCHED_YIELD;
98 if (repeat_counter > 30000000) {
99 reiserfs_warning("vs-3051: done waiting, ignore vs-3050 messages for (%b)\n", bh) ;
101 struct buffer_head * reiserfs_bread (struct super_block *super, int n_block, int n_size)
103 struct buffer_head *result;
104 - PROC_EXP( unsigned int ctx_switches = kstat.context_swtch );
105 + PROC_EXP( unsigned int ctx_switches = nr_context_switches(); );
107 result = bread (super -> s_dev, n_block, n_size);
108 PROC_INFO_INC( super, breads );
109 - PROC_EXP( if( kstat.context_swtch != ctx_switches )
110 + PROC_EXP( if( nr_context_switches() != ctx_switches )
111 PROC_INFO_INC( super, bread_miss ) );
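
The profiling hunk above uses a simple blocking probe: sample the global context-switch count around bread(), and if it changed, the buffer read must have slept. A hedged sketch of the pattern as a reusable macro (GNU statement-expression, as is common in kernel code; the macro name is illustrative):

    /* Did `call' block? Compare context-switch counts around it.
     * nr_context_switches() is the per-CPU-summing helper this patch adds. */
    #define DID_BLOCK(call) ({                              \
            unsigned long __ctx = nr_context_switches();    \
            (call);                                         \
            nr_context_switches() != __ctx;                 \
    })
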
114 --- linux/fs/reiserfs/journal.c.orig Thu Jul 18 15:22:23 2002
115 +++ linux/fs/reiserfs/journal.c Thu Jul 18 15:22:33 2002
118 bn = allocate_bitmap_node(p_s_sb) ;
120 - current->policy |= SCHED_YIELD ;
126 --- linux/fs/jffs2/background.c.orig Thu Jul 18 15:22:02 2002
127 +++ linux/fs/jffs2/background.c Thu Jul 18 15:22:33 2002
130 sprintf(current->comm, "jffs2_gcd_mtd%d", c->mtd->index);
132 - /* FIXME in the 2.2 backport */
133 - current->nice = 10;
136 spin_lock_irq(&current->sigmask_lock);
137 siginitsetinv (&current->blocked, sigmask(SIGHUP) | sigmask(SIGKILL) | sigmask(SIGSTOP) | sigmask(SIGCONT));
138 --- linux/fs/jbd/journal.c.orig Thu Jul 18 15:22:23 2002
139 +++ linux/fs/jbd/journal.c Thu Jul 18 15:22:33 2002
141 printk (KERN_NOTICE __FUNCTION__
142 ": ENOMEM at get_unused_buffer_head, "
144 - current->policy |= SCHED_YIELD;
149 /* keep subsequent assertions sane */
150 @@ -1543,8 +1542,7 @@
151 last_warning = jiffies;
154 - current->policy |= SCHED_YIELD;
160 @@ -1602,8 +1600,7 @@
161 last_warning = jiffies;
164 - current->policy |= SCHED_YIELD;
167 ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
170 --- linux/fs/jbd/revoke.c.orig Thu Jul 18 15:22:16 2002
171 +++ linux/fs/jbd/revoke.c Thu Jul 18 15:22:33 2002
173 if (!journal_oom_retry)
175 jbd_debug(1, "ENOMEM in " __FUNCTION__ ", retrying.\n");
176 - current->policy |= SCHED_YIELD;
182 --- linux/fs/jbd/transaction.c.orig Thu Jul 18 15:22:16 2002
183 +++ linux/fs/jbd/transaction.c Thu Jul 18 15:22:33 2002
184 @@ -1379,8 +1379,7 @@
186 old_handle_count = transaction->t_handle_count;
187 set_current_state(TASK_RUNNING);
188 - current->policy |= SCHED_YIELD;
191 } while (old_handle_count != transaction->t_handle_count);
194 --- linux/fs/binfmt_elf.c.orig Thu Jul 18 15:22:23 2002
195 +++ linux/fs/binfmt_elf.c Thu Jul 18 15:22:33 2002
196 @@ -1143,7 +1143,7 @@
198 psinfo.pr_sname = (i < 0 || i > 5) ? '.' : "RSDZTD"[i];
199 psinfo.pr_zomb = psinfo.pr_sname == 'Z';
200 - psinfo.pr_nice = current->nice;
201 + psinfo.pr_nice = task_nice(current);
202 psinfo.pr_flag = current->flags;
203 psinfo.pr_uid = NEW_TO_OLD_UID(current->uid);
204 psinfo.pr_gid = NEW_TO_OLD_GID(current->gid);
205 --- linux/fs/buffer.c.orig Thu Jul 18 15:22:23 2002
206 +++ linux/fs/buffer.c Thu Jul 18 15:22:33 2002
209 try_to_free_pages(zone, GFP_NOFS, 0);
210 run_task_queue(&tq_disk);
211 - current->policy |= SCHED_YIELD;
212 __set_current_state(TASK_RUNNING);
217 void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
218 --- linux/fs/locks.c.orig Thu Jul 18 15:22:00 2002
219 +++ linux/fs/locks.c Thu Jul 18 15:22:33 2002
221 /* Let the blocked process remove waiter from the
222 * block list when it gets scheduled.
224 - current->policy |= SCHED_YIELD;
228 /* Remove waiter from the block list, because by the
229 * time it wakes up blocker won't exist any more.
230 --- linux/init/do_mounts.c.orig Thu Jul 18 15:22:25 2002
231 +++ linux/init/do_mounts.c Thu Jul 18 15:22:33 2002
234 pid = kernel_thread(do_linuxrc, "/linuxrc", SIGCHLD);
236 - while (pid != wait(&i)) {
237 - current->policy |= SCHED_YIELD;
240 + while (pid != wait(&i))
244 sys_mount("..", ".", NULL, MS_MOVE, NULL);
245 --- linux/init/main.c.orig Thu Jul 18 15:22:25 2002
246 +++ linux/init/main.c Thu Jul 18 15:22:33 2002
248 extern void setup_arch(char **);
249 extern void cpu_idle(void);
251 -unsigned long wait_init_idle;
255 #ifdef CONFIG_X86_LOCAL_APIC
256 @@ -303,29 +301,19 @@
261 /* Called by boot processor to activate the rest. */
262 static void __init smp_init(void)
264 /* Get other processors into their bootup holding patterns. */
266 - wait_init_idle = cpu_online_map;
267 - clear_bit(current->processor, &wait_init_idle); /* Don't wait on me! */
272 - /* Wait for the other cpus to set up their idle processes */
273 - printk("Waiting on wait_init_idle (map = 0x%lx)\n", wait_init_idle);
274 - while (wait_init_idle) {
278 - printk("All processors have done init_idle\n");
285 * We need to finalize in a non-__init function or else race conditions
286 * between the root thread and the init thread may cause start_kernel to
289 kernel_thread(init, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
291 - current->need_resched = 1;
298 * Activate the first processor.
299 @@ -424,14 +411,18 @@
304 printk("POSIX conformance testing by UNIFIX\n");
307 - * We count on the initial thread going ok
308 - * Like idlers init is an unlocked kernel thread, which will
309 - * make syscalls (and thus be locked).
310 + init_idle(current, smp_processor_id());
312 + * We count on the initial thread going ok
313 + * Like idlers init is an unlocked kernel thread, which will
314 + * make syscalls (and thus be locked).
318 + /* Do the rest non-__init'ed, we're now alive */
324 static void __init do_basic_setup(void)
326 + /* Start the per-CPU migration threads */
332 * Tell the world that we're going to be the grim
333 --- linux/kernel/capability.c.orig Sat Jun 24 06:06:37 2000
334 +++ linux/kernel/capability.c Thu Jul 18 15:22:33 2002
336 #include <linux/mm.h>
337 #include <asm/uaccess.h>
339 +unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
341 kernel_cap_t cap_bset = CAP_INIT_EFF_SET;
343 /* Note: never hold tasklist_lock while spinning for this one */
344 --- linux/kernel/exit.c.orig Thu Jul 18 15:22:25 2002
345 +++ linux/kernel/exit.c Thu Jul 18 15:22:33 2002
348 static void release_task(struct task_struct * p)
350 - if (p != current) {
355 - * Wait to make sure the process isn't on the
356 - * runqueue (active on some other CPU still)
360 - if (!task_has_cpu(p))
366 - } while (task_has_cpu(p));
369 + wait_task_inactive(p);
371 - atomic_dec(&p->user->processes);
376 - current->cmin_flt += p->min_flt + p->cmin_flt;
377 - current->cmaj_flt += p->maj_flt + p->cmaj_flt;
378 - current->cnswap += p->nswap + p->cnswap;
380 - * Potentially available timeslices are retrieved
381 - * here - this way the parent does not get penalized
382 - * for creating too many processes.
384 - * (this cannot be used to artificially 'generate'
385 - * timeslices, because any timeslice recovered here
386 - * was given away by the parent in the first place.)
388 - current->counter += p->counter;
389 - if (current->counter >= MAX_COUNTER)
390 - current->counter = MAX_COUNTER;
392 - free_task_struct(p);
394 - printk("task releasing itself\n");
396 + atomic_dec(&p->user->processes);
401 + current->cmin_flt += p->min_flt + p->cmin_flt;
402 + current->cmaj_flt += p->maj_flt + p->cmaj_flt;
403 + current->cnswap += p->nswap + p->cnswap;
406 + free_task_struct(p);
412 read_unlock(&tasklist_lock);
417 + * reparent_to_init() - Reparent the calling kernel thread to the init task.
419 + * If a kernel thread is launched as a result of a system call, or if
420 + * it ever exits, it should generally reparent itself to init so that
421 + * it is correctly cleaned up on exit.
423 + * Various task state, such as scheduling policy and priority, may have
424 + * been inherited from a user process, so we reset them to sane values here.
426 + * NOTE that reparent_to_init() gives the caller full capabilities.
428 +void reparent_to_init(void)
430 + write_lock_irq(&tasklist_lock);
432 + /* Reparent to init */
433 + REMOVE_LINKS(current);
434 + current->p_pptr = child_reaper;
435 + current->p_opptr = child_reaper;
436 + SET_LINKS(current);
438 + /* Set the exit signal to SIGCHLD so we signal init on exit */
439 + current->exit_signal = SIGCHLD;
441 + current->ptrace = 0;
442 + if ((current->policy == SCHED_OTHER) && (task_nice(current) < 0))
443 + set_user_nice(current, 0);
444 + /* cpus_allowed? */
447 + current->cap_effective = CAP_INIT_EFF_SET;
448 + current->cap_inheritable = CAP_INIT_INH_SET;
449 + current->cap_permitted = CAP_FULL_SET;
450 + current->keep_capabilities = 0;
451 + memcpy(current->rlim, init_task.rlim, sizeof(*(current->rlim)));
452 + current->user = INIT_USER;
454 + write_unlock_irq(&tasklist_lock);
458 + * Put all the gunge required to become a kernel thread without
459 + * attached user resources in one place where it belongs.
462 +void daemonize(void)
464 + struct fs_struct *fs;
468 + * If we were started as result of loading a module, close all of the
469 + * user space pages. We don't need them, and if we didn't close them
470 + * they would be locked into memory.
474 + current->session = 1;
476 + current->tty = NULL;
478 + /* Become as one with the init task */
480 + exit_fs(current); /* current->fs->count--; */
483 + atomic_inc(&fs->count);
484 + exit_files(current);
485 + current->files = init_task.files;
486 + atomic_inc(&current->files->count);
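
Taken together, the two helpers give kernel threads a standard prologue. A sketch of typical 2.4 usage, assuming the thread was spawned with kernel_thread() (the daemon name and wait queue are illustrative):

    static DECLARE_WAIT_QUEUE_HEAD(my_wq);

    static int my_daemon(void *unused)
    {
        daemonize();            /* detach from user-space mm, files, fs */
        reparent_to_init();     /* become init's child with sane prio/caps */
        sprintf(current->comm, "mydaemon");

        while (!signal_pending(current)) {
            /* ... periodic work goes here ... */
            interruptible_sleep_on_timeout(&my_wq, HZ);
        }
        return 0;
    }
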
490 --- linux/kernel/fork.c.orig Thu Jul 18 15:22:25 2002
491 +++ linux/kernel/fork.c Fri Jul 19 15:01:41 2002
494 /* The idle threads do not count.. */
499 unsigned long total_forks; /* Handle normal Linux uptimes. */
502 struct task_struct *pidhash[PIDHASH_SZ];
504 +rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */
506 void add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
510 if (p->pid == 0 && current->pid != 0)
511 goto bad_fork_cleanup;
513 - p->run_list.next = NULL;
514 - p->run_list.prev = NULL;
515 + INIT_LIST_HEAD(&p->run_list);
518 init_waitqueue_head(&p->wait_chldexit);
519 @@ -665,14 +665,15 @@
523 - p->cpus_runnable = ~0UL;
524 - p->processor = current->processor;
526 /* ?? should we just memset this ?? */
527 for(i = 0; i < smp_num_cpus; i++)
528 - p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0;
529 + p->per_cpu_utime[cpu_logical_map(i)] =
530 + p->per_cpu_stime[cpu_logical_map(i)] = 0;
531 spin_lock_init(&p->sigmask_lock);
535 p->lock_depth = -1; /* -1 = no lock */
536 p->start_time = jiffies;
538 @@ -706,15 +707,27 @@
539 p->pdeath_signal = 0;
542 - * "share" dynamic priority between parent and child, thus the
543 - * total amount of dynamic priorities in the system doesnt change,
544 - * more scheduling fairness. This is only important in the first
545 - * timeslice, on the long run the scheduling behaviour is unchanged.
547 - p->counter = (current->counter + 1) >> 1;
548 - current->counter >>= 1;
549 - if (!current->counter)
550 - current->need_resched = 1;
551 + * Share the timeslice between parent and child, thus the
552 + * total amount of pending timeslices in the system doesn't change,
553 + * resulting in more scheduling fairness.
556 + if (!current->time_slice)
558 + p->time_slice = (current->time_slice + 1) >> 1;
559 + p->first_time_slice = 1;
560 + current->time_slice >>= 1;
561 + if (!current->time_slice) {
563 + * This case is rare, it happens when the parent has only
564 + * a single jiffy left from its timeslice. Taking the
565 + * runqueue lock is not a problem.
567 + current->time_slice = 1;
568 + scheduler_tick(0,0);
570 + p->sleep_timestamp = jiffies;
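
A worked instance of the split above: fork() only divides the parent's remaining timeslice, it never creates CPU time. With 7 ticks left the child gets the rounded-up half and the parent keeps the rest (a quick user-space check):

    #include <stdio.h>

    int main(void)
    {
        unsigned int parent = 7, child;

        child = (parent + 1) >> 1;  /* child gets the rounded-up half: 4 */
        parent >>= 1;               /* parent keeps the rest: 3 */
        printf("child=%u parent=%u\n", child, parent);
        return 0;
    }
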
574 * Ok, add it to the run-queues and make it
575 @@ -750,11 +763,16 @@
577 if (p->ptrace & PT_PTRACED)
578 send_sig(SIGSTOP, p, 1);
580 - wake_up_process(p); /* do this last */
581 + wake_up_forked_process(p); /* do this last */
583 if (clone_flags & CLONE_VFORK)
584 wait_for_completion(&vfork);
587 + * Let the child process run first, to avoid most of the
588 + * COW overhead when the child exec()s afterwards.
590 + current->need_resched = 1;
594 --- linux/kernel/ksyms.c.orig Thu Jul 18 15:22:25 2002
595 +++ linux/kernel/ksyms.c Thu Jul 18 15:22:33 2002
597 /* process management */
598 EXPORT_SYMBOL(complete_and_exit);
599 EXPORT_SYMBOL(__wake_up);
600 -EXPORT_SYMBOL(__wake_up_sync);
602 +EXPORT_SYMBOL_GPL(__wake_up_sync); /* internal use only */
604 EXPORT_SYMBOL(wake_up_process);
605 EXPORT_SYMBOL(sleep_on);
606 EXPORT_SYMBOL(sleep_on_timeout);
608 EXPORT_SYMBOL(schedule);
609 EXPORT_SYMBOL(schedule_timeout);
610 EXPORT_SYMBOL(sys_sched_yield);
611 +EXPORT_SYMBOL(set_user_nice);
612 +EXPORT_SYMBOL(task_nice);
613 +EXPORT_SYMBOL_GPL(idle_cpu);
615 +EXPORT_SYMBOL_GPL(set_cpus_allowed);
617 EXPORT_SYMBOL(jiffies);
618 EXPORT_SYMBOL(xtime);
619 EXPORT_SYMBOL(do_gettimeofday);
622 EXPORT_SYMBOL(kstat);
623 EXPORT_SYMBOL(nr_running);
624 +EXPORT_SYMBOL(nr_context_switches);
627 EXPORT_SYMBOL(panic);
628 --- linux/kernel/ptrace.c.orig Thu Jul 18 15:22:25 2002
629 +++ linux/kernel/ptrace.c Thu Jul 18 15:22:33 2002
631 if (child->state != TASK_STOPPED)
634 - /* Make sure the child gets off its CPU.. */
637 - if (!task_has_cpu(child))
639 - task_unlock(child);
641 - if (child->state != TASK_STOPPED)
645 - } while (task_has_cpu(child));
647 - task_unlock(child);
648 + wait_task_inactive(child);
652 --- linux/kernel/sched.c.orig Thu Jul 18 15:22:25 2002
653 +++ linux/kernel/sched.c Fri Jul 19 14:59:03 2002
656 - * linux/kernel/sched.c
659 * Kernel scheduler and related syscalls
661 - * Copyright (C) 1991, 1992 Linus Torvalds
662 + * Copyright (C) 1991-2002 Linus Torvalds
664 * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and
665 - * make semaphores SMP safe
666 + * make semaphores SMP safe
667 * 1998-11-19 Implemented schedule_timeout() and related stuff
668 * by Andrea Arcangeli
669 - * 1998-12-28 Implemented better SMP scheduling by Ingo Molnar
670 + * 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar:
671 + * hybrid priority-list and round-robin design with
672 + * an array-switch method of distributing timeslices
673 + * and per-CPU runqueues. Additional code by Davide
674 + * Libenzi, Robert Love, and Rusty Russell.
678 - * 'sched.c' is the main kernel file. It contains scheduling primitives
679 - * (sleep_on, wakeup, schedule etc) as well as a number of simple system
680 - * call functions (type getpid()), which just extract a field from
684 -#include <linux/config.h>
685 #include <linux/mm.h>
686 +#include <linux/nmi.h>
687 #include <linux/init.h>
688 +#include <asm/uaccess.h>
689 +#include <linux/highmem.h>
690 #include <linux/smp_lock.h>
691 -#include <linux/nmi.h>
692 +#include <asm/mmu_context.h>
693 #include <linux/interrupt.h>
694 -#include <linux/kernel_stat.h>
695 #include <linux/completion.h>
696 -#include <linux/prefetch.h>
697 -#include <linux/compiler.h>
698 +#include <linux/kernel_stat.h>
700 -#include <asm/uaccess.h>
701 -#include <asm/mmu_context.h>
703 + * Convert user-nice values [ -20 ... 0 ... 19 ]
704 + * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
707 +#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20)
708 +#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20)
709 +#define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio)
711 -extern void timer_bh(void);
712 -extern void tqueue_bh(void);
713 -extern void immediate_bh(void);
715 + * 'User priority' is the nice value converted to something we
716 + * can work with better when scaling various scheduler parameters,
717 + * it's a [ 0 ... 39 ] range.
719 +#define USER_PRIO(p) ((p)-MAX_RT_PRIO)
720 +#define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio)
721 +#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO))
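
A quick check of the mapping, assuming the stock O(1) constants MAX_RT_PRIO == 100 and MAX_PRIO == 140: nice -20..+19 maps onto static priorities 100..139, just past the 0..99 realtime range (lower number means higher priority), and the conversion round-trips:

    #include <stdio.h>

    #define MAX_RT_PRIO 100                  /* assumed stock values */
    #define MAX_PRIO    (MAX_RT_PRIO + 40)   /* 140 */

    #define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20)
    #define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20)

    int main(void)
    {
        int nice;

        for (nice = -20; nice <= 19; nice++)
            if (PRIO_TO_NICE(NICE_TO_PRIO(nice)) != nice)
                printf("broken at %d\n", nice);
        printf("nice -20 -> prio %d, nice +19 -> prio %d\n",
               NICE_TO_PRIO(-20), NICE_TO_PRIO(19));  /* 100 and 139 */
        return 0;
    }
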
724 + * These are the 'tuning knobs' of the scheduler:
726 + * Minimum timeslice is 10 msecs, default timeslice is 150 msecs,
727 + * maximum timeslice is 300 msecs. Timeslices get refilled after
730 +#define MIN_TIMESLICE ( 10 * HZ / 1000)
731 +#define MAX_TIMESLICE (300 * HZ / 1000)
732 +#define CHILD_PENALTY 95
733 +#define PARENT_PENALTY 100
734 +#define EXIT_WEIGHT 3
735 +#define PRIO_BONUS_RATIO 25
736 +#define INTERACTIVE_DELTA 2
737 +#define MAX_SLEEP_AVG (2*HZ)
738 +#define STARVATION_LIMIT (2*HZ)
741 - * scheduler variables
742 + * If a task is 'interactive' then we reinsert it in the active
743 + * array after it has expired its current timeslice. (it will not
744 + * continue to run immediately, it will still roundrobin with
745 + * other interactive tasks.)
747 + * This part scales the interactivity limit depending on niceness.
749 + * We scale it linearly, offset by the INTERACTIVE_DELTA delta.
750 + * Here are a few examples of different nice levels:
752 + * TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0]
753 + * TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0]
754 + * TASK_INTERACTIVE( 0): [1,1,1,1,0,0,0,0,0,0,0]
755 + * TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0]
756 + * TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0]
758 + * (the X axis represents the possible -5 ... 0 ... +5 dynamic
759 + * priority range a task can explore, a value of '1' means the
760 + * task is rated interactive.)
762 + * I.e. nice +19 tasks can never get 'interactive' enough to be
763 + * reinserted into the active array. And only heavily CPU-hog nice -20
764 + * tasks will be expired. Default nice 0 tasks are somewhere between,
765 + * it takes some effort for them to get interactive, but it's not
769 -unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
770 +#define SCALE(v1,v1_max,v2_max) \
771 + (v1) * (v2_max) / (v1_max)
773 -extern void mem_use(void);
775 + (SCALE(TASK_NICE(p), 40, MAX_USER_PRIO*PRIO_BONUS_RATIO/100) + \
778 +#define TASK_INTERACTIVE(p) \
779 + ((p)->prio <= (p)->static_prio - DELTA(p))
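
The table above can be reproduced mechanically. A user-space sketch for the nice == 0 row, assuming the stock value MAX_USER_PRIO == 40 (so the dynamic bonus spans -5..+5):

    #include <stdio.h>

    #define MAX_USER_PRIO     40   /* assumed stock values */
    #define PRIO_BONUS_RATIO  25
    #define INTERACTIVE_DELTA  2

    /* DELTA(p) from above, reduced: nice * 10 / 40 + INTERACTIVE_DELTA */
    static int delta(int nice)
    {
        return nice * (MAX_USER_PRIO * PRIO_BONUS_RATIO / 100) / 40
                + INTERACTIVE_DELTA;
    }

    int main(void)
    {
        int nice = 0, bonus;

        /* prio == static_prio - bonus, so TASK_INTERACTIVE() holds exactly
         * when bonus >= DELTA(nice). The axis runs over the -5 .. +5
         * dynamic priority range, i.e. bonus +5 down to -5: */
        for (bonus = 5; bonus >= -5; bonus--)
            printf("%d", bonus >= delta(nice));
        printf("\n");  /* prints 11110000000, the nice==0 row */
        return 0;
    }
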
782 - * Scheduling quanta.
784 - * NOTE! The unix "nice" value influences how long a process
785 - * gets. The nice value ranges from -20 to +19, where a -20
786 - * is a "high-priority" task, and a "+10" is a low-priority
788 + * BASE_TIMESLICE scales user-nice values [ -20 ... 19 ]
789 + * to time slice values.
791 - * We want the time-slice to be around 50ms or so, so this
792 - * calculation depends on the value of HZ.
793 + * The higher a process's priority, the bigger timeslices
794 + * it gets during one round of execution. But even the lowest
795 + * priority process gets MIN_TIMESLICE worth of execution time.
797 + * task_timeslice() is the interface that is used by the scheduler.
798 + * SCHED_BATCH tasks get longer timeslices to make use of better
799 + * caching. They are inherently noninteractive and they are
800 + * immediately preempted by SCHED_NORMAL tasks so there is no
801 + * downside in using shorter timeslices.
804 -#define TICK_SCALE(x) ((x) >> 2)
806 -#define TICK_SCALE(x) ((x) >> 1)
808 -#define TICK_SCALE(x) (x)
810 -#define TICK_SCALE(x) ((x) << 1)
812 -#define TICK_SCALE(x) ((x) << 2)
815 -#define NICE_TO_TICKS(nice) (TICK_SCALE(20-(nice))+1)
816 +#define BASE_TIMESLICE(p) (MIN_TIMESLICE + \
817 + ((MAX_TIMESLICE - MIN_TIMESLICE) * (MAX_PRIO-1-(p)->static_prio)/(MAX_USER_PRIO - 1)))
819 +static inline unsigned int task_timeslice(task_t *p)
821 + if (p->policy == SCHED_BATCH)
822 + return BASE_TIMESLICE(p) * 10;
824 + return BASE_TIMESLICE(p);
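
Plugging in the stock constants gives concrete slice lengths. At HZ == 100 the interesting nice levels come out as follows (a user-space check; MAX_PRIO == 140 assumed as before):

    #include <stdio.h>

    #define HZ 100
    #define MIN_TIMESLICE ( 10 * HZ / 1000)
    #define MAX_TIMESLICE (300 * HZ / 1000)
    #define MAX_PRIO      140
    #define MAX_USER_PRIO  40

    static unsigned int base_timeslice(int static_prio)
    {
        return MIN_TIMESLICE + (MAX_TIMESLICE - MIN_TIMESLICE) *
                (MAX_PRIO - 1 - static_prio) / (MAX_USER_PRIO - 1);
    }

    int main(void)
    {
        /* nice -20 -> prio 100, nice 0 -> prio 120, nice +19 -> prio 139 */
        printf("nice -20: %u ticks (300ms)\n", base_timeslice(100)); /* 30 */
        printf("nice   0: %u ticks (150ms)\n", base_timeslice(120)); /* 15 */
        printf("nice +19: %u ticks  (10ms)\n", base_timeslice(139)); /*  1 */
        return 0;
    }
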
828 - * Init task must be ok at boot for the ix86 as we will check its signals
829 - * via the SMP irq return path.
830 + * These are the runqueue data structures:
833 -struct task_struct * init_tasks[NR_CPUS] = {&init_task, };
835 +#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long))
837 +typedef struct runqueue runqueue_t;
841 + unsigned long bitmap[BITMAP_SIZE];
842 + list_t queue[MAX_PRIO];
846 - * The tasklist_lock protects the linked list of processes.
848 - * The runqueue_lock locks the parts that actually access
849 - * and change the run-queues, and have to be interrupt-safe.
850 + * This is the main, per-CPU runqueue data structure.
852 - * If both locks are to be concurrently held, the runqueue_lock
853 - * nests inside the tasklist_lock.
855 - * task->alloc_lock nests inside tasklist_lock.
856 + * Locking rule: those places that want to lock multiple runqueues
857 + * (such as the load balancing or the process migration code) must
858 + * acquire the locks in ascending &runqueue order.
860 -spinlock_t runqueue_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED; /* inner */
861 -rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */
864 + unsigned long nr_running, nr_switches, expired_timestamp,
865 + nr_uninterruptible;
866 + task_t *curr, *idle;
867 + prio_array_t *active, *expired, arrays[2];
868 + int prev_nr_running[NR_CPUS];
870 + task_t *migration_thread;
871 + list_t migration_queue;
873 -static LIST_HEAD(runqueue_head);
875 + * The batch queue is a secondary ready-queue:
877 + unsigned long nr_batch;
878 + list_t batch_queue;
881 + * Per-CPU idle CPU time tracking:
883 + * - idle_ticks_left counts back from HZ to 0.
884 + * - idle_count is the number of idle ticks in the last second.
885 + * - once it reaches 0, a new idle_avg is calculated.
887 + #define IDLE_TICKS (HZ)
889 + unsigned int idle_ticks_left, idle_count, idle_avg;
891 +} ____cacheline_aligned;
893 +static struct runqueue runqueues[NR_CPUS] __cacheline_aligned;
895 +#define cpu_rq(cpu) (runqueues + (cpu))
896 +#define this_rq() cpu_rq(smp_processor_id())
897 +#define task_rq(p) cpu_rq(task_cpu(p))
898 +#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
899 +#define rt_task(p) ((p)->prio < MAX_RT_PRIO)
902 - * We align per-CPU scheduling data on cacheline boundaries,
903 - * to prevent cacheline ping-pong.
904 + * Default context-switch locking:
907 - struct schedule_data {
908 - struct task_struct * curr;
909 - cycles_t last_schedule;
911 - char __pad [SMP_CACHE_BYTES];
912 -} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}};
914 -#define cpu_curr(cpu) aligned_data[(cpu)].schedule_data.curr
915 -#define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule
916 +#ifndef prepare_arch_switch
917 +# define prepare_arch_switch(rq, next) do { } while(0)
918 +# define finish_arch_switch(rq, next) spin_unlock_irq(&(rq)->lock)
919 +# define task_running(rq, p) ((rq)->curr == (p))
922 -struct kernel_stat kstat;
923 -extern struct task_struct *child_reaper;
925 + * task_rq_lock - lock the runqueue a given task resides on and disable
926 + * interrupts. Note the ordering: we can safely lookup the task_rq without
927 + * explicitly disabling preemption.
929 +static inline runqueue_t *task_rq_lock(task_t *p, unsigned long *flags)
931 + struct runqueue *rq;
935 + local_irq_save(*flags);
937 + spin_lock(&rq->lock);
938 + if (unlikely(rq != task_rq(p))) {
939 + spin_unlock_irqrestore(&rq->lock, *flags);
940 + goto repeat_lock_task;
945 -#define idle_task(cpu) (init_tasks[cpu_number_map(cpu)])
946 -#define can_schedule(p,cpu) \
947 - ((p)->cpus_runnable & (p)->cpus_allowed & (1 << cpu))
948 +static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags)
950 + spin_unlock_irqrestore(&rq->lock, *flags);
955 + * this_rq_lock - lock this CPU's runqueue and disable interrupts.
957 +static inline runqueue_t *this_rq_lock(void)
961 -#define idle_task(cpu) (&init_task)
962 -#define can_schedule(p,cpu) (1)
963 + local_irq_disable();
965 + spin_lock(&rq->lock);
971 -void scheduling_functions_start_here(void) { }
972 +static inline void rq_unlock(runqueue_t *rq)
974 + spin_unlock(&rq->lock);
975 + local_irq_enable();
979 - * This is the function that decides how desirable a process is..
980 - * You can weigh different processes against each other depending
981 - * on what CPU they've run on lately etc to try to handle cache
982 - * and TLB miss penalties.
985 - * -1000: never select this
986 - * 0: out of time, recalculate counters (but it might still be
988 - * +ve: "goodness" value (the larger, the better)
989 - * +1000: realtime process, select this.
990 + * Adding/removing a task to/from a priority array:
992 +static inline void dequeue_task(struct task_struct *p, prio_array_t *array)
994 + array->nr_active--;
995 + list_del(&p->run_list);
996 + if (list_empty(array->queue + p->prio))
997 + __clear_bit(p->prio, array->bitmap);
1000 -static inline int goodness(struct task_struct * p, int this_cpu, struct mm_struct *this_mm)
1001 +static inline void enqueue_task(struct task_struct *p, prio_array_t *array)
1004 + list_add_tail(&p->run_list, array->queue + p->prio);
1005 + __set_bit(p->prio, array->bitmap);
1006 + array->nr_active++;
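
These two helpers are the point of the bitmap: finding the next task to run never scans the queues. A sketch of the O(1) lookup that the new schedule() performs on the active array (sched_find_first_bit() is the arch-provided find-first-set helper; this mirrors the selection code further down in the patch):

    /* O(1): find the highest-priority non-empty list, take its head. */
    static inline task_t *pick_next(prio_array_t *array)
    {
        int idx = sched_find_first_bit(array->bitmap);

        return list_entry(array->queue[idx].next, task_t, run_list);
    }
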
1011 - * select the current process after every other
1012 - * runnable process, but before the idle thread.
1013 - * Also, dont trigger a counter recalculation.
1016 - if (p->policy & SCHED_YIELD)
1018 +static inline int effective_prio(task_t *p)
1023 - * Non-RT process - normal case first.
1024 + * Here we scale the actual sleep average [0 .... MAX_SLEEP_AVG]
1025 + * into the -5 ... 0 ... +5 bonus/penalty range.
1027 + * We use 25% of the full 0...39 priority range so that:
1029 + * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
1030 + * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
1032 + * Both properties are important to certain workloads.
1034 - if (p->policy == SCHED_OTHER) {
1035 + bonus = MAX_USER_PRIO*PRIO_BONUS_RATIO*p->sleep_avg/MAX_SLEEP_AVG/100 -
1036 + MAX_USER_PRIO*PRIO_BONUS_RATIO/100/2;
1038 + prio = p->static_prio - bonus;
1039 + if (prio < MAX_RT_PRIO)
1040 + prio = MAX_RT_PRIO;
1041 + if (prio > MAX_PRIO-1)
1042 + prio = MAX_PRIO-1;
1046 +static inline void activate_task(task_t *p, runqueue_t *rq)
1048 + unsigned long sleep_time = jiffies - p->sleep_timestamp;
1049 + prio_array_t *array = rq->active;
1051 + if (!rt_task(p) && sleep_time) {
1053 - * Give the process a first-approximation goodness value
1054 - * according to the number of clock-ticks it has left.
1056 - * Don't do any other calculations if the time slice is
1058 + * This code gives a bonus to interactive tasks. We update
1059 + * an 'average sleep time' value here, based on
1060 + * sleep_timestamp. The more time a task spends sleeping,
1061 + * the higher the average gets - and the higher the priority
1062 + * boost gets as well.
1064 - weight = p->counter;
1069 - /* Give a largish advantage to the same processor... */
1070 - /* (this is equivalent to penalizing other processors) */
1071 - if (p->processor == this_cpu)
1072 - weight += PROC_CHANGE_PENALTY;
1075 - /* .. and a slight advantage to the current MM */
1076 - if (p->mm == this_mm || !p->mm)
1078 - weight += 20 - p->nice;
1080 + p->sleep_avg += sleep_time;
1081 + if (p->sleep_avg > MAX_SLEEP_AVG)
1082 + p->sleep_avg = MAX_SLEEP_AVG;
1083 + p->prio = effective_prio(p);
1085 + enqueue_task(p, array);
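
The bonus term reduces to 10 * sleep_avg / MAX_SLEEP_AVG - 5: a task that has slept for the full two-second average runs at static_prio - 5, a pure CPU hog at static_prio + 5 (a quick check, stock constants assumed):

    #include <stdio.h>

    #define HZ 100                 /* assumed */
    #define MAX_SLEEP_AVG    (2*HZ)
    #define MAX_USER_PRIO    40    /* assumed stock values */
    #define PRIO_BONUS_RATIO 25

    static int bonus(int sleep_avg)
    {
        return MAX_USER_PRIO * PRIO_BONUS_RATIO * sleep_avg / MAX_SLEEP_AVG / 100
                - MAX_USER_PRIO * PRIO_BONUS_RATIO / 100 / 2;
    }

    int main(void)
    {
        printf("%d %d %d\n", bonus(0), bonus(MAX_SLEEP_AVG / 2),
               bonus(MAX_SLEEP_AVG));  /* -5 0 5 */
        return 0;
    }
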
1089 +static inline void activate_batch_task(task_t *p, runqueue_t *rq)
1092 + list_del(&p->run_list);
1093 + activate_task(p, rq);
1094 + p->flags &= ~PF_BATCH;
1097 +static inline void deactivate_task(struct task_struct *p, runqueue_t *rq)
1100 + if (p->state == TASK_UNINTERRUPTIBLE)
1101 + rq->nr_uninterruptible++;
1102 + dequeue_task(p, p->array);
1106 +static inline void deactivate_batch_task(task_t *p, runqueue_t *rq)
1108 + prio_array_t *array = p->array;
1110 + deactivate_task(p, rq);
1112 + if (array == rq->expired)
1113 + list_add_tail(&p->run_list, &rq->batch_queue);
1115 + list_add(&p->run_list, &rq->batch_queue);
1117 - * Realtime process, select the first one on the
1118 - * runqueue (taking priorities within processes
1120 + * Via this bit we can tell whether a task is in the batchqueue,
1121 + * this information is not available in any other cheap way.
1123 - weight = 1000 + p->rt_priority;
1126 + p->flags |= PF_BATCH;
1130 - * the 'goodness value' of replacing a process on a given CPU.
1131 - * positive value means 'replace', zero or negative means 'dont'.
1133 -static inline int preemption_goodness(struct task_struct * prev, struct task_struct * p, int cpu)
1134 +static inline void resched_task(task_t *p)
1136 - return goodness(p, cpu, prev->active_mm) - goodness(prev, cpu, prev->active_mm);
1140 + need_resched = p->need_resched;
1142 + set_tsk_need_resched(p);
1143 + if (!need_resched && (p->cpu != smp_processor_id()))
1144 + smp_send_reschedule(p->cpu);
1146 + set_tsk_need_resched(p);
1153 - * This is ugly, but reschedule_idle() is very timing-critical.
1154 - * We are called with the runqueue spinlock held and we must
1155 - * not claim the tasklist_lock.
1156 + * Wait for a process to unschedule. This is used by the exit() and
1159 -static FASTCALL(void reschedule_idle(struct task_struct * p));
1161 -static void reschedule_idle(struct task_struct * p)
1162 +void wait_task_inactive(task_t * p)
1165 - int this_cpu = smp_processor_id();
1166 - struct task_struct *tsk, *target_tsk;
1167 - int cpu, best_cpu, i, max_prio;
1168 - cycles_t oldest_idle;
1171 - * shortcut if the woken up task's last CPU is
1174 - best_cpu = p->processor;
1175 - if (can_schedule(p, best_cpu)) {
1176 - tsk = idle_task(best_cpu);
1177 - if (cpu_curr(best_cpu) == tsk) {
1181 - * If need_resched == -1 then we can skip sending
1182 - * the IPI altogether, tsk->need_resched is
1183 - * actively watched by the idle thread.
1185 - need_resched = tsk->need_resched;
1186 - tsk->need_resched = 1;
1187 - if ((best_cpu != this_cpu) && !need_resched)
1188 - smp_send_reschedule(best_cpu);
1192 + unsigned long flags;
1196 - * We know that the preferred CPU has a cache-affine current
1197 - * process, lets try to find a new idle CPU for the woken-up
1198 - * process. Select the least recently active idle CPU. (that
1199 - * one will have the least active cache context.) Also find
1200 - * the executing process which has the least priority.
1202 - oldest_idle = (cycles_t) -1;
1203 - target_tsk = NULL;
1206 - for (i = 0; i < smp_num_cpus; i++) {
1207 - cpu = cpu_logical_map(i);
1208 - if (!can_schedule(p, cpu))
1210 - tsk = cpu_curr(cpu);
1213 + if (unlikely(task_running(rq, p))) {
1216 - * We use the first available idle CPU. This creates
1217 - * a priority list between idle CPUs, but this is not
1219 + * enable/disable preemption just to make this
1220 + * a preemption point - we are busy-waiting
1223 - if (tsk == idle_task(cpu)) {
1224 -#if defined(__i386__) && defined(CONFIG_SMP)
1226 - * Check if two siblings are idle in the same
1227 - * physical package. Use them if found.
1229 - if (smp_num_siblings == 2) {
1230 - if (cpu_curr(cpu_sibling_map[cpu]) ==
1231 - idle_task(cpu_sibling_map[cpu])) {
1232 - oldest_idle = last_schedule(cpu);
1239 - if (last_schedule(cpu) < oldest_idle) {
1240 - oldest_idle = last_schedule(cpu);
1244 - if (oldest_idle == -1ULL) {
1245 - int prio = preemption_goodness(tsk, p, cpu);
1247 - if (prio > max_prio) {
1257 - if (oldest_idle != -1ULL) {
1258 - best_cpu = tsk->processor;
1259 - goto send_now_idle;
1261 - tsk->need_resched = 1;
1262 - if (tsk->processor != this_cpu)
1263 - smp_send_reschedule(tsk->processor);
1264 + rq = task_rq_lock(p, &flags);
1265 + if (unlikely(task_running(rq, p))) {
1266 + task_rq_unlock(rq, &flags);
1273 - int this_cpu = smp_processor_id();
1274 - struct task_struct *tsk;
1276 - tsk = cpu_curr(this_cpu);
1277 - if (preemption_goodness(tsk, p, this_cpu) > 0)
1278 - tsk->need_resched = 1;
1280 + task_rq_unlock(rq, &flags);
1286 + * Kick the remote CPU if the task is running currently,
1287 + * this code is used by the signal code to signal tasks
1288 + * which are in user-mode as quickly as possible.
1290 + * (Note that we do this lockless - if the task does anything
1291 + * while the message is in flight then it will notice the
1292 + * sigpending condition anyway.)
1294 - * This has to add the process to the _beginning_ of the
1295 - * run-queue, not the end. See the comment about "This is
1296 - * subtle" in the scheduler proper..
1297 + * this code also activates batch processes if they get a signal.
1299 -static inline void add_to_runqueue(struct task_struct * p)
1301 - list_add(&p->run_list, &runqueue_head);
1305 -static inline void move_last_runqueue(struct task_struct * p)
1306 +void kick_if_running(task_t * p)
1308 - list_del(&p->run_list);
1309 - list_add_tail(&p->run_list, &runqueue_head);
1311 + if (task_running(task_rq(p), p) && (p->cpu != smp_processor_id()))
1314 + * If batch processes get signals but are not running currently
1315 + * then give them a chance to handle the signal. (the kernel
1316 + * side signal handling code will run for sure, the userspace
1317 + * part depends on system load and might be delayed indefinitely.)
1319 + if (p->policy == SCHED_BATCH) {
1320 + unsigned long flags;
1323 -static inline void move_first_runqueue(struct task_struct * p)
1325 - list_del(&p->run_list);
1326 - list_add(&p->run_list, &runqueue_head);
1327 + rq = task_rq_lock(p, &flags);
1328 + if (p->flags & PF_BATCH)
1329 + activate_batch_task(p, rq);
1330 + task_rq_unlock(rq, &flags);
1335 @@ -347,416 +425,793 @@
1336 * progress), and as such you're allowed to do the simpler
1337 * "current->state = TASK_RUNNING" to mark yourself runnable
1338 * without the overhead of this.
1340 + * returns failure only if the task is already active.
1342 -static inline int try_to_wake_up(struct task_struct * p, int synchronous)
1343 +static int try_to_wake_up(task_t * p, int sync)
1345 unsigned long flags;
1351 - * We want the common case fall through straight, thus the goto.
1353 - spin_lock_irqsave(&runqueue_lock, flags);
1355 + rq = task_rq_lock(p, &flags);
1356 + old_state = p->state;
1359 + * Fast-migrate the task if it's not running or runnable
1360 + * currently. Do not violate hard affinity.
1362 + if (unlikely(sync && !task_running(rq, p) &&
1363 + (task_cpu(p) != smp_processor_id()) &&
1364 + (p->cpus_allowed & (1UL << smp_processor_id())))) {
1366 + set_task_cpu(p, smp_processor_id());
1368 + task_rq_unlock(rq, &flags);
1369 + goto repeat_lock_task;
1371 + if (old_state == TASK_UNINTERRUPTIBLE)
1372 + rq->nr_uninterruptible--;
1373 + activate_task(p, rq);
1375 + if (p->prio < rq->curr->prio || rq->curr->policy == SCHED_BATCH)
1376 + resched_task(rq->curr);
1379 p->state = TASK_RUNNING;
1380 - if (task_on_runqueue(p))
1382 - add_to_runqueue(p);
1383 - if (!synchronous || !(p->cpus_allowed & (1 << smp_processor_id())))
1384 - reschedule_idle(p);
1387 - spin_unlock_irqrestore(&runqueue_lock, flags);
1388 + task_rq_unlock(rq, &flags);
1393 -inline int wake_up_process(struct task_struct * p)
1394 +int wake_up_process(task_t * p)
1396 return try_to_wake_up(p, 0);
1399 -static void process_timeout(unsigned long __data)
1400 +void wake_up_forked_process(task_t * p)
1402 - struct task_struct * p = (struct task_struct *) __data;
1403 + runqueue_t *rq = this_rq_lock();
1405 + p->state = TASK_RUNNING;
1406 + if (!rt_task(p)) {
1408 + * We decrease the sleep average of forking parents
1409 + * and children as well, to keep max-interactive tasks
1410 + * from forking tasks that are max-interactive.
1412 + current->sleep_avg = current->sleep_avg * PARENT_PENALTY / 100;
1413 + p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100;
1414 + p->prio = effective_prio(p);
1416 + set_task_cpu(p, smp_processor_id());
1417 + activate_task(p, rq);
1419 - wake_up_process(p);
1424 - * schedule_timeout - sleep until timeout
1425 - * @timeout: timeout value in jiffies
1427 - * Make the current task sleep until @timeout jiffies have
1428 - * elapsed. The routine will return immediately unless
1429 - * the current task state has been set (see set_current_state()).
1431 - * You can set the task state as follows -
1433 - * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
1434 - * pass before the routine returns. The routine will return 0
1436 - * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1437 - * delivered to the current task. In this case the remaining time
1438 - * in jiffies will be returned, or 0 if the timer expired in time
1440 - * The current task state is guaranteed to be TASK_RUNNING when this
1441 - * routine returns.
1443 - * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
1444 - * the CPU away without a bound on the timeout. In this case the return
1445 - * value will be %MAX_SCHEDULE_TIMEOUT.
1447 - * In all cases the return value is guaranteed to be non-negative.
1449 + * Potentially available exiting-child timeslices are
1450 + * retrieved here - this way the parent does not get
1451 + * penalized for creating too many processes.
1453 + * (this cannot be used to 'generate' timeslices
1454 + * artificially, because any timeslice recovered here
1455 + * was given away by the parent in the first place.)
1457 -signed long schedule_timeout(signed long timeout)
1458 +void sched_exit(task_t * p)
1460 - struct timer_list timer;
1461 - unsigned long expire;
1463 + if (p->first_time_slice) {
1464 + current->time_slice += p->time_slice;
1465 + if (unlikely(current->time_slice > MAX_TIMESLICE))
1466 + current->time_slice = MAX_TIMESLICE;
1470 + * If the child was a (relative-) CPU hog then decrease
1471 + * the sleep_avg of the parent as well.
1473 + if (p->sleep_avg < current->sleep_avg)
1474 + current->sleep_avg = (current->sleep_avg * EXIT_WEIGHT +
1475 + p->sleep_avg) / (EXIT_WEIGHT + 1);
1480 - case MAX_SCHEDULE_TIMEOUT:
1482 - * These two special cases are useful to be comfortable
1483 - * in the caller. Nothing more. We could take
1484 - * MAX_SCHEDULE_TIMEOUT from one of the negative value
1485 - * but I' d like to return a valid offset (>=0) to allow
1486 - * the caller to do everything it want with the retval.
1492 - * Another bit of PARANOID. Note that the retval will be
1493 - * 0 since no piece of kernel is supposed to do a check
1494 - * for a negative retval of schedule_timeout() (since it
1495 - * should never happens anyway). You just have the printk()
1496 - * that will tell you if something is gone wrong and where.
1500 - printk(KERN_ERR "schedule_timeout: wrong timeout "
1501 - "value %lx from %p\n", timeout,
1502 - __builtin_return_address(0));
1503 - current->state = TASK_RUNNING;
1507 +asmlinkage void schedule_tail(task_t *prev)
1509 + finish_arch_switch(this_rq(), prev);
1513 +static inline task_t * context_switch(task_t *prev, task_t *next)
1515 + struct mm_struct *mm = next->mm;
1516 + struct mm_struct *oldmm = prev->active_mm;
1518 + if (unlikely(!mm)) {
1519 + next->active_mm = oldmm;
1520 + atomic_inc(&oldmm->mm_count);
1521 + enter_lazy_tlb(oldmm, next, smp_processor_id());
1523 + switch_mm(oldmm, mm, next, smp_processor_id());
1525 + if (unlikely(!prev->mm)) {
1526 + prev->active_mm = NULL;
1530 - expire = timeout + jiffies;
1531 + /* Here we just switch the register state and the stack. */
1532 + switch_to(prev, next, prev);
1534 - init_timer(&timer);
1535 - timer.expires = expire;
1536 - timer.data = (unsigned long) current;
1537 - timer.function = process_timeout;
1541 - add_timer(&timer);
1543 - del_timer_sync(&timer);
1544 +unsigned long nr_running(void)
1546 + unsigned long i, sum = 0;
1548 - timeout = expire - jiffies;
1549 + for (i = 0; i < NR_CPUS; i++)
1550 + sum += cpu_rq(i)->nr_running;
1553 - return timeout < 0 ? 0 : timeout;
1557 +unsigned long nr_uninterruptible(void)
1559 + unsigned long i, sum = 0;
1561 + for (i = 0; i < NR_CPUS; i++)
1562 + sum += cpu_rq(i)->nr_uninterruptible;
1567 +unsigned long nr_context_switches(void)
1569 + unsigned long i, sum = 0;
1571 + for (i = 0; i < NR_CPUS; i++)
1572 + sum += cpu_rq(i)->nr_switches;
1578 - * schedule_tail() is getting called from the fork return path. This
1579 - * cleans up all remaining scheduler things, without impacting the
1581 + * double_rq_lock - safely lock two runqueues
1583 + * Note this does not disable interrupts like task_rq_lock,
1584 + * you need to do so manually before calling.
1586 -static inline void __schedule_tail(struct task_struct *prev)
1587 +static inline void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
1592 + spin_lock(&rq1->lock);
1595 + spin_lock(&rq1->lock);
1596 + spin_lock(&rq2->lock);
1598 + spin_lock(&rq2->lock);
1599 + spin_lock(&rq1->lock);
1605 + * double_rq_unlock - safely unlock two runqueues
1607 + * Note this does not restore interrupts like task_rq_unlock,
1608 + * you need to do so manually after calling.
1610 +static inline void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2)
1612 + spin_unlock(&rq1->lock);
1614 + spin_unlock(&rq2->lock);
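
The excerpt elides the ordering tests inside double_rq_lock(), but the locking rule quoted earlier (ascending &runqueue) pins down the intended shape; a sketch of what the full function does:

    static inline void double_rq_lock_sketch(runqueue_t *rq1, runqueue_t *rq2)
    {
        if (rq1 == rq2)                 /* same queue: lock it once */
            spin_lock(&rq1->lock);
        else if (rq1 < rq2) {           /* always take the lower address first */
            spin_lock(&rq1->lock);
            spin_lock(&rq2->lock);
        } else {
            spin_lock(&rq2->lock);
            spin_lock(&rq1->lock);
        }
    }
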
1620 + * Batch balancing is much simpler since it's optimized for
1621 + * CPU-intensive workloads. The balancer keeps the batch-queue
1622 + * length as close to the average length as possible. It weighs
1623 + * runqueue distribution based on the idle percentage of each
1624 + * CPU - this way statistical fairness of timeslice distribution
1625 + * is preserved, in the long run it does not matter whether a
1626 + * batch task is queued to a busy CPU or not, it will get an
1627 + * equal share of all available idle CPU time.
1629 + * CPU-intensive SCHED_BATCH processes have a much lower
1630 + * fork()/exit() flux, so the balancing does not have to
1631 + * be prepared for high statistical fluctuations in queue
1634 +static inline void load_balance_batch(runqueue_t *this_rq, int this_cpu)
1636 + int i, nr_batch, nr_idle, goal, rq_goal;
1637 + runqueue_t *rq_src;
1640 - * prev->policy can be written from here only before `prev'
1641 - * can be scheduled (before setting prev->cpus_runnable to ~0UL).
1642 - * Of course it must also be read before allowing prev
1643 - * to be rescheduled, but since the write depends on the read
1644 - * to complete, wmb() is enough. (the spin_lock() acquired
1645 - * before setting cpus_runnable is not enough because the spin_lock()
1646 - * common code semantics allows code outside the critical section
1647 - * to enter inside the critical section)
1648 + * First the unlocked fastpath - is there any work to do?
1649 + * fastpath #1: no batch processes in the system,
1650 + * fastpath #2: no idle time available in the system.
1651 + * fastpath #3: no balancing needed for the current queue.
1653 - policy = prev->policy;
1654 - prev->policy = policy & ~SCHED_YIELD;
1660 - * fast path falls through. We have to clear cpus_runnable before
1661 - * checking prev->state to avoid a wakeup race. Protect against
1662 - * the task exiting early.
1665 - task_release_cpu(prev);
1667 - if (prev->state == TASK_RUNNING)
1668 - goto needs_resched;
1669 + for (i = 0; i < NR_CPUS; i++) {
1670 + if (!cpu_online(i))
1674 - task_unlock(prev); /* Synchronise here with release_task() if prev is TASK_ZOMBIE */
1676 + nr_batch += cpu_rq(i)->nr_batch;
1677 + nr_idle += cpu_rq(i)->idle_avg;
1679 + if (!nr_batch || !nr_idle)
1682 + goal = this_rq->idle_avg * nr_batch / nr_idle;
1683 + if (this_rq->nr_batch >= goal)
1687 - * Slow path - we 'push' the previous process and
1688 - * reschedule_idle() will attempt to find a new
1689 - * processor for it. (but it might preempt the
1690 - * current process as well.) We must take the runqueue
1691 - * lock and re-check prev->state to be correct. It might
1692 - * still happen that this process has a preemption
1693 - * 'in progress' already - but this is not a problem and
1694 - * might happen in other circumstances as well.
1695 + * The slow path - the local batch-queue is too short and
1696 + * needs balancing. We unlock the runqueue (but keep
1697 + * interrupts disabled) to simplify locking. (It does not
1698 + * matter if the runqueues change meanwhile - this is all
1699 + * statistical balancing so only the long run effects matter.)
1703 - unsigned long flags;
1704 + spin_unlock(&this_rq->lock);
1707 - * Avoid taking the runqueue lock in cases where
1708 - * no preemption-check is necessery:
1710 - if ((prev == idle_task(smp_processor_id())) ||
1711 - (policy & SCHED_YIELD))
1713 + for (i = 0; i < NR_CPUS; i++) {
1714 + if (!cpu_online(i) || (i == this_cpu))
1717 - spin_lock_irqsave(&runqueue_lock, flags);
1718 - if ((prev->state == TASK_RUNNING) && !task_has_cpu(prev))
1719 - reschedule_idle(prev);
1720 - spin_unlock_irqrestore(&runqueue_lock, flags);
1722 + rq_src = cpu_rq(i);
1723 + double_rq_lock(this_rq, rq_src);
1725 + rq_goal = rq_src->idle_avg * nr_batch / nr_idle;
1727 + if (rq_src->nr_batch > rq_goal) {
1729 + * Migrate a single batch-process.
1731 + list_t *tmp = rq_src->batch_queue.prev;
1734 + list_add_tail(tmp, &this_rq->batch_queue);
1735 + rq_src->nr_batch--;
1736 + this_rq->nr_batch++;
1737 + set_task_cpu(list_entry(tmp, task_t, run_list), this_cpu);
1740 + double_rq_unlock(this_rq, rq_src);
1741 + if (this_rq->nr_batch >= goal)
1745 - prev->policy &= ~SCHED_YIELD;
1746 -#endif /* CONFIG_SMP */
1747 + spin_lock(&this_rq->lock);
1750 + * Lock the busiest runqueue as well, this_rq is locked already.
1751 + * Recalculate nr_running if we have to drop the runqueue lock.
1753 +static inline unsigned int double_lock_balance(runqueue_t *this_rq,
1754 + runqueue_t *busiest, int this_cpu, int idle, unsigned int nr_running)
1756 + if (unlikely(!spin_trylock(&busiest->lock))) {
1757 + if (busiest < this_rq) {
1758 + spin_unlock(&this_rq->lock);
1759 + spin_lock(&busiest->lock);
1760 + spin_lock(&this_rq->lock);
1761 + /* Need to recalculate nr_running */
1762 + if (idle || (this_rq->nr_running > this_rq->prev_nr_running[this_cpu]))
1763 + nr_running = this_rq->nr_running;
1765 + nr_running = this_rq->prev_nr_running[this_cpu];
1767 + spin_lock(&busiest->lock);
1769 + return nr_running;
1772 +static inline runqueue_t *find_busiest_queue(runqueue_t *this_rq, int this_cpu, int idle, int *imbalance)
1774 + int nr_running, load, max_load, i;
1775 + runqueue_t *busiest, *rq_src;
1778 + * We search all runqueues to find the most busy one.
1779 + * We do this lockless to reduce cache-bouncing overhead,
1780 + * we re-check the 'best' source CPU later on again, with
1783 + * We fend off statistical fluctuations in runqueue lengths by
1784 + * saving the runqueue length during the previous load-balancing
1785 + * operation and using the smaller of the current and saved lengths.
1786 + * If a runqueue is long enough for a longer amount of time then
1787 + * we recognize it and pull tasks from it.
1789 + * The 'current runqueue length' is a statistical maximum variable,
1790 + * for that one we take the longer one - to avoid fluctuations in
1791 + * the other direction. So for a load-balance to happen it needs
1792 + * a stable long runqueue on the target CPU and a stable short runqueue
1793 + * on the local runqueue.
1795 + * We make an exception if this CPU is about to become idle - in
1796 + * that case we are less picky about moving a task across CPUs and
1797 + * take what can be taken.
1799 + if (idle || (this_rq->nr_running > this_rq->prev_nr_running[this_cpu]))
1800 + nr_running = this_rq->nr_running;
1802 + nr_running = this_rq->prev_nr_running[this_cpu];
1806 + for (i = 0; i < NR_CPUS; i++) {
1807 + if (!cpu_online(i))
1810 + rq_src = cpu_rq(i);
1811 + if (idle || (rq_src->nr_running < this_rq->prev_nr_running[i]))
1812 + load = rq_src->nr_running;
1814 + load = this_rq->prev_nr_running[i];
1815 + this_rq->prev_nr_running[i] = rq_src->nr_running;
1817 + if ((load > max_load) && (rq_src != this_rq)) {
1823 + if (likely(!busiest))
1826 -asmlinkage void schedule_tail(struct task_struct *prev)
1827 + *imbalance = (max_load - nr_running) / 2;
1829 + /* It takes at least a ~25% imbalance to trigger balancing. */
1830 + if (!idle && (*imbalance < (max_load + 3)/4)) {
1835 + nr_running = double_lock_balance(this_rq, busiest, this_cpu, idle, nr_running);
1837 + * Make sure nothing changed since we checked the
1838 + * runqueue length.
1840 + if (busiest->nr_running <= nr_running + 1) {
1841 + spin_unlock(&busiest->lock);
1849 + * Move a task from a remote runqueue to the local runqueue.
1850 + * Both runqueues must be locked.
1852 +static inline void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, runqueue_t *this_rq, int this_cpu)
1854 - __schedule_tail(prev);
1855 + dequeue_task(p, src_array);
1856 + src_rq->nr_running--;
1857 + set_task_cpu(p, this_cpu);
1858 + this_rq->nr_running++;
1859 + enqueue_task(p, this_rq->active);
1861 + * Note that idle threads have a prio of MAX_PRIO, so this test
1862 + * always fires when the current task is the idle thread.
1864 + if (p->prio < this_rq->curr->prio)
1865 + set_need_resched();
1869 - * 'schedule()' is the scheduler function. It's a very simple and nice
1870 - * scheduler: it's not perfect, but certainly works for most things.
1871 + * Current runqueue is empty, or rebalance tick: if there is an
1872 + * imbalance (current runqueue is too short) then pull from
1873 + * busiest runqueue(s).
1875 - * The goto is "interesting".
1876 + * We call this with the current runqueue locked,
1879 +static void load_balance(runqueue_t *this_rq, int idle)
1881 + int imbalance, idx, this_cpu = smp_processor_id();
1882 + runqueue_t *busiest;
1883 + prio_array_t *array;
1884 + list_t *head, *curr;
1887 + busiest = find_busiest_queue(this_rq, this_cpu, idle, &imbalance);
1889 + goto balance_batch;
1892 + * We first consider expired tasks. Those will likely not be
1893 + * executed in the near future, and they are most likely to
1894 + * be cache-cold, thus switching CPUs has the least effect
1897 + if (busiest->expired->nr_active)
1898 + array = busiest->expired;
1900 + array = busiest->active;
1903 + /* Start searching at priority 0: */
1907 + idx = sched_find_first_bit(array->bitmap);
1909 + idx = find_next_bit(array->bitmap, MAX_PRIO, idx);
1910 + if (idx == MAX_PRIO) {
1911 + if (array == busiest->expired) {
1912 + array = busiest->active;
1918 + head = array->queue + idx;
1919 + curr = head->prev;
1921 + tmp = list_entry(curr, task_t, run_list);
1924 + * We do not migrate tasks that are:
1925 + * 1) running (obviously), or
1926 + * 2) cannot be migrated to this CPU due to cpus_allowed, or
1927 + * 3) are cache-hot on their current CPU.
1930 +#define CAN_MIGRATE_TASK(p,rq,this_cpu) \
1931 + ((jiffies - (p)->sleep_timestamp > cache_decay_ticks) && \
1932 + !task_running(rq, p) && \
1933 + ((p)->cpus_allowed & (1UL << (this_cpu))))
1935 + curr = curr->prev;
1937 + if (!CAN_MIGRATE_TASK(tmp, busiest, this_cpu)) {
1943 + pull_task(busiest, array, tmp, this_rq, this_cpu);
1944 + if (!idle && --imbalance) {
1951 + spin_unlock(&busiest->lock);
1953 + load_balance_batch(this_rq, this_cpu);
1957 + * The idle-tick or the busy-tick rebalancing path gets called
1958 + * every timer tick, on every CPU. Our balancing action
1959 + * frequency and balancing aggressiveness depend on whether the CPU is
1962 - * NOTE!! Task 0 is the 'idle' task, which gets called when no other
1963 - * tasks can run. It can not be killed, and it cannot sleep. The 'state'
1964 - * information in task[0] is never used.
1965 + * busy-rebalance every 250 msecs. idle-rebalance every 1 msec. (or on
1966 + * systems with HZ=100, every 10 msecs.)
1968 -asmlinkage void schedule(void)
1969 +#define BUSY_REBALANCE_TICK (HZ/4 ?: 1)
1970 +#define IDLE_REBALANCE_TICK (HZ/1000 ?: 1)
1972 +static inline void idle_tick(runqueue_t *rq)
1974 - struct schedule_data * sched_data;
1975 - struct task_struct *prev, *next, *p;
1976 - struct list_head *tmp;
1978 + if (jiffies % IDLE_REBALANCE_TICK)
1980 + spin_lock(&rq->lock);
1981 + load_balance(rq, 1);
1982 + spin_unlock(&rq->lock);
1988 + * We place interactive tasks back into the active array, if possible.
1990 + * To guarantee that this does not starve expired tasks we ignore the
1991 + * interactivity of a task if the first expired task had to wait more
1992 + * than a 'reasonable' amount of time. This deadline timeout is
1993 + * load-dependent, as the frequency of array switches decreases with
1994 + * increasing number of running tasks:
1996 +#define EXPIRED_STARVING(rq) \
1997 + ((rq)->expired_timestamp && \
1998 + (jiffies - (rq)->expired_timestamp >= \
1999 + STARVATION_LIMIT * ((rq)->nr_running) + 1))
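
A worked instance of the deadline, assuming HZ == 100: with 10 runnable tasks the cutoff is STARVATION_LIMIT * 10 + 1 = 2001 jiffies, so once the oldest expired task has waited roughly 20 seconds, TASK_INTERACTIVE() is overridden and interactive tasks expire like everyone else.
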
2002 + * This function gets called by the timer code, with HZ frequency.
2003 + * We call it with interrupts disabled.
2005 +void scheduler_tick(int user_ticks, int sys_ticks)
2007 + int cpu = smp_processor_id();
2008 + runqueue_t *rq = this_rq();
2009 + task_t *p = current;
2011 - spin_lock_prefetch(&runqueue_lock);
2013 + if (user_ticks || sys_ticks) {
2015 + * This code is rare, triggered only once per second:
2017 + if (--rq->idle_ticks_left <= 0) {
2019 + * Maintain a simple running average:
2021 + rq->idle_avg += rq->idle_count;
2022 + rq->idle_avg >>= 1;
2024 - BUG_ON(!current->active_mm);
2027 - this_cpu = prev->processor;
2028 + rq->idle_ticks_left = IDLE_TICKS;
2029 + rq->idle_count = 0;
2031 - if (unlikely(in_interrupt())) {
2032 - printk("Scheduling in interrupt\n");
2036 + if (p == rq->idle || p->policy == SCHED_BATCH)
2039 + if (p == rq->idle) {
2040 + if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
2041 + kstat.per_cpu_system[cpu] += sys_ticks;
2047 + if (TASK_NICE(p) > 0 || p->policy == SCHED_BATCH)
2048 + kstat.per_cpu_nice[cpu] += user_ticks;
2050 + kstat.per_cpu_user[cpu] += user_ticks;
2051 + kstat.per_cpu_system[cpu] += sys_ticks;
2053 - release_kernel_lock(prev, this_cpu);
2055 + /* Task might have expired already, but not scheduled off yet */
2056 + if (p->array != rq->active) {
2057 + set_tsk_need_resched(p);
2060 + spin_lock(&rq->lock);
2061 + if (unlikely(rt_task(p))) {
2063 + * RR tasks need a special form of timeslice management.
2064 + * FIFO tasks have no timeslices.
2066 + if ((p->policy == SCHED_RR) && !--p->time_slice) {
2067 + p->time_slice = task_timeslice(p);
2068 + p->first_time_slice = 0;
2069 + set_tsk_need_resched(p);
2071 + /* put it at the end of the queue: */
2072 + dequeue_task(p, rq->active);
2073 + enqueue_task(p, rq->active);
2078 - * 'sched_data' is protected by the fact that we can run
2079 - * only one process per CPU.
2081 - sched_data = & aligned_data[this_cpu].schedule_data;
2082 + * The task was running during this tick - update the
2083 + * time slice counter and the sleep average. Note: we
2084 + * do not update a process's priority until it either
2085 + * goes to sleep or uses up its timeslice. This makes
2086 + * it possible for interactive tasks to use up their
2087 + * timeslices at their highest priority levels.
2091 + if (!--p->time_slice) {
2092 + dequeue_task(p, rq->active);
2093 + set_tsk_need_resched(p);
2094 + p->prio = effective_prio(p);
2095 + p->time_slice = task_timeslice(p);
2096 + p->first_time_slice = 0;
2098 + if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
2099 + if (!rq->expired_timestamp)
2100 + rq->expired_timestamp = jiffies;
2101 + enqueue_task(p, rq->expired);
2103 + enqueue_task(p, rq->active);
2107 + if (!(jiffies % BUSY_REBALANCE_TICK))
2108 + load_balance(rq, 0);
2110 + spin_unlock(&rq->lock);
2113 - spin_lock_irq(&runqueue_lock);
2114 +void scheduling_functions_start_here(void) { }
2116 - /* move an exhausted RR process to be last.. */
2117 - if (unlikely(prev->policy == SCHED_RR))
2118 - if (!prev->counter) {
2119 - prev->counter = NICE_TO_TICKS(prev->nice);
2120 - move_last_runqueue(prev);
2123 + * This function is called by the lowlevel kernel entry code if
2124 + * pure userspace code is preempted. Such processes, if SCHED_BATCH,
2125 + * are candidates for batch scheduling. Every other process (including
2126 + * kernel-mode SCHED_BATCH processes) is scheduled in a non-batch way.
2128 +asmlinkage void schedule_userspace(void)
2132 - switch (prev->state) {
2133 - case TASK_INTERRUPTIBLE:
2134 - if (signal_pending(prev)) {
2135 - prev->state = TASK_RUNNING;
2139 - del_from_runqueue(prev);
2140 - case TASK_RUNNING:;
2141 + if (current->policy != SCHED_BATCH) {
2145 - prev->need_resched = 0;
2148 - * this is the scheduler proper:
2149 + * Only handle batch tasks that are runnable.
2151 + if (current->state == TASK_RUNNING) {
2152 + rq = this_rq_lock();
2153 + deactivate_batch_task(current, rq);
2157 - * Default process to select..
2159 - next = idle_task(this_cpu);
2161 - list_for_each(tmp, &runqueue_head) {
2162 - p = list_entry(tmp, struct task_struct, run_list);
2163 - if (can_schedule(p, this_cpu)) {
2164 - int weight = goodness(p, this_cpu, prev->active_mm);
2166 - c = weight, next = p;
2170 - /* Do we need to re-calculate counters? */
2171 - if (unlikely(!c)) {
2172 - struct task_struct *p;
2174 - spin_unlock_irq(&runqueue_lock);
2175 - read_lock(&tasklist_lock);
2177 - p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);
2178 - read_unlock(&tasklist_lock);
2179 - spin_lock_irq(&runqueue_lock);
2180 - goto repeat_schedule;
2181 + // we can keep irqs disabled:
2182 + spin_unlock(&rq->lock);
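The net effect of schedule_userspace() can be stated as a single predicate: only a runnable SCHED_BATCH task preempted in pure user mode is parked on the batch queue; everything else goes through plain schedule(). A trivial stand-alone restatement (the constants match this patch; TASK_RUNNING's value is the conventional one):

#include <stdio.h>

#define SCHED_NORMAL 0
#define SCHED_BATCH  3
#define TASK_RUNNING 0

/* Returns 1 when schedule_userspace() would park the task on the batch
 * queue before scheduling away. */
static int takes_batch_path(int policy, int state)
{
        return policy == SCHED_BATCH && state == TASK_RUNNING;
}

int main(void)
{
        printf("%d\n", takes_batch_path(SCHED_BATCH, TASK_RUNNING));  /* 1 */
        printf("%d\n", takes_batch_path(SCHED_NORMAL, TASK_RUNNING)); /* 0 */
        return 0;
}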
2186 - * from this point on nothing can prevent us from
2187 - * switching to the next task, save this fact in
2190 - sched_data->curr = next;
2191 - task_set_cpu(next, this_cpu);
2192 - spin_unlock_irq(&runqueue_lock);
2196 - if (unlikely(prev == next)) {
2197 - /* We won't go through the normal tail, so do this by hand */
2198 - prev->policy &= ~SCHED_YIELD;
2199 - goto same_process;
2202 + * 'schedule()' is the main scheduler function.
2204 +asmlinkage void schedule(void)
2206 + task_t *prev, *next;
2208 + prio_array_t *array;
2214 - * maintain the per-process 'last schedule' value.
2215 - * (this has to be recalculated even if we reschedule to
2216 - * the same process) Currently this is only used on SMP,
2217 - * and it's approximate, so we do not have to maintain
2218 - * it while holding the runqueue spinlock.
2220 - sched_data->last_schedule = get_cycles();
2221 + if (unlikely(in_interrupt()))
2225 - * We drop the scheduler lock early (it's a global spinlock),
2226 - * thus we have to lock the previous process from getting
2227 - * rescheduled during switch_to().
2233 -#endif /* CONFIG_SMP */
2234 + release_kernel_lock(prev, smp_processor_id());
2235 + prev->sleep_timestamp = jiffies;
2236 + spin_lock_irq(&rq->lock);
2238 - kstat.context_swtch++;
2240 - * there are 3 processes which are affected by a context switch:
2242 - * prev == .... ==> (last => next)
2244 - * It's the 'much more previous' 'prev' that is on next's stack,
2245 - * but prev is set to (the just run) 'last' process by switch_to().
2246 - * This might sound slightly confusing but makes tons of sense.
2248 - prepare_to_switch();
2250 - struct mm_struct *mm = next->mm;
2251 - struct mm_struct *oldmm = prev->active_mm;
2253 - BUG_ON(next->active_mm);
2254 - next->active_mm = oldmm;
2255 - atomic_inc(&oldmm->mm_count);
2256 - enter_lazy_tlb(oldmm, next, this_cpu);
2258 - BUG_ON(next->active_mm != mm);
2259 - switch_mm(oldmm, mm, next, this_cpu);
2260 + switch (prev->state) {
2261 + case TASK_INTERRUPTIBLE:
2262 + if (unlikely(signal_pending(prev))) {
2263 + prev->state = TASK_RUNNING;
2267 + deactivate_task(prev, rq);
2268 + case TASK_RUNNING:
2274 + if (unlikely(!rq->nr_running)) {
2276 + load_balance(rq, 1);
2277 + if (rq->nr_running)
2278 + goto pick_next_task;
2281 + * Pick a task from the batch queue if available.
2283 + if (rq->nr_batch) {
2284 + list_t *tmp = rq->batch_queue.next;
2287 - prev->active_mm = NULL;
2290 + next = list_entry(tmp, task_t, run_list);
2291 + activate_batch_task(next, rq);
2294 + rq->expired_timestamp = 0;
2295 + goto switch_tasks;
2299 - * This just switches the register state and the
2302 - switch_to(prev, next, prev);
2303 - __schedule_tail(prev);
2304 + array = rq->active;
2305 + if (unlikely(!array->nr_active)) {
2307 + * Switch the active and expired arrays.
2309 + rq->active = rq->expired;
2310 + rq->expired = array;
2311 + array = rq->active;
2312 + rq->expired_timestamp = 0;
2315 + idx = sched_find_first_bit(array->bitmap);
2316 + queue = array->queue + idx;
2317 + next = list_entry(queue->next, task_t, run_list);
2321 + clear_tsk_need_resched(prev);
2323 + if (likely(prev != next)) {
2324 + rq->nr_switches++;
2327 + prepare_arch_switch(rq, next);
2328 + prev = context_switch(prev, next);
2331 + finish_arch_switch(rq, prev);
2333 + spin_unlock_irq(&rq->lock);
2336 reacquire_kernel_lock(current);
2337 - if (current->need_resched)
2338 - goto need_resched_back;
2340 + if (need_resched())
2341 + goto need_resched;
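The pick-next core above is what makes the scheduler O(1): one bitmap search, one list-head dereference, and a pointer swap when the active array drains. A self-contained user-space model of that mechanism, with the per-priority lists reduced to counters and a 64-bit bitmap scan standing in for sched_find_first_bit():

#include <stdio.h>
#include <string.h>

#define MAX_PRIO     140
#define BITMAP_LONGS ((MAX_PRIO + 1 + 63) / 64)

typedef struct prio_array {
        int nr_active;
        unsigned long long bitmap[BITMAP_LONGS];
        int queue_len[MAX_PRIO];   /* stand-in for the per-prio task lists */
} prio_array_t;

static void enqueue(prio_array_t *a, int prio)
{
        a->queue_len[prio]++;
        a->bitmap[prio / 64] |= 1ULL << (prio % 64);
        a->nr_active++;
}

static int find_first(const prio_array_t *a)
{
        for (int i = 0; i < BITMAP_LONGS; i++)
                if (a->bitmap[i])
                        return i * 64 + __builtin_ctzll(a->bitmap[i]);
        return MAX_PRIO;   /* unreachable once the delimiter bit is set */
}

int main(void)
{
        prio_array_t arrays[2];
        prio_array_t *active = &arrays[0], *expired = &arrays[1];

        memset(arrays, 0, sizeof(arrays));
        enqueue(active, 120);
        enqueue(expired, 101);

        printf("next prio: %d\n", find_first(active));    /* 120 */

        /* Active array drained: swap the two pointers, O(1). */
        active->nr_active = 0;
        memset(active->bitmap, 0, sizeof(active->bitmap));
        if (!active->nr_active) {
                prio_array_t *tmp = active;
                active = expired;
                expired = tmp;
        }
        printf("after switch: %d\n", find_first(active)); /* 101 */
        return 0;
}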
2345 - * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just wake everything
2346 - * up. If it's an exclusive wakeup (nr_exclusive == small +ve number) then we wake all the
2347 - * non-exclusive tasks and one exclusive task.
2348 + * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
2349 + * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
2350 + * number) then we wake all the non-exclusive tasks and one exclusive task.
2352 * There are circumstances in which we can try to wake a task which has already
2353 - * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns zero
2354 - * in this (rare) case, and we handle it by contonuing to scan the queue.
2355 + * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
2356 + * zero in this (rare) case, and we handle it by continuing to scan the queue.
2358 -static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode,
2359 - int nr_exclusive, const int sync)
2360 +static inline void __wake_up_common(wait_queue_head_t *q, unsigned int mode, int nr_exclusive, int sync)
2362 struct list_head *tmp;
2363 - struct task_struct *p;
2365 - CHECK_MAGIC_WQHEAD(q);
2366 - WQ_CHECK_LIST_HEAD(&q->task_list);
2368 - list_for_each(tmp,&q->task_list) {
2369 - unsigned int state;
2370 - wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
2371 + unsigned int state;
2372 + wait_queue_t *curr;
2375 - CHECK_MAGIC(curr->__magic);
2376 + list_for_each(tmp, &q->task_list) {
2377 + curr = list_entry(tmp, wait_queue_t, task_list);
2380 - if (state & mode) {
2381 - WQ_NOTE_WAKER(curr);
2382 - if (try_to_wake_up(p, sync) && (curr->flags&WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
2383 + if ((state & mode) && try_to_wake_up(p, sync) &&
2384 + ((curr->flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive))
2390 -void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr)
2391 +void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
2394 - unsigned long flags;
2395 - wq_read_lock_irqsave(&q->lock, flags);
2396 - __wake_up_common(q, mode, nr, 0);
2397 - wq_read_unlock_irqrestore(&q->lock, flags);
2399 + unsigned long flags;
2404 + spin_lock_irqsave(&q->lock, flags);
2405 + __wake_up_common(q, mode, nr_exclusive, 0);
2406 + spin_unlock_irqrestore(&q->lock, flags);
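The exclusive-wakeup rule is easiest to see in isolation: every non-exclusive waiter on the list is woken, and the scan stops once nr_exclusive exclusive waiters have been woken (zero means wake everybody). A small model with the wait_queue_t machinery reduced to a flag array:

#include <stdio.h>

#define WQ_FLAG_EXCLUSIVE 0x01

struct waiter {
        int flags;
        int woken;
};

/* Model of __wake_up_common(): wake in list order until nr_exclusive
 * exclusive waiters have been woken; nr_exclusive == 0 wakes all. */
static void wake_up_model(struct waiter *w, int n, int nr_exclusive)
{
        for (int i = 0; i < n; i++) {
                w[i].woken = 1;
                if ((w[i].flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
                        break;
        }
}

int main(void)
{
        struct waiter w[4] = {
                { 0, 0 }, { WQ_FLAG_EXCLUSIVE, 0 },
                { 0, 0 }, { WQ_FLAG_EXCLUSIVE, 0 },
        };

        wake_up_model(w, 4, 1);
        for (int i = 0; i < 4; i++)
                printf("waiter %d: %s\n", i, w[i].woken ? "woken" : "asleep");
        /* waiters 0 and 1 wake; the scan stops at the first exclusive one */
        return 0;
}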
2409 -void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr)
2412 +void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
2415 - unsigned long flags;
2416 - wq_read_lock_irqsave(&q->lock, flags);
2417 - __wake_up_common(q, mode, nr, 1);
2418 - wq_read_unlock_irqrestore(&q->lock, flags);
2420 + unsigned long flags;
2425 + spin_lock_irqsave(&q->lock, flags);
2426 + if (likely(nr_exclusive))
2427 + __wake_up_common(q, mode, nr_exclusive, 1);
2429 + __wake_up_common(q, mode, nr_exclusive, 0);
2430 + spin_unlock_irqrestore(&q->lock, flags);
2435 void complete(struct completion *x)
2437 unsigned long flags;
2438 @@ -793,14 +1248,14 @@
2439 init_waitqueue_entry(&wait, current);
2441 #define SLEEP_ON_HEAD \
2442 - wq_write_lock_irqsave(&q->lock,flags); \
2443 + spin_lock_irqsave(&q->lock,flags); \
2444 __add_wait_queue(q, &wait); \
2445 - wq_write_unlock(&q->lock);
2446 + spin_unlock(&q->lock);
2448 #define SLEEP_ON_TAIL \
2449 - wq_write_lock_irq(&q->lock); \
2450 + spin_lock_irq(&q->lock); \
2451 __remove_wait_queue(q, &wait); \
2452 - wq_write_unlock_irqrestore(&q->lock,flags);
2453 + spin_unlock_irqrestore(&q->lock, flags);
2455 void interruptible_sleep_on(wait_queue_head_t *q)
2457 @@ -852,6 +1307,42 @@
2459 void scheduling_functions_end_here(void) { }
2461 +void set_user_nice(task_t *p, long nice)
2463 + unsigned long flags;
2464 + prio_array_t *array;
2467 + if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
2470 + * We have to be careful, if called from sys_setpriority(),
2471 + * the task might be in the middle of scheduling on another CPU.
2473 + rq = task_rq_lock(p, &flags);
2475 + p->static_prio = NICE_TO_PRIO(nice);
2480 + dequeue_task(p, array);
2481 + p->static_prio = NICE_TO_PRIO(nice);
2482 + p->prio = NICE_TO_PRIO(nice);
2484 + enqueue_task(p, array);
2486 + * If the task increased its priority, or if it is running
2487 + * and lowered its priority, then reschedule its CPU:
2489 + if ((NICE_TO_PRIO(nice) < p->static_prio) ||
2490 + task_running(rq, p))
2491 + resched_task(rq->curr);
2494 + task_rq_unlock(rq, &flags);
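set_user_nice() leans on the static nice-to-priority mapping. NICE_TO_PRIO()/PRIO_TO_NICE() are defined elsewhere in the patch and not quoted in this hunk, so the linear definitions below are an assumption that matches the ranges described here (nice -20..19 onto prio 100..139):

#include <stdio.h>

#define MAX_USER_RT_PRIO 100
#define MAX_RT_PRIO      MAX_USER_RT_PRIO
#define MAX_PRIO         (MAX_RT_PRIO + 40)

/* Assumed definitions, consistent with this patch's ranges. */
#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20)
#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20)

int main(void)
{
        printf("nice -20 -> prio %d\n", NICE_TO_PRIO(-20)); /* 100 */
        printf("nice   0 -> prio %d\n", NICE_TO_PRIO(0));   /* 120 */
        printf("nice  19 -> prio %d\n", NICE_TO_PRIO(19));  /* 139 */
        printf("prio 120 -> nice %d\n", PRIO_TO_NICE(120)); /* 0 */
        return 0;
}

Under this mapping, INIT_TASK's prio of MAX_PRIO-20 (120, set later in this patch) corresponds to nice 0, which is consistent.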
2500 @@ -862,7 +1353,7 @@
2502 asmlinkage long sys_nice(int increment)
2508 * Setpriority might change our priority at the same moment.
2509 @@ -878,34 +1369,52 @@
2513 - newprio = current->nice + increment;
2514 - if (newprio < -20)
2518 - current->nice = newprio;
2519 + nice = PRIO_TO_NICE(current->static_prio) + increment;
2524 + set_user_nice(current, nice);
2530 -static inline struct task_struct *find_process_by_pid(pid_t pid)
2532 + * This is the priority value as seen by users in /proc
2534 + * RT tasks are offset by -MAX_USER_RT_PRIO. Normal tasks are centered
2535 + * around 0, value goes from -16 to +15.
2537 +int task_prio(task_t *p)
2539 + return p->prio - MAX_USER_RT_PRIO;
2542 +int task_nice(task_t *p)
2544 - struct task_struct *tsk = current;
2545 + return TASK_NICE(p);
2548 +int idle_cpu(int cpu)
2550 + return cpu_curr(cpu) == cpu_rq(cpu)->idle;
2554 - tsk = find_task_by_pid(pid);
2556 +static inline task_t *find_process_by_pid(pid_t pid)
2558 + return pid ? find_task_by_pid(pid) : current;
2561 -static int setscheduler(pid_t pid, int policy,
2562 - struct sched_param *param)
2563 +static int setscheduler(pid_t pid, int policy, struct sched_param *param)
2565 struct sched_param lp;
2566 - struct task_struct *p;
2568 + int retval = -EINVAL;
2569 + prio_array_t *array;
2570 + unsigned long flags;
2575 if (!param || pid < 0)
2578 @@ -917,58 +1426,73 @@
2579 * We play safe to avoid deadlocks.
2581 read_lock_irq(&tasklist_lock);
2582 - spin_lock(&runqueue_lock);
2584 p = find_process_by_pid(pid);
2590 + goto out_unlock_tasklist;
2593 + * To be able to change p->policy safely, the appropriate
2594 + * runqueue lock must be held.
2596 + rq = task_rq_lock(p, &flags);
2602 if (policy != SCHED_FIFO && policy != SCHED_RR &&
2603 - policy != SCHED_OTHER)
2604 + policy != SCHED_NORMAL && policy != SCHED_BATCH)
2610 - * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid
2611 - * priority for SCHED_OTHER is 0.
2612 + * Valid priorities for SCHED_FIFO and SCHED_RR are
2613 + * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and
2614 + * SCHED_BATCH is 0.
2617 - if (lp.sched_priority < 0 || lp.sched_priority > 99)
2618 + if (lp.sched_priority < 0 || lp.sched_priority > MAX_USER_RT_PRIO-1)
2620 - if ((policy == SCHED_OTHER) != (lp.sched_priority == 0))
2621 + if ((policy == SCHED_NORMAL || policy == SCHED_BATCH) !=
2622 + (lp.sched_priority == 0))
2626 - if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
2627 + if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
2628 !capable(CAP_SYS_NICE))
2630 if ((current->euid != p->euid) && (current->euid != p->uid) &&
2631 !capable(CAP_SYS_NICE))
2634 + if (p->flags & PF_BATCH)
2635 + activate_batch_task(p, rq);
2638 + deactivate_task(p, task_rq(p));
2641 p->rt_priority = lp.sched_priority;
2642 - if (task_on_runqueue(p))
2643 - move_first_runqueue(p);
2645 - current->need_resched = 1;
2646 + if (policy != SCHED_NORMAL && policy != SCHED_BATCH)
2647 + p->prio = MAX_USER_RT_PRIO-1 - p->rt_priority;
2649 + p->prio = p->static_prio;
2651 + activate_task(p, task_rq(p));
2654 - spin_unlock(&runqueue_lock);
2655 + task_rq_unlock(rq, &flags);
2656 +out_unlock_tasklist:
2657 read_unlock_irq(&tasklist_lock);
2663 -asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,
2664 +asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,
2665 struct sched_param *param)
2667 return setscheduler(pid, policy, param);
2668 @@ -981,10 +1505,9 @@
2670 asmlinkage long sys_sched_getscheduler(pid_t pid)
2672 - struct task_struct *p;
2674 + int retval = -EINVAL;
2681 @@ -992,7 +1515,7 @@
2682 read_lock(&tasklist_lock);
2683 p = find_process_by_pid(pid);
2685 - retval = p->policy & ~SCHED_YIELD;
2686 + retval = p->policy;
2687 read_unlock(&tasklist_lock);
2690 @@ -1001,11 +1524,10 @@
2692 asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param *param)
2694 - struct task_struct *p;
2695 struct sched_param lp;
2697 + int retval = -EINVAL;
2701 if (!param || pid < 0)
2704 @@ -1030,44 +1552,118 @@
2708 -asmlinkage long sys_sched_yield(void)
2710 + * sys_sched_setaffinity - set the cpu affinity of a process
2711 + * @pid: pid of the process
2712 + * @len: length in bytes of the bitmask pointed to by user_mask_ptr
2713 + * @user_mask_ptr: user-space pointer to the new cpu mask
2715 +asmlinkage int sys_sched_setaffinity(pid_t pid, unsigned int len,
2716 + unsigned long *user_mask_ptr)
2718 + unsigned long new_mask;
2722 + if (len < sizeof(new_mask))
2725 + if (copy_from_user(&new_mask, user_mask_ptr, sizeof(new_mask)))
2728 + new_mask &= cpu_online_map;
2732 + read_lock(&tasklist_lock);
2734 + p = find_process_by_pid(pid);
2736 + read_unlock(&tasklist_lock);
2741 - * Trick. sched_yield() first counts the number of truly
2742 - * 'pending' runnable processes, then returns if it's
2743 - * only the current processes. (This test does not have
2744 - * to be atomic.) In threaded applications this optimization
2745 - * gets triggered quite often.
2746 + * It is not safe to call set_cpus_allowed with the
2747 + * tasklist_lock held. We will bump the task_struct's
2748 + * usage count and then drop tasklist_lock.
2750 + get_task_struct(p);
2751 + read_unlock(&tasklist_lock);
2753 - int nr_pending = nr_running;
2755 + if ((current->euid != p->euid) && (current->euid != p->uid) &&
2756 + !capable(CAP_SYS_NICE))
2762 + set_cpus_allowed(p, new_mask);
2764 - // Subtract non-idle processes running on other CPUs.
2765 - for (i = 0; i < smp_num_cpus; i++) {
2766 - int cpu = cpu_logical_map(i);
2767 - if (aligned_data[cpu].schedule_data.curr != idle_task(cpu))
2771 - // on UP this process is on the runqueue as well
2776 - * This process can only be rescheduled by us,
2777 - * so this is safe without any locking.
2779 - if (current->policy == SCHED_OTHER)
2780 - current->policy |= SCHED_YIELD;
2781 - current->need_resched = 1;
2783 - spin_lock_irq(&runqueue_lock);
2784 - move_last_runqueue(current);
2785 - spin_unlock_irq(&runqueue_lock);
2787 + free_task_struct(p);
2792 + * sys_sched_getaffinity - get the cpu affinity of a process
2793 + * @pid: pid of the process
2794 + * @len: length in bytes of the bitmask pointed to by user_mask_ptr
2795 + * @user_mask_ptr: user-space pointer to hold the current cpu mask
2797 +asmlinkage int sys_sched_getaffinity(pid_t pid, unsigned int len,
2798 + unsigned long *user_mask_ptr)
2800 + unsigned int real_len;
2801 + unsigned long mask;
2805 + real_len = sizeof(mask);
2806 + if (len < real_len)
2809 + read_lock(&tasklist_lock);
2812 + p = find_process_by_pid(pid);
2817 + mask = p->cpus_allowed & cpu_online_map;
2820 + read_unlock(&tasklist_lock);
2823 + if (copy_to_user(user_mask_ptr, &mask, real_len))
2828 +asmlinkage long sys_sched_yield(void)
2830 + runqueue_t *rq = this_rq_lock();
2831 + prio_array_t *array = current->array;
2834 + * We implement yielding by moving the task into the expired
2837 + * (special rule: RT tasks will just round-robin in the active
2840 + if (likely(!rt_task(current))) {
2841 + dequeue_task(current, array);
2842 + enqueue_task(current, rq->expired);
2844 + list_del(&current->run_list);
2845 + list_add_tail(&current->run_list, array->queue + current->prio);
2847 + spin_unlock(&rq->lock);
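From user space the interface is unchanged; what this hunk changes is the cost model, since a SCHED_NORMAL yielder now drops behind the entire active array instead of just to the tail of one runqueue. The usual POSIX call still applies:

#include <sched.h>
#include <stdio.h>

int main(void)
{
        /* Typical pattern: cede the CPU while polling for a condition.
         * Under this patch a SCHED_NORMAL caller lands on the expired
         * array, so every other runnable task gets to go first. */
        for (int i = 0; i < 3; i++)
                if (sched_yield() != 0)
                        perror("sched_yield");
        printf("yielded 3 times\n");
        return 0;
}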
2854 @@ -1078,9 +1674,10 @@
2859 + ret = MAX_USER_RT_PRIO-1;
2862 + case SCHED_NORMAL:
2867 @@ -1096,7 +1693,8 @@
2872 + case SCHED_NORMAL:
2877 @@ -1104,9 +1702,9 @@
2879 asmlinkage long sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
2881 - struct timespec t;
2882 - struct task_struct *p;
2883 int retval = -EINVAL;
2884 + struct timespec t;
2889 @@ -1115,8 +1713,8 @@
2890 read_lock(&tasklist_lock);
2891 p = find_process_by_pid(pid);
2893 - jiffies_to_timespec(p->policy & SCHED_FIFO ? 0 : NICE_TO_TICKS(p->nice),
2895 + jiffies_to_timespec(p->policy & SCHED_FIFO ?
2896 + 0 : task_timeslice(p), &t);
2897 read_unlock(&tasklist_lock);
2899 retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
2900 @@ -1124,14 +1722,14 @@
2904 -static void show_task(struct task_struct * p)
2905 +static void show_task(task_t * p)
2907 unsigned long free = 0;
2909 static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };
2911 printk("%-13.13s ", p->comm);
2912 - state = p->state ? ffz(~p->state) + 1 : 0;
2913 + state = p->state ? __ffs(p->state) + 1 : 0;
2914 if (((unsigned) state) < sizeof(stat_nam)/sizeof(char *))
2915 printk(stat_nam[state]);
2917 @@ -1172,7 +1770,7 @@
2918 printk(" (NOTLB)\n");
2921 - extern void show_trace_task(struct task_struct *tsk);
2922 + extern void show_trace_task(task_t *tsk);
2926 @@ -1194,7 +1792,7 @@
2928 void show_state(void)
2930 - struct task_struct *p;
2933 #if (BITS_PER_LONG == 32)
2935 @@ -1217,121 +1815,244 @@
2936 read_unlock(&tasklist_lock);
2940 - * reparent_to_init() - Reparent the calling kernel thread to the init task.
2942 - * If a kernel thread is launched as a result of a system call, or if
2943 - * it ever exits, it should generally reparent itself to init so that
2944 - * it is correctly cleaned up on exit.
2946 - * The various task state such as scheduling policy and priority may have
2947 - * been inherited fro a user process, so we reset them to sane values here.
2949 - * NOTE that reparent_to_init() gives the caller full capabilities.
2951 -void reparent_to_init(void)
2952 +void __init init_idle(task_t *idle, int cpu)
2954 - struct task_struct *this_task = current;
2955 + runqueue_t *idle_rq = cpu_rq(cpu), *rq = cpu_rq(task_cpu(idle));
2956 + unsigned long flags;
2958 + __save_flags(flags);
2960 + double_rq_lock(idle_rq, rq);
2962 + idle_rq->curr = idle_rq->idle = idle;
2963 + deactivate_task(idle, rq);
2964 + idle->array = NULL;
2965 + idle->prio = MAX_PRIO;
2966 + idle->state = TASK_RUNNING;
2967 + set_task_cpu(idle, cpu);
2968 + double_rq_unlock(idle_rq, rq);
2969 + set_tsk_need_resched(idle);
2970 + __restore_flags(flags);
2975 - write_lock_irq(&tasklist_lock);
2977 + * This is how migration works:
2979 + * 1) we queue a migration_req_t structure in the source CPU's
2980 + * runqueue and wake up that CPU's migration thread.
2981 + * 2) we down() the locked semaphore => thread blocks.
2982 + * 3) migration thread wakes up (implicitly it forces the migrated
2983 + * thread off the CPU)
2984 + * 4) it gets the migration request and checks whether the migrated
2985 + * task is still in the wrong runqueue.
2986 + * 5) if it's in the wrong runqueue then the migration thread removes
2987 + * it and puts it into the right queue.
2988 + * 6) migration thread up()s the semaphore.
2989 + * 7) we wake up and the migration is done.
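A user-space analogue of steps 1-7, with a POSIX thread standing in for the migration thread and a semaphore pair modeling the locked-semaphore handshake. This is illustrative only; the names mirror the patch but none of it is kernel code:

#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>

/* Analogue of migration_req_t: the requester blocks on 'sem' until
 * the migration thread has serviced the request. */
struct migration_req {
        sem_t sem;
        int task_id;
};

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static sem_t queue_nonempty;
static struct migration_req *pending;

static void *migration_thread(void *arg)
{
        (void)arg;
        sem_wait(&queue_nonempty);            /* step 3: wake up */
        pthread_mutex_lock(&queue_lock);
        struct migration_req *req = pending;  /* step 4: fetch request */
        pending = NULL;
        pthread_mutex_unlock(&queue_lock);
        printf("migrating task %d\n", req->task_id);  /* step 5 */
        sem_post(&req->sem);                  /* step 6: up() the semaphore */
        return NULL;
}

int main(void)
{
        pthread_t tid;
        struct migration_req req = { .task_id = 42 };

        sem_init(&req.sem, 0, 0);             /* "locked semaphore" */
        sem_init(&queue_nonempty, 0, 0);
        pthread_create(&tid, NULL, migration_thread, NULL);

        pthread_mutex_lock(&queue_lock);      /* step 1: queue the request */
        pending = &req;
        pthread_mutex_unlock(&queue_lock);
        sem_post(&queue_nonempty);            /* ...and wake the thread */

        sem_wait(&req.sem);                   /* step 2: block; step 7: done */
        printf("migration done\n");
        pthread_join(tid, NULL);
        return 0;
}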
2992 - /* Reparent to init */
2993 - REMOVE_LINKS(this_task);
2994 - this_task->p_pptr = child_reaper;
2995 - this_task->p_opptr = child_reaper;
2996 - SET_LINKS(this_task);
3000 + struct semaphore sem;
3003 - /* Set the exit signal to SIGCHLD so we signal init on exit */
3004 - this_task->exit_signal = SIGCHLD;
3006 + * Change a given task's CPU affinity. Migrate the process to a
3007 + * proper CPU and schedule it away if the CPU it's executing on
3008 + * is removed from the allowed bitmask.
3010 + * NOTE: the caller must have a valid reference to the task, the
3011 + * task must not exit() & deallocate itself prematurely. The
3012 + * call is not atomic; no spinlocks may be held.
3014 +void set_cpus_allowed(task_t *p, unsigned long new_mask)
3016 + unsigned long flags;
3017 + migration_req_t req;
3020 - /* We also take the runqueue_lock while altering task fields
3021 - * which affect scheduling decisions */
3022 - spin_lock(&runqueue_lock);
3023 + new_mask &= cpu_online_map;
3027 - this_task->ptrace = 0;
3028 - this_task->nice = DEF_NICE;
3029 - this_task->policy = SCHED_OTHER;
3030 - /* cpus_allowed? */
3031 - /* rt_priority? */
3033 - this_task->cap_effective = CAP_INIT_EFF_SET;
3034 - this_task->cap_inheritable = CAP_INIT_INH_SET;
3035 - this_task->cap_permitted = CAP_FULL_SET;
3036 - this_task->keep_capabilities = 0;
3037 - memcpy(this_task->rlim, init_task.rlim, sizeof(*(this_task->rlim)));
3038 - this_task->user = INIT_USER;
3039 + rq = task_rq_lock(p, &flags);
3040 + p->cpus_allowed = new_mask;
3042 + * Can the task run on the task's current CPU? If not then
3043 + * migrate the process off to a proper CPU.
3045 + if (new_mask & (1UL << task_cpu(p))) {
3046 + task_rq_unlock(rq, &flags);
3050 + * If the task is not on a runqueue (and not running), then
3051 + * it is sufficient to simply update the task's cpu field.
3053 + if (!p->array && !task_running(rq, p)) {
3054 + set_task_cpu(p, __ffs(p->cpus_allowed));
3055 + task_rq_unlock(rq, &flags);
3058 + init_MUTEX_LOCKED(&req.sem);
3060 + list_add(&req.list, &rq->migration_queue);
3061 + task_rq_unlock(rq, &flags);
3062 + wake_up_process(rq->migration_thread);
3064 - spin_unlock(&runqueue_lock);
3065 - write_unlock_irq(&tasklist_lock);
3072 - * Put all the gunge required to become a kernel thread without
3073 - * attached user resources in one place where it belongs.
3075 +static __initdata int master_migration_thread;
3077 -void daemonize(void)
3078 +static int migration_thread(void * bind_cpu)
3080 - struct fs_struct *fs;
3081 + int cpu = (int) (long) bind_cpu;
3082 + struct sched_param param = { sched_priority: MAX_RT_PRIO-1 };
3087 + sigfillset(&current->blocked);
3088 + set_fs(KERNEL_DS);
3091 - * If we were started as result of loading a module, close all of the
3092 - * user space pages. We don't need them, and if we didn't close them
3093 - * they would be locked into memory.
3094 + * The first migration thread is started on the boot CPU; it
3095 + * migrates the other migration threads to their destination CPUs.
3098 + if (cpu != master_migration_thread) {
3099 + while (!cpu_rq(master_migration_thread)->migration_thread)
3101 + set_cpus_allowed(current, 1UL << cpu);
3103 + printk("migration_task %d on cpu=%d\n", cpu, smp_processor_id());
3104 + ret = setscheduler(0, SCHED_FIFO, ¶m);
3106 - current->session = 1;
3107 - current->pgrp = 1;
3108 - current->tty = NULL;
3110 + rq->migration_thread = current;
3112 - /* Become as one with the init task */
3113 + sprintf(current->comm, "migration_CPU%d", smp_processor_id());
3115 - exit_fs(current); /* current->fs->count--; */
3116 - fs = init_task.fs;
3118 - atomic_inc(&fs->count);
3119 - exit_files(current);
3120 - current->files = init_task.files;
3121 - atomic_inc(&current->files->count);
3124 + runqueue_t *rq_src, *rq_dest;
3125 + struct list_head *head;
3126 + int cpu_src, cpu_dest;
3127 + migration_req_t *req;
3128 + unsigned long flags;
3131 + spin_lock_irqsave(&rq->lock, flags);
3132 + head = &rq->migration_queue;
3133 + current->state = TASK_INTERRUPTIBLE;
3134 + if (list_empty(head)) {
3135 + spin_unlock_irqrestore(&rq->lock, flags);
3139 + req = list_entry(head->next, migration_req_t, list);
3140 + list_del_init(head->next);
3141 + spin_unlock_irqrestore(&rq->lock, flags);
3144 + cpu_dest = __ffs(p->cpus_allowed);
3145 + rq_dest = cpu_rq(cpu_dest);
3147 + cpu_src = task_cpu(p);
3148 + rq_src = cpu_rq(cpu_src);
3150 + local_irq_save(flags);
3151 + double_rq_lock(rq_src, rq_dest);
3152 + if (task_cpu(p) != cpu_src) {
3153 + double_rq_unlock(rq_src, rq_dest);
3154 + local_irq_restore(flags);
3157 + if (rq_src == rq) {
3158 + set_task_cpu(p, cpu_dest);
3160 + deactivate_task(p, rq_src);
3161 + activate_task(p, rq_dest);
3164 + double_rq_unlock(rq_src, rq_dest);
3165 + local_irq_restore(flags);
3167 -extern unsigned long wait_init_idle;
3172 -void __init init_idle(void)
3173 +void __init migration_init(void)
3175 - struct schedule_data * sched_data;
3176 - sched_data = &aligned_data[smp_processor_id()].schedule_data;
3179 + master_migration_thread = smp_processor_id();
3180 + current->cpus_allowed = 1UL << master_migration_thread;
3182 + for (cpu = 0; cpu < NR_CPUS; cpu++) {
3183 + if (!cpu_online(cpu))
3185 + if (kernel_thread(migration_thread, (void *) (long) cpu,
3186 + CLONE_FS | CLONE_FILES | CLONE_SIGNAL) < 0)
3189 + current->cpus_allowed = -1L;
3191 - if (current != &init_task && task_on_runqueue(current)) {
3192 - printk("UGH! (%d:%d) was on the runqueue, removing.\n",
3193 - smp_processor_id(), current->pid);
3194 - del_from_runqueue(current);
3195 + for (cpu = 0; cpu < NR_CPUS; cpu++) {
3196 + if (!cpu_online(cpu))
3198 + while (!cpu_rq(cpu)->migration_thread)
3199 + schedule_timeout(2);
3201 - sched_data->curr = current;
3202 - sched_data->last_schedule = get_cycles();
3203 - clear_bit(current->processor, &wait_init_idle);
3207 -extern void init_timervecs (void);
3208 +extern void init_timervecs(void);
3209 +extern void timer_bh(void);
3210 +extern void tqueue_bh(void);
3211 +extern void immediate_bh(void);
3213 void __init sched_init(void)
3218 + for (i = 0; i < NR_CPUS; i++) {
3219 + prio_array_t *array;
3222 + rq->active = rq->arrays;
3223 + rq->expired = rq->arrays + 1;
3224 + spin_lock_init(&rq->lock);
3225 + INIT_LIST_HEAD(&rq->migration_queue);
3226 + INIT_LIST_HEAD(&rq->batch_queue);
3227 + rq->idle_ticks_left = IDLE_TICKS;
3229 + for (j = 0; j < 2; j++) {
3230 + array = rq->arrays + j;
3231 + for (k = 0; k < MAX_PRIO; k++) {
3232 + INIT_LIST_HEAD(array->queue + k);
3233 + __clear_bit(k, array->bitmap);
3235 + // delimiter for bitsearch
3236 + __set_bit(MAX_PRIO, array->bitmap);
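The delimiter bit set just past MAX_PRIO is what lets the pick-next path search the bitmap unconditionally: even an empty array terminates the scan at bit MAX_PRIO instead of running off the end. A short demonstration, with a generic scan standing in for sched_find_first_bit():

#include <stdio.h>

#define MAX_PRIO     140
#define BITMAP_LONGS ((MAX_PRIO + 1 + 63) / 64)

static int find_first_bit64(const unsigned long long *map, int n)
{
        for (int i = 0; i < n; i++)
                if (map[i])
                        return i * 64 + __builtin_ctzll(map[i]);
        return n * 64;
}

int main(void)
{
        unsigned long long bitmap[BITMAP_LONGS] = { 0 };

        /* Empty array plus delimiter: the search yields MAX_PRIO, a sentinel. */
        bitmap[MAX_PRIO / 64] |= 1ULL << (MAX_PRIO % 64);
        printf("empty: %d\n", find_first_bit64(bitmap, BITMAP_LONGS));  /* 140 */

        /* One runnable task at prio 115: the search finds it first. */
        bitmap[115 / 64] |= 1ULL << (115 % 64);
        printf("queued: %d\n", find_first_bit64(bitmap, BITMAP_LONGS)); /* 115 */
        return 0;
}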
3240 * We have to do a little magic to get the first
3241 * process right in SMP mode.
3243 - int cpu = smp_processor_id();
3246 - init_task.processor = cpu;
3248 - for(nr = 0; nr < PIDHASH_SZ; nr++)
3249 - pidhash[nr] = NULL;
3251 + rq->curr = current;
3252 + rq->idle = current;
3253 + set_task_cpu(current, smp_processor_id());
3254 + wake_up_process(current);
3258 init_bh(TIMER_BH, timer_bh);
3259 init_bh(TQUEUE_BH, tqueue_bh);
3260 init_bh(IMMEDIATE_BH, immediate_bh);
3261 @@ -1340,5 +2061,6 @@
3262 * The boot idle thread does lazy MMU switching as well:
3264 atomic_inc(&init_mm.mm_count);
3265 - enter_lazy_tlb(&init_mm, current, cpu);
3266 + enter_lazy_tlb(&init_mm, current, smp_processor_id());
3269 --- linux/kernel/signal.c.orig Thu Jul 18 15:22:25 2002
3270 +++ linux/kernel/signal.c Thu Jul 18 15:22:34 2002
3271 @@ -507,12 +507,9 @@
3272 * process of changing - but no harm is done by that
3273 * other than doing an extra (lightweight) IPI interrupt.
3275 - spin_lock(&runqueue_lock);
3276 - if (task_has_cpu(t) && t->processor != smp_processor_id())
3277 - smp_send_reschedule(t->processor);
3278 - spin_unlock(&runqueue_lock);
3279 -#endif /* CONFIG_SMP */
3281 + if ((t->state == TASK_RUNNING) && (t->cpu != cpu()))
3282 + kick_if_running(t);
3284 if (t->state & TASK_INTERRUPTIBLE) {
3287 --- linux/kernel/softirq.c.orig Thu Jul 18 15:22:25 2002
3288 +++ linux/kernel/softirq.c Thu Jul 18 15:22:34 2002
3289 @@ -259,10 +259,9 @@
3291 while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
3292 current->state = TASK_RUNNING;
3294 - current->policy |= SCHED_YIELD;
3296 - } while (test_bit(TASKLET_STATE_SCHED, &t->state));
3298 + sys_sched_yield();
3299 + while (test_bit(TASKLET_STATE_SCHED, &t->state));
3301 tasklet_unlock_wait(t);
3302 clear_bit(TASKLET_STATE_SCHED, &t->state);
3303 @@ -365,13 +364,13 @@
3304 int cpu = cpu_logical_map(bind_cpu);
3307 - current->nice = 19;
3308 + set_user_nice(current, 19);
3309 sigfillset(&current->blocked);
3311 /* Migrate to the right CPU */
3312 - current->cpus_allowed = 1UL << cpu;
3313 - while (smp_processor_id() != cpu)
3315 + set_cpus_allowed(current, 1UL << cpu);
3319 sprintf(current->comm, "ksoftirqd_CPU%d", bind_cpu);
3325 -static __init int spawn_ksoftirqd(void)
3326 +__init int spawn_ksoftirqd(void)
3330 @@ -405,10 +404,8 @@
3331 CLONE_FS | CLONE_FILES | CLONE_SIGNAL) < 0)
3332 printk("spawn_ksoftirqd() failed for cpu %d\n", cpu);
3334 - while (!ksoftirqd_task(cpu_logical_map(cpu))) {
3335 - current->policy |= SCHED_YIELD;
3338 + while (!ksoftirqd_task(cpu_logical_map(cpu)))
3339 + sys_sched_yield();
3343 --- linux/kernel/sys.c.orig Thu Jul 18 15:22:25 2002
3344 +++ linux/kernel/sys.c Thu Jul 18 15:22:34 2002
3345 @@ -220,10 +220,10 @@
3347 if (error == -ESRCH)
3349 - if (niceval < p->nice && !capable(CAP_SYS_NICE))
3350 + if (niceval < task_nice(p) && !capable(CAP_SYS_NICE))
3353 - p->nice = niceval;
3354 + set_user_nice(p, niceval);
3356 read_unlock(&tasklist_lock);
3360 if (!proc_sel(p, which, who))
3362 - niceval = 20 - p->nice;
3363 + niceval = 20 - task_nice(p);
3364 if (niceval > retval)
3367 --- linux/kernel/timer.c.orig Thu Jul 18 15:22:25 2002
3368 +++ linux/kernel/timer.c Thu Jul 18 15:22:34 2002
3371 #include <asm/uaccess.h>
3373 +struct kernel_stat kstat;
3376 * Timekeeping variables
3378 @@ -598,18 +600,7 @@
3379 int cpu = smp_processor_id(), system = user_tick ^ 1;
3381 update_one_process(p, user_tick, system, cpu);
3383 - if (--p->counter <= 0) {
3385 - p->need_resched = 1;
3388 - kstat.per_cpu_nice[cpu] += user_tick;
3390 - kstat.per_cpu_user[cpu] += user_tick;
3391 - kstat.per_cpu_system[cpu] += system;
3392 - } else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
3393 - kstat.per_cpu_system[cpu] += system;
3394 + scheduler_tick(user_tick, system);
3398 @@ -617,17 +608,7 @@
3400 static unsigned long count_active_tasks(void)
3402 - struct task_struct *p;
3403 - unsigned long nr = 0;
3405 - read_lock(&tasklist_lock);
3406 - for_each_task(p) {
3407 - if ((p->state == TASK_RUNNING ||
3408 - (p->state & TASK_UNINTERRUPTIBLE)))
3411 - read_unlock(&tasklist_lock);
3413 + return (nr_running() + nr_uninterruptible()) * FIXED_1;
3417 @@ -810,6 +791,89 @@
3421 +static void process_timeout(unsigned long __data)
3423 + wake_up_process((task_t *)__data);
3427 + * schedule_timeout - sleep until timeout
3428 + * @timeout: timeout value in jiffies
3430 + * Make the current task sleep until @timeout jiffies have
3431 + * elapsed. The routine will return immediately unless
3432 + * the current task state has been set (see set_current_state()).
3434 + * You can set the task state as follows -
3436 + * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
3437 + * pass before the routine returns. The routine will return 0
3439 + * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
3440 + * delivered to the current task. In this case the remaining time
3441 + * in jiffies will be returned, or 0 if the timer expired in time
3443 + * The current task state is guaranteed to be TASK_RUNNING when this
3444 + * routine returns.
3446 + * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
3447 + * the CPU away without a bound on the timeout. In this case the return
3448 + * value will be %MAX_SCHEDULE_TIMEOUT.
3450 + * In all cases the return value is guaranteed to be non-negative.
3452 +signed long schedule_timeout(signed long timeout)
3454 + struct timer_list timer;
3455 + unsigned long expire;
3459 + case MAX_SCHEDULE_TIMEOUT:
3461 + * These two special cases are useful to be comfortable
3462 + * in the caller. Nothing more. We could take
3463 + * MAX_SCHEDULE_TIMEOUT from one of the negative values,
3464 + * but I'd like to return a valid offset (>=0) to allow
3465 + * the caller to do everything it wants with the retval.
3471 + * Another bit of PARANOID. Note that the retval will be
3472 + * 0 since no piece of kernel is supposed to do a check
3473 + * for a negative retval of schedule_timeout() (since it
3474 + * should never happen anyway). You just have the printk()
3475 + * that will tell you if something has gone wrong and where.
3479 + printk(KERN_ERR "schedule_timeout: wrong timeout "
3480 + "value %lx from %p\n", timeout,
3481 + __builtin_return_address(0));
3482 + current->state = TASK_RUNNING;
3487 + expire = timeout + jiffies;
3489 + init_timer(&timer);
3490 + timer.expires = expire;
3491 + timer.data = (unsigned long) current;
3492 + timer.function = process_timeout;
3494 + add_timer(&timer);
3496 + del_timer_sync(&timer);
3498 + timeout = expire - jiffies;
3501 + return timeout < 0 ? 0 : timeout;
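A user-space analogue of the contract, with milliseconds standing in for jiffies and nanosleep() standing in for the timer plus schedule() pair. In the kernel an early wakeup (e.g. a signal) is what makes the remainder positive; here the sleep runs to completion, so the result is 0:

#include <stdio.h>
#include <time.h>

/* Mimics schedule_timeout()'s return contract: sleep up to 'timeout' ms
 * and return the non-negative time left over. */
static long sleep_timeout_ms(long timeout)
{
        struct timespec start, end, req = {
                .tv_sec  = timeout / 1000,
                .tv_nsec = (timeout % 1000) * 1000000L,
        };
        clock_gettime(CLOCK_MONOTONIC, &start);
        nanosleep(&req, NULL);
        clock_gettime(CLOCK_MONOTONIC, &end);
        long elapsed = (end.tv_sec - start.tv_sec) * 1000
                     + (end.tv_nsec - start.tv_nsec) / 1000000L;
        long remaining = timeout - elapsed;
        return remaining < 0 ? 0 : remaining;
}

int main(void)
{
        printf("remaining: %ld ms\n", sleep_timeout_ms(50));
        return 0;
}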
3504 /* Thread ID - the internal kernel "pid" */
3505 asmlinkage long sys_gettid(void)
3512 --- linux/mm/highmem.c.orig Thu Jul 18 15:22:25 2002
3513 +++ linux/mm/highmem.c Thu Jul 18 15:22:34 2002
3515 /* we need to wait I/O completion */
3516 run_task_queue(&tq_disk);
3518 - current->policy |= SCHED_YIELD;
3519 __set_current_state(TASK_RUNNING);
3526 /* we need to wait I/O completion */
3527 run_task_queue(&tq_disk);
3529 - current->policy |= SCHED_YIELD;
3530 __set_current_state(TASK_RUNNING);
3536 --- linux/mm/oom_kill.c.orig Thu Jul 18 15:22:25 2002
3537 +++ linux/mm/oom_kill.c Thu Jul 18 15:22:34 2002
3539 * Niced processes are most likely less important, so double
3540 * their badness points.
3543 + if (task_nice(p) > 0)
3548 * all the memory it needs. That way it should be able to
3549 * exit() and clear out its resources quickly...
3551 - p->counter = 5 * HZ;
3552 + p->time_slice = HZ;
3553 p->flags |= PF_MEMALLOC | PF_MEMDIE;
3555 /* This process has hardware access, be more careful. */
3557 * killing itself before someone else gets the chance to ask
3560 - current->policy |= SCHED_YIELD;
3566 --- linux/mm/page_alloc.c.orig Thu Jul 18 15:22:25 2002
3567 +++ linux/mm/page_alloc.c Thu Jul 18 15:22:34 2002
3571 /* Yield for kswapd, and try again */
3572 - current->policy |= SCHED_YIELD;
3573 __set_current_state(TASK_RUNNING);
3579 --- linux/include/linux/list.h.orig Thu Jul 18 15:22:11 2002
3580 +++ linux/include/linux/list.h Fri Jul 19 15:03:33 2002
3582 struct list_head *next, *prev;
3585 +typedef struct list_head list_t;
3587 #define LIST_HEAD_INIT(name) { &(name), &(name) }
3589 #define LIST_HEAD(name) \
3590 --- linux/include/linux/kernel_stat.h.orig Thu Jul 18 15:22:25 2002
3591 +++ linux/include/linux/kernel_stat.h Fri Jul 19 15:03:33 2002
3593 #if !defined(CONFIG_ARCH_S390)
3594 unsigned int irqs[NR_CPUS][NR_IRQS];
3596 - unsigned int context_swtch;
3599 +extern unsigned long nr_context_switches(void);
3601 extern struct kernel_stat kstat;
3603 --- linux/include/linux/sched.h.orig Thu Jul 18 15:22:25 2002
3604 +++ linux/include/linux/sched.h Fri Jul 19 15:03:33 2002
3606 #define CT_TO_SECS(x) ((x) / HZ)
3607 #define CT_TO_USECS(x) (((x) % HZ) * 1000000/HZ)
3609 -extern int nr_running, nr_threads;
3610 +extern int nr_threads;
3611 extern int last_pid;
3612 +extern unsigned long nr_running(void);
3613 +extern unsigned long nr_uninterruptible(void);
3615 #include <linux/fs.h>
3616 #include <linux/time.h>
3617 #include <linux/param.h>
3618 #include <linux/resource.h>
3620 #include <linux/timer.h>
3623 #include <asm/processor.h>
3626 #define TASK_ZOMBIE 4
3627 #define TASK_STOPPED 8
3629 +#define task_cpu(p) ((p)->cpu)
3630 +#define set_task_cpu(p, c) do { (p)->cpu = (c); } while (0)
3631 +#define cpu_online(i) ((i) < smp_num_cpus)
3633 #define __set_task_state(tsk, state_value) \
3634 do { (tsk)->state = (state_value); } while (0)
3636 @@ -115,15 +119,13 @@
3638 * Scheduling policies
3640 -#define SCHED_OTHER 0
3641 +#define SCHED_NORMAL 0
3642 #define SCHED_FIFO 1
3644 +#define SCHED_BATCH 3
3647 - * This is an additional bit set when we want to
3648 - * yield the CPU for one re-schedule..
3650 -#define SCHED_YIELD 0x10
3651 +/* compatibility */
3652 +#define SCHED_OTHER SCHED_NORMAL
3654 struct sched_param {
3656 @@ -142,21 +144,27 @@
3659 extern rwlock_t tasklist_lock;
3660 -extern spinlock_t runqueue_lock;
3661 extern spinlock_t mmlist_lock;
3663 +typedef struct task_struct task_t;
3665 extern void sched_init(void);
3666 -extern void init_idle(void);
3667 +extern void init_idle(task_t *idle, int cpu);
3668 extern void show_state(void);
3669 extern void cpu_init (void);
3670 extern void trap_init(void);
3671 extern void update_process_times(int user);
3672 -extern void update_one_process(struct task_struct *p, unsigned long user,
3673 +extern void update_one_process(task_t *p, unsigned long user,
3674 unsigned long system, int cpu);
3675 +extern void scheduler_tick(int user_tick, int system);
3676 +extern void migration_init(void);
3677 +extern unsigned long cache_decay_ticks;
3678 +extern int set_user(uid_t new_ruid, int dumpclear);
3680 #define MAX_SCHEDULE_TIMEOUT LONG_MAX
3681 extern signed long FASTCALL(schedule_timeout(signed long timeout));
3682 asmlinkage void schedule(void);
3683 +asmlinkage void schedule_userspace(void);
3685 extern int schedule_task(struct tq_struct *task);
3686 extern void flush_scheduled_tasks(void);
3687 @@ -164,6 +172,36 @@
3688 extern int current_is_keventd(void);
3691 + * Priority of a process goes from 0..MAX_PRIO-1, valid RT
3692 + * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL tasks are
3693 + * in the range MAX_RT_PRIO..MAX_PRIO-1. Priority values
3694 + * are inverted: lower p->prio value means higher priority.
3696 + * The MAX_USER_RT_PRIO value allows the actual maximum
3697 + * RT priority to be separate from the value exported to
3698 + * user-space. This allows kernel threads to set their
3699 + * priority to a value higher than any user task. Note:
3700 + * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
3703 +#define MAX_USER_RT_PRIO 100
3704 +#define MAX_RT_PRIO MAX_USER_RT_PRIO
3706 +#define MAX_PRIO (MAX_RT_PRIO + 40)
3709 + * The maximum RT priority is configurable. If the resulting
3710 + * bitmap is 160 bits, we can use a hand-coded routine which
3711 + * is optimal. Otherwise, we fall back on a generic routine for
3712 + * finding the first set bit from an arbitrarily-sized bitmap.
3714 +#if MAX_PRIO < 160 && MAX_PRIO > 127
3715 +#define sched_find_first_bit(map) _sched_find_first_bit(map)
3717 +#define sched_find_first_bit(map) find_first_bit(map, MAX_PRIO)
3721 * The default fd array needs to be at least BITS_PER_LONG,
3722 * as this is the granularity returned by copy_fdset().
3725 extern struct user_struct root_user;
3726 #define INIT_USER (&root_user)
3728 +typedef struct prio_array prio_array_t;
3730 struct task_struct {
3732 * offsets of these are hardcoded elsewhere - touch with care
3733 @@ -301,34 +341,25 @@
3735 int lock_depth; /* Lock depth */
3738 - * offset 32 begins here on 32-bit platforms. We keep
3739 - * all fields in a single cacheline that are needed for
3740 - * the goodness() loop in schedule().
3744 - unsigned long policy;
3745 - struct mm_struct *mm;
3748 - * cpus_runnable is ~0 if the process is not running on any
3749 - * CPU. It's (1 << cpu) if it's running on a CPU. This mask
3750 - * is updated under the runqueue lock.
3752 - * To determine whether a process might run on a CPU, this
3753 - * mask is AND-ed with cpus_allowed.
3754 + * offset 32 begins here on 32-bit platforms.
3756 - unsigned long cpus_runnable, cpus_allowed;
3758 - * (only the 'next' pointer fits into the cacheline, but
3759 - * that's just fine.)
3761 - struct list_head run_list;
3762 - unsigned long sleep_time;
3764 + int prio, static_prio;
3766 + prio_array_t *array;
3768 + unsigned long sleep_avg;
3769 + unsigned long sleep_timestamp;
3771 + unsigned long policy;
3772 + unsigned long cpus_allowed;
3773 + unsigned int time_slice, first_time_slice;
3775 + task_t *next_task, *prev_task;
3777 + struct mm_struct *mm, *active_mm;
3779 - struct task_struct *next_task, *prev_task;
3780 - struct mm_struct *active_mm;
3781 struct list_head local_pages;
3782 unsigned int allocation_order, nr_local_pages;
3784 @@ -351,12 +382,12 @@
3785 * older sibling, respectively. (p->father can be replaced with
3788 - struct task_struct *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr;
3789 + task_t *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr;
3790 struct list_head thread_group;
3792 /* PID hash table linkage. */
3793 - struct task_struct *pidhash_next;
3794 - struct task_struct **pidhash_pprev;
3795 + task_t *pidhash_next;
3796 + task_t **pidhash_pprev;
3798 wait_queue_head_t wait_chldexit; /* for wait4() */
3799 struct completion *vfork_done; /* for vfork() */
3801 #define PF_MEMDIE 0x00001000 /* Killed for out-of-memory */
3802 #define PF_FREE_PAGES 0x00002000 /* per process page freeing */
3803 #define PF_NOIO 0x00004000 /* avoid generating further I/O */
3804 +#define PF_BATCH 0x00080000 /* batch-priority process */
3806 #define PF_USEDFPU 0x00100000 /* task used FPU this quantum (SMP) */
3808 @@ -454,12 +486,19 @@
3810 #define _STK_LIM (8*1024*1024)
3812 -#define DEF_COUNTER (10*HZ/100) /* 100 ms time slice */
3813 -#define MAX_COUNTER (20*HZ/100)
3814 -#define DEF_NICE (0)
3816 +extern void set_cpus_allowed(task_t *p, unsigned long new_mask);
3818 +#define set_cpus_allowed(p, new_mask) do { } while (0)
3821 +extern void set_user_nice(task_t *p, long nice);
3822 +extern int task_prio(task_t *p);
3823 +extern int task_nice(task_t *p);
3824 +extern int idle_cpu(int cpu);
3826 asmlinkage long sys_sched_yield(void);
3827 -#define yield() sys_sched_yield()
3828 +#define yield() sys_sched_yield()
3831 * The default (Linux) execution domain.
3832 @@ -478,14 +517,14 @@
3833 addr_limit: KERNEL_DS, \
3834 exec_domain: &default_exec_domain, \
3836 - counter: DEF_COUNTER, \
3838 - policy: SCHED_OTHER, \
3839 + prio: MAX_PRIO-20, \
3840 + static_prio: MAX_PRIO-20, \
3841 + policy: SCHED_NORMAL, \
3842 + cpus_allowed: -1, \
3844 active_mm: &init_mm, \
3845 - cpus_runnable: -1, \
3846 - cpus_allowed: -1, \
3847 run_list: LIST_HEAD_INIT(tsk.run_list), \
3852 @@ -519,24 +558,24 @@
3856 - struct task_struct task;
3858 unsigned long stack[INIT_TASK_SIZE/sizeof(long)];
3861 extern union task_union init_task_union;
3863 extern struct mm_struct init_mm;
3864 -extern struct task_struct *init_tasks[NR_CPUS];
3865 +extern task_t *init_tasks[NR_CPUS];
3867 /* PID hashing. (shouldnt this be dynamic?) */
3868 #define PIDHASH_SZ (4096 >> 2)
3869 -extern struct task_struct *pidhash[PIDHASH_SZ];
3870 +extern task_t *pidhash[PIDHASH_SZ];
3872 #define pid_hashfn(x) ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1))
3874 -static inline void hash_pid(struct task_struct *p)
3875 +static inline void hash_pid(task_t *p)
3877 - struct task_struct **htable = &pidhash[pid_hashfn(p->pid)];
3878 + task_t **htable = &pidhash[pid_hashfn(p->pid)];
3880 if((p->pidhash_next = *htable) != NULL)
3881 (*htable)->pidhash_pprev = &p->pidhash_next;
3882 @@ -544,16 +583,16 @@
3883 p->pidhash_pprev = htable;
3886 -static inline void unhash_pid(struct task_struct *p)
3887 +static inline void unhash_pid(task_t *p)
3890 p->pidhash_next->pidhash_pprev = p->pidhash_pprev;
3891 *p->pidhash_pprev = p->pidhash_next;
3894 -static inline struct task_struct *find_task_by_pid(int pid)
3895 +static inline task_t *find_task_by_pid(int pid)
3897 - struct task_struct *p, **htable = &pidhash[pid_hashfn(pid)];
3898 + task_t *p, **htable = &pidhash[pid_hashfn(pid)];
3900 for(p = *htable; p && p->pid != pid; p = p->pidhash_next)
3902 @@ -561,19 +600,6 @@
3906 -#define task_has_cpu(tsk) ((tsk)->cpus_runnable != ~0UL)
3908 -static inline void task_set_cpu(struct task_struct *tsk, unsigned int cpu)
3910 - tsk->processor = cpu;
3911 - tsk->cpus_runnable = 1UL << cpu;
3914 -static inline void task_release_cpu(struct task_struct *tsk)
3916 - tsk->cpus_runnable = ~0UL;
3919 /* per-UID process charging. */
3920 extern struct user_struct * alloc_uid(uid_t);
3921 extern void free_uid(struct user_struct *);
3922 @@ -600,47 +626,51 @@
3923 extern void FASTCALL(interruptible_sleep_on(wait_queue_head_t *q));
3924 extern long FASTCALL(interruptible_sleep_on_timeout(wait_queue_head_t *q,
3925 signed long timeout));
3926 -extern int FASTCALL(wake_up_process(struct task_struct * tsk));
3927 +extern int FASTCALL(wake_up_process(task_t * tsk));
3928 +extern void FASTCALL(wake_up_forked_process(task_t * tsk));
3929 +extern void FASTCALL(sched_exit(task_t * p));
3931 #define wake_up(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1)
3932 #define wake_up_nr(x, nr) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr)
3933 #define wake_up_all(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0)
3934 -#define wake_up_sync(x) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1)
3935 -#define wake_up_sync_nr(x, nr) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr)
3936 #define wake_up_interruptible(x) __wake_up((x),TASK_INTERRUPTIBLE, 1)
3937 #define wake_up_interruptible_nr(x, nr) __wake_up((x),TASK_INTERRUPTIBLE, nr)
3938 #define wake_up_interruptible_all(x) __wake_up((x),TASK_INTERRUPTIBLE, 0)
3939 -#define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, 1)
3940 -#define wake_up_interruptible_sync_nr(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, nr)
3942 +#define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, 1)
3944 +#define wake_up_interruptible_sync(x) __wake_up((x),TASK_INTERRUPTIBLE, 1)
3947 asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru);
3949 extern int in_group_p(gid_t);
3950 extern int in_egroup_p(gid_t);
3952 extern void proc_caches_init(void);
3953 -extern void flush_signals(struct task_struct *);
3954 -extern void flush_signal_handlers(struct task_struct *);
3955 +extern void flush_signals(task_t *);
3956 +extern void flush_signal_handlers(task_t *);
3957 extern void sig_exit(int, int, struct siginfo *);
3958 extern int dequeue_signal(sigset_t *, siginfo_t *);
3959 extern void block_all_signals(int (*notifier)(void *priv), void *priv,
3961 extern void unblock_all_signals(void);
3962 -extern int send_sig_info(int, struct siginfo *, struct task_struct *);
3963 -extern int force_sig_info(int, struct siginfo *, struct task_struct *);
3964 +extern int send_sig_info(int, struct siginfo *, task_t *);
3965 +extern int force_sig_info(int, struct siginfo *, task_t *);
3966 extern int kill_pg_info(int, struct siginfo *, pid_t);
3967 extern int kill_sl_info(int, struct siginfo *, pid_t);
3968 extern int kill_proc_info(int, struct siginfo *, pid_t);
3969 -extern void notify_parent(struct task_struct *, int);
3970 -extern void do_notify_parent(struct task_struct *, int);
3971 -extern void force_sig(int, struct task_struct *);
3972 -extern int send_sig(int, struct task_struct *, int);
3973 +extern void notify_parent(task_t *, int);
3974 +extern void do_notify_parent(task_t *, int);
3975 +extern void force_sig(int, task_t *);
3976 +extern int send_sig(int, task_t *, int);
3977 extern int kill_pg(pid_t, int, int);
3978 extern int kill_sl(pid_t, int, int);
3979 extern int kill_proc(pid_t, int, int);
3980 extern int do_sigaction(int, const struct k_sigaction *, struct k_sigaction *);
3981 extern int do_sigaltstack(const stack_t *, stack_t *, unsigned long);
3983 -static inline int signal_pending(struct task_struct *p)
3984 +static inline int signal_pending(task_t *p)
3986 return (p->sigpending != 0);
3989 This is required every time the blocked sigset_t changes.
3990 All callers should have t->sigmask_lock. */
3992 -static inline void recalc_sigpending(struct task_struct *t)
3993 +static inline void recalc_sigpending(task_t *t)
3995 t->sigpending = has_pending_signals(&t->pending.signal, &t->blocked);
3997 @@ -786,16 +816,17 @@
3998 extern int expand_fdset(struct files_struct *, int nr);
3999 extern void free_fdset(fd_set *, int);
4001 -extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
4002 +extern int copy_thread(int, unsigned long, unsigned long, unsigned long, task_t *, struct pt_regs *);
4003 extern void flush_thread(void);
4004 extern void exit_thread(void);
4006 -extern void exit_mm(struct task_struct *);
4007 -extern void exit_files(struct task_struct *);
4008 -extern void exit_sighand(struct task_struct *);
4009 +extern void exit_mm(task_t *);
4010 +extern void exit_files(task_t *);
4011 +extern void exit_sighand(task_t *);
4013 extern void reparent_to_init(void);
4014 extern void daemonize(void);
4015 +extern task_t *child_reaper;
4017 extern int do_execve(char *, char **, char **, struct pt_regs *);
4018 extern int do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long);
4020 extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait));
4021 extern void FASTCALL(remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait));
4023 +extern void wait_task_inactive(task_t * p);
4024 +extern void kick_if_running(task_t * p);
4026 #define __wait_event(wq, condition) \
4028 wait_queue_t __wait; \
4029 @@ -885,27 +919,12 @@
4030 for (task = next_thread(current) ; task != current ; task = next_thread(task))
4032 #define next_thread(p) \
4033 - list_entry((p)->thread_group.next, struct task_struct, thread_group)
4034 + list_entry((p)->thread_group.next, task_t, thread_group)
4036 #define thread_group_leader(p) (p->pid == p->tgid)
4038 -static inline void del_from_runqueue(struct task_struct * p)
4041 - p->sleep_time = jiffies;
4042 - list_del(&p->run_list);
4043 - p->run_list.next = NULL;
4046 -static inline int task_on_runqueue(struct task_struct *p)
4047 +static inline void unhash_process(task_t *p)
4049 - return (p->run_list.next != NULL);
4052 -static inline void unhash_process(struct task_struct *p)
4054 - if (task_on_runqueue(p))
4055 - out_of_line_bug();
4056 write_lock_irq(&tasklist_lock);
4059 @@ -915,12 +934,12 @@
4062 /* Protects ->fs, ->files, ->mm, and synchronises with wait4(). Nests inside tasklist_lock */
4063 -static inline void task_lock(struct task_struct *p)
4064 +static inline void task_lock(task_t *p)
4066 spin_lock(&p->alloc_lock);
4069 -static inline void task_unlock(struct task_struct *p)
4070 +static inline void task_unlock(task_t *p)
4072 spin_unlock(&p->alloc_lock);
4074 @@ -942,6 +961,31 @@
4080 +static inline void set_need_resched(void)
4082 + current->need_resched = 1;
4085 +static inline void clear_need_resched(void)
4087 + current->need_resched = 0;
4090 +static inline void set_tsk_need_resched(struct task_struct *tsk)
4092 + tsk->need_resched = 1;
4095 +static inline void clear_tsk_need_resched(struct task_struct *tsk)
4097 + tsk->need_resched = 0;
4100 +static inline int need_resched(void)
4102 + return unlikely(current->need_resched);
4105 #endif /* __KERNEL__ */
4106 --- linux/include/linux/smp.h.orig Sun Dec 31 20:10:17 2000
4107 +++ linux/include/linux/smp.h Fri Jul 19 15:03:33 2002
4109 #define cpu_number_map(cpu) 0
4110 #define smp_call_function(func,info,retry,wait) ({ 0; })
4111 #define cpu_online_map 1
4112 +static inline void smp_send_reschedule(int cpu) { }
4113 +static inline void smp_send_reschedule_all(void) { }
4118 + * Common definitions:
4120 +#define cpu() smp_processor_id()
4123 --- linux/include/asm-generic/bitops.h.orig Tue Nov 28 02:47:38 2000
4124 +++ linux/include/asm-generic/bitops.h Thu Jul 18 15:22:34 2002
4126 return ((mask & *addr) != 0);
4130 + * fls: find last bit set.
4133 +#define fls(x) generic_fls(x)
4138 --- linux/include/asm-i386/bitops.h.orig Tue Aug 21 14:26:16 2001
4139 +++ linux/include/asm-i386/bitops.h Fri Jul 19 15:03:33 2002
4143 #include <linux/config.h>
4144 +#include <linux/compiler.h>
4147 * These have to be done with inline assembly: that way the bit-setting
4153 +static __inline__ void __clear_bit(int nr, volatile void * addr)
4155 + __asm__ __volatile__(
4160 #define smp_mb__before_clear_bit() barrier()
4161 #define smp_mb__after_clear_bit() barrier()
4163 @@ -284,6 +293,34 @@
4167 + * find_first_bit - find the first set bit in a memory region
4168 + * @addr: The address to start the search at
4169 + * @size: The maximum size to search
4171 + * Returns the bit-number of the first set bit, not the number of the byte
4172 + * containing a bit.
4174 +static __inline__ int find_first_bit(void * addr, unsigned size)
4179 + /* This looks at memory. Mark it volatile to tell gcc not to move it around */
4180 + __asm__ __volatile__(
4181 + "xorl %%eax,%%eax\n\t"
4184 + "leal -4(%%edi),%%edi\n\t"
4185 + "bsfl (%%edi),%%eax\n"
4186 + "1:\tsubl %%ebx,%%edi\n\t"
4187 + "shll $3,%%edi\n\t"
4188 + "addl %%edi,%%eax"
4189 + :"=a" (res), "=&c" (d0), "=&D" (d1)
4190 + :"1" ((size + 31) >> 5), "2" (addr), "b" (addr));
4195 * find_next_zero_bit - find the first zero bit in a memory region
4196 * @addr: The address to base the search on
4197 * @offset: The bitnumber to start searching at
4202 - * Look for zero in first byte
4203 + * Look for zero in the first 32 bits.
4205 __asm__("bsfl %1,%0\n\t"
4207 @@ -317,6 +354,39 @@
4211 + * find_next_bit - find the first set bit in a memory region
4212 + * @addr: The address to base the search on
4213 + * @offset: The bitnumber to start searching at
4214 + * @size: The maximum size to search
4216 +static __inline__ int find_next_bit (void * addr, int size, int offset)
4218 + unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
4219 + int set = 0, bit = offset & 31, res;
4223 + * Look for nonzero in the first 32 bits:
4225 + __asm__("bsfl %1,%0\n\t"
4230 + : "r" (*p >> bit));
4231 + if (set < (32 - bit))
4232 + return set + offset;
4237 + * No set bit yet, search remaining full words for a bit
4239 + res = find_first_bit (p, size - 32 * (p - (unsigned long *) addr));
4240 + return (offset + set + res);
4244 * ffz - find first zero in word.
4245 * @word: The word to search
4247 @@ -330,7 +400,40 @@
4252 + * __ffs - find first bit in word.
4253 + * @word: The word to search
4255 + * Undefined if no bit exists, so code should check against 0 first.
4257 +static __inline__ unsigned long __ffs(unsigned long word)
4259 + __asm__("bsfl %1,%0"
4268 + * Every architecture must define this function. It's the fastest
4269 + * way of searching a 140-bit bitmap where the first 100 bits are
4270 + * unlikely to be set. It's guaranteed that at least one
4271 + * bit is set (the delimiter bit just past MAX_PRIO).
4273 +static inline int _sched_find_first_bit(unsigned long *b)
4275 + if (unlikely(b[0]))
4276 + return __ffs(b[0]);
4277 + if (unlikely(b[1]))
4278 + return __ffs(b[1]) + 32;
4279 + if (unlikely(b[2]))
4280 + return __ffs(b[2]) + 64;
4281 + if (b[3])
4282 + return __ffs(b[3]) + 96;
4283 + return __ffs(b[4]) + 128;
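A portable cross-check of the hand-coded search against a naive 160-bit scan; uint32_t stands in for the 32-bit x86 unsigned long and a compiler builtin stands in for the asm __ffs() above. Both sides assume at least one bit is set, which the delimiter bit guarantees:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static int my_ffs32(uint32_t word)
{
        return __builtin_ctz(word);   /* stand-in for the asm __ffs() */
}

/* The hand-coded 5-word search, mirroring _sched_find_first_bit(). */
static int sched_ffb(const uint32_t *b)
{
        if (b[0]) return my_ffs32(b[0]);
        if (b[1]) return my_ffs32(b[1]) + 32;
        if (b[2]) return my_ffs32(b[2]) + 64;
        if (b[3]) return my_ffs32(b[3]) + 96;
        return my_ffs32(b[4]) + 128;
}

/* Generic reference: scan all 160 bits. */
static int generic_ffb(const uint32_t *b)
{
        for (int i = 0; i < 160; i++)
                if (b[i / 32] & (1u << (i % 32)))
                        return i;
        return 160;
}

int main(void)
{
        uint32_t b[5] = { 0, 0, 0, 0, 1u << 12 };  /* only the delimiter, bit 140 */
        assert(sched_ffb(b) == generic_ffb(b));    /* both find 140 */
        b[2] = 1u << 7;                            /* a task at prio 71 */
        assert(sched_ffb(b) == generic_ffb(b));
        printf("first bit: %d\n", sched_ffb(b));   /* 71 */
        return 0;
}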
4287 * ffs - find first bit set
4288 --- linux/include/asm-i386/mmu_context.h.orig Thu Jul 18 15:22:23 2002
4289 +++ linux/include/asm-i386/mmu_context.h Fri Jul 19 15:03:33 2002
4292 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu)
4294 - if (prev != next) {
4295 + if (likely(prev != next)) {
4296 /* stop flush ipis for the previous mm */
4297 clear_bit(cpu, &prev->cpu_vm_mask);
4299 * Re-load LDT if necessary
4301 - if (prev->context.segments != next->context.segments)
4302 + if (unlikely(prev->context.segments != next->context.segments))
4305 cpu_tlbstate[cpu].state = TLBSTATE_OK;
4306 --- linux/include/asm-i386/smp.h.orig Thu Jul 18 15:22:11 2002
4307 +++ linux/include/asm-i386/smp.h Fri Jul 19 15:03:33 2002
4309 extern void smp_flush_tlb(void);
4310 extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
4311 extern void smp_send_reschedule(int cpu);
4312 +extern void smp_send_reschedule_all(void);
4313 extern void smp_invalidate_rcv(void); /* Process an NMI */
4314 extern void (*mtrr_hook) (void);
4315 extern void zap_low_mappings (void);
4317 * so this is correct in the x86 case.
4320 -#define smp_processor_id() (current->processor)
4321 +#define smp_processor_id() (current->cpu)
4323 static __inline int hard_smp_processor_id(void)
4325 @@ -121,18 +122,6 @@
4326 #endif /* !__ASSEMBLY__ */
4328 #define NO_PROC_ID 0xFF /* No processor magic marker */
4331 - * This magic constant controls our willingness to transfer
4332 - * a process across CPUs. Such a transfer incurs misses on the L1
4333 - * cache, and on a P6 or P5 with multiple L2 caches L2 hits. My
4334 - * gut feeling is this will vary by board in value. For a board
4335 - * with separate L2 cache it probably depends also on the RSS, and
4336 - * for a board with shared L2 cache it ought to decay fast as other
4337 - * processes are run.
4340 -#define PROC_CHANGE_PENALTY 15 /* Schedule penalty */
4344 --- linux/include/asm-i386/system.h.orig Thu Jul 18 15:22:09 2002
4345 +++ linux/include/asm-i386/system.h Fri Jul 19 15:03:33 2002
4347 struct task_struct; /* one of the stranger aspects of C forward declarations.. */
4348 extern void FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next));
4350 -#define prepare_to_switch() do { } while(0)
4351 #define switch_to(prev,next,last) do { \
4352 asm volatile("pushl %%esi\n\t" \
4355 "movl %%esp,%0\n\t" /* save ESP */ \
4356 - "movl %3,%%esp\n\t" /* restore ESP */ \
4357 + "movl %2,%%esp\n\t" /* restore ESP */ \
4358 "movl $1f,%1\n\t" /* save EIP */ \
4359 - "pushl %4\n\t" /* restore EIP */ \
4360 + "pushl %3\n\t" /* restore EIP */ \
4361 "jmp __switch_to\n" \
4366 - :"=m" (prev->thread.esp),"=m" (prev->thread.eip), \
4368 + :"=m" (prev->thread.esp),"=m" (prev->thread.eip) \
4369 :"m" (next->thread.esp),"m" (next->thread.eip), \
4370 - "a" (prev), "d" (next), \
4372 + "a" (prev), "d" (next)); \
4375 #define _set_base(addr,base) do { unsigned long __pr; \
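
The switch_to() hunk drops one output operand and its matching input (the
removed trailing commas above mark where they went), so every later positional
reference shifts down by one; that is all the %3 to %2 and %4 to %3 renumbering
is. A toy reminder of GCC's numbering rule (x86, illustrative):

    /* GCC extended asm numbers operands left to right: all outputs
     * first, then all inputs.  Removing one renumbers the rest. */
    static int copy_reg(int in)
    {
        int out;
        /* %0 = out (first output), %1 = in (first input) */
        __asm__("movl %1,%0" : "=r" (out) : "r" (in));
        return out;
    }
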
4376 --- linux/include/asm-mips/bitops.h.orig Thu Jul 18 15:22:24 2002
4377 +++ linux/include/asm-mips/bitops.h Thu Jul 18 15:22:34 2002
4380 #ifdef CONFIG_CPU_HAS_LLSC
4382 +#include <asm/mipsregs.h>
4385 * These functions for MIPS ISA > 1 are interrupt and SMP proof and
4386 * interrupt friendly
4389 : "=r" (res), "=r" (dummy), "=r" (addr)
4390 : "0" ((signed int) 0), "1" ((unsigned int) 0xffffffff),
4391 - "2" (addr), "r" (size));
4392 + "2" (addr), "r" (size)
4400 : "=r" (set), "=r" (dummy)
4401 - : "0" (0), "1" (1 << bit), "r" (*p));
4402 + : "0" (0), "1" (1 << bit), "r" (*p)
4404 if (set < (32 - bit))
4405 return set + offset;
4407 @@ -684,20 +688,29 @@
4409 * Undefined if no zero exists, so code should check against ~0UL first.
4411 -static __inline__ unsigned long ffz(unsigned long word)
4412 +extern __inline__ unsigned long ffz(unsigned long word)
4415 + unsigned int __res;
4416 + unsigned int mask = 1;
4419 - s = 16; if (word << 16 != 0) s = 0; b += s; word >>= s;
4420 - s = 8; if (word << 24 != 0) s = 0; b += s; word >>= s;
4421 - s = 4; if (word << 28 != 0) s = 0; b += s; word >>= s;
4422 - s = 2; if (word << 30 != 0) s = 0; b += s; word >>= s;
4423 - s = 1; if (word << 31 != 0) s = 0; b += s;
4425 + ".set\tnoreorder\n\t"
4428 + "1:\tand\t$1,%2,%1\n\t"
4436 + : "=&r" (__res), "=r" (mask)
4437 + : "r" (word), "1" (mask)
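
The MIPS hunks split the constraint lists' closing parentheses onto their own
lines, presumably so a clobber list can follow (the added closing lines fall
outside this excerpt), and replace the C fallback for ffz() with an asm loop.
The deleted shift-based search was computing "index of the first set bit of
~word"; a portable sketch of the same computation, assuming 32-bit words as on
mips32:

    /* ffz(word) == __ffs(~word): invert, then binary-search for the
     * lowest set bit, halving the window each step.  Undefined for
     * ~0u, exactly like the kernel version. */
    static unsigned int ffz_portable(unsigned int word)
    {
        unsigned int b = 0;

        word = ~word;                   /* now look for the first 1 */
        if (!(word & 0xffff)) { b += 16; word >>= 16; }
        if (!(word & 0x00ff)) { b += 8;  word >>= 8;  }
        if (!(word & 0x000f)) { b += 4;  word >>= 4;  }
        if (!(word & 0x0003)) { b += 2;  word >>= 2;  }
        if (!(word & 0x0001)) { b += 1; }
        return b;   /* e.g. ffz_portable(0x0000ffff) == 16 */
    }
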
4447 --- linux/include/asm-alpha/bitops.h.orig Thu Jul 18 15:22:00 2002
4448 +++ linux/include/asm-alpha/bitops.h Thu Jul 18 15:22:34 2002
4451 #include <linux/config.h>
4452 #include <linux/kernel.h>
4453 +#include <asm/compiler.h>
4456 * Copyright 1994, Linus Torvalds.
4459 __asm__ __volatile__(
4468 :"=&r" (temp), "=m" (*m)
4469 - :"Ir" (~(1UL << (nr & 31))), "m" (*m));
4470 + :"Ir" (1UL << (nr & 31)), "m" (*m));
4474 * WARNING: non atomic version.
4476 static __inline__ void
4477 -__change_bit(unsigned long nr, volatile void * addr)
4478 +__clear_bit(unsigned long nr, volatile void * addr)
4480 int *m = ((int *) addr) + (nr >> 5);
4482 - *m ^= 1 << (nr & 31);
4483 + *m &= ~(1 << (nr & 31));
4488 :"Ir" (1UL << (nr & 31)), "m" (*m));
4492 + * WARNING: non atomic version.
4494 +static __inline__ void
4495 +__change_bit(unsigned long nr, volatile void * addr)
4497 + int *m = ((int *) addr) + (nr >> 5);
4499 + *m ^= 1 << (nr & 31);
4503 test_and_set_bit(unsigned long nr, volatile void *addr)
4505 @@ -181,20 +193,6 @@
4506 return (old & mask) != 0;
4510 - * WARNING: non atomic version.
4512 -static __inline__ int
4513 -__test_and_change_bit(unsigned long nr, volatile void * addr)
4515 - unsigned long mask = 1 << (nr & 0x1f);
4516 - int *m = ((int *) addr) + (nr >> 5);
4520 - return (old & mask) != 0;
4524 test_and_change_bit(unsigned long nr, volatile void * addr)
4526 @@ -220,6 +218,20 @@
4531 + * WARNING: non atomic version.
4533 +static __inline__ int
4534 +__test_and_change_bit(unsigned long nr, volatile void * addr)
4536 + unsigned long mask = 1 << (nr & 0x1f);
4537 + int *m = ((int *) addr) + (nr >> 5);
4541 + return (old & mask) != 0;
4545 test_bit(int nr, volatile void * addr)
4547 @@ -235,12 +247,15 @@
4549 static inline unsigned long ffz_b(unsigned long x)
4551 - unsigned long sum = 0;
4552 + unsigned long sum, x1, x2, x4;
4554 x = ~x & -~x; /* set first 0 bit, clear others */
4555 - if (x & 0xF0) sum += 4;
4556 - if (x & 0xCC) sum += 2;
4557 - if (x & 0xAA) sum += 1;
4562 + sum += (x4 != 0) * 4;
4567 @@ -257,24 +272,46 @@
4569 __asm__("cmpbge %1,%2,%0" : "=r"(bits) : "r"(word), "r"(~0UL));
4571 - __asm__("extbl %1,%2,%0" : "=r"(bits) : "r"(word), "r"(qofs));
4572 + bits = __kernel_extbl(word, qofs);
4575 return qofs*8 + bofs;
4580 + * __ffs = Find First set bit in word. Undefined if no set bit exists.
4582 +static inline unsigned long __ffs(unsigned long word)
4584 +#if defined(__alpha_cix__) && defined(__alpha_fix__)
4585 + /* Whee. EV67 can calculate it directly. */
4586 + unsigned long result;
4587 + __asm__("cttz %1,%0" : "=r"(result) : "r"(word));
4590 + unsigned long bits, qofs, bofs;
4592 + __asm__("cmpbge $31,%1,%0" : "=r"(bits) : "r"(word));
4593 + qofs = ffz_b(bits);
4594 + bits = __kernel_extbl(word, qofs);
4595 + bofs = ffz_b(~bits);
4597 + return qofs*8 + bofs;
4604 * ffs: find first bit set. This is defined the same way as
4605 * the libc and compiler builtin ffs routines, therefore
4606 - * differs in spirit from the above ffz (man ffs).
4607 + * differs in spirit from the above __ffs.
4610 static inline int ffs(int word)
4612 - int result = ffz(~word);
4613 + int result = __ffs(word);
4614 return word ? result+1 : 0;
4617 @@ -316,6 +353,14 @@
4618 #define hweight16(x) hweight64((x) & 0xfffful)
4619 #define hweight8(x) hweight64((x) & 0xfful)
4621 +static inline unsigned long hweight64(unsigned long w)
4623 + unsigned long result;
4624 + for (result = 0; w ; w >>= 1)
4625 + result += (w & 1);
4629 #define hweight32(x) generic_hweight32(x)
4630 #define hweight16(x) generic_hweight16(x)
4631 #define hweight8(x) generic_hweight8(x)
4632 @@ -365,12 +410,76 @@
4636 - * The optimizer actually does good code for this case..
4637 + * Find next one bit in a bitmap reasonably efficiently.
4639 +static inline unsigned long
4640 +find_next_bit(void * addr, unsigned long size, unsigned long offset)
4642 + unsigned long * p = ((unsigned long *) addr) + (offset >> 6);
4643 + unsigned long result = offset & ~63UL;
4644 + unsigned long tmp;
4646 + if (offset >= size)
4652 + tmp &= ~0UL << offset;
4656 + goto found_middle;
4660 + while (size & ~63UL) {
4661 + if ((tmp = *(p++)))
4662 + goto found_middle;
4670 + tmp &= ~0UL >> (64 - size);
4672 + return result + size;
4674 + return result + __ffs(tmp);
4678 + * The optimizer actually does good code for this case.
4680 #define find_first_zero_bit(addr, size) \
4681 find_next_zero_bit((addr), (size), 0)
4682 +#define find_first_bit(addr, size) \
4683 + find_next_bit((addr), (size), 0)
4688 + * Every architecture must define this function. It's the fastest
4689 + * way of searching a 140-bit bitmap where the first 100 bits are
4690 + * unlikely to be set. It's guaranteed that at least one of the 140
4693 +static inline unsigned long
4694 +sched_find_first_bit(unsigned long b[3])
4696 + unsigned long b0 = b[0], b1 = b[1], b2 = b[2];
4697 + unsigned long ofs;
4699 + ofs = (b1 ? 64 : 128);
4700 + b1 = (b1 ? b1 : b2);
4701 + ofs = (b0 ? 0 : ofs);
4702 + b0 = (b0 ? b0 : b1);
4704 + return __ffs(b0) + ofs;
4708 #define ext2_set_bit __test_and_set_bit
4709 #define ext2_clear_bit __test_and_clear_bit
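
The alpha sched_find_first_bit() packs the 140-bit bitmap into three 64-bit
words and picks both the word and its offset without branching, so each ?: can
compile to a conditional move. A portable mirror of the same selection,
assuming 64-bit longs and using GCC's __builtin_ctzl in place of the
cmpbge/cttz-based __ffs() above (illustrative, not the kernel code):

    /* Branch-free "first set bit of a 3x64 bitmap".  At least one bit
     * must be set, as the kernel comment guarantees. */
    static unsigned long sched_ffb_3x64(const unsigned long b[3])
    {
        unsigned long b0 = b[0], b1 = b[1], b2 = b[2];
        unsigned long ofs;

        ofs = (b1 ? 64 : 128);   /* fallback offset if b0 is empty */
        b1  = (b1 ? b1 : b2);    /* fallback word if b0 is empty */
        ofs = (b0 ? 0 : ofs);
        b0  = (b0 ? b0 : b1);
        return __builtin_ctzl(b0) + ofs;
    }
    /* e.g. b = {0, 0x10, 0}: word 1, bit 4, so the result is 68 */
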
4710 --- linux/include/asm-m68k/bitops.h.orig Thu Jul 18 15:22:02 2002
4711 +++ linux/include/asm-m68k/bitops.h Thu Jul 18 15:22:34 2002
4713 (__builtin_constant_p(nr) ? \
4714 __constant_clear_bit(nr, vaddr) : \
4715 __generic_clear_bit(nr, vaddr))
4716 +#define __clear_bit(nr,vaddr) clear_bit(nr,vaddr)
4718 extern __inline__ void __constant_clear_bit(int nr, volatile void * vaddr)
4720 @@ -239,6 +240,28 @@
4724 +#define __ffs(x) (ffs(x) - 1)
4728 + * Every architecture must define this function. It's the fastest
4729 + * way of searching a 140-bit bitmap where the first 100 bits are
4730 + * unlikely to be set. It's guaranteed that at least one of the 140
4731 + * bits is set.
4733 +static inline int sched_find_first_bit(unsigned long *b)
4735 + if (unlikely(b[0]))
4736 + return __ffs(b[0]);
4737 + if (unlikely(b[1]))
4738 + return __ffs(b[1]) + 32;
4739 + if (unlikely(b[2]))
4740 + return __ffs(b[2]) + 64;
4742 + return __ffs(b[3]) + 96;
4743 + return __ffs(b[4]) + 128;
4748 * hweightN: returns the hamming weight (i.e. the number
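
On m68k __ffs() is simply ffs() minus one, which is the entire difference
between the two conventions: ffs() is 1-based and defines ffs(0) == 0 (the libc
rule), while __ffs() is a 0-based bit index that is undefined for 0. A quick
check of the relationship against the libc ffs() (illustrative):

    #include <strings.h>    /* libc ffs() */
    #include <assert.h>

    int main(void)
    {
        assert(ffs(8) == 4);        /* bit 3, reported 1-based */
        assert(ffs(8) - 1 == 3);    /* the __ffs()-style 0-based index */
        assert(ffs(0) == 0);        /* only ffs() defines the zero case */
        return 0;
    }
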
4749 --- linux/include/asm-ppc/bitops.h.orig Tue Aug 21 14:26:19 2001
4750 +++ linux/include/asm-ppc/bitops.h Thu Jul 18 15:22:34 2002
4753 - * BK Id: SCCS/s.bitops.h 1.9 05/26/01 14:48:14 paulus
4754 + * BK Id: %F% %I% %G% %U% %#%
4757 * bitops.h: Bit string operations on the ppc
4759 #define _PPC_BITOPS_H
4761 #include <linux/config.h>
4762 +#include <linux/compiler.h>
4763 #include <asm/byteorder.h>
4764 +#include <asm/atomic.h>
4767 * The test_and_*_bit operations are taken to imply a memory barrier
4769 * These used to be if'd out here because using : "cc" as a constraint
4770 * resulted in errors from egcs. Things appear to be OK with gcc-2.95.
4772 -static __inline__ void set_bit(int nr, volatile void * addr)
4773 +static __inline__ void set_bit(int nr, volatile unsigned long * addr)
4776 unsigned long mask = 1 << (nr & 0x1f);
4779 __asm__ __volatile__("\n\
4780 1: lwarx %0,0,%3 \n\
4782 - stwcx. %0,0,%3 \n\
4784 + PPC405_ERR77(0,%3)
4785 +" stwcx. %0,0,%3 \n\
4787 : "=&r" (old), "=m" (*p)
4788 : "r" (mask), "r" (p), "m" (*p)
4791 * non-atomic version
4793 -static __inline__ void __set_bit(int nr, volatile void *addr)
4794 +static __inline__ void __set_bit(int nr, volatile unsigned long *addr)
4796 unsigned long mask = 1 << (nr & 0x1f);
4797 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
4799 #define smp_mb__before_clear_bit() smp_mb()
4800 #define smp_mb__after_clear_bit() smp_mb()
4802 -static __inline__ void clear_bit(int nr, volatile void *addr)
4803 +static __inline__ void clear_bit(int nr, volatile unsigned long *addr)
4806 unsigned long mask = 1 << (nr & 0x1f);
4809 __asm__ __volatile__("\n\
4810 1: lwarx %0,0,%3 \n\
4812 - stwcx. %0,0,%3 \n\
4814 + PPC405_ERR77(0,%3)
4815 +" stwcx. %0,0,%3 \n\
4817 : "=&r" (old), "=m" (*p)
4818 : "r" (mask), "r" (p), "m" (*p)
4821 * non-atomic version
4823 -static __inline__ void __clear_bit(int nr, volatile void *addr)
4824 +static __inline__ void __clear_bit(int nr, volatile unsigned long *addr)
4826 unsigned long mask = 1 << (nr & 0x1f);
4827 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
4832 -static __inline__ void change_bit(int nr, volatile void *addr)
4833 +static __inline__ void change_bit(int nr, volatile unsigned long *addr)
4836 unsigned long mask = 1 << (nr & 0x1f);
4839 __asm__ __volatile__("\n\
4840 1: lwarx %0,0,%3 \n\
4842 - stwcx. %0,0,%3 \n\
4844 + PPC405_ERR77(0,%3)
4845 +" stwcx. %0,0,%3 \n\
4847 : "=&r" (old), "=m" (*p)
4848 : "r" (mask), "r" (p), "m" (*p)
4851 * non-atomic version
4853 -static __inline__ void __change_bit(int nr, volatile void *addr)
4854 +static __inline__ void __change_bit(int nr, volatile unsigned long *addr)
4856 unsigned long mask = 1 << (nr & 0x1f);
4857 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
4860 * test_and_*_bit do imply a memory barrier (?)
4862 -static __inline__ int test_and_set_bit(int nr, volatile void *addr)
4863 +static __inline__ int test_and_set_bit(int nr, volatile unsigned long *addr)
4865 unsigned int old, t;
4866 unsigned int mask = 1 << (nr & 0x1f);
4869 __asm__ __volatile__(SMP_WMB "\n\
4870 1: lwarx %0,0,%4 \n\
4872 - stwcx. %1,0,%4 \n\
4874 + PPC405_ERR77(0,%4)
4875 +" stwcx. %1,0,%4 \n\
4878 : "=&r" (old), "=&r" (t), "=m" (*p)
4881 * non-atomic version
4883 -static __inline__ int __test_and_set_bit(int nr, volatile void *addr)
4884 +static __inline__ int __test_and_set_bit(int nr, volatile unsigned long *addr)
4886 unsigned long mask = 1 << (nr & 0x1f);
4887 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
4889 return (old & mask) != 0;
4892 -static __inline__ int test_and_clear_bit(int nr, volatile void *addr)
4893 +static __inline__ int test_and_clear_bit(int nr, volatile unsigned long *addr)
4895 unsigned int old, t;
4896 unsigned int mask = 1 << (nr & 0x1f);
4899 __asm__ __volatile__(SMP_WMB "\n\
4900 1: lwarx %0,0,%4 \n\
4902 - stwcx. %1,0,%4 \n\
4904 + PPC405_ERR77(0,%4)
4905 +" stwcx. %1,0,%4 \n\
4908 : "=&r" (old), "=&r" (t), "=m" (*p)
4911 * non-atomic version
4913 -static __inline__ int __test_and_clear_bit(int nr, volatile void *addr)
4914 +static __inline__ int __test_and_clear_bit(int nr, volatile unsigned long *addr)
4916 unsigned long mask = 1 << (nr & 0x1f);
4917 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
4919 return (old & mask) != 0;
4922 -static __inline__ int test_and_change_bit(int nr, volatile void *addr)
4923 +static __inline__ int test_and_change_bit(int nr, volatile unsigned long *addr)
4925 unsigned int old, t;
4926 unsigned int mask = 1 << (nr & 0x1f);
4929 __asm__ __volatile__(SMP_WMB "\n\
4930 1: lwarx %0,0,%4 \n\
4932 - stwcx. %1,0,%4 \n\
4934 + PPC405_ERR77(0,%4)
4935 +" stwcx. %1,0,%4 \n\
4938 : "=&r" (old), "=&r" (t), "=m" (*p)
4941 * non-atomic version
4943 -static __inline__ int __test_and_change_bit(int nr, volatile void *addr)
4944 +static __inline__ int __test_and_change_bit(int nr, volatile unsigned long *addr)
4946 unsigned long mask = 1 << (nr & 0x1f);
4947 unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
4949 return (old & mask) != 0;
4952 -static __inline__ int test_bit(int nr, __const__ volatile void *addr)
4953 +static __inline__ int test_bit(int nr, __const__ volatile unsigned long *addr)
4955 __const__ unsigned int *p = (__const__ unsigned int *) addr;
4960 /* Return the bit position of the most significant 1 bit in a word */
4961 -static __inline__ int __ilog2(unsigned int x)
4962 +static __inline__ int __ilog2(unsigned long x)
4970 -static __inline__ int ffz(unsigned int x)
4971 +static __inline__ int ffz(unsigned long x)
4975 @@ -239,6 +247,11 @@
4979 +static inline int __ffs(unsigned long x)
4981 + return __ilog2(x & -x);
4985 * ffs: find first bit set. This is defined the same way as
4986 * the libc and compiler builtin ffs routines, therefore
4987 @@ -250,6 +263,18 @@
4991 + * fls: find last (most-significant) bit set.
4992 + * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
4994 +static __inline__ int fls(unsigned int x)
4998 + asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x));
5003 * hweightN: returns the hamming weight (i.e. the number
5004 * of bits set) of a N-bit word
5006 @@ -261,13 +286,86 @@
5007 #endif /* __KERNEL__ */
5010 + * Find the first bit set in a 140-bit bitmap.
5011 + * The first 100 bits are unlikely to be set.
5013 +static inline int sched_find_first_bit(unsigned long *b)
5015 + if (unlikely(b[0]))
5016 + return __ffs(b[0]);
5017 + if (unlikely(b[1]))
5018 + return __ffs(b[1]) + 32;
5019 + if (unlikely(b[2]))
5020 + return __ffs(b[2]) + 64;
5022 + return __ffs(b[3]) + 96;
5023 + return __ffs(b[4]) + 128;
5027 + * find_next_bit - find the next set bit in a memory region
5028 + * @addr: The address to base the search on
5029 + * @offset: The bitnumber to start searching at
5030 + * @size: The maximum size to search
5032 +static __inline__ unsigned long find_next_bit(unsigned long *addr,
5033 + unsigned long size, unsigned long offset)
5035 + unsigned int *p = ((unsigned int *) addr) + (offset >> 5);
5036 + unsigned int result = offset & ~31UL;
5039 + if (offset >= size)
5045 + tmp &= ~0UL << offset;
5049 + goto found_middle;
5053 + while (size >= 32) {
5054 + if ((tmp = *p++) != 0)
5055 + goto found_middle;
5064 + tmp &= ~0UL >> (32 - size);
5065 + if (tmp == 0UL) /* Are any bits set? */
5066 + return result + size; /* Nope. */
5068 + return result + __ffs(tmp);
5072 + * find_first_bit - find the first set bit in a memory region
5073 + * @addr: The address to start the search at
5074 + * @size: The maximum size to search
5076 + * Returns the bit-number of the first set bit, not the number of the byte
5077 + * containing a bit.
5079 +#define find_first_bit(addr, size) \
5080 + find_next_bit((addr), (size), 0)
5083 * This implementation of find_{first,next}_zero_bit was stolen from
5084 * Linus' asm-alpha/bitops.h.
5086 #define find_first_zero_bit(addr, size) \
5087 find_next_zero_bit((addr), (size), 0)
5089 -static __inline__ unsigned long find_next_zero_bit(void * addr,
5090 +static __inline__ unsigned long find_next_zero_bit(unsigned long * addr,
5091 unsigned long size, unsigned long offset)
5093 unsigned int * p = ((unsigned int *) addr) + (offset >> 5);
5098 -#define ext2_set_bit(nr, addr) __test_and_set_bit((nr) ^ 0x18, addr)
5099 -#define ext2_clear_bit(nr, addr) __test_and_clear_bit((nr) ^ 0x18, addr)
5100 +#define ext2_set_bit(nr, addr) __test_and_set_bit((nr) ^ 0x18, (unsigned long *)(addr))
5101 +#define ext2_clear_bit(nr, addr) __test_and_clear_bit((nr) ^ 0x18, (unsigned long *)(addr))
5103 static __inline__ int ext2_test_bit(int nr, __const__ void * addr)
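
Besides widening the bitop prototypes and slotting the PPC405_ERR77 erratum
workaround in front of each stwcx., the PPC hunk gains an fls() built on
cntlzw, which counts leading zeros and yields 32 for an all-zero word, exactly
what the documented values require. A portable equivalent (assuming GCC;
__builtin_clz is undefined for 0, so that case is handled separately):

    /* fls(x) = 32 - (number of leading zeros in x), with fls(0) = 0:
     * fls(1) = 1, fls(0x80000000) = 32, matching the comment above. */
    static int fls_portable(unsigned int x)
    {
        return x ? 32 - __builtin_clz(x) : 0;
    }
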
5105 --- linux/include/asm-sparc64/system.h.orig Thu Jul 18 15:22:25 2002
5106 +++ linux/include/asm-sparc64/system.h Thu Jul 18 15:22:34 2002
5107 @@ -143,7 +143,11 @@
5109 #define flush_user_windows flushw_user
5110 #define flush_register_windows flushw_all
5111 -#define prepare_to_switch flushw_all
5113 +#define prepare_arch_schedule(prev) task_lock(prev)
5114 +#define finish_arch_schedule(prev) task_unlock(prev)
5115 +#define prepare_arch_switch(rq) do { spin_unlock(&(rq)->lock); flushw_all(); } while (0)
5116 +#define finish_arch_switch(rq) __sti()
5118 #ifndef CONFIG_DEBUG_SPINLOCK
5119 #define CHECK_LOCKS(PREV) do { } while(0)
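
The single prepare_to_switch() hook splits into four finer-grained ones, which
lets sparc64 drop the runqueue lock and flush its register windows before the
switch and re-enable interrupts after it. Roughly how the core scheduler
brackets a switch with these hooks; a simplified sketch, not the literal
sched.c code:

    /*
     * Simplified shape of the switch path.  The hooks default to
     * no-ops unless an architecture overrides them, as sparc64 does:
     *
     *   prepare_arch_schedule(prev);   sparc64: task_lock(prev)
     *   ... pick next from this CPU's runqueue ...
     *   prepare_arch_switch(rq);       sparc64: unlock rq, flushw_all()
     *   switch_to(prev, next, prev);
     *   finish_arch_switch(rq);        sparc64: __sti(), irqs back on
     *   finish_arch_schedule(prev);    sparc64: task_unlock(prev)
     */
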
5120 --- linux/include/asm-sparc64/bitops.h.orig Thu Jul 18 15:22:11 2002
5121 +++ linux/include/asm-sparc64/bitops.h Thu Jul 18 15:22:34 2002
5123 #ifndef _SPARC64_BITOPS_H
5124 #define _SPARC64_BITOPS_H
5126 +#include <linux/compiler.h>
5127 #include <asm/byteorder.h>
5129 -extern long ___test_and_set_bit(unsigned long nr, volatile void *addr);
5130 -extern long ___test_and_clear_bit(unsigned long nr, volatile void *addr);
5131 -extern long ___test_and_change_bit(unsigned long nr, volatile void *addr);
5132 +extern long ___test_and_set_bit(unsigned long nr, volatile unsigned long *addr);
5133 +extern long ___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr);
5134 +extern long ___test_and_change_bit(unsigned long nr, volatile unsigned long *addr);
5136 #define test_and_set_bit(nr,addr) ({___test_and_set_bit(nr,addr)!=0;})
5137 #define test_and_clear_bit(nr,addr) ({___test_and_clear_bit(nr,addr)!=0;})
5138 @@ -21,109 +22,132 @@
5139 #define change_bit(nr,addr) ((void)___test_and_change_bit(nr,addr))
5141 /* "non-atomic" versions... */
5142 -#define __set_bit(X,Y) \
5143 -do { unsigned long __nr = (X); \
5144 - long *__m = ((long *) (Y)) + (__nr >> 6); \
5145 - *__m |= (1UL << (__nr & 63)); \
5147 -#define __clear_bit(X,Y) \
5148 -do { unsigned long __nr = (X); \
5149 - long *__m = ((long *) (Y)) + (__nr >> 6); \
5150 - *__m &= ~(1UL << (__nr & 63)); \
5152 -#define __change_bit(X,Y) \
5153 -do { unsigned long __nr = (X); \
5154 - long *__m = ((long *) (Y)) + (__nr >> 6); \
5155 - *__m ^= (1UL << (__nr & 63)); \
5157 -#define __test_and_set_bit(X,Y) \
5158 -({ unsigned long __nr = (X); \
5159 - long *__m = ((long *) (Y)) + (__nr >> 6); \
5160 - long __old = *__m; \
5161 - long __mask = (1UL << (__nr & 63)); \
5162 - *__m = (__old | __mask); \
5163 - ((__old & __mask) != 0); \
5165 -#define __test_and_clear_bit(X,Y) \
5166 -({ unsigned long __nr = (X); \
5167 - long *__m = ((long *) (Y)) + (__nr >> 6); \
5168 - long __old = *__m; \
5169 - long __mask = (1UL << (__nr & 63)); \
5170 - *__m = (__old & ~__mask); \
5171 - ((__old & __mask) != 0); \
5173 -#define __test_and_change_bit(X,Y) \
5174 -({ unsigned long __nr = (X); \
5175 - long *__m = ((long *) (Y)) + (__nr >> 6); \
5176 - long __old = *__m; \
5177 - long __mask = (1UL << (__nr & 63)); \
5178 - *__m = (__old ^ __mask); \
5179 - ((__old & __mask) != 0); \
5182 +static __inline__ void __set_bit(int nr, volatile unsigned long *addr)
5184 + volatile unsigned long *m = addr + (nr >> 6);
5186 + *m |= (1UL << (nr & 63));
5189 +static __inline__ void __clear_bit(int nr, volatile unsigned long *addr)
5191 + volatile unsigned long *m = addr + (nr >> 6);
5193 + *m &= ~(1UL << (nr & 63));
5196 +static __inline__ void __change_bit(int nr, volatile unsigned long *addr)
5198 + volatile unsigned long *m = addr + (nr >> 6);
5200 + *m ^= (1UL << (nr & 63));
5203 +static __inline__ int __test_and_set_bit(int nr, volatile unsigned long *addr)
5205 + volatile unsigned long *m = addr + (nr >> 6);
5207 + long mask = (1UL << (nr & 63));
5209 + *m = (old | mask);
5210 + return ((old & mask) != 0);
5213 +static __inline__ int __test_and_clear_bit(int nr, volatile unsigned long *addr)
5215 + volatile unsigned long *m = addr + (nr >> 6);
5217 + long mask = (1UL << (nr & 63));
5219 + *m = (old & ~mask);
5220 + return ((old & mask) != 0);
5223 +static __inline__ int __test_and_change_bit(int nr, volatile unsigned long *addr)
5225 + volatile unsigned long *m = addr + (nr >> 6);
5227 + long mask = (1UL << (nr & 63));
5229 + *m = (old ^ mask);
5230 + return ((old & mask) != 0);
5233 #define smp_mb__before_clear_bit() do { } while(0)
5234 #define smp_mb__after_clear_bit() do { } while(0)
5236 -extern __inline__ int test_bit(int nr, __const__ void *addr)
5237 +static __inline__ int test_bit(int nr, __const__ volatile unsigned long *addr)
5239 - return (1UL & (((__const__ long *) addr)[nr >> 6] >> (nr & 63))) != 0UL;
5240 + return (1UL & ((addr)[nr >> 6] >> (nr & 63))) != 0UL;
5243 /* The easy/cheese version for now. */
5244 -extern __inline__ unsigned long ffz(unsigned long word)
5245 +static __inline__ unsigned long ffz(unsigned long word)
5247 unsigned long result;
5249 -#ifdef ULTRA_HAS_POPULATION_COUNT /* Thanks for nothing Sun... */
5250 - __asm__ __volatile__(
5253 -" xnor %0, %%g1, %%g2\n"
5255 -"1: " : "=&r" (result)
5259 -#if 1 /* def EASY_CHEESE_VERSION */
5266 - unsigned long tmp;
5271 - tmp = ~word & -~word;
5272 - if (!(unsigned)tmp) {
5276 - if (!(unsigned short)tmp) {
5280 - if (!(unsigned char)tmp) {
5284 + * __ffs - find first bit in word.
5285 + * @word: The word to search
5287 + * Undefined if no set bit exists, so code should check against 0 first.
5289 +static __inline__ unsigned long __ffs(unsigned long word)
5291 + unsigned long result = 0;
5293 + while (!(word & 1UL)) {
5297 - if (tmp & 0xf0) result += 4;
5298 - if (tmp & 0xcc) result += 2;
5299 - if (tmp & 0xaa) result ++;
5306 + * fls: find last bit set.
5309 +#define fls(x) generic_fls(x)
5314 + * Every architecture must define this function. It's the fastest
5315 + * way of searching a 140-bit bitmap where the first 100 bits are
5316 + * unlikely to be set. It's guaranteed that at least one of the 140
5317 + * bits is set.
5319 +static inline int sched_find_first_bit(unsigned long *b)
5321 + if (unlikely(b[0]))
5322 + return __ffs(b[0]);
5323 + if (unlikely(((unsigned int)b[1])))
5324 + return __ffs(b[1]) + 64;
5326 + return __ffs(b[1] >> 32) + 96;
5327 + return __ffs(b[2]) + 128;
5331 * ffs: find first bit set. This is defined the same way as
5332 * the libc and compiler builtin ffs routines, therefore
5333 * differs in spirit from the above ffz (man ffs).
5336 -#define ffs(x) generic_ffs(x)
5337 +static __inline__ int ffs(int x)
5341 + return __ffs((unsigned long)x);
5345 * hweightN: returns the hamming weight (i.e. the number
5348 #ifdef ULTRA_HAS_POPULATION_COUNT
5350 -extern __inline__ unsigned int hweight32(unsigned int w)
5351 +static __inline__ unsigned int hweight32(unsigned int w)
5359 -extern __inline__ unsigned int hweight16(unsigned int w)
5360 +static __inline__ unsigned int hweight16(unsigned int w)
5368 -extern __inline__ unsigned int hweight8(unsigned int w)
5369 +static __inline__ unsigned int hweight8(unsigned int w)
5373 @@ -165,14 +189,69 @@
5375 #endif /* __KERNEL__ */
5378 + * find_next_bit - find the next set bit in a memory region
5379 + * @addr: The address to base the search on
5380 + * @offset: The bitnumber to start searching at
5381 + * @size: The maximum size to search
5383 +static __inline__ unsigned long find_next_bit(unsigned long *addr, unsigned long size, unsigned long offset)
5385 + unsigned long *p = addr + (offset >> 6);
5386 + unsigned long result = offset & ~63UL;
5387 + unsigned long tmp;
5389 + if (offset >= size)
5395 + tmp &= (~0UL << offset);
5399 + goto found_middle;
5403 + while (size & ~63UL) {
5404 + if ((tmp = *(p++)))
5405 + goto found_middle;
5414 + tmp &= (~0UL >> (64 - size));
5415 + if (tmp == 0UL) /* Are any bits set? */
5416 + return result + size; /* Nope. */
5418 + return result + __ffs(tmp);
5422 + * find_first_bit - find the first set bit in a memory region
5423 + * @addr: The address to start the search at
5424 + * @size: The maximum size to search
5426 + * Returns the bit-number of the first set bit, not the number of the byte
5427 + * containing a bit.
5429 +#define find_first_bit(addr, size) \
5430 + find_next_bit((addr), (size), 0)
5432 /* find_next_zero_bit() finds the first zero bit in a bit string of length
5433 * 'size' bits, starting the search at bit 'offset'. This is largely based
5434 * on Linus's ALPHA routines, which are pretty portable BTW.
5437 -extern __inline__ unsigned long find_next_zero_bit(void *addr, unsigned long size, unsigned long offset)
5438 +static __inline__ unsigned long find_next_zero_bit(unsigned long *addr, unsigned long size, unsigned long offset)
5440 - unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
5441 + unsigned long *p = addr + (offset >> 6);
5442 unsigned long result = offset & ~63UL;
5445 @@ -211,15 +290,15 @@
5446 #define find_first_zero_bit(addr, size) \
5447 find_next_zero_bit((addr), (size), 0)
5449 -extern long ___test_and_set_le_bit(int nr, volatile void *addr);
5450 -extern long ___test_and_clear_le_bit(int nr, volatile void *addr);
5451 +extern long ___test_and_set_le_bit(int nr, volatile unsigned long *addr);
5452 +extern long ___test_and_clear_le_bit(int nr, volatile unsigned long *addr);
5454 #define test_and_set_le_bit(nr,addr) ({___test_and_set_le_bit(nr,addr)!=0;})
5455 #define test_and_clear_le_bit(nr,addr) ({___test_and_clear_le_bit(nr,addr)!=0;})
5456 #define set_le_bit(nr,addr) ((void)___test_and_set_le_bit(nr,addr))
5457 #define clear_le_bit(nr,addr) ((void)___test_and_clear_le_bit(nr,addr))
5459 -extern __inline__ int test_le_bit(int nr, __const__ void * addr)
5460 +static __inline__ int test_le_bit(int nr, __const__ unsigned long * addr)
5463 __const__ unsigned char *ADDR = (__const__ unsigned char *) addr;
5465 #define find_first_zero_le_bit(addr, size) \
5466 find_next_zero_le_bit((addr), (size), 0)
5468 -extern __inline__ unsigned long find_next_zero_le_bit(void *addr, unsigned long size, unsigned long offset)
5469 +static __inline__ unsigned long find_next_zero_le_bit(unsigned long *addr, unsigned long size, unsigned long offset)
5471 - unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
5472 + unsigned long *p = addr + (offset >> 6);
5473 unsigned long result = offset & ~63UL;
5476 @@ -271,18 +350,22 @@
5480 -#define ext2_set_bit test_and_set_le_bit
5481 -#define ext2_clear_bit test_and_clear_le_bit
5482 -#define ext2_test_bit test_le_bit
5483 -#define ext2_find_first_zero_bit find_first_zero_le_bit
5484 -#define ext2_find_next_zero_bit find_next_zero_le_bit
5485 +#define ext2_set_bit(nr,addr) test_and_set_le_bit((nr),(unsigned long *)(addr))
5486 +#define ext2_clear_bit(nr,addr) test_and_clear_le_bit((nr),(unsigned long *)(addr))
5487 +#define ext2_test_bit(nr,addr) test_le_bit((nr),(unsigned long *)(addr))
5488 +#define ext2_find_first_zero_bit(addr, size) \
5489 + find_first_zero_le_bit((unsigned long *)(addr), (size))
5490 +#define ext2_find_next_zero_bit(addr, size, off) \
5491 + find_next_zero_le_bit((unsigned long *)(addr), (size), (off))
5493 /* Bitmap functions for the minix filesystem. */
5494 -#define minix_test_and_set_bit(nr,addr) test_and_set_bit(nr,addr)
5495 -#define minix_set_bit(nr,addr) set_bit(nr,addr)
5496 -#define minix_test_and_clear_bit(nr,addr) test_and_clear_bit(nr,addr)
5497 -#define minix_test_bit(nr,addr) test_bit(nr,addr)
5498 -#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
5499 +#define minix_test_and_set_bit(nr,addr) test_and_set_bit((nr),(unsigned long *)(addr))
5500 +#define minix_set_bit(nr,addr) set_bit((nr),(unsigned long *)(addr))
5501 +#define minix_test_and_clear_bit(nr,addr) \
5502 + test_and_clear_bit((nr),(unsigned long *)(addr))
5503 +#define minix_test_bit(nr,addr) test_bit((nr),(unsigned long *)(addr))
5504 +#define minix_find_first_zero_bit(addr,size) \
5505 + find_first_zero_bit((unsigned long *)(addr),(size))
5507 #endif /* __KERNEL__ */
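
The ext2 and minix macro changes are type plumbing for the new unsigned long *
prototypes, but the underlying point deserves a note: ext2 defines its on-disk
bitmaps as little-endian, so a big-endian host like sparc64 must address them
bytewise instead of with native word ops. The mapping the *_le_bit helpers
implement, sketched portably:

    /* Bit nr of a little-endian bitmap always lives at byte nr/8,
     * bit nr%8, independent of host endianness when accessed bytewise. */
    static int test_le_bit_portable(int nr, const void *addr)
    {
        const unsigned char *p = addr;
        return (p[nr >> 3] >> (nr & 7)) & 1;
    }

    static void set_le_bit_portable(int nr, void *addr)
    {
        unsigned char *p = addr;
        p[nr >> 3] |= 1u << (nr & 7);
    }
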
5509 --- linux/include/asm-arm/bitops.h.orig Thu Jul 18 15:21:50 2002
5510 +++ linux/include/asm-arm/bitops.h Thu Jul 18 15:22:34 2002
5512 * Copyright 1995, Russell King.
5513 * Various bits and pieces copyrights include:
5514 * Linus Torvalds (test_bit).
5515 + * Big endian support: Copyright 2001, Nicolas Pitre
5516 + * reworked by rmk.
5518 * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
5520 @@ -17,81 +19,271 @@
5524 +#include <asm/system.h>
5526 #define smp_mb__before_clear_bit() do { } while (0)
5527 #define smp_mb__after_clear_bit() do { } while (0)
5530 - * Function prototypes to keep gcc -Wall happy.
5531 + * These functions are the basis of our bit ops.
5532 + * First, the atomic bitops.
5534 + * The endian issue for these functions is handled by the macros below.
5536 -extern void set_bit(int nr, volatile void * addr);
5538 +____atomic_set_bit_mask(unsigned int mask, volatile unsigned char *p)
5540 + unsigned long flags;
5542 + local_irq_save(flags);
5544 + local_irq_restore(flags);
5548 +____atomic_clear_bit_mask(unsigned int mask, volatile unsigned char *p)
5550 + unsigned long flags;
5552 + local_irq_save(flags);
5554 + local_irq_restore(flags);
5558 +____atomic_change_bit_mask(unsigned int mask, volatile unsigned char *p)
5560 + unsigned long flags;
5562 + local_irq_save(flags);
5564 + local_irq_restore(flags);
5567 -static inline void __set_bit(int nr, volatile void *addr)
5569 +____atomic_test_and_set_bit_mask(unsigned int mask, volatile unsigned char *p)
5571 - ((unsigned char *) addr)[nr >> 3] |= (1U << (nr & 7));
5572 + unsigned long flags;
5575 + local_irq_save(flags);
5578 + local_irq_restore(flags);
5580 + return res & mask;
5583 -extern void clear_bit(int nr, volatile void * addr);
5585 +____atomic_test_and_clear_bit_mask(unsigned int mask, volatile unsigned char *p)
5587 + unsigned long flags;
5590 + local_irq_save(flags);
5593 + local_irq_restore(flags);
5595 + return res & mask;
5598 -static inline void __clear_bit(int nr, volatile void *addr)
5600 +____atomic_test_and_change_bit_mask(unsigned int mask, volatile unsigned char *p)
5602 - ((unsigned char *) addr)[nr >> 3] &= ~(1U << (nr & 7));
5603 + unsigned long flags;
5606 + local_irq_save(flags);
5609 + local_irq_restore(flags);
5611 + return res & mask;
5614 -extern void change_bit(int nr, volatile void * addr);
5616 + * Now the non-atomic variants. We let the compiler handle all optimisations
5619 +static inline void ____nonatomic_set_bit(int nr, volatile void *p)
5621 + ((unsigned char *) p)[nr >> 3] |= (1U << (nr & 7));
5624 -static inline void __change_bit(int nr, volatile void *addr)
5625 +static inline void ____nonatomic_clear_bit(int nr, volatile void *p)
5627 - ((unsigned char *) addr)[nr >> 3] ^= (1U << (nr & 7));
5628 + ((unsigned char *) p)[nr >> 3] &= ~(1U << (nr & 7));
5631 -extern int test_and_set_bit(int nr, volatile void * addr);
5632 +static inline void ____nonatomic_change_bit(int nr, volatile void *p)
5634 + ((unsigned char *) p)[nr >> 3] ^= (1U << (nr & 7));
5637 -static inline int __test_and_set_bit(int nr, volatile void *addr)
5638 +static inline int ____nonatomic_test_and_set_bit(int nr, volatile void *p)
5640 unsigned int mask = 1 << (nr & 7);
5641 unsigned int oldval;
5643 - oldval = ((unsigned char *) addr)[nr >> 3];
5644 - ((unsigned char *) addr)[nr >> 3] = oldval | mask;
5645 + oldval = ((unsigned char *) p)[nr >> 3];
5646 + ((unsigned char *) p)[nr >> 3] = oldval | mask;
5647 return oldval & mask;
5650 -extern int test_and_clear_bit(int nr, volatile void * addr);
5652 -static inline int __test_and_clear_bit(int nr, volatile void *addr)
5653 +static inline int ____nonatomic_test_and_clear_bit(int nr, volatile void *p)
5655 unsigned int mask = 1 << (nr & 7);
5656 unsigned int oldval;
5658 - oldval = ((unsigned char *) addr)[nr >> 3];
5659 - ((unsigned char *) addr)[nr >> 3] = oldval & ~mask;
5660 + oldval = ((unsigned char *) p)[nr >> 3];
5661 + ((unsigned char *) p)[nr >> 3] = oldval & ~mask;
5662 return oldval & mask;
5665 -extern int test_and_change_bit(int nr, volatile void * addr);
5667 -static inline int __test_and_change_bit(int nr, volatile void *addr)
5668 +static inline int ____nonatomic_test_and_change_bit(int nr, volatile void *p)
5670 unsigned int mask = 1 << (nr & 7);
5671 unsigned int oldval;
5673 - oldval = ((unsigned char *) addr)[nr >> 3];
5674 - ((unsigned char *) addr)[nr >> 3] = oldval ^ mask;
5675 + oldval = ((unsigned char *) p)[nr >> 3];
5676 + ((unsigned char *) p)[nr >> 3] = oldval ^ mask;
5677 return oldval & mask;
5680 -extern int find_first_zero_bit(void * addr, unsigned size);
5681 -extern int find_next_zero_bit(void * addr, int size, int offset);
5684 * This routine doesn't need to be atomic.
5686 -static inline int test_bit(int nr, const void * addr)
5687 +static inline int ____test_bit(int nr, const void * p)
5689 - return ((unsigned char *) addr)[nr >> 3] & (1U << (nr & 7));
5690 + return ((volatile unsigned char *) p)[nr >> 3] & (1U << (nr & 7));
5694 + * A note about Endian-ness.
5695 + * -------------------------
5697 + * When the ARM is put into big endian mode via CR15, the processor
5698 + * merely swaps the order of bytes within words, thus:
5700 + * ------------ physical data bus bits -----------
5701 + * D31 ... D24 D23 ... D16 D15 ... D8 D7 ... D0
5702 + * little byte 3 byte 2 byte 1 byte 0
5703 + * big byte 0 byte 1 byte 2 byte 3
5705 + * This means that reading a 32-bit word at address 0 returns the same
5706 + * value irrespective of the endian mode bit.
5708 + * Peripheral devices should be connected with the data bus reversed in
5709 + * "Big Endian" mode. ARM Application Note 61 is applicable, and is
5710 + * available from http://www.arm.com/.
5712 + * The following assumes that the data bus connectivity for big endian
5713 + * mode has been followed.
5715 + * Note that bit 0 is defined to be 32-bit word bit 0, not byte 0 bit 0.
5719 + * Little endian assembly bitops. nr = 0 -> byte 0 bit 0.
5721 +extern void _set_bit_le(int nr, volatile void * p);
5722 +extern void _clear_bit_le(int nr, volatile void * p);
5723 +extern void _change_bit_le(int nr, volatile void * p);
5724 +extern int _test_and_set_bit_le(int nr, volatile void * p);
5725 +extern int _test_and_clear_bit_le(int nr, volatile void * p);
5726 +extern int _test_and_change_bit_le(int nr, volatile void * p);
5727 +extern int _find_first_zero_bit_le(void * p, unsigned size);
5728 +extern int _find_next_zero_bit_le(void * p, int size, int offset);
5731 + * Big endian assembly bitops. nr = 0 -> byte 3 bit 0.
5733 +extern void _set_bit_be(int nr, volatile void * p);
5734 +extern void _clear_bit_be(int nr, volatile void * p);
5735 +extern void _change_bit_be(int nr, volatile void * p);
5736 +extern int _test_and_set_bit_be(int nr, volatile void * p);
5737 +extern int _test_and_clear_bit_be(int nr, volatile void * p);
5738 +extern int _test_and_change_bit_be(int nr, volatile void * p);
5739 +extern int _find_first_zero_bit_be(void * p, unsigned size);
5740 +extern int _find_next_zero_bit_be(void * p, int size, int offset);
5744 + * The __* form of bitops are non-atomic and may be reordered.
5746 +#define ATOMIC_BITOP_LE(name,nr,p) \
5747 + (__builtin_constant_p(nr) ? \
5748 + ____atomic_##name##_mask(1 << ((nr) & 7), \
5749 + ((unsigned char *)(p)) + ((nr) >> 3)) : \
5750 + _##name##_le(nr,p))
5752 +#define ATOMIC_BITOP_BE(name,nr,p) \
5753 + (__builtin_constant_p(nr) ? \
5754 + ____atomic_##name##_mask(1 << ((nr) & 7), \
5755 + ((unsigned char *)(p)) + (((nr) >> 3) ^ 3)) : \
5756 + _##name##_be(nr,p))
5758 +#define NONATOMIC_BITOP_LE(name,nr,p) \
5759 + (____nonatomic_##name(nr, p))
5761 +#define NONATOMIC_BITOP_BE(name,nr,p) \
5762 + (____nonatomic_##name(nr ^ 0x18, p))
5766 + * These are the little endian, atomic definitions.
5768 +#define set_bit(nr,p) ATOMIC_BITOP_LE(set_bit,nr,p)
5769 +#define clear_bit(nr,p) ATOMIC_BITOP_LE(clear_bit,nr,p)
5770 +#define change_bit(nr,p) ATOMIC_BITOP_LE(change_bit,nr,p)
5771 +#define test_and_set_bit(nr,p) ATOMIC_BITOP_LE(test_and_set_bit,nr,p)
5772 +#define test_and_clear_bit(nr,p) ATOMIC_BITOP_LE(test_and_clear_bit,nr,p)
5773 +#define test_and_change_bit(nr,p) ATOMIC_BITOP_LE(test_and_change_bit,nr,p)
5774 +#define test_bit(nr,p) ____test_bit(nr,p)
5775 +#define find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz)
5776 +#define find_next_zero_bit(p,sz,off) _find_next_zero_bit_le(p,sz,off)
5779 + * These are the little endian, non-atomic definitions.
5781 +#define __set_bit(nr,p) NONATOMIC_BITOP_LE(set_bit,nr,p)
5782 +#define __clear_bit(nr,p) NONATOMIC_BITOP_LE(clear_bit,nr,p)
5783 +#define __change_bit(nr,p) NONATOMIC_BITOP_LE(change_bit,nr,p)
5784 +#define __test_and_set_bit(nr,p) NONATOMIC_BITOP_LE(test_and_set_bit,nr,p)
5785 +#define __test_and_clear_bit(nr,p) NONATOMIC_BITOP_LE(test_and_clear_bit,nr,p)
5786 +#define __test_and_change_bit(nr,p) NONATOMIC_BITOP_LE(test_and_change_bit,nr,p)
5787 +#define __test_bit(nr,p) ____test_bit(nr,p)
5792 + * These are the big endian, atomic definitions.
5794 +#define set_bit(nr,p) ATOMIC_BITOP_BE(set_bit,nr,p)
5795 +#define clear_bit(nr,p) ATOMIC_BITOP_BE(clear_bit,nr,p)
5796 +#define change_bit(nr,p) ATOMIC_BITOP_BE(change_bit,nr,p)
5797 +#define test_and_set_bit(nr,p) ATOMIC_BITOP_BE(test_and_set_bit,nr,p)
5798 +#define test_and_clear_bit(nr,p) ATOMIC_BITOP_BE(test_and_clear_bit,nr,p)
5799 +#define test_and_change_bit(nr,p) ATOMIC_BITOP_BE(test_and_change_bit,nr,p)
5800 +#define test_bit(nr,p) ____test_bit((nr) ^ 0x18, p)
5801 +#define find_first_zero_bit(p,sz) _find_first_zero_bit_be(p,sz)
5802 +#define find_next_zero_bit(p,sz,off) _find_next_zero_bit_be(p,sz,off)
5805 + * These are the big endian, non-atomic definitions.
5807 +#define __set_bit(nr,p) NONATOMIC_BITOP_BE(set_bit,nr,p)
5808 +#define __clear_bit(nr,p) NONATOMIC_BITOP_BE(clear_bit,nr,p)
5809 +#define __change_bit(nr,p) NONATOMIC_BITOP_BE(change_bit,nr,p)
5810 +#define __test_and_set_bit(nr,p) NONATOMIC_BITOP_BE(test_and_set_bit,nr,p)
5811 +#define __test_and_clear_bit(nr,p) NONATOMIC_BITOP_BE(test_and_clear_bit,nr,p)
5812 +#define __test_and_change_bit(nr,p) NONATOMIC_BITOP_BE(test_and_change_bit,nr,p)
5813 +#define __test_bit(nr,p) ____test_bit((nr) ^ 0x18, p)
5818 * ffz = Find First Zero in word. Undefined if no zero exists,
5819 * so code should check against ~0UL first..
5821 @@ -110,6 +302,29 @@
5825 + * __ffs = Find First Set bit in word. Undefined if no bit is set,
5826 + * so code should check against 0 first..
5828 +static inline unsigned long __ffs(unsigned long word)
5833 + if (word & 0x0000ffff) { k -= 16; word <<= 16; }
5834 + if (word & 0x00ff0000) { k -= 8; word <<= 8; }
5835 + if (word & 0x0f000000) { k -= 4; word <<= 4; }
5836 + if (word & 0x30000000) { k -= 2; word <<= 2; }
5837 + if (word & 0x40000000) { k -= 1; }
5842 + * fls: find last bit set.
5845 +#define fls(x) generic_fls(x)
5848 * ffs: find first bit set. This is defined the same way as
5849 * the libc and compiler builtin ffs routines, therefore
5850 * differs in spirit from the above ffz (man ffs).
5851 @@ -118,6 +333,22 @@
5852 #define ffs(x) generic_ffs(x)
5855 + * Find first bit set in a 168-bit bitmap, where the first
5856 + * 128 bits are unlikely to be set.
5858 +static inline int sched_find_first_bit(unsigned long *b)
5863 + for (off = 0; v = b[off], off < 4; off++) {
5867 + return __ffs(v) + off * 32;
5871 * hweightN: returns the hamming weight (i.e. the number
5872 * of bits set) of a N-bit word
5874 @@ -126,18 +357,25 @@
5875 #define hweight16(x) generic_hweight16(x)
5876 #define hweight8(x) generic_hweight8(x)
5878 -#define ext2_set_bit test_and_set_bit
5879 -#define ext2_clear_bit test_and_clear_bit
5880 -#define ext2_test_bit test_bit
5881 -#define ext2_find_first_zero_bit find_first_zero_bit
5882 -#define ext2_find_next_zero_bit find_next_zero_bit
5884 -/* Bitmap functions for the minix filesystem. */
5885 -#define minix_test_and_set_bit(nr,addr) test_and_set_bit(nr,addr)
5886 -#define minix_set_bit(nr,addr) set_bit(nr,addr)
5887 -#define minix_test_and_clear_bit(nr,addr) test_and_clear_bit(nr,addr)
5888 -#define minix_test_bit(nr,addr) test_bit(nr,addr)
5889 -#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
5891 + * Ext2 is defined to use little-endian byte ordering.
5892 + * These do not need to be atomic.
5894 +#define ext2_set_bit(nr,p) NONATOMIC_BITOP_LE(test_and_set_bit,nr,p)
5895 +#define ext2_clear_bit(nr,p) NONATOMIC_BITOP_LE(test_and_clear_bit,nr,p)
5896 +#define ext2_test_bit(nr,p) __test_bit(nr,p)
5897 +#define ext2_find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz)
5898 +#define ext2_find_next_zero_bit(p,sz,off) _find_next_zero_bit_le(p,sz,off)
5901 + * Minix is defined to use little-endian byte ordering.
5902 + * These do not need to be atomic.
5904 +#define minix_set_bit(nr,p) NONATOMIC_BITOP_LE(set_bit,nr,p)
5905 +#define minix_test_bit(nr,p) __test_bit(nr,p)
5906 +#define minix_test_and_set_bit(nr,p) NONATOMIC_BITOP_LE(test_and_set_bit,nr,p)
5907 +#define minix_test_and_clear_bit(nr,p) NONATOMIC_BITOP_LE(test_and_clear_bit,nr,p)
5908 +#define minix_find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz)
5910 #endif /* __KERNEL__ */
5912 --- linux/include/asm-ia64/bitops.h.orig Thu Jul 18 15:22:24 2002
5913 +++ linux/include/asm-ia64/bitops.h Thu Jul 18 15:22:34 2002
5915 #define _ASM_IA64_BITOPS_H
5918 - * Copyright (C) 1998-2001 Hewlett-Packard Co
5919 - * Copyright (C) 1998-2001 David Mosberger-Tang <davidm@hpl.hp.com>
5920 + * Copyright (C) 1998-2002 Hewlett-Packard Co
5921 + * David Mosberger-Tang <davidm@hpl.hp.com>
5923 + * 02/06/02 find_next_bit() and find_first_bit() added from Erich Focht's ia64 O(1)
5927 +#include <linux/types.h>
5929 #include <asm/system.h>
5936 + * __clear_bit - Clears a bit in memory (non-atomic version)
5938 +static __inline__ void
5939 +__clear_bit (int nr, volatile void *addr)
5941 + volatile __u32 *p = (__u32 *) addr + (nr >> 5);
5942 + __u32 m = 1 << (nr & 31);
5947 * change_bit - Toggle a bit in memory
5949 * @addr: Address to start counting from
5950 @@ -264,12 +280,11 @@
5954 - * ffz - find the first zero bit in a memory region
5955 - * @x: The address to start the search at
5956 + * ffz - find the first zero bit in a long word
5957 + * @x: The long word to find the bit in
5959 - * Returns the bit-number (0..63) of the first (least significant) zero bit, not
5960 - * the number of the byte containing a bit. Undefined if no zero exists, so
5961 - * code should check against ~0UL first...
5962 + * Returns the bit-number (0..63) of the first (least significant) zero bit. Undefined if
5963 + * no zero exists, so code should check against ~0UL first...
5965 static inline unsigned long
5966 ffz (unsigned long x)
5967 @@ -280,6 +295,21 @@
5972 + * __ffs - find first bit in word.
5973 + * @x: The word to search
5975 + * Undefined if no set bit exists, so code should check against 0 first.
5977 +static __inline__ unsigned long
5978 +__ffs (unsigned long x)
5980 + unsigned long result;
5982 + __asm__ ("popcnt %0=%1" : "=r" (result) : "r" ((x - 1) & ~x));
5989 @@ -296,6 +326,12 @@
5990 return exp - 0xffff;
5996 + return ia64_fls((unsigned int) x);
6000 * ffs: find first bit set. This is defined the same way as the libc and compiler builtin
6001 * ffs routines, therefore differs in spirit from the above ffz (man ffs): it operates on
6002 @@ -368,8 +404,53 @@
6004 #define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
6007 + * Find next bit in a bitmap reasonably efficiently..
6010 +find_next_bit (void *addr, unsigned long size, unsigned long offset)
6012 + unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
6013 + unsigned long result = offset & ~63UL;
6014 + unsigned long tmp;
6016 + if (offset >= size)
6022 + tmp &= ~0UL << offset;
6026 + goto found_middle;
6030 + while (size & ~63UL) {
6031 + if ((tmp = *(p++)))
6032 + goto found_middle;
6040 + tmp &= ~0UL >> (64-size);
6041 + if (tmp == 0UL) /* Are any bits set? */
6042 + return result + size; /* Nope. */
6044 + return result + __ffs(tmp);
6047 +#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
6051 +#define __clear_bit(nr, addr) clear_bit(nr, addr)
6053 #define ext2_set_bit test_and_set_bit
6054 #define ext2_clear_bit test_and_clear_bit
6055 #define ext2_test_bit test_bit
6056 @@ -382,6 +463,16 @@
6057 #define minix_test_and_clear_bit(nr,addr) test_and_clear_bit(nr,addr)
6058 #define minix_test_bit(nr,addr) test_bit(nr,addr)
6059 #define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
6062 +sched_find_first_bit (unsigned long *b)
6064 + if (unlikely(b[0]))
6065 + return __ffs(b[0]);
6066 + if (unlikely(b[1]))
6067 + return 64 + __ffs(b[1]);
6068 + return __ffs(b[2]) + 128;
6071 #endif /* __KERNEL__ */
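
The ia64 __ffs() leans on a popcount identity: if the lowest set bit of x is
bit k, then x-1 flips bits 0..k, and ~x masks that down to exactly the k bits
below it, so the population count of (x-1) & ~x is k. A worked example and a
portable mirror, using GCC's __builtin_popcountl in place of the popcnt
instruction:

    /*   x        = 0b10100      (lowest set bit: k = 2)
     *   x - 1    = 0b10011
     *   ~x       =  ...01011
     *   (x-1)&~x = 0b00011  ->  popcount = 2 = __ffs(x)
     */
    static unsigned long ffs0_via_popcount(unsigned long x)
    {
        return __builtin_popcountl((x - 1) & ~x);   /* undefined for 0 */
    }
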
6073 --- linux/include/asm-mips64/bitops.h.orig Thu Jul 18 15:22:24 2002
6074 +++ linux/include/asm-mips64/bitops.h Thu Jul 18 15:22:34 2002
6077 #include <asm/system.h>
6078 #include <asm/sgidefs.h>
6079 +#include <asm/mipsregs.h>
6082 * set_bit - Atomically set a bit in memory
6084 * Note that @nr may be almost arbitrarily large; this function is not
6085 * restricted to acting on a single-word quantity.
6087 -static inline void set_bit(unsigned long nr, volatile void *addr)
6088 +extern __inline__ void
6089 +set_bit(unsigned long nr, volatile void *addr)
6091 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
6094 * If it's called on the same region of memory simultaneously, the effect
6095 * may be that only one operation succeeds.
6097 -static inline void __set_bit(int nr, volatile void * addr)
6098 +extern __inline__ void __set_bit(int nr, volatile void * addr)
6100 unsigned long * m = ((unsigned long *) addr) + (nr >> 6);
6103 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
6104 * in order to ensure changes are visible on other processors.
6106 -static inline void clear_bit(unsigned long nr, volatile void *addr)
6107 +extern __inline__ void
6108 +clear_bit(unsigned long nr, volatile void *addr)
6110 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
6113 * Note that @nr may be almost arbitrarily large; this function is not
6114 * restricted to acting on a single-word quantity.
6116 -static inline void change_bit(unsigned long nr, volatile void *addr)
6117 +extern __inline__ void
6118 +change_bit(unsigned long nr, volatile void *addr)
6120 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
6123 * If it's called on the same region of memory simultaneously, the effect
6124 * may be that only one operation succeeds.
6126 -static inline void __change_bit(int nr, volatile void * addr)
6127 +extern __inline__ void __change_bit(int nr, volatile void * addr)
6129 unsigned long * m = ((unsigned long *) addr) + (nr >> 6);
6132 * This operation is atomic and cannot be reordered.
6133 * It also implies a memory barrier.
6135 -static inline unsigned long test_and_set_bit(unsigned long nr,
6136 - volatile void *addr)
6137 +extern __inline__ unsigned long
6138 +test_and_set_bit(unsigned long nr, volatile void *addr)
6140 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
6141 unsigned long temp, res;
6143 * If two examples of this operation race, one can appear to succeed
6144 * but actually fail. You must protect multiple accesses with a lock.
6146 -static inline int __test_and_set_bit(int nr, volatile void *addr)
6147 +extern __inline__ int
6148 +__test_and_set_bit(int nr, volatile void * addr)
6150 unsigned long mask, retval;
6151 long *a = (unsigned long *) addr;
6153 * This operation is atomic and cannot be reordered.
6154 * It also implies a memory barrier.
6156 -static inline unsigned long test_and_clear_bit(unsigned long nr,
6157 - volatile void *addr)
6158 +extern __inline__ unsigned long
6159 +test_and_clear_bit(unsigned long nr, volatile void *addr)
6161 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
6162 unsigned long temp, res;
6164 * If two examples of this operation race, one can appear to succeed
6165 * but actually fail. You must protect multiple accesses with a lock.
6167 -static inline int __test_and_clear_bit(int nr, volatile void * addr)
6168 +extern __inline__ int
6169 +__test_and_clear_bit(int nr, volatile void * addr)
6171 unsigned long mask, retval;
6172 unsigned long *a = (unsigned long *) addr;
6174 * This operation is atomic and cannot be reordered.
6175 * It also implies a memory barrier.
6177 -static inline unsigned long test_and_change_bit(unsigned long nr,
6178 - volatile void *addr)
6179 +extern __inline__ unsigned long
6180 +test_and_change_bit(unsigned long nr, volatile void *addr)
6182 unsigned long *m = ((unsigned long *) addr) + (nr >> 6);
6183 unsigned long temp, res;
6185 * If two examples of this operation race, one can appear to succeed
6186 * but actually fail. You must protect multiple accesses with a lock.
6188 -static inline int __test_and_change_bit(int nr, volatile void *addr)
6189 +extern __inline__ int
6190 +__test_and_change_bit(int nr, volatile void * addr)
6192 unsigned long mask, retval;
6193 unsigned long *a = (unsigned long *) addr;
6195 * @nr: bit number to test
6196 * @addr: Address to start counting from
6198 -static inline unsigned long test_bit(int nr, volatile void * addr)
6199 +extern __inline__ unsigned long
6200 +test_bit(int nr, volatile void * addr)
6202 return 1UL & (((volatile unsigned long *) addr)[nr >> 6] >> (nr & 0x3f));
6205 * Returns the bit-number of the first zero bit, not the number of the byte
6208 -static inline int find_first_zero_bit (void *addr, unsigned size)
6209 +extern __inline__ int
6210 +find_first_zero_bit (void *addr, unsigned size)
6212 unsigned long dummy;
6216 : "=r" (res), "=r" (dummy), "=r" (addr)
6217 : "0" ((signed int) 0), "1" ((unsigned int) 0xffffffff),
6218 - "2" (addr), "r" (size));
6219 + "2" (addr), "r" (size)
6225 * @offset: The bitnumber to start searching at
6226 * @size: The maximum size to search
6228 -static inline int find_next_zero_bit (void * addr, int size, int offset)
6229 +extern __inline__ int
6230 +find_next_zero_bit (void * addr, int size, int offset)
6232 unsigned int *p = ((unsigned int *) addr) + (offset >> 5);
6233 int set = 0, bit = offset & 31, res;
6237 : "=r" (set), "=r" (dummy)
6238 - : "0" (0), "1" (1 << bit), "r" (*p));
6239 + : "0" (0), "1" (1 << bit), "r" (*p)
6241 if (set < (32 - bit))
6242 return set + offset;
6244 @@ -400,19 +412,20 @@
6246 * Undefined if no zero exists, so code should check against ~0UL first.
6248 -static __inline__ unsigned long ffz(unsigned long word)
6249 +extern __inline__ unsigned long ffz(unsigned long word)
6255 - s = 32; if (word << 32 != 0) s = 0; b += s; word >>= s;
6256 - s = 16; if (word << 48 != 0) s = 0; b += s; word >>= s;
6257 - s = 8; if (word << 56 != 0) s = 0; b += s; word >>= s;
6258 - s = 4; if (word << 60 != 0) s = 0; b += s; word >>= s;
6259 - s = 2; if (word << 62 != 0) s = 0; b += s; word >>= s;
6260 - s = 1; if (word << 63 != 0) s = 0; b += s;
6262 + if (word & 0x00000000ffffffffUL) { k -= 32; word <<= 32; }
6263 + if (word & 0x0000ffff00000000UL) { k -= 16; word <<= 16; }
6264 + if (word & 0x00ff000000000000UL) { k -= 8; word <<= 8; }
6265 + if (word & 0x0f00000000000000UL) { k -= 4; word <<= 4; }
6266 + if (word & 0x3000000000000000UL) { k -= 2; word <<= 2; }
6267 + if (word & 0x4000000000000000UL) { k -= 1; }
6275 * @offset: The bitnumber to start searching at
6276 * @size: The maximum size to search
6278 -static inline unsigned long find_next_zero_bit(void *addr, unsigned long size,
6279 - unsigned long offset)
6280 +extern __inline__ unsigned long
6281 +find_next_zero_bit(void *addr, unsigned long size, unsigned long offset)
6283 unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
6284 unsigned long result = offset & ~63UL;
6289 -static inline int ext2_set_bit(int nr,void * addr)
6291 +ext2_set_bit(int nr,void * addr)
6293 int mask, retval, flags;
6294 unsigned char *ADDR = (unsigned char *) addr;
6299 -static inline int ext2_clear_bit(int nr, void * addr)
6301 +ext2_clear_bit(int nr, void * addr)
6303 int mask, retval, flags;
6304 unsigned char *ADDR = (unsigned char *) addr;
6309 -static inline int ext2_test_bit(int nr, const void * addr)
6311 +ext2_test_bit(int nr, const void * addr)
6314 const unsigned char *ADDR = (const unsigned char *) addr;
6316 #define ext2_find_first_zero_bit(addr, size) \
6317 ext2_find_next_zero_bit((addr), (size), 0)
6319 -static inline unsigned int ext2_find_next_zero_bit(void *addr,
6320 - unsigned long size,
6321 - unsigned long offset)
6322 +extern inline unsigned int
6323 +ext2_find_next_zero_bit(void *addr, unsigned long size, unsigned long offset)
6325 unsigned int *p = ((unsigned int *) addr) + (offset >> 5);
6326 unsigned int result = offset & ~31UL;
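
The mips64 ffz() rewrite uses the same invert-then-narrow pattern seen in the
other ports: the tests above only make sense once the word has been
complemented, which the elided opening lines of the function presumably do.
Each test that fires halves the remaining window until the lowest set bit of
the complement, i.e. the lowest zero of the original, is isolated. A portable
mirror with a worked check:

    #include <assert.h>

    /* Undefined for ~0, exactly like the kernel version. */
    static unsigned int ffz64(unsigned long long word)
    {
        unsigned int k = 63;

        word = ~word;    /* search for the first set bit instead */
        if (word & 0x00000000ffffffffULL) { k -= 32; word <<= 32; }
        if (word & 0x0000ffff00000000ULL) { k -= 16; word <<= 16; }
        if (word & 0x00ff000000000000ULL) { k -= 8;  word <<= 8;  }
        if (word & 0x0f00000000000000ULL) { k -= 4;  word <<= 4;  }
        if (word & 0x3000000000000000ULL) { k -= 2;  word <<= 2;  }
        if (word & 0x4000000000000000ULL) { k -= 1; }
        return k;
    }

    int main(void)
    {
        assert(ffz64(~0x10ULL) == 4);   /* only bit 4 is zero */
        assert(ffz64(0) == 0);
        return 0;
    }
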
6327 --- linux/include/asm-s390/bitops.h.orig Thu Jul 18 15:22:25 2002
6328 +++ linux/include/asm-s390/bitops.h Thu Jul 18 15:22:35 2002
6329 @@ -47,272 +47,217 @@
6330 extern const char _oi_bitmap[];
6331 extern const char _ni_bitmap[];
6332 extern const char _zb_findmap[];
6333 +extern const char _sb_findmap[];
6337 * SMP safe set_bit routine based on compare and swap (CS)
6339 -static __inline__ void set_bit_cs(int nr, volatile void * addr)
6340 +static inline void set_bit_cs(int nr, volatile void *ptr)
6342 - unsigned long bits, mask;
6343 - __asm__ __volatile__(
6344 + unsigned long addr, old, new, mask;
6346 + addr = (unsigned long) ptr;
6348 - " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
6349 - " nr %2,%1\n" /* isolate last 2 bits of address */
6350 - " xr %1,%2\n" /* make addr % 4 == 0 */
6352 - " ar %0,%2\n" /* add alignement to bitnr */
6353 +	nr += (addr & 3) << 3;		/* add alignment to bit number */
6354 +	addr ^= addr & 3;		/* align address to 4 */
6357 - " nr %2,%0\n" /* make shift value */
6361 - " la %1,0(%0,%1)\n" /* calc. address for CS */
6362 - " sll %3,0(%2)\n" /* make OR mask */
6364 - "0: lr %2,%0\n" /* CS loop starts here */
6365 - " or %2,%3\n" /* set bit */
6366 - " cs %0,%2,0(%1)\n"
6368 - : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
6369 - : "cc", "memory" );
6370 + addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
6371 + mask = 1UL << (nr & 31); /* make OR mask */
6376 + " cs %0,%1,0(%4)\n"
6378 + : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
6379 + : "d" (mask), "a" (addr)
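
All five rewritten s390 routines share one shape: compute the mask and the
4-aligned address in plain C, then loop on Compare and Swap, loading the old
word, building the new one, and letting CS retry whenever another CPU changed
the word in between. The same loop in portable C, with GCC's __sync builtin
standing in for the CS instruction (a sketch, not the kernel code):

    /* Lock-free set_bit skeleton: retry until the word was unchanged
     * between our load and our swap. */
    static void set_bit_cas(int nr, volatile unsigned int *ptr)
    {
        volatile unsigned int *p = ptr + (nr >> 5);
        unsigned int mask = 1u << (nr & 31);
        unsigned int old, new;

        do {
            old = *p;
            new = old | mask;
        } while (!__sync_bool_compare_and_swap(p, old, new));
    }
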
6384 * SMP safe clear_bit routine based on compare and swap (CS)
6386 -static __inline__ void clear_bit_cs(int nr, volatile void * addr)
6387 +static inline void clear_bit_cs(int nr, volatile void *ptr)
6389 - static const int minusone = -1;
6390 - unsigned long bits, mask;
6391 - __asm__ __volatile__(
6392 + unsigned long addr, old, new, mask;
6394 + addr = (unsigned long) ptr;
6396 - " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
6397 - " nr %2,%1\n" /* isolate last 2 bits of address */
6398 - " xr %1,%2\n" /* make addr % 4 == 0 */
6400 - " ar %0,%2\n" /* add alignement to bitnr */
6401 +	nr += (addr & 3) << 3;		/* add alignment to bit number */
6402 +	addr ^= addr & 3;		/* align address to 4 */
6405 - " nr %2,%0\n" /* make shift value */
6409 - " la %1,0(%0,%1)\n" /* calc. address for CS */
6411 - " x %3,%4\n" /* make AND mask */
6413 - "0: lr %2,%0\n" /* CS loop starts here */
6414 - " nr %2,%3\n" /* clear bit */
6415 - " cs %0,%2,0(%1)\n"
6417 - : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask)
6418 - : "m" (minusone) : "cc", "memory" );
6419 + addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
6420 + mask = ~(1UL << (nr & 31)); /* make AND mask */
6425 + " cs %0,%1,0(%4)\n"
6427 + : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
6428 + : "d" (mask), "a" (addr)
6433  * SMP safe change_bit routine based on compare and swap (CS)
6435 -static __inline__ void change_bit_cs(int nr, volatile void * addr)
6436 +static inline void change_bit_cs(int nr, volatile void *ptr)
6438 - unsigned long bits, mask;
6439 - __asm__ __volatile__(
6440 + unsigned long addr, old, new, mask;
6442 + addr = (unsigned long) ptr;
6444 - " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
6445 - " nr %2,%1\n" /* isolate last 2 bits of address */
6446 - " xr %1,%2\n" /* make addr % 4 == 0 */
6448 - " ar %0,%2\n" /* add alignement to bitnr */
6449 + addr ^= addr & 3; /* align address to 4 */
6450 + nr += (addr & 3) << 3; /* add alignment to bit number */
6453 - " nr %2,%0\n" /* make shift value */
6457 - " la %1,0(%0,%1)\n" /* calc. address for CS */
6458 - " sll %3,0(%2)\n" /* make XR mask */
6460 - "0: lr %2,%0\n" /* CS loop starts here */
6461 - " xr %2,%3\n" /* change bit */
6462 - " cs %0,%2,0(%1)\n"
6464 - : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
6465 - : "cc", "memory" );
6466 + addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
6467 + mask = 1UL << (nr & 31); /* make XOR mask */
6472 + " cs %0,%1,0(%4)\n"
6474 + : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
6475 + : "d" (mask), "a" (addr)
6480  * SMP safe test_and_set_bit routine based on compare and swap (CS)
6482 -static __inline__ int test_and_set_bit_cs(int nr, volatile void * addr)
6483 +static inline int test_and_set_bit_cs(int nr, volatile void *ptr)
6485 - unsigned long bits, mask;
6486 - __asm__ __volatile__(
6487 + unsigned long addr, old, new, mask;
6489 + addr = (unsigned long) ptr;
6491 - " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
6492 - " nr %2,%1\n" /* isolate last 2 bits of address */
6493 - " xr %1,%2\n" /* make addr % 4 == 0 */
6495 - " ar %0,%2\n" /* add alignement to bitnr */
6496 + addr ^= addr & 3; /* align address to 4 */
6497 + nr += (addr & 3) << 3; /* add alignment to bit number */
6500 - " nr %2,%0\n" /* make shift value */
6504 - " la %1,0(%0,%1)\n" /* calc. address for CS */
6505 - " sll %3,0(%2)\n" /* make OR mask */
6507 - "0: lr %2,%0\n" /* CS loop starts here */
6508 - " or %2,%3\n" /* set bit */
6509 - " cs %0,%2,0(%1)\n"
6511 - " nr %0,%3\n" /* isolate old bit */
6512 - : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
6513 - : "cc", "memory" );
6515 + addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
6516 + mask = 1UL << (nr & 31); /* make OR/test mask */
6521 + " cs %0,%1,0(%4)\n"
6523 + : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
6524 + : "d" (mask), "a" (addr)
6526 + return (old & mask) != 0;
6530  * SMP safe test_and_clear_bit routine based on compare and swap (CS)
6532 -static __inline__ int test_and_clear_bit_cs(int nr, volatile void * addr)
6533 +static inline int test_and_clear_bit_cs(int nr, volatile void *ptr)
6535 - static const int minusone = -1;
6536 - unsigned long bits, mask;
6537 - __asm__ __volatile__(
6538 + unsigned long addr, old, new, mask;
6540 + addr = (unsigned long) ptr;
6542 - " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
6543 - " nr %2,%1\n" /* isolate last 2 bits of address */
6544 - " xr %1,%2\n" /* make addr % 4 == 0 */
6546 - " ar %0,%2\n" /* add alignement to bitnr */
6547 + addr ^= addr & 3; /* align address to 4 */
6548 + nr += (addr & 3) << 3; /* add alignment to bit number */
6551 - " nr %2,%0\n" /* make shift value */
6555 - " la %1,0(%0,%1)\n" /* calc. address for CS */
6558 - " x %3,%4\n" /* make AND mask */
6559 - "0: lr %2,%0\n" /* CS loop starts here */
6560 - " nr %2,%3\n" /* clear bit */
6561 - " cs %0,%2,0(%1)\n"
6564 - " nr %0,%3\n" /* isolate old bit */
6565 - : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask)
6566 - : "m" (minusone) : "cc", "memory" );
6568 + addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
6569 + mask = ~(1UL << (nr & 31)); /* make AND mask */
6574 + " cs %0,%1,0(%4)\n"
6576 + : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
6577 + : "d" (mask), "a" (addr)
6579 + return (old ^ new) != 0;
6583  * SMP safe test_and_change_bit routine based on compare and swap (CS)
6585 -static __inline__ int test_and_change_bit_cs(int nr, volatile void * addr)
6586 +static inline int test_and_change_bit_cs(int nr, volatile void *ptr)
6588 - unsigned long bits, mask;
6589 - __asm__ __volatile__(
6590 + unsigned long addr, old, new, mask;
6592 + addr = (unsigned long) ptr;
6594 - " lhi %2,3\n" /* CS must be aligned on 4 byte b. */
6595 - " nr %2,%1\n" /* isolate last 2 bits of address */
6596 - " xr %1,%2\n" /* make addr % 4 == 0 */
6598 - " ar %0,%2\n" /* add alignement to bitnr */
6599 + addr ^= addr & 3; /* align address to 4 */
6600 + nr += (addr & 3) << 3; /* add alignment to bit number */
6603 - " nr %2,%0\n" /* make shift value */
6607 - " la %1,0(%0,%1)\n" /* calc. address for CS */
6608 - " sll %3,0(%2)\n" /* make OR mask */
6610 - "0: lr %2,%0\n" /* CS loop starts here */
6611 - " xr %2,%3\n" /* change bit */
6612 - " cs %0,%2,0(%1)\n"
6614 - " nr %0,%3\n" /* isolate old bit */
6615 - : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
6616 - : "cc", "memory" );
6618 + addr += (nr ^ (nr & 31)) >> 3; /* calculate address for CS */
6619 + mask = 1UL << (nr & 31); /* make XOR mask */
6624 + " cs %0,%1,0(%4)\n"
6626 + : "=&d" (old), "=&d" (new), "+m" (*(unsigned int *) addr)
6627 + : "d" (mask), "a" (addr)
6629 + return (old & mask) != 0;
6631 #endif /* CONFIG_SMP */
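
All of the rewritten SMP routines above share one shape: isolate the mask, snapshot the word, apply the operation to a private copy, and let CS retry until no other CPU modified the word in between. A minimal C model of that retry loop, using GCC's __sync_val_compare_and_swap builtin as a portable stand-in for the CS instruction (the builtin is an illustrative assumption; the patch emits CS directly):

static void set_bit_cs_sketch(int nr, unsigned int *word)
{
	unsigned int old, new, mask = 1U << (nr & 31);

	do {
		old = *word;		/* snapshot current value        */
		new = old | mask;	/* set the bit in a private copy */
		/* the store succeeds only if *word still equals old;
		 * otherwise another CPU won the race and we retry    */
	} while (__sync_val_compare_and_swap(word, old, new) != old);
}

The same loop with an inverted mask and `new = old & mask`, with `new = old ^ mask`, or with a trailing `return (old & mask) != 0;` yields the clear, change and test_and_* variants.
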
6634 * fast, non-SMP set_bit routine
6636 -static __inline__ void __set_bit(int nr, volatile void * addr)
6637 +static inline void __set_bit(int nr, volatile void *ptr)
6639 - unsigned long reg1, reg2;
6640 - __asm__ __volatile__(
6646 - " la %1,0(%1,%3)\n"
6647 - " la %0,0(%0,%4)\n"
6648 - " oc 0(1,%1),0(%0)"
6649 - : "=&a" (reg1), "=&a" (reg2)
6650 - : "r" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
6653 -static __inline__ void
6654 -__constant_set_bit(const int nr, volatile void * addr)
6658 - __asm__ __volatile__ ("la 1,%0\n\t"
6660 - : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6661 - : : "1", "cc", "memory");
6664 - __asm__ __volatile__ ("la 1,%0\n\t"
6666 - : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6667 - : : "1", "cc", "memory" );
6670 - __asm__ __volatile__ ("la 1,%0\n\t"
6672 - : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6673 - : : "1", "cc", "memory" );
6676 - __asm__ __volatile__ ("la 1,%0\n\t"
6678 - : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6679 - : : "1", "cc", "memory" );
6682 - __asm__ __volatile__ ("la 1,%0\n\t"
6684 - : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6685 - : : "1", "cc", "memory" );
6688 - __asm__ __volatile__ ("la 1,%0\n\t"
6690 - : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6691 - : : "1", "cc", "memory" );
6694 - __asm__ __volatile__ ("la 1,%0\n\t"
6696 - : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6697 - : : "1", "cc", "memory" );
6700 - __asm__ __volatile__ ("la 1,%0\n\t"
6702 - : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6703 - : : "1", "cc", "memory" );
6706 + unsigned long addr;
6708 + addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
6709 + asm volatile("oc 0(1,%1),0(%2)"
6710 + : "+m" (*(char *) addr)
6711 + : "a" (addr), "a" (_oi_bitmap + (nr & 7))
6716 +__constant_set_bit(const int nr, volatile void *ptr)
6718 + unsigned long addr;
6720 + addr = ((unsigned long) ptr) + ((nr >> 3) ^ 3);
6723 + asm volatile ("oi 0(%1),0x01"
6724 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6727 + asm volatile ("oi 0(%1),0x02"
6728 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6731 + asm volatile ("oi 0(%1),0x04"
6732 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6735 + asm volatile ("oi 0(%1),0x08"
6736 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6739 + asm volatile ("oi 0(%1),0x10"
6740 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6743 + asm volatile ("oi 0(%1),0x20"
6744 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6747 + asm volatile ("oi 0(%1),0x40"
6748 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6751 + asm volatile ("oi 0(%1),0x80"
6752 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6757 #define set_bit_simple(nr,addr) \
6758 @@ -323,76 +268,58 @@
6760 * fast, non-SMP clear_bit routine
6762 -static __inline__ void
6763 -__clear_bit(int nr, volatile void * addr)
6765 +__clear_bit(int nr, volatile void *ptr)
6767 - unsigned long reg1, reg2;
6768 - __asm__ __volatile__(
6774 - " la %1,0(%1,%3)\n"
6775 - " la %0,0(%0,%4)\n"
6776 - " nc 0(1,%1),0(%0)"
6777 - : "=&a" (reg1), "=&a" (reg2)
6778 - : "r" (nr), "a" (addr), "a" (&_ni_bitmap) : "cc", "memory" );
6781 -static __inline__ void
6782 -__constant_clear_bit(const int nr, volatile void * addr)
6786 - __asm__ __volatile__ ("la 1,%0\n\t"
6788 - : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6789 - : : "1", "cc", "memory" );
6792 - __asm__ __volatile__ ("la 1,%0\n\t"
6794 - : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6795 - : : "1", "cc", "memory" );
6798 - __asm__ __volatile__ ("la 1,%0\n\t"
6800 - : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6801 - : : "1", "cc", "memory" );
6804 - __asm__ __volatile__ ("la 1,%0\n\t"
6806 - : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6807 - : : "1", "cc", "memory" );
6810 - __asm__ __volatile__ ("la 1,%0\n\t"
6812 - : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6813 - : : "cc", "memory" );
6816 - __asm__ __volatile__ ("la 1,%0\n\t"
6818 - : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6819 - : : "1", "cc", "memory" );
6822 - __asm__ __volatile__ ("la 1,%0\n\t"
6824 - : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6825 - : : "1", "cc", "memory" );
6828 - __asm__ __volatile__ ("la 1,%0\n\t"
6830 - : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6831 - : : "1", "cc", "memory" );
6834 + unsigned long addr;
6836 + addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
6837 + asm volatile("nc 0(1,%1),0(%2)"
6838 + : "+m" (*(char *) addr)
6839 + : "a" (addr), "a" (_ni_bitmap + (nr & 7))
6844 +__constant_clear_bit(const int nr, volatile void *ptr)
6846 + unsigned long addr;
6848 + addr = ((unsigned long) ptr) + ((nr >> 3) ^ 3);
6851 + asm volatile ("ni 0(%1),0xFE"
6852 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6855 + asm volatile ("ni 0(%1),0xFD"
6856 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6859 + asm volatile ("ni 0(%1),0xFB"
6860 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6863 + asm volatile ("ni 0(%1),0xF7"
6864 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6867 + asm volatile ("ni 0(%1),0xEF"
6868 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6871 + asm volatile ("ni 0(%1),0xDF"
6872 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6875 + asm volatile ("ni 0(%1),0xBF"
6876 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6879 + asm volatile ("ni 0(%1),0x7F"
6880 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6885 #define clear_bit_simple(nr,addr) \
6886 @@ -403,75 +330,57 @@
6888 * fast, non-SMP change_bit routine
6890 -static __inline__ void __change_bit(int nr, volatile void * addr)
6891 +static inline void __change_bit(int nr, volatile void *ptr)
6893 - unsigned long reg1, reg2;
6894 - __asm__ __volatile__(
6900 - " la %1,0(%1,%3)\n"
6901 - " la %0,0(%0,%4)\n"
6902 - " xc 0(1,%1),0(%0)"
6903 - : "=&a" (reg1), "=&a" (reg2)
6904 - : "r" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
6907 -static __inline__ void
6908 -__constant_change_bit(const int nr, volatile void * addr)
6912 - __asm__ __volatile__ ("la 1,%0\n\t"
6914 - : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6915 - : : "cc", "memory" );
6918 - __asm__ __volatile__ ("la 1,%0\n\t"
6920 - : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6921 - : : "cc", "memory" );
6924 - __asm__ __volatile__ ("la 1,%0\n\t"
6926 - : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6927 - : : "cc", "memory" );
6930 - __asm__ __volatile__ ("la 1,%0\n\t"
6932 - : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6933 - : : "cc", "memory" );
6936 - __asm__ __volatile__ ("la 1,%0\n\t"
6938 - : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6939 - : : "cc", "memory" );
6942 - __asm__ __volatile__ ("la 1,%0\n\t"
6944 - : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6945 - : : "1", "cc", "memory" );
6948 - __asm__ __volatile__ ("la 1,%0\n\t"
6950 - : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6951 - : : "1", "cc", "memory" );
6954 - __asm__ __volatile__ ("la 1,%0\n\t"
6956 - : "=m" (*((volatile char *) addr + ((nr>>3)^3)))
6957 - : : "1", "cc", "memory" );
6960 + unsigned long addr;
6962 + addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
6963 + asm volatile("xc 0(1,%1),0(%2)"
6964 + : "+m" (*(char *) addr)
6965 + : "a" (addr), "a" (_oi_bitmap + (nr & 7))
6970 +__constant_change_bit(const int nr, volatile void *ptr)
6972 + unsigned long addr;
6974 + addr = ((unsigned long) ptr) + ((nr >> 3) ^ 3);
6977 + asm volatile ("xi 0(%1),0x01"
6978 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6981 + asm volatile ("xi 0(%1),0x02"
6982 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6985 + asm volatile ("xi 0(%1),0x04"
6986 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6989 + asm volatile ("xi 0(%1),0x08"
6990 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6993 + asm volatile ("xi 0(%1),0x10"
6994 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
6997 + asm volatile ("xi 0(%1),0x20"
6998 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7001 + asm volatile ("xi 0(%1),0x40"
7002 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7005 + asm volatile ("xi 0(%1),0x80"
7006 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7011 #define change_bit_simple(nr,addr) \
7012 @@ -482,74 +391,54 @@
7014 * fast, non-SMP test_and_set_bit routine
7016 -static __inline__ int test_and_set_bit_simple(int nr, volatile void * addr)
7017 +static inline int test_and_set_bit_simple(int nr, volatile void *ptr)
7019 - unsigned long reg1, reg2;
7021 - __asm__ __volatile__(
7027 - " la %1,0(%1,%4)\n"
7030 - " la %2,0(%2,%5)\n"
7031 - " oc 0(1,%1),0(%2)"
7032 - : "=d&" (oldbit), "=&a" (reg1), "=&a" (reg2)
7033 - : "r" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
7034 - return oldbit & 1;
7035 + unsigned long addr;
7038 + addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
7039 + ch = *(unsigned char *) addr;
7040 + asm volatile("oc 0(1,%1),0(%2)"
7041 + : "+m" (*(char *) addr)
7042 + : "a" (addr), "a" (_oi_bitmap + (nr & 7))
7044 + return (ch >> (nr & 7)) & 1;
7046 #define __test_and_set_bit(X,Y) test_and_set_bit_simple(X,Y)
7049 * fast, non-SMP test_and_clear_bit routine
7051 -static __inline__ int test_and_clear_bit_simple(int nr, volatile void * addr)
7052 +static inline int test_and_clear_bit_simple(int nr, volatile void *ptr)
7054 - unsigned long reg1, reg2;
7056 + unsigned long addr;
7059 - __asm__ __volatile__(
7065 - " la %1,0(%1,%4)\n"
7068 - " la %2,0(%2,%5)\n"
7069 - " nc 0(1,%1),0(%2)"
7070 - : "=d&" (oldbit), "=&a" (reg1), "=&a" (reg2)
7071 - : "r" (nr), "a" (addr), "a" (&_ni_bitmap) : "cc", "memory" );
7072 - return oldbit & 1;
7073 + addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
7074 + ch = *(unsigned char *) addr;
7075 + asm volatile("nc 0(1,%1),0(%2)"
7076 + : "+m" (*(char *) addr)
7077 + : "a" (addr), "a" (_ni_bitmap + (nr & 7))
7079 + return (ch >> (nr & 7)) & 1;
7081 #define __test_and_clear_bit(X,Y) test_and_clear_bit_simple(X,Y)
7084 * fast, non-SMP test_and_change_bit routine
7086 -static __inline__ int test_and_change_bit_simple(int nr, volatile void * addr)
7087 +static inline int test_and_change_bit_simple(int nr, volatile void *ptr)
7089 - unsigned long reg1, reg2;
7091 + unsigned long addr;
7094 - __asm__ __volatile__(
7100 - " la %1,0(%1,%4)\n"
7103 - " la %2,0(%2,%5)\n"
7104 - " xc 0(1,%1),0(%2)"
7105 - : "=d&" (oldbit), "=&a" (reg1), "=&a" (reg2)
7106 - : "r" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
7107 - return oldbit & 1;
7108 + addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
7109 + ch = *(unsigned char *) addr;
7110 + asm volatile("xc 0(1,%1),0(%2)"
7111 + : "+m" (*(char *) addr)
7112 + : "a" (addr), "a" (_oi_bitmap + (nr & 7))
7114 + return (ch >> (nr & 7)) & 1;
7116 #define __test_and_change_bit(X,Y) test_and_change_bit_simple(X,Y)
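
The _simple variants drop the CS loop entirely: read the affected byte once, rewrite it through a one-byte OC/NC/XC against the _oi_bitmap/_ni_bitmap tables, and report the bit's prior state from the snapshot. A C model of test_and_set_bit_simple(), using plain byte indexing in place of the big-endian (nr ^ 24) offset (that simplification is an assumption for readability):

static int test_and_set_bit_simple_sketch(int nr, unsigned char *base)
{
	unsigned char *p = base + (nr >> 3);	/* byte holding the bit    */
	unsigned char ch = *p;			/* snapshot before update  */

	*p = ch | (1U << (nr & 7));		/* set the bit, not atomic */
	return (ch >> (nr & 7)) & 1;		/* previous bit value      */
}
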
7118 @@ -574,25 +463,17 @@
7119 * This routine doesn't need to be atomic.
7122 -static __inline__ int __test_bit(int nr, volatile void * addr)
7123 +static inline int __test_bit(int nr, volatile void *ptr)
7125 - unsigned long reg1, reg2;
7127 + unsigned long addr;
7130 - __asm__ __volatile__(
7136 - " ic %0,0(%2,%4)\n"
7138 - : "=d&" (oldbit), "=&a" (reg1), "=&a" (reg2)
7139 - : "r" (nr), "a" (addr) : "cc" );
7140 - return oldbit & 1;
7141 + addr = (unsigned long) ptr + ((nr ^ 24) >> 3);
7142 + ch = *(unsigned char *) addr;
7143 + return (ch >> (nr & 7)) & 1;
7146 -static __inline__ int __constant_test_bit(int nr, volatile void * addr) {
7147 +static inline int __constant_test_bit(int nr, volatile void * addr) {
7148 return (((volatile char *) addr)[(nr>>3)^3] & (1<<(nr&7))) != 0;
7153 * Find-bit routines..
7155 -static __inline__ int find_first_zero_bit(void * addr, unsigned size)
7156 +static inline int find_first_zero_bit(void * addr, unsigned size)
7158 unsigned long cmp, count;
7160 @@ -642,7 +523,45 @@
7161 return (res < size) ? res : size;
7164 -static __inline__ int find_next_zero_bit (void * addr, int size, int offset)
7165 +static inline int find_first_bit(void * addr, unsigned size)
7167 + unsigned long cmp, count;
7172 + __asm__(" slr %1,%1\n"
7177 + "0: c %1,0(%0,%4)\n"
7183 + "1: l %2,0(%0,%4)\n"
7186 + " tml %2,0xffff\n"
7190 + "2: tml %2,0x00ff\n"
7195 + " ic %2,0(%2,%5)\n"
7198 + : "=&a" (res), "=&d" (cmp), "=&a" (count)
7199 + : "a" (size), "a" (addr), "a" (&_sb_findmap) : "cc" );
7200 + return (res < size) ? res : size;
7203 +static inline int find_next_zero_bit (void * addr, int size, int offset)
7205 unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
7206 unsigned long bitvec, reg;
7207 @@ -680,11 +599,49 @@
7208 return (offset + res);
7211 +static inline int find_next_bit (void * addr, int size, int offset)
7213 + unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
7214 + unsigned long bitvec, reg;
7215 + int set, bit = offset & 31, res;
7219 + * Look for set bit in first word
7221 + bitvec = (*p) >> bit;
7222 + __asm__(" slr %0,%0\n"
7224 + " tml %1,0xffff\n"
7228 + "0: tml %1,0x00ff\n"
7233 + " ic %1,0(%1,%3)\n"
7235 + : "=&d" (set), "+a" (bitvec), "=&d" (reg)
7236 + : "a" (&_sb_findmap) : "cc" );
7237 + if (set < (32 - bit))
7238 + return set + offset;
7239 + offset += 32 - bit;
7243 + * No set bit yet, search remaining full words for a bit
7245 + res = find_first_bit (p, size - 32 * (p - (unsigned long *) addr));
7246 + return (offset + res);
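
The added find_next_bit() works in two phases: scan the tail of the word containing offset, and if nothing is set there, hand the remaining full words to find_first_bit(). A portable sketch of that shape, with the _sb_findmap lookup replaced by a GCC builtin (an assumption; the patch uses the table):

static int find_next_bit_sketch(const unsigned int *addr, int size, int offset)
{
	const unsigned int *p = addr + (offset >> 5);
	int bit = offset & 31;
	unsigned int w = *p >> bit;	/* tail of the first word */

	if (w)				/* hit in the first word; as in the
					 * patch, not clamped against size */
		return offset + __builtin_ctz(w);
	offset += 32 - bit;
	for (p++; offset < size; offset += 32, p++)
		if (*p)
			return offset + __builtin_ctz(*p);
	return size;
}
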
7250 * ffz = Find First Zero in word. Undefined if no zero exists,
7251 * so code should check against ~0UL first..
7253 -static __inline__ unsigned long ffz(unsigned long word)
7254 +static inline unsigned long ffz(unsigned long word)
7258 @@ -708,40 +665,109 @@
7262 + * __ffs = find first bit in word. Undefined if no bit exists,
7263 + * so code should check against 0UL first..
7265 +static inline unsigned long __ffs(unsigned long word)
7267 + unsigned long reg, result;
7269 + __asm__(" slr %0,%0\n"
7271 + " tml %1,0xffff\n"
7275 + "0: tml %1,0x00ff\n"
7280 + " ic %1,0(%1,%3)\n"
7282 + : "=&d" (result), "+a" (word), "=&d" (reg)
7283 + : "a" (&_sb_findmap) : "cc" );
7288 + * Every architecture must define this function. It's the fastest
7289 + * way of searching a 140-bit bitmap where the first 100 bits are
7290 + * unlikely to be set. It's guaranteed that at least one of the 140
7291 + * bits is set.
7293 +static inline int sched_find_first_bit(unsigned long *b)
7295 + return find_first_bit(b, 140);
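
sched_find_first_bit() is the primitive the O(1) scheduler calls on its 140-slot priority bitmap to locate the highest-priority non-empty runqueue list. Expressed over five 32-bit words in C (the word count and the __builtin_ctz stand-in are illustrative assumptions):

static int sched_find_first_bit_sketch(const unsigned int *b)
{
	int w;

	for (w = 0; w < 5; w++)		/* 5 * 32 >= 140 bits */
		if (b[w])
			return w * 32 + __builtin_ctz(b[w]);
	return 140;			/* not reached: one bit is set */
}
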
7299 * ffs: find first bit set. This is defined the same way as
7300 * the libc and compiler builtin ffs routines, therefore
7301 * differs in spirit from the above ffz (man ffs).
7304 -extern int __inline__ ffs (int x)
7305 +extern inline int ffs (int x)
7312 - __asm__(" slr %0,%0\n"
7313 - " tml %1,0xffff\n"
7315 + __asm__(" tml %1,0xffff\n"
7320 "0: tml %1,0x00ff\n"
7325 "1: tml %1,0x000f\n"
7330 "2: tml %1,0x0003\n"
7335 "3: tml %1,0x0001\n"
7339 : "=&d" (r), "+d" (x) : : "cc" );
7345 + * fls: find last bit set.
7347 +extern inline int fls(int x)
7353 + __asm__(" tmh %1,0xffff\n"
7357 + "0: tmh %1,0xff00\n"
7361 + "1: tmh %1,0xf000\n"
7365 + "2: tmh %1,0xc000\n"
7369 + "3: tmh %1,0x8000\n"
7373 + : "+d" (r), "+d" (x) : : "cc" );
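
The new fls() mirrors ffs() from the top end: tmh probes the high halfword and the search narrows by halving. An equivalent portable C version, assuming the conventional libc-style contract (1-based result, fls(0) == 0):

static int fls_sketch(unsigned int x)
{
	int r = 32;

	if (!x)
		return 0;
	if (!(x & 0xffff0000U)) { x <<= 16; r -= 16; }
	if (!(x & 0xff000000U)) { x <<= 8;  r -= 8;  }
	if (!(x & 0xf0000000U)) { x <<= 4;  r -= 4;  }
	if (!(x & 0xc0000000U)) { x <<= 2;  r -= 2;  }
	if (!(x & 0x80000000U)) { r -= 1; }
	return r;
}
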
7379 #define ext2_set_bit(nr, addr) test_and_set_bit((nr)^24, addr)
7380 #define ext2_clear_bit(nr, addr) test_and_clear_bit((nr)^24, addr)
7381 #define ext2_test_bit(nr, addr) test_bit((nr)^24, addr)
7382 -static __inline__ int ext2_find_first_zero_bit(void *vaddr, unsigned size)
7383 +static inline int ext2_find_first_zero_bit(void *vaddr, unsigned size)
7385 unsigned long cmp, count;
7388 return (res < size) ? res : size;
7391 -static __inline__ int
7393 ext2_find_next_zero_bit(void *vaddr, unsigned size, unsigned offset)
7395 unsigned long *addr = vaddr;
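
The ext2 wrappers above map little-endian on-disk bit numbers onto the big-endian s390 bitops by XORing the bit number with 24. Because 24 == 3 << 3, the XOR mirrors only the byte index within the 32-bit word and leaves the bit-within-byte untouched. A quick self-check of that identity:

#include <assert.h>

int main(void)
{
	int nr;

	for (nr = 0; nr < 32; nr++) {
		assert(((nr ^ 24) >> 3) == (3 - (nr >> 3)));	/* byte index mirrored   */
		assert(((nr ^ 24) & 7) == (nr & 7));		/* bit in byte unchanged */
	}
	return 0;
}
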
7396 --- linux/include/asm-cris/bitops.h.orig Thu Jul 18 15:22:16 2002
7397 +++ linux/include/asm-cris/bitops.h Thu Jul 18 15:22:35 2002
7399 /* We use generic_ffs so get it; include guards resolve the possible
7400 mutually inclusion. */
7401 #include <linux/bitops.h>
7402 +#include <linux/compiler.h>
7405 * Some hacks to defeat gcc over-optimizations..
7408 #define set_bit(nr, addr) (void)test_and_set_bit(nr, addr)
7410 +#define __set_bit(nr, addr) (void)__test_and_set_bit(nr, addr)
7413 * clear_bit - Clears a bit in memory
7417 #define clear_bit(nr, addr) (void)test_and_clear_bit(nr, addr)
7419 +#define __clear_bit(nr, addr) (void)__test_and_clear_bit(nr, addr)
7422 * change_bit - Toggle a bit in memory
7425 * It also implies a memory barrier.
7428 -static __inline__ int test_and_set_bit(int nr, void *addr)
7429 +static inline int test_and_set_bit(int nr, void *addr)
7431 unsigned int mask, retval;
7432 unsigned long flags;
7433 @@ -105,6 +110,18 @@
7437 +static inline int __test_and_set_bit(int nr, void *addr)
7439 + unsigned int mask, retval;
7440 + unsigned int *adr = (unsigned int *)addr;
7443 + mask = 1 << (nr & 0x1f);
7444 + retval = (mask & *adr) != 0;
7450 * clear_bit() doesn't provide any barrier for the compiler.
7453 * It also implies a memory barrier.
7456 -static __inline__ int test_and_clear_bit(int nr, void *addr)
7457 +static inline int test_and_clear_bit(int nr, void *addr)
7459 unsigned int mask, retval;
7460 unsigned long flags;
7462 * but actually fail. You must protect multiple accesses with a lock.
7465 -static __inline__ int __test_and_clear_bit(int nr, void *addr)
7466 +static inline int __test_and_clear_bit(int nr, void *addr)
7468 unsigned int mask, retval;
7469 unsigned int *adr = (unsigned int *)addr;
7471 * It also implies a memory barrier.
7474 -static __inline__ int test_and_change_bit(int nr, void *addr)
7475 +static inline int test_and_change_bit(int nr, void *addr)
7477 unsigned int mask, retval;
7478 unsigned long flags;
7481 /* WARNING: non atomic and it can be reordered! */
7483 -static __inline__ int __test_and_change_bit(int nr, void *addr)
7484 +static inline int __test_and_change_bit(int nr, void *addr)
7486 unsigned int mask, retval;
7487 unsigned int *adr = (unsigned int *)addr;
7489 * This routine doesn't need to be atomic.
7492 -static __inline__ int test_bit(int nr, const void *addr)
7493 +static inline int test_bit(int nr, const void *addr)
7496 unsigned int *adr = (unsigned int *)addr;
7498 * number. They differ in that the first function also inverts all bits
7501 -static __inline__ unsigned long cris_swapnwbrlz(unsigned long w)
7502 +static inline unsigned long cris_swapnwbrlz(unsigned long w)
7504 /* Let's just say we return the result in the same register as the
7505 input. Saying we clobber the input but can return the result
7510 -static __inline__ unsigned long cris_swapwbrlz(unsigned long w)
7511 +static inline unsigned long cris_swapwbrlz(unsigned long w)
7514 __asm__ ("swapwbr %0 \n\t"
7516 * ffz = Find First Zero in word. Undefined if no zero exists,
7517 * so code should check against ~0UL first..
7519 -static __inline__ unsigned long ffz(unsigned long w)
7520 +static inline unsigned long ffz(unsigned long w)
7522 /* The generic_ffs function is used to avoid the asm when the
7523 argument is a constant. */
7525 * Somewhat like ffz but the equivalent of generic_ffs: in contrast to
7526 * ffz we return the first one-bit *plus one*.
7528 -static __inline__ unsigned long ffs(unsigned long w)
7529 +static inline unsigned long ffs(unsigned long w)
7531 /* The generic_ffs function is used to avoid the asm when the
7532 argument is a constant. */
7534 * @offset: The bitnumber to start searching at
7535 * @size: The maximum size to search
7537 -static __inline__ int find_next_zero_bit (void * addr, int size, int offset)
7538 +static inline int find_next_zero_bit (void * addr, int size, int offset)
7540 unsigned long *p = ((unsigned long *) addr) + (offset >> 5);
7541 unsigned long result = offset & ~31UL;
7542 @@ -354,7 +371,45 @@
7543 #define minix_test_bit(nr,addr) test_bit(nr,addr)
7544 #define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
7546 -#endif /* __KERNEL__ */
7548 +/* TODO: see below */
7549 +#define sched_find_first_zero_bit(addr) find_first_zero_bit(addr, 168)
7552 +/* TODO: left out pending where to put it.. (there are .h dependencies) */
7555 + * Every architecture must define this function. It's the fastest
7556 + * way of searching a 168-bit bitmap where the first 128 bits are
7557 + * unlikely to be set. It's guaranteed that at least one of the 168
7558 + * bits is cleared.
7561 +#if MAX_RT_PRIO != 128 || MAX_PRIO != 168
7562 +# error update this function.
7565 +#define MAX_RT_PRIO 128
7566 +#define MAX_PRIO 168
7569 +static inline int sched_find_first_zero_bit(char *bitmap)
7571 + unsigned int *b = (unsigned int *)bitmap;
7574 + rt = b[0] & b[1] & b[2] & b[3];
7575 + if (unlikely(rt != 0xffffffff))
7576 + return find_first_zero_bit(bitmap, MAX_RT_PRIO);
7579 + return ffz(b[4]) + MAX_RT_PRIO;
7580 + return ffz(b[5]) + 32 + MAX_RT_PRIO;
7586 +#endif /* __KERNEL__ */
7588 #endif /* _CRIS_BITOPS_H */
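
The cris sched_find_first_zero_bit() above collapses the 128 real-time priority bits into a single AND over four words, so the common all-set case costs four loads. Between its two trailing returns the extracted text has lost a guard; logically the ffz(b[4]) return may only be taken when b[4] has a zero bit. A C model of the intended flow, with ffz expressed through a builtin (an assumption):

static int ffz32(unsigned int w)	/* first zero bit, w != ~0U */
{
	return __builtin_ctz(~w);
}

static int sched_find_first_zero_bit_sketch(const unsigned int *b)
{
	int w;

	if ((b[0] & b[1] & b[2] & b[3]) == 0xffffffffU) {
		if (b[4] != 0xffffffffU)	/* the guard lost in extraction */
			return ffz32(b[4]) + 128;	/* + MAX_RT_PRIO */
		return ffz32(b[5]) + 32 + 128;
	}
	for (w = 0; w < 4; w++)			/* a zero lives in the RT words */
		if (b[w] != 0xffffffffU)
			return w * 32 + ffz32(b[w]);
	return 168;				/* not reached */
}
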
7589 --- linux/include/asm-s390x/bitops.h.orig Thu Jul 18 15:22:25 2002
7590 +++ linux/include/asm-s390x/bitops.h Thu Jul 18 15:22:35 2002
7591 @@ -51,271 +51,220 @@
7592 extern const char _oi_bitmap[];
7593 extern const char _ni_bitmap[];
7594 extern const char _zb_findmap[];
7595 +extern const char _sb_findmap[];
7599  * SMP safe set_bit routine based on compare and swap (CS)
7601 -static __inline__ void set_bit_cs(unsigned long nr, volatile void * addr)
7602 +static inline void set_bit_cs(unsigned long nr, volatile void *ptr)
7604 - unsigned long bits, mask;
7605 - __asm__ __volatile__(
7606 + unsigned long addr, old, new, mask;
7608 + addr = (unsigned long) ptr;
7610 - " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
7611 - " ngr %2,%1\n" /* isolate last 2 bits of address */
7612 - " xgr %1,%2\n" /* make addr % 4 == 0 */
7614 - " agr %0,%2\n" /* add alignement to bitnr */
7615 + addr ^= addr & 7; /* align address to 8 */
7616 + nr += (addr & 7) << 3; /* add alignment to bit number */
7619 - " nr %2,%0\n" /* make shift value */
7623 - " la %1,0(%0,%1)\n" /* calc. address for CS */
7624 - " sllg %3,%3,0(%2)\n" /* make OR mask */
7626 - "0: lgr %2,%0\n" /* CS loop starts here */
7627 - " ogr %2,%3\n" /* set bit */
7628 - " csg %0,%2,0(%1)\n"
7630 - : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
7631 - : "cc", "memory" );
7632 + addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
7633 + mask = 1UL << (nr & 63); /* make OR mask */
7638 + " csg %0,%1,0(%4)\n"
7640 + : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
7641 + : "d" (mask), "a" (addr)
7646  * SMP safe clear_bit routine based on compare and swap (CS)
7648 -static __inline__ void clear_bit_cs(unsigned long nr, volatile void * addr)
7649 +static inline void clear_bit_cs(unsigned long nr, volatile void *ptr)
7651 - unsigned long bits, mask;
7652 - __asm__ __volatile__(
7653 + unsigned long addr, old, new, mask;
7655 + addr = (unsigned long) ptr;
7657 - " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
7658 - " ngr %2,%1\n" /* isolate last 2 bits of address */
7659 - " xgr %1,%2\n" /* make addr % 4 == 0 */
7661 - " agr %0,%2\n" /* add alignement to bitnr */
7662 + addr ^= addr & 7; /* align address to 8 */
7663 + nr += (addr & 7) << 3; /* add alignment to bit number */
7666 - " nr %2,%0\n" /* make shift value */
7670 - " la %1,0(%0,%1)\n" /* calc. address for CS */
7672 - " rllg %3,%3,0(%2)\n" /* make AND mask */
7674 - "0: lgr %2,%0\n" /* CS loop starts here */
7675 - " ngr %2,%3\n" /* clear bit */
7676 - " csg %0,%2,0(%1)\n"
7678 - : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
7679 - : "cc", "memory" );
7680 + addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
7681 + mask = ~(1UL << (nr & 63)); /* make AND mask */
7686 + " csg %0,%1,0(%4)\n"
7688 + : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
7689 + : "d" (mask), "a" (addr)
7694  * SMP safe change_bit routine based on compare and swap (CS)
7696 -static __inline__ void change_bit_cs(unsigned long nr, volatile void * addr)
7697 +static inline void change_bit_cs(unsigned long nr, volatile void *ptr)
7699 - unsigned long bits, mask;
7700 - __asm__ __volatile__(
7701 + unsigned long addr, old, new, mask;
7703 + addr = (unsigned long) ptr;
7705 - " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
7706 - " ngr %2,%1\n" /* isolate last 2 bits of address */
7707 - " xgr %1,%2\n" /* make addr % 4 == 0 */
7709 - " agr %0,%2\n" /* add alignement to bitnr */
7710 + addr ^= addr & 7; /* align address to 8 */
7711 + nr += (addr & 7) << 3; /* add alignment to bit number */
7714 - " nr %2,%0\n" /* make shift value */
7718 - " la %1,0(%0,%1)\n" /* calc. address for CS */
7719 - " sllg %3,%3,0(%2)\n" /* make XR mask */
7721 - "0: lgr %2,%0\n" /* CS loop starts here */
7722 - " xgr %2,%3\n" /* change bit */
7723 - " csg %0,%2,0(%1)\n"
7725 - : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
7726 - : "cc", "memory" );
7727 + addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
7728 + mask = 1UL << (nr & 63); /* make XOR mask */
7733 + " csg %0,%1,0(%4)\n"
7735 + : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
7736 + : "d" (mask), "a" (addr)
7741  * SMP safe test_and_set_bit routine based on compare and swap (CS)
7743 -static __inline__ int
7744 -test_and_set_bit_cs(unsigned long nr, volatile void * addr)
7746 +test_and_set_bit_cs(unsigned long nr, volatile void *ptr)
7748 - unsigned long bits, mask;
7749 - __asm__ __volatile__(
7750 + unsigned long addr, old, new, mask;
7752 + addr = (unsigned long) ptr;
7754 - " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
7755 - " ngr %2,%1\n" /* isolate last 2 bits of address */
7756 - " xgr %1,%2\n" /* make addr % 4 == 0 */
7758 - " agr %0,%2\n" /* add alignement to bitnr */
7759 + addr ^= addr & 7; /* align address to 8 */
7760 + nr += (addr & 7) << 3; /* add alignment to bit number */
7763 - " nr %2,%0\n" /* make shift value */
7767 - " la %1,0(%0,%1)\n" /* calc. address for CS */
7768 - " sllg %3,%3,0(%2)\n" /* make OR mask */
7770 - "0: lgr %2,%0\n" /* CS loop starts here */
7771 - " ogr %2,%3\n" /* set bit */
7772 - " csg %0,%2,0(%1)\n"
7774 - " ngr %0,%3\n" /* isolate old bit */
7775 - : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
7776 - : "cc", "memory" );
7778 + addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
7779 + mask = 1UL << (nr & 63); /* make OR/test mask */
7784 + " csg %0,%1,0(%4)\n"
7786 + : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
7787 + : "d" (mask), "a" (addr)
7789 + return (old & mask) != 0;
7793  * SMP safe test_and_clear_bit routine based on compare and swap (CS)
7795 -static __inline__ int
7796 -test_and_clear_bit_cs(unsigned long nr, volatile void * addr)
7798 +test_and_clear_bit_cs(unsigned long nr, volatile void *ptr)
7800 - unsigned long bits, mask;
7801 - __asm__ __volatile__(
7802 + unsigned long addr, old, new, mask;
7804 + addr = (unsigned long) ptr;
7806 - " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
7807 - " ngr %2,%1\n" /* isolate last 2 bits of address */
7808 - " xgr %1,%2\n" /* make addr % 4 == 0 */
7810 - " agr %0,%2\n" /* add alignement to bitnr */
7811 + addr ^= addr & 7; /* align address to 8 */
7812 + nr += (addr & 7) << 3; /* add alignment to bit number */
7815 - " nr %2,%0\n" /* make shift value */
7819 - " la %1,0(%0,%1)\n" /* calc. address for CS */
7820 - " rllg %3,%3,0(%2)\n" /* make AND mask */
7822 - "0: lgr %2,%0\n" /* CS loop starts here */
7823 - " ngr %2,%3\n" /* clear bit */
7824 - " csg %0,%2,0(%1)\n"
7826 - " xgr %0,%2\n" /* isolate old bit */
7827 - : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
7828 - : "cc", "memory" );
7830 + addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
7831 + mask = ~(1UL << (nr & 63)); /* make AND mask */
7836 + " csg %0,%1,0(%4)\n"
7838 + : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
7839 + : "d" (mask), "a" (addr)
7841 + return (old ^ new) != 0;
7845  * SMP safe test_and_change_bit routine based on compare and swap (CS)
7847 -static __inline__ int
7848 -test_and_change_bit_cs(unsigned long nr, volatile void * addr)
7850 +test_and_change_bit_cs(unsigned long nr, volatile void *ptr)
7852 - unsigned long bits, mask;
7853 - __asm__ __volatile__(
7854 + unsigned long addr, old, new, mask;
7856 + addr = (unsigned long) ptr;
7858 - " lghi %2,7\n" /* CS must be aligned on 4 byte b. */
7859 - " ngr %2,%1\n" /* isolate last 2 bits of address */
7860 - " xgr %1,%2\n" /* make addr % 4 == 0 */
7862 - " agr %0,%2\n" /* add alignement to bitnr */
7863 + addr ^= addr & 7; /* align address to 8 */
7864 + nr += (addr & 7) << 3; /* add alignment to bit number */
7867 - " nr %2,%0\n" /* make shift value */
7871 - " la %1,0(%0,%1)\n" /* calc. address for CS */
7872 - " sllg %3,%3,0(%2)\n" /* make OR mask */
7874 - "0: lgr %2,%0\n" /* CS loop starts here */
7875 - " xgr %2,%3\n" /* change bit */
7876 - " csg %0,%2,0(%1)\n"
7878 - " ngr %0,%3\n" /* isolate old bit */
7879 - : "+a" (nr), "+a" (addr), "=&a" (bits), "=&d" (mask) :
7880 - : "cc", "memory" );
7882 + addr += (nr ^ (nr & 63)) >> 3; /* calculate address for CS */
7883 + mask = 1UL << (nr & 63); /* make XOR mask */
7888 + " csg %0,%1,0(%4)\n"
7890 + : "=&d" (old), "=&d" (new), "+m" (*(unsigned long *) addr)
7891 + : "d" (mask), "a" (addr)
7893 + return (old & mask) != 0;
7895 #endif /* CONFIG_SMP */
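
One subtlety in the 64-bit test_and_clear_bit_cs() above: it returns (old ^ new) != 0 rather than (old & mask) != 0. Since new is old with exactly one bit forced clear, old ^ new isolates that bit and is nonzero precisely when it was previously set, so the positive mask need not be kept live. A C model of the loop, with GCC's __sync_val_compare_and_swap standing in for CSG (an assumption):

static int test_and_clear_bit_cs_sketch(unsigned long *word, int nr)
{
	unsigned long old, new, mask = ~(1UL << (nr & 63));

	do {
		old = *word;
		new = old & mask;	/* clear the bit in a copy */
	} while (__sync_val_compare_and_swap(word, old, new) != old);
	return (old ^ new) != 0;	/* == old value of the bit */
}
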
7898 * fast, non-SMP set_bit routine
7900 -static __inline__ void __set_bit(unsigned long nr, volatile void * addr)
7901 +static inline void __set_bit(unsigned long nr, volatile void *ptr)
7903 - unsigned long reg1, reg2;
7904 - __asm__ __volatile__(
7910 - " la %1,0(%1,%3)\n"
7911 - " la %0,0(%0,%4)\n"
7912 - " oc 0(1,%1),0(%0)"
7913 - : "=&a" (reg1), "=&a" (reg2)
7914 - : "a" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
7917 -static __inline__ void
7918 -__constant_set_bit(const unsigned long nr, volatile void * addr)
7922 - __asm__ __volatile__ ("la 1,%0\n\t"
7924 - : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7925 - : : "1", "cc", "memory");
7928 - __asm__ __volatile__ ("la 1,%0\n\t"
7930 - : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7931 - : : "1", "cc", "memory" );
7934 - __asm__ __volatile__ ("la 1,%0\n\t"
7936 - : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7937 - : : "1", "cc", "memory" );
7940 - __asm__ __volatile__ ("la 1,%0\n\t"
7942 - : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7943 - : : "1", "cc", "memory" );
7946 - __asm__ __volatile__ ("la 1,%0\n\t"
7948 - : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7949 - : : "1", "cc", "memory" );
7952 - __asm__ __volatile__ ("la 1,%0\n\t"
7954 - : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7955 - : : "1", "cc", "memory" );
7958 - __asm__ __volatile__ ("la 1,%0\n\t"
7960 - : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7961 - : : "1", "cc", "memory" );
7964 - __asm__ __volatile__ ("la 1,%0\n\t"
7966 - : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
7967 - : : "1", "cc", "memory" );
7970 + unsigned long addr;
7972 + addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
7973 + asm volatile("oc 0(1,%1),0(%2)"
7974 + : "+m" (*(char *) addr)
7975 + : "a" (addr), "a" (_oi_bitmap + (nr & 7))
7980 +__constant_set_bit(const unsigned long nr, volatile void *ptr)
7982 + unsigned long addr;
7984 + addr = ((unsigned long) ptr) + ((nr >> 3) ^ 7);
7987 + asm volatile ("oi 0(%1),0x01"
7988 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7991 + asm volatile ("oi 0(%1),0x02"
7992 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7995 + asm volatile ("oi 0(%1),0x04"
7996 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
7999 + asm volatile ("oi 0(%1),0x08"
8000 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
8003 + asm volatile ("oi 0(%1),0x10"
8004 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
8007 + asm volatile ("oi 0(%1),0x20"
8008 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
8011 + asm volatile ("oi 0(%1),0x40"
8012 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
8015 + asm volatile ("oi 0(%1),0x80"
8016 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
8021 #define set_bit_simple(nr,addr) \
8022 @@ -326,76 +275,58 @@
8024 * fast, non-SMP clear_bit routine
8026 -static __inline__ void
8027 -__clear_bit(unsigned long nr, volatile void * addr)
8029 +__clear_bit(unsigned long nr, volatile void *ptr)
8031 - unsigned long reg1, reg2;
8032 - __asm__ __volatile__(
8038 - " la %1,0(%1,%3)\n"
8039 - " la %0,0(%0,%4)\n"
8040 - " nc 0(1,%1),0(%0)"
8041 - : "=&a" (reg1), "=&a" (reg2)
8042 - : "d" (nr), "a" (addr), "a" (&_ni_bitmap) : "cc", "memory" );
8045 -static __inline__ void
8046 -__constant_clear_bit(const unsigned long nr, volatile void * addr)
8050 - __asm__ __volatile__ ("la 1,%0\n\t"
8052 - : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
8053 - : : "1", "cc", "memory" );
8056 - __asm__ __volatile__ ("la 1,%0\n\t"
8058 - : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
8059 - : : "1", "cc", "memory" );
8062 - __asm__ __volatile__ ("la 1,%0\n\t"
8064 - : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
8065 - : : "1", "cc", "memory" );
8068 - __asm__ __volatile__ ("la 1,%0\n\t"
8070 - : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
8071 - : : "1", "cc", "memory" );
8074 - __asm__ __volatile__ ("la 1,%0\n\t"
8076 - : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
8077 - : : "cc", "memory" );
8080 - __asm__ __volatile__ ("la 1,%0\n\t"
8082 - : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
8083 - : : "1", "cc", "memory" );
8086 - __asm__ __volatile__ ("la 1,%0\n\t"
8088 - : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
8089 - : : "1", "cc", "memory" );
8092 - __asm__ __volatile__ ("la 1,%0\n\t"
8094 - : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
8095 - : : "1", "cc", "memory" );
8098 + unsigned long addr;
8100 + addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
8101 + asm volatile("nc 0(1,%1),0(%2)"
8102 + : "+m" (*(char *) addr)
8103 + : "a" (addr), "a" (_ni_bitmap + (nr & 7))
8108 +__constant_clear_bit(const unsigned long nr, volatile void *ptr)
8110 + unsigned long addr;
8112 + addr = ((unsigned long) ptr) + ((nr >> 3) ^ 7);
8115 + asm volatile ("ni 0(%1),0xFE"
8116 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
8119 + asm volatile ("ni 0(%1),0xFD"
8120 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
8123 + asm volatile ("ni 0(%1),0xFB"
8124 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
8127 + asm volatile ("ni 0(%1),0xF7"
8128 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
8131 + asm volatile ("ni 0(%1),0xEF"
8132 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
8135 + asm volatile ("ni 0(%1),0xDF"
8136 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
8139 + asm volatile ("ni 0(%1),0xBF"
8140 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
8143 + asm volatile ("ni 0(%1),0x7F"
8144 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
8149 #define clear_bit_simple(nr,addr) \
8150 @@ -406,75 +337,57 @@
8152 * fast, non-SMP change_bit routine
8154 -static __inline__ void __change_bit(unsigned long nr, volatile void * addr)
8155 +static inline void __change_bit(unsigned long nr, volatile void *ptr)
8157 - unsigned long reg1, reg2;
8158 - __asm__ __volatile__(
8164 - " la %1,0(%1,%3)\n"
8165 - " la %0,0(%0,%4)\n"
8166 - " xc 0(1,%1),0(%0)"
8167 - : "=&a" (reg1), "=&a" (reg2)
8168 - : "d" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
8171 -static __inline__ void
8172 -__constant_change_bit(const unsigned long nr, volatile void * addr)
8176 - __asm__ __volatile__ ("la 1,%0\n\t"
8178 - : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
8179 - : : "cc", "memory" );
8182 - __asm__ __volatile__ ("la 1,%0\n\t"
8184 - : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
8185 - : : "cc", "memory" );
8188 - __asm__ __volatile__ ("la 1,%0\n\t"
8190 - : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
8191 - : : "cc", "memory" );
8194 - __asm__ __volatile__ ("la 1,%0\n\t"
8196 - : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
8197 - : : "cc", "memory" );
8200 - __asm__ __volatile__ ("la 1,%0\n\t"
8202 - : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
8203 - : : "cc", "memory" );
8206 - __asm__ __volatile__ ("la 1,%0\n\t"
8208 - : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
8209 - : : "1", "cc", "memory" );
8212 - __asm__ __volatile__ ("la 1,%0\n\t"
8214 - : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
8215 - : : "1", "cc", "memory" );
8218 - __asm__ __volatile__ ("la 1,%0\n\t"
8220 - : "=m" (*((volatile char *) addr + ((nr>>3)^7)))
8221 - : : "1", "cc", "memory" );
8224 + unsigned long addr;
8226 + addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
8227 + asm volatile("xc 0(1,%1),0(%2)"
8228 + : "+m" (*(char *) addr)
8229 + : "a" (addr), "a" (_oi_bitmap + (nr & 7))
8234 +__constant_change_bit(const unsigned long nr, volatile void *ptr)
8236 + unsigned long addr;
8238 + addr = ((unsigned long) ptr) + ((nr >> 3) ^ 7);
8241 + asm volatile ("xi 0(%1),0x01"
8242 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
8245 + asm volatile ("xi 0(%1),0x02"
8246 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
8249 + asm volatile ("xi 0(%1),0x04"
8250 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
8253 + asm volatile ("xi 0(%1),0x08"
8254 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
8257 + asm volatile ("xi 0(%1),0x10"
8258 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
8261 + asm volatile ("xi 0(%1),0x20"
8262 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
8265 + asm volatile ("xi 0(%1),0x40"
8266 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
8269 + asm volatile ("xi 0(%1),0x80"
8270 + : "+m" (*(char *) addr) : "a" (addr) : "cc" );
8275 #define change_bit_simple(nr,addr) \
8276 @@ -485,77 +398,57 @@
8278 * fast, non-SMP test_and_set_bit routine
8280 -static __inline__ int
8281 -test_and_set_bit_simple(unsigned long nr, volatile void * addr)
8283 +test_and_set_bit_simple(unsigned long nr, volatile void *ptr)
8285 - unsigned long reg1, reg2;
8287 - __asm__ __volatile__(
8293 - " la %1,0(%1,%4)\n"
8296 - " la %2,0(%2,%5)\n"
8297 - " oc 0(1,%1),0(%2)"
8298 - : "=&d" (oldbit), "=&a" (reg1), "=&a" (reg2)
8299 - : "d" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
8300 - return oldbit & 1;
8301 + unsigned long addr;
8304 + addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
8305 + ch = *(unsigned char *) addr;
8306 + asm volatile("oc 0(1,%1),0(%2)"
8307 + : "+m" (*(char *) addr)
8308 + : "a" (addr), "a" (_oi_bitmap + (nr & 7))
8310 + return (ch >> (nr & 7)) & 1;
8312 #define __test_and_set_bit(X,Y) test_and_set_bit_simple(X,Y)
8315 * fast, non-SMP test_and_clear_bit routine
8317 -static __inline__ int
8318 -test_and_clear_bit_simple(unsigned long nr, volatile void * addr)
8320 +test_and_clear_bit_simple(unsigned long nr, volatile void *ptr)
8322 - unsigned long reg1, reg2;
8324 + unsigned long addr;
8327 - __asm__ __volatile__(
8333 - " la %1,0(%1,%4)\n"
8336 - " la %2,0(%2,%5)\n"
8337 - " nc 0(1,%1),0(%2)"
8338 - : "=&d" (oldbit), "=&a" (reg1), "=&a" (reg2)
8339 - : "d" (nr), "a" (addr), "a" (&_ni_bitmap) : "cc", "memory" );
8340 - return oldbit & 1;
8341 + addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
8342 + ch = *(unsigned char *) addr;
8343 + asm volatile("nc 0(1,%1),0(%2)"
8344 + : "+m" (*(char *) addr)
8345 + : "a" (addr), "a" (_ni_bitmap + (nr & 7))
8347 + return (ch >> (nr & 7)) & 1;
8349 #define __test_and_clear_bit(X,Y) test_and_clear_bit_simple(X,Y)
8352 * fast, non-SMP test_and_change_bit routine
8354 -static __inline__ int
8355 -test_and_change_bit_simple(unsigned long nr, volatile void * addr)
8357 +test_and_change_bit_simple(unsigned long nr, volatile void *ptr)
8359 - unsigned long reg1, reg2;
8361 + unsigned long addr;
8364 - __asm__ __volatile__(
8370 - " la %1,0(%1,%4)\n"
8373 - " la %2,0(%2,%5)\n"
8374 - " xc 0(1,%1),0(%2)"
8375 - : "=&d" (oldbit), "=&a" (reg1), "=&a" (reg2)
8376 - : "d" (nr), "a" (addr), "a" (&_oi_bitmap) : "cc", "memory" );
8377 - return oldbit & 1;
8378 + addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
8379 + ch = *(unsigned char *) addr;
8380 + asm volatile("xc 0(1,%1),0(%2)"
8381 + : "+m" (*(char *) addr)
8382 + : "a" (addr), "a" (_oi_bitmap + (nr & 7))
8384 + return (ch >> (nr & 7)) & 1;
8386 #define __test_and_change_bit(X,Y) test_and_change_bit_simple(X,Y)
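
The 64-bit _simple helpers locate the byte holding bit nr as (nr ^ 56) >> 3, while the __constant_* variants spell the same thing (nr >> 3) ^ 7: since 56 == 7 << 3, both mirror the byte index inside the 8-byte word, the 64-bit analogue of the 31-bit ^24 trick. A one-loop check of the equivalence:

#include <assert.h>

int main(void)
{
	unsigned long nr;

	for (nr = 0; nr < 64; nr++)
		assert(((nr ^ 56) >> 3) == ((nr >> 3) ^ 7));
	return 0;
}
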
8388 @@ -580,26 +473,18 @@
8389 * This routine doesn't need to be atomic.
8392 -static __inline__ int __test_bit(unsigned long nr, volatile void * addr)
8393 +static inline int __test_bit(unsigned long nr, volatile void *ptr)
8395 - unsigned long reg1, reg2;
8397 + unsigned long addr;
8400 - __asm__ __volatile__(
8406 - " ic %0,0(%2,%4)\n"
8408 - : "=&d" (oldbit), "=&a" (reg1), "=&a" (reg2)
8409 - : "d" (nr), "a" (addr) : "cc" );
8410 - return oldbit & 1;
8411 + addr = (unsigned long) ptr + ((nr ^ 56) >> 3);
8412 + ch = *(unsigned char *) addr;
8413 + return (ch >> (nr & 7)) & 1;
8416 -static __inline__ int
8417 -__constant_test_bit(unsigned long nr, volatile void * addr) {
8419 +__constant_test_bit(unsigned long nr, volatile void *addr) {
8420 return (((volatile char *) addr)[(nr>>3)^7] & (1<<(nr&7))) != 0;
8425 * Find-bit routines..
8427 -static __inline__ unsigned long
8428 +static inline unsigned long
8429 find_first_zero_bit(void * addr, unsigned long size)
8431 unsigned long res, cmp, count;
8432 @@ -653,7 +538,49 @@
8433 return (res < size) ? res : size;
8436 -static __inline__ unsigned long
8437 +static inline unsigned long
8438 +find_first_bit(void * addr, unsigned long size)
8440 + unsigned long res, cmp, count;
8444 + __asm__(" slgr %1,%1\n"
8449 + "0: cg %1,0(%0,%4)\n"
8455 + "1: lg %2,0(%0,%4)\n"
8460 + " srlg %2,%2,32\n"
8461 + "2: lghi %1,0xff\n"
8462 + " tmll %2,0xffff\n"
8466 + "3: tmll %2,0x00ff\n"
8471 + " ic %2,0(%2,%5)\n"
8474 + : "=&a" (res), "=&d" (cmp), "=&a" (count)
8475 + : "a" (size), "a" (addr), "a" (&_sb_findmap) : "cc" );
8476 + return (res < size) ? res : size;
8479 +static inline unsigned long
8480 find_next_zero_bit (void * addr, unsigned long size, unsigned long offset)
8482 unsigned long * p = ((unsigned long *) addr) + (offset >> 6);
8483 @@ -697,14 +624,56 @@
8484 return (offset + res);
8487 +static inline unsigned long
8488 +find_next_bit (void * addr, unsigned long size, unsigned long offset)
8490 + unsigned long * p = ((unsigned long *) addr) + (offset >> 6);
8491 + unsigned long bitvec, reg;
8492 + unsigned long set, bit = offset & 63, res;
8496 +	 * Look for set bit in first word
8498 + bitvec = (*p) >> bit;
8499 + __asm__(" slgr %0,%0\n"
8503 + " srlg %1,%1,32\n"
8504 + "0: lghi %2,0xff\n"
8505 + " tmll %1,0xffff\n"
8508 + " srlg %1,%1,16\n"
8509 + "1: tmll %1,0x00ff\n"
8514 + " ic %1,0(%1,%3)\n"
8516 + : "=&d" (set), "+a" (bitvec), "=&d" (reg)
8517 + : "a" (&_sb_findmap) : "cc" );
8518 + if (set < (64 - bit))
8519 + return set + offset;
8520 + offset += 64 - bit;
8524 + * No set bit yet, search remaining full words for a bit
8526 + res = find_first_bit (p, size - 64 * (p - (unsigned long *) addr));
8527 + return (offset + res);
8531 * ffz = Find First Zero in word. Undefined if no zero exists,
8532 * so code should check against ~0UL first..
8534 -static __inline__ unsigned long ffz(unsigned long word)
8535 +static inline unsigned long ffz(unsigned long word)
8537 - unsigned long reg;
8539 + unsigned long reg, result;
8541 __asm__(" lhi %2,-1\n"
8543 @@ -730,40 +699,112 @@
8547 + * __ffs = find first bit in word. Undefined if no bit exists,
8548 + * so code should check against 0UL first..
8550 +static inline unsigned long __ffs (unsigned long word)
8552 + unsigned long reg, result;
8554 + __asm__(" slgr %0,%0\n"
8558 + " srlg %1,%1,32\n"
8559 + "0: lghi %2,0xff\n"
8560 + " tmll %1,0xffff\n"
8563 + " srlg %1,%1,16\n"
8564 + "1: tmll %1,0x00ff\n"
8569 + " ic %1,0(%1,%3)\n"
8571 + : "=&d" (result), "+a" (word), "=&d" (reg)
8572 + : "a" (&_sb_findmap) : "cc" );
8577 + * Every architecture must define this function. It's the fastest
8578 + * way of searching a 140-bit bitmap where the first 100 bits are
8579 + * unlikely to be set. It's guaranteed that at least one of the 140
8580 + * bits is set.
8582 +static inline int sched_find_first_bit(unsigned long *b)
8584 + return find_first_bit(b, 140);
8588 * ffs: find first bit set. This is defined the same way as
8589 * the libc and compiler builtin ffs routines, therefore
8590 * differs in spirit from the above ffz (man ffs).
8593 -extern int __inline__ ffs (int x)
8594 +extern inline int ffs (int x)
8601 - __asm__(" slr %0,%0\n"
8602 - " tml %1,0xffff\n"
8604 + __asm__(" tml %1,0xffff\n"
8609 "0: tml %1,0x00ff\n"
8614 "1: tml %1,0x000f\n"
8619 "2: tml %1,0x0003\n"
8624 "3: tml %1,0x0001\n"
8628 : "=&d" (r), "+d" (x) : : "cc" );
8634 + * fls: find last bit set.
8636 +extern inline int fls(int x)
8642 + __asm__(" tmh %1,0xffff\n"
8646 + "0: tmh %1,0xff00\n"
8650 + "1: tmh %1,0xf000\n"
8654 + "2: tmh %1,0xc000\n"
8658 + "3: tmh %1,0x8000\n"
8662 + : "+d" (r), "+d" (x) : : "cc" );
8668 #define ext2_set_bit(nr, addr) test_and_set_bit((nr)^56, addr)
8669 #define ext2_clear_bit(nr, addr) test_and_clear_bit((nr)^56, addr)
8670 #define ext2_test_bit(nr, addr) test_bit((nr)^56, addr)
8671 -static __inline__ unsigned long
8672 +static inline unsigned long
8673 ext2_find_first_zero_bit(void *vaddr, unsigned long size)
8675 unsigned long res, cmp, count;
8677 return (res < size) ? res : size;
8680 -static __inline__ unsigned long
8681 +static inline unsigned long
8682 ext2_find_next_zero_bit(void *vaddr, unsigned long size, unsigned long offset)
8684 unsigned long *addr = vaddr;
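
Several of the new find/__ffs routines end in an `ic %1,0(%1,%3)` against _sb_findmap: a 256-entry table mapping a byte value to the index of its lowest set bit, the set-bit counterpart of the existing _zb_findmap. How such a table is built and consumed, in C (the names and the build step are illustrative assumptions):

static unsigned char sb_findmap[256];

static void build_sb_findmap(void)
{
	int i, b;

	sb_findmap[0] = 8;			/* sentinel: byte has no set bit */
	for (i = 1; i < 256; i++)
		for (b = 0; b < 8; b++)
			if (i & (1 << b)) {
				sb_findmap[i] = b;
				break;
			}
}

static unsigned long ffs_bytewise_sketch(unsigned long word)	/* word != 0 */
{
	unsigned long res = 0;

	while ((word & 0xff) == 0) {		/* skip empty low bytes */
		word >>= 8;
		res += 8;
	}
	return res + sb_findmap[word & 0xff];
}
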
8685 --- linux/include/asm-ppc64/bitops.h.orig Thu Jul 18 15:22:25 2002
8686 +++ linux/include/asm-ppc64/bitops.h Thu Jul 18 15:22:35 2002
8691 -#include <asm/byteorder.h>
8692 #include <asm/memory.h>
8696 #define smp_mb__before_clear_bit() smp_mb()
8697 #define smp_mb__after_clear_bit() smp_mb()
8699 -static __inline__ int test_bit(unsigned long nr, __const__ volatile void *addr)
8700 +static __inline__ int test_bit(unsigned long nr, __const__ volatile unsigned long *addr)
8702 return (1UL & (((__const__ long *) addr)[nr >> 6] >> (nr & 63)));
8705 -static __inline__ void set_bit(unsigned long nr, volatile void *addr)
8706 +static __inline__ void set_bit(unsigned long nr, volatile unsigned long *addr)
8709 unsigned long mask = 1UL << (nr & 0x3f);
8714 -static __inline__ void clear_bit(unsigned long nr, volatile void *addr)
8715 +static __inline__ void clear_bit(unsigned long nr, volatile unsigned long *addr)
8718 unsigned long mask = 1UL << (nr & 0x3f);
8723 -static __inline__ void change_bit(unsigned long nr, volatile void *addr)
8724 +static __inline__ void change_bit(unsigned long nr, volatile unsigned long *addr)
8727 unsigned long mask = 1UL << (nr & 0x3f);
8732 -static __inline__ int test_and_set_bit(unsigned long nr, volatile void *addr)
8733 +static __inline__ int test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
8735 unsigned long old, t;
8736 unsigned long mask = 1UL << (nr & 0x3f);
8738 return (old & mask) != 0;
8741 -static __inline__ int test_and_clear_bit(unsigned long nr, volatile void *addr)
8742 +static __inline__ int test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
8744 unsigned long old, t;
8745 unsigned long mask = 1UL << (nr & 0x3f);
8747 return (old & mask) != 0;
8750 -static __inline__ int test_and_change_bit(unsigned long nr, volatile void *addr)
8751 +static __inline__ int test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
8753 unsigned long old, t;
8754 unsigned long mask = 1UL << (nr & 0x3f);
8757 * non-atomic versions
8759 -static __inline__ void __set_bit(unsigned long nr, volatile void *addr)
8760 +static __inline__ void __set_bit(unsigned long nr, volatile unsigned long *addr)
8762 unsigned long mask = 1UL << (nr & 0x3f);
8763 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
8768 -static __inline__ void __clear_bit(unsigned long nr, volatile void *addr)
8769 +static __inline__ void __clear_bit(unsigned long nr, volatile unsigned long *addr)
8771 unsigned long mask = 1UL << (nr & 0x3f);
8772 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
8777 -static __inline__ void __change_bit(unsigned long nr, volatile void *addr)
8778 +static __inline__ void __change_bit(unsigned long nr, volatile unsigned long *addr)
8780 unsigned long mask = 1UL << (nr & 0x3f);
8781 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
8786 -static __inline__ int __test_and_set_bit(unsigned long nr, volatile void *addr)
8787 +static __inline__ int __test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
8789 unsigned long mask = 1UL << (nr & 0x3f);
8790 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
8792 return (old & mask) != 0;
8795 -static __inline__ int __test_and_clear_bit(unsigned long nr, volatile void *addr)
8796 +static __inline__ int __test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
8798 unsigned long mask = 1UL << (nr & 0x3f);
8799 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
8801 return (old & mask) != 0;
8804 -static __inline__ int __test_and_change_bit(unsigned long nr, volatile void *addr)
8805 +static __inline__ int __test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
8807 unsigned long mask = 1UL << (nr & 0x3f);
8808 unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
8809 @@ -224,54 +223,29 @@
8813 -/* Return the zero-based bit position
8814 - * from RIGHT TO LEFT 63 --> 0
8815 - * of the most significant (left-most) 1-bit in an 8-byte area.
8817 -static __inline__ long cnt_trailing_zeros(unsigned long mask)
8822 -" addi %0,%1,-1 \n\
8834 - * ffz = Find First Zero in word. Undefined if no zero exists,
8835 - * Determines the bit position of the LEAST significant
8836 - * (rightmost) 0 bit in the specified DOUBLE-WORD.
8837 - * The returned bit position will be zero-based, starting
8838 - * from the right side (63 - 0).
8839 - * the code should check against ~0UL first..
8840 + * Determines the bit position of the least significant (rightmost) 0 bit
8841 + * in the specified double word. The returned bit position will be zero-based,
8842 + * starting from the right side (63 - 0).
8844 static __inline__ unsigned long ffz(unsigned long x)
8848 - /* Change all of x's 1s to 0s and 0s to 1s in x.
8849 - * And insure at least 1 zero exists in the 8 byte area.
8851 + /* no zero exists anywhere in the 8 byte area. */
8853 - /* no zero exists anywhere in the 8 byte area. */
8856 - /* Calculate the bit position of the least significant '1' bit in x
8857 - * (since x has been changed this will actually be the least
8858 - * significant '0' bit in the original x).
8859 - * Note: (x & -x) gives us a mask that is the LEAST significant
8860 - * (RIGHT-most) 1-bit of the value in x.
8862 +	 * Calculate the bit position of the least significant '1' bit in x
8863 +	 * (since x has been changed this will actually be the least significant
8864 +	 * '0' bit in the original x). Note: (x & -x) gives us a mask that
8865 +	 * is the least significant (RIGHT-most) 1-bit of the value in x.
8867 - tempRC = __ilog2(x & -x);
8868 + return __ilog2(x & -x);
8872 +static __inline__ int __ffs(unsigned long x)
8874 + return __ilog2(x & -x);
8880 static __inline__ int ffs(int x)
8882 - int result = ffz(~x);
8883 - return x ? result+1 : 0;
8884 + unsigned long i = (unsigned long)x;
8885 + return __ilog2(i & -i) + 1;
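
The ppc64 hunk above folds cnt_trailing_zeros(), ffz(), __ffs() and ffs() into one two's-complement identity: x & -x isolates the least significant set bit, so __ilog2(x & -x) is its index; for ffz the word is complemented first. Note that the rewritten ffs() appears to drop the old x == 0 special case, for which __ilog2 is undefined. A demonstration with a portable helper standing in for the cntlzd-based __ilog2 (an assumption):

#include <assert.h>

static int ilog2_sketch(unsigned long x)	/* floor(log2(x)), x != 0 */
{
	int n = 0;

	while (x >>= 1)
		n++;
	return n;
}

int main(void)
{
	unsigned long x = 0xb0;			/* bits 4, 5 and 7 set */

	assert((x & -x) == 0x10);		/* lowest set bit isolated      */
	assert(ilog2_sketch(x & -x) == 4);	/* __ffs(x) == 4                */
	assert(ilog2_sketch(~x & (x + 1)) == 0);/* ffz(x) == 0, since -~x == x+1 */
	return 0;
}
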
8889 @@ -293,139 +267,82 @@
8890 #define hweight16(x) generic_hweight16(x)
8891 #define hweight8(x) generic_hweight8(x)
8893 -extern unsigned long find_next_zero_bit(void * addr, unsigned long size,
8894 - unsigned long offset);
8896 - * The optimizer actually does good code for this case..
8898 -#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
8899 +extern unsigned long find_next_zero_bit(unsigned long *addr, unsigned long size, unsigned long offset);
8900 +#define find_first_zero_bit(addr, size) \
8901 + find_next_zero_bit((addr), (size), 0)
8903 +extern unsigned long find_next_bit(unsigned long *addr, unsigned long size, unsigned long offset);
8904 +#define find_first_bit(addr, size) \
8905 + find_next_bit((addr), (size), 0)
8907 +extern unsigned long find_next_zero_le_bit(unsigned long *addr, unsigned long size, unsigned long offset);
8908 +#define find_first_zero_le_bit(addr, size) \
8909 + find_next_zero_le_bit((addr), (size), 0)
8911 -/* Bitmap functions for the ext2 filesystem. */
8912 -#define _EXT2_HAVE_ASM_BITOPS_
8914 -static __inline__ int ext2_set_bit(int nr, void* addr)
8915 +static __inline__ int test_le_bit(unsigned long nr, __const__ unsigned long * addr)
8917 - /* This method needs to take into account the fact that the ext2 file system represents
8918 - * it's bitmaps as "little endian" unsigned integers.
8919 - * Note: this method is not atomic, but ext2 does not need it to be.
8923 - unsigned char* ADDR = (unsigned char*) addr;
8925 - /* Determine the BYTE containing the specified bit
8926 - * (nr) - important as if we go to a byte there are no
8927 - * little endian concerns.
8930 - mask = 1 << (nr & 0x07); /* Create a mask to the bit within this byte. */
8931 - oldbit = *ADDR & mask; /* Save the bit's previous value. */
8932 - *ADDR |= mask; /* Turn the bit on. */
8933 - return oldbit; /* Return the bit's previous value. */
8934 + __const__ unsigned char *ADDR = (__const__ unsigned char *) addr;
8935 + return (ADDR[nr >> 3] >> (nr & 7)) & 1;
8938 -static __inline__ int ext2_clear_bit(int nr, void* addr)
8940 + * non-atomic versions
8942 +static __inline__ void __set_le_bit(unsigned long nr, unsigned long *addr)
8944 - /* This method needs to take into account the fact that the ext2 file system represents
8945 - * | it's bitmaps as "little endian" unsigned integers.
8946 - * Note: this method is not atomic, but ext2 does not need it to be.
8950 - unsigned char* ADDR = (unsigned char*) addr;
8952 - /* Determine the BYTE containing the specified bit (nr)
8953 - * - important as if we go to a byte there are no little endian concerns.
8956 - mask = 1 << (nr & 0x07); /* Create a mask to the bit within this byte. */
8957 - oldbit = *ADDR & mask; /* Save the bit's previous value. */
8958 - *ADDR = *ADDR & ~mask; /* Turn the bit off. */
8959 - return oldbit; /* Return the bit's previous value. */
8961 + unsigned char *ADDR = (unsigned char *)addr;
8963 -static __inline__ int ext2_test_bit(int nr, __const__ void * addr)
8965 - /* This method needs to take into account the fact that the ext2 file system represents
8966 - * | it's bitmaps as "little endian" unsigned integers.
8967 - * Determine the BYTE containing the specified bit (nr),
8968 - * then shift to the right the correct number of bits and return that bit's value.
8970 - __const__ unsigned char *ADDR = (__const__ unsigned char *) addr;
8971 - return (ADDR[nr >> 3] >> (nr & 7)) & 1;
8973 + *ADDR |= 1 << (nr & 0x07);
8976 -/* Returns the bit position of the most significant 1 bit in a WORD. */
8977 -static __inline__ int ext2_ilog2(unsigned int x)
8978 +static __inline__ void __clear_le_bit(unsigned long nr, unsigned long *addr)
8981 + unsigned char *ADDR = (unsigned char *)addr;
8983 - asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x));
8986 + *ADDR &= ~(1 << (nr & 0x07));
8989 -/* ext2_ffz = ext2's Find First Zero.
8990 - * Determines the bit position of the LEAST significant (rightmost) 0 bit in the specified WORD.
8991 - * The returned bit position will be zero-based, starting from the right side (31 - 0).
8993 -static __inline__ int ext2_ffz(unsigned int x)
8994 +static __inline__ int __test_and_set_le_bit(unsigned long nr, unsigned long *addr)
8997 - /* Change all of x's 1s to 0s and 0s to 1s in x. And insure at least 1 zero exists in the word. */
8998 - if ((x = ~x) == 0)
8999 - /* no zero exists anywhere in the 4 byte area. */
9001 - /* Calculate the bit position of the least significant '1' bit in x
9002 - * (since x has been changed this will actually be the least
9003 - * significant '0' bit in the original x).
9004 - * Note: (x & -x) gives us a mask that is the LEAST significant
9005 - * (RIGHT-most) 1-bit of the value in x.
9007 - tempRC = ext2_ilog2(x & -x);
9010 + unsigned char *ADDR = (unsigned char *)addr;
9013 + mask = 1 << (nr & 0x07);
9014 + retval = (mask & *ADDR) != 0;
9019 -static __inline__ u32 ext2_find_next_zero_bit(void* addr, u32 size, u32 offset)
9020 +static __inline__ int __test_and_clear_le_bit(unsigned long nr, unsigned long *addr)
9022 - /* This method needs to take into account the fact that the ext2 file system represents
9023 - * | it's bitmaps as "little endian" unsigned integers.
9025 - unsigned int *p = ((unsigned int *) addr) + (offset >> 5);
9026 - unsigned int result = offset & ~31;
9029 - if (offset >= size)
9034 - tmp = cpu_to_le32p(p++);
9035 - tmp |= ~0U >> (32-offset); /* bug or feature ? */
9039 - goto found_middle;
9043 - while (size >= 32) {
9044 - if ((tmp = cpu_to_le32p(p++)) != ~0)
9045 - goto found_middle;
9051 - tmp = cpu_to_le32p(p);
9053 - tmp |= ~0 << size;
9054 - if (tmp == ~0) /* Are any bits zero? */
9055 - return result + size; /* Nope. */
9057 - return result + ext2_ffz(tmp);
9060 + unsigned char *ADDR = (unsigned char *)addr;
9062 -#define ext2_find_first_zero_bit(addr, size) ext2_find_next_zero_bit((addr), (size), 0)
9064 + mask = 1 << (nr & 0x07);
9065 + retval = (mask & *ADDR) != 0;
9070 +#define ext2_set_bit(nr,addr) \
9071 + __test_and_set_le_bit((nr),(unsigned long*)addr)
9072 +#define ext2_clear_bit(nr, addr) \
9073 + __test_and_clear_le_bit((nr),(unsigned long*)addr)
9074 +#define ext2_test_bit(nr, addr) test_le_bit((nr),(unsigned long*)addr)
9075 +#define ext2_find_first_zero_bit(addr, size) \
9076 + find_first_zero_le_bit((unsigned long*)addr, size)
9077 +#define ext2_find_next_zero_bit(addr, size, off) \
9078 + find_next_zero_le_bit((unsigned long*)addr, size, off)
9080 +#define minix_test_and_set_bit(nr,addr) test_and_set_bit(nr,addr)
9081 +#define minix_set_bit(nr,addr) set_bit(nr,addr)
9082 +#define minix_test_and_clear_bit(nr,addr) test_and_clear_bit(nr,addr)
9083 +#define minix_test_bit(nr,addr) test_bit(nr,addr)
9084 +#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
9086 #endif /* __KERNEL__ */
9087 #endif /* _PPC64_BITOPS_H */
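An aside on the rewritten ffz()/__ffs() above: both lean on the (x & -x)
identity, which isolates the least significant 1-bit. A minimal userspace
sketch of the trick (not part of the patch; ilog2_64() is a stand-in for
the kernel's __ilog2()):

#include <assert.h>

/* Position of the highest set bit, zero-based; stand-in for __ilog2(). */
static int ilog2_64(unsigned long x)
{
	int n = -1;
	while (x) {
		x >>= 1;
		n++;
	}
	return n;
}

/* Least significant zero bit: invert, then isolate the lowest 1-bit. */
static unsigned long ffz64(unsigned long x)
{
	x = ~x;				/* the first 0 becomes the first 1 */
	return ilog2_64(x & -x);	/* x & -x keeps only that bit */
}

int main(void)
{
	assert(ffz64(0x0fUL) == 4);	/* 0b1111 -> first zero at bit 4 */
	assert(ffz64(0x0bUL) == 2);	/* 0b1011 -> first zero at bit 2 */
	return 0;
}
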
9088 --- linux/net/unix/af_unix.c.orig Thu Jul 18 15:22:25 2002
9089 +++ linux/net/unix/af_unix.c Thu Jul 18 15:22:35 2002
9090 @@ -565,10 +565,8 @@
9092 write_unlock(&unix_table_lock);
9093 /* Sanity yield. It is unusual case, but yet... */
9094 - if (!(ordernum&0xFF)) {
9095 - current->policy |= SCHED_YIELD;
9098 + if (!(ordernum&0xFF))
9102 addr->hash ^= sk->type;
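This af_unix hunk is the first of many below with the same shape: the
open-coded "current->policy |= SCHED_YIELD;" plus schedule() pair gives
way to the new yield() helper. As a rough userspace analogue of the idiom
(illustration only, not kernel code), POSIX sched_yield() plays the same
role in a polling loop:

#include <sched.h>

/* Spin politely: surrender the CPU on every pass instead of burning
 * the rest of the timeslice while the condition is still false. */
static void wait_for(volatile const int *flag)
{
	while (!*flag)
		sched_yield();
}
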
9103 --- linux/net/ipv4/tcp_output.c.orig Thu Jul 18 15:22:25 2002
9104 +++ linux/net/ipv4/tcp_output.c Thu Jul 18 15:22:35 2002
9105 @@ -1010,8 +1010,7 @@
9106 skb = alloc_skb(MAX_TCP_HEADER, GFP_KERNEL);
9109 - current->policy |= SCHED_YIELD;
9114 /* Reserve space for headers and prepare control bits. */
9115 --- linux/net/sunrpc/sched.c.orig Thu Jul 18 15:22:25 2002
9116 +++ linux/net/sunrpc/sched.c Thu Jul 18 15:22:35 2002
9119 if (flags & RPC_TASK_ASYNC)
9121 - current->policy |= SCHED_YIELD;
9124 } while (!signalled());
9127 @@ -1112,8 +1111,7 @@
9130 dprintk("rpciod_killall: waiting for tasks to exit\n");
9131 - current->policy |= SCHED_YIELD;
9137 @@ -1183,8 +1181,7 @@
9138 * wait briefly before checking the process id.
9140 current->sigpending = 0;
9141 - current->policy |= SCHED_YIELD;
9145 * Display a message if we're going to wait longer.
9147 --- linux/net/sched/sch_generic.c.orig Thu Jul 18 15:22:25 2002
9148 +++ linux/net/sched/sch_generic.c Thu Jul 18 15:22:35 2002
9149 @@ -475,10 +475,8 @@
9151 dev_watchdog_down(dev);
9153 - while (test_bit(__LINK_STATE_SCHED, &dev->state)) {
9154 - current->policy |= SCHED_YIELD;
9157 + while (test_bit(__LINK_STATE_SCHED, &dev->state))
9160 spin_unlock_wait(&dev->xmit_lock);
9162 --- linux/net/socket.c.orig Thu Jul 18 15:22:25 2002
9163 +++ linux/net/socket.c Thu Jul 18 15:22:35 2002
9165 while (atomic_read(&net_family_lockct) != 0) {
9166 spin_unlock(&net_family_lock);
9168 - current->policy |= SCHED_YIELD;
9172 spin_lock(&net_family_lock);
9174 --- linux/drivers/net/slip.c.orig Thu Jul 18 15:22:12 2002
9175 +++ linux/drivers/net/slip.c Thu Jul 18 15:22:35 2002
9176 @@ -1393,10 +1393,8 @@
9177 /* First of all: check for active disciplines and hangup them.
9181 - current->counter = 0;
9185 + sys_sched_yield();
9189 --- linux/drivers/block/loop.c.orig Thu Jul 18 15:22:20 2002
9190 +++ linux/drivers/block/loop.c Thu Jul 18 15:22:35 2002
9192 flush_signals(current);
9193 spin_unlock_irq(&current->sigmask_lock);
9195 - current->policy = SCHED_OTHER;
9196 - current->nice = -20;
9198 spin_lock_irq(&lo->lo_lock);
9199 lo->lo_state = Lo_bound;
9200 atomic_inc(&lo->lo_pending);
9201 --- linux/drivers/char/mwave/mwavedd.c.orig Thu Jul 18 15:22:12 2002
9202 +++ linux/drivers/char/mwave/mwavedd.c Thu Jul 18 15:22:35 2002
9204 pDrvData->IPCs[ipcnum].bIsHere = FALSE;
9205 pDrvData->IPCs[ipcnum].bIsEnabled = TRUE;
9206 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0)
9207 - current->nice = -20; /* boost to provide priority timing */
9209 current->priority = 0x28; /* boost to provide priority timing */
9211 --- linux/drivers/char/drm-4.0/ffb_drv.c.orig Thu Jul 18 15:22:12 2002
9212 +++ linux/drivers/char/drm-4.0/ffb_drv.c Thu Jul 18 15:22:35 2002
9215 atomic_inc(&dev->total_sleeps);
9216 current->state = TASK_INTERRUPTIBLE;
9217 - current->policy |= SCHED_YIELD;
9220 if (signal_pending(current)) {
9223 --- linux/drivers/char/drm-4.0/tdfx_drv.c.orig Thu Jul 18 15:22:12 2002
9224 +++ linux/drivers/char/drm-4.0/tdfx_drv.c Thu Jul 18 15:22:35 2002
9226 lock.context, current->pid, j,
9227 dev->lock.lock_time, jiffies);
9228 current->state = TASK_INTERRUPTIBLE;
9229 - current->policy |= SCHED_YIELD;
9230 schedule_timeout(DRM_LOCK_SLICE-j);
9231 DRM_DEBUG("jiffies=%d\n", jiffies);
9233 @@ -578,10 +577,7 @@
9236 atomic_inc(&dev->total_sleeps);
9238 - current->policy |= SCHED_YIELD;
9242 if (signal_pending(current)) {
9246 when dev->last_context == lock.context
9247 NOTE WE HOLD THE LOCK THROUGHOUT THIS
9249 - current->policy |= SCHED_YIELD;
9252 current->state = TASK_RUNNING;
9253 remove_wait_queue(&dev->context_wait, &entry);
9254 if (signal_pending(current)) {
9255 --- linux/drivers/char/serial_txx927.c.orig Thu Jul 18 15:22:20 2002
9256 +++ linux/drivers/char/serial_txx927.c Thu Jul 18 15:22:35 2002
9257 @@ -1533,7 +1533,6 @@
9258 printk("cisr = %d (jiff=%lu)...", cisr, jiffies);
9260 current->state = TASK_INTERRUPTIBLE;
9261 - current->counter = 0; /* make us low-priority */
9262 schedule_timeout(char_time);
9263 if (signal_pending(current))
9265 --- linux/drivers/ide/ataraid.c.orig Thu Jul 18 15:22:01 2002
9266 +++ linux/drivers/ide/ataraid.c Thu Jul 18 15:22:35 2002
9268 ptr=kmalloc(sizeof(struct buffer_head),GFP_NOIO);
9270 __set_current_state(TASK_RUNNING);
9271 - current->policy |= SCHED_YIELD;
9278 ptr=kmalloc(sizeof(struct ataraid_bh_private),GFP_NOIO);
9280 __set_current_state(TASK_RUNNING);
9281 - current->policy |= SCHED_YIELD;
9287 --- linux/drivers/md/md.c.orig Thu Jul 18 15:22:21 2002
9288 +++ linux/drivers/md/md.c Thu Jul 18 15:22:35 2002
9289 @@ -2936,8 +2936,6 @@
9290 * bdflush, otherwise bdflush will deadlock if there are too
9291 * many dirty RAID5 blocks.
9293 - current->policy = SCHED_OTHER;
9294 - current->nice = -20;
9297 complete(thread->event);
9298 @@ -3391,11 +3389,6 @@
9299 "(but not more than %d KB/sec) for reconstruction.\n",
9300 sysctl_speed_limit_max);
9303 - * Resync has low priority.
9305 - current->nice = 19;
9307 is_mddev_idle(mddev); /* this also initializes IO event counters */
9308 for (m = 0; m < SYNC_MARKS; m++) {
9310 @@ -3473,16 +3466,13 @@
9311 currspeed = (j-mddev->resync_mark_cnt)/2/((jiffies-mddev->resync_mark)/HZ +1) +1;
9313 if (currspeed > sysctl_speed_limit_min) {
9314 - current->nice = 19;
9316 if ((currspeed > sysctl_speed_limit_max) ||
9317 !is_mddev_idle(mddev)) {
9318 current->state = TASK_INTERRUPTIBLE;
9319 md_schedule_timeout(HZ/4);
9323 - current->nice = -20;
9326 printk(KERN_INFO "md: md%d: sync done.\n",mdidx(mddev));
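The md hunks drop the raw current->nice writes the resync thread used to
bounce between 19 and -20; with the O(1) scheduler, nice changes have to
go through the scheduler itself (presumably a helper along the lines of
set_user_nice(); the added lines are elided here). The userspace
counterpart of the same knob, for illustration:

#include <sys/resource.h>

/* Demote the calling process to the weakest nice level (19), roughly
 * what the resync thread did by writing current->nice = 19. */
static int be_background(void)
{
	return setpriority(PRIO_PROCESS, 0, 19);
}
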
9328 --- linux/arch/i386/mm/fault.c.orig Thu Jul 18 15:22:17 2002
9329 +++ linux/arch/i386/mm/fault.c Thu Jul 18 15:22:35 2002
9333 if (current->pid == 1) {
9334 - current->policy |= SCHED_YIELD;
9342 up_read(&mm->mmap_sem);
9343 if (tsk->pid == 1) {
9344 - tsk->policy |= SCHED_YIELD;
9347 down_read(&mm->mmap_sem);
9350 --- linux/arch/i386/kernel/entry.S.orig Thu Jul 18 15:22:17 2002
9351 +++ linux/arch/i386/kernel/entry.S Thu Jul 18 15:22:35 2002
9361 @@ -176,9 +176,11 @@
9364 ENTRY(ret_from_fork)
9367 call SYMBOL_NAME(schedule_tail)
9371 testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS
9373 @@ -256,7 +258,16 @@
9377 - call SYMBOL_NAME(schedule) # test
9378 + movl EFLAGS(%esp),%eax # mix EFLAGS and CS
9380 + testl $(VM_MASK | 3),%eax # return to VM86 mode or non-supervisor?
9381 + jne userspace_resched
9383 + call SYMBOL_NAME(schedule)
9384 + jmp ret_from_sys_call
9387 + call SYMBOL_NAME(schedule_userspace)
9388 jmp ret_from_sys_call
9392 .long SYMBOL_NAME(sys_tkill)
9393 .long SYMBOL_NAME(sys_ni_syscall) /* reserved for sendfile64 */
9394 .long SYMBOL_NAME(sys_ni_syscall) /* 240 reserved for futex */
9395 - .long SYMBOL_NAME(sys_ni_syscall) /* reserved for sched_setaffinity */
9396 - .long SYMBOL_NAME(sys_ni_syscall) /* reserved for sched_getaffinity */
9397 + .long SYMBOL_NAME(sys_sched_setaffinity)
9398 + .long SYMBOL_NAME(sys_sched_getaffinity)
9400 .rept NR_syscalls-(.-sys_call_table)/4
9401 .long SYMBOL_NAME(sys_ni_syscall)
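With the two table entries above, sched_setaffinity()/sched_getaffinity()
become real syscalls. A usage sketch, assuming the modern glibc wrapper
and CPU_SET macros (both postdate this patch; contemporary callers had to
issue the raw syscall numbers themselves):

#define _GNU_SOURCE
#include <sched.h>

/* Pin a process to CPU 0; pid 0 means the calling process. */
int pin_to_cpu0(pid_t pid)
{
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(0, &mask);
	return sched_setaffinity(pid, sizeof(mask), &mask);
}
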
9402 --- linux/arch/i386/kernel/process.c.orig Thu Jul 18 15:22:17 2002
9403 +++ linux/arch/i386/kernel/process.c Thu Jul 18 15:22:35 2002
9404 @@ -124,15 +124,12 @@
9405 void cpu_idle (void)
9407 /* endless idle loop with no priority at all */
9409 - current->nice = 20;
9410 - current->counter = -100;
9413 void (*idle)(void) = pm_idle;
9415 idle = default_idle;
9416 - while (!current->need_resched)
9417 + if (!current->need_resched)
9421 @@ -697,15 +694,17 @@
9422 asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs));
9425 - * Restore %fs and %gs.
9426 + * Restore %fs and %gs if needed.
9428 - loadsegment(fs, next->fs);
9429 - loadsegment(gs, next->gs);
9430 + if (unlikely(prev->fs | prev->gs | next->fs | next->gs)) {
9431 + loadsegment(fs, next->fs);
9432 + loadsegment(gs, next->gs);
9436 * Now maybe reload the debug registers
9438 - if (next->debugreg[7]){
9439 + if (unlikely(next->debugreg[7])) {
9447 - if (prev->ioperm || next->ioperm) {
9448 + if (unlikely(prev->ioperm || next->ioperm)) {
9451 * 4 cachelines copy ... not good, but not that
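The new unlikely() annotations in the context-switch path compile down to
gcc's __builtin_expect(), which tells the optimizer to lay the cold path
out of line. A minimal userspace rendition of the macro and its effect:

#define unlikely(x)	__builtin_expect(!!(x), 0)

int debugreg7;	/* almost always zero, like next->debugreg[7] above */

void maybe_reload_debugregs(void (*reload)(void))
{
	if (unlikely(debugreg7))	/* branch predicted not-taken */
		reload();		/* cold path, moved off the hot trace */
}
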
9452 --- linux/arch/i386/kernel/setup.c.orig Thu Jul 18 15:22:17 2002
9453 +++ linux/arch/i386/kernel/setup.c Thu Jul 18 15:22:35 2002
9454 @@ -3131,9 +3131,10 @@
9459 - * Clear all 6 debug registers:
9461 + /* Clear %fs and %gs. */
9462 + asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
9464 + /* Clear all 6 debug registers: */
9466 #define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) );
9468 --- linux/arch/i386/kernel/smp.c.orig Thu Jul 18 15:22:17 2002
9469 +++ linux/arch/i386/kernel/smp.c Thu Jul 18 15:22:35 2002
9470 @@ -493,10 +493,20 @@
9471 * it goes straight through and wastes no time serializing
9472 * anything. Worst case is that we lose a reschedule ...
9475 void smp_send_reschedule(int cpu)
9477 send_IPI_mask(1 << cpu, RESCHEDULE_VECTOR);
9481 + * This function sends a reschedule IPI to all (other) CPUs.
9482 + * It should only be used if some 'global' task becomes runnable,
9483 + * such as an RT task that must be handled now. The first CPU
9484 + * that manages to grab the task will run it.
9486 +void smp_send_reschedule_all(void)
9488 + send_IPI_allbutself(RESCHEDULE_VECTOR);
9492 --- linux/arch/i386/kernel/smpboot.c.orig Thu Jul 18 15:22:17 2002
9493 +++ linux/arch/i386/kernel/smpboot.c Thu Jul 18 15:22:35 2002
9494 @@ -308,14 +308,14 @@
9495 if (tsc_values[i] < avg)
9496 realdelta = -realdelta;
9498 - printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
9500 + printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n", i, realdelta);
9506 printk("passed.\n");
9510 static void __init synchronize_tsc_ap (void)
9512 * (This works even if the APIC is not enabled.)
9514 phys_id = GET_APIC_ID(apic_read(APIC_ID));
9515 - cpuid = current->processor;
9517 if (test_and_set_bit(cpuid, &cpu_online_map)) {
9518 printk("huh, phys CPU#%d, CPU#%d already present??\n",
9522 smp_store_cpu_info(cpuid);
9524 + disable_APIC_timer();
9526 * Allow the master to continue.
9530 while (!atomic_read(&smp_commenced))
9532 + enable_APIC_timer();
9534 * low-memory mappings have been cleared, flush them from
9535 * the local TLBs too.
9536 @@ -803,16 +805,13 @@
9538 panic("No idle process for CPU %d", cpu);
9540 - idle->processor = cpu;
9541 - idle->cpus_runnable = 1 << cpu; /* we schedule the first task manually */
9542 + init_idle(idle, cpu);
9544 map_cpu_to_boot_apicid(cpu, apicid);
9546 idle->thread.eip = (unsigned long) start_secondary;
9548 - del_from_runqueue(idle);
9549 unhash_process(idle);
9550 - init_tasks[cpu] = idle;
9552 /* start_eip had better be page-aligned! */
9553 start_eip = setup_trampoline();
9557 cycles_t cacheflush_time;
9558 +unsigned long cache_decay_ticks;
9560 static void smp_tune_scheduling (void)
9562 @@ -958,9 +958,13 @@
9563 cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
9566 + cache_decay_ticks = (long)cacheflush_time/cpu_khz * HZ / 1000;
9568 printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
9569 (long)cacheflush_time/(cpu_khz/1000),
9570 ((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
9571 + printk("task migration cache decay timeout: %ld msecs.\n",
9572 + (cache_decay_ticks + 1) * 1000 / HZ);
9576 @@ -1023,8 +1027,7 @@
9577 map_cpu_to_boot_apicid(0, boot_cpu_apicid);
9579 global_irq_holder = 0;
9580 - current->processor = 0;
9583 smp_tune_scheduling();
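Worked numbers for the new cache_decay_ticks line, with made-up inputs (a
hypothetical 1 GHz CPU, HZ=100; cacheflush_time is in CPU cycles, as
computed a few lines earlier):

#include <stdio.h>

int main(void)
{
	long cpu_khz = 1000000;		/* 1 GHz, assumed */
	long hz = 100;			/* HZ, assumed */
	long cacheflush_time = 2000000;	/* ~2 ms worth of cycles, assumed */
	long cache_decay_ticks = cacheflush_time / cpu_khz * hz / 1000;

	/* 2000000/1000000 = 2 ms; 2*100/1000 = 0 ticks, which the new
	 * printk reports as (0+1)*1000/100 = 10 msecs. */
	printf("%ld ticks, %ld msecs\n",
	       cache_decay_ticks, (cache_decay_ticks + 1) * 1000 / hz);
	return 0;
}
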
9586 --- linux/arch/alpha/mm/fault.c.orig Thu Jul 18 15:21:51 2002
9587 +++ linux/arch/alpha/mm/fault.c Thu Jul 18 15:22:35 2002
9591 if (current->pid == 1) {
9592 - current->policy |= SCHED_YIELD;
9595 down_read(&mm->mmap_sem);
9598 --- linux/arch/alpha/kernel/process.c.orig Thu Jul 18 15:21:55 2002
9599 +++ linux/arch/alpha/kernel/process.c Thu Jul 18 15:22:35 2002
9603 /* An endless idle loop with no priority at all. */
9604 - current->nice = 20;
9605 - current->counter = -100;
9609 /* FIXME -- EV6 and LCA45 know how to power down
9610 --- linux/arch/sparc/kernel/process.c.orig Thu Jul 18 15:22:19 2002
9611 +++ linux/arch/sparc/kernel/process.c Thu Jul 18 15:22:35 2002
9615 /* endless idle loop with no priority at all */
9616 - current->nice = 20;
9617 - current->counter = -100;
9624 /* endless idle loop with no priority at all */
9625 - current->nice = 20;
9626 - current->counter = -100;
9630 --- linux/arch/mips/mm/fault.c.orig Thu Jul 18 15:22:18 2002
9631 +++ linux/arch/mips/mm/fault.c Thu Jul 18 15:22:35 2002
9634 up_read(&mm->mmap_sem);
9635 if (tsk->pid == 1) {
9636 - tsk->policy |= SCHED_YIELD;
9639 down_read(&mm->mmap_sem);
9642 --- linux/arch/mips/kernel/process.c.orig Thu Jul 18 15:22:18 2002
9643 +++ linux/arch/mips/kernel/process.c Thu Jul 18 15:22:35 2002
9645 ATTRIB_NORET void cpu_idle(void)
9647 /* endless idle loop with no priority at all */
9648 - current->nice = 20;
9649 - current->counter = -100;
9653 --- linux/arch/ppc/mm/fault.c.orig Thu Jul 18 15:21:56 2002
9654 +++ linux/arch/ppc/mm/fault.c Thu Jul 18 15:22:35 2002
9657 up_read(&mm->mmap_sem);
9658 if (current->pid == 1) {
9659 - current->policy |= SCHED_YIELD;
9662 down_read(&mm->mmap_sem);
9665 --- linux/arch/ppc/kernel/idle.c.orig Thu Jul 18 15:22:19 2002
9666 +++ linux/arch/ppc/kernel/idle.c Thu Jul 18 15:22:35 2002
9670 /* endless loop with no priority at all */
9671 - current->nice = 20;
9672 - current->counter = -100;
9677 if (!do_power_save) {
9678 --- linux/arch/ppc/8xx_io/uart.c.orig Thu Jul 18 15:22:19 2002
9679 +++ linux/arch/ppc/8xx_io/uart.c Thu Jul 18 15:22:35 2002
9680 @@ -1796,7 +1796,6 @@
9681 printk("lsr = %d (jiff=%lu)...", lsr, jiffies);
9683 current->state = TASK_INTERRUPTIBLE;
9684 -/* current->counter = 0; make us low-priority */
9685 schedule_timeout(char_time);
9686 if (signal_pending(current))
9688 --- linux/arch/ppc/8260_io/uart.c.orig Thu Jul 18 15:22:19 2002
9689 +++ linux/arch/ppc/8260_io/uart.c Thu Jul 18 15:22:35 2002
9690 @@ -1732,7 +1732,6 @@
9691 printk("lsr = %d (jiff=%lu)...", lsr, jiffies);
9693 current->state = TASK_INTERRUPTIBLE;
9694 -/* current->counter = 0; make us low-priority */
9695 schedule_timeout(char_time);
9696 if (signal_pending(current))
9698 --- linux/arch/m68k/kernel/process.c.orig Thu Jul 18 15:22:18 2002
9699 +++ linux/arch/m68k/kernel/process.c Thu Jul 18 15:22:35 2002
9702 /* endless idle loop with no priority at all */
9704 - current->nice = 20;
9705 - current->counter = -100;
9709 --- linux/arch/m68k/mm/fault.c.orig Thu Jul 18 15:22:18 2002
9710 +++ linux/arch/m68k/mm/fault.c Thu Jul 18 15:22:35 2002
9713 up_read(&mm->mmap_sem);
9714 if (current->pid == 1) {
9715 - current->policy |= SCHED_YIELD;
9718 down_read(&mm->mmap_sem);
9721 --- linux/arch/sparc64/kernel/process.c.orig Thu Jul 18 15:22:19 2002
9722 +++ linux/arch/sparc64/kernel/process.c Thu Jul 18 15:22:35 2002
9726 /* endless idle loop with no priority at all */
9727 - current->nice = 20;
9728 - current->counter = -100;
9733 #define unidle_me() (cpu_data[current->processor].idle_volume = 0)
9736 - current->nice = 20;
9737 - current->counter = -100;
9741 --- linux/arch/arm/kernel/process.c.orig Thu Jul 18 15:22:17 2002
9742 +++ linux/arch/arm/kernel/process.c Thu Jul 18 15:22:35 2002
9745 /* endless idle loop with no priority at all */
9747 - current->nice = 20;
9748 - current->counter = -100;
9751 void (*idle)(void) = pm_idle;
9752 --- linux/arch/arm/mm/fault-common.c.orig Thu Jul 18 15:22:17 2002
9753 +++ linux/arch/arm/mm/fault-common.c Thu Jul 18 15:22:35 2002
9755 * If we are out of memory for pid1,
9756 * sleep for a while and retry
9758 - tsk->policy |= SCHED_YIELD;
9764 --- linux/arch/sh/kernel/process.c.orig Thu Jul 18 15:21:59 2002
9765 +++ linux/arch/sh/kernel/process.c Thu Jul 18 15:22:35 2002
9768 /* endless idle loop with no priority at all */
9770 - current->nice = 20;
9771 - current->counter = -100;
9775 --- linux/arch/sh/mm/fault.c.orig Thu Jul 18 15:21:59 2002
9776 +++ linux/arch/sh/mm/fault.c Thu Jul 18 15:22:35 2002
9779 up_read(&mm->mmap_sem);
9780 if (current->pid == 1) {
9781 - current->policy |= SCHED_YIELD;
9784 down_read(&mm->mmap_sem);
9787 --- linux/arch/ia64/kernel/process.c.orig Thu Jul 18 15:22:17 2002
9788 +++ linux/arch/ia64/kernel/process.c Thu Jul 18 15:22:35 2002
9791 /* endless idle loop with no priority at all */
9793 - current->nice = 20;
9794 - current->counter = -100;
9799 --- linux/arch/ia64/mm/fault.c.orig Thu Jul 18 15:22:17 2002
9800 +++ linux/arch/ia64/mm/fault.c Thu Jul 18 15:22:35 2002
9803 up_read(&mm->mmap_sem);
9804 if (current->pid == 1) {
9805 - current->policy |= SCHED_YIELD;
9808 down_read(&mm->mmap_sem);
9811 --- linux/arch/mips64/kernel/process.c.orig Thu Jul 18 15:22:18 2002
9812 +++ linux/arch/mips64/kernel/process.c Thu Jul 18 15:22:35 2002
9815 /* endless idle loop with no priority at all */
9817 - current->nice = 20;
9818 - current->counter = -100;
9821 while (!current->need_resched)
9823 --- linux/arch/mips64/mm/fault.c.orig Thu Jul 18 15:22:18 2002
9824 +++ linux/arch/mips64/mm/fault.c Thu Jul 18 15:22:35 2002
9827 up_read(&mm->mmap_sem);
9828 if (tsk->pid == 1) {
9829 - tsk->policy |= SCHED_YIELD;
9832 down_read(&mm->mmap_sem);
9835 --- linux/arch/s390/kernel/process.c.orig Thu Jul 18 15:22:19 2002
9836 +++ linux/arch/s390/kernel/process.c Thu Jul 18 15:22:35 2002
9839 /* endless idle loop with no priority at all */
9841 - current->nice = 20;
9842 - current->counter = -100;
9845 if (current->need_resched) {
9847 --- linux/arch/s390/mm/fault.c.orig Thu Jul 18 15:22:19 2002
9848 +++ linux/arch/s390/mm/fault.c Thu Jul 18 15:22:35 2002
9851 up_read(&mm->mmap_sem);
9852 if (tsk->pid == 1) {
9853 - tsk->policy |= SCHED_YIELD;
9856 down_read(&mm->mmap_sem);
9859 --- linux/arch/parisc/kernel/process.c.orig Tue Aug 21 14:26:04 2001
9860 +++ linux/arch/parisc/kernel/process.c Thu Jul 18 15:22:35 2002
9863 /* endless idle loop with no priority at all */
9865 - current->nice = 20;
9866 - current->counter = -100;
9869 while (!current->need_resched) {
9870 --- linux/arch/cris/kernel/process.c.orig Thu Jul 18 15:22:11 2002
9871 +++ linux/arch/cris/kernel/process.c Thu Jul 18 15:22:35 2002
9872 @@ -124,10 +124,10 @@
9874 int cpu_idle(void *unused)
9877 - current->counter = -100;
9885 /* if the watchdog is enabled, we can simply disable interrupts and go
9886 --- linux/arch/s390x/kernel/process.c.orig Thu Jul 18 15:22:19 2002
9887 +++ linux/arch/s390x/kernel/process.c Thu Jul 18 15:22:35 2002
9890 /* endless idle loop with no priority at all */
9892 - current->nice = 20;
9893 - current->counter = -100;
9896 if (current->need_resched) {
9898 --- linux/arch/s390x/mm/fault.c.orig Thu Jul 18 15:22:19 2002
9899 +++ linux/arch/s390x/mm/fault.c Thu Jul 18 15:22:35 2002
9902 up_read(&mm->mmap_sem);
9903 if (tsk->pid == 1) {
9904 - tsk->policy |= SCHED_YIELD;
9907 down_read(&mm->mmap_sem);
9910 --- linux/arch/ppc64/kernel/idle.c.orig Thu Jul 18 15:22:19 2002
9911 +++ linux/arch/ppc64/kernel/idle.c Thu Jul 18 15:22:35 2002
9916 - /* endless loop with no priority at all */
9917 - current->nice = 20;
9918 - current->counter = -100;
9919 #ifdef CONFIG_PPC_ISERIES
9920 /* ensure iSeries run light will be out when idle */
9921 current->thread.flags &= ~PPC_FLAG_RUN_LIGHT;
9926 + /* endless loop with no priority at all */