1 diff -Nru a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c
2 --- a/arch/i386/kernel/cpu/proc.c Tue Oct 21 16:05:27 2003
3 +++ b/arch/i386/kernel/cpu/proc.c Tue Oct 21 16:05:27 2003
5 #include <asm/semaphore.h>
6 #include <linux/seq_file.h>
8 +#ifdef CONFIG_CPUSETS_PROC_CPUINFO
9 +#include <linux/sched.h>
10 +#include <linux/cpuset.h>
11 +#include <linux/cpuset_types.h>
15 * Get CPU information for use by the procfs.
21 +#ifdef CONFIG_CPUSETS_PROC_CPUINFO
22 + /* show only CPUs in current cpuset */
23 + if (!cpu_isset(n, current->cpuset->cpus_allowed))
25 +#endif /* CONFIG_CPUSETS_PROC_CPUINFO */
27 seq_printf(m, "processor\t: %d\n"
32 +#ifdef CONFIG_CPUSETS_PROC_CPUINFO
33 + cpuset_realtologic_cpuid(current->cpuset, n),
37 c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
40 diff -Nru a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
41 --- a/arch/i386/kernel/entry.S Tue Oct 21 16:05:27 2003
42 +++ b/arch/i386/kernel/entry.S Tue Oct 21 16:05:27 2003
45 .long sys_fadvise64_64
46 .long sys_ni_syscall /* sys_vserver */
47 + .long sys_ni_syscall
48 + .long sys_ni_syscall /* 275 */
49 + .long sys_ni_syscall
50 + .long sys_cpuset_create
51 + .long sys_cpuset_destroy
52 + .long sys_cpuset_alloc
53 + .long sys_cpuset_attach
54 + .long sys_cpuset_getfreecpus
57 nr_syscalls=(.-sys_call_table)/4
58 diff -Nru a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
59 --- a/arch/ia64/kernel/entry.S Tue Oct 21 16:05:27 2003
60 +++ b/arch/ia64/kernel/entry.S Tue Oct 21 16:05:27 2003
61 @@ -1481,11 +1481,19 @@
63 data8 ia64_ni_syscall // 1265
65 +#ifdef CONFIG_CPUSETS
66 + data8 sys_cpuset_create
67 + data8 sys_cpuset_destroy
68 + data8 sys_cpuset_alloc
69 + data8 sys_cpuset_attach // 1270
70 + data8 sys_cpuset_getfreecpus
75 data8 ia64_ni_syscall // 1270
81 diff -Nru a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
82 --- a/arch/ia64/kernel/setup.c Tue Oct 21 16:05:27 2003
83 +++ b/arch/ia64/kernel/setup.c Tue Oct 21 16:05:27 2003
85 #include <asm/system.h>
86 #include <asm/unistd.h>
88 +#ifdef CONFIG_CPUSETS_PROC_CPUINFO
89 +# include <linux/cpuset_types.h>
92 #if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE)
93 # error "struct cpuinfo_ia64 too big!"
99 +#ifdef CONFIG_CPUSETS_PROC_CPUINFO
100 + /* show only CPUs in current cpuset */
101 + if (!current->cpuset)
104 + if (!cpu_isset(cpunum, current->cpuset->cpus_allowed))
106 +#endif /* CONFIG_CPUSETS_PROC_CPUINFO */
112 "cpu MHz : %lu.%06lu\n"
113 "itc MHz : %lu.%06lu\n"
114 "BogoMIPS : %lu.%02lu\n\n",
115 - cpunum, c->vendor, family, c->model, c->revision, c->archrev,
116 +#ifdef CONFIG_CPUSETS_PROC_CPUINFO
117 + cpuset_realtologic_cpuid(current->cpuset, cpunum),
121 + c->vendor, family, c->model, c->revision, c->archrev,
122 features, c->ppn, c->number,
123 c->proc_freq / 1000000, c->proc_freq % 1000000,
124 c->itc_freq / 1000000, c->itc_freq % 1000000,
125 diff -Nru a/fs/proc/base.c b/fs/proc/base.c
126 --- a/fs/proc/base.c Tue Oct 21 16:05:27 2003
127 +++ b/fs/proc/base.c Tue Oct 21 16:05:27 2003
132 +#ifdef CONFIG_CPUSETS_PROC
135 #ifdef CONFIG_SECURITY
137 PROC_TGID_ATTR_CURRENT,
139 #ifdef CONFIG_KALLSYMS
140 E(PROC_TGID_WCHAN, "wchan", S_IFREG|S_IRUGO),
142 +#ifdef CONFIG_CPUSETS_PROC
143 + E(PROC_TGID_CPUSET, "cpuset", S_IFREG|S_IRUGO),
147 static struct pid_entry tid_base_stuff[] = {
150 #endif /* CONFIG_KALLSYMS */
153 +#ifdef CONFIG_CPUSETS_PROC
154 +int proc_pid_cpuset(struct task_struct *task, char *buffer);
155 +#endif /* CONFIG_CPUSETS_PROC */
157 /************************************************************************/
158 /* Here the fs part begins */
159 /************************************************************************/
160 @@ -1359,6 +1370,12 @@
161 case PROC_TGID_WCHAN:
162 inode->i_fop = &proc_info_file_operations;
163 ei->op.proc_read = proc_pid_wchan;
166 +#ifdef CONFIG_CPUSETS_PROC
167 + case PROC_TGID_CPUSET:
168 + inode->i_fop = &proc_info_file_operations;
169 + ei->op.proc_read = proc_pid_cpuset;
173 diff -Nru a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
174 --- a/fs/proc/proc_misc.c Tue Oct 21 16:05:27 2003
175 +++ b/fs/proc/proc_misc.c Tue Oct 21 16:05:27 2003
178 #include <asm/div64.h>
180 +#ifdef CONFIG_CPUSETS_PROC_STAT
181 +# include <linux/cpuset_types.h>
184 #define LOAD_INT(x) ((x) >> FSHIFT)
185 #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
190 if (!cpu_online(i)) continue;
191 +#ifdef CONFIG_CPUSETS_PROC_STAT
192 + /* show only CPUs in current cpuset */
193 + if (!cpu_isset(i, current->cpuset->cpus_allowed))
197 user += kstat_cpu(i).cpustat.user;
198 nice += kstat_cpu(i).cpustat.nice;
199 system += kstat_cpu(i).cpustat.system;
201 jiffies_to_clock_t(softirq));
202 for (i = 0; i < NR_CPUS; i++){
203 if (!cpu_online(i)) continue;
204 +#ifdef CONFIG_CPUSETS_PROC_STAT
205 + /* show only CPUs in current cpuset */
206 + if (!cpu_isset(i, current->cpuset->cpus_allowed))
209 seq_printf(p, "cpu%d %u %u %u %u %u %u %u\n",
210 +#ifdef CONFIG_CPUSETS_PROC_STAT
211 + cpuset_realtologic_cpuid(current->cpuset, i),
215 jiffies_to_clock_t(kstat_cpu(i).cpustat.user),
216 jiffies_to_clock_t(kstat_cpu(i).cpustat.nice),
217 jiffies_to_clock_t(kstat_cpu(i).cpustat.system),
218 diff -Nru a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
219 --- a/include/asm-i386/unistd.h Tue Oct 21 16:05:27 2003
220 +++ b/include/asm-i386/unistd.h Tue Oct 21 16:05:27 2003
222 #define __NR_fadvise64_64 272
223 #define __NR_vserver 273
225 -#define NR_syscalls 274
226 +#define __NR_sys_cpuset_create 277
227 +#define __NR_sys_cpuset_destroy 278
228 +#define __NR_sys_cpuset_alloc 279
229 +#define __NR_sys_cpuset_attach 280
230 +#define __NR_sys_cpuset_getfreecpus 281
232 +#define NR_syscalls 282
234 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
236 diff -Nru a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h
237 --- a/include/asm-ia64/unistd.h Tue Oct 21 16:05:27 2003
238 +++ b/include/asm-ia64/unistd.h Tue Oct 21 16:05:27 2003
241 #define NR_syscalls 256 /* length of syscall table */
243 +#define __NR_sys_cpuset_create 1267
244 +#define __NR_sys_cpuset_destroy 1268
245 +#define __NR_sys_cpuset_alloc 1269
246 +#define __NR_sys_cpuset_attach 1270
247 +#define __NR_sys_cpuset_getfreecpus 1271
249 #if !defined(__ASSEMBLY__) && !defined(ASSEMBLER)
251 extern long __ia64_syscall (long a0, long a1, long a2, long a3, long a4, long nr);
252 diff -Nru a/include/linux/cpuset.h b/include/linux/cpuset.h
253 --- /dev/null Wed Dec 31 16:00:00 1969
254 +++ b/include/linux/cpuset.h Tue Oct 21 16:05:27 2003
257 + * BULL cpuset interface
260 +#ifndef _LINUX_CPUSET_H
261 +#define _LINUX_CPUSET_H
263 +typedef unsigned int cpuset_t;
265 +#define CPUSET_STRICT 0x00000001
266 +#define CPUSET_AUTOCLEAN 0x00000002
270 +extern struct cpuset top_cpuset;
272 +void use_cpuset(struct cpuset *);
273 +void release_cpuset(struct cpuset *);
276 +int cpuset_setaffinity(struct task_struct * task, unsigned long mask);
278 +void cpusets_update_cpus_online(void);
280 +int cpuset_realtologic_cpuid(struct cpuset * cs, int cpuid);
282 +#endif /* __KERNEL__ */
284 +#endif /* _LINUX_CPUSET_H */
285 diff -Nru a/include/linux/cpuset_types.h b/include/linux/cpuset_types.h
286 --- /dev/null Wed Dec 31 16:00:00 1969
287 +++ b/include/linux/cpuset_types.h Tue Oct 21 16:05:27 2003
289 +#ifndef _LINUX_CPUSET_TYPES_H
290 +#define _LINUX_CPUSET_TYPES_H
296 + int has_been_attached;
298 + /* bitmask of the cpus present in this cpuset */
299 + cpumask_t cpus_allowed;
301 + /* bitmask of the cpus reserved in this cpuset */
302 + cpumask_t cpus_reserved;
304 + /* bitmask of the cpus reserved with CPUSET_STRICT */
305 + cpumask_t cpus_strictly_reserved;
307 + struct cpuset * parent;
308 + struct list_head list; /* for the whole list */
310 + struct list_head children;
311 + struct list_head brothers;
313 + /* overall users (processes + children) */
314 + /* will be replaced by atomic_t in the future */
317 + spinlock_t attach_lock;
328 diff -Nru a/include/linux/init_task.h b/include/linux/init_task.h
329 --- a/include/linux/init_task.h Tue Oct 21 16:05:27 2003
330 +++ b/include/linux/init_task.h Tue Oct 21 16:05:27 2003
332 .siglock = SPIN_LOCK_UNLOCKED, \
335 +#ifdef CONFIG_CPUSETS
336 +#define CPUSET_TSKINIT(a,b) .a = b,
338 +#define CPUSET_TSKINIT(a,b)
342 * INIT_TASK is used to set up the first task table, touch at
343 * your own risk!. Base=0, limit=0x1fffff (=2MB)
345 .proc_lock = SPIN_LOCK_UNLOCKED, \
346 .switch_lock = SPIN_LOCK_UNLOCKED, \
347 .journal_info = NULL, \
348 + CPUSET_TSKINIT(cpus_wanted, CPU_MASK_ALL) \
349 + CPUSET_TSKINIT(cpuset, &top_cpuset) \
350 + CPUSET_TSKINIT(cpuset_attach_lock, SPIN_LOCK_UNLOCKED) \
354 diff -Nru a/include/linux/sched.h b/include/linux/sched.h
355 --- a/include/linux/sched.h Tue Oct 21 16:05:27 2003
356 +++ b/include/linux/sched.h Tue Oct 21 16:05:27 2003
358 #include <linux/completion.h>
359 #include <linux/pid.h>
360 #include <linux/percpu.h>
361 +#include <linux/cpuset.h>
367 unsigned long ptrace_message;
368 siginfo_t *last_siginfo; /* For ptrace use. */
371 +#ifdef CONFIG_CPUSETS
372 + struct cpuset * cpuset;
373 + unsigned long cpus_wanted;
374 + spinlock_t cpuset_attach_lock;
378 static inline pid_t process_group(struct task_struct *tsk)
379 diff -Nru a/init/Kconfig b/init/Kconfig
380 --- a/init/Kconfig Tue Oct 21 16:05:27 2003
381 +++ b/init/Kconfig Tue Oct 21 16:05:27 2003
383 Disabling this option will cause the kernel to be built without
384 support for epoll family of system calls.
392 + This option will let you create and manage sets of CPUs where you
393 + can run processes.
398 + bool "/proc/cpusets support"
401 + Get some info about the existing cpusets in your system.
402 + To use this option, you have to ensure that the "/proc file system
403 + support" (CONFIG_PROC_FS) is enabled, too.
405 +config CPUSETS_PROC_CPUINFO
406 + bool "/proc/cpuinfo uses current cpuset"
407 + depends on CPUSETS_PROC
409 + With this option enabled, a process reading /proc/cpuinfo will
410 + only see the CPUs that are in its current cpuset.
412 +config CPUSETS_PROC_STAT
413 + bool "/proc/stat uses current cpuset"
414 + depends on CPUSETS_PROC
416 + With this option enabled, a process reading /proc/stat will
417 + only see the CPUs that are in its current cpuset.
421 source "drivers/block/Kconfig.iosched"
423 endmenu # General setup
424 diff -Nru a/init/main.c b/init/main.c
425 --- a/init/main.c Tue Oct 21 16:05:27 2003
426 +++ b/init/main.c Tue Oct 21 16:05:27 2003
428 #include <linux/writeback.h>
429 #include <linux/cpu.h>
433 +#ifdef CONFIG_CPUSETS
434 +#include <linux/cpuset.h>
439 #include <asm/bugs.h>
442 extern void free_initmem(void);
443 extern void populate_rootfs(void);
444 extern void driver_init(void);
445 +extern void cpusets_init(void);
448 extern void tc_init(void);
450 #ifdef CONFIG_PROC_FS
453 +#ifdef CONFIG_CPUSETS
458 printk("POSIX conformance testing by UNIFIX\n");
462 static void __init do_basic_setup(void)
464 +#ifdef CONFIG_CPUSETS
465 + cpusets_update_cpus_online();
478 * Ok, we have completed the initial bootup, and
479 diff -Nru a/kernel/Makefile b/kernel/Makefile
480 --- a/kernel/Makefile Tue Oct 21 16:05:27 2003
481 +++ b/kernel/Makefile Tue Oct 21 16:05:27 2003
483 obj-$(CONFIG_COMPAT) += compat.o
484 obj-$(CONFIG_IKCONFIG) += configs.o
485 obj-$(CONFIG_IKCONFIG_PROC) += configs.o
486 +obj-$(CONFIG_CPUSETS) += cpuset.o
488 ifneq ($(CONFIG_IA64),y)
489 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
490 diff -Nru a/kernel/cpuset.c b/kernel/cpuset.c
491 --- /dev/null Wed Dec 31 16:00:00 1969
492 +++ b/kernel/cpuset.c Tue Oct 21 16:05:27 2003
494 +#include <linux/cpumask.h>
495 +#include <linux/cpuset.h>
496 +#include <linux/slab.h> /* for kmalloc */
497 +#include <linux/list.h>
498 +#include <linux/sched.h> /* for find_task_by_pid and task_struct */
499 +#include <asm/uaccess.h>
500 +#include <linux/errno.h>
501 +#include <linux/seq_file.h>
502 +#include <linux/fs.h>
503 +#include <linux/init.h>
504 +#include <linux/proc_fs.h>
505 +#include <linux/spinlock.h>
506 +#include <linux/list.h>
507 +#include <linux/cpuset_types.h>
509 +#define info(args...) do {} while(0)
510 +//#define info(args...) printk(KERN_INFO args)
513 +#ifdef CPU_ARRAY_SIZE
514 +#warning "CPU ARRAY SIZE !"
516 +rwlock_t cpuset_lock = RW_LOCK_UNLOCKED;
518 +#define CPUSET_TOP_ID 1
520 +struct cpuset top_cpuset = {
521 + .id = CPUSET_TOP_ID,
522 + .flags = CPUSET_STRICT,
523 + .cpus_reserved = CPU_MASK_NONE,
524 + .cpus_strictly_reserved = CPU_MASK_NONE,
526 + .children = LIST_HEAD_INIT(top_cpuset.children),
527 + .brothers = LIST_HEAD_INIT(top_cpuset.brothers),
528 + .list = LIST_HEAD_INIT(top_cpuset.list),
529 + .count = ATOMIC_INIT(1), /* this cpuset can't be deleted */
530 + .has_been_attached = 0,
532 + .attach_lock = SPIN_LOCK_UNLOCKED,
537 +static int proc_cpusets_init(void);
539 +int __init cpusets_init(void)
541 + info("cpusets ("__FILE__ " compiled " __DATE__ " " __TIME__ "initializing..\n");
544 +#ifdef CONFIG_CPUSETS_PROC
545 + proc_cpusets_init();
546 +#endif /* CONFIG_CPUSETS_PROC */
551 + * later this function may be used to indicate that a CPU has been put
553 + * BUT currently it only exists because cpu_online_map becomes available
554 + * only late during kernel boot
556 +void cpusets_update_cpus_online(void)
558 + top_cpuset.cpus_allowed = cpu_online_map ;
562 +static const int N = (8*sizeof(cpumask_t));
563 +/* mask must NOT be ZERO ! */
564 +/* this is a cyclic version of next_cpu */
565 +static inline void _next_cpu(const cpumask_t mask, int * index)
568 + if (++*index >= N) *index = 0;
569 + if (cpu_isset(*index, mask)) return;
573 +static unsigned long cpuset_combine_mask(const cpumask_t wanted, const cpumask_t allowed)
578 + /* start with current cpu out of the mask
579 + * so the first call to next_cpu will take the first cpu
580 + * even if it is cpu zero
584 + if (cpus_empty(wanted)) return 0;
585 + if (cpus_empty(allowed)) return 0;
589 + for(i=0; i < N; i++) {
590 + _next_cpu(allowed, &cpu);
591 + if (cpu_isset(i, wanted))
592 + cpu_set(cpu, mask);
594 + info("cpuset_combine_mask: %016lx + %016lx --> %016lx\n",
595 + wanted, allowed, mask);
599 +/* translate a "real" cpu number to an "inside cpuset" (aka logical)
600 + * cpu number. Used for /proc/cpuinfo
602 +int cpuset_realtologic_cpuid(struct cpuset * cs, int cpuid)
606 + for(i=0; i < NR_CPUS; i++)
608 + if (i == cpuid) return l;
609 + if (cpu_isset(i, cs->cpus_allowed))
617 +static struct cpuset * find_cpuset_by_id(cpuset_t id)
619 + struct cpuset * cs;
620 + if (id == CPUSET_TOP_ID) return &top_cpuset;
622 + list_for_each_entry(cs, &top_cpuset.list, list) {
623 + if (cs->id == id) return cs;
629 +/* increment a cpuset use count */
630 +void use_cpuset(struct cpuset * cs)
632 + atomic_inc(&cs->count);
635 +static void check_cpuset_autoclean(struct cpuset *);
637 +/* decrement a cpuset use count, and maybe autodestroy it */
638 +/* cpuset_lock MUST NOT BE HELD */
639 +void release_cpuset(struct cpuset * cs)
641 + if (atomic_dec_and_test(&cs->count))
642 + check_cpuset_autoclean(cs);
645 +/* find a free cpuset ID */
646 +static cpuset_t cpuset_mkid(void)
648 + static cpuset_t curid = CPUSET_TOP_ID;
650 + while (find_cpuset_by_id(++curid));
655 +asmlinkage long sys_cpuset_create(cpuset_t * cpusetp, int flags)
657 + struct cpuset * cs;
659 + info("sys_cpuset_create(%016lx, %d) called\n",
660 + (unsigned long) cpusetp, flags);
662 + /* can only create a strict cs in another strict cs */
663 + if ((flags & CPUSET_STRICT) && (!(current->cpuset->flags & CPUSET_STRICT)))
666 + /* check if given pointer is valid */
667 + if (verify_area(VERIFY_WRITE, cpusetp, sizeof(cpuset_t)))
670 + cs = (struct cpuset *) kmalloc(sizeof(struct cpuset), GFP_KERNEL);
675 + atomic_set(&cs->count, 0);
676 + INIT_LIST_HEAD(&cs->children);
677 + cs->cpus_allowed = 0;
678 + cs->cpus_reserved = 0;
679 + cs->cpus_strictly_reserved = 0;
680 + cs->has_been_attached = 0;
681 + cs->uid = current->uid;
682 + cs->suid = current->suid;
683 + cs->attach_lock = SPIN_LOCK_UNLOCKED;
685 + cs->parent = current->cpuset;
687 + use_cpuset(cs->parent);
689 + write_lock(&cpuset_lock);
691 + cs->id = cpuset_mkid();
692 + list_add(&cs->brothers, &cs->parent->children);
693 + list_add(&cs->list, &top_cpuset.list);
695 + write_unlock(&cpuset_lock);
697 + if (put_user(cs->id, cpusetp))
698 + info("put_user failed !\n");
704 +static inline int bad_permission(struct cpuset * cs)
706 + return ((current->euid) && (current->euid != cs->uid) && (current->euid != cs->suid));
709 +static void __cpuset_destroy(struct cpuset * cs);
711 +asmlinkage long sys_cpuset_destroy(cpuset_t cpuset)
713 + struct cpuset * cs;
715 + info("sys_cpuset_destroy(%d) called\n", cpuset);
717 + if (cpuset == CPUSET_TOP_ID)
720 + read_lock(&cpuset_lock);
721 + cs = find_cpuset_by_id(cpuset);
724 + read_unlock(&cpuset_lock);
729 + read_unlock(&cpuset_lock);
731 + if (bad_permission(cs)) {
732 + release_cpuset(cs);
736 + write_lock(&cpuset_lock);
737 + /* there's at least 1 user (us), if there's more, we can't destroy cs */
738 + if (atomic_read(&cs->count) > 1) {
739 + write_unlock(&cpuset_lock);
740 + release_cpuset(cs);
744 + /* everything OK, destroy it */
745 + __cpuset_destroy(cs);
746 + /* write_unlock(&cpuset_lock) will be done inside __cpuset_destroy */
751 +static void rebuild_reserved_masks(struct cpuset * csp) {
754 + struct cpuset * cs;
755 + info("Updating cpuset %d masks\n", csp->id);
760 + list_for_each_entry(cs, &csp->children, brothers) {
761 + info(" child %d\n", cs->id);
762 + cpus_or(r, r, cs->cpus_allowed);
763 + if (cs->flags & CPUSET_STRICT)
764 + cpus_or(sr, sr, cs->cpus_allowed);
766 + csp->cpus_reserved = r;
767 + csp->cpus_strictly_reserved = sr;
770 +/* REALLY destroy a cpuset
772 + * -> write cpuset_lock must be held
773 + * -> ----------------- WILL BE RELEASED
774 + * this ugly hack is necessary to call release_cpuset(parent)
776 +static void __cpuset_destroy(struct cpuset * cs)
778 + list_del(&cs->list);
779 + list_del(&cs->brothers);
781 + /* cs will never be top_cpuset, so ->parent exists */
782 + rebuild_reserved_masks(cs->parent);
784 + write_unlock(&cpuset_lock);
785 + release_cpuset(cs->parent);
790 +/* remove an unused cpuset if it has the CPUSET_AUTOCLEAN flag */
791 +static void check_cpuset_autoclean(struct cpuset * cs)
793 + if (!(cs->flags & CPUSET_AUTOCLEAN)) return; /* not autoclean */
794 + if (!cs->has_been_attached) return;
796 + write_lock(&cpuset_lock);
798 + if (atomic_read(&cs->count) > 0) { /* still in use */
799 + write_unlock(&cpuset_lock);
803 + info("autocleaning cpuset %d\n", cs->id);
805 + __cpuset_destroy(cs);
806 + /* write_unlock(&cpuset_lock) will be done inside __cpuset_destroy */
809 +asmlinkage long sys_cpuset_attach(cpuset_t cpuset, pid_t pid)
811 + struct cpuset * cs;
812 + struct task_struct * task;
814 + info("sys_cpuset_attach(%d, %d) called\n", cpuset, pid);
816 + read_lock(&cpuset_lock);
817 + cs = find_cpuset_by_id(cpuset);
820 + read_unlock(&cpuset_lock);
827 + read_unlock(&cpuset_lock);
829 + if (bad_permission(cs)) {
830 + release_cpuset(cs);
834 + if (!cs->cpus_allowed) { /* cannot attach a cpuset with no CPU */
835 + release_cpuset(cs);
840 + read_lock(&tasklist_lock);
842 + task = find_task_by_pid(pid);
844 + read_unlock(&tasklist_lock);
845 + release_cpuset(cs);
849 + get_task_struct(task);
850 + read_unlock(&tasklist_lock);
852 + if ((current->euid) && (current->euid != task->uid) && (current->euid != task->suid)) {
853 + put_task_struct(task);
854 + release_cpuset(cs);
860 + get_task_struct(task);
863 + set_cpus_allowed(task, cpuset_combine_mask(task->cpus_wanted, cs->cpus_allowed));
864 + cs->has_been_attached = 1;
866 + /* release the current cpu set of the task */
867 + /* lock to prevent a race where two cpuset_attach would be called on the same
868 + * task at the same time, and task->cpuset would be released twice
870 + spin_lock(&task->cpuset_attach_lock);
871 + if (!task->cpuset) { /* task with no cpuset ? means it is exiting */
872 + spin_unlock(&task->cpuset_attach_lock);
873 + put_task_struct(task);
874 + release_cpuset(cs);
877 + release_cpuset(task->cpuset);
878 + /* now lock the cpuset, to protect any running migrate_cpuset...()
879 + * from being disturbed by us
881 + spin_lock(&cs->attach_lock);
883 + spin_unlock(&cs->attach_lock);
885 + spin_unlock(&task->cpuset_attach_lock);
888 + put_task_struct(task);
890 + /* don't call release_cpuset here,
891 + * the task being attached to the cpuset
892 + * is really a new user !
899 +static int __cpuset_setaffinity(struct task_struct * task)
902 + cpumask_t last = CPU_MASK_NONE; /* remember : 0 is not a valid mask */
904 + /* We cannot hold any lock while calling set_cpus_allowed
905 + * since it might sleep
906 + * Thus we try until we are sure we did it with the right mask
909 + spin_lock(&task->cpuset_attach_lock);
910 + if (!task->cpuset) {
912 + spin_unlock(&task->cpuset_attach_lock);
915 + allowed = task->cpuset->cpus_allowed;
916 + spin_unlock(&task->cpuset_attach_lock);
918 + if (last == allowed)
922 + ret = set_cpus_allowed(task, cpuset_combine_mask(task->cpus_wanted, allowed));
930 +/* Our replacement function for set_cpus_allowed */
931 +int cpuset_setaffinity(struct task_struct * task, cpumask_t mask)
933 + task->cpus_wanted = mask;
934 + return __cpuset_setaffinity(task);
937 +/* When a cpuset with attached processes is being realloc'ed CPUs
938 + * update the processes' masks and migrate them
940 +static void migrate_cpuset_processes(struct cpuset * cs)
942 + struct task_struct *g, *p;
943 + /* This should be a RARE use of the cpusets.
944 + * therefore we'll prefer an inefficient operation here
945 + * (searching the whole process list)
946 + * rather than adding another list_head in task_t
947 + * and locks and list_add for each fork()
950 + /* we need to lock tasklist_lock for reading the processes list
951 + * BUT we cannot call set_cpus_allowed with any spinlock held
952 + * => we need to store the list of task struct in an array
954 + struct task_struct ** array;
958 + spin_lock(&cs->attach_lock);
959 + /* at most cs->count - 1 processes to migrate */
960 + sz = atomic_read(&cs->count) - 1;
961 + array = (struct task_struct **) kmalloc(sz * sizeof(struct task_struct *), GFP_ATOMIC);
963 + spin_unlock(&cs->attach_lock);
964 + printk("Error allocating array in migrate_cpuset_processes !\n");
967 + /* see linux/sched.h for this nested for/do-while loop */
968 + read_lock(&tasklist_lock);
969 + do_each_thread(g, p) {
970 + if (p->cpuset == cs) {
972 + printk("migrate_cpuset_processes: array full !\n");
973 + goto end_loop; /* break won't work in this double loop */
975 + get_task_struct(p);
978 + } while_each_thread(g, p);
980 + read_unlock(&tasklist_lock);
981 + spin_unlock(&cs->attach_lock);
984 + struct task_struct * p = array[--nb];
985 + __cpuset_setaffinity(p);
986 + put_task_struct(p);
993 +/* see if mask b is included in mask a */
994 +/* old version : #define MASK_INCLUDED(a, b) (((a)|(b)) == (a)) */
995 +static inline int MASK_INCLUDED(cpumask_t a, cpumask_t b)
999 + return cpus_equal(r, a);
1002 +static inline cpumask_t CPUS_NOT(cpumask_t a)
1004 + cpus_complement(a);
1008 +static inline cpumask_t CPUS_OR(cpumask_t a, cpumask_t b)
1015 +static inline cpumask_t CPUS_AND(cpumask_t a, cpumask_t b)
1018 + cpus_and(r, a, b);
1023 +asmlinkage long sys_cpuset_alloc(cpuset_t cpuset, int len, unsigned long * user_mask_ptr)
1025 + cpumask_t new_mask;
1026 + cpumask_t old_mask;
1027 + struct cpuset * cs ;
1030 + info("sys_cpuset_alloc(%d, ...) called\n", cpuset);
1032 + if (cpuset == CPUSET_TOP_ID)
1035 + if (len < sizeof(new_mask))
1038 + if (copy_from_user(&new_mask, user_mask_ptr, sizeof(new_mask)))
1041 + /* do some sanity checks on the mask */
1042 + /* must have at least ONE cpu */
1043 + if (cpus_empty(new_mask))
1046 + /* XXX phys_cpu_present_map has changed type --
1047 + * I disable this test for now
1048 + * anyway it is not _NEEDED_ since new_mask will have to stay
1049 + * in the parent's mask
1050 + * (just some overhead in a _really_ rare case) */
1052 + /* must only have existing CPUs */
1053 + if (!MASK_INCLUDED(phys_cpu_present_map, new_mask))
1057 + info(" with mask %016lx\n", new_mask);
1059 + read_lock(&cpuset_lock);
1060 + cs = find_cpuset_by_id(cpuset);
1064 + read_unlock(&cpuset_lock);
1069 + read_unlock(&cpuset_lock);
1071 + if (bad_permission(cs)) {
1072 + release_cpuset(cs);
1076 + /* lock early - we do not want the parent's masks to change under us */
1077 + write_lock(&cpuset_lock);
1078 + /* must only have CPUs in the parent cpuset (if any) */
1080 + if (!MASK_INCLUDED(cs->parent->cpus_allowed, new_mask))
1083 + old_mask = cs->cpus_allowed;
1086 + /* must only have free cpus */
1087 + if (cs->flags & CPUSET_STRICT) {
1088 + /* CPUs already in this cs ARE free for us ! -> old_mask */
1089 + /* The next few lines mean :
1090 + * if (!MASK_INCLUDED(~cs->parent->cpus_reserved, new_mask & (~old_mask)))
1091 + * (just obfuscated by the cpus_ macros)
1093 + if (!MASK_INCLUDED(CPUS_NOT(cs->parent->cpus_reserved),
1094 + CPUS_AND(new_mask, CPUS_NOT(old_mask))))
1098 + if (!MASK_INCLUDED(CPUS_NOT(cs->parent->cpus_strictly_reserved), new_mask))
1103 + /* are we trying to FREE reserved CPUs
1104 + * (i.e. reserved by child cpusets)
1105 + * from a non-unused cpuset ? */
1106 + /* if (cs->cpus_reserved & ~new_mask) */
1107 + if (!cpus_empty(CPUS_AND(cs->cpus_reserved, CPUS_NOT(new_mask))))
1110 + /* everything is OK */
1111 + cs->cpus_allowed = new_mask;
1112 + rebuild_reserved_masks(cs->parent);
1113 + write_unlock(&cpuset_lock);
1115 + /* did we change a non-unused cpuset ? */
1116 + if ((atomic_read(&cs->count) > 1) && !cpus_equal(new_mask, old_mask)) {
1117 + migrate_cpuset_processes(cs);
1120 + release_cpuset(cs);
1124 + write_unlock(&cpuset_lock);
1125 + release_cpuset(cs);
1129 +asmlinkage long sys_cpuset_getfreecpus(int flags, int len, unsigned long * user_mask_ptr)
1131 + cpumask_t reserved;
1134 + int real_len = sizeof(unsigned long);
1135 + if (len < real_len)
1138 + if (flags & CPUSET_STRICT)
1139 + reserved = current->cpuset->cpus_reserved;
1141 + reserved = current->cpuset->cpus_strictly_reserved;
1143 + free = CPUS_AND(current->cpuset->cpus_allowed, CPUS_NOT(reserved));
1145 + if (copy_to_user(user_mask_ptr, &free, real_len))
1151 +/*************************************************************
1152 + ***************** /proc/cpusets stuff ***********************
1153 + *************************************************************
1155 +#ifdef CONFIG_CPUSETS_PROC
1157 +static void *proc_cpusets_start(struct seq_file *m, loff_t *pos)
1160 + struct list_head *p;
1162 + read_lock(&cpuset_lock);
1163 + if (!n) seq_puts(m, "cpusets info \n");
1165 + p = &top_cpuset.list;
1168 + if (p == &top_cpuset.list)
1171 + return list_entry(p, struct cpuset, list);
1174 +static void *proc_cpusets_next(struct seq_file *m, void *p, loff_t *pos)
1176 + struct cpuset * cs = p;
1178 + return cs->list.next == &top_cpuset.list ? NULL
1179 + : list_entry(cs->list.next, struct cpuset, list);
1182 +/* How many chars needed to print a long (as a mask) ? */
1183 +#define CHARS_FOR_LONG (BITS_PER_LONG / 4)
1184 +#define CFL CHARS_FOR_LONG
1185 +static void sprint_mask(char * buf, cpumask_t mask)
1187 +#ifdef CPU_ARRAY_SIZE
1189 + for (l = CPU_ARRAY_SIZE - 1; l>=0; l--) {
1190 + /* XXX only 64 bits long supported here ! */
1191 + sprintf(buf, "%016lx", mask.mask[l]);
1195 + /* XXX only 64 bits long supported here ! */
1196 + sprintf(buf, "%016lx", mask);
1201 +static int proc_cpusets_show(struct seq_file *m, void *p)
1203 + struct cpuset * cs = p;
1204 +#ifdef CPU_ARRAY_SIZE
1205 + char maskbuf[CPU_ARRAY_SIZE * CFL + 1];
1207 + char maskbuf[CFL + 1];
1210 + seq_printf(m, "cpuset %d {\n"
1215 + "\tuid & suid = %d & %d\n",
1216 + cs->id, cs->parent ? cs->parent->id : -1,
1217 + cs->flags, atomic_read(&cs->count), cs->has_been_attached,
1218 + cs->uid, cs->suid);
1220 + sprint_mask(maskbuf, cs->cpus_allowed);
1221 + seq_printf(m,"\tcpus_allowed = %s\n", maskbuf);
1222 + sprint_mask(maskbuf, cs->cpus_reserved);
1223 + seq_printf(m,"\tcpus_reserved = %s\n", maskbuf);
1224 + sprint_mask(maskbuf, cs->cpus_strictly_reserved);
1225 + seq_printf(m,"\tcpus_strictly_reserved = %s\n", maskbuf);
1227 + seq_printf(m, "}\n\n");
1232 +static void proc_cpusets_stop(struct seq_file *m, void *p)
1234 + read_unlock(&cpuset_lock);
1237 +static struct seq_operations cpusets_op = {
1238 + .start = proc_cpusets_start,
1239 + .next = proc_cpusets_next,
1240 + .stop = proc_cpusets_stop,
1241 + .show = proc_cpusets_show
1245 +static int proc_cpusets_open(struct inode *inode, struct file *file)
1247 + return seq_open(file, &cpusets_op);
1250 +static struct file_operations proc_cpusets_operations = {
1251 + .open = proc_cpusets_open,
1253 + .llseek = seq_lseek,
1254 + .release = seq_release,
1258 +static int __init proc_cpusets_init(void)
1260 + struct proc_dir_entry *entry;
1262 + entry = create_proc_entry("cpusets", 0, NULL);
1264 + entry->proc_fops = &proc_cpusets_operations;
1268 +/*************************************************************
1269 + *********** /proc/xxx/cpuset ********************************
1270 + *************************************************************
1272 +int proc_pid_cpuset(struct task_struct *task, char *buffer)
1274 + return sprintf(buffer, "%d\n", task->cpuset->id);
1277 +#endif /* CONFIG_CPUSETS_PROC */
1279 diff -Nru a/kernel/exit.c b/kernel/exit.c
1280 --- a/kernel/exit.c Tue Oct 21 16:05:27 2003
1281 +++ b/kernel/exit.c Tue Oct 21 16:05:27 2003
1284 BUG_ON(p->state < TASK_ZOMBIE);
1287 +#ifdef CONFIG_CPUSETS
1288 + spin_lock(&p->cpuset_attach_lock);
1289 + release_cpuset(p->cpuset);
1291 + /* mark that this process's cpuset has already been released
1292 + * another process might still try to cpuset_attach this process
1295 + spin_unlock(&p->cpuset_attach_lock);
1296 +#endif /* CONFIG_CPUSETS */
1299 atomic_dec(&p->user->processes);
1300 spin_lock(&p->proc_lock);
1301 proc_dentry = proc_pid_unhash(p);
1303 spin_unlock(&p->proc_lock);
1304 proc_pid_flush(proc_dentry);
1310 diff -Nru a/kernel/fork.c b/kernel/fork.c
1311 --- a/kernel/fork.c Tue Oct 21 16:05:27 2003
1312 +++ b/kernel/fork.c Tue Oct 21 16:05:27 2003
1314 #include <linux/ptrace.h>
1315 #include <linux/mount.h>
1317 +#ifdef CONFIG_CPUSETS
1318 +#include <linux/cpuset.h>
1321 #include <asm/pgtable.h>
1322 #include <asm/pgalloc.h>
1323 #include <asm/uaccess.h>
1324 @@ -1035,6 +1039,11 @@
1326 if (p->ptrace & PT_PTRACED)
1327 __ptrace_link(p, current->parent);
1329 +#ifdef CONFIG_CPUSETS
1330 + use_cpuset(p->cpuset);
1334 attach_pid(p, PIDTYPE_PID, p->pid);
1335 if (thread_group_leader(p)) {
1336 diff -Nru a/kernel/sched.c b/kernel/sched.c
1337 --- a/kernel/sched.c Tue Oct 21 16:05:27 2003
1338 +++ b/kernel/sched.c Tue Oct 21 16:05:27 2003
1340 #include <linux/cpu.h>
1341 #include <linux/percpu.h>
1343 +#ifdef CONFIG_CPUSETS
1344 +#include <linux/cpuset.h>
1348 #define cpu_to_node_mask(cpu) node_to_cpumask(cpu_to_node(cpu))
1350 @@ -2203,7 +2207,11 @@
1351 !capable(CAP_SYS_NICE))
1354 +#ifdef CONFIG_CPUSETS
1355 + retval = cpuset_setaffinity(p, new_mask);
1357 retval = set_cpus_allowed(p, new_mask);
1362 @@ -2236,7 +2244,11 @@
1366 +#ifdef CONFIG_CPUSETS
1367 + mask = p->cpus_wanted;
1369 cpus_and(mask, p->cpus_allowed, cpu_online_map);
1373 read_unlock(&tasklist_lock);