]>
Commit | Line | Data |
---|---|---|
5de4fc43 | 1 | diff -Nru a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c |
2 | --- a/arch/i386/kernel/cpu/proc.c Tue Oct 21 16:05:27 2003 | |
3 | +++ b/arch/i386/kernel/cpu/proc.c Tue Oct 21 16:05:27 2003 | |
4 | @@ -4,6 +4,12 @@ | |
5 | #include <asm/semaphore.h> | |
6 | #include <linux/seq_file.h> | |
7 | ||
8 | +#ifdef CONFIG_CPUSETS_PROC_CPUINFO | |
9 | +#include <linux/sched.h> | |
10 | +#include <linux/cpuset.h> | |
11 | +#include <linux/cpuset_types.h> | |
12 | +#endif | |
13 | + | |
14 | /* | |
15 | * Get CPU information for use by the procfs. | |
16 | */ | |
17 | @@ -63,12 +69,22 @@ | |
18 | if (!cpu_online(n)) | |
19 | return 0; | |
20 | #endif | |
21 | +#ifdef CONFIG_CPUSETS_PROC_CPUINFO | |
22 | + /* show only CPUs in current cpuset */ | |
23 | + if (!cpu_isset(n, current->cpuset->cpus_allowed)) | |
24 | + return 0; | |
25 | +#endif /* CONFIG_CPUSETS_PROC_CPUINFO */ | |
26 | + | |
27 | seq_printf(m, "processor\t: %d\n" | |
28 | "vendor_id\t: %s\n" | |
29 | "cpu family\t: %d\n" | |
30 | "model\t\t: %d\n" | |
31 | "model name\t: %s\n", | |
32 | +#ifdef CONFIG_CPUSETS_PROC_CPUINFO | |
33 | + cpuset_realtologic_cpuid(current->cpuset, n), | |
34 | +#else | |
35 | n, | |
36 | +#endif | |
37 | c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", | |
38 | c->x86, | |
39 | c->x86_model, | |
40 | diff -Nru a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S | |
41 | --- a/arch/i386/kernel/entry.S Tue Oct 21 16:05:27 2003 | |
42 | +++ b/arch/i386/kernel/entry.S Tue Oct 21 16:05:27 2003 | |
43 | @@ -880,5 +880,14 @@ | |
44 | .long sys_utimes | |
45 | .long sys_fadvise64_64 | |
46 | .long sys_ni_syscall /* sys_vserver */ | |
47 | + .long sys_ni_syscall | |
48 | + .long sys_ni_syscall /* 275 */ | |
49 | + .long sys_ni_syscall | |
50 | + .long sys_cpuset_create | |
51 | + .long sys_cpuset_destroy | |
52 | + .long sys_cpuset_alloc | |
53 | + .long sys_cpuset_attach | |
54 | + .long sys_cpuset_getfreecpus | |
55 | + | |
56 | ||
57 | nr_syscalls=(.-sys_call_table)/4 | |
58 | diff -Nru a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S | |
59 | --- a/arch/ia64/kernel/entry.S Tue Oct 21 16:05:27 2003 | |
60 | +++ b/arch/ia64/kernel/entry.S Tue Oct 21 16:05:27 2003 | |
61 | @@ -1481,11 +1481,19 @@ | |
62 | data8 ia64_ni_syscall | |
63 | data8 ia64_ni_syscall // 1265 | |
64 | data8 ia64_ni_syscall | |
65 | +#ifdef CONFIG_CPUSETS | |
66 | + data8 sys_cpuset_create | |
67 | + data8 sys_cpuset_destroy | |
68 | + data8 sys_cpuset_alloc | |
69 | + data8 sys_cpuset_attach // 1270 | |
70 | + data8 sys_cpuset_getfreecpus | |
71 | +#else | |
72 | data8 ia64_ni_syscall | |
73 | data8 ia64_ni_syscall | |
74 | data8 ia64_ni_syscall | |
75 | data8 ia64_ni_syscall // 1270 | |
76 | data8 ia64_ni_syscall | |
77 | +#endif | |
78 | data8 ia64_ni_syscall | |
79 | data8 ia64_ni_syscall | |
80 | data8 ia64_ni_syscall | |
81 | diff -Nru a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c | |
82 | --- a/arch/ia64/kernel/setup.c Tue Oct 21 16:05:27 2003 | |
83 | +++ b/arch/ia64/kernel/setup.c Tue Oct 21 16:05:27 2003 | |
84 | @@ -50,6 +50,10 @@ | |
85 | #include <asm/system.h> | |
86 | #include <asm/unistd.h> | |
87 | ||
88 | +#ifdef CONFIG_CPUSETS_PROC_CPUINFO | |
89 | +# include <linux/cpuset_types.h> | |
90 | +#endif | |
91 | + | |
92 | #if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE) | |
93 | # error "struct cpuinfo_ia64 too big!" | |
94 | #endif | |
95 | @@ -383,6 +387,15 @@ | |
96 | unsigned long mask; | |
97 | int i; | |
98 | ||
99 | +#ifdef CONFIG_CPUSETS_PROC_CPUINFO | |
100 | + /* show only CPUs in current cpuset */ | |
101 | + if (!current->cpuset) | |
102 | + BUG(); | |
103 | + | |
104 | + if (!cpu_isset(cpunum, current->cpuset->cpus_allowed)) | |
105 | + return 0; | |
106 | +#endif /* CONFIG_CPUSETS_PROC_CPUINFO */ | |
107 | + | |
108 | mask = c->features; | |
109 | ||
110 | switch (c->family) { | |
111 | @@ -427,7 +440,12 @@ | |
112 | "cpu MHz : %lu.%06lu\n" | |
113 | "itc MHz : %lu.%06lu\n" | |
114 | "BogoMIPS : %lu.%02lu\n\n", | |
115 | - cpunum, c->vendor, family, c->model, c->revision, c->archrev, | |
116 | +#ifdef CONFIG_CPUSETS_PROC_CPUINFO | |
117 | + cpuset_realtologic_cpuid(current->cpuset, cpunum), | |
118 | +#else | |
119 | + cpunum, | |
120 | +#endif | |
121 | + c->vendor, family, c->model, c->revision, c->archrev, | |
122 | features, c->ppn, c->number, | |
123 | c->proc_freq / 1000000, c->proc_freq % 1000000, | |
124 | c->itc_freq / 1000000, c->itc_freq % 1000000, | |
125 | diff -Nru a/fs/proc/base.c b/fs/proc/base.c | |
126 | --- a/fs/proc/base.c Tue Oct 21 16:05:27 2003 | |
127 | +++ b/fs/proc/base.c Tue Oct 21 16:05:27 2003 | |
128 | @@ -60,6 +60,9 @@ | |
129 | PROC_TGID_MAPS, | |
130 | PROC_TGID_MOUNTS, | |
131 | PROC_TGID_WCHAN, | |
132 | +#ifdef CONFIG_CPUSETS_PROC | |
133 | + PROC_TGID_CPUSET, | |
134 | +#endif | |
135 | #ifdef CONFIG_SECURITY | |
136 | PROC_TGID_ATTR, | |
137 | PROC_TGID_ATTR_CURRENT, | |
138 | @@ -123,6 +126,9 @@ | |
139 | #ifdef CONFIG_KALLSYMS | |
140 | E(PROC_TGID_WCHAN, "wchan", S_IFREG|S_IRUGO), | |
141 | #endif | |
142 | +#ifdef CONFIG_CPUSETS_PROC | |
143 | + E(PROC_TGID_CPUSET, "cpuset", S_IFREG|S_IRUGO), | |
144 | +#endif | |
145 | {0,0,NULL,0} | |
146 | }; | |
147 | static struct pid_entry tid_base_stuff[] = { | |
148 | @@ -366,6 +372,11 @@ | |
149 | } | |
150 | #endif /* CONFIG_KALLSYMS */ | |
151 | ||
152 | + | |
153 | +#ifdef CONFIG_CPUSETS_PROC | |
154 | +int proc_pid_cpuset(struct task_struct *task, char *buffer); | |
155 | +#endif /* CONFIG_CPUSETS_PROC */ | |
156 | + | |
157 | /************************************************************************/ | |
158 | /* Here the fs part begins */ | |
159 | /************************************************************************/ | |
160 | @@ -1359,6 +1370,12 @@ | |
161 | case PROC_TGID_WCHAN: | |
162 | inode->i_fop = &proc_info_file_operations; | |
163 | ei->op.proc_read = proc_pid_wchan; | |
164 | + break; | |
165 | +#endif | |
166 | +#ifdef CONFIG_CPUSETS_PROC | |
167 | + case PROC_TGID_CPUSET: | |
168 | + inode->i_fop = &proc_info_file_operations; | |
169 | + ei->op.proc_read = proc_pid_cpuset; | |
170 | break; | |
171 | #endif | |
172 | default: | |
173 | diff -Nru a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c | |
174 | --- a/fs/proc/proc_misc.c Tue Oct 21 16:05:27 2003 | |
175 | +++ b/fs/proc/proc_misc.c Tue Oct 21 16:05:27 2003 | |
176 | @@ -51,6 +51,10 @@ | |
177 | #include <asm/tlb.h> | |
178 | #include <asm/div64.h> | |
179 | ||
180 | +#ifdef CONFIG_CPUSETS_PROC_STAT | |
181 | +# include <linux/cpuset_types.h> | |
182 | +#endif | |
183 | + | |
184 | #define LOAD_INT(x) ((x) >> FSHIFT) | |
185 | #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) | |
186 | /* | |
187 | @@ -382,6 +386,12 @@ | |
188 | int j; | |
189 | ||
190 | if (!cpu_online(i)) continue; | |
191 | +#ifdef CONFIG_CPUSETS_PROC_STAT | |
192 | + /* show only CPUs in current cpuset */ | |
193 | + if (!cpu_isset(i, current->cpuset->cpus_allowed)) | |
194 | + continue; | |
195 | +#endif | |
196 | + | |
197 | user += kstat_cpu(i).cpustat.user; | |
198 | nice += kstat_cpu(i).cpustat.nice; | |
199 | system += kstat_cpu(i).cpustat.system; | |
200 | @@ -403,8 +413,17 @@ | |
201 | jiffies_to_clock_t(softirq)); | |
202 | for (i = 0; i < NR_CPUS; i++){ | |
203 | if (!cpu_online(i)) continue; | |
204 | +#ifdef CONFIG_CPUSETS_PROC_STAT | |
205 | + /* show only CPUs in current cpuset */ | |
206 | + if (!cpu_isset(i, current->cpuset->cpus_allowed)) | |
207 | + continue; | |
208 | +#endif | |
209 | seq_printf(p, "cpu%d %u %u %u %u %u %u %u\n", | |
210 | +#ifdef CONFIG_CPUSETS_PROC_STAT | |
211 | + cpuset_realtologic_cpuid(current->cpuset, i), | |
212 | +#else | |
213 | i, | |
214 | +#endif | |
215 | jiffies_to_clock_t(kstat_cpu(i).cpustat.user), | |
216 | jiffies_to_clock_t(kstat_cpu(i).cpustat.nice), | |
217 | jiffies_to_clock_t(kstat_cpu(i).cpustat.system), | |
218 | diff -Nru a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h | |
219 | --- a/include/asm-i386/unistd.h Tue Oct 21 16:05:27 2003 | |
220 | +++ b/include/asm-i386/unistd.h Tue Oct 21 16:05:27 2003 | |
221 | @@ -280,7 +280,13 @@ | |
222 | #define __NR_fadvise64_64 272 | |
223 | #define __NR_vserver 273 | |
224 | ||
225 | -#define NR_syscalls 274 | |
226 | +#define __NR_sys_cpuset_create 277 | |
227 | +#define __NR_sys_cpuset_destroy 278 | |
228 | +#define __NR_sys_cpuset_alloc 279 | |
229 | +#define __NR_sys_cpuset_attach 280 | |
230 | +#define __NR_sys_cpuset_getfreecpus 281 | |
231 | + | |
232 | +#define NR_syscalls 282 | |
233 | ||
234 | /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */ | |
235 | ||
236 | diff -Nru a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h | |
237 | --- a/include/asm-ia64/unistd.h Tue Oct 21 16:05:27 2003 | |
238 | +++ b/include/asm-ia64/unistd.h Tue Oct 21 16:05:27 2003 | |
239 | @@ -253,6 +253,12 @@ | |
240 | ||
241 | #define NR_syscalls 256 /* length of syscall table */ | |
242 | ||
243 | +#define __NR_sys_cpuset_create 1267 | |
244 | +#define __NR_sys_cpuset_destroy 1268 | |
245 | +#define __NR_sys_cpuset_alloc 1269 | |
246 | +#define __NR_sys_cpuset_attach 1270 | |
247 | +#define __NR_sys_cpuset_getfreecpus 1271 | |
248 | + | |
249 | #if !defined(__ASSEMBLY__) && !defined(ASSEMBLER) | |
250 | ||
251 | extern long __ia64_syscall (long a0, long a1, long a2, long a3, long a4, long nr); | |
252 | diff -Nru a/include/linux/cpuset.h b/include/linux/cpuset.h | |
253 | --- /dev/null Wed Dec 31 16:00:00 1969 | |
254 | +++ b/include/linux/cpuset.h Tue Oct 21 16:05:27 2003 | |
255 | @@ -0,0 +1,29 @@ | |
256 | +/* | |
257 | + * BULL cpuset interface | |
258 | + */ | |
259 | + | |
260 | +#ifndef _LINUX_CPUSET_H | |
261 | +#define _LINUX_CPUSET_H | |
262 | + | |
263 | +typedef unsigned int cpuset_t; | |
264 | + | |
265 | +#define CPUSET_STRICT 0x00000001 | |
266 | +#define CPUSET_AUTOCLEAN 0x00000002 | |
267 | + | |
268 | +#ifdef __KERNEL__ | |
269 | + | |
270 | +extern struct cpuset top_cpuset; | |
271 | + | |
272 | +void use_cpuset(struct cpuset *); | |
273 | +void release_cpuset(struct cpuset *); | |
274 | + | |
275 | +struct task_struct; | |
276 | +int cpuset_setaffinity(struct task_struct * task, unsigned long mask); | |
277 | + | |
278 | +void cpusets_update_cpus_online(void); | |
279 | + | |
280 | +int cpuset_realtologic_cpuid(struct cpuset * cs, int cpuid); | |
281 | + | |
282 | +#endif /* __KERNEL__ */ | |
283 | + | |
284 | +#endif /* _LINUX_CPUSET_H */ | |
285 | diff -Nru a/include/linux/cpuset_types.h b/include/linux/cpuset_types.h | |
286 | --- /dev/null Wed Dec 31 16:00:00 1969 | |
287 | +++ b/include/linux/cpuset_types.h Tue Oct 21 16:05:27 2003 | |
288 | @@ -0,0 +1,39 @@ | |
289 | +#ifndef _LINUX_CPUSET_TYPES_H | |
290 | +#define _LINUX_CPUSET_TYPES_H | |
291 | + | |
292 | + | |
293 | +struct cpuset { | |
294 | + cpuset_t id; | |
295 | + int flags; | |
296 | + int has_been_attached; | |
297 | + | |
298 | + /* bitmask of the cpus present in this cpuset */ | |
299 | + cpumask_t cpus_allowed; | |
300 | + | |
301 | + /* bitmask of the cpus reserved in this cpuset */ | |
302 | + cpumask_t cpus_reserved; | |
303 | + | |
304 | + /* bitmask of the cpus reserved with CPUSET_STRICT */ | |
305 | + cpumask_t cpus_strictly_reserved; | |
306 | + | |
307 | + struct cpuset * parent; | |
308 | + struct list_head list; /* for the whole list */ | |
309 | + | |
310 | + struct list_head children; | |
311 | + struct list_head brothers; | |
312 | + | |
313 | + /* overall users (processes + children) */ | |
314 | + /* kept as atomic_t so users can be counted without a lock */ | 
315 | + atomic_t count; | |
316 | + | |
317 | + spinlock_t attach_lock; | |
318 | + | |
319 | + /* owner */ | |
320 | + uid_t uid; | |
321 | + uid_t suid; | |
322 | + | |
323 | + | |
324 | +}; | |
325 | + | |
326 | + | |
327 | +#endif | |
328 | diff -Nru a/include/linux/init_task.h b/include/linux/init_task.h | |
329 | --- a/include/linux/init_task.h Tue Oct 21 16:05:27 2003 | |
330 | +++ b/include/linux/init_task.h Tue Oct 21 16:05:27 2003 | |
331 | @@ -56,6 +56,12 @@ | |
332 | .siglock = SPIN_LOCK_UNLOCKED, \ | |
333 | } | |
334 | ||
335 | +#ifdef CONFIG_CPUSETS | |
336 | +#define CPUSET_TSKINIT(a,b) .a = b, | |
337 | +#else | |
338 | +#define CPUSET_TSKINIT(a,b) | |
339 | +#endif | |
340 | + | |
341 | /* | |
342 | * INIT_TASK is used to set up the first task table, touch at | |
343 | * your own risk!. Base=0, limit=0x1fffff (=2MB) | |
344 | @@ -108,6 +114,9 @@ | |
345 | .proc_lock = SPIN_LOCK_UNLOCKED, \ | |
346 | .switch_lock = SPIN_LOCK_UNLOCKED, \ | |
347 | .journal_info = NULL, \ | |
348 | + CPUSET_TSKINIT(cpus_wanted, CPU_MASK_ALL) \ | |
349 | + CPUSET_TSKINIT(cpuset, &top_cpuset) \ | |
350 | + CPUSET_TSKINIT(cpuset_attach_lock, SPIN_LOCK_UNLOCKED) \ | |
351 | } | |
352 | ||
353 | ||
354 | diff -Nru a/include/linux/sched.h b/include/linux/sched.h | |
355 | --- a/include/linux/sched.h Tue Oct 21 16:05:27 2003 | |
356 | +++ b/include/linux/sched.h Tue Oct 21 16:05:27 2003 | |
357 | @@ -29,6 +29,7 @@ | |
358 | #include <linux/completion.h> | |
359 | #include <linux/pid.h> | |
360 | #include <linux/percpu.h> | |
361 | +#include <linux/cpuset.h> | |
362 | ||
363 | struct exec_domain; | |
364 | ||
365 | @@ -464,6 +465,13 @@ | |
366 | ||
367 | unsigned long ptrace_message; | |
368 | siginfo_t *last_siginfo; /* For ptrace use. */ | |
369 | + | |
370 | +/* cpuset info */ | |
371 | +#ifdef CONFIG_CPUSETS | |
372 | + struct cpuset * cpuset; | |
373 | + unsigned long cpus_wanted; | |
374 | + spinlock_t cpuset_attach_lock; | |
375 | +#endif | |
376 | }; | |
377 | ||
378 | static inline pid_t process_group(struct task_struct *tsk) | |
379 | diff -Nru a/init/Kconfig b/init/Kconfig | |
380 | --- a/init/Kconfig Tue Oct 21 16:05:27 2003 | |
381 | +++ b/init/Kconfig Tue Oct 21 16:05:27 2003 | |
382 | @@ -194,6 +194,41 @@ | |
383 | Disabling this option will cause the kernel to be built without | |
384 | support for epoll family of system calls. | |
385 | ||
386 | +if X86 || IA64 | |
387 | + | |
388 | +config CPUSETS | |
389 | + bool "cpusets" | |
390 | + depends on SMP | |
391 | + help | |
392 | + This option will let you create and manage sets of CPUs on which | 
393 | + you can run processes. | 
394 | + | |
395 | + Say N if unsure. | |
396 | + | |
397 | +config CPUSETS_PROC | |
398 | + bool "/proc/cpusets support" | |
399 | + depends on CPUSETS | |
400 | + help | |
401 | + Get some info about the existing cpusets in your system. | |
402 | + To use this option, you have to ensure that the "/proc file system | |
403 | + support" (CONFIG_PROC_FS) is enabled, too. | |
404 | + | |
405 | +config CPUSETS_PROC_CPUINFO | |
406 | + bool "/proc/cpuinfo uses current cpuset" | |
407 | + depends on CPUSETS_PROC | |
408 | + help | |
409 | + With this option enabled, a process reading /proc/cpuinfo will | |
410 | + only see the CPUs that are in its current cpuset. | |
411 | + | |
412 | +config CPUSETS_PROC_STAT | |
413 | + bool "/proc/stat uses current cpuset" | |
414 | + depends on CPUSETS_PROC | |
415 | + help | |
416 | + With this option enabled, a process reading /proc/stat will | |
417 | + only see the CPUs that are in its current cpuset. | |
418 | + | |
419 | +endif | |
420 | + | |
421 | source "drivers/block/Kconfig.iosched" | |
422 | ||
423 | endmenu # General setup | |
424 | diff -Nru a/init/main.c b/init/main.c | |
425 | --- a/init/main.c Tue Oct 21 16:05:27 2003 | |
426 | +++ b/init/main.c Tue Oct 21 16:05:27 2003 | |
427 | @@ -39,6 +39,13 @@ | |
428 | #include <linux/writeback.h> | |
429 | #include <linux/cpu.h> | |
430 | ||
431 | + | |
432 | + | |
433 | +#ifdef CONFIG_CPUSETS | |
434 | +#include <linux/cpuset.h> | |
435 | +#endif | |
436 | + | |
437 | + | |
438 | #include <asm/io.h> | |
439 | #include <asm/bugs.h> | |
440 | ||
441 | @@ -85,6 +92,7 @@ | |
442 | extern void free_initmem(void); | |
443 | extern void populate_rootfs(void); | |
444 | extern void driver_init(void); | |
445 | +extern void cpusets_init(void); | |
446 | ||
447 | #ifdef CONFIG_TC | |
448 | extern void tc_init(void); | |
449 | @@ -456,6 +464,10 @@ | |
450 | #ifdef CONFIG_PROC_FS | |
451 | proc_root_init(); | |
452 | #endif | |
453 | +#ifdef CONFIG_CPUSETS | |
454 | + cpusets_init(); | |
455 | +#endif | |
456 | + | |
457 | check_bugs(); | |
458 | printk("POSIX conformance testing by UNIFIX\n"); | |
459 | ||
460 | @@ -524,6 +536,10 @@ | |
461 | */ | |
462 | static void __init do_basic_setup(void) | |
463 | { | |
464 | +#ifdef CONFIG_CPUSETS | |
465 | + cpusets_update_cpus_online(); | |
466 | +#endif | |
467 | + | |
468 | driver_init(); | |
469 | ||
470 | #ifdef CONFIG_SYSCTL | |
471 | @@ -579,6 +595,7 @@ | |
472 | do_basic_setup(); | |
473 | ||
474 | prepare_namespace(); | |
475 | + | |
476 | ||
477 | /* | |
478 | * Ok, we have completed the initial bootup, and | |
479 | diff -Nru a/kernel/Makefile b/kernel/Makefile | |
480 | --- a/kernel/Makefile Tue Oct 21 16:05:27 2003 | |
481 | +++ b/kernel/Makefile Tue Oct 21 16:05:27 2003 | |
482 | @@ -19,6 +19,7 @@ | |
483 | obj-$(CONFIG_COMPAT) += compat.o | |
484 | obj-$(CONFIG_IKCONFIG) += configs.o | |
485 | obj-$(CONFIG_IKCONFIG_PROC) += configs.o | |
486 | +obj-$(CONFIG_CPUSETS) += cpuset.o | |
487 | ||
488 | ifneq ($(CONFIG_IA64),y) | |
489 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is | |
490 | diff -Nru a/kernel/cpuset.c b/kernel/cpuset.c | |
491 | --- /dev/null Wed Dec 31 16:00:00 1969 | |
492 | +++ b/kernel/cpuset.c Tue Oct 21 16:05:27 2003 | |
493 | @@ -0,0 +1,785 @@ | |
494 | +#include <linux/cpumask.h> | |
495 | +#include <linux/cpuset.h> | |
496 | +#include <linux/slab.h> /* for kmalloc */ | |
497 | +#include <linux/list.h> | |
498 | +#include <linux/sched.h> /* for find_task_by_pid and task_struct */ | |
499 | +#include <asm/uaccess.h> | |
500 | +#include <linux/errno.h> | |
501 | +#include <linux/seq_file.h> | |
502 | +#include <linux/fs.h> | |
503 | +#include <linux/init.h> | |
504 | +#include <linux/proc_fs.h> | |
505 | +#include <linux/spinlock.h> | |
506 | +#include <linux/list.h> | |
507 | +#include <linux/cpuset_types.h> | |
508 | + | |
509 | +#define info(args...) do {} while(0) | |
510 | +//#define info(args...) printk(KERN_INFO args) | |
511 | + | |
512 | + | |
513 | +#ifdef CPU_ARRAY_SIZE | |
514 | +#warning "CPU ARRAY SIZE !" | |
515 | +#endif | |
516 | +rwlock_t cpuset_lock = RW_LOCK_UNLOCKED; | |
517 | + | |
518 | +#define CPUSET_TOP_ID 1 | |
519 | + | |
520 | +struct cpuset top_cpuset = { | |
521 | + .id = CPUSET_TOP_ID, | |
522 | + .flags = CPUSET_STRICT, | |
523 | + .cpus_reserved = CPU_MASK_NONE, | |
524 | + .cpus_strictly_reserved = CPU_MASK_NONE, | |
525 | + .parent = 0, | |
526 | + .children = LIST_HEAD_INIT(top_cpuset.children), | |
527 | + .brothers = LIST_HEAD_INIT(top_cpuset.brothers), | |
528 | + .list = LIST_HEAD_INIT(top_cpuset.list), | |
529 | + .count = ATOMIC_INIT(1), /* this cpuset can't be deleted */ | |
530 | + .has_been_attached = 0, | |
531 | + .uid = 0, | |
532 | + .attach_lock = SPIN_LOCK_UNLOCKED, | |
533 | + .suid = 0 | |
534 | +}; | |
535 | + | |
536 | + | |
537 | +static int proc_cpusets_init(void); | |
538 | + | |
539 | +int __init cpusets_init(void) | |
540 | +{ | |
541 | + info("cpusets ("__FILE__ " compiled " __DATE__ " " __TIME__ "initializing..\n"); | |
542 | + | |
543 | + | |
544 | +#ifdef CONFIG_CPUSETS_PROC | |
545 | + proc_cpusets_init(); | |
546 | +#endif /* CONFIG_CPUSETS_PROC */ | |
547 | + return 0; | |
548 | +} | |
549 | + | |
550 | +/* | |
551 | + * later this function may be used to indicate that a CPU has been put | |
552 | + * online/offline | |
553 | + * BUT currently it only exists because cpu_online_map becomes available | |
554 | + * only late during kernel boot | |
555 | + */ | |
556 | +void cpusets_update_cpus_online(void) | |
557 | +{ | |
558 | + top_cpuset.cpus_allowed = cpu_online_map ; | |
559 | +} | |
560 | + | |
561 | + | |
562 | +static const int N = (8*sizeof(cpumask_t)); | |
563 | +/* mask must NOT be ZERO ! */ | |
564 | +/* this is a cyclic version of next_cpu */ | |
565 | +static inline void _next_cpu(const cpumask_t mask, int * index) | |
566 | +{ | |
567 | + for(;;) { | |
568 | + if (++*index >= N) *index = 0; | |
569 | + if (cpu_isset(*index, mask)) return; | |
570 | + } | |
571 | +} | |
572 | + | |
573 | +static unsigned long cpuset_combine_mask(const cpumask_t wanted, const cpumask_t allowed) | |
574 | +{ | |
575 | + int i; | |
576 | + cpumask_t mask; | |
577 | + | |
578 | + /* start with current cpu out of the mask | |
579 | + * so the first call to next_cpu will take the first cpu | |
580 | + * even if it is cpu zero | |
581 | + */ | |
582 | + int cpu = N; | |
583 | + | |
584 | + if (cpus_empty(wanted)) return 0; | |
585 | + if (cpus_empty(allowed)) return 0; | |
586 | + | |
587 | + cpus_clear(mask); | |
588 | + | |
589 | + for(i=0; i < N; i++) { | |
590 | + _next_cpu(allowed, &cpu); | |
591 | + if (cpu_isset(i, wanted)) | |
592 | + cpu_set(cpu, mask); | |
593 | + } | |
594 | + info("cpuset_combine_mask: %016lx + %016lx --> %016lx\n", | |
595 | + wanted, allowed, mask); | |
596 | + return mask; | |
597 | +} | |
598 | + | |
599 | +/* translate a "real" cpu number to a "inside cpuset" (aka logical) | |
600 | + * cpu number. Used for /proc/cpuinfo | |
601 | + */ | |
602 | +int cpuset_realtologic_cpuid(struct cpuset * cs, int cpuid) | |
603 | +{ | |
604 | + int i; | |
605 | + int l = 0; | |
606 | + for(i=0; i < NR_CPUS; i++) | |
607 | + { | |
608 | + if (i == cpuid) return l; | |
609 | + if (cpu_isset(i, cs->cpus_allowed)) | |
610 | + l++; | |
611 | + } | |
612 | + /* NOT REACHED */ | |
613 | + BUG(); | |
614 | + return 0; | |
615 | +} | |
616 | + | |
617 | +static struct cpuset * find_cpuset_by_id(cpuset_t id) | |
618 | +{ | |
619 | + struct cpuset * cs; | |
620 | + if (id == CPUSET_TOP_ID) return &top_cpuset; | |
621 | + | |
622 | + list_for_each_entry(cs, &top_cpuset.list, list) { | |
623 | + if (cs->id == id) return cs; | |
624 | + } | |
625 | + /* Not found */ | |
626 | + return 0; | |
627 | +} | |
628 | + | |
629 | +/* increment a cpuset use count */ | |
630 | +void use_cpuset(struct cpuset * cs) | |
631 | +{ | |
632 | + atomic_inc(&cs->count); | |
633 | +} | |
634 | + | |
635 | +static void check_cpuset_autoclean(struct cpuset *); | |
636 | + | |
637 | +/* decrement a cpuset use count, and maybe autodestroy it */ | |
638 | +/* cpuset_lock MUST NOT BE HELD */ | |
639 | +void release_cpuset(struct cpuset * cs) | |
640 | +{ | |
641 | + if (atomic_dec_and_test(&cs->count)) | |
642 | + check_cpuset_autoclean(cs); | |
643 | +} | |
644 | + | |
645 | +/* find a free cpuset ID */ | |
646 | +static cpuset_t cpuset_mkid(void) | |
647 | +{ | |
648 | + static cpuset_t curid = CPUSET_TOP_ID; | |
649 | + | |
650 | + while (find_cpuset_by_id(++curid)); | |
651 | + | |
652 | + return curid; | |
653 | +} | |
654 | + | |
655 | +asmlinkage long sys_cpuset_create(cpuset_t * cpusetp, int flags) | |
656 | +{ | |
657 | + struct cpuset * cs; | |
658 | + | |
659 | + info("sys_cpuset_create(%016lx, %d) called\n", | |
660 | + (unsigned long) cpusetp, flags); | |
661 | + | |
662 | + /* can only create a strict cs in another strict cs */ | |
663 | + if ((flags & CPUSET_STRICT) && (!(current->cpuset->flags & CPUSET_STRICT))) | |
664 | + return -EINVAL; | |
665 | + | |
666 | + /* check if given pointer is valid */ | |
667 | + if (verify_area(VERIFY_WRITE, cpusetp, sizeof(cpuset_t))) | |
668 | + return -EFAULT; | |
669 | + | |
670 | + cs = (struct cpuset *) kmalloc(sizeof(struct cpuset), GFP_KERNEL); | |
671 | + if (!cs) | |
672 | + return -ENOMEM; | |
673 | + | |
674 | + cs->flags = flags; | |
675 | + atomic_set(&cs->count, 0); | |
676 | + INIT_LIST_HEAD(&cs->children); | |
677 | + cs->cpus_allowed = 0; | |
678 | + cs->cpus_reserved = 0; | |
679 | + cs->cpus_strictly_reserved = 0; | |
680 | + cs->has_been_attached = 0; | |
681 | + cs->uid = current->uid; | |
682 | + cs->suid = current->suid; | |
683 | + cs->attach_lock = SPIN_LOCK_UNLOCKED; | |
684 | + | |
685 | + cs->parent = current->cpuset; | |
686 | + | |
687 | + use_cpuset(cs->parent); | |
688 | + | |
689 | + write_lock(&cpuset_lock); | |
690 | + | |
691 | + cs->id = cpuset_mkid(); | |
692 | + list_add(&cs->brothers, &cs->parent->children); | |
693 | + list_add(&cs->list, &top_cpuset.list); | |
694 | + | |
695 | + write_unlock(&cpuset_lock); | |
696 | + | |
697 | + if (put_user(cs->id, cpusetp)) | |
698 | + info("put_user failed !\n"); | |
699 | + | |
700 | + return 0; | |
701 | +} | |
702 | + | |
703 | + | |
704 | +static inline int bad_permission(struct cpuset * cs) | |
705 | +{ | |
706 | + return ((current->euid) && (current->euid != cs->uid) && (current->euid != cs->suid)); | |
707 | +} | |
708 | + | |
709 | +static void __cpuset_destroy(struct cpuset * cs); | |
710 | + | |
711 | +asmlinkage long sys_cpuset_destroy(cpuset_t cpuset) | |
712 | +{ | |
713 | + struct cpuset * cs; | |
714 | + | |
715 | + info("sys_cpuset_destroy(%d) called\n", cpuset); | |
716 | + | |
717 | + if (cpuset == CPUSET_TOP_ID) | |
718 | + return -EINVAL; | |
719 | + | |
720 | + read_lock(&cpuset_lock); | |
721 | + cs = find_cpuset_by_id(cpuset); | |
722 | + | |
723 | + if (!cs) { | |
724 | + read_unlock(&cpuset_lock); | |
725 | + return -EINVAL; | |
726 | + } | |
727 | + | |
728 | + use_cpuset(cs); | |
729 | + read_unlock(&cpuset_lock); | |
730 | + | |
731 | + if (bad_permission(cs)) { | |
732 | + release_cpuset(cs); | |
733 | + return -EPERM; | |
734 | + } | |
735 | + | |
736 | + write_lock(&cpuset_lock); | |
737 | + /* there's at least 1 user (us), if there's more, we can't destroy cs */ | |
738 | + if (atomic_read(&cs->count) > 1) { | |
739 | + write_unlock(&cpuset_lock); | |
740 | + release_cpuset(cs); | |
741 | + return -EBUSY; | |
742 | + } | |
743 | + | |
744 | + /* everything OK, destroy it */ | |
745 | + __cpuset_destroy(cs); | |
746 | + /* write_unlock(&cpuset_lock) will be done inside __cpuset_destroy */ | |
747 | + | |
748 | + return 0; | |
749 | +} | |
750 | + | |
751 | +static void rebuild_reserved_masks(struct cpuset * csp) { | |
752 | + cpumask_t r; | |
753 | + cpumask_t sr; | |
754 | + struct cpuset * cs; | |
755 | + info("Updating cpuset %d masks\n", csp->id); | |
756 | + | |
757 | + cpus_clear(r); | |
758 | + cpus_clear(sr); | |
759 | + | |
760 | + list_for_each_entry(cs, &csp->children, brothers) { | |
761 | + info(" child %d\n", cs->id); | |
762 | + cpus_or(r, r, cs->cpus_allowed); | |
763 | + if (cs->flags & CPUSET_STRICT) | |
764 | + cpus_or(sr, sr, cs->cpus_allowed); | |
765 | + } | |
766 | + csp->cpus_reserved = r; | |
767 | + csp->cpus_strictly_reserved = sr; | |
768 | +} | |
769 | + | |
770 | +/* REALLY destroy a cpuset | |
771 | + * NOTE: | |
772 | + * -> write cpuset_lock must be held | |
773 | + * -> ----------------- WILL BE RELEASED | |
774 | + * this ugly hack is necessary to call release_cpuset(parent) | |
775 | + */ | |
776 | +static void __cpuset_destroy(struct cpuset * cs) | |
777 | +{ | |
778 | + list_del(&cs->list); | |
779 | + list_del(&cs->brothers); | |
780 | + | |
781 | + /* cs will never be top_cpuset, so ->parent exists */ | |
782 | + rebuild_reserved_masks(cs->parent); | |
783 | + | |
784 | + write_unlock(&cpuset_lock); | |
785 | + release_cpuset(cs->parent); | |
786 | + | |
787 | + kfree(cs); | |
788 | +} | |
789 | + | |
790 | +/* remove an unused cpuset if it has the CPUSET_AUTOCLEAN flag */ | |
791 | +static void check_cpuset_autoclean(struct cpuset * cs) | |
792 | +{ | |
793 | + if (!(cs->flags & CPUSET_AUTOCLEAN)) return; /* not autoclean */ | |
794 | + if (!cs->has_been_attached) return; | |
795 | + | |
796 | + write_lock(&cpuset_lock); | |
797 | + | |
798 | + if (atomic_read(&cs->count) > 0) { /* still in use */ | |
799 | + write_unlock(&cpuset_lock); | |
800 | + return; | |
801 | + } | |
802 | + | |
803 | + info("autocleaning cpuset %d\n", cs->id); | |
804 | + | |
805 | + __cpuset_destroy(cs); | |
806 | + /* write_unlock(&cpuset_lock) will be done inside __cpuset_destroy */ | |
807 | +} | |
808 | + | |
809 | +asmlinkage long sys_cpuset_attach(cpuset_t cpuset, pid_t pid) | |
810 | +{ | |
811 | + struct cpuset * cs; | |
812 | + struct task_struct * task; | |
813 | + | |
814 | + info("sys_cpuset_attach(%d, %d) called\n", cpuset, pid); | |
815 | + | |
816 | + read_lock(&cpuset_lock); | |
817 | + cs = find_cpuset_by_id(cpuset); | |
818 | + | |
819 | + if (!cs) { | |
820 | + read_unlock(&cpuset_lock); | |
821 | + return -EINVAL; | |
822 | + } | |
823 | + | |
824 | + | |
825 | + use_cpuset(cs); | |
826 | + | |
827 | + read_unlock(&cpuset_lock); | |
828 | + | |
829 | + if (bad_permission(cs)) { | |
830 | + release_cpuset(cs); | |
831 | + return -EPERM; | |
832 | + } | |
833 | + | |
834 | + if (!cs->cpus_allowed) { /* cannot attach a cpuset with no CPU */ | |
835 | + release_cpuset(cs); | |
836 | + return -EINVAL; | |
837 | + } | |
838 | + | |
839 | + if (pid) { | |
840 | + read_lock(&tasklist_lock); | |
841 | + | |
842 | + task = find_task_by_pid(pid); | |
843 | + if (!task) { | |
844 | + read_unlock(&tasklist_lock); | |
845 | + release_cpuset(cs); | |
846 | + return -ESRCH; | |
847 | + } | |
848 | + | |
849 | + get_task_struct(task); | |
850 | + read_unlock(&tasklist_lock); | |
851 | + | |
852 | + if ((current->euid) && (current->euid != task->uid) && (current->euid != task->suid)) { | |
853 | + put_task_struct(task); | |
854 | + release_cpuset(cs); | |
855 | + return -EPERM; | |
856 | + } | |
857 | + } | |
858 | + else { | |
859 | + task = current; | |
860 | + get_task_struct(task); | |
861 | + } | |
862 | + | |
863 | + set_cpus_allowed(task, cpuset_combine_mask(task->cpus_wanted, cs->cpus_allowed)); | |
864 | + cs->has_been_attached = 1; | |
865 | + | |
866 | + /* release the current cpu set of the task */ | |
867 | + /* lock to prevent a race where two cpuset_attach would be called on the same | |
868 | + * task at the same time, and task->cpuset would be released twice | |
869 | + */ | |
870 | + spin_lock(&task->cpuset_attach_lock); | |
871 | + if (!task->cpuset) { /* task with no cpuset ? means it is exiting */ | |
872 | + spin_unlock(&task->cpuset_attach_lock); | |
873 | + put_task_struct(task); | |
874 | + release_cpuset(cs); | |
875 | + return -ESRCH; | |
876 | + } | |
877 | + release_cpuset(task->cpuset); | |
878 | + /* now lock the cpuset, to protect any running migrate_cpuset...() | |
879 | + * from being disturbed by us | |
880 | + */ | |
881 | + spin_lock(&cs->attach_lock); | |
882 | + task->cpuset = cs; | |
883 | + spin_unlock(&cs->attach_lock); | |
884 | + | |
885 | + spin_unlock(&task->cpuset_attach_lock); | |
886 | + | |
887 | + | |
888 | + put_task_struct(task); | |
889 | + | |
890 | + /* don't call release_cpuset here, | |
891 | + * the task being attached to the cpuset | |
892 | + * is really a new user ! | |
893 | + */ | |
894 | + | |
895 | + return 0; | |
896 | +} | |
897 | + | |
898 | + | |
899 | +static int __cpuset_setaffinity(struct task_struct * task) | |
900 | +{ | |
901 | + cpumask_t allowed; | |
902 | + cpumask_t last = CPU_MASK_NONE; /* remember : 0 is not a valid mask */ | |
903 | + | |
904 | + /* We cannot hold any lock while calling set_cpus_allowed | |
905 | + * since it might sleep | |
906 | + * Thus we try until we are sure we did it with the right mask | |
907 | + */ | |
908 | + for(;;) { | |
909 | + spin_lock(&task->cpuset_attach_lock); | |
910 | + if (!task->cpuset) { | |
911 | + /* task exiting */ | |
912 | + spin_unlock(&task->cpuset_attach_lock); | |
913 | + return 0; | |
914 | + } | |
915 | + allowed = task->cpuset->cpus_allowed; | |
916 | + spin_unlock(&task->cpuset_attach_lock); | |
917 | + | |
918 | + if (last == allowed) | |
919 | + return 0; | |
920 | + | |
921 | + int ret; | |
922 | + ret = set_cpus_allowed(task, cpuset_combine_mask(task->cpus_wanted, allowed)); | |
923 | + if (ret < 0) | |
924 | + return ret; | |
925 | + | |
926 | + last = allowed; | |
927 | + } | |
928 | +} | |
929 | + | |
930 | +/* Our replacement function for set_cpus_allowed */ | |
931 | +int cpuset_setaffinity(struct task_struct * task, cpumask_t mask) | |
932 | +{ | |
933 | + task->cpus_wanted = mask; | |
934 | + return __cpuset_setaffinity(task); | |
935 | +} | |
936 | + | |
937 | +/* When a cpuset with attached processes is being realloc'ed CPUs | |
938 | + * update the processes' masks and migrate them | |
939 | + */ | |
940 | +static void migrate_cpuset_processes(struct cpuset * cs) | |
941 | +{ | |
942 | + struct task_struct *g, *p; | |
943 | + /* This should be a RARE use of the cpusets. | |
944 | + * therefore we'll prefer an inefficient operation here | |
945 | + * (searching the whole process list) | |
946 | + * than adding another list_head in task_t | |
947 | + * and locks and list_add for each fork() | |
948 | + */ | |
949 | + | |
950 | + /* we need to lock tasklist_lock for reading the processes list | |
951 | + * BUT we cannot call set_cpus_allowed with any spinlock held | |
952 | + * => we need to store the list of task struct in an array | |
953 | + */ | |
954 | + struct task_struct ** array; | |
955 | + int nb = 0; | |
956 | + int sz; | |
957 | + | |
958 | + spin_lock(&cs->attach_lock); | |
959 | + /* at most cs->count - 1 processes to migrate */ | |
960 | + sz = atomic_read(&cs->count) - 1; | |
961 | + array = (struct task_struct **) kmalloc(sz * sizeof(struct task_struct *), GFP_ATOMIC); | |
962 | + if (!array) { | |
963 | + spin_unlock(&cs->attach_lock); | |
964 | + printk("Error allocating array in migrate_cpuset_processes !\n"); | |
965 | + return; | |
966 | + } | |
967 | + /* see linux/sched.h for this nested for/do-while loop */ | |
968 | + read_lock(&tasklist_lock); | |
969 | + do_each_thread(g, p) { | |
970 | + if (p->cpuset == cs) { | |
971 | + if (nb == sz) { | |
972 | + printk("migrate_cpuset_processes: array full !\n"); | |
973 | + goto end_loop; /* break won't work in this double loop */ | |
974 | + } | |
975 | + get_task_struct(p); | |
976 | + array[nb++] = p; | |
977 | + } | |
978 | + } while_each_thread(g, p); | |
979 | +end_loop: | |
980 | + read_unlock(&tasklist_lock); | |
981 | + spin_unlock(&cs->attach_lock); | |
982 | + | |
983 | + while(nb) { | |
984 | + struct task_struct * p = array[--nb]; | |
985 | + __cpuset_setaffinity(p); | |
986 | + put_task_struct(p); | |
987 | + } | |
988 | + kfree(array); | |
989 | +} | |
990 | + | |
991 | + | |
992 | + | |
993 | +/* see if mask b is included in mask a */ | |
994 | +/* old version : #define MASK_INCLUDED(a, b) (((a)|(b)) == (a)) */ | |
995 | +static inline int MASK_INCLUDED(cpumask_t a, cpumask_t b) | |
996 | +{ | |
997 | + cpumask_t r; | |
998 | + cpus_or(r, a, b); | |
999 | + return cpus_equal(r, a); | |
1000 | +} | |
1001 | + | |
1002 | +static inline cpumask_t CPUS_NOT(cpumask_t a) | |
1003 | +{ | |
1004 | + cpus_complement(a); | |
1005 | + return a; | |
1006 | +} | |
1007 | + | |
1008 | +static inline cpumask_t CPUS_OR(cpumask_t a, cpumask_t b) | |
1009 | +{ | |
1010 | + cpumask_t r; | |
1011 | + cpus_or(r, a, b); | |
1012 | + return r; | |
1013 | +} | |
1014 | + | |
1015 | +static inline cpumask_t CPUS_AND(cpumask_t a, cpumask_t b) | |
1016 | +{ | |
1017 | + cpumask_t r; | |
1018 | + cpus_and(r, a, b); | |
1019 | + return r; | |
1020 | +} | |
1021 | + | |
1022 | + | |
/* sys_cpuset_alloc: (re)assign the set of CPUs owned by an existing cpuset.
 *
 * @cpuset:        id of the target cpuset (the top cpuset cannot be changed)
 * @len:           size of the user-supplied mask, must hold a full cpumask_t
 * @user_mask_ptr: new CPU mask, copied from user space
 *
 * The new mask must stay inside the parent's mask, must not grab CPUs
 * already reserved (strictly or not, depending on CPUSET_STRICT), and
 * must not drop CPUs that children cpusets have reserved.  On success,
 * any processes already attached are migrated to the new mask.
 *
 * Returns 0 on success, or -EINVAL/-EFAULT/-EPERM/-EACCES/-EBUSY.
 */
asmlinkage long sys_cpuset_alloc(cpuset_t cpuset, int len, unsigned long * user_mask_ptr)
{
	cpumask_t new_mask;
	cpumask_t old_mask;
	struct cpuset * cs ;
	int retval;

	info("sys_cpuset_alloc(%d, ...) called\n", cpuset);

	/* the top-level cpuset's mask is immutable */
	if (cpuset == CPUSET_TOP_ID)
		return -EINVAL;

	if (len < sizeof(new_mask))
		return -EINVAL;

	if (copy_from_user(&new_mask, user_mask_ptr, sizeof(new_mask)))
		return -EFAULT;

	/* do some sanity checks on the mask */
	/* must have at least ONE cpu */
	if (cpus_empty(new_mask))
		return -EINVAL;

	/* XXX phys_cpu_present_map has changed type --
	 * I disable this test for now
	 * anyway it is not _NEEDED_ since new_mask will have to stay
	 * in the parent's mask
	 * (just some overhead in a _really_ rare case) */
#if 0
	/* must only have existing CPUs */
	if (!MASK_INCLUDED(phys_cpu_present_map, new_mask))
		return -EINVAL;
#endif

	/* NOTE(review): %016lx assumes cpumask_t is a single unsigned long --
	 * confirm for large-NR_CPUS configs */
	info(" with mask %016lx\n", new_mask);

	/* look the cpuset up and pin it before dropping the global lock */
	read_lock(&cpuset_lock);
	cs = find_cpuset_by_id(cpuset);


	if (!cs) {
		read_unlock(&cpuset_lock);
		return -EINVAL;
	}

	use_cpuset(cs);
	read_unlock(&cpuset_lock);

	if (bad_permission(cs)) {
		release_cpuset(cs);
		return -EPERM;
	}

	/* lock early - we do not want the parent's masks to change under us */
	write_lock(&cpuset_lock);
	/* must only have CPUs in the parent cpuset (if any) */
	retval = -EACCES;
	if (!MASK_INCLUDED(cs->parent->cpus_allowed, new_mask))
		goto mask_error;

	old_mask = cs->cpus_allowed;

	retval = -EBUSY;
	/* must only have free cpus */
	if (cs->flags & CPUSET_STRICT) {
		/* CPUs already in this cs ARE free for us ! -> old_mask */
		/* The next few lines mean :
		 * if (!MASK_INCLUDED(~cs->parent->cpus_reserved, new_mask & (~old_mask)))
		 * (just obfuscated by the cpus_ macros)
		 */
		if (!MASK_INCLUDED(CPUS_NOT(cs->parent->cpus_reserved),
				CPUS_AND(new_mask, CPUS_NOT(old_mask))))
			goto mask_error;
	}
	else {
		if (!MASK_INCLUDED(CPUS_NOT(cs->parent->cpus_strictly_reserved), new_mask))
			goto mask_error;
	}


	/* are we trying to FREE reserved CPUs
	 * (i.e. reserved by children cpusets)
	 * from a non-unused cpuset ? */
	/* if (cs->cpus_reserved & ~new_mask) */
	if (!cpus_empty(CPUS_AND(cs->cpus_reserved, CPUS_NOT(new_mask))))
		goto mask_error;

	/* everything is OK */
	cs->cpus_allowed = new_mask;
	rebuild_reserved_masks(cs->parent);
	write_unlock(&cpuset_lock);

	/* did we change a non-unused cpuset ?
	 * (count > 1 means tasks are attached; migrate them to the new mask,
	 * without holding cpuset_lock since migration may sleep) */
	if ((atomic_read(&cs->count) > 1) && !cpus_equal(new_mask, old_mask)) {
		migrate_cpuset_processes(cs);
	}

	release_cpuset(cs);
	return 0;

mask_error:
	write_unlock(&cpuset_lock);
	release_cpuset(cs);
	return retval;
}
1128 | + | |
1129 | +asmlinkage long sys_cpuset_getfreecpus(int flags, int len, unsigned long * user_mask_ptr) | |
1130 | +{ | |
1131 | + cpumask_t reserved; | |
1132 | + cpumask_t free; | |
1133 | + | |
1134 | + int real_len = sizeof(unsigned long); | |
1135 | + if (len < real_len) | |
1136 | + return -EINVAL; | |
1137 | + | |
1138 | + if (flags & CPUSET_STRICT) | |
1139 | + reserved = current->cpuset->cpus_reserved; | |
1140 | + else | |
1141 | + reserved = current->cpuset->cpus_strictly_reserved; | |
1142 | + | |
1143 | + free = CPUS_AND(current->cpuset->cpus_allowed, CPUS_NOT(reserved)); | |
1144 | + | |
1145 | + if (copy_to_user(user_mask_ptr, &free, real_len)) | |
1146 | + return -EFAULT; | |
1147 | + | |
1148 | + return real_len; | |
1149 | +} | |
1150 | + | |
1151 | +/************************************************************* | |
1152 | + ***************** /proc/cpusets stuff *********************** | |
1153 | + ************************************************************* | |
1154 | + */ | |
1155 | +#ifdef CONFIG_CPUSETS_PROC | |
1156 | + | |
/* seq_file start: take cpuset_lock (released in proc_cpusets_stop)
 * and walk the global cpuset list to the *pos'th entry.
 * &top_cpuset.list doubles as the list head sentinel, so position 0
 * yields top_cpuset itself.
 */
static void *proc_cpusets_start(struct seq_file *m, loff_t *pos)
{
	loff_t n = *pos;
	struct list_head *p;

	read_lock(&cpuset_lock);
	/* print the banner only on the first read */
	if (!n) seq_puts(m, "cpusets info \n");

	p = &top_cpuset.list;
	while (n--) {
		p = p->next;
		/* wrapped back to the head: fewer than *pos cpusets -> done */
		if (p == &top_cpuset.list)
			return NULL;
	}
	return list_entry(p, struct cpuset, list);
}
1173 | + | |
1174 | +static void *proc_cpusets_next(struct seq_file *m, void *p, loff_t *pos) | |
1175 | +{ | |
1176 | + struct cpuset * cs = p; | |
1177 | + ++*pos; | |
1178 | + return cs->list.next == &top_cpuset.list ? NULL | |
1179 | + : list_entry(cs->list.next, struct cpuset, list); | |
1180 | +} | |
1181 | + | |
1182 | +/* How many chars needed to print a long (as a mask) ? */ | |
1183 | +#define CHARS_FOR_LONG (BITS_PER_LONG / 4) | |
1184 | +#define CFL CHARS_FOR_LONG | |
1185 | +static void sprint_mask(char * buf, cpumask_t mask) | |
1186 | +{ | |
1187 | +#ifdef CPU_ARRAY_SIZE | |
1188 | + int l; | |
1189 | + for (l = CPU_ARRAY_SIZE - 1; l>=0; l--) { | |
1190 | + /* XXX only 64 bits long supported here ! */ | |
1191 | + sprintf(buf, "%016lx", mask.mask[l]); | |
1192 | + buf += CFL; | |
1193 | + } | |
1194 | +#else | |
1195 | + /* XXX only 64 bits long supported here ! */ | |
1196 | + sprintf(buf, "%016lx", mask); | |
1197 | +#endif | |
1198 | +} | |
1199 | + | |
1200 | + | |
/* seq_file show: print one cpuset record to /proc/cpusets.
 * Called with cpuset_lock read-held (taken in proc_cpusets_start).
 */
static int proc_cpusets_show(struct seq_file *m, void *p)
{
	struct cpuset * cs = p;
	/* scratch buffer sized for one hex rendering from sprint_mask() */
#ifdef CPU_ARRAY_SIZE
	char maskbuf[CPU_ARRAY_SIZE * CFL + 1];
#else
	char maskbuf[CFL + 1];
#endif

	/* "hba" = has_been_attached; parent id -1 marks the top cpuset */
	seq_printf(m, "cpuset %d {\n"
			"\tparent = %d\n"
			"\tflags = %d\n"
			"\tcount = %d\n"
			"\thba = %d\n"
			"\tuid & suid = %d & %d\n",
			cs->id, cs->parent ? cs->parent->id : -1,
			cs->flags, atomic_read(&cs->count), cs->has_been_attached,
			cs->uid, cs->suid);

	sprint_mask(maskbuf, cs->cpus_allowed);
	seq_printf(m,"\tcpus_allowed = %s\n", maskbuf);
	sprint_mask(maskbuf, cs->cpus_reserved);
	seq_printf(m,"\tcpus_reserved = %s\n", maskbuf);
	sprint_mask(maskbuf, cs->cpus_strictly_reserved);
	seq_printf(m,"\tcpus_strictly_reserved = %s\n", maskbuf);

	seq_printf(m, "}\n\n");

	return 0;
}
1231 | + | |
/* seq_file stop: drop the lock taken in proc_cpusets_start() */
static void proc_cpusets_stop(struct seq_file *m, void *p)
{
	read_unlock(&cpuset_lock);
}
1236 | + | |
1237 | +static struct seq_operations cpusets_op = { | |
1238 | + .start = proc_cpusets_start, | |
1239 | + .next = proc_cpusets_next, | |
1240 | + .stop = proc_cpusets_stop, | |
1241 | + .show = proc_cpusets_show | |
1242 | +}; | |
1243 | + | |
1244 | + | |
/* open handler for /proc/cpusets: attach the seq_file iterator */
static int proc_cpusets_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &cpusets_op);
}
1249 | + | |
/* file operations for /proc/cpusets: standard seq_file plumbing */
static struct file_operations proc_cpusets_operations = {
	.open		= proc_cpusets_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
1256 | + | |
1257 | + | |
1258 | +static int __init proc_cpusets_init(void) | |
1259 | +{ | |
1260 | + struct proc_dir_entry *entry; | |
1261 | + | |
1262 | + entry = create_proc_entry("cpusets", 0, NULL); | |
1263 | + if (entry) | |
1264 | + entry->proc_fops = &proc_cpusets_operations; | |
1265 | + return 0; | |
1266 | +} | |
1267 | + | |
1268 | +/************************************************************* | |
1269 | + *********** /proc/xxx/cpuset ******************************** | |
1270 | + ************************************************************* | |
1271 | + */ | |
1272 | +int proc_pid_cpuset(struct task_struct *task, char *buffer) | |
1273 | +{ | |
1274 | + return sprintf(buffer, "%d\n", task->cpuset->id); | |
1275 | +} | |
1276 | + | |
1277 | +#endif /* CONFIG_CPUSETS_PROC */ | |
1278 | + | |
1279 | diff -Nru a/kernel/exit.c b/kernel/exit.c | |
1280 | --- a/kernel/exit.c Tue Oct 21 16:05:27 2003 | |
1281 | +++ b/kernel/exit.c Tue Oct 21 16:05:27 2003 | |
1282 | @@ -54,6 +54,19 @@ | |
1283 | ||
1284 | BUG_ON(p->state < TASK_ZOMBIE); | |
1285 | ||
1286 | + | |
1287 | +#ifdef CONFIG_CPUSETS | |
1288 | + spin_lock(&p->cpuset_attach_lock); | |
1289 | + release_cpuset(p->cpuset); | |
1290 | + | |
1291 | + /* mark that this process's cpuset has already been released | |
1292 | + * another process might still try to cpuset_attach this process | |
1293 | + */ | |
1294 | + p->cpuset = NULL; | |
1295 | + spin_unlock(&p->cpuset_attach_lock); | |
1296 | +#endif /* CONFIG_CPUSETS */ | |
1297 | + | |
1298 | + | |
1299 | atomic_dec(&p->user->processes); | |
1300 | spin_lock(&p->proc_lock); | |
1301 | proc_dentry = proc_pid_unhash(p); | |
1302 | @@ -87,6 +100,7 @@ | |
1303 | spin_unlock(&p->proc_lock); | |
1304 | proc_pid_flush(proc_dentry); | |
1305 | release_thread(p); | |
1306 | + | |
1307 | put_task_struct(p); | |
1308 | } | |
1309 | ||
1310 | diff -Nru a/kernel/fork.c b/kernel/fork.c | |
1311 | --- a/kernel/fork.c Tue Oct 21 16:05:27 2003 | |
1312 | +++ b/kernel/fork.c Tue Oct 21 16:05:27 2003 | |
1313 | @@ -31,6 +31,10 @@ | |
1314 | #include <linux/ptrace.h> | |
1315 | #include <linux/mount.h> | |
1316 | ||
1317 | +#ifdef CONFIG_CPUSETS | |
1318 | +#include <linux/cpuset.h> | |
1319 | +#endif | |
1320 | + | |
1321 | #include <asm/pgtable.h> | |
1322 | #include <asm/pgalloc.h> | |
1323 | #include <asm/uaccess.h> | |
1324 | @@ -1035,6 +1039,11 @@ | |
1325 | SET_LINKS(p); | |
1326 | if (p->ptrace & PT_PTRACED) | |
1327 | __ptrace_link(p, current->parent); | |
1328 | + | |
1329 | +#ifdef CONFIG_CPUSETS | |
1330 | + use_cpuset(p->cpuset); | |
1331 | +#endif | |
1332 | + | |
1333 | ||
1334 | attach_pid(p, PIDTYPE_PID, p->pid); | |
1335 | if (thread_group_leader(p)) { | |
1336 | diff -Nru a/kernel/sched.c b/kernel/sched.c | |
1337 | --- a/kernel/sched.c Tue Oct 21 16:05:27 2003 | |
1338 | +++ b/kernel/sched.c Tue Oct 21 16:05:27 2003 | |
1339 | @@ -38,6 +38,10 @@ | |
1340 | #include <linux/cpu.h> | |
1341 | #include <linux/percpu.h> | |
1342 | ||
1343 | +#ifdef CONFIG_CPUSETS | |
1344 | +#include <linux/cpuset.h> | |
1345 | +#endif | |
1346 | + | |
1347 | #ifdef CONFIG_NUMA | |
1348 | #define cpu_to_node_mask(cpu) node_to_cpumask(cpu_to_node(cpu)) | |
1349 | #else | |
1350 | @@ -2203,7 +2207,11 @@ | |
1351 | !capable(CAP_SYS_NICE)) | |
1352 | goto out_unlock; | |
1353 | ||
1354 | +#ifdef CONFIG_CPUSETS | |
1355 | + retval = cpuset_setaffinity(p, new_mask); | |
1356 | +#else | |
1357 | retval = set_cpus_allowed(p, new_mask); | |
1358 | +#endif | |
1359 | ||
1360 | out_unlock: | |
1361 | put_task_struct(p); | |
1362 | @@ -2236,7 +2244,11 @@ | |
1363 | goto out_unlock; | |
1364 | ||
1365 | retval = 0; | |
1366 | +#ifdef CONFIG_CPUSETS | |
1367 | + mask = p->cpus_wanted; | |
1368 | +#else | |
1369 | cpus_and(mask, p->cpus_allowed, cpu_online_map); | |
1370 | +#endif | |
1371 | ||
1372 | out_unlock: | |
1373 | read_unlock(&tasklist_lock); |