diff -ur linux-2.6.14.3/arch/i386/kernel/cpu/proc.c linux-2.6.14.3-cpusetvirt/arch/i386/kernel/cpu/proc.c
--- linux-2.6.14.3/arch/i386/kernel/cpu/proc.c	2005-11-24 23:10:21.000000000 +0100
+++ linux-2.6.14.3-cpusetvirt/arch/i386/kernel/cpu/proc.c	2005-11-25 19:28:28.088979320 +0100
@@ -3,6 +3,7 @@
 #include <asm/semaphore.h>
 #include <linux/seq_file.h>
 #include <linux/cpufreq.h>
+#include <linux/cpuset.h>
 
 /*
  * Get CPU information for use by the procfs.
@@ -69,12 +70,20 @@
 	if (!cpu_online(n))
 		return 0;
 #endif
+#ifdef CONFIG_CPUSETS
+	if (!cpu_visible_in_cpuset(n, current->cpuset))
+		return 0;
+#endif
 	seq_printf(m, "processor\t: %d\n"
 		"vendor_id\t: %s\n"
 		"cpu family\t: %d\n"
		"model\t\t: %d\n"
 		"model name\t: %s\n",
+#ifdef CONFIG_CPUSETS
+		cpuid_in_cpuset(n, current->cpuset),
+#else
 		n,
+#endif
 		c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
 		c->x86,
 		c->x86_model,
diff -ur linux-2.6.14.3/arch/ia64/kernel/setup.c linux-2.6.14.3-cpusetvirt/arch/ia64/kernel/setup.c
--- linux-2.6.14.3/arch/ia64/kernel/setup.c	2005-11-24 23:10:21.000000000 +0100
+++ linux-2.6.14.3-cpusetvirt/arch/ia64/kernel/setup.c	2005-11-25 19:28:28.090979016 +0100
@@ -43,6 +43,7 @@
 #include <linux/platform.h>
 #include <linux/pm.h>
 #include <linux/cpufreq.h>
+#include <linux/cpuset.h>
 
 #include <asm/ia32.h>
 #include <asm/machvec.h>
@@ -476,6 +477,11 @@
 	unsigned long mask;
 	int i;
 
+#ifdef CONFIG_CPUSETS
+	if (!cpu_visible_in_cpuset(cpunum, current->cpuset))
+		return 0;
+#endif
+
 	mask = c->features;
 
 	switch (c->family) {
@@ -520,7 +526,12 @@
 		   "cpu MHz    : %lu.%06lu\n"
 		   "itc MHz    : %lu.%06lu\n"
 		   "BogoMIPS   : %lu.%02lu\n",
-		   cpunum, c->vendor, family, c->model, c->revision, c->archrev,
+#ifdef CONFIG_CPUSETS
+		   cpuid_in_cpuset(cpunum, current->cpuset),
+#else
+		   cpunum,
+#endif
+		   c->vendor, family, c->model, c->revision, c->archrev,
 		   features, c->ppn, c->number,
 		   c->proc_freq / 1000000, c->proc_freq % 1000000,
 		   c->itc_freq / 1000000, c->itc_freq % 1000000,
diff -ur linux-2.6.14.3/arch/x86_64/kernel/setup.c linux-2.6.14.3-cpusetvirt/arch/x86_64/kernel/setup.c
--- linux-2.6.14.3/arch/x86_64/kernel/setup.c	2005-11-24 23:10:21.000000000 +0100
+++ linux-2.6.14.3-cpusetvirt/arch/x86_64/kernel/setup.c	2005-11-25 19:37:48.755745016 +0100
@@ -42,7 +42,8 @@
 #include <linux/cpufreq.h>
 #include <linux/dmi.h>
 #include <linux/dma-mapping.h>
 #include <linux/ctype.h>
+#include <linux/cpuset.h>
 
 #include <asm/mtrr.h>
 #include <asm/uaccess.h>
@@ -1249,13 +1250,20 @@
 	if (!cpu_online(c-cpu_data))
 		return 0;
 #endif
-
+#ifdef CONFIG_CPUSETS
+	if (!cpu_visible_in_cpuset(c-cpu_data, current->cpuset))
+		return 0;
+#endif
 	seq_printf(m,"processor\t: %u\n"
 		"vendor_id\t: %s\n"
 		"cpu family\t: %d\n"
 		"model\t\t: %d\n"
 		"model name\t: %s\n",
+#ifdef CONFIG_CPUSETS
+		cpuid_in_cpuset(c-cpu_data, current->cpuset),
+#else
 		(unsigned)(c-cpu_data),
+#endif
 		c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
 		c->x86,
 		(int)c->x86_model,
diff -ur linux-2.6.14.3/fs/proc/proc_misc.c linux-2.6.14.3-cpusetvirt/fs/proc/proc_misc.c
--- linux-2.6.14.3/fs/proc/proc_misc.c	2005-11-24 23:10:21.000000000 +0100
+++ linux-2.6.14.3-cpusetvirt/fs/proc/proc_misc.c	2005-11-25 19:28:28.092978712 +0100
@@ -44,6 +44,7 @@
 #include <linux/jiffies.h>
 #include <linux/sysrq.h>
 #include <linux/vmalloc.h>
+#include <linux/cpuset.h>
 #include <linux/crash_dump.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -353,6 +354,10 @@
 	for_each_cpu(i) {
 		int j;
 
+#ifdef CONFIG_CPUSETS
+		if (!cpu_visible_in_cpuset(i, current->cpuset))
+			continue;
+#endif
 		user = cputime64_add(user, kstat_cpu(i).cpustat.user);
 		nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice);
 		system = cputime64_add(system, kstat_cpu(i).cpustat.system);
@@ -376,6 +381,10 @@
 		(unsigned long long)cputime64_to_clock_t(steal));
 	for_each_online_cpu(i) {
 
+#ifdef CONFIG_CPUSETS
+		if (!cpu_visible_in_cpuset(i, current->cpuset))
+			continue;
+#endif
 		/* Copy values here to work around gcc-2.95.3, gcc-2.96 */
 		user = kstat_cpu(i).cpustat.user;
 		nice = kstat_cpu(i).cpustat.nice;
@@ -386,7 +395,11 @@
 		softirq = kstat_cpu(i).cpustat.softirq;
 		steal = kstat_cpu(i).cpustat.steal;
 		seq_printf(p, "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu\n",
+#ifdef CONFIG_CPUSETS
+			cpuid_in_cpuset(i, current->cpuset),
+#else
 			i,
+#endif
 			(unsigned long long)cputime64_to_clock_t(user),
 			(unsigned long long)cputime64_to_clock_t(nice),
 			(unsigned long long)cputime64_to_clock_t(system),
diff -ur linux-2.6.14.3/include/linux/cpuset.h linux-2.6.14.3-cpusetvirt/include/linux/cpuset.h
--- linux-2.6.14.3/include/linux/cpuset.h	2005-11-24 23:10:21.000000000 +0100
+++ linux-2.6.14.3-cpusetvirt/include/linux/cpuset.h	2005-11-25 19:28:28.093978560 +0100
@@ -28,6 +28,9 @@
 extern struct file_operations proc_cpuset_operations;
 extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer);
 
+int cpu_visible_in_cpuset(int cpu, struct cpuset * cs);
+int cpuid_in_cpuset(int cpu, struct cpuset * cs);
+
 #else /* !CONFIG_CPUSETS */
 
 static inline int cpuset_init(void) { return 0; }
diff -ur linux-2.6.14.3/include/linux/init_task.h linux-2.6.14.3-cpusetvirt/include/linux/init_task.h
--- linux-2.6.14.3/include/linux/init_task.h	2005-11-24 23:10:21.000000000 +0100
+++ linux-2.6.14.3-cpusetvirt/include/linux/init_task.h	2005-11-25 19:28:28.079980688 +0100
@@ -88,6 +88,7 @@
 	.static_prio	= MAX_PRIO-20,					\
 	.policy		= SCHED_NORMAL,					\
 	.cpus_allowed	= CPU_MASK_ALL,					\
+	.cpus_virt_allowed = CPU_MASK_ALL,				\
 	.mm		= NULL,						\
 	.active_mm	= &init_mm,					\
 	.run_list	= LIST_HEAD_INIT(tsk.run_list),			\
diff -ur linux-2.6.14.3/include/linux/sched.h linux-2.6.14.3-cpusetvirt/include/linux/sched.h
--- linux-2.6.14.3/include/linux/sched.h	2005-11-24 23:10:21.000000000 +0100
+++ linux-2.6.14.3-cpusetvirt/include/linux/sched.h	2005-11-25 19:28:28.081980384 +0100
@@ -808,6 +808,7 @@
 	struct mempolicy *mempolicy;
 	short il_next;
 #endif
+	cpumask_t cpus_virt_allowed;
 #ifdef CONFIG_CPUSETS
 	struct cpuset *cpuset;
 	nodemask_t mems_allowed;
diff -ur linux-2.6.14.3/kernel/cpuset.c linux-2.6.14.3-cpusetvirt/kernel/cpuset.c
--- linux-2.6.14.3/kernel/cpuset.c	2005-11-24 23:10:21.000000000 +0100
+++ linux-2.6.14.3-cpusetvirt/kernel/cpuset.c	2005-11-25 19:28:28.084979928 +0100
@@ -83,6 +83,7 @@
 	CS_CPU_EXCLUSIVE,
 	CS_MEM_EXCLUSIVE,
 	CS_MEMORY_MIGRATE,
+	CS_VIRTUALIZED,
 	CS_REMOVED,
 	CS_NOTIFY_ON_RELEASE
 } cpuset_flagbits_t;
@@ -98,6 +99,10 @@
 	return !!test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
 }
 
+static inline int is_virtualized(const struct cpuset *cs)
+{
+	return !!test_bit(CS_VIRTUALIZED, &cs->flags);
+}
 static inline int is_removed(const struct cpuset *cs)
 {
 	return !!test_bit(CS_REMOVED, &cs->flags);
@@ -590,6 +595,145 @@
 	is_mem_exclusive(p) <= is_mem_exclusive(q);
 }
 
+#define cyclic_next_cpu(index, mask) __cyclic_next_cpu(index, &mask)
+static inline int __cyclic_next_cpu(int index, const cpumask_t * mask)
+{
+	int i;
+	i = next_cpu(index, *mask);
+	if (i >= NR_CPUS) {
+		if (cpu_isset(0, *mask))
+			return 0;
+		i = next_cpu(0, *mask);
+	}
+	return i;
+}
+
+/**
+ * combine_mask - translate a user cpu mask to a physical one.
+ * @virt_allowed: the mask given by the user to sched_setaffinity()
+ * @cs_allowed: the mask of the current cpuset.
+ *
+ * Returns the combined mask in *mask.
+ */
+static int combine_mask(cpumask_t *mask, const cpumask_t virt_allowed, const cpumask_t cs_allowed)
+{
+	int i;
+
+	/* start with the current cpu out of the mask
+	 * so that the first call to next_cpu will take the first cpu
+	 * even if it is cpu zero
+	 */
+	int cpu = NR_CPUS;
+	cpus_clear(*mask);
+
+	if (cpus_empty(virt_allowed)) return 0;
+	if (cpus_empty(cs_allowed)) return 0;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		cpu = cyclic_next_cpu(cpu, cs_allowed);
+		if (cpu_isset(i, virt_allowed))
+			cpu_set(cpu, *mask);
+	}
+	return 0;
+}
+
+/**
+ * Find out whether a cpu should be listed in /proc/cpuinfo.
+ *
+ * For virtualized cpusets, only cpus present in the cpuset are shown.
+ */
+int cpu_visible_in_cpuset(int cpu, struct cpuset * cs)
+{
+	/* all cpus are visible in non-virtualized cpusets */
+	if (!is_virtualized(cs))
+		return 1;
+
+	return cpu_isset(cpu, cs->cpus_allowed);
+}
+
+/**
+ * cpuid_in_cpuset - translate a "real" cpu number to an "inside cpuset" (logical) one
+ * @cs: the cpuset where all the magic occurs.
+ * @cpu: cpu number to be translated
+ *
+ * Used for /proc/cpuinfo.
+ * Returns the translated cpu number.
+ */
+int cpuid_in_cpuset(int cpu, struct cpuset * cs)
+{
+	int i;
+	int l = 0;
+
+	/* translation is needed only for virtualized cpusets */
+	if (!is_virtualized(cs))
+		return cpu;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (i == cpu)
+			return l;
+		if (cpu_isset(i, cs->cpus_allowed))
+			l++;
+	}
+	/* NOT REACHED */
+	BUG();
+	return 0;
+}
+
+/**
+ * set_cpus_virt_allowed - update the cpus_virt_allowed AND cpus_allowed masks
+ * @virt_allowed: the mask given by the user to sched_setaffinity()
+ * @p: the task
+ *
+ * This function does not mess with scheduler internals. Here we rely
+ * on set_cpus_allowed(), which should, for instance, migrate the task
+ * if necessary.
+ */
+static int set_cpus_virt_allowed(task_t *p, cpumask_t mask)
+{
+	cpumask_t new_mask;
+	int retval;
+
+	p->cpus_virt_allowed = mask;
+	combine_mask(&new_mask, p->cpus_virt_allowed, p->cpuset->cpus_allowed);
+	retval = set_cpus_allowed(p, new_mask);
+	return retval;
+}
+
+/**
+ * This is the exported entry point that will be called
+ * by sched_setaffinity().
+ */
+int cpuset_set_cpus_affinity(task_t *p, cpumask_t mask)
+{
+	int retval;
+
+	mutex_lock(&callback_mutex);
+	if (is_virtualized(p->cpuset))
+		retval = set_cpus_virt_allowed(p, mask);
+	else {
+		cpumask_t cpus_allowed;
+		cpus_allowed = cpuset_cpus_allowed(p);
+		cpus_and(mask, mask, cpus_allowed);
+		retval = set_cpus_allowed(p, mask);
+	}
+	mutex_unlock(&callback_mutex);
+	return retval;
+}
+
+/**
+ * This is the exported entry point that will be called
+ * by sched_getaffinity().
+ */
+int cpuset_get_cpus_virt_affinity(task_t *p, cpumask_t *mask)
+{
+	if (is_virtualized(p->cpuset)) {
+		*mask = p->cpus_virt_allowed;
+		return 0;
+	}
+	return -1;
+}
+
+
 /*
  * validate_change() - Used to validate that any proposed cpuset change
  * follows the structural rules for cpusets.
@@ -624,6 +768,11 @@
 	if ((par = cur->parent) == NULL)
 		return 0;
 
+	/* virtualization can only be turned on/off on empty cpusets */
+	if ((atomic_read(&cur->count) > 0) || (!list_empty(&cur->children)))
+		if (is_virtualized(cur) != is_virtualized(trial))
+			return -EBUSY;
+
 	/* We must be a subset of our parent cpuset */
 	if (!is_cpuset_subset(trial, par))
 		return -EACCES;
@@ -818,11 +967,29 @@
 		return -ESRCH;
 	}
 	atomic_inc(&cs->count);
+
+	/* depending on current and future cpuset for this task,
+	 * affinity masks may be meaningful or not
+	 */
+	cpumask_t virt_allowed, allowed;
+	if (is_virtualized(cs) == is_virtualized(tsk->cpuset)) {
+		virt_allowed = tsk->cpus_virt_allowed;
+		allowed = tsk->cpus_allowed;
+	} else {
+		virt_allowed = CPU_MASK_ALL;
+		allowed = CPU_MASK_ALL;
+	}
+
 	rcu_assign_pointer(tsk->cpuset, cs);
 	task_unlock(tsk);
 
-	guarantee_online_cpus(cs, &cpus);
-	set_cpus_allowed(tsk, cpus);
+
+	if (is_virtualized(cs))
+		set_cpus_virt_allowed(tsk, virt_allowed);
+	else {
+		guarantee_online_cpus(cs, &cpus);
+		set_cpus_allowed(tsk, cpus);
+	}
 
 	from = oldcs->mems_allowed;
 	to = cs->mems_allowed;
@@ -839,6 +1006,7 @@
 	FILE_MEMLIST,
 	FILE_CPU_EXCLUSIVE,
 	FILE_MEM_EXCLUSIVE,
+	FILE_VIRTUALIZE,
 	FILE_NOTIFY_ON_RELEASE,
 	FILE_TASKLIST,
 } cpuset_filetype_t;
@@ -887,6 +1055,9 @@
 	case FILE_MEM_EXCLUSIVE:
 		retval = update_flag(CS_MEM_EXCLUSIVE, cs, buffer);
 		break;
+	case FILE_VIRTUALIZE:
+		retval = update_flag(CS_VIRTUALIZED, cs, buffer);
+		break;
 	case FILE_NOTIFY_ON_RELEASE:
 		retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer);
 		break;
@@ -987,6 +1158,9 @@
 	case FILE_MEM_EXCLUSIVE:
 		*s++ = is_mem_exclusive(cs) ? '1' : '0';
 		break;
+	case FILE_VIRTUALIZE:
+		*s++ = is_virtualized(cs) ? '1' : '0';
+		break;
 	case FILE_NOTIFY_ON_RELEASE:
 		*s++ = notify_on_release(cs) ? '1' : '0';
 		break;
@@ -1310,6 +1484,11 @@
 	.private = FILE_MEM_EXCLUSIVE,
 };
 
+static struct cftype cft_virtualize = {
+	.name = "virtualize",
+	.private = FILE_VIRTUALIZE,
+};
+
 static struct cftype cft_notify_on_release = {
 	.name = "notify_on_release",
 	.private = FILE_NOTIFY_ON_RELEASE,
@@ -1327,6 +1506,8 @@
 		return err;
 	if ((err = cpuset_add_file(cs_dentry, &cft_mem_exclusive)) < 0)
 		return err;
+	if ((err = cpuset_add_file(cs_dentry, &cft_virtualize)) < 0)
+		return err;
 	if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0)
 		return err;
 	if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0)
diff -ur linux-2.6.14.3/kernel/kthread.c linux-2.6.14.3-cpusetvirt/kernel/kthread.c
--- linux-2.6.14.3/kernel/kthread.c	2005-11-24 23:10:21.000000000 +0100
+++ linux-2.6.14.3-cpusetvirt/kernel/kthread.c	2005-11-25 19:28:28.094978408 +0100
@@ -160,6 +160,15 @@
 	wait_task_inactive(k);
 	set_task_cpu(k, cpu);
 	k->cpus_allowed = cpumask_of_cpu(cpu);
+#ifdef CONFIG_CPUSETS
+	/* kthreads don't use sched_setaffinity() to bind themselves to
+	 * CPUs, so we need to take care of it here.
+	 * This should not be a problem, since it is unlikely that kthreads
+	 * will run in a virtualized cpuset.
+	 * But better be ready, so:
+	 */
+	k->cpus_virt_allowed = cpumask_of_cpu(cpu);
+#endif
 }
 EXPORT_SYMBOL(kthread_bind);
 
diff -ur linux-2.6.14.3/kernel/sched.c linux-2.6.14.3-cpusetvirt/kernel/sched.c
--- linux-2.6.14.3/kernel/sched.c	2005-11-24 23:10:21.000000000 +0100
+++ linux-2.6.14.3-cpusetvirt/kernel/sched.c	2005-11-25 19:28:28.087979472 +0100
@@ -3798,11 +3798,15 @@
 	return retval;
 }
 
+#ifdef CONFIG_CPUSETS
+int cpuset_set_cpus_affinity(task_t *p, cpumask_t mask);
+int cpuset_get_cpus_virt_affinity(task_t *p, cpumask_t *mask);
+#endif
+
 long sched_setaffinity(pid_t pid, cpumask_t new_mask)
 {
 	task_t *p;
 	int retval;
-	cpumask_t cpus_allowed;
 
 	lock_cpu_hotplug();
 	read_lock(&tasklist_lock);
@@ -3827,9 +3831,11 @@
 	    !capable(CAP_SYS_NICE))
 		goto out_unlock;
 
-	cpus_allowed = cpuset_cpus_allowed(p);
-	cpus_and(new_mask, new_mask, cpus_allowed);
-	retval = set_cpus_allowed(p, new_mask);
+#ifdef CONFIG_CPUSETS
+	retval = cpuset_set_cpus_affinity(p, new_mask);
+#else
+	retval = set_cpus_allowed(p, new_mask);
+#endif
 
 out_unlock:
 	put_task_struct(p);
@@ -3897,7 +3903,12 @@
 		goto out_unlock;
 
 	retval = 0;
-	cpus_and(*mask, p->cpus_allowed, cpu_online_map);
+#ifdef CONFIG_CPUSETS
+	if (cpuset_get_cpus_virt_affinity(p, mask) < 0)
+		cpus_and(*mask, p->cpus_allowed, cpu_online_map);
+#else
+	cpus_and(*mask, p->cpus_allowed, cpu_online_map);
+#endif
 
 out_unlock:
 	read_unlock(&tasklist_lock);