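cpuset virtualization patch for the PLD 2.6 kernel.

This patch adds a per-cpuset CS_VIRTUALIZED flag, exposed as a "virtualize" file in the cpuset filesystem (presumably toggled with something like "echo 1 > /dev/cpuset/<set>/virtualize", assuming the conventional mount point). Inside a virtualized cpuset, tasks see the set's CPUs renumbered from 0: /proc/cpuinfo and /proc/stat hide the other CPUs and relabel the visible ones; sched_setaffinity() interprets its mask in the virtual numbering, keeping it in the new task_struct field cpus_virt_allowed and folding it onto physical CPUs via combine_mask(); sched_getaffinity() reports the virtual mask back.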
diff -ur linux-2.6.14.3/arch/i386/kernel/cpu/proc.c linux-2.6.14.3-cpusetvirt/arch/i386/kernel/cpu/proc.c
--- linux-2.6.14.3/arch/i386/kernel/cpu/proc.c	2005-11-24 23:10:21.000000000 +0100
+++ linux-2.6.14.3-cpusetvirt/arch/i386/kernel/cpu/proc.c	2005-11-25 19:28:28.088979320 +0100
@@ -3,6 +3,7 @@
 #include <asm/semaphore.h>
 #include <linux/seq_file.h>
 #include <linux/cpufreq.h>
+#include <linux/cpuset.h>
 
 /*
  * Get CPU information for use by the procfs.
@@ -69,12 +70,20 @@
 	if (!cpu_online(n))
 		return 0;
 #endif
+#ifdef CONFIG_CPUSETS
+	if (!cpu_visible_in_cpuset(n, current->cpuset))
+		return 0;
+#endif
 	seq_printf(m, "processor\t: %d\n"
 		"vendor_id\t: %s\n"
 		"cpu family\t: %d\n"
		"model\t\t: %d\n"
 		"model name\t: %s\n",
+#ifdef CONFIG_CPUSETS
+		cpuid_in_cpuset(n, current->cpuset),
+#else
 		n,
+#endif
 		c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
 		c->x86,
 		c->x86_model,
diff -ur linux-2.6.14.3/arch/ia64/kernel/setup.c linux-2.6.14.3-cpusetvirt/arch/ia64/kernel/setup.c
--- linux-2.6.14.3/arch/ia64/kernel/setup.c	2005-11-24 23:10:21.000000000 +0100
+++ linux-2.6.14.3-cpusetvirt/arch/ia64/kernel/setup.c	2005-11-25 19:28:28.090979016 +0100
@@ -43,6 +43,7 @@
 #include <linux/cpufreq.h>
 #include <linux/kexec.h>
 #include <linux/crash_dump.h>
+#include <linux/cpuset.h>
 
 #include <asm/ia32.h>
 #include <asm/machvec.h>
@@ -476,6 +477,11 @@
 	unsigned long proc_freq;
 	int i;
 
+#ifdef CONFIG_CPUSETS
+	if (!cpu_visible_in_cpuset(cpunum, current->cpuset))
+		return 0;
+#endif
+
 	mask = c->features;
 
 	switch (c->family) {
@@ -520,7 +526,12 @@
 		   "cpu MHz : %lu.%06lu\n"
 		   "itc MHz : %lu.%06lu\n"
 		   "BogoMIPS : %lu.%02lu\n",
-		   cpunum, c->vendor, c->family, c->model,
+#ifdef CONFIG_CPUSETS
+		   cpuid_in_cpuset(cpunum, current->cpuset),
+#else
+		   cpunum,
+#endif
+		   c->vendor, c->family, c->model,
 		   c->model_name, c->revision, c->archrev,
 		   features, c->ppn, c->number,
 		   proc_freq / 1000, proc_freq % 1000,
diff -ur linux-2.6.14.3/arch/x86_64/kernel/setup.c linux-2.6.14.3-cpusetvirt/arch/x86_64/kernel/setup.c
--- linux-2.6.14.3/arch/x86_64/kernel/setup.c	2005-11-24 23:10:21.000000000 +0100
+++ linux-2.6.14.3-cpusetvirt/arch/x86_64/kernel/setup.c	2005-11-25 19:37:48.755745016 +0100
@@ -42,6 +42,7 @@
 #include <linux/dmi.h>
 #include <linux/dma-mapping.h>
 #include <linux/ctype.h>
+#include <linux/cpuset.h>
 
 #include <asm/mtrr.h>
 #include <asm/uaccess.h>
@@ -1249,13 +1250,20 @@
 	if (!cpu_online(c-cpu_data))
 		return 0;
 #endif
-
+#ifdef CONFIG_CPUSETS
+	if (!cpu_visible_in_cpuset(c-cpu_data, current->cpuset))
+		return 0;
+#endif
 	seq_printf(m,"processor\t: %u\n"
 		"vendor_id\t: %s\n"
 		"cpu family\t: %d\n"
 		"model\t\t: %d\n"
 		"model name\t: %s\n",
+#ifdef CONFIG_CPUSETS
+		cpuid_in_cpuset(c-cpu_data, current->cpuset),
+#else
 		(unsigned)(c-cpu_data),
+#endif
 		c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
 		c->x86,
 		(int)c->x86_model,
diff -ur linux-2.6.14.3/fs/proc/proc_misc.c linux-2.6.14.3-cpusetvirt/fs/proc/proc_misc.c
--- linux-2.6.14.3/fs/proc/proc_misc.c	2005-11-24 23:10:21.000000000 +0100
+++ linux-2.6.14.3-cpusetvirt/fs/proc/proc_misc.c	2005-11-25 19:28:28.092978712 +0100
@@ -44,6 +44,7 @@
 #include <linux/jiffies.h>
 #include <linux/sysrq.h>
 #include <linux/vmalloc.h>
+#include <linux/cpuset.h>
 #include <linux/crash_dump.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -353,6 +354,10 @@
 	for_each_possible_cpu(i) {
 		int j;
 
+#ifdef CONFIG_CPUSETS
+		if (!cpu_visible_in_cpuset(i, current->cpuset))
+			continue;
+#endif
 		user = cputime64_add(user, kstat_cpu(i).cpustat.user);
 		nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice);
 		system = cputime64_add(system, kstat_cpu(i).cpustat.system);
@@ -376,6 +381,10 @@
 		(unsigned long long)cputime64_to_clock_t(steal));
 	for_each_online_cpu(i) {
 
+#ifdef CONFIG_CPUSETS
+		if (!cpu_visible_in_cpuset(i, current->cpuset))
+			continue;
+#endif
 		/* Copy values here to work around gcc-2.95.3, gcc-2.96 */
 		user = kstat_cpu(i).cpustat.user;
 		nice = kstat_cpu(i).cpustat.nice;
@@ -386,7 +395,11 @@
 		softirq = kstat_cpu(i).cpustat.softirq;
 		steal = kstat_cpu(i).cpustat.steal;
 		seq_printf(p, "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu\n",
+#ifdef CONFIG_CPUSETS
+			cpuid_in_cpuset(i, current->cpuset),
+#else
 			i,
+#endif
 			(unsigned long long)cputime64_to_clock_t(user),
 			(unsigned long long)cputime64_to_clock_t(nice),
 			(unsigned long long)cputime64_to_clock_t(system),
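The four hunks above implement the user-visible renumbering: inside a virtualized cpuset, /proc/cpuinfo and /proc/stat skip CPUs that are not in the set and relabel the remaining ones with their rank inside the set. A minimal userspace sketch of that rank computation (hypothetical names, plain bitmasks instead of cpumask_t, not part of the patch; it mirrors cpuid_in_cpuset() added to kernel/cpuset.c below):

#include <stdio.h>

#define NR_CPUS 8

/* Virtual ID of a physical cpu: the number of allowed cpus strictly
 * below it, i.e. its rank within the cpuset's cpus_allowed mask. */
static int cpuid_in_mask(int cpu, unsigned long allowed)
{
	int i, rank = 0;

	for (i = 0; i < NR_CPUS; i++) {
		if (i == cpu)
			return rank;
		if (allowed & (1UL << i))
			rank++;
	}
	return -1;	/* cpu out of range */
}

int main(void)
{
	unsigned long allowed = (1UL << 2) | (1UL << 5);	/* cpuset owns physical CPUs {2,5} */

	/* a task in this cpuset sees them as "processor 0" and "processor 1" */
	printf("cpu 2 -> %d\n", cpuid_in_mask(2, allowed));	/* prints 0 */
	printf("cpu 5 -> %d\n", cpuid_in_mask(5, allowed));	/* prints 1 */
	return 0;
}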
diff -ur linux-2.6.14.3/include/linux/cpuset.h linux-2.6.14.3-cpusetvirt/include/linux/cpuset.h
--- linux-2.6.14.3/include/linux/cpuset.h	2005-11-24 23:10:21.000000000 +0100
+++ linux-2.6.14.3-cpusetvirt/include/linux/cpuset.h	2005-11-25 19:28:28.093978560 +0100
@@ -28,6 +28,9 @@
 	return current->flags & PF_SPREAD_SLAB;
 }
 
+int cpu_visible_in_cpuset(int cpu, struct cpuset * cs);
+int cpuid_in_cpuset(int cpu, struct cpuset * cs);
+
 #else /* !CONFIG_CPUSETS */
 
 static inline int cpuset_init_early(void) { return 0; }
diff -ur linux-2.6.14.3/include/linux/init_task.h linux-2.6.14.3-cpusetvirt/include/linux/init_task.h
--- linux-2.6.14.3/include/linux/init_task.h	2005-11-24 23:10:21.000000000 +0100
+++ linux-2.6.14.3-cpusetvirt/include/linux/init_task.h	2005-11-25 19:28:28.079980688 +0100
@@ -88,6 +88,7 @@
 	.normal_prio	= MAX_PRIO-20,				\
 	.policy		= SCHED_NORMAL,				\
 	.cpus_allowed	= CPU_MASK_ALL,				\
+	.cpus_virt_allowed = CPU_MASK_ALL,			\
 	.mm		= NULL,					\
 	.active_mm	= &init_mm,				\
 	.run_list	= LIST_HEAD_INIT(tsk.run_list),		\
diff -ur linux-2.6.14.3/include/linux/sched.h linux-2.6.14.3-cpusetvirt/include/linux/sched.h
--- linux-2.6.14.3/include/linux/sched.h	2005-11-24 23:10:21.000000000 +0100
+++ linux-2.6.14.3-cpusetvirt/include/linux/sched.h	2005-11-25 19:28:28.081980384 +0100
@@ -808,6 +808,7 @@
 	struct mempolicy *mempolicy;
 	short il_next;
 #endif
+	cpumask_t cpus_virt_allowed;
 #ifdef CONFIG_CPUSETS
 	struct cpuset *cpuset;
 	nodemask_t mems_allowed;
diff -ur linux-2.6.14.3/kernel/cpuset.c linux-2.6.14.3-cpusetvirt/kernel/cpuset.c
--- linux-2.6.14.3/kernel/cpuset.c	2005-11-24 23:10:21.000000000 +0100
+++ linux-2.6.14.3-cpusetvirt/kernel/cpuset.c	2005-11-25 19:28:28.084979928 +0100
@@ -83,6 +83,7 @@
 	CS_CPU_EXCLUSIVE,
 	CS_MEM_EXCLUSIVE,
 	CS_MEMORY_MIGRATE,
+	CS_VIRTUALIZED,
 	CS_REMOVED,
 	CS_NOTIFY_ON_RELEASE,
 	CS_SPREAD_PAGE,
@@ -98,6 +99,10 @@
 	return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
 }
 
+static inline int is_virtualized(const struct cpuset *cs)
+{
+	return test_bit(CS_VIRTUALIZED, &cs->flags);
+}
 static inline int is_removed(const struct cpuset *cs)
 {
 	return test_bit(CS_REMOVED, &cs->flags);
@@ -590,6 +595,145 @@
 	is_mem_exclusive(p) <= is_mem_exclusive(q);
 }
 
+#define cyclic_next_cpu(index, mask) __cyclic_next_cpu(index, &mask)
+static inline int __cyclic_next_cpu(int index, const cpumask_t * mask)
+{
+	int i;
+	i = next_cpu(index, *mask);
+	if (i >= NR_CPUS) {
+		if (cpu_isset(0, *mask))
+			return 0;
+		i = next_cpu(0, *mask);
+	}
+	return i;
+}
+
+/**
+ * combine_mask - translate a user (virtual) cpu mask to a physical one
+ * @mask: filled in with the resulting (physical) combined mask
+ * @virt_allowed: the mask given by the user to sched_setaffinity()
+ * @cs_allowed: the cpus_allowed mask of the current cpuset
+ *
+ */
+static int combine_mask(cpumask_t *mask, const cpumask_t virt_allowed, const cpumask_t cs_allowed)
+{
+	int i;
+
+	/* start with current cpu out of the mask
+	 * so the first call to next_cpu will take the first cpu
+	 * even if it is cpu zero
+	 */
+	int cpu = NR_CPUS;
+	cpus_clear(*mask);
+
+	if (cpus_empty(virt_allowed)) return 0;
+	if (cpus_empty(cs_allowed)) return 0;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		cpu = cyclic_next_cpu(cpu, cs_allowed);
+		if (cpu_isset(i, virt_allowed))
+			cpu_set(cpu, *mask);
+	}
+	return 0;
+}
+
+/**
+ * cpu_visible_in_cpuset - whether a cpu should be listed in /proc/cpuinfo
+ *
+ * For virtualized cpusets, only cpus present in the cpuset are visible.
+ */
+int cpu_visible_in_cpuset(int cpu, struct cpuset * cs)
+{
+	/* all cpus are visible in non-virtualized cpusets */
+	if (!is_virtualized(cs))
+		return 1;
+
+	return cpu_isset(cpu, cs->cpus_allowed);
+}
+
+/**
+ * cpuid_in_cpuset - translate a "real" cpu number to an "inside cpuset" (logical) one
+ * @cpu: physical cpu number to be translated
+ * @cs: the cpuset where all the magic occurs
+ *
+ * Used for /proc/cpuinfo.
+ * Returns the translated cpu number.
+ */
+int cpuid_in_cpuset(int cpu, struct cpuset * cs)
+{
+	int i;
+	int l = 0;
+
+	/* translation needed only for virtualized cpusets */
+	if (!is_virtualized(cs))
+		return cpu;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (i == cpu)
+			return l;
+		if (cpu_isset(i, cs->cpus_allowed))
+			l++;
+	}
+	/* NOT REACHED */
+	BUG();
+	return 0;
+}
+
+/**
+ * set_cpus_virt_allowed - update both the cpus_virt_allowed and cpus_allowed masks
+ * @p: the task
+ * @mask: the virtual mask given by the user to sched_setaffinity()
+ *
+ * This function does not mess with scheduler internals. Here we rely
+ * on set_cpus_allowed(), which should, for instance, migrate the task
+ * if necessary.
+ */
+static int set_cpus_virt_allowed(struct task_struct *p, cpumask_t mask)
+{
+	cpumask_t new_mask;
+	int retval;
+
+	p->cpus_virt_allowed = mask;
+	combine_mask(&new_mask, p->cpus_virt_allowed, p->cpuset->cpus_allowed);
+	retval = set_cpus_allowed(p, new_mask);
+	return retval;
+}
+
+/**
+ * This is the exported entry point that will be called
+ * by sched_setaffinity().
+ */
+int cpuset_set_cpus_affinity(struct task_struct *p, cpumask_t mask)
+{
+	int retval;
+
+	mutex_lock(&callback_mutex);
+	if (is_virtualized(p->cpuset))
+		retval = set_cpus_virt_allowed(p, mask);
+	else {
+		cpumask_t cpus_allowed;
+		cpus_allowed = cpuset_cpus_allowed(p);
+		cpus_and(mask, mask, cpus_allowed);
+		retval = set_cpus_allowed(p, mask);
+	}
+	mutex_unlock(&callback_mutex);
+	return retval;
+}
+
+/**
+ * This is the exported entry point that will be called
+ * by sched_getaffinity().
+ */
+int cpuset_get_cpus_virt_affinity(struct task_struct *p, cpumask_t *mask)
+{
+	if (is_virtualized(p->cpuset)) {
+		*mask = p->cpus_virt_allowed;
+		return 0;
+	}
+	return -1;
+}
+
+
 /*
  * validate_change() - Used to validate that any proposed cpuset change
  * follows the structural rules for cpusets.
@@ -624,6 +768,11 @@
 	if ((par = cur->parent) == NULL)
 		return 0;
 
+	/* virtualization can only be turned on/off on empty cpusets */
+	if ((atomic_read(&cur->count) > 0) || (!list_empty(&cur->children)))
+		if (is_virtualized(cur) != is_virtualized(trial))
+			return -EBUSY;
+
 	/* We must be a subset of our parent cpuset */
 	if (!is_cpuset_subset(trial, par))
 		return -EACCES;
@@ -818,11 +967,29 @@
 		return -ESRCH;
 	}
 	atomic_inc(&cs->count);
+
+	/* depending on the task's current and future cpuset,
+	 * the existing affinity masks may or may not remain meaningful
+	 */
+	cpumask_t virt_allowed, allowed;
+	if (is_virtualized(cs) == is_virtualized(tsk->cpuset)) {
+		virt_allowed = tsk->cpus_virt_allowed;
+		allowed = tsk->cpus_allowed;
+	} else {
+		virt_allowed = CPU_MASK_ALL;
+		allowed = CPU_MASK_ALL;
+	}
+
 	rcu_assign_pointer(tsk->cpuset, cs);
 	task_unlock(tsk);
 
-	guarantee_online_cpus(cs, &cpus);
-	set_cpus_allowed(tsk, cpus);
+
+	if (is_virtualized(cs))
+		set_cpus_virt_allowed(tsk, virt_allowed);
+	else {
+		guarantee_online_cpus(cs, &cpus);
+		set_cpus_allowed(tsk, cpus);
+	}
 
 	from = oldcs->mems_allowed;
 	to = cs->mems_allowed;
@@ -839,6 +1006,7 @@
 	FILE_MEMLIST,
 	FILE_CPU_EXCLUSIVE,
 	FILE_MEM_EXCLUSIVE,
+	FILE_VIRTUALIZE,
 	FILE_NOTIFY_ON_RELEASE,
 	FILE_MEMORY_PRESSURE_ENABLED,
 	FILE_MEMORY_PRESSURE,
@@ -887,6 +1055,9 @@
 	case FILE_MEM_EXCLUSIVE:
 		retval = update_flag(CS_MEM_EXCLUSIVE, cs, buffer);
 		break;
+	case FILE_VIRTUALIZE:
+		retval = update_flag(CS_VIRTUALIZED, cs, buffer);
+		break;
 	case FILE_NOTIFY_ON_RELEASE:
 		retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer);
 		break;
@@ -987,6 +1158,9 @@
 	case FILE_MEM_EXCLUSIVE:
 		*s++ = is_mem_exclusive(cs) ? '1' : '0';
 		break;
+	case FILE_VIRTUALIZE:
+		*s++ = is_virtualized(cs) ? '1' : '0';
+		break;
 	case FILE_NOTIFY_ON_RELEASE:
 		*s++ = notify_on_release(cs) ? '1' : '0';
 		break;
@@ -1310,6 +1484,11 @@
 	.private = FILE_MEM_EXCLUSIVE,
 };
 
+static struct cftype cft_virtualize = {
+	.name = "virtualize",
+	.private = FILE_VIRTUALIZE,
+};
+
 static struct cftype cft_notify_on_release = {
 	.name = "notify_on_release",
 	.private = FILE_NOTIFY_ON_RELEASE,
@@ -1327,6 +1506,8 @@
 		return err;
 	if ((err = cpuset_add_file(cs_dentry, &cft_mem_exclusive)) < 0)
 		return err;
+	if ((err = cpuset_add_file(cs_dentry, &cft_virtualize)) < 0)
+		return err;
 	if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0)
 		return err;
 	if ((err = cpuset_add_file(cs_dentry, &cft_memory_migrate)) < 0)
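The heart of the mapping is combine_mask() above: virtual CPU i is folded onto the i-th CPU of the cpuset's physical mask, wrapping around cyclically when the caller names more virtual CPUs than the set owns. A self-contained userspace model of that folding (hypothetical names, bitmasks standing in for cpumask_t, not part of the patch):

#include <stdio.h>

#define NR_CPUS 8

/* Next set bit after 'index', wrapping to the first set bit, as in
 * the patch's __cyclic_next_cpu(). */
static int cyclic_next(int index, unsigned long mask)
{
	int i;

	for (i = index + 1; i < NR_CPUS; i++)
		if (mask & (1UL << i))
			return i;
	for (i = 0; i < NR_CPUS; i++)		/* wrap around */
		if (mask & (1UL << i))
			return i;
	return NR_CPUS;				/* empty mask */
}

/* Fold a virtual affinity mask onto the cpuset's physical CPUs. */
static unsigned long combine(unsigned long virt, unsigned long phys)
{
	unsigned long out = 0;
	int i, cpu = NR_CPUS;	/* so the first step lands on the first set bit */

	if (!virt || !phys)
		return 0;
	for (i = 0; i < NR_CPUS; i++) {
		cpu = cyclic_next(cpu, phys);
		if (virt & (1UL << i))
			out |= 1UL << cpu;
	}
	return out;
}

int main(void)
{
	unsigned long phys = (1UL << 2) | (1UL << 5);	/* cpuset owns CPUs {2,5} */

	printf("0x%lx\n", combine(1UL << 1, phys));	/* virtual 1 -> {5}: 0x20 */
	printf("0x%lx\n", combine(1UL << 2, phys));	/* virtual 2 wraps -> {2}: 0x4 */
	return 0;
}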
diff -ur linux-2.6.14.3/kernel/kthread.c linux-2.6.14.3-cpusetvirt/kernel/kthread.c
--- linux-2.6.14.3/kernel/kthread.c	2005-11-24 23:10:21.000000000 +0100
+++ linux-2.6.14.3-cpusetvirt/kernel/kthread.c	2005-11-25 19:28:28.094978408 +0100
@@ -160,6 +160,15 @@
 	wait_task_inactive(k);
 	set_task_cpu(k, cpu);
 	k->cpus_allowed = cpumask_of_cpu(cpu);
+#ifdef CONFIG_CPUSETS
+	/* kthreads don't use sched_setaffinity() to bind themselves to
+	 * CPUs, so cpus_virt_allowed must be kept in sync by hand here.
+	 * This should not be a problem, since it is unlikely that a
+	 * kthread will run in a virtualized cpuset.
+	 * But better be ready, so:
+	 */
+	k->cpus_virt_allowed = cpumask_of_cpu(cpu);
+#endif
 }
 EXPORT_SYMBOL(kthread_bind);
 
diff -ur linux-2.6.14.3/kernel/sched.c linux-2.6.14.3-cpusetvirt/kernel/sched.c
--- linux-2.6.14.3/kernel/sched.c	2005-11-24 23:10:21.000000000 +0100
+++ linux-2.6.14.3-cpusetvirt/kernel/sched.c	2005-11-25 19:28:28.087979472 +0100
@@ -3798,9 +3798,13 @@
 	return retval;
 }
 
+#ifdef CONFIG_CPUSETS
+int cpuset_set_cpus_affinity(struct task_struct *p, cpumask_t mask);
+int cpuset_get_cpus_virt_affinity(struct task_struct *p, cpumask_t *mask);
+#endif
+
 long sched_setaffinity(pid_t pid, cpumask_t new_mask)
 {
-	cpumask_t cpus_allowed;
 	struct task_struct *p;
 	int retval;
 
@@ -3827,9 +3827,11 @@
 	if (retval)
 		goto out_unlock;
 
-	cpus_allowed = cpuset_cpus_allowed(p);
-	cpus_and(new_mask, new_mask, cpus_allowed);
-	retval = set_cpus_allowed(p, new_mask);
+#ifdef CONFIG_CPUSETS
+	retval = cpuset_set_cpus_affinity(p, new_mask);
+#else
+	retval = set_cpus_allowed(p, new_mask);
+#endif
 
 out_unlock:
 	put_task_struct(p);
@@ -3897,7 +3904,12 @@
 	if (retval)
 		goto out_unlock;
 
-	cpus_and(*mask, p->cpus_allowed, cpu_online_map);
+#ifdef CONFIG_CPUSETS
+	if (cpuset_get_cpus_virt_affinity(p, mask) < 0)
+		cpus_and(*mask, p->cpus_allowed, cpu_online_map);
+#else
+	cpus_and(*mask, p->cpus_allowed, cpu_online_map);
+#endif
 
 out_unlock:
 	read_unlock(&tasklist_lock);