]> git.pld-linux.org Git - packages/kernel.git/blob - linux-2.6-cpuset_virtualization.patch
- updated config list: SCSI_ARCMSR=m
[packages/kernel.git] / linux-2.6-cpuset_virtualization.patch
1 diff -ur linux-2.6.14.3/arch/i386/kernel/cpu/proc.c linux-2.6.14.3-cpusetvirt/arch/i386/kernel/cpu/proc.c
2 --- linux-2.6.14.3/arch/i386/kernel/cpu/proc.c  2005-11-24 23:10:21.000000000 +0100
3 +++ linux-2.6.14.3-cpusetvirt/arch/i386/kernel/cpu/proc.c       2005-11-25 19:28:28.088979320 +0100
4 @@ -3,6 +3,7 @@
5  #include <asm/semaphore.h>
6  #include <linux/seq_file.h>
7  #include <linux/cpufreq.h>
8 +#include <linux/cpuset.h>
9  
10  /*
11   *     Get CPU information for use by the procfs.
12 @@ -69,12 +70,20 @@
13         if (!cpu_online(n))
14                 return 0;
15  #endif
16 +#ifdef CONFIG_CPUSETS
17 +       if (!cpu_visible_in_cpuset(n, current->cpuset))
18 +               return 0;
19 +#endif
20         seq_printf(m, "processor\t: %d\n"
21                 "vendor_id\t: %s\n"
22                 "cpu family\t: %d\n"
23                 "model\t\t: %d\n"
24                 "model name\t: %s\n",
25 +#ifdef CONFIG_CPUSETS
26 +               cpuid_in_cpuset(n, current->cpuset),
27 +#else
28                 n,
29 +#endif
30                 c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
31                 c->x86,
32                 c->x86_model,
33 diff -ur linux-2.6.14.3/arch/ia64/kernel/setup.c linux-2.6.14.3-cpusetvirt/arch/ia64/kernel/setup.c
34 --- linux-2.6.14.3/arch/ia64/kernel/setup.c     2005-11-24 23:10:21.000000000 +0100
35 +++ linux-2.6.14.3-cpusetvirt/arch/ia64/kernel/setup.c  2005-11-25 19:28:28.090979016 +0100
36 @@ -43,6 +43,7 @@
37  #include <linux/platform.h>
38  #include <linux/pm.h>
39  #include <linux/cpufreq.h>
40 +#include <linux/cpuset.h>
41  
42  #include <asm/ia32.h>
43  #include <asm/machvec.h>
44 @@ -476,6 +477,11 @@
45         unsigned long mask;
46         int i;
47  
48 +#ifdef CONFIG_CPUSETS
49 +       if (!cpu_visible_in_cpuset(cpunum, current->cpuset))
50 +               return 0;
51 +#endif
52 +       
53         mask = c->features;
54  
55         switch (c->family) {
56 @@ -520,7 +526,12 @@
57                    "cpu MHz    : %lu.%06lu\n"
58                    "itc MHz    : %lu.%06lu\n"
59                    "BogoMIPS   : %lu.%02lu\n",
60 -                  cpunum, c->vendor, family, c->model, c->revision, c->archrev,
61 +#ifdef CONFIG_CPUSETS
62 +                  cpuid_in_cpuset(cpunum, current->cpuset),
63 +#else
64 +                  cpunum,
65 +#endif
66 +                  c->vendor, family, c->model, c->revision, c->archrev,
67                    features, c->ppn, c->number,
68                    c->proc_freq / 1000000, c->proc_freq % 1000000,
69                    c->itc_freq / 1000000, c->itc_freq % 1000000,
70 diff -ur linux-2.6.14.3/arch/x86_64/kernel/setup.c linux-2.6.14.3-cpusetvirt/arch/x86_64/kernel/setup.c
71 --- linux-2.6.14.3/arch/x86_64/kernel/setup.c   2005-11-24 23:10:21.000000000 +0100
72 +++ linux-2.6.14.3-cpusetvirt/arch/x86_64/kernel/setup.c        2005-11-25 19:37:48.755745016 +0100
73 @@ -42,6 +42,7 @@
74  #include <linux/cpufreq.h>
75  #include <linux/dmi.h>
76  #include <linux/dma-mapping.h>
77 +#include <linux/cpuset.h>
78  
79  #include <asm/mtrr.h>
80  #include <asm/uaccess.h>
81 @@ -1249,13 +1250,20 @@
82         if (!cpu_online(c-cpu_data))
83                 return 0;
84  #endif
85 -
86 +#ifdef CONFIG_CPUSETS
87 +       if (!cpu_visible_in_cpuset(c-cpu_data, current->cpuset))
88 +               return 0;
89 +#endif
90         seq_printf(m,"processor\t: %u\n"
91                      "vendor_id\t: %s\n"
92                      "cpu family\t: %d\n"
93                      "model\t\t: %d\n"
94                      "model name\t: %s\n",
95 +#ifdef CONFIG_CPUSETS
96 +                    cpuid_in_cpuset(c-cpu_data, current->cpuset),
97 +#else
98                      (unsigned)(c-cpu_data),
99 +#endif
100                      c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
101                      c->x86,
102                      (int)c->x86_model,
103 diff -ur linux-2.6.14.3/fs/proc/proc_misc.c linux-2.6.14.3-cpusetvirt/fs/proc/proc_misc.c
104 --- linux-2.6.14.3/fs/proc/proc_misc.c  2005-11-24 23:10:21.000000000 +0100
105 +++ linux-2.6.14.3-cpusetvirt/fs/proc/proc_misc.c       2005-11-25 19:28:28.092978712 +0100
106 @@ -44,6 +44,7 @@
107  #include <linux/jiffies.h>
108  #include <linux/sysrq.h>
109  #include <linux/vmalloc.h>
110 +#include <linux/cpuset.h>
111  #include <linux/crash_dump.h>
112  #include <asm/uaccess.h>
113  #include <asm/pgtable.h>
114 @@ -353,6 +354,10 @@
115         for_each_cpu(i) {
116                 int j;
117  
118 +#ifdef CONFIG_CPUSETS
119 +               if (!cpu_visible_in_cpuset(i, current->cpuset))
120 +                       continue;
121 +#endif
122                 user = cputime64_add(user, kstat_cpu(i).cpustat.user);
123                 nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice);
124                 system = cputime64_add(system, kstat_cpu(i).cpustat.system);
125 @@ -376,6 +381,10 @@
126                 (unsigned long long)cputime64_to_clock_t(steal));
127         for_each_online_cpu(i) {
128  
129 +#ifdef CONFIG_CPUSETS
130 +               if (!cpu_visible_in_cpuset(i, current->cpuset))
131 +                       continue;
132 +#endif
133                 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
134                 user = kstat_cpu(i).cpustat.user;
135                 nice = kstat_cpu(i).cpustat.nice;
136 @@ -386,7 +395,11 @@
137                 softirq = kstat_cpu(i).cpustat.softirq;
138                 steal = kstat_cpu(i).cpustat.steal;
139                 seq_printf(p, "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu\n",
140 +#ifdef CONFIG_CPUSETS
141 +                       cpuid_in_cpuset(i, current->cpuset),
142 +#else
143                         i,
144 +#endif
145                         (unsigned long long)cputime64_to_clock_t(user),
146                         (unsigned long long)cputime64_to_clock_t(nice),
147                         (unsigned long long)cputime64_to_clock_t(system),
148 diff -ur linux-2.6.14.3/include/linux/cpuset.h linux-2.6.14.3-cpusetvirt/include/linux/cpuset.h
149 --- linux-2.6.14.3/include/linux/cpuset.h       2005-11-24 23:10:21.000000000 +0100
150 +++ linux-2.6.14.3-cpusetvirt/include/linux/cpuset.h    2005-11-25 19:28:28.093978560 +0100
151 @@ -28,6 +28,9 @@
152  extern struct file_operations proc_cpuset_operations;
153  extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer);
154  
155 +int cpu_visible_in_cpuset(int cpu, struct cpuset * cs);
156 +int cpuid_in_cpuset(int cpu, struct cpuset * cs);
157 +
158  #else /* !CONFIG_CPUSETS */
159  
160  static inline int cpuset_init(void) { return 0; }
161 diff -ur linux-2.6.14.3/include/linux/init_task.h linux-2.6.14.3-cpusetvirt/include/linux/init_task.h
162 --- linux-2.6.14.3/include/linux/init_task.h    2005-11-24 23:10:21.000000000 +0100
163 +++ linux-2.6.14.3-cpusetvirt/include/linux/init_task.h 2005-11-25 19:28:28.079980688 +0100
164 @@ -88,6 +88,7 @@
165         .static_prio    = MAX_PRIO-20,                                  \
166         .policy         = SCHED_NORMAL,                                 \
167         .cpus_allowed   = CPU_MASK_ALL,                                 \
168 +       .cpus_virt_allowed      = CPU_MASK_ALL,                         \
169         .mm             = NULL,                                         \
170         .active_mm      = &init_mm,                                     \
171         .run_list       = LIST_HEAD_INIT(tsk.run_list),                 \
172 diff -ur linux-2.6.14.3/include/linux/sched.h linux-2.6.14.3-cpusetvirt/include/linux/sched.h
173 --- linux-2.6.14.3/include/linux/sched.h        2005-11-24 23:10:21.000000000 +0100
174 +++ linux-2.6.14.3-cpusetvirt/include/linux/sched.h     2005-11-25 19:28:28.081980384 +0100
175 @@ -808,6 +808,7 @@
176         struct mempolicy *mempolicy;
177         short il_next;
178  #endif
179 +       cpumask_t cpus_virt_allowed;
180  #ifdef CONFIG_CPUSETS
181         struct cpuset *cpuset;
182         nodemask_t mems_allowed;
183 diff -ur linux-2.6.14.3/kernel/cpuset.c linux-2.6.14.3-cpusetvirt/kernel/cpuset.c
184 --- linux-2.6.14.3/kernel/cpuset.c      2005-11-24 23:10:21.000000000 +0100
185 +++ linux-2.6.14.3-cpusetvirt/kernel/cpuset.c   2005-11-25 19:28:28.084979928 +0100
186 @@ -83,6 +83,7 @@
187         CS_CPU_EXCLUSIVE,
188         CS_MEM_EXCLUSIVE,
189         CS_MEMORY_MIGRATE,
190 +       CS_VIRTUALIZED,
191         CS_REMOVED,
192         CS_NOTIFY_ON_RELEASE
193  } cpuset_flagbits_t;
194 @@ -98,6 +99,10 @@
195         return !!test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
196  }
197  
198 +static inline int is_virtualized(const struct cpuset *cs)
199 +{
200 +       return !!test_bit(CS_VIRTUALIZED, &cs->flags);
201 +}
202  static inline int is_removed(const struct cpuset *cs)
203  {
204         return !!test_bit(CS_REMOVED, &cs->flags);
205 @@ -590,6 +595,145 @@
206                 is_mem_exclusive(p) <= is_mem_exclusive(q);
207  }
208  
209 +#define cyclic_next_cpu(index, mask)   __cyclic_next_cpu(index, &mask)
210 +static inline int __cyclic_next_cpu(int index, const cpumask_t * mask)
211 +{
212 +       int i;
213 +       i = next_cpu(index, *mask);
214 +       if (i >= NR_CPUS) {
215 +               if (cpu_isset(0, *mask))
216 +                       return 0;
217 +               i = next_cpu(0, *mask);
218 +       }
219 +       return i;
220 +}
221 +
222 +/**
223 + *     combine_mask - translate a user cpu mask to a physical one.
224 + *     @virt_allowed:  the mask given by the user to sched_setaffinity()
225 + *     @cs_allowed:    the mask of the current cpuset.
226 + *
227 + *     Returns combined mask in *mask.
228 + */
229 +static int combine_mask(cpumask_t *mask, const cpumask_t virt_allowed, const cpumask_t cs_allowed)
230 +{
231 +       int i;
232 +
233 +       /* start with current cpu out of the mask
234 +        * so the first call to next_cpu will take the first cpu
235 +        * even if it is cpu zero
236 +        */
237 +       int cpu = NR_CPUS;
238 +       cpus_clear(*mask);
239 +
240 +       if (cpus_empty(virt_allowed)) return 0;
241 +       if (cpus_empty(cs_allowed)) return 0;
242 +
243 +       for (i = 0; i < NR_CPUS; i++) {
244 +               cpu = cyclic_next_cpu(cpu, cs_allowed);
245 +               if (cpu_isset(i, virt_allowed))
246 +                       cpu_set(cpu, *mask);
247 +       }
248 +       return 0;
249 +}
250 +
251 +/**
252 + * Find out whether a cpu should be listed in /proc/cpuinfo
253 + *
254 + * For virtualized cpusets, only cpus present in the cpuset are shown
255 + */
256 +int cpu_visible_in_cpuset(int cpu, struct cpuset * cs)
257 +{
258 +       /* all cpus are visible in non-virtualized cpusets */
259 +       if (!is_virtualized(cs))
260 +               return 1;
261 +
262 +       return cpu_isset(cpu, cs->cpus_allowed);
263 +}
264 +
265 +/**
266 + *     cpuid_in_cpuset - translate a "real" cpu number to an "inside cpuset" (logical)
267 + *     @cs:    the cpuset where all the magic occurs.
268 + *     @cpu:   cpu number to be translated
269 + *
270 + *     Used for /proc/cpuinfo.
271 + *     Returns the translated cpu number.
272 + */
273 +int cpuid_in_cpuset(int cpu, struct cpuset * cs)
274 +{
275 +       int i;
276 +       int l = 0;
277 +       
278 +       /* translation needed only for virtualized cpusets */
279 +       if (!is_virtualized(cs))
280 +               return cpu;
281 +               
282 +       for(i=0; i < NR_CPUS; i++)
283 +       {
284 +               if (i == cpu) return l;
285 +               if (cpu_isset(i, cs->cpus_allowed))
286 +                       l++;
287 +       }
288 +       /* NOT REACHED */
289 +       BUG();
290 +       return 0;
291 +}
292 +
293 +/**
294 + *     set_cpus_virt_allowed - update the cpus_virt_allowed AND cpus_allowed masks
295 + *     @virt_allowed:        the mask given by the user to sched_setaffinity()
296 + *     @p:             the task
297 + *
298 + *     This function does not mess with scheduler internals. Here we rely
299 + *     on set_cpus_allowed(), which should, for instance, migrate the task
300 + *     if necessary.
301 + */
302 +static int set_cpus_virt_allowed(task_t *p, cpumask_t mask)
303 +{
304 +       cpumask_t new_mask;
305 +       int retval;
306 +
307 +       p->cpus_virt_allowed = mask;
308 +       combine_mask(&new_mask, p->cpus_virt_allowed, p->cpuset->cpus_allowed);
309 +       retval = set_cpus_allowed(p, new_mask);
310 +       return retval;
311 +}
312 +
313 +/**
314 + *     This is the exported entry point that will be called
315 + *     by sched_setaffinity().
316 + */
317 +int cpuset_set_cpus_affinity(task_t *p, cpumask_t mask)
318 +{
319 +       int retval;
320 +
321 +       down(&callback_sem);
322 +       if (is_virtualized(p->cpuset))
323 +               retval = set_cpus_virt_allowed(p, mask);
324 +       else {
325 +               cpumask_t cpus_allowed;
326 +               cpus_allowed = cpuset_cpus_allowed(p);
327 +               cpus_and(mask, mask, cpus_allowed);
328 +               retval = set_cpus_allowed(p, mask);
329 +       }
330 +       up(&callback_sem);
331 +       return retval;
332 +}
333 +
334 +/**
335 + *     This is the exported entry point that will be called
336 + *     by sched_getaffinity().
337 + */
338 +int cpuset_get_cpus_virt_affinity(task_t *p, cpumask_t *mask)
339 +{
340 +       if (is_virtualized(p->cpuset)) {
341 +               *mask = p->cpus_virt_allowed;
342 +               return 0;
343 +       }
344 +       return -1;
345 +}
346 +
347 +
348  /*
349   * validate_change() - Used to validate that any proposed cpuset change
350   *                    follows the structural rules for cpusets.
351 @@ -624,6 +768,11 @@
352         if ((par = cur->parent) == NULL)
353                 return 0;
354  
355 +       /* virtualization can only be turned on/off on empty cpusets  */
356 +       if ((atomic_read(&cur->count) > 0) || (!list_empty(&cur->children)))
357 +               if (is_virtualized(cur) != is_virtualized(trial))
358 +                       return -EBUSY;
359 +
360         /* We must be a subset of our parent cpuset */
361         if (!is_cpuset_subset(trial, par))
362                 return -EACCES;
363 @@ -818,11 +967,29 @@
364                 return -ESRCH;
365         }
366         atomic_inc(&cs->count);
367 +
368 +       /* depending on current and future cpuset for this task,
369 +        * affinity masks may be meaningful or not
370 +        */
371 +       cpumask_t virt_allowed, allowed;
372 +       if (is_virtualized(cs) == is_virtualized(tsk->cpuset)) {
373 +               virt_allowed = tsk->cpus_virt_allowed;
374 +               allowed = tsk->cpus_allowed;
375 +       } else {
376 +               virt_allowed = CPU_MASK_ALL;
377 +               allowed = CPU_MASK_ALL;
378 +       }
379 +               
380         rcu_assign_pointer(tsk->cpuset, cs);
381         task_unlock(tsk);
382  
383 -       guarantee_online_cpus(cs, &cpus);
384 -       set_cpus_allowed(tsk, cpus);
385 +
386 +       if (is_virtualized(cs))
387 +               set_cpus_virt_allowed(tsk, virt_allowed);
388 +       else {
389 +               guarantee_online_cpus(cs, &cpus);
390 +               set_cpus_allowed(tsk, cpus);
391 +       }
392  
393         from = oldcs->mems_allowed;
394         to = cs->mems_allowed;
395 @@ -839,6 +1006,7 @@
396         FILE_MEMLIST,
397         FILE_CPU_EXCLUSIVE,
398         FILE_MEM_EXCLUSIVE,
399 +       FILE_VIRTUALIZE,
400         FILE_NOTIFY_ON_RELEASE,
401         FILE_TASKLIST,
402  } cpuset_filetype_t;
403 @@ -887,6 +1055,9 @@
404         case FILE_MEM_EXCLUSIVE:
405                 retval = update_flag(CS_MEM_EXCLUSIVE, cs, buffer);
406                 break;
407 +       case FILE_VIRTUALIZE:
408 +               retval = update_flag(CS_VIRTUALIZED, cs, buffer);
409 +               break;
410         case FILE_NOTIFY_ON_RELEASE:
411                 retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer);
412                 break;
413 @@ -987,6 +1158,9 @@
414         case FILE_MEM_EXCLUSIVE:
415                 *s++ = is_mem_exclusive(cs) ? '1' : '0';
416                 break;
417 +       case FILE_VIRTUALIZE:
418 +               *s++ = is_virtualized(cs) ? '1' : '0';
419 +               break;
420         case FILE_NOTIFY_ON_RELEASE:
421                 *s++ = notify_on_release(cs) ? '1' : '0';
422                 break;
423 @@ -1310,6 +1484,11 @@
424         .private = FILE_MEM_EXCLUSIVE,
425  };
426  
427 +static struct cftype cft_virtualize = {
428 +       .name = "virtualize",
429 +       .private = FILE_VIRTUALIZE,
430 +};
431 +
432  static struct cftype cft_notify_on_release = {
433         .name = "notify_on_release",
434         .private = FILE_NOTIFY_ON_RELEASE,
435 @@ -1327,6 +1506,8 @@
436                 return err;
437         if ((err = cpuset_add_file(cs_dentry, &cft_mem_exclusive)) < 0)
438                 return err;
439 +       if ((err = cpuset_add_file(cs_dentry, &cft_virtualize)) < 0)
440 +               return err;
441         if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0)
442                 return err;
443         if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0)
444 diff -ur linux-2.6.14.3/kernel/kthread.c linux-2.6.14.3-cpusetvirt/kernel/kthread.c
445 --- linux-2.6.14.3/kernel/kthread.c     2005-11-24 23:10:21.000000000 +0100
446 +++ linux-2.6.14.3-cpusetvirt/kernel/kthread.c  2005-11-25 19:28:28.094978408 +0100
447 @@ -160,6 +160,15 @@
448         wait_task_inactive(k);
449         set_task_cpu(k, cpu);
450         k->cpus_allowed = cpumask_of_cpu(cpu);
451 +#ifdef CONFIG_CPUSETS
452 +       /* kthreads don't use sched_setaffinity() to bind themselves to
453 +        * CPUs, we need to take care.
454 +        * This should not be a problem since it is unlikely that kthreads
455 +        * will run in a virtualized cpuset.
456 +        * But better be ready, so:
457 +        */
458 +       k->cpus_virt_allowed = cpumask_of_cpu(cpu);
459 +#endif
460  }
461  EXPORT_SYMBOL(kthread_bind);
462  
463 diff -ur linux-2.6.14.3/kernel/sched.c linux-2.6.14.3-cpusetvirt/kernel/sched.c
464 --- linux-2.6.14.3/kernel/sched.c       2005-11-24 23:10:21.000000000 +0100
465 +++ linux-2.6.14.3-cpusetvirt/kernel/sched.c    2005-11-25 19:28:28.087979472 +0100
466 @@ -3798,11 +3798,15 @@
467         return retval;
468  }
469  
470 +#ifdef CONFIG_CPUSETS
471 +int cpuset_set_cpus_affinity(task_t *p, cpumask_t mask);
472 +int cpuset_get_cpus_virt_affinity(task_t *p, cpumask_t *mask);
473 +#endif
474 +
475  long sched_setaffinity(pid_t pid, cpumask_t new_mask)
476  {
477         task_t *p;
478         int retval;
479 -       cpumask_t cpus_allowed;
480  
481         lock_cpu_hotplug();
482         read_lock(&tasklist_lock);
483 @@ -3827,9 +3827,11 @@
484                         !capable(CAP_SYS_NICE))
485                 goto out_unlock;
486  
487 -       cpus_allowed = cpuset_cpus_allowed(p);
488 -       cpus_and(new_mask, new_mask, cpus_allowed);
489 -       retval = set_cpus_allowed(p, new_mask);
490 +#ifdef CONFIG_CPUSETS
491 +       retval = cpuset_set_cpus_affinity(p, new_mask);
492 +#else
493 +       retval = set_cpus_allowed(p, new_mask);
494 +#endif
495  
496  out_unlock:
497         put_task_struct(p);
498 @@ -3897,7 +3904,12 @@
499                 goto out_unlock;
500  
501         retval = 0;
502 -       cpus_and(*mask, p->cpus_allowed, cpu_online_map);
503 +#ifdef CONFIG_CPUSETS
504 +       if (cpuset_get_cpus_virt_affinity(p, mask) < 0)
505 +               cpus_and(*mask, p->cpus_allowed, cpu_online_map);
506 +#else
507 +       cpus_and(*mask, p->cpus_allowed, cpu_online_map);
508 +#endif
509  
510  out_unlock:
511         read_unlock(&tasklist_lock);
This page took 0.070659 seconds and 3 git commands to generate.