git.pld-linux.org Git - packages/kernel.git/blob - kernel-vserver-2.3.patch
- up to patch-2.6.31.6-vs2.3.0.36.24.diff
[packages/kernel.git] / kernel-vserver-2.3.patch
1 diff -NurpP --minimal linux-2.6.31.6/arch/alpha/Kconfig linux-2.6.31.6-vs2.3.0.36.24/arch/alpha/Kconfig
2 --- linux-2.6.31.6/arch/alpha/Kconfig   2009-03-24 14:18:07.000000000 +0100
3 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/alpha/Kconfig     2009-09-10 16:11:43.000000000 +0200
4 @@ -666,6 +666,8 @@ config DUMMY_CONSOLE
5         depends on VGA_HOSE
6         default y
7  
8 +source "kernel/vserver/Kconfig"
9 +
10  source "security/Kconfig"
11  
12  source "crypto/Kconfig"
13 diff -NurpP --minimal linux-2.6.31.6/arch/alpha/kernel/entry.S linux-2.6.31.6-vs2.3.0.36.24/arch/alpha/kernel/entry.S
14 --- linux-2.6.31.6/arch/alpha/kernel/entry.S    2009-06-11 17:11:46.000000000 +0200
15 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/alpha/kernel/entry.S      2009-09-10 16:11:43.000000000 +0200
16 @@ -874,24 +874,15 @@ sys_getxgid:
17         .globl  sys_getxpid
18         .ent    sys_getxpid
19  sys_getxpid:
20 +       lda     $sp, -16($sp)           /* open a 16-byte stack frame */
21 +       stq     $26, 0($sp)             /* save $26: the jsr below overwrites it */
22         .prologue 0
23 -       ldq     $2, TI_TASK($8)
24  
25 -       /* See linux/kernel/timer.c sys_getppid for discussion
26 -          about this loop.  */
27 -       ldq     $3, TASK_GROUP_LEADER($2)
28 -       ldq     $4, TASK_REAL_PARENT($3)
29 -       ldl     $0, TASK_TGID($2)
30 -1:     ldl     $1, TASK_TGID($4)
31 -#ifdef CONFIG_SMP
32 -       mov     $4, $5
33 -       mb
34 -       ldq     $3, TASK_GROUP_LEADER($2)
35 -       ldq     $4, TASK_REAL_PARENT($3)
36 -       cmpeq   $4, $5, $5
37 -       beq     $5, 1b
38 -#endif
39 -       stq     $1, 80($sp)
40 +       lda     $16, 96($sp)            /* $16 = address of the slot the removed code stored to as 80($sp), now 16 bytes further up; presumably do_getxpid's argument - confirm against its prototype */
41 +       jsr     $26, do_getxpid         /* call do_getxpid; return address is placed in $26 */
42 +       ldq     $26, 0($sp)             /* reload the $26 saved on entry */
43 +
44 +       lda     $sp, 16($sp)            /* release the 16-byte frame */
45         ret
46  .end sys_getxpid
47  
48 diff -NurpP --minimal linux-2.6.31.6/arch/alpha/kernel/osf_sys.c linux-2.6.31.6-vs2.3.0.36.24/arch/alpha/kernel/osf_sys.c
49 --- linux-2.6.31.6/arch/alpha/kernel/osf_sys.c  2009-09-10 15:25:14.000000000 +0200
50 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/alpha/kernel/osf_sys.c    2009-09-10 16:11:43.000000000 +0200
51 @@ -872,7 +872,7 @@ SYSCALL_DEFINE2(osf_gettimeofday, struct
52  {
53         if (tv) {
54                 struct timeval ktv;
55 -               do_gettimeofday(&ktv);
56 +               vx_gettimeofday(&ktv);
57                 if (put_tv32(tv, &ktv))
58                         return -EFAULT;
59         }
60 diff -NurpP --minimal linux-2.6.31.6/arch/alpha/kernel/ptrace.c linux-2.6.31.6-vs2.3.0.36.24/arch/alpha/kernel/ptrace.c
61 --- linux-2.6.31.6/arch/alpha/kernel/ptrace.c   2009-09-10 15:25:14.000000000 +0200
62 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/alpha/kernel/ptrace.c     2009-09-10 16:11:43.000000000 +0200
63 @@ -14,6 +14,7 @@
64  #include <linux/slab.h>
65  #include <linux/security.h>
66  #include <linux/signal.h>
67 +#include <linux/vs_base.h>
68  
69  #include <asm/uaccess.h>
70  #include <asm/pgtable.h>
71 diff -NurpP --minimal linux-2.6.31.6/arch/alpha/kernel/systbls.S linux-2.6.31.6-vs2.3.0.36.24/arch/alpha/kernel/systbls.S
72 --- linux-2.6.31.6/arch/alpha/kernel/systbls.S  2009-03-24 14:18:08.000000000 +0100
73 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/alpha/kernel/systbls.S    2009-09-10 16:11:43.000000000 +0200
74 @@ -446,7 +446,7 @@ sys_call_table:
75         .quad sys_stat64                        /* 425 */
76         .quad sys_lstat64
77         .quad sys_fstat64
78 -       .quad sys_ni_syscall                    /* sys_vserver */
79 +       .quad sys_vserver                       /* sys_vserver */
80         .quad sys_ni_syscall                    /* sys_mbind */
81         .quad sys_ni_syscall                    /* sys_get_mempolicy */
82         .quad sys_ni_syscall                    /* sys_set_mempolicy */
83 diff -NurpP --minimal linux-2.6.31.6/arch/alpha/kernel/traps.c linux-2.6.31.6-vs2.3.0.36.24/arch/alpha/kernel/traps.c
84 --- linux-2.6.31.6/arch/alpha/kernel/traps.c    2009-06-11 17:11:46.000000000 +0200
85 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/alpha/kernel/traps.c      2009-09-10 16:11:43.000000000 +0200
86 @@ -183,7 +183,8 @@ die_if_kernel(char * str, struct pt_regs
87  #ifdef CONFIG_SMP
88         printk("CPU %d ", hard_smp_processor_id());
89  #endif
90 -       printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err);
91 +       printk("%s(%d[#%u]): %s %ld\n", current->comm,
92 +               task_pid_nr(current), current->xid, str, err);
93         dik_show_regs(regs, r9_15);
94         add_taint(TAINT_DIE);
95         dik_show_trace((unsigned long *)(regs+1));
96 diff -NurpP --minimal linux-2.6.31.6/arch/alpha/mm/fault.c linux-2.6.31.6-vs2.3.0.36.24/arch/alpha/mm/fault.c
97 --- linux-2.6.31.6/arch/alpha/mm/fault.c        2009-09-10 15:25:14.000000000 +0200
98 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/alpha/mm/fault.c  2009-09-10 16:11:43.000000000 +0200
99 @@ -193,8 +193,8 @@ do_page_fault(unsigned long address, uns
100                 down_read(&mm->mmap_sem);
101                 goto survive;
102         }
103 -       printk(KERN_ALERT "VM: killing process %s(%d)\n",
104 -              current->comm, task_pid_nr(current));
105 +       printk(KERN_ALERT "VM: killing process %s(%d:#%u)\n",
106 +              current->comm, task_pid_nr(current), current->xid);
107         if (!user_mode(regs))
108                 goto no_context;
109         do_group_exit(SIGKILL);
110 diff -NurpP --minimal linux-2.6.31.6/arch/arm/Kconfig linux-2.6.31.6-vs2.3.0.36.24/arch/arm/Kconfig
111 --- linux-2.6.31.6/arch/arm/Kconfig     2009-09-10 15:25:14.000000000 +0200
112 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/arm/Kconfig       2009-09-10 16:11:43.000000000 +0200
113 @@ -1483,6 +1483,8 @@ source "fs/Kconfig"
114  
115  source "arch/arm/Kconfig.debug"
116  
117 +source "kernel/vserver/Kconfig"
118 +
119  source "security/Kconfig"
120  
121  source "crypto/Kconfig"
122 diff -NurpP --minimal linux-2.6.31.6/arch/arm/kernel/calls.S linux-2.6.31.6-vs2.3.0.36.24/arch/arm/kernel/calls.S
123 --- linux-2.6.31.6/arch/arm/kernel/calls.S      2009-09-10 15:25:15.000000000 +0200
124 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/arm/kernel/calls.S        2009-09-10 16:11:43.000000000 +0200
125 @@ -322,7 +322,7 @@
126  /* 310 */      CALL(sys_request_key)
127                 CALL(sys_keyctl)
128                 CALL(ABI(sys_semtimedop, sys_oabi_semtimedop))
129 -/* vserver */  CALL(sys_ni_syscall)
130 +               CALL(sys_vserver)
131                 CALL(sys_ioprio_set)
132  /* 315 */      CALL(sys_ioprio_get)
133                 CALL(sys_inotify_init)
134 diff -NurpP --minimal linux-2.6.31.6/arch/arm/kernel/process.c linux-2.6.31.6-vs2.3.0.36.24/arch/arm/kernel/process.c
135 --- linux-2.6.31.6/arch/arm/kernel/process.c    2009-09-10 15:25:15.000000000 +0200
136 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/arm/kernel/process.c      2009-09-10 16:11:43.000000000 +0200
137 @@ -269,7 +269,8 @@ void __show_regs(struct pt_regs *regs)
138  void show_regs(struct pt_regs * regs)
139  {
140         printk("\n");
141 -       printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm);
142 +       printk("Pid: %d[#%u], comm: %20s\n",
143 +               task_pid_nr(current), current->xid, current->comm);
144         __show_regs(regs);
145         __backtrace();
146  }
147 diff -NurpP --minimal linux-2.6.31.6/arch/arm/kernel/traps.c linux-2.6.31.6-vs2.3.0.36.24/arch/arm/kernel/traps.c
148 --- linux-2.6.31.6/arch/arm/kernel/traps.c      2009-06-11 17:11:49.000000000 +0200
149 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/arm/kernel/traps.c        2009-09-10 16:11:43.000000000 +0200
150 @@ -228,8 +228,8 @@ static void __die(const char *str, int e
151                str, err, ++die_counter);
152         print_modules();
153         __show_regs(regs);
154 -       printk("Process %s (pid: %d, stack limit = 0x%p)\n",
155 -               tsk->comm, task_pid_nr(tsk), thread + 1);
156 +       printk("Process %s (pid: %d:#%u, stack limit = 0x%p)\n",
157 +               tsk->comm, task_pid_nr(tsk), tsk->xid, thread + 1);
158  
159         if (!user_mode(regs) || in_interrupt()) {
160                 dump_mem("Stack: ", regs->ARM_sp,
161 diff -NurpP --minimal linux-2.6.31.6/arch/arm/mm/fault.c linux-2.6.31.6-vs2.3.0.36.24/arch/arm/mm/fault.c
162 --- linux-2.6.31.6/arch/arm/mm/fault.c  2009-09-10 15:25:18.000000000 +0200
163 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/arm/mm/fault.c    2009-09-10 16:11:43.000000000 +0200
164 @@ -294,7 +294,8 @@ do_page_fault(unsigned long addr, unsign
165                  * happened to us that made us unable to handle
166                  * the page fault gracefully.
167                  */
168 -               printk("VM: killing process %s\n", tsk->comm);
169 +               printk("VM: killing process %s(%d:#%u)\n",
170 +                       tsk->comm, task_pid_nr(tsk), tsk->xid);
171                 do_group_exit(SIGKILL);
172                 return 0;
173         }
174 diff -NurpP --minimal linux-2.6.31.6/arch/avr32/mm/fault.c linux-2.6.31.6-vs2.3.0.36.24/arch/avr32/mm/fault.c
175 --- linux-2.6.31.6/arch/avr32/mm/fault.c        2009-09-10 15:25:20.000000000 +0200
176 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/avr32/mm/fault.c  2009-09-29 17:32:09.000000000 +0200
177 @@ -216,7 +216,8 @@ out_of_memory:
178                 down_read(&mm->mmap_sem);
179                 goto survive;
180         }
181 -       printk("VM: Killing process %s\n", tsk->comm);
182 +       printk("VM: Killing process %s(%d:#%u)\n",
183 +               tsk->comm, task_pid_nr(tsk), tsk->xid);
184         if (user_mode(regs))
185                 do_group_exit(SIGKILL);
186         goto no_context;
187 diff -NurpP --minimal linux-2.6.31.6/arch/cris/Kconfig linux-2.6.31.6-vs2.3.0.36.24/arch/cris/Kconfig
188 --- linux-2.6.31.6/arch/cris/Kconfig    2009-06-11 17:11:56.000000000 +0200
189 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/cris/Kconfig      2009-09-10 16:11:43.000000000 +0200
190 @@ -685,6 +685,8 @@ source "drivers/staging/Kconfig"
191  
192  source "arch/cris/Kconfig.debug"
193  
194 +source "kernel/vserver/Kconfig"
195 +
196  source "security/Kconfig"
197  
198  source "crypto/Kconfig"
199 diff -NurpP --minimal linux-2.6.31.6/arch/cris/mm/fault.c linux-2.6.31.6-vs2.3.0.36.24/arch/cris/mm/fault.c
200 --- linux-2.6.31.6/arch/cris/mm/fault.c 2009-09-10 15:25:21.000000000 +0200
201 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/cris/mm/fault.c   2009-09-29 17:25:13.000000000 +0200
202 @@ -245,7 +245,8 @@ do_page_fault(unsigned long address, str
203  
204   out_of_memory:
205         up_read(&mm->mmap_sem);
206 -       printk("VM: killing process %s\n", tsk->comm);
207 +       printk("VM: killing process %s(%d:#%u)\n",
208 +               tsk->comm, task_pid_nr(tsk), tsk->xid);
209         if (user_mode(regs))
210                 do_exit(SIGKILL);
211         goto no_context;
212 diff -NurpP --minimal linux-2.6.31.6/arch/frv/kernel/kernel_thread.S linux-2.6.31.6-vs2.3.0.36.24/arch/frv/kernel/kernel_thread.S
213 --- linux-2.6.31.6/arch/frv/kernel/kernel_thread.S      2008-12-25 00:26:37.000000000 +0100
214 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/frv/kernel/kernel_thread.S        2009-09-10 16:11:43.000000000 +0200
215 @@ -37,7 +37,7 @@ kernel_thread:
216  
217         # start by forking the current process, but with shared VM
218         setlos.p        #__NR_clone,gr7         ; syscall number
219 -       ori             gr10,#CLONE_VM,gr8      ; first syscall arg     [clone_flags]
220 +       ori             gr10,#CLONE_KT,gr8      ; first syscall arg     [clone_flags]
221         sethi.p         #0xe4e4,gr9             ; second syscall arg    [newsp]
222         setlo           #0xe4e4,gr9
223         setlos.p        #0,gr10                 ; third syscall arg     [parent_tidptr]
224 diff -NurpP --minimal linux-2.6.31.6/arch/frv/mm/fault.c linux-2.6.31.6-vs2.3.0.36.24/arch/frv/mm/fault.c
225 --- linux-2.6.31.6/arch/frv/mm/fault.c  2009-09-10 15:25:22.000000000 +0200
226 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/frv/mm/fault.c    2009-09-29 17:25:48.000000000 +0200
227 @@ -257,7 +257,8 @@ asmlinkage void do_page_fault(int datamm
228   */
229   out_of_memory:
230         up_read(&mm->mmap_sem);
231 -       printk("VM: killing process %s\n", current->comm);
232 +       printk("VM: killing process %s(%d:#%u)\n",
233 +               current->comm, task_pid_nr(current), current->xid);
234         if (user_mode(__frame))
235                 do_group_exit(SIGKILL);
236         goto no_context;
237 diff -NurpP --minimal linux-2.6.31.6/arch/h8300/Kconfig linux-2.6.31.6-vs2.3.0.36.24/arch/h8300/Kconfig
238 --- linux-2.6.31.6/arch/h8300/Kconfig   2009-03-24 14:18:24.000000000 +0100
239 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/h8300/Kconfig     2009-09-10 16:11:43.000000000 +0200
240 @@ -226,6 +226,8 @@ source "fs/Kconfig"
241  
242  source "arch/h8300/Kconfig.debug"
243  
244 +source "kernel/vserver/Kconfig"
245 +
246  source "security/Kconfig"
247  
248  source "crypto/Kconfig"
249 diff -NurpP --minimal linux-2.6.31.6/arch/ia64/ia32/ia32_entry.S linux-2.6.31.6-vs2.3.0.36.24/arch/ia64/ia32/ia32_entry.S
250 --- linux-2.6.31.6/arch/ia64/ia32/ia32_entry.S  2009-06-11 17:11:57.000000000 +0200
251 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/ia64/ia32/ia32_entry.S    2009-09-10 16:11:43.000000000 +0200
252 @@ -451,7 +451,7 @@ ia32_syscall_table:
253         data8 sys_tgkill        /* 270 */
254         data8 compat_sys_utimes
255         data8 sys32_fadvise64_64
256 -       data8 sys_ni_syscall
257 +       data8 sys32_vserver
258         data8 sys_ni_syscall
259         data8 sys_ni_syscall    /* 275 */
260         data8 sys_ni_syscall
261 diff -NurpP --minimal linux-2.6.31.6/arch/ia64/Kconfig linux-2.6.31.6-vs2.3.0.36.24/arch/ia64/Kconfig
262 --- linux-2.6.31.6/arch/ia64/Kconfig    2009-09-10 15:25:22.000000000 +0200
263 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/ia64/Kconfig      2009-09-10 16:11:43.000000000 +0200
264 @@ -676,6 +676,8 @@ source "fs/Kconfig"
265  
266  source "arch/ia64/Kconfig.debug"
267  
268 +source "kernel/vserver/Kconfig"
269 +
270  source "security/Kconfig"
271  
272  source "crypto/Kconfig"
273 diff -NurpP --minimal linux-2.6.31.6/arch/ia64/kernel/entry.S linux-2.6.31.6-vs2.3.0.36.24/arch/ia64/kernel/entry.S
274 --- linux-2.6.31.6/arch/ia64/kernel/entry.S     2009-09-10 15:25:22.000000000 +0200
275 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/ia64/kernel/entry.S       2009-09-10 16:11:43.000000000 +0200
276 @@ -1753,7 +1753,7 @@ sys_call_table:
277         data8 sys_mq_notify
278         data8 sys_mq_getsetattr
279         data8 sys_kexec_load
280 -       data8 sys_ni_syscall                    // reserved for vserver
281 +       data8 sys_vserver
282         data8 sys_waitid                        // 1270
283         data8 sys_add_key
284         data8 sys_request_key
285 diff -NurpP --minimal linux-2.6.31.6/arch/ia64/kernel/perfmon.c linux-2.6.31.6-vs2.3.0.36.24/arch/ia64/kernel/perfmon.c
286 --- linux-2.6.31.6/arch/ia64/kernel/perfmon.c   2009-09-10 15:25:22.000000000 +0200
287 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/ia64/kernel/perfmon.c     2009-09-10 16:11:43.000000000 +0200
288 @@ -41,6 +41,7 @@
289  #include <linux/rcupdate.h>
290  #include <linux/completion.h>
291  #include <linux/tracehook.h>
292 +#include <linux/vs_memory.h>
293  
294  #include <asm/errno.h>
295  #include <asm/intrinsics.h>
296 @@ -2372,7 +2373,7 @@ pfm_smpl_buffer_alloc(struct task_struct
297          */
298         insert_vm_struct(mm, vma);
299  
300 -       mm->total_vm  += size >> PAGE_SHIFT;
301 +       vx_vmpages_add(mm, size >> PAGE_SHIFT);
302         vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
303                                                         vma_pages(vma));
304         up_write(&task->mm->mmap_sem);
305 diff -NurpP --minimal linux-2.6.31.6/arch/ia64/kernel/process.c linux-2.6.31.6-vs2.3.0.36.24/arch/ia64/kernel/process.c
306 --- linux-2.6.31.6/arch/ia64/kernel/process.c   2009-06-11 17:11:57.000000000 +0200
307 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/ia64/kernel/process.c     2009-09-10 16:11:43.000000000 +0200
308 @@ -110,8 +110,8 @@ show_regs (struct pt_regs *regs)
309         unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
310  
311         print_modules();
312 -       printk("\nPid: %d, CPU %d, comm: %20s\n", task_pid_nr(current),
313 -                       smp_processor_id(), current->comm);
314 +       printk("\nPid: %d[#%u], CPU %d, comm: %20s\n", task_pid_nr(current),
315 +                       current->xid, smp_processor_id(), current->comm);
316         printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]    %s (%s)\n",
317                regs->cr_ipsr, regs->cr_ifs, ip, print_tainted(),
318                init_utsname()->release);
319 diff -NurpP --minimal linux-2.6.31.6/arch/ia64/kernel/ptrace.c linux-2.6.31.6-vs2.3.0.36.24/arch/ia64/kernel/ptrace.c
320 --- linux-2.6.31.6/arch/ia64/kernel/ptrace.c    2009-09-10 15:25:22.000000000 +0200
321 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/ia64/kernel/ptrace.c      2009-09-10 16:11:43.000000000 +0200
322 @@ -22,6 +22,7 @@
323  #include <linux/regset.h>
324  #include <linux/elf.h>
325  #include <linux/tracehook.h>
326 +#include <linux/vs_base.h>
327  
328  #include <asm/pgtable.h>
329  #include <asm/processor.h>
330 diff -NurpP --minimal linux-2.6.31.6/arch/ia64/kernel/traps.c linux-2.6.31.6-vs2.3.0.36.24/arch/ia64/kernel/traps.c
331 --- linux-2.6.31.6/arch/ia64/kernel/traps.c     2008-12-25 00:26:37.000000000 +0100
332 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/ia64/kernel/traps.c       2009-09-10 16:11:43.000000000 +0200
333 @@ -60,8 +60,9 @@ die (const char *str, struct pt_regs *re
334         put_cpu();
335  
336         if (++die.lock_owner_depth < 3) {
337 -               printk("%s[%d]: %s %ld [%d]\n",
338 -               current->comm, task_pid_nr(current), str, err, ++die_counter);
339 +               printk("%s[%d[#%u]]: %s %ld [%d]\n",
340 +                       current->comm, task_pid_nr(current), current->xid,
341 +                       str, err, ++die_counter);
342                 if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV)
343                     != NOTIFY_STOP)
344                         show_regs(regs);
345 @@ -324,8 +325,9 @@ handle_fpu_swa (int fp_fault, struct pt_
346                         if ((last.count & 15) < 5 && (ia64_fetchadd(1, &last.count, acq) & 15) < 5) {
347                                 last.time = current_jiffies + 5 * HZ;
348                                 printk(KERN_WARNING
349 -                                       "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n",
350 -                                       current->comm, task_pid_nr(current), regs->cr_iip + ia64_psr(regs)->ri, isr);
351 +                                       "%s(%d[#%u]): floating-point assist fault at ip %016lx, isr %016lx\n",
352 +                                       current->comm, task_pid_nr(current), current->xid,
353 +                                       regs->cr_iip + ia64_psr(regs)->ri, isr);
354                         }
355                 }
356         }
357 diff -NurpP --minimal linux-2.6.31.6/arch/ia64/mm/fault.c linux-2.6.31.6-vs2.3.0.36.24/arch/ia64/mm/fault.c
358 --- linux-2.6.31.6/arch/ia64/mm/fault.c 2009-09-10 15:25:23.000000000 +0200
359 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/ia64/mm/fault.c   2009-09-29 17:26:21.000000000 +0200
360 @@ -10,6 +10,7 @@
361  #include <linux/interrupt.h>
362  #include <linux/kprobes.h>
363  #include <linux/kdebug.h>
364 +#include <linux/vs_memory.h>
365  
366  #include <asm/pgtable.h>
367  #include <asm/processor.h>
368 @@ -281,7 +282,8 @@ ia64_do_page_fault (unsigned long addres
369                 down_read(&mm->mmap_sem);
370                 goto survive;
371         }
372 -       printk(KERN_CRIT "VM: killing process %s\n", current->comm);
373 +       printk(KERN_CRIT "VM: killing process %s(%d:#%u)\n",
374 +               current->comm, task_pid_nr(current), current->xid);
375         if (user_mode(regs))
376                 do_group_exit(SIGKILL);
377         goto no_context;
378 diff -NurpP --minimal linux-2.6.31.6/arch/m32r/kernel/traps.c linux-2.6.31.6-vs2.3.0.36.24/arch/m32r/kernel/traps.c
379 --- linux-2.6.31.6/arch/m32r/kernel/traps.c     2008-12-25 00:26:37.000000000 +0100
380 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/m32r/kernel/traps.c       2009-09-10 16:11:43.000000000 +0200
381 @@ -196,8 +196,9 @@ static void show_registers(struct pt_reg
382         } else {
383                 printk("SPI: %08lx\n", sp);
384         }
385 -       printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)",
386 -               current->comm, task_pid_nr(current), 0xffff & i, 4096+(unsigned long)current);
387 +       printk("Process %s (pid: %d[#%u], process nr: %d, stackpage=%08lx)",
388 +               current->comm, task_pid_nr(current), current->xid,
389 +               0xffff & i, 4096+(unsigned long)current);
390  
391         /*
392          * When in-kernel, we also print out the stack and code at the
393 diff -NurpP --minimal linux-2.6.31.6/arch/m32r/mm/fault.c linux-2.6.31.6-vs2.3.0.36.24/arch/m32r/mm/fault.c
394 --- linux-2.6.31.6/arch/m32r/mm/fault.c 2009-09-10 15:25:23.000000000 +0200
395 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/m32r/mm/fault.c   2009-09-29 17:26:48.000000000 +0200
396 @@ -276,7 +276,8 @@ out_of_memory:
397                 down_read(&mm->mmap_sem);
398                 goto survive;
399         }
400 -       printk("VM: killing process %s\n", tsk->comm);
401 +       printk("VM: killing process %s(%d:#%u)\n",
402 +               tsk->comm, task_pid_nr(tsk), tsk->xid);
403         if (error_code & ACE_USERMODE)
404                 do_group_exit(SIGKILL);
405         goto no_context;
406 diff -NurpP --minimal linux-2.6.31.6/arch/m68k/Kconfig linux-2.6.31.6-vs2.3.0.36.24/arch/m68k/Kconfig
407 --- linux-2.6.31.6/arch/m68k/Kconfig    2009-11-12 12:10:08.000000000 +0100
408 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/m68k/Kconfig      2009-11-12 12:26:38.000000000 +0100
409 @@ -616,6 +616,8 @@ source "fs/Kconfig"
410  
411  source "arch/m68k/Kconfig.debug"
412  
413 +source "kernel/vserver/Kconfig"
414 +
415  source "security/Kconfig"
416  
417  source "crypto/Kconfig"
418 diff -NurpP --minimal linux-2.6.31.6/arch/m68k/kernel/ptrace.c linux-2.6.31.6-vs2.3.0.36.24/arch/m68k/kernel/ptrace.c
419 --- linux-2.6.31.6/arch/m68k/kernel/ptrace.c    2008-12-25 00:26:37.000000000 +0100
420 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/m68k/kernel/ptrace.c      2009-09-10 16:11:43.000000000 +0200
421 @@ -18,6 +18,7 @@
422  #include <linux/ptrace.h>
423  #include <linux/user.h>
424  #include <linux/signal.h>
425 +#include <linux/vs_base.h>
426  
427  #include <asm/uaccess.h>
428  #include <asm/page.h>
429 @@ -269,6 +270,8 @@ long arch_ptrace(struct task_struct *chi
430                 ret = ptrace_request(child, request, addr, data);
431                 break;
432         }
433 +       if (!vx_check(vx_task_xid(child), VS_WATCH_P | VS_IDENT))
434 +               goto out_tsk;
435  
436         return ret;
437  out_eio:
438 diff -NurpP --minimal linux-2.6.31.6/arch/m68k/kernel/traps.c linux-2.6.31.6-vs2.3.0.36.24/arch/m68k/kernel/traps.c
439 --- linux-2.6.31.6/arch/m68k/kernel/traps.c     2009-09-10 15:25:23.000000000 +0200
440 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/m68k/kernel/traps.c       2009-09-10 16:11:43.000000000 +0200
441 @@ -906,8 +906,8 @@ void show_registers(struct pt_regs *regs
442         printk("d4: %08lx    d5: %08lx    a0: %08lx    a1: %08lx\n",
443                regs->d4, regs->d5, regs->a0, regs->a1);
444  
445 -       printk("Process %s (pid: %d, task=%p)\n",
446 -               current->comm, task_pid_nr(current), current);
447 +       printk("Process %s (pid: %d[#%u], task=%p)\n",
448 +               current->comm, task_pid_nr(current), current->xid, current);
449         addr = (unsigned long)&fp->un;
450         printk("Frame format=%X ", regs->format);
451         switch (regs->format) {
452 diff -NurpP --minimal linux-2.6.31.6/arch/m68k/mm/fault.c linux-2.6.31.6-vs2.3.0.36.24/arch/m68k/mm/fault.c
453 --- linux-2.6.31.6/arch/m68k/mm/fault.c 2009-09-10 15:25:23.000000000 +0200
454 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/m68k/mm/fault.c   2009-09-29 17:27:45.000000000 +0200
455 @@ -186,7 +186,8 @@ out_of_memory:
456                 goto survive;
457         }
458  
459 -       printk("VM: killing process %s\n", current->comm);
460 +       printk("VM: killing process %s(%d:#%u)\n",
461 +               current->comm, task_pid_nr(current), current->xid);
462         if (user_mode(regs))
463                 do_group_exit(SIGKILL);
464  
465 diff -NurpP --minimal linux-2.6.31.6/arch/m68knommu/Kconfig linux-2.6.31.6-vs2.3.0.36.24/arch/m68knommu/Kconfig
466 --- linux-2.6.31.6/arch/m68knommu/Kconfig       2009-06-11 17:11:59.000000000 +0200
467 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/m68knommu/Kconfig 2009-09-10 16:11:43.000000000 +0200
468 @@ -721,6 +721,8 @@ source "fs/Kconfig"
469  
470  source "arch/m68knommu/Kconfig.debug"
471  
472 +source "kernel/vserver/Kconfig"
473 +
474  source "security/Kconfig"
475  
476  source "crypto/Kconfig"
477 diff -NurpP --minimal linux-2.6.31.6/arch/m68knommu/kernel/traps.c linux-2.6.31.6-vs2.3.0.36.24/arch/m68knommu/kernel/traps.c
478 --- linux-2.6.31.6/arch/m68knommu/kernel/traps.c        2009-09-10 15:25:23.000000000 +0200
479 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/m68knommu/kernel/traps.c  2009-09-10 16:11:43.000000000 +0200
480 @@ -78,8 +78,9 @@ void die_if_kernel(char *str, struct pt_
481         printk(KERN_EMERG "d4: %08lx    d5: %08lx    a0: %08lx    a1: %08lx\n",
482                fp->d4, fp->d5, fp->a0, fp->a1);
483  
484 -       printk(KERN_EMERG "Process %s (pid: %d, stackpage=%08lx)\n",
485 -               current->comm, current->pid, PAGE_SIZE+(unsigned long)current);
486 +       printk(KERN_EMERG "Process %s (pid: %d[#%u], stackpage=%08lx)\n",
487 +               current->comm, task_pid_nr(current), current->xid,
488 +               PAGE_SIZE+(unsigned long)current);
489         show_stack(NULL, (unsigned long *)(fp + 1));
490         add_taint(TAINT_DIE);
491         do_exit(SIGSEGV);
492 diff -NurpP --minimal linux-2.6.31.6/arch/microblaze/mm/fault.c linux-2.6.31.6-vs2.3.0.36.24/arch/microblaze/mm/fault.c
493 --- linux-2.6.31.6/arch/microblaze/mm/fault.c   2009-09-10 15:25:24.000000000 +0200
494 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/microblaze/mm/fault.c     2009-09-29 17:28:08.000000000 +0200
495 @@ -279,7 +279,8 @@ out_of_memory:
496                 goto survive;
497         }
498         up_read(&mm->mmap_sem);
499 -       printk(KERN_WARNING "VM: killing process %s\n", current->comm);
500 +       printk(KERN_WARNING "VM: killing process %s(%d:#%u)\n",
501 +               current->comm, task_pid_nr(current), current->xid);
502         if (user_mode(regs))
503                 do_exit(SIGKILL);
504         bad_page_fault(regs, address, SIGKILL);
505 diff -NurpP --minimal linux-2.6.31.6/arch/mips/Kconfig linux-2.6.31.6-vs2.3.0.36.24/arch/mips/Kconfig
506 --- linux-2.6.31.6/arch/mips/Kconfig    2009-09-10 15:25:24.000000000 +0200
507 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/mips/Kconfig      2009-09-10 16:11:43.000000000 +0200
508 @@ -2186,6 +2186,8 @@ source "fs/Kconfig"
509  
510  source "arch/mips/Kconfig.debug"
511  
512 +source "kernel/vserver/Kconfig"
513 +
514  source "security/Kconfig"
515  
516  source "crypto/Kconfig"
517 diff -NurpP --minimal linux-2.6.31.6/arch/mips/kernel/ptrace.c linux-2.6.31.6-vs2.3.0.36.24/arch/mips/kernel/ptrace.c
518 --- linux-2.6.31.6/arch/mips/kernel/ptrace.c    2008-12-25 00:26:37.000000000 +0100
519 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/mips/kernel/ptrace.c      2009-09-10 16:11:43.000000000 +0200
520 @@ -25,6 +25,7 @@
521  #include <linux/security.h>
522  #include <linux/audit.h>
523  #include <linux/seccomp.h>
524 +#include <linux/vs_base.h>
525  
526  #include <asm/byteorder.h>
527  #include <asm/cpu.h>
528 @@ -259,6 +260,9 @@ long arch_ptrace(struct task_struct *chi
529  {
530         int ret;
531  
532 +       if (!vx_check(vx_task_xid(child), VS_WATCH_P | VS_IDENT))
533 +               goto out;
534 +
535         switch (request) {
536         /* when I and D space are separate, these will need to be fixed. */
537         case PTRACE_PEEKTEXT: /* read word at location addr. */
538 diff -NurpP --minimal linux-2.6.31.6/arch/mips/kernel/scall32-o32.S linux-2.6.31.6-vs2.3.0.36.24/arch/mips/kernel/scall32-o32.S
539 --- linux-2.6.31.6/arch/mips/kernel/scall32-o32.S       2009-09-10 15:25:38.000000000 +0200
540 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/mips/kernel/scall32-o32.S 2009-09-10 16:11:43.000000000 +0200
541 @@ -597,7 +597,7 @@ einval:     li      v0, -ENOSYS
542         sys     sys_mq_timedreceive     5
543         sys     sys_mq_notify           2       /* 4275 */
544         sys     sys_mq_getsetattr       3
545 -       sys     sys_ni_syscall          0       /* sys_vserver */
546 +       sys     sys_vserver             3
547         sys     sys_waitid              5
548         sys     sys_ni_syscall          0       /* available, was setaltroot */
549         sys     sys_add_key             5       /* 4280 */
550 diff -NurpP --minimal linux-2.6.31.6/arch/mips/kernel/scall64-64.S linux-2.6.31.6-vs2.3.0.36.24/arch/mips/kernel/scall64-64.S
551 --- linux-2.6.31.6/arch/mips/kernel/scall64-64.S        2009-09-10 15:25:38.000000000 +0200
552 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/mips/kernel/scall64-64.S  2009-09-10 16:11:43.000000000 +0200
553 @@ -434,7 +434,7 @@ sys_call_table:
554         PTR     sys_mq_timedreceive
555         PTR     sys_mq_notify
556         PTR     sys_mq_getsetattr               /* 5235 */
557 -       PTR     sys_ni_syscall                  /* sys_vserver */
558 +       PTR     sys_vserver
559         PTR     sys_waitid
560         PTR     sys_ni_syscall                  /* available, was setaltroot */
561         PTR     sys_add_key
562 diff -NurpP --minimal linux-2.6.31.6/arch/mips/kernel/scall64-n32.S linux-2.6.31.6-vs2.3.0.36.24/arch/mips/kernel/scall64-n32.S
563 --- linux-2.6.31.6/arch/mips/kernel/scall64-n32.S       2009-09-10 15:25:38.000000000 +0200
564 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/mips/kernel/scall64-n32.S 2009-09-10 16:11:43.000000000 +0200
565 @@ -360,7 +360,7 @@ EXPORT(sysn32_call_table)
566         PTR     compat_sys_mq_timedreceive
567         PTR     compat_sys_mq_notify
568         PTR     compat_sys_mq_getsetattr
569 -       PTR     sys_ni_syscall                  /* 6240, sys_vserver */
570 +       PTR     sys32_vserver                   /* 6240 */
571         PTR     compat_sys_waitid
572         PTR     sys_ni_syscall                  /* available, was setaltroot */
573         PTR     sys_add_key
574 diff -NurpP --minimal linux-2.6.31.6/arch/mips/kernel/scall64-o32.S linux-2.6.31.6-vs2.3.0.36.24/arch/mips/kernel/scall64-o32.S
575 --- linux-2.6.31.6/arch/mips/kernel/scall64-o32.S       2009-09-10 15:25:38.000000000 +0200
576 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/mips/kernel/scall64-o32.S 2009-09-10 16:11:43.000000000 +0200
577 @@ -480,7 +480,7 @@ sys_call_table:
578         PTR     compat_sys_mq_timedreceive
579         PTR     compat_sys_mq_notify            /* 4275 */
580         PTR     compat_sys_mq_getsetattr
581 -       PTR     sys_ni_syscall                  /* sys_vserver */
582 +       PTR     sys32_vserver
583         PTR     sys_32_waitid
584         PTR     sys_ni_syscall                  /* available, was setaltroot */
585         PTR     sys_add_key                     /* 4280 */
586 diff -NurpP --minimal linux-2.6.31.6/arch/mips/kernel/traps.c linux-2.6.31.6-vs2.3.0.36.24/arch/mips/kernel/traps.c
587 --- linux-2.6.31.6/arch/mips/kernel/traps.c     2009-09-10 15:25:38.000000000 +0200
588 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/mips/kernel/traps.c       2009-09-10 16:11:43.000000000 +0200
589 @@ -335,9 +335,10 @@ void show_registers(const struct pt_regs
590  
591         __show_regs(regs);
592         print_modules();
593 -       printk("Process %s (pid: %d, threadinfo=%p, task=%p, tls=%0*lx)\n",
594 -              current->comm, current->pid, current_thread_info(), current,
595 -             field, current_thread_info()->tp_value);
596 +       printk("Process %s (pid: %d:#%u, threadinfo=%p, task=%p, tls=%0*lx)\n",
597 +               current->comm, task_pid_nr(current), current->xid,
598 +               current_thread_info(), current,
599 +               field, current_thread_info()->tp_value);
600         if (cpu_has_userlocal) {
601                 unsigned long tls;
602  
603 diff -NurpP --minimal linux-2.6.31.6/arch/mn10300/mm/fault.c linux-2.6.31.6-vs2.3.0.36.24/arch/mn10300/mm/fault.c
604 --- linux-2.6.31.6/arch/mn10300/mm/fault.c      2009-09-10 15:25:39.000000000 +0200
605 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/mn10300/mm/fault.c        2009-09-29 17:28:34.000000000 +0200
606 @@ -339,7 +339,8 @@ no_context:
607  out_of_memory:
608         up_read(&mm->mmap_sem);
609         monitor_signal(regs);
610 -       printk(KERN_ALERT "VM: killing process %s\n", tsk->comm);
611 +       printk(KERN_ALERT "VM: killing process %s(%d:#%u)\n",
612 +               tsk->comm, task_pid_nr(tsk), tsk->xid);
613         if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR)
614                 do_exit(SIGKILL);
615         goto no_context;
616 diff -NurpP --minimal linux-2.6.31.6/arch/parisc/Kconfig linux-2.6.31.6-vs2.3.0.36.24/arch/parisc/Kconfig
617 --- linux-2.6.31.6/arch/parisc/Kconfig  2009-09-10 15:25:39.000000000 +0200
618 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/parisc/Kconfig    2009-09-10 16:11:43.000000000 +0200
619 @@ -293,6 +293,8 @@ source "fs/Kconfig"
620  
621  source "arch/parisc/Kconfig.debug"
622  
623 +source "kernel/vserver/Kconfig"
624 +
625  source "security/Kconfig"
626  
627  source "crypto/Kconfig"
628 diff -NurpP --minimal linux-2.6.31.6/arch/parisc/kernel/syscall_table.S linux-2.6.31.6-vs2.3.0.36.24/arch/parisc/kernel/syscall_table.S
629 --- linux-2.6.31.6/arch/parisc/kernel/syscall_table.S   2009-09-10 15:25:40.000000000 +0200
630 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/parisc/kernel/syscall_table.S     2009-09-10 16:11:43.000000000 +0200
631 @@ -361,7 +361,7 @@
632         ENTRY_COMP(mbind)               /* 260 */
633         ENTRY_COMP(get_mempolicy)
634         ENTRY_COMP(set_mempolicy)
635 -       ENTRY_SAME(ni_syscall)  /* 263: reserved for vserver */
636 +       ENTRY_DIFF(vserver)
637         ENTRY_SAME(add_key)
638         ENTRY_SAME(request_key)         /* 265 */
639         ENTRY_SAME(keyctl)
640 diff -NurpP --minimal linux-2.6.31.6/arch/parisc/kernel/traps.c linux-2.6.31.6-vs2.3.0.36.24/arch/parisc/kernel/traps.c
641 --- linux-2.6.31.6/arch/parisc/kernel/traps.c   2009-09-10 15:25:40.000000000 +0200
642 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/parisc/kernel/traps.c     2009-09-10 16:11:43.000000000 +0200
643 @@ -236,8 +236,9 @@ void die_if_kernel(char *str, struct pt_
644                 if (err == 0)
645                         return; /* STFU */
646  
647 -               printk(KERN_CRIT "%s (pid %d): %s (code %ld) at " RFMT "\n",
648 -                       current->comm, task_pid_nr(current), str, err, regs->iaoq[0]);
649 +               printk(KERN_CRIT "%s (pid %d:#%u): %s (code %ld) at " RFMT "\n",
650 +                       current->comm, task_pid_nr(current), current->xid,
651 +                       str, err, regs->iaoq[0]);
652  #ifdef PRINT_USER_FAULTS
653                 /* XXX for debugging only */
654                 show_regs(regs);
655 @@ -270,8 +271,8 @@ void die_if_kernel(char *str, struct pt_
656                 pdc_console_restart();
657         
658         if (err)
659 -               printk(KERN_CRIT "%s (pid %d): %s (code %ld)\n",
660 -                       current->comm, task_pid_nr(current), str, err);
661 +               printk(KERN_CRIT "%s (pid %d:#%u): %s (code %ld)\n",
662 +                       current->comm, task_pid_nr(current), current->xid, str, err);
663  
664         /* Wot's wrong wif bein' racy? */
665         if (current->thread.flags & PARISC_KERNEL_DEATH) {
666 diff -NurpP --minimal linux-2.6.31.6/arch/parisc/mm/fault.c linux-2.6.31.6-vs2.3.0.36.24/arch/parisc/mm/fault.c
667 --- linux-2.6.31.6/arch/parisc/mm/fault.c       2009-09-10 15:25:40.000000000 +0200
668 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/parisc/mm/fault.c 2009-09-10 16:11:43.000000000 +0200
669 @@ -237,8 +237,9 @@ bad_area:
670  
671  #ifdef PRINT_USER_FAULTS
672                 printk(KERN_DEBUG "\n");
673 -               printk(KERN_DEBUG "do_page_fault() pid=%d command='%s' type=%lu address=0x%08lx\n",
674 -                   task_pid_nr(tsk), tsk->comm, code, address);
675 +               printk(KERN_DEBUG "do_page_fault() pid=%d:#%u "
676 +                   "command='%s' type=%lu address=0x%08lx\n",
677 +                   task_pid_nr(tsk), tsk->xid, tsk->comm, code, address);
678                 if (vma) {
679                         printk(KERN_DEBUG "vm_start = 0x%08lx, vm_end = 0x%08lx\n",
680                                         vma->vm_start, vma->vm_end);
681 @@ -264,7 +265,8 @@ no_context:
682  
683    out_of_memory:
684         up_read(&mm->mmap_sem);
685 -       printk(KERN_CRIT "VM: killing process %s\n", current->comm);
686 +       printk(KERN_CRIT "VM: killing process %s(%d:#%u)\n",
687 +               current->comm, current->pid, current->xid);
688         if (user_mode(regs))
689                 do_group_exit(SIGKILL);
690         goto no_context;
691 diff -NurpP --minimal linux-2.6.31.6/arch/powerpc/include/asm/unistd.h linux-2.6.31.6-vs2.3.0.36.24/arch/powerpc/include/asm/unistd.h
692 --- linux-2.6.31.6/arch/powerpc/include/asm/unistd.h    2009-09-10 15:25:41.000000000 +0200
693 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/powerpc/include/asm/unistd.h      2009-10-22 01:07:37.000000000 +0200
694 @@ -275,7 +275,7 @@
695  #endif
696  #define __NR_rtas              255
697  #define __NR_sys_debug_setcontext 256
698 -/* Number 257 is reserved for vserver */
699 +#define __NR_vserver           257
700  #define __NR_migrate_pages     258
701  #define __NR_mbind             259
702  #define __NR_get_mempolicy     260
703 diff -NurpP --minimal linux-2.6.31.6/arch/powerpc/Kconfig linux-2.6.31.6-vs2.3.0.36.24/arch/powerpc/Kconfig
704 --- linux-2.6.31.6/arch/powerpc/Kconfig 2009-09-10 15:25:40.000000000 +0200
705 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/powerpc/Kconfig   2009-09-10 16:11:43.000000000 +0200
706 @@ -933,6 +933,8 @@ source "lib/Kconfig"
707  
708  source "arch/powerpc/Kconfig.debug"
709  
710 +source "kernel/vserver/Kconfig"
711 +
712  source "security/Kconfig"
713  
714  config KEYS_COMPAT
715 diff -NurpP --minimal linux-2.6.31.6/arch/powerpc/kernel/irq.c linux-2.6.31.6-vs2.3.0.36.24/arch/powerpc/kernel/irq.c
716 --- linux-2.6.31.6/arch/powerpc/kernel/irq.c    2009-09-10 15:25:41.000000000 +0200
717 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/powerpc/kernel/irq.c      2009-09-10 16:15:56.000000000 +0200
718 @@ -54,6 +54,7 @@
719  #include <linux/pci.h>
720  #include <linux/debugfs.h>
721  #include <linux/perf_counter.h>
722 +#include <linux/vs_context.h>
723  
724  #include <asm/uaccess.h>
725  #include <asm/system.h>
726 diff -NurpP --minimal linux-2.6.31.6/arch/powerpc/kernel/process.c linux-2.6.31.6-vs2.3.0.36.24/arch/powerpc/kernel/process.c
727 --- linux-2.6.31.6/arch/powerpc/kernel/process.c        2009-09-10 15:25:41.000000000 +0200
728 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/powerpc/kernel/process.c  2009-09-10 16:11:43.000000000 +0200
729 @@ -519,8 +519,9 @@ void show_regs(struct pt_regs * regs)
730  #else
731                 printk("DAR: "REG", DSISR: "REG"\n", regs->dar, regs->dsisr);
732  #endif
733 -       printk("TASK = %p[%d] '%s' THREAD: %p",
734 -              current, task_pid_nr(current), current->comm, task_thread_info(current));
735 +       printk("TASK = %p[%d,#%u] '%s' THREAD: %p",
736 +              current, task_pid_nr(current), current->xid,
737 +              current->comm, task_thread_info(current));
738  
739  #ifdef CONFIG_SMP
740         printk(" CPU: %d", raw_smp_processor_id());
741 diff -NurpP --minimal linux-2.6.31.6/arch/powerpc/kernel/traps.c linux-2.6.31.6-vs2.3.0.36.24/arch/powerpc/kernel/traps.c
742 --- linux-2.6.31.6/arch/powerpc/kernel/traps.c  2009-09-10 15:25:41.000000000 +0200
743 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/powerpc/kernel/traps.c    2009-09-10 16:11:43.000000000 +0200
744 @@ -931,8 +931,9 @@ void nonrecoverable_exception(struct pt_
745  
746  void trace_syscall(struct pt_regs *regs)
747  {
748 -       printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld    %s\n",
749 -              current, task_pid_nr(current), regs->nip, regs->link, regs->gpr[0],
750 +       printk("Task: %p(%d[#%u]), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld    %s\n",
751 +              current, task_pid_nr(current), current->xid,
752 +              regs->nip, regs->link, regs->gpr[0],
753                regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted());
754  }
755  
756 diff -NurpP --minimal linux-2.6.31.6/arch/powerpc/kernel/vdso.c linux-2.6.31.6-vs2.3.0.36.24/arch/powerpc/kernel/vdso.c
757 --- linux-2.6.31.6/arch/powerpc/kernel/vdso.c   2009-03-24 14:18:35.000000000 +0100
758 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/powerpc/kernel/vdso.c     2009-09-10 16:11:43.000000000 +0200
759 @@ -22,6 +22,7 @@
760  #include <linux/security.h>
761  #include <linux/bootmem.h>
762  #include <linux/lmb.h>
763 +#include <linux/vs_memory.h>
764  
765  #include <asm/pgtable.h>
766  #include <asm/system.h>
767 diff -NurpP --minimal linux-2.6.31.6/arch/powerpc/mm/fault.c linux-2.6.31.6-vs2.3.0.36.24/arch/powerpc/mm/fault.c
768 --- linux-2.6.31.6/arch/powerpc/mm/fault.c      2009-09-10 15:25:41.000000000 +0200
769 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/powerpc/mm/fault.c        2009-09-10 16:11:43.000000000 +0200
770 @@ -358,7 +358,8 @@ out_of_memory:
771                 down_read(&mm->mmap_sem);
772                 goto survive;
773         }
774 -       printk("VM: killing process %s\n", current->comm);
775 +       printk("VM: killing process %s(%d:#%u)\n",
776 +               current->comm, current->pid, current->xid);
777         if (user_mode(regs))
778                 do_group_exit(SIGKILL);
779         return SIGKILL;
780 diff -NurpP --minimal linux-2.6.31.6/arch/s390/Kconfig linux-2.6.31.6-vs2.3.0.36.24/arch/s390/Kconfig
781 --- linux-2.6.31.6/arch/s390/Kconfig    2009-09-10 15:25:42.000000000 +0200
782 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/s390/Kconfig      2009-09-10 16:11:43.000000000 +0200
783 @@ -624,6 +624,8 @@ source "fs/Kconfig"
784  
785  source "arch/s390/Kconfig.debug"
786  
787 +source "kernel/vserver/Kconfig"
788 +
789  source "security/Kconfig"
790  
791  source "crypto/Kconfig"
792 diff -NurpP --minimal linux-2.6.31.6/arch/s390/kernel/ptrace.c linux-2.6.31.6-vs2.3.0.36.24/arch/s390/kernel/ptrace.c
793 --- linux-2.6.31.6/arch/s390/kernel/ptrace.c    2009-09-10 15:25:43.000000000 +0200
794 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/s390/kernel/ptrace.c      2009-09-10 16:17:22.000000000 +0200
795 @@ -36,6 +36,7 @@
796  #include <linux/regset.h>
797  #include <linux/tracehook.h>
798  #include <linux/seccomp.h>
799 +#include <linux/vs_base.h>
800  #include <trace/syscall.h>
801  #include <asm/compat.h>
802  #include <asm/segment.h>
803 diff -NurpP --minimal linux-2.6.31.6/arch/s390/kernel/syscalls.S linux-2.6.31.6-vs2.3.0.36.24/arch/s390/kernel/syscalls.S
804 --- linux-2.6.31.6/arch/s390/kernel/syscalls.S  2009-09-10 15:25:43.000000000 +0200
805 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/s390/kernel/syscalls.S    2009-09-10 16:11:43.000000000 +0200
806 @@ -271,7 +271,7 @@ SYSCALL(sys_clock_settime,sys_clock_sett
807  SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper)       /* 260 */
808  SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper)
809  SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper)
810 -NI_SYSCALL                                                     /* reserved for vserver */
811 +SYSCALL(sys_vserver,sys_vserver,sys32_vserver)
812  SYSCALL(sys_s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper)
813  SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper)
814  SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper)
815 diff -NurpP --minimal linux-2.6.31.6/arch/s390/lib/uaccess_pt.c linux-2.6.31.6-vs2.3.0.36.24/arch/s390/lib/uaccess_pt.c
816 --- linux-2.6.31.6/arch/s390/lib/uaccess_pt.c   2009-09-10 15:25:43.000000000 +0200
817 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/s390/lib/uaccess_pt.c     2009-09-29 17:29:02.000000000 +0200
818 @@ -90,7 +90,8 @@ out_of_memory:
819                 down_read(&mm->mmap_sem);
820                 goto survive;
821         }
822 -       printk("VM: killing process %s\n", current->comm);
823 +       printk("VM: killing process %s(%d:#%u)\n",
824 +               current->comm, task_pid_nr(current), current->xid);
825         return ret;
826  
827  out_sigbus:
828 diff -NurpP --minimal linux-2.6.31.6/arch/sh/Kconfig linux-2.6.31.6-vs2.3.0.36.24/arch/sh/Kconfig
829 --- linux-2.6.31.6/arch/sh/Kconfig      2009-09-10 15:25:43.000000000 +0200
830 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/sh/Kconfig        2009-09-10 16:11:43.000000000 +0200
831 @@ -813,6 +813,8 @@ source "fs/Kconfig"
832  
833  source "arch/sh/Kconfig.debug"
834  
835 +source "kernel/vserver/Kconfig"
836 +
837  source "security/Kconfig"
838  
839  source "crypto/Kconfig"
840 diff -NurpP --minimal linux-2.6.31.6/arch/sh/kernel/irq.c linux-2.6.31.6-vs2.3.0.36.24/arch/sh/kernel/irq.c
841 --- linux-2.6.31.6/arch/sh/kernel/irq.c 2009-09-10 15:25:45.000000000 +0200
842 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/sh/kernel/irq.c   2009-09-10 16:11:43.000000000 +0200
843 @@ -11,6 +11,7 @@
844  #include <linux/module.h>
845  #include <linux/kernel_stat.h>
846  #include <linux/seq_file.h>
847 +#include <linux/vs_context.h>
848  #include <asm/processor.h>
849  #include <asm/machvec.h>
850  #include <asm/uaccess.h>
851 diff -NurpP --minimal linux-2.6.31.6/arch/sh/kernel/vsyscall/vsyscall.c linux-2.6.31.6-vs2.3.0.36.24/arch/sh/kernel/vsyscall/vsyscall.c
852 --- linux-2.6.31.6/arch/sh/kernel/vsyscall/vsyscall.c   2009-03-24 14:18:42.000000000 +0100
853 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/sh/kernel/vsyscall/vsyscall.c     2009-09-10 16:11:43.000000000 +0200
854 @@ -19,6 +19,7 @@
855  #include <linux/elf.h>
856  #include <linux/sched.h>
857  #include <linux/err.h>
858 +#include <linux/vs_memory.h>
859  
860  /*
861   * Should the kernel map a VDSO page into processes and pass its
862 diff -NurpP --minimal linux-2.6.31.6/arch/sh/mm/fault_32.c linux-2.6.31.6-vs2.3.0.36.24/arch/sh/mm/fault_32.c
863 --- linux-2.6.31.6/arch/sh/mm/fault_32.c        2009-09-10 15:25:45.000000000 +0200
864 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/sh/mm/fault_32.c  2009-09-29 17:29:36.000000000 +0200
865 @@ -246,7 +246,8 @@ out_of_memory:
866                 down_read(&mm->mmap_sem);
867                 goto survive;
868         }
869 -       printk("VM: killing process %s\n", tsk->comm);
870 +       printk("VM: killing process %s(%d:#%u)\n",
871 +               tsk->comm, task_pid_nr(tsk), tsk->xid);
872         if (user_mode(regs))
873                 do_group_exit(SIGKILL);
874         goto no_context;
875 diff -NurpP --minimal linux-2.6.31.6/arch/sh/mm/tlbflush_64.c linux-2.6.31.6-vs2.3.0.36.24/arch/sh/mm/tlbflush_64.c
876 --- linux-2.6.31.6/arch/sh/mm/tlbflush_64.c     2009-09-10 15:25:45.000000000 +0200
877 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/sh/mm/tlbflush_64.c       2009-09-29 17:29:54.000000000 +0200
878 @@ -306,7 +306,8 @@ out_of_memory:
879                 down_read(&mm->mmap_sem);
880                 goto survive;
881         }
882 -       printk("VM: killing process %s\n", tsk->comm);
883 +       printk("VM: killing process %s(%d:#%u)\n",
884 +               tsk->comm, task_pid_nr(tsk), tsk->xid);
885         if (user_mode(regs))
886                 do_group_exit(SIGKILL);
887         goto no_context;
888 diff -NurpP --minimal linux-2.6.31.6/arch/sparc/include/asm/tlb_64.h linux-2.6.31.6-vs2.3.0.36.24/arch/sparc/include/asm/tlb_64.h
889 --- linux-2.6.31.6/arch/sparc/include/asm/tlb_64.h      2009-09-10 15:25:45.000000000 +0200
890 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/sparc/include/asm/tlb_64.h        2009-09-10 16:11:43.000000000 +0200
891 @@ -3,6 +3,7 @@
892  
893  #include <linux/swap.h>
894  #include <linux/pagemap.h>
895 +#include <linux/vs_memory.h>
896  #include <asm/pgalloc.h>
897  #include <asm/tlbflush.h>
898  #include <asm/mmu_context.h>
899 diff -NurpP --minimal linux-2.6.31.6/arch/sparc/include/asm/unistd.h linux-2.6.31.6-vs2.3.0.36.24/arch/sparc/include/asm/unistd.h
900 --- linux-2.6.31.6/arch/sparc/include/asm/unistd.h      2009-09-10 15:25:45.000000000 +0200
901 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/sparc/include/asm/unistd.h        2009-09-10 16:11:43.000000000 +0200
902 @@ -335,7 +335,7 @@
903  #define __NR_timer_getoverrun  264
904  #define __NR_timer_delete      265
905  #define __NR_timer_create      266
906 -/* #define __NR_vserver                267 Reserved for VSERVER */
907 +#define __NR_vserver           267
908  #define __NR_io_setup          268
909  #define __NR_io_destroy                269
910  #define __NR_io_submit         270
911 diff -NurpP --minimal linux-2.6.31.6/arch/sparc/Kconfig linux-2.6.31.6-vs2.3.0.36.24/arch/sparc/Kconfig
912 --- linux-2.6.31.6/arch/sparc/Kconfig   2009-09-10 15:25:45.000000000 +0200
913 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/sparc/Kconfig     2009-09-10 16:11:43.000000000 +0200
914 @@ -530,6 +530,8 @@ source "fs/Kconfig"
915  
916  source "arch/sparc/Kconfig.debug"
917  
918 +source "kernel/vserver/Kconfig"
919 +
920  source "security/Kconfig"
921  
922  source "crypto/Kconfig"
923 diff -NurpP --minimal linux-2.6.31.6/arch/sparc/kernel/systbls_32.S linux-2.6.31.6-vs2.3.0.36.24/arch/sparc/kernel/systbls_32.S
924 --- linux-2.6.31.6/arch/sparc/kernel/systbls_32.S       2009-09-10 15:25:45.000000000 +0200
925 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/sparc/kernel/systbls_32.S 2009-09-10 16:11:43.000000000 +0200
926 @@ -70,7 +70,7 @@ sys_call_table:
927  /*250*/        .long sparc_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl
928  /*255*/        .long sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
929  /*260*/        .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
930 -/*265*/        .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy
931 +/*265*/        .long sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy
932  /*270*/        .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
933  /*275*/        .long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
934  /*280*/        .long sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat
935 diff -NurpP --minimal linux-2.6.31.6/arch/sparc/kernel/systbls_64.S linux-2.6.31.6-vs2.3.0.36.24/arch/sparc/kernel/systbls_64.S
936 --- linux-2.6.31.6/arch/sparc/kernel/systbls_64.S       2009-09-10 15:25:45.000000000 +0200
937 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/sparc/kernel/systbls_64.S 2009-09-10 16:11:43.000000000 +0200
938 @@ -71,7 +71,7 @@ sys_call_table32:
939  /*250*/        .word sys32_mremap, sys32_sysctl, sys32_getsid, sys_fdatasync, sys32_nfsservctl
940         .word sys32_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep
941  /*260*/        .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun
942 -       .word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy
943 +       .word sys_timer_delete, compat_sys_timer_create, sys32_vserver, compat_sys_io_setup, sys_io_destroy
944  /*270*/        .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink
945         .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid
946  /*280*/        .word sys32_tee, sys_add_key, sys_request_key, sys_keyctl, compat_sys_openat
947 @@ -146,7 +146,7 @@ sys_call_table:
948  /*250*/        .word sys_64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl
949         .word sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
950  /*260*/        .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
951 -       .word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy
952 +       .word sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy
953  /*270*/        .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
954         .word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
955  /*280*/        .word sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat
956 diff -NurpP --minimal linux-2.6.31.6/arch/x86/ia32/ia32entry.S linux-2.6.31.6-vs2.3.0.36.24/arch/x86/ia32/ia32entry.S
957 --- linux-2.6.31.6/arch/x86/ia32/ia32entry.S    2009-11-12 12:10:08.000000000 +0100
958 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/x86/ia32/ia32entry.S      2009-11-12 12:26:38.000000000 +0100
959 @@ -777,7 +777,7 @@ ia32_sys_call_table:
960         .quad sys_tgkill                /* 270 */
961         .quad compat_sys_utimes
962         .quad sys32_fadvise64_64
963 -       .quad quiet_ni_syscall  /* sys_vserver */
964 +       .quad sys32_vserver
965         .quad sys_mbind
966         .quad compat_sys_get_mempolicy  /* 275 */
967         .quad sys_set_mempolicy
968 diff -NurpP --minimal linux-2.6.31.6/arch/x86/include/asm/unistd_64.h linux-2.6.31.6-vs2.3.0.36.24/arch/x86/include/asm/unistd_64.h
969 --- linux-2.6.31.6/arch/x86/include/asm/unistd_64.h     2009-09-10 15:25:47.000000000 +0200
970 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/x86/include/asm/unistd_64.h       2009-09-10 16:11:43.000000000 +0200
971 @@ -535,7 +535,7 @@ __SYSCALL(__NR_tgkill, sys_tgkill)
972  #define __NR_utimes                            235
973  __SYSCALL(__NR_utimes, sys_utimes)
974  #define __NR_vserver                           236
975 -__SYSCALL(__NR_vserver, sys_ni_syscall)
976 +__SYSCALL(__NR_vserver, sys_vserver)
977  #define __NR_mbind                             237
978  __SYSCALL(__NR_mbind, sys_mbind)
979  #define __NR_set_mempolicy                     238
980 diff -NurpP --minimal linux-2.6.31.6/arch/x86/Kconfig linux-2.6.31.6-vs2.3.0.36.24/arch/x86/Kconfig
981 --- linux-2.6.31.6/arch/x86/Kconfig     2009-09-10 15:25:46.000000000 +0200
982 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/x86/Kconfig       2009-09-10 16:11:43.000000000 +0200
983 @@ -2088,6 +2088,8 @@ source "fs/Kconfig"
984  
985  source "arch/x86/Kconfig.debug"
986  
987 +source "kernel/vserver/Kconfig"
988 +
989  source "security/Kconfig"
990  
991  source "crypto/Kconfig"
992 diff -NurpP --minimal linux-2.6.31.6/arch/x86/kernel/syscall_table_32.S linux-2.6.31.6-vs2.3.0.36.24/arch/x86/kernel/syscall_table_32.S
993 --- linux-2.6.31.6/arch/x86/kernel/syscall_table_32.S   2009-09-10 15:25:47.000000000 +0200
994 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/x86/kernel/syscall_table_32.S     2009-09-10 16:11:43.000000000 +0200
995 @@ -272,7 +272,7 @@ ENTRY(sys_call_table)
996         .long sys_tgkill        /* 270 */
997         .long sys_utimes
998         .long sys_fadvise64_64
999 -       .long sys_ni_syscall    /* sys_vserver */
1000 +       .long sys_vserver
1001         .long sys_mbind
1002         .long sys_get_mempolicy
1003         .long sys_set_mempolicy
1004 diff -NurpP --minimal linux-2.6.31.6/arch/xtensa/mm/fault.c linux-2.6.31.6-vs2.3.0.36.24/arch/xtensa/mm/fault.c
1005 --- linux-2.6.31.6/arch/xtensa/mm/fault.c       2009-09-10 15:25:48.000000000 +0200
1006 +++ linux-2.6.31.6-vs2.3.0.36.24/arch/xtensa/mm/fault.c 2009-09-29 17:30:14.000000000 +0200
1007 @@ -151,7 +151,8 @@ out_of_memory:
1008                 down_read(&mm->mmap_sem);
1009                 goto survive;
1010         }
1011 -       printk("VM: killing process %s\n", current->comm);
1012 +       printk("VM: killing process %s(%d:#%u)\n",
1013 +               current->comm, task_pid_nr(current), current->xid);
1014         if (user_mode(regs))
1015                 do_group_exit(SIGKILL);
1016         bad_page_fault(regs, address, SIGKILL);
1017 diff -NurpP --minimal linux-2.6.31.6/Documentation/scheduler/sched-cfs-hard-limits.txt linux-2.6.31.6-vs2.3.0.36.24/Documentation/scheduler/sched-cfs-hard-limits.txt
1018 --- linux-2.6.31.6/Documentation/scheduler/sched-cfs-hard-limits.txt    1970-01-01 01:00:00.000000000 +0100
1019 +++ linux-2.6.31.6-vs2.3.0.36.24/Documentation/scheduler/sched-cfs-hard-limits.txt      2009-10-06 04:39:46.000000000 +0200
1020 @@ -0,0 +1,52 @@
1021 +CPU HARD LIMITS FOR CFS GROUPS
1022 +==============================
1023 +
1024 +1. Overview
1025 +2. Interface
1026 +3. Examples
1027 +
1028 +1. Overview
1029 +-----------
1030 +
1031 +CFS is a proportional share scheduler which tries to divide the CPU time
1032 +proportionately between tasks or groups of tasks (task group/cgroup) depending
1033 +on the priority/weight of the task or shares assigned to groups of tasks.
1034 +In CFS, a task/task group can get more than its share of CPU if there are
1035 +enough idle CPU cycles available in the system, due to the work conserving
1036 +nature of the scheduler. However in certain scenarios (like pay-per-use),
1037 +it is desirable not to provide extra time to a group even in the presence
1038 +of idle CPU cycles. This is where hard limiting can be of use.
1039 +
1040 +Hard limits for task groups can be set by specifying how much CPU runtime a
1041 +group can consume within a given period. If the group consumes more CPU time
1042 +than the runtime in a given period, it gets throttled. None of the tasks of
1043 +the throttled group gets to run until the runtime of the group gets refreshed
1044 +at the beginning of the next period.
1045 +
1046 +2. Interface
1047 +------------
1048 +
1049 +The hard limit feature adds 3 cgroup files to the CFS group scheduler:
1050 +
1051 +cfs_runtime_us: Hard limit for the group in microseconds.
1052 +
1053 +cfs_period_us: Time period in microseconds within which the hard limit is
1054 +enforced.
1055 +
1056 +cfs_hard_limit: The control file to enable or disable hard limiting for the
1057 +group.
1058 +
1059 +A group gets created with default values for runtime and period and with
1060 +hard limit disabled. Each group can set its own values for runtime and period
1061 +independent of other groups in the system.
1062 +
1063 +3. Examples
1064 +-----------
1065 +
1066 +# mount -t cgroup -ocpu none /cgroups/
1067 +# cd /cgroups
1068 +# mkdir 1
1069 +# cd 1/
1070 +# echo 250000 > cfs_runtime_us /* set a 250ms runtime or limit */
1071 +# echo 500000 > cfs_period_us /* set a 500ms period */
1072 +# echo 1 > cfs_hard_limit /* enable hard limiting for group 1/ */
1073 diff -NurpP --minimal linux-2.6.31.6/Documentation/vserver/debug.txt linux-2.6.31.6-vs2.3.0.36.24/Documentation/vserver/debug.txt
1074 --- linux-2.6.31.6/Documentation/vserver/debug.txt      1970-01-01 01:00:00.000000000 +0100
1075 +++ linux-2.6.31.6-vs2.3.0.36.24/Documentation/vserver/debug.txt        2009-09-10 16:11:43.000000000 +0200
1076 @@ -0,0 +1,154 @@
1077 +
1078 +debug_cvirt:
1079 +
1080 + 2   4 "vx_map_tgid: %p/%llx: %d -> %d"
1081 +       "vx_rmap_tgid: %p/%llx: %d -> %d"
1082 +
1083 +debug_dlim:
1084 +
1085 + 0   1 "ALLOC (%p,#%d)%c inode (%d)"
1086 +       "FREE  (%p,#%d)%c inode"
1087 + 1   2 "ALLOC (%p,#%d)%c %lld bytes (%d)"
1088 +       "FREE  (%p,#%d)%c %lld bytes"
1089 + 2   4 "ADJUST: %lld,%lld on %ld,%ld [mult=%d]"
1090 + 3   8 "ext3_has_free_blocks(%p): %lu<%lu+1, %c, %u!=%u r=%d"
1091 +       "ext3_has_free_blocks(%p): free=%lu, root=%lu"
1092 +       "rcu_free_dl_info(%p)"
1093 + 4  10 "alloc_dl_info(%p,%d) = %p"
1094 +       "dealloc_dl_info(%p)"
1095 +       "get_dl_info(%p[#%d.%d])"
1096 +       "put_dl_info(%p[#%d.%d])"
1097 + 5  20 "alloc_dl_info(%p,%d)*"
1098 + 6  40 "__hash_dl_info: %p[#%d]"
1099 +       "__unhash_dl_info: %p[#%d]"
1100 + 7  80 "locate_dl_info(%p,#%d) = %p"
1101 +
1102 +debug_misc:
1103 +
1104 + 0   1 "destroy_dqhash: %p [#0x%08x] c=%d"
1105 +       "new_dqhash: %p [#0x%08x]"
1106 +       "vroot[%d]_clr_dev: dev=%p[%lu,%d:%d]"
1107 +       "vroot[%d]_get_real_bdev: dev=%p[%lu,%d:%d]"
1108 +       "vroot[%d]_set_dev: dev=%p[%lu,%d:%d]"
1109 +       "vroot_get_real_bdev not set"
1110 + 1   2 "cow_break_link(»%s«)"
1111 +       "temp copy »%s«"
1112 + 2   4 "dentry_open(new): %p"
1113 +       "dentry_open(old): %p"
1114 +       "lookup_create(new): %p"
1115 +       "old path »%s«"
1116 +       "path_lookup(old): %d"
1117 +       "vfs_create(new): %d"
1118 +       "vfs_rename: %d"
1119 +       "vfs_sendfile: %d"
1120 + 3   8 "fput(new_file=%p[#%d])"
1121 +       "fput(old_file=%p[#%d])"
1122 + 4  10 "vx_info_kill(%p[#%d],%d,%d) = %d"
1123 +       "vx_info_kill(%p[#%d],%d,%d)*"
1124 + 5  20 "vs_reboot(%p[#%d],%d)"
1125 + 6  40 "dropping task %p[#%u,%u] for %p[#%u,%u]"
1126 +
1127 +debug_net:
1128 +
1129 + 2   4 "nx_addr_conflict(%p,%p) %d.%d,%d.%d"
1130 + 3   8 "inet_bind(%p) %d.%d.%d.%d, %d.%d.%d.%d, %d.%d.%d.%d"
1131 +       "inet_bind(%p)* %p,%p;%lx %d.%d.%d.%d"
1132 + 4  10 "ip_route_connect(%p) %p,%p;%lx"
1133 + 5  20 "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx"
1134 + 6  40 "sk,egf: %p [#%d] (from %d)"
1135 +       "sk,egn: %p [#%d] (from %d)"
1136 +       "sk,req: %p [#%d] (from %d)"
1137 +       "sk: %p [#%d] (from %d)"
1138 +       "tw: %p [#%d] (from %d)"
1139 + 7  80 "__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d"
1140 +       "__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d"
1141 +
1142 +debug_nid:
1143 +
1144 + 0   1 "__lookup_nx_info(#%u): %p[#%u]"
1145 +       "alloc_nx_info(%d) = %p"
1146 +       "create_nx_info(%d) (dynamic rejected)"
1147 +       "create_nx_info(%d) = %p (already there)"
1148 +       "create_nx_info(%d) = %p (new)"
1149 +       "dealloc_nx_info(%p)"
1150 + 1   2 "alloc_nx_info(%d)*"
1151 +       "create_nx_info(%d)*"
1152 + 2   4 "get_nx_info(%p[#%d.%d])"
1153 +       "put_nx_info(%p[#%d.%d])"
1154 + 3   8 "claim_nx_info(%p[#%d.%d.%d]) %p"
1155 +       "clr_nx_info(%p[#%d.%d])"
1156 +       "init_nx_info(%p[#%d.%d])"
1157 +       "release_nx_info(%p[#%d.%d.%d]) %p"
1158 +       "set_nx_info(%p[#%d.%d])"
1159 + 4  10 "__hash_nx_info: %p[#%d]"
1160 +       "__nx_dynamic_id: [#%d]"
1161 +       "__unhash_nx_info: %p[#%d.%d.%d]"
1162 + 5  20 "moved task %p into nxi:%p[#%d]"
1163 +       "nx_migrate_task(%p,%p[#%d.%d.%d])"
1164 +       "task_get_nx_info(%p)"
1165 + 6  40 "nx_clear_persistent(%p[#%d])"
1166 +
1167 +debug_quota:
1168 +
1169 + 0   1 "quota_sync_dqh(%p,%d) discard inode %p"
1170 + 1   2 "quota_sync_dqh(%p,%d)"
1171 +       "sync_dquots(%p,%d)"
1172 +       "sync_dquots_dqh(%p,%d)"
1173 + 3   8 "do_quotactl(%p,%d,cmd=%d,id=%d,%p)"
1174 +
1175 +debug_switch:
1176 +
1177 + 0   1 "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]"
1178 + 1   2 "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]"
1179 + 4  10 "%s: (%s %s) returned %s with %d"
1180 +
1181 +debug_tag:
1182 +
1183 + 7  80 "dx_parse_tag(»%s«): %d:#%d"
1184 +       "dx_propagate_tag(%p[#%lu.%d]): %d,%d"
1185 +
1186 +debug_xid:
1187 +
1188 + 0   1 "__lookup_vx_info(#%u): %p[#%u]"
1189 +       "alloc_vx_info(%d) = %p"
1190 +       "alloc_vx_info(%d)*"
1191 +       "create_vx_info(%d) (dynamic rejected)"
1192 +       "create_vx_info(%d) = %p (already there)"
1193 +       "create_vx_info(%d) = %p (new)"
1194 +       "dealloc_vx_info(%p)"
1195 +       "loc_vx_info(%d) = %p (found)"
1196 +       "loc_vx_info(%d) = %p (new)"
1197 +       "loc_vx_info(%d) = %p (not available)"
1198 + 1   2 "create_vx_info(%d)*"
1199 +       "loc_vx_info(%d)*"
1200 + 2   4 "get_vx_info(%p[#%d.%d])"
1201 +       "put_vx_info(%p[#%d.%d])"
1202 + 3   8 "claim_vx_info(%p[#%d.%d.%d]) %p"
1203 +       "clr_vx_info(%p[#%d.%d])"
1204 +       "init_vx_info(%p[#%d.%d])"
1205 +       "release_vx_info(%p[#%d.%d.%d]) %p"
1206 +       "set_vx_info(%p[#%d.%d])"
1207 + 4  10 "__hash_vx_info: %p[#%d]"
1208 +       "__unhash_vx_info: %p[#%d.%d.%d]"
1209 +       "__vx_dynamic_id: [#%d]"
1210 + 5  20 "enter_vx_info(%p[#%d],%p) %p[#%d,%p]"
1211 +       "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]"
1212 +       "moved task %p into vxi:%p[#%d]"
1213 +       "task_get_vx_info(%p)"
1214 +       "vx_migrate_task(%p,%p[#%d.%d])"
1215 + 6  40 "vx_clear_persistent(%p[#%d])"
1216 +       "vx_exit_init(%p[#%d],%p[#%d,%d,%d])"
1217 +       "vx_set_init(%p[#%d],%p[#%d,%d,%d])"
1218 +       "vx_set_persistent(%p[#%d])"
1219 +       "vx_set_reaper(%p[#%d],%p[#%d,%d])"
1220 + 7  80 "vx_child_reaper(%p[#%u,%u]) = %p[#%u,%u]"
1221 +
1222 +
1223 +debug_limit:
1224 +
1225 + n 2^n "vx_acc_cres[%5d,%s,%2d]: %5d%s"
1226 +       "vx_cres_avail[%5d,%s,%2d]: %5ld > %5d + %5d"
1227 +
1228 + m 2^m "vx_acc_page[%5d,%s,%2d]: %5d%s"
1229 +       "vx_acc_pages[%5d,%s,%2d]: %5d += %5d"
1230 +       "vx_pages_avail[%5d,%s,%2d]: %5ld > %5d + %5d"
1231 diff -NurpP --minimal linux-2.6.31.6/drivers/block/Kconfig linux-2.6.31.6-vs2.3.0.36.24/drivers/block/Kconfig
1232 --- linux-2.6.31.6/drivers/block/Kconfig        2009-09-10 15:25:49.000000000 +0200
1233 +++ linux-2.6.31.6-vs2.3.0.36.24/drivers/block/Kconfig  2009-09-10 16:11:43.000000000 +0200
1234 @@ -271,6 +271,13 @@ config BLK_DEV_CRYPTOLOOP
1235           instead, which can be configured to be on-disk compatible with the
1236           cryptoloop device.
1237  
1238 +config BLK_DEV_VROOT
1239 +       tristate "Virtual Root device support"
1240 +       depends on QUOTACTL
1241 +       ---help---
1242 +         Saying Y here will allow you to use quota/fs ioctls on a shared
1243 +         partition within a virtual server without compromising security.
1244 +
1245  config BLK_DEV_NBD
1246         tristate "Network block device support"
1247         depends on NET
1248 diff -NurpP --minimal linux-2.6.31.6/drivers/block/loop.c linux-2.6.31.6-vs2.3.0.36.24/drivers/block/loop.c
1249 --- linux-2.6.31.6/drivers/block/loop.c 2009-09-10 15:25:49.000000000 +0200
1250 +++ linux-2.6.31.6-vs2.3.0.36.24/drivers/block/loop.c   2009-09-10 16:11:43.000000000 +0200
1251 @@ -74,6 +74,7 @@
1252  #include <linux/gfp.h>
1253  #include <linux/kthread.h>
1254  #include <linux/splice.h>
1255 +#include <linux/vs_context.h>
1256  
1257  #include <asm/uaccess.h>
1258  
1259 @@ -812,6 +813,7 @@ static int loop_set_fd(struct loop_devic
1260         lo->lo_blocksize = lo_blocksize;
1261         lo->lo_device = bdev;
1262         lo->lo_flags = lo_flags;
1263 +       lo->lo_xid = vx_current_xid();
1264         lo->lo_backing_file = file;
1265         lo->transfer = transfer_none;
1266         lo->ioctl = NULL;
1267 @@ -937,6 +939,7 @@ static int loop_clr_fd(struct loop_devic
1268         lo->lo_encrypt_key_size = 0;
1269         lo->lo_flags = 0;
1270         lo->lo_thread = NULL;
1271 +       lo->lo_xid = 0;
1272         memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
1273         memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
1274         memset(lo->lo_file_name, 0, LO_NAME_SIZE);
1275 @@ -971,7 +974,7 @@ loop_set_status(struct loop_device *lo, 
1276  
1277         if (lo->lo_encrypt_key_size &&
1278             lo->lo_key_owner != uid &&
1279 -           !capable(CAP_SYS_ADMIN))
1280 +           !vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_CLOOP))
1281                 return -EPERM;
1282         if (lo->lo_state != Lo_bound)
1283                 return -ENXIO;
1284 @@ -1055,7 +1058,8 @@ loop_get_status(struct loop_device *lo, 
1285         memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
1286         info->lo_encrypt_type =
1287                 lo->lo_encryption ? lo->lo_encryption->number : 0;
1288 -       if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
1289 +       if (lo->lo_encrypt_key_size &&
1290 +               vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_CLOOP)) {
1291                 info->lo_encrypt_key_size = lo->lo_encrypt_key_size;
1292                 memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
1293                        lo->lo_encrypt_key_size);
1294 @@ -1399,6 +1403,9 @@ static int lo_open(struct block_device *
1295  {
1296         struct loop_device *lo = bdev->bd_disk->private_data;
1297  
1298 +       if (!vx_check(lo->lo_xid, VS_IDENT|VS_HOSTID|VS_ADMIN_P))
1299 +               return -EACCES;
1300 +
1301         mutex_lock(&lo->lo_ctl_mutex);
1302         lo->lo_refcnt++;
1303         mutex_unlock(&lo->lo_ctl_mutex);
1304 diff -NurpP --minimal linux-2.6.31.6/drivers/block/Makefile linux-2.6.31.6-vs2.3.0.36.24/drivers/block/Makefile
1305 --- linux-2.6.31.6/drivers/block/Makefile       2009-09-10 15:25:49.000000000 +0200
1306 +++ linux-2.6.31.6-vs2.3.0.36.24/drivers/block/Makefile 2009-09-10 16:11:43.000000000 +0200
1307 @@ -34,6 +34,7 @@ obj-$(CONFIG_VIODASD)         += viodasd.o
1308  obj-$(CONFIG_BLK_DEV_SX8)      += sx8.o
1309  obj-$(CONFIG_BLK_DEV_UB)       += ub.o
1310  obj-$(CONFIG_BLK_DEV_HD)       += hd.o
1311 +obj-$(CONFIG_BLK_DEV_VROOT)    += vroot.o
1312  
1313  obj-$(CONFIG_XEN_BLKDEV_FRONTEND)      += xen-blkfront.o
1314  
1315 diff -NurpP --minimal linux-2.6.31.6/drivers/block/vroot.c linux-2.6.31.6-vs2.3.0.36.24/drivers/block/vroot.c
1316 --- linux-2.6.31.6/drivers/block/vroot.c        1970-01-01 01:00:00.000000000 +0100
1317 +++ linux-2.6.31.6-vs2.3.0.36.24/drivers/block/vroot.c  2009-09-10 16:11:43.000000000 +0200
1318 @@ -0,0 +1,281 @@
1319 +/*
1320 + *  linux/drivers/block/vroot.c
1321 + *
1322 + *  written by Herbert Pötzl, 9/11/2002
1323 + *  ported to 2.6.10 by Herbert Pötzl, 30/12/2004
1324 + *
1325 + *  based on the loop.c code by Theodore Ts'o.
1326 + *
1327 + * Copyright (C) 2002-2007 by Herbert Pötzl.
1328 + * Redistribution of this file is permitted under the
1329 + * GNU General Public License.
1330 + *
1331 + */
1332 +
1333 +#include <linux/module.h>
1334 +#include <linux/moduleparam.h>
1335 +#include <linux/file.h>
1336 +#include <linux/major.h>
1337 +#include <linux/blkdev.h>
1338 +
1339 +#include <linux/vroot.h>
1340 +#include <linux/vs_context.h>
1341 +
1342 +
1343 +static int max_vroot = 8;
1344 +
1345 +static struct vroot_device *vroot_dev;
1346 +static struct gendisk **disks;
1347 +
1348 +
1349 +/*
1350 + * Bind vr to the block device behind file descriptor arg.
1351 + * Returns 0, or -EBUSY / -EBADF / -EINVAL with vr left untouched.
1352 + */
1353 +static int vroot_set_dev(
1354 +       struct vroot_device *vr,
1355 +       struct block_device *bdev,
1356 +       unsigned int arg)
1357 +{
1358 +       struct block_device *target;
1359 +       struct inode *ino;
1360 +       struct file *filp;
1361 +       int ret = -EINVAL;
1362 +
1363 +       /* only an unbound device may be attached */
1364 +       if (vr->vr_state != Vr_unbound)
1365 +               return -EBUSY;
1366 +
1367 +       filp = fget(arg);
1368 +       if (!filp)
1369 +               return -EBADF;
1370 +
1371 +       /* anything but a block device node is refused with -EINVAL */
1372 +       ino = filp->f_dentry->d_inode;
1373 +       if (!S_ISBLK(ino->i_mode))
1374 +               goto drop_file;
1375 +
1376 +       /* remember the real device and take a reference on its inode */
1377 +       target = ino->i_bdev;
1378 +       vr->vr_device = target;
1379 +       __iget(target->bd_inode);
1380 +       vxdprintk(VXD_CBIT(misc, 0),
1381 +               "vroot[%d]_set_dev: dev=" VXF_DEV,
1382 +               vr->vr_number, VXD_DEV(target));
1383 +
1384 +       vr->vr_state = Vr_bound;
1385 +       ret = 0;
1386 +
1387 +drop_file:
1388 +       fput(filp);
1389 +       return ret;
1390 +}
1391 +
1392 +/* Unbind vr from its real device; -ENXIO if unbound, -EBUSY if still open. */
1393 +static int vroot_clr_dev(
1394 +       struct vroot_device *vr,
1395 +       struct block_device *bdev)
1396 +{
1397 +       struct block_device *target = vr->vr_device;
1398 +
1399 +       if (vr->vr_state != Vr_bound)
1400 +               return -ENXIO;
1401 +       /* the fd used to issue this ioctl holds one reference itself */
1402 +       if (vr->vr_refcnt > 1)
1403 +               return -EBUSY;
1404 +
1405 +       vxdprintk(VXD_CBIT(misc, 0),
1406 +               "vroot[%d]_clr_dev: dev=" VXF_DEV,
1407 +               vr->vr_number, VXD_DEV(target));
1408 +
1409 +       vr->vr_device = NULL;
1410 +       vr->vr_state = Vr_unbound;
1411 +       bdput(target);
1412 +       return 0;
1413 +}
1414 +
1415 +
1416 +/*
1417 + * Block-device ioctl hook: runs VROOT_SET_DEV / VROOT_CLR_DEV with
1418 + * vr_ctl_mutex held; every other command yields -EINVAL.
1419 + */
1420 +static int vr_ioctl(struct block_device *bdev, fmode_t mode,
1421 +       unsigned int cmd, unsigned long arg)
1422 +{
1423 +       struct vroot_device *dev = bdev->bd_disk->private_data;
1424 +       int ret = -EINVAL;
1425 +
1426 +       down(&dev->vr_ctl_mutex);
1427 +
1428 +       /* for VROOT_SET_DEV, arg is handed to fget() by vroot_set_dev() */
1429 +       if (cmd == VROOT_SET_DEV)
1430 +               ret = vroot_set_dev(dev, bdev, arg);
1431 +       else if (cmd == VROOT_CLR_DEV)
1432 +               ret = vroot_clr_dev(dev, bdev);
1433 +
1434 +       up(&dev->vr_ctl_mutex);
1435 +       return ret;
1436 +}
1437 +
1438 +static int vr_open(struct block_device *bdev, fmode_t mode)
1439 +{
1440 +       struct vroot_device *dev = bdev->bd_disk->private_data;
1441 +
1442 +       down(&dev->vr_ctl_mutex);       /* vr_refcnt is changed under it */
1443 +       dev->vr_refcnt += 1;            /* count one more opener */
1444 +       up(&dev->vr_ctl_mutex);
1445 +       return 0;                       /* opening never fails */
1446 +}
1447 +
1448 +static int vr_release(struct gendisk *disk, fmode_t mode)
1449 +{
1450 +       struct vroot_device *dev = disk->private_data;
1451 +
1452 +       down(&dev->vr_ctl_mutex);       /* vr_refcnt is changed under it */
1453 +       dev->vr_refcnt -= 1;            /* one opener went away */
1454 +       up(&dev->vr_ctl_mutex);
1455 +       return 0;
1456 +}
1457 +
1458 +static struct block_device_operations vr_fops = {
1459 +       .owner =        THIS_MODULE,
1460 +       .open =         vr_open,        /* increments vr_refcnt */
1461 +       .release =      vr_release,     /* decrements vr_refcnt */
1462 +       .ioctl =        vr_ioctl,       /* VROOT_SET_DEV / VROOT_CLR_DEV */
1463 +};
1464 +
1465 +/* Map a vroot bdev to its bound real device (inode ref taken) or ERR_PTR. */
1466 +struct block_device *__vroot_get_real_bdev(struct block_device *bdev)
1467 +{
1468 +       struct vroot_device *vr;
1469 +       struct block_device *real_bdev;
1470 +       int minor = iminor(bdev->bd_inode);
1471 +
1472 +       /* refuse minors beyond the vroot_dev[] table and unbound devices */
1473 +       if (minor >= max_vroot || vroot_dev[minor].vr_state != Vr_bound)
1474 +               return ERR_PTR(-ENXIO);
1475 +
1476 +       vr = &vroot_dev[minor];
1477 +       real_bdev = vr->vr_device;
1478 +       /* print only once bound: vr_device is NULL for an unbound device */
1479 +       vxdprintk(VXD_CBIT(misc, 0),
1480 +               "vroot[%d]_get_real_bdev: dev=" VXF_DEV,
1481 +               vr->vr_number, VXD_DEV(real_bdev));
1482 +       __iget(real_bdev->bd_inode);
1483 +       return real_bdev;
1484 +}
1485 +
1486 +/*
1487 + * And now the modules code and kernel interface.
1488 + */
1489 +
1490 +module_param(max_vroot, int, 0);
1491 +
1492 +MODULE_PARM_DESC(max_vroot, "Maximum number of vroot devices (1-256)");
1493 +MODULE_LICENSE("GPL");
1494 +MODULE_ALIAS_BLOCKDEV_MAJOR(VROOT_MAJOR);
1495 +
1496 +MODULE_AUTHOR ("Herbert Pötzl");
1497 +MODULE_DESCRIPTION ("Virtual Root Device Mapper");
1498 +
1499 +
1500 +/* Module init: claim VROOT_MAJOR and set up max_vroot devices; 0 or -errno. */
1501 +int __init vroot_init(void)
1502 +{
1503 +       int err, i;
1504 +
1505 +       if (max_vroot < 1 || max_vroot > 256) {
1506 +               max_vroot = MAX_VROOT_DEFAULT;
1507 +               printk(KERN_WARNING "vroot: invalid max_vroot "
1508 +                       "(must be between 1 and 256), "
1509 +                       "using default (%d)\n", max_vroot);
1510 +       }
1511 +       if (register_blkdev(VROOT_MAJOR, "vroot"))
1512 +               return -EIO;
1513 +       /* both tables are zero-filled, so every device starts out cleared */
1514 +       err = -ENOMEM;
1515 +       vroot_dev = kcalloc(max_vroot, sizeof(*vroot_dev), GFP_KERNEL);
1516 +       if (!vroot_dev)
1517 +               goto out_mem1;
1518 +       disks = kcalloc(max_vroot, sizeof(*disks), GFP_KERNEL);
1519 +       if (!disks)
1520 +               goto out_mem2;
1521 +
1522 +       for (i = 0; i < max_vroot; i++) {
1523 +               disks[i] = alloc_disk(1);
1524 +               if (!disks[i])
1525 +                       goto out_mem3;
1526 +               disks[i]->queue = blk_alloc_queue(GFP_KERNEL);
1527 +               if (!disks[i]->queue) {
1528 +                       /* out_mem3 only unwinds the slots below i */
1529 +                       put_disk(disks[i]);
1530 +                       goto out_mem3;
1531 +               }
1532 +       }
1533 +
1534 +       for (i = 0; i < max_vroot; i++) {
1535 +               struct vroot_device *vr = &vroot_dev[i];
1536 +               struct gendisk *disk = disks[i];
1537 +
1538 +               init_MUTEX(&vr->vr_ctl_mutex);
1539 +               vr->vr_number = i;
1540 +               disk->major = VROOT_MAJOR;
1541 +               disk->first_minor = i;
1542 +               disk->fops = &vr_fops;
1543 +               sprintf(disk->disk_name, "vroot%d", i);
1544 +               disk->private_data = vr;
1545 +       }
1546 +       /* i == max_vroot here, so a failure below unwinds every disk */
1547 +       err = register_vroot_grb(&__vroot_get_real_bdev);
1548 +       if (err)
1549 +               goto out_mem3;
1550 +       for (i = 0; i < max_vroot; i++)
1551 +               add_disk(disks[i]);
1552 +       printk(KERN_INFO "vroot: loaded (max %d devices)\n", max_vroot);
1553 +       return 0;
1554 +out_mem3:
1555 +       while (i--) {
1556 +               blk_cleanup_queue(disks[i]->queue);
1557 +               put_disk(disks[i]);
1558 +       }
1559 +       kfree(disks);
1560 +out_mem2:
1561 +       kfree(vroot_dev);
1562 +out_mem1:
1563 +       unregister_blkdev(VROOT_MAJOR, "vroot");
1564 +       printk(KERN_ERR "vroot: ran out of memory\n");
1565 +       return err;
1566 +}
1567 +
1568 +/* Module exit: drop the grb hook, then release disks, queues and tables. */
1569 +void vroot_exit(void)
1570 +{
1571 +       int i;
1572 +
1573 +       if (unregister_vroot_grb(&__vroot_get_real_bdev))
1574 +               printk(KERN_WARNING "vroot: cannot unregister grb\n");
1575 +       for (i = 0; i < max_vroot; i++) {
1576 +               del_gendisk(disks[i]);
1577 +               blk_cleanup_queue(disks[i]->queue);
1578 +               put_disk(disks[i]);
1579 +       }
1580 +       unregister_blkdev(VROOT_MAJOR, "vroot");
1581 +       kfree(disks);
1582 +       kfree(vroot_dev);
1583 +}
1584 +
1585 +module_init(vroot_init);
1586 +module_exit(vroot_exit);
1587 +
1588 +#ifndef MODULE
1589 +
1590 +static int __init max_vroot_setup(char *str)
1591 +{
1592 +       max_vroot = simple_strtol(str, NULL, 0);        /* range-checked in vroot_init() */
1593 +       return 1;
1594 +}
1595 +
1596 +__setup("max_vroot=", max_vroot_setup);
1597 +
1598 +#endif
1599 +
1600 diff -NurpP --minimal linux-2.6.31.6/drivers/char/sysrq.c linux-2.6.31.6-vs2.3.0.36.24/drivers/char/sysrq.c
1601 --- linux-2.6.31.6/drivers/char/sysrq.c 2009-09-10 15:25:50.000000000 +0200
1602 +++ linux-2.6.31.6-vs2.3.0.36.24/drivers/char/sysrq.c   2009-09-10 16:11:43.000000000 +0200
1603 @@ -37,6 +37,7 @@
1604  #include <linux/workqueue.h>
1605  #include <linux/hrtimer.h>
1606  #include <linux/oom.h>
1607 +#include <linux/vserver/debug.h>
1608  
1609  #include <asm/ptrace.h>
1610  #include <asm/irq_regs.h>
1611 @@ -382,6 +383,21 @@ static struct sysrq_key_op sysrq_unrt_op
1612         .enable_mask    = SYSRQ_ENABLE_RTNICE,
1613  };
1614  
1615 +
1616 +#ifdef CONFIG_VSERVER_DEBUG
1617 +static void sysrq_handle_vxinfo(int key, struct tty_struct *tty)
1618 +{
1619 +       dump_vx_info_inactive(key != 'x');      /* 0 for 'x', 1 for any other key */
1620 +}
1621 +
1622 +static struct sysrq_key_op sysrq_showvxinfo_op = {
1623 +       .handler        = sysrq_handle_vxinfo,
1624 +       .help_msg       = "conteXt",    /* installed in the 'x' slot of sysrq_key_table */
1625 +       .action_msg     = "Show Context Info",
1626 +       .enable_mask    = SYSRQ_ENABLE_DUMP,
1627 +};
1628 +#endif
1629 +
1630  /* Key Operations table and lock */
1631  static DEFINE_SPINLOCK(sysrq_key_table_lock);
1632  
1633 @@ -436,7 +452,11 @@ static struct sysrq_key_op *sysrq_key_ta
1634         NULL,                           /* v */
1635         &sysrq_showstate_blocked_op,    /* w */
1636         /* x: May be registered on ppc/powerpc for xmon */
1637 +#ifdef CONFIG_VSERVER_DEBUG
1638 +       &sysrq_showvxinfo_op,           /* x */
1639 +#else
1640         NULL,                           /* x */
1641 +#endif
1642         /* y: May be registered on sparc64 for global register dump */
1643         NULL,                           /* y */
1644         &sysrq_ftrace_dump_op,          /* z */
1645 @@ -451,6 +471,8 @@ static int sysrq_key_table_key2index(int
1646                 retval = key - '0';
1647         else if ((key >= 'a') && (key <= 'z'))
1648                 retval = key + 10 - 'a';
1649 +       else if ((key >= 'A') && (key <= 'Z'))
1650 +               retval = key + 10 - 'A';
1651         else
1652                 retval = -1;
1653         return retval;
1654 diff -NurpP --minimal linux-2.6.31.6/drivers/char/tty_io.c linux-2.6.31.6-vs2.3.0.36.24/drivers/char/tty_io.c
1655 --- linux-2.6.31.6/drivers/char/tty_io.c        2009-11-12 12:10:09.000000000 +0100
1656 +++ linux-2.6.31.6-vs2.3.0.36.24/drivers/char/tty_io.c  2009-10-05 23:35:52.000000000 +0200
1657 @@ -106,6 +106,7 @@
1658  
1659  #include <linux/kmod.h>
1660  #include <linux/nsproxy.h>
1661 +#include <linux/vs_pid.h>
1662  
1663  #undef TTY_DEBUG_HANGUP
1664  
1665 @@ -2236,6 +2237,7 @@ static int tiocspgrp(struct tty_struct *
1666                 return -ENOTTY;
1667         if (get_user(pgrp_nr, p))
1668                 return -EFAULT;
1669 +       pgrp_nr = vx_rmap_pid(pgrp_nr);
1670         if (pgrp_nr < 0)
1671                 return -EINVAL;
1672         rcu_read_lock();
1673 diff -NurpP --minimal linux-2.6.31.6/drivers/infiniband/hw/ipath/ipath_user_pages.c linux-2.6.31.6-vs2.3.0.36.24/drivers/infiniband/hw/ipath/ipath_user_pages.c
1674 --- linux-2.6.31.6/drivers/infiniband/hw/ipath/ipath_user_pages.c       2009-06-11 17:12:30.000000000 +0200
1675 +++ linux-2.6.31.6-vs2.3.0.36.24/drivers/infiniband/hw/ipath/ipath_user_pages.c 2009-09-10 16:11:43.000000000 +0200
1676 @@ -33,6 +33,7 @@
1677  
1678  #include <linux/mm.h>
1679  #include <linux/device.h>
1680 +#include <linux/vs_memory.h>
1681  
1682  #include "ipath_kernel.h"
1683  
1684 @@ -61,7 +62,8 @@ static int __get_user_pages(unsigned lon
1685         lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >>
1686                 PAGE_SHIFT;
1687  
1688 -       if (num_pages > lock_limit) {
1689 +       if (num_pages > lock_limit ||
1690 +               !vx_vmlocked_avail(current->mm, num_pages)) {
1691                 ret = -ENOMEM;
1692                 goto bail;
1693         }
1694 @@ -78,7 +80,7 @@ static int __get_user_pages(unsigned lon
1695                         goto bail_release;
1696         }
1697  
1698 -       current->mm->locked_vm += num_pages;
1699 +       vx_vmlocked_add(current->mm, num_pages);
1700  
1701         ret = 0;
1702         goto bail;
1703 @@ -177,7 +179,7 @@ void ipath_release_user_pages(struct pag
1704  
1705         __ipath_release_user_pages(p, num_pages, 1);
1706  
1707 -       current->mm->locked_vm -= num_pages;
1708 +       vx_vmlocked_sub(current->mm, num_pages);
1709  
1710         up_write(&current->mm->mmap_sem);
1711  }
1712 @@ -194,7 +196,7 @@ static void user_pages_account(struct wo
1713                 container_of(_work, struct ipath_user_pages_work, work);
1714  
1715         down_write(&work->mm->mmap_sem);
1716 -       work->mm->locked_vm -= work->num_pages;
1717 +       vx_vmlocked_sub(work->mm, work->num_pages);
1718         up_write(&work->mm->mmap_sem);
1719         mmput(work->mm);
1720         kfree(work);
1721 diff -NurpP --minimal linux-2.6.31.6/drivers/md/dm.c linux-2.6.31.6-vs2.3.0.36.24/drivers/md/dm.c
1722 --- linux-2.6.31.6/drivers/md/dm.c      2009-11-12 12:10:10.000000000 +0100
1723 +++ linux-2.6.31.6-vs2.3.0.36.24/drivers/md/dm.c        2009-11-12 12:26:38.000000000 +0100
1724 @@ -19,6 +19,7 @@
1725  #include <linux/slab.h>
1726  #include <linux/idr.h>
1727  #include <linux/hdreg.h>
1728 +#include <linux/vs_base.h>
1729  
1730  #include <trace/events/block.h>
1731  
1732 @@ -119,6 +120,7 @@ struct mapped_device {
1733         rwlock_t map_lock;
1734         atomic_t holders;
1735         atomic_t open_count;
1736 +       xid_t xid;
1737  
1738         unsigned long flags;
1739  
1740 @@ -323,6 +325,7 @@ static void __exit dm_exit(void)
1741  static int dm_blk_open(struct block_device *bdev, fmode_t mode)
1742  {
1743         struct mapped_device *md;
1744 +       int ret = -ENXIO;
1745  
1746         spin_lock(&_minor_lock);
1747  
1748 @@ -331,18 +334,19 @@ static int dm_blk_open(struct block_devi
1749                 goto out;
1750  
1751         if (test_bit(DMF_FREEING, &md->flags) ||
1752 -           test_bit(DMF_DELETING, &md->flags)) {
1753 -               md = NULL;
1754 +           test_bit(DMF_DELETING, &md->flags))
1755 +               goto out;
1756 +
1757 +       ret = -EACCES;
1758 +       if (!vx_check(md->xid, VS_IDENT|VS_HOSTID))
1759                 goto out;
1760 -       }
1761  
1762         dm_get(md);
1763         atomic_inc(&md->open_count);
1764 -
1765 +       ret = 0;
1766  out:
1767         spin_unlock(&_minor_lock);
1768 -
1769 -       return md ? 0 : -ENXIO;
1770 +       return ret;
1771  }
1772  
1773  static int dm_blk_close(struct gendisk *disk, fmode_t mode)
1774 @@ -551,6 +555,14 @@ int dm_set_geometry(struct mapped_device
1775         return 0;
1776  }
1777  
1778 +/*
1779 + * Return the context id (xid) stored in a dm device by alloc_dev()
1780 + */
1781 +xid_t dm_get_xid(struct mapped_device *md)
1782 +{
1783 +       return md->xid;
1784 +}
1785 +
1786  /*-----------------------------------------------------------------
1787   * CRUD START:
1788   *   A more elegant soln is in the works that uses the queue
1789 @@ -1760,6 +1772,7 @@ static struct mapped_device *alloc_dev(i
1790         INIT_LIST_HEAD(&md->uevent_list);
1791         spin_lock_init(&md->uevent_lock);
1792  
1793 +       md->xid = vx_current_xid();
1794         md->queue = blk_init_queue(dm_request_fn, NULL);
1795         if (!md->queue)
1796                 goto bad_queue;
1797 diff -NurpP --minimal linux-2.6.31.6/drivers/md/dm.h linux-2.6.31.6-vs2.3.0.36.24/drivers/md/dm.h
1798 --- linux-2.6.31.6/drivers/md/dm.h      2009-09-10 15:25:55.000000000 +0200
1799 +++ linux-2.6.31.6-vs2.3.0.36.24/drivers/md/dm.h        2009-09-10 16:11:43.000000000 +0200
1800 @@ -41,6 +41,8 @@ struct dm_dev_internal {
1801  struct dm_table;
1802  struct dm_md_mempools;
1803  
1804 +xid_t dm_get_xid(struct mapped_device *md);
1805 +
1806  /*-----------------------------------------------------------------
1807   * Internal table functions.
1808   *---------------------------------------------------------------*/
1809 diff -NurpP --minimal linux-2.6.31.6/drivers/md/dm-ioctl.c linux-2.6.31.6-vs2.3.0.36.24/drivers/md/dm-ioctl.c
1810 --- linux-2.6.31.6/drivers/md/dm-ioctl.c        2009-09-10 15:25:55.000000000 +0200
1811 +++ linux-2.6.31.6-vs2.3.0.36.24/drivers/md/dm-ioctl.c  2009-09-10 16:11:43.000000000 +0200
1812 @@ -16,6 +16,7 @@
1813  #include <linux/dm-ioctl.h>
1814  #include <linux/hdreg.h>
1815  #include <linux/compat.h>
1816 +#include <linux/vs_context.h>
1817  
1818  #include <asm/uaccess.h>
1819  
1820 @@ -101,7 +102,8 @@ static struct hash_cell *__get_name_cell
1821         unsigned int h = hash_str(str);
1822  
1823         list_for_each_entry (hc, _name_buckets + h, name_list)
1824 -               if (!strcmp(hc->name, str)) {
1825 +               if (vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT) &&
1826 +                       !strcmp(hc->name, str)) {
1827                         dm_get(hc->md);
1828                         return hc;
1829                 }
1830 @@ -115,7 +117,8 @@ static struct hash_cell *__get_uuid_cell
1831         unsigned int h = hash_str(str);
1832  
1833         list_for_each_entry (hc, _uuid_buckets + h, uuid_list)
1834 -               if (!strcmp(hc->uuid, str)) {
1835 +               if (vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT) &&
1836 +                       !strcmp(hc->uuid, str)) {
1837                         dm_get(hc->md);
1838                         return hc;
1839                 }
1840 @@ -352,6 +355,9 @@ typedef int (*ioctl_fn)(struct dm_ioctl 
1841  
1842  static int remove_all(struct dm_ioctl *param, size_t param_size)
1843  {
1844 +       if (!vx_check(0, VS_ADMIN))
1845 +               return -EPERM;
1846 +
1847         dm_hash_remove_all(1);
1848         param->data_size = 0;
1849         return 0;
1850 @@ -399,6 +405,8 @@ static int list_devices(struct dm_ioctl 
1851          */
1852         for (i = 0; i < NUM_BUCKETS; i++) {
1853                 list_for_each_entry (hc, _name_buckets + i, name_list) {
1854 +                       if (!vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT))
1855 +                               continue;
1856                         needed += sizeof(struct dm_name_list);
1857                         needed += strlen(hc->name) + 1;
1858                         needed += ALIGN_MASK;
1859 @@ -422,6 +430,8 @@ static int list_devices(struct dm_ioctl 
1860          */
1861         for (i = 0; i < NUM_BUCKETS; i++) {
1862                 list_for_each_entry (hc, _name_buckets + i, name_list) {
1863 +                       if (!vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT))
1864 +                               continue;
1865                         if (old_nl)
1866                                 old_nl->next = (uint32_t) ((void *) nl -
1867                                                            (void *) old_nl);
1868 @@ -612,10 +622,11 @@ static struct hash_cell *__find_device_h
1869         if (!md)
1870                 goto out;
1871  
1872 -       mdptr = dm_get_mdptr(md);
1873 +       if (vx_check(dm_get_xid(md), VS_WATCH_P | VS_IDENT))
1874 +               mdptr = dm_get_mdptr(md);
1875 +
1876         if (!mdptr)
1877                 dm_put(md);
1878 -
1879  out:
1880         return mdptr;
1881  }
1882 @@ -1445,8 +1456,8 @@ static int ctl_ioctl(uint command, struc
1883         ioctl_fn fn = NULL;
1884         size_t param_size;
1885  
1886 -       /* only root can play with this */
1887 -       if (!capable(CAP_SYS_ADMIN))
1888 +       /* only root and certain contexts can play with this */
1889 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_MAPPER))
1890                 return -EACCES;
1891  
1892         if (_IOC_TYPE(command) != DM_IOCTL)
1893 diff -NurpP --minimal linux-2.6.31.6/drivers/net/tun.c linux-2.6.31.6-vs2.3.0.36.24/drivers/net/tun.c
1894 --- linux-2.6.31.6/drivers/net/tun.c    2009-11-12 12:10:10.000000000 +0100
1895 +++ linux-2.6.31.6-vs2.3.0.36.24/drivers/net/tun.c      2009-10-15 03:50:05.000000000 +0200
1896 @@ -61,6 +61,7 @@
1897  #include <linux/crc32.h>
1898  #include <linux/nsproxy.h>
1899  #include <linux/virtio_net.h>
1900 +#include <linux/vs_network.h>
1901  #include <net/net_namespace.h>
1902  #include <net/netns/generic.h>
1903  #include <net/rtnetlink.h>
1904 @@ -102,6 +103,7 @@ struct tun_struct {
1905         unsigned int            flags;
1906         uid_t                   owner;
1907         gid_t                   group;
1908 +       nid_t                   nid;
1909  
1910         struct sk_buff_head     readq;
1911  
1912 @@ -138,7 +140,7 @@ static int tun_attach(struct tun_struct 
1913         /* Check permissions */
1914         if (((tun->owner != -1 && cred->euid != tun->owner) ||
1915              (tun->group != -1 && !in_egroup_p(tun->group))) &&
1916 -               !capable(CAP_NET_ADMIN))
1917 +               !cap_raised(current_cap(), CAP_NET_ADMIN))
1918                 return -EPERM;
1919  
1920         netif_tx_lock_bh(tun->dev);
1921 @@ -823,6 +825,7 @@ static void tun_setup(struct net_device 
1922  
1923         tun->owner = -1;
1924         tun->group = -1;
1925 +       tun->nid = current->nid;
1926  
1927         dev->ethtool_ops = &tun_ethtool_ops;
1928         dev->destructor = tun_free_netdev;
1929 @@ -935,6 +938,9 @@ static int tun_set_iff(struct net *net, 
1930                 else
1931                         return -EINVAL;
1932  
1933 +               if (!nx_check(tun->nid, VS_IDENT | VS_HOSTID | VS_ADMIN_P))
1934 +                       return -EPERM;
1935 +
1936                 err = tun_attach(tun, file);
1937                 if (err < 0)
1938                         return err;
1939 @@ -943,7 +949,7 @@ static int tun_set_iff(struct net *net, 
1940                 char *name;
1941                 unsigned long flags = 0;
1942  
1943 -               if (!capable(CAP_NET_ADMIN))
1944 +               if (!nx_capable(CAP_NET_ADMIN, NXC_TUN_CREATE))
1945                         return -EPERM;
1946  
1947                 /* Set dev type */
1948 @@ -1190,6 +1196,16 @@ static long tun_chr_ioctl(struct file *f
1949                 DBG(KERN_INFO "%s: group set to %d\n", tun->dev->name, tun->group);
1950                 break;
1951  
1952 +       case TUNSETNID:
1953 +               if (!capable(CAP_CONTEXT))
1954 +                       return -EPERM;
1955 +
1956 +               /* Set nid owner of the device */
1957 +               tun->nid = (nid_t) arg;
1958 +
1959 +               DBG(KERN_INFO "%s: nid owner set to %u\n", tun->dev->name, tun->nid);
1960 +               break;
1961 +
1962         case TUNSETLINK:
1963                 /* Only allow setting the type when the interface is down */
1964                 if (tun->dev->flags & IFF_UP) {
1965 diff -NurpP --minimal linux-2.6.31.6/fs/attr.c linux-2.6.31.6-vs2.3.0.36.24/fs/attr.c
1966 --- linux-2.6.31.6/fs/attr.c    2009-06-11 17:13:01.000000000 +0200
1967 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/attr.c      2009-09-10 16:11:43.000000000 +0200
1968 @@ -14,6 +14,9 @@
1969  #include <linux/fcntl.h>
1970  #include <linux/quotaops.h>
1971  #include <linux/security.h>
1972 +#include <linux/proc_fs.h>
1973 +#include <linux/devpts_fs.h>
1974 +#include <linux/vs_tag.h>
1975  
1976  /* Taken over from the old code... */
1977  
1978 @@ -55,6 +58,10 @@ int inode_change_ok(struct inode *inode,
1979                 if (!is_owner_or_cap(inode))
1980                         goto error;
1981         }
1982 +
1983 +       if (dx_permission(inode, MAY_WRITE))
1984 +               goto error;
1985 +
1986  fine:
1987         retval = 0;
1988  error:
1989 @@ -78,6 +85,8 @@ int inode_setattr(struct inode * inode, 
1990                 inode->i_uid = attr->ia_uid;
1991         if (ia_valid & ATTR_GID)
1992                 inode->i_gid = attr->ia_gid;
1993 +       if ((ia_valid & ATTR_TAG) && IS_TAGGED(inode))
1994 +               inode->i_tag = attr->ia_tag;
1995         if (ia_valid & ATTR_ATIME)
1996                 inode->i_atime = timespec_trunc(attr->ia_atime,
1997                                                 inode->i_sb->s_time_gran);
1998 @@ -172,7 +181,8 @@ int notify_change(struct dentry * dentry
1999                 error = inode_change_ok(inode, attr);
2000                 if (!error) {
2001                         if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
2002 -                           (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid))
2003 +                           (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) ||
2004 +                           (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag))
2005                                 error = vfs_dq_transfer(inode, attr) ?
2006                                         -EDQUOT : 0;
2007                         if (!error)
2008 diff -NurpP --minimal linux-2.6.31.6/fs/binfmt_aout.c linux-2.6.31.6-vs2.3.0.36.24/fs/binfmt_aout.c
2009 --- linux-2.6.31.6/fs/binfmt_aout.c     2009-03-24 14:22:24.000000000 +0100
2010 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/binfmt_aout.c       2009-09-10 16:11:43.000000000 +0200
2011 @@ -24,6 +24,7 @@
2012  #include <linux/binfmts.h>
2013  #include <linux/personality.h>
2014  #include <linux/init.h>
2015 +#include <linux/vs_memory.h>
2016  
2017  #include <asm/system.h>
2018  #include <asm/uaccess.h>
2019 diff -NurpP --minimal linux-2.6.31.6/fs/binfmt_elf.c linux-2.6.31.6-vs2.3.0.36.24/fs/binfmt_elf.c
2020 --- linux-2.6.31.6/fs/binfmt_elf.c      2009-11-12 12:10:11.000000000 +0100
2021 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/binfmt_elf.c        2009-09-29 17:02:58.000000000 +0200
2022 @@ -31,6 +31,7 @@
2023  #include <linux/random.h>
2024  #include <linux/elf.h>
2025  #include <linux/utsname.h>
2026 +#include <linux/vs_memory.h>
2027  #include <asm/uaccess.h>
2028  #include <asm/param.h>
2029  #include <asm/page.h>
2030 diff -NurpP --minimal linux-2.6.31.6/fs/binfmt_flat.c linux-2.6.31.6-vs2.3.0.36.24/fs/binfmt_flat.c
2031 --- linux-2.6.31.6/fs/binfmt_flat.c     2009-09-10 15:26:20.000000000 +0200
2032 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/binfmt_flat.c       2009-09-10 16:11:43.000000000 +0200
2033 @@ -35,6 +35,7 @@
2034  #include <linux/init.h>
2035  #include <linux/flat.h>
2036  #include <linux/syscalls.h>
2037 +#include <linux/vs_memory.h>
2038  
2039  #include <asm/byteorder.h>
2040  #include <asm/system.h>
2041 diff -NurpP --minimal linux-2.6.31.6/fs/binfmt_som.c linux-2.6.31.6-vs2.3.0.36.24/fs/binfmt_som.c
2042 --- linux-2.6.31.6/fs/binfmt_som.c      2009-06-11 17:13:02.000000000 +0200
2043 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/binfmt_som.c        2009-09-10 16:11:43.000000000 +0200
2044 @@ -28,6 +28,7 @@
2045  #include <linux/shm.h>
2046  #include <linux/personality.h>
2047  #include <linux/init.h>
2048 +#include <linux/vs_memory.h>
2049  
2050  #include <asm/uaccess.h>
2051  #include <asm/pgtable.h>
2052 diff -NurpP --minimal linux-2.6.31.6/fs/block_dev.c linux-2.6.31.6-vs2.3.0.36.24/fs/block_dev.c
2053 --- linux-2.6.31.6/fs/block_dev.c       2009-09-10 15:26:20.000000000 +0200
2054 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/block_dev.c 2009-09-10 17:01:13.000000000 +0200
2055 @@ -26,6 +26,7 @@
2056  #include <linux/namei.h>
2057  #include <linux/log2.h>
2058  #include <linux/kmemleak.h>
2059 +#include <linux/vs_device.h>
2060  #include <asm/uaccess.h>
2061  #include "internal.h"
2062  
2063 @@ -550,6 +551,7 @@ struct block_device *bdget(dev_t dev)
2064                 bdev->bd_invalidated = 0;
2065                 inode->i_mode = S_IFBLK;
2066                 inode->i_rdev = dev;
2067 +               inode->i_mdev = dev;
2068                 inode->i_bdev = bdev;
2069                 inode->i_data.a_ops = &def_blk_aops;
2070                 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
2071 @@ -596,6 +598,11 @@ EXPORT_SYMBOL(bdput);
2072  static struct block_device *bd_acquire(struct inode *inode)
2073  {
2074         struct block_device *bdev;
2075 +       dev_t mdev;
2076 +
2077 +       if (!vs_map_blkdev(inode->i_rdev, &mdev, DATTR_OPEN))
2078 +               return NULL;
2079 +       inode->i_mdev = mdev;
2080  
2081         spin_lock(&bdev_lock);
2082         bdev = inode->i_bdev;
2083 @@ -606,7 +613,7 @@ static struct block_device *bd_acquire(s
2084         }
2085         spin_unlock(&bdev_lock);
2086  
2087 -       bdev = bdget(inode->i_rdev);
2088 +       bdev = bdget(mdev);
2089         if (bdev) {
2090                 spin_lock(&bdev_lock);
2091                 if (!inode->i_bdev) {
2092 diff -NurpP --minimal linux-2.6.31.6/fs/btrfs/ctree.h linux-2.6.31.6-vs2.3.0.36.24/fs/btrfs/ctree.h
2093 --- linux-2.6.31.6/fs/btrfs/ctree.h     2009-09-10 15:26:20.000000000 +0200
2094 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/btrfs/ctree.h       2009-10-08 07:39:48.000000000 +0200
2095 @@ -540,11 +540,14 @@ struct btrfs_inode_item {
2096         /* modification sequence number for NFS */
2097         __le64 sequence;
2098  
2099 +       __le16 tag;
2100         /*
2101          * a little future expansion, for more than this we can
2102          * just grow the inode item and version it
2103          */
2104 -       __le64 reserved[4];
2105 +       __le16 reserved16;
2106 +       __le32 reserved32;
2107 +       __le64 reserved[3];
2108         struct btrfs_timespec atime;
2109         struct btrfs_timespec ctime;
2110         struct btrfs_timespec mtime;
2111 @@ -1119,6 +1122,8 @@ struct btrfs_root {
2112  #define BTRFS_MOUNT_SSD_SPREAD         (1 << 8)
2113  #define BTRFS_MOUNT_NOSSD              (1 << 9)
2114  
2115 +#define BTRFS_MOUNT_TAGGED             (1 << 24)
2116 +
2117  #define btrfs_clear_opt(o, opt)                ((o) &= ~BTRFS_MOUNT_##opt)
2118  #define btrfs_set_opt(o, opt)          ((o) |= BTRFS_MOUNT_##opt)
2119  #define btrfs_test_opt(root, opt)      ((root)->fs_info->mount_opt & \
2120 @@ -1138,6 +1143,10 @@ struct btrfs_root {
2121  #define BTRFS_INODE_NOATIME            (1 << 9)
2122  #define BTRFS_INODE_DIRSYNC            (1 << 10)
2123  
2124 +#define BTRFS_INODE_IXUNLINK           (1 << 24)
2125 +#define BTRFS_INODE_BARRIER            (1 << 25)
2126 +#define BTRFS_INODE_COW                        (1 << 26)
2127 +
2128  
2129  /* some macros to generate set/get funcs for the struct fields.  This
2130   * assumes there is a lefoo_to_cpu for every type, so lets make a simple
2131 @@ -1340,6 +1349,7 @@ BTRFS_SETGET_FUNCS(inode_block_group, st
2132  BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32);
2133  BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32);
2134  BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32);
2135 +BTRFS_SETGET_FUNCS(inode_tag, struct btrfs_inode_item, tag, 16);
2136  BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32);
2137  BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64);
2138  BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64);
2139 @@ -2280,6 +2290,7 @@ int btrfs_cont_expand(struct inode *inod
2140  long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
2141  void btrfs_update_iflags(struct inode *inode);
2142  void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
2143 +int btrfs_sync_flags(struct inode *inode, int, int);
2144  
2145  /* file.c */
2146  int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync);
2147 diff -NurpP --minimal linux-2.6.31.6/fs/btrfs/disk-io.c linux-2.6.31.6-vs2.3.0.36.24/fs/btrfs/disk-io.c
2148 --- linux-2.6.31.6/fs/btrfs/disk-io.c   2009-09-10 15:26:20.000000000 +0200
2149 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/btrfs/disk-io.c     2009-10-08 06:43:17.000000000 +0200
2150 @@ -1673,6 +1673,9 @@ struct btrfs_root *open_ctree(struct sup
2151                 goto fail_iput;
2152         }
2153  
2154 +       if (btrfs_test_opt(tree_root, TAGGED))
2155 +               sb->s_flags |= MS_TAGGED;
2156 +
2157         features = btrfs_super_incompat_flags(disk_super) &
2158                 ~BTRFS_FEATURE_INCOMPAT_SUPP;
2159         if (features) {
2160 diff -NurpP --minimal linux-2.6.31.6/fs/btrfs/inode.c linux-2.6.31.6-vs2.3.0.36.24/fs/btrfs/inode.c
2161 --- linux-2.6.31.6/fs/btrfs/inode.c     2009-09-10 15:26:21.000000000 +0200
2162 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/btrfs/inode.c       2009-10-08 14:17:12.000000000 +0200
2163 @@ -36,6 +36,8 @@
2164  #include <linux/xattr.h>
2165  #include <linux/posix_acl.h>
2166  #include <linux/falloc.h>
2167 +#include <linux/vs_tag.h>
2168 +
2169  #include "compat.h"
2170  #include "ctree.h"
2171  #include "disk-io.h"
2172 @@ -2072,6 +2074,8 @@ static void btrfs_read_locked_inode(stru
2173         int maybe_acls;
2174         u64 alloc_group_block;
2175         u32 rdev;
2176 +       uid_t uid;
2177 +       gid_t gid;
2178         int ret;
2179  
2180         path = btrfs_alloc_path();
2181 @@ -2088,8 +2092,13 @@ static void btrfs_read_locked_inode(stru
2182  
2183         inode->i_mode = btrfs_inode_mode(leaf, inode_item);
2184         inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
2185 -       inode->i_uid = btrfs_inode_uid(leaf, inode_item);
2186 -       inode->i_gid = btrfs_inode_gid(leaf, inode_item);
2187 +
2188 +       uid = btrfs_inode_uid(leaf, inode_item);
2189 +       gid = btrfs_inode_gid(leaf, inode_item);
2190 +       inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
2191 +       inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
2192 +       inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid,
2193 +               btrfs_inode_tag(leaf, inode_item));
2194         btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
2195  
2196         tspec = btrfs_inode_atime(inode_item);
2197 @@ -2171,8 +2180,15 @@ static void fill_inode_item(struct btrfs
2198                             struct btrfs_inode_item *item,
2199                             struct inode *inode)
2200  {
2201 -       btrfs_set_inode_uid(leaf, item, inode->i_uid);
2202 -       btrfs_set_inode_gid(leaf, item, inode->i_gid);
2203 +       uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag);
2204 +       gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag);
2205 +
2206 +       btrfs_set_inode_uid(leaf, item, uid);
2207 +       btrfs_set_inode_gid(leaf, item, gid);
2208 +#ifdef CONFIG_TAGGING_INTERN
2209 +       btrfs_set_inode_tag(leaf, item, inode->i_tag);
2210 +#endif
2211 +
2212         btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
2213         btrfs_set_inode_mode(leaf, item, inode->i_mode);
2214         btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
2215 @@ -3615,6 +3631,7 @@ static struct inode *btrfs_new_inode(str
2216         } else
2217                 inode->i_gid = current_fsgid();
2218  
2219 +       inode->i_tag = dx_current_fstag(root->fs_info->sb);
2220         inode->i_mode = mode;
2221         inode->i_ino = objectid;
2222         inode_set_bytes(inode, 0);
2223 @@ -5218,6 +5235,7 @@ static struct inode_operations btrfs_dir
2224         .listxattr      = btrfs_listxattr,
2225         .removexattr    = btrfs_removexattr,
2226         .permission     = btrfs_permission,
2227 +       .sync_flags     = btrfs_sync_flags,
2228  };
2229  static struct inode_operations btrfs_dir_ro_inode_operations = {
2230         .lookup         = btrfs_lookup,
2231 @@ -5289,6 +5307,7 @@ static struct inode_operations btrfs_fil
2232         .permission     = btrfs_permission,
2233         .fallocate      = btrfs_fallocate,
2234         .fiemap         = btrfs_fiemap,
2235 +       .sync_flags     = btrfs_sync_flags,
2236  };
2237  static struct inode_operations btrfs_special_inode_operations = {
2238         .getattr        = btrfs_getattr,
2239 diff -NurpP --minimal linux-2.6.31.6/fs/btrfs/ioctl.c linux-2.6.31.6-vs2.3.0.36.24/fs/btrfs/ioctl.c
2240 --- linux-2.6.31.6/fs/btrfs/ioctl.c     2009-09-10 15:26:21.000000000 +0200
2241 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/btrfs/ioctl.c       2009-10-08 05:58:29.000000000 +0200
2242 @@ -67,10 +67,13 @@ static unsigned int btrfs_flags_to_ioctl
2243  {
2244         unsigned int iflags = 0;
2245  
2246 -       if (flags & BTRFS_INODE_SYNC)
2247 -               iflags |= FS_SYNC_FL;
2248         if (flags & BTRFS_INODE_IMMUTABLE)
2249                 iflags |= FS_IMMUTABLE_FL;
2250 +       if (flags & BTRFS_INODE_IXUNLINK)
2251 +               iflags |= FS_IXUNLINK_FL;
2252 +
2253 +       if (flags & BTRFS_INODE_SYNC)
2254 +               iflags |= FS_SYNC_FL;
2255         if (flags & BTRFS_INODE_APPEND)
2256                 iflags |= FS_APPEND_FL;
2257         if (flags & BTRFS_INODE_NODUMP)
2258 @@ -80,28 +83,78 @@ static unsigned int btrfs_flags_to_ioctl
2259         if (flags & BTRFS_INODE_DIRSYNC)
2260                 iflags |= FS_DIRSYNC_FL;
2261  
2262 +       if (flags & BTRFS_INODE_BARRIER)
2263 +               iflags |= FS_BARRIER_FL;
2264 +       if (flags & BTRFS_INODE_COW)
2265 +               iflags |= FS_COW_FL;
2266         return iflags;
2267  }
2268  
2269  /*
2270 - * Update inode->i_flags based on the btrfs internal flags.
2271 + * Update inode->i_(v)flags based on the btrfs internal flags.
2272   */
2273  void btrfs_update_iflags(struct inode *inode)
2274  {
2275         struct btrfs_inode *ip = BTRFS_I(inode);
2276  
2277 -       inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
2278 +       inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK |
2279 +               S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
2280  
2281 -       if (ip->flags & BTRFS_INODE_SYNC)
2282 -               inode->i_flags |= S_SYNC;
2283         if (ip->flags & BTRFS_INODE_IMMUTABLE)
2284                 inode->i_flags |= S_IMMUTABLE;
2285 +       if (ip->flags & BTRFS_INODE_IXUNLINK)
2286 +               inode->i_flags |= S_IXUNLINK;
2287 +
2288 +       if (ip->flags & BTRFS_INODE_SYNC)
2289 +               inode->i_flags |= S_SYNC;
2290         if (ip->flags & BTRFS_INODE_APPEND)
2291                 inode->i_flags |= S_APPEND;
2292         if (ip->flags & BTRFS_INODE_NOATIME)
2293                 inode->i_flags |= S_NOATIME;
2294         if (ip->flags & BTRFS_INODE_DIRSYNC)
2295                 inode->i_flags |= S_DIRSYNC;
2296 +
2297 +       inode->i_vflags &= ~(V_BARRIER | V_COW);
2298 +
2299 +       if (ip->flags & BTRFS_INODE_BARRIER)
2300 +               inode->i_vflags |= V_BARRIER;
2301 +       if (ip->flags & BTRFS_INODE_COW)
2302 +               inode->i_vflags |= V_COW;
2303 +}
2304 +
2305 +/*
2306 + * Update btrfs internal flags from inode->i_(v)flags.
2307 + */
2308 +void btrfs_update_flags(struct inode *inode)
2309 +{
2310 +       struct btrfs_inode *ip = BTRFS_I(inode);
2311 +
2312 +       unsigned int flags = inode->i_flags;
2313 +       unsigned int vflags = inode->i_vflags;
2314 +
2315 +       ip->flags &= ~(BTRFS_INODE_SYNC | BTRFS_INODE_APPEND |
2316 +                       BTRFS_INODE_IMMUTABLE | BTRFS_INODE_IXUNLINK |
2317 +                       BTRFS_INODE_NOATIME | BTRFS_INODE_DIRSYNC |
2318 +                       BTRFS_INODE_BARRIER | BTRFS_INODE_COW);
2319 +
2320 +       if (flags & S_IMMUTABLE)
2321 +               ip->flags |= BTRFS_INODE_IMMUTABLE;
2322 +       if (flags & S_IXUNLINK)
2323 +               ip->flags |= BTRFS_INODE_IXUNLINK;
2324 +
2325 +       if (flags & S_SYNC)
2326 +               ip->flags |= BTRFS_INODE_SYNC;
2327 +       if (flags & S_APPEND)
2328 +               ip->flags |= BTRFS_INODE_APPEND;
2329 +       if (flags & S_NOATIME)
2330 +               ip->flags |= BTRFS_INODE_NOATIME;
2331 +       if (flags & S_DIRSYNC)
2332 +               ip->flags |= BTRFS_INODE_DIRSYNC;
2333 +
2334 +       if (vflags & V_BARRIER)
2335 +               ip->flags |= BTRFS_INODE_BARRIER;
2336 +       if (vflags & V_COW)
2337 +               ip->flags |= BTRFS_INODE_COW;
2338  }
2339  
2340  /*
2341 @@ -119,7 +172,7 @@ void btrfs_inherit_iflags(struct inode *
2342         flags = BTRFS_I(dir)->flags;
2343  
2344         if (S_ISREG(inode->i_mode))
2345 -               flags &= ~BTRFS_INODE_DIRSYNC;
2346 +               flags &= ~(BTRFS_INODE_DIRSYNC | BTRFS_INODE_BARRIER);
2347         else if (!S_ISDIR(inode->i_mode))
2348                 flags &= (BTRFS_INODE_NODUMP | BTRFS_INODE_NOATIME);
2349  
2350 @@ -127,6 +180,30 @@ void btrfs_inherit_iflags(struct inode *
2351         btrfs_update_iflags(inode);
2352  }
2353  
2354 +int btrfs_sync_flags(struct inode *inode, int flags, int vflags)
2355 +{
2356 +       struct btrfs_inode *ip = BTRFS_I(inode);
2357 +       struct btrfs_root *root = ip->root;
2358 +       struct btrfs_trans_handle *trans;
2359 +       int ret;
2360 +
2361 +       trans = btrfs_join_transaction(root, 1);
2362 +       BUG_ON(!trans);
2363 +
2364 +       inode->i_flags = flags;
2365 +       inode->i_vflags = vflags;
2366 +       btrfs_update_flags(inode);
2367 +
2368 +       ret = btrfs_update_inode(trans, root, inode);
2369 +       BUG_ON(ret);
2370 +
2371 +       btrfs_update_iflags(inode);
2372 +       inode->i_ctime = CURRENT_TIME;
2373 +       btrfs_end_transaction(trans, root);
2374 +
2375 +       return 0;
2376 +}
2377 +
2378  static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
2379  {
2380         struct btrfs_inode *ip = BTRFS_I(file->f_path.dentry->d_inode);
2381 @@ -149,6 +226,7 @@ static int btrfs_ioctl_setflags(struct f
2382         if (copy_from_user(&flags, arg, sizeof(flags)))
2383                 return -EFAULT;
2384  
2385 +       /* maybe add FS_IXUNLINK_FL ? */
2386         if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
2387                       FS_NOATIME_FL | FS_NODUMP_FL | \
2388                       FS_SYNC_FL | FS_DIRSYNC_FL))
2389 @@ -161,7 +239,8 @@ static int btrfs_ioctl_setflags(struct f
2390  
2391         flags = btrfs_mask_flags(inode->i_mode, flags);
2392         oldflags = btrfs_flags_to_ioctl(ip->flags);
2393 -       if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
2394 +       if ((flags ^ oldflags) & (FS_APPEND_FL |
2395 +               FS_IMMUTABLE_FL | FS_IXUNLINK_FL)) {
2396                 if (!capable(CAP_LINUX_IMMUTABLE)) {
2397                         ret = -EPERM;
2398                         goto out_unlock;
2399 @@ -172,14 +251,19 @@ static int btrfs_ioctl_setflags(struct f
2400         if (ret)
2401                 goto out_unlock;
2402  
2403 -       if (flags & FS_SYNC_FL)
2404 -               ip->flags |= BTRFS_INODE_SYNC;
2405 -       else
2406 -               ip->flags &= ~BTRFS_INODE_SYNC;
2407         if (flags & FS_IMMUTABLE_FL)
2408                 ip->flags |= BTRFS_INODE_IMMUTABLE;
2409         else
2410                 ip->flags &= ~BTRFS_INODE_IMMUTABLE;
2411 +       if (flags & FS_IXUNLINK_FL)
2412 +               ip->flags |= BTRFS_INODE_IXUNLINK;
2413 +       else
2414 +               ip->flags &= ~BTRFS_INODE_IXUNLINK;
2415 +
2416 +       if (flags & FS_SYNC_FL)
2417 +               ip->flags |= BTRFS_INODE_SYNC;
2418 +       else
2419 +               ip->flags &= ~BTRFS_INODE_SYNC;
2420         if (flags & FS_APPEND_FL)
2421                 ip->flags |= BTRFS_INODE_APPEND;
2422         else
2423 diff -NurpP --minimal linux-2.6.31.6/fs/btrfs/super.c linux-2.6.31.6-vs2.3.0.36.24/fs/btrfs/super.c
2424 --- linux-2.6.31.6/fs/btrfs/super.c     2009-09-10 15:26:21.000000000 +0200
2425 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/btrfs/super.c       2009-10-08 07:27:04.000000000 +0200
2426 @@ -66,7 +66,8 @@ enum {
2427         Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
2428         Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
2429         Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl,
2430 -       Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_err,
2431 +       Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
2432 +       Opt_tag, Opt_notag, Opt_tagid, Opt_err,
2433  };
2434  
2435  static match_table_t tokens = {
2436 @@ -88,6 +89,9 @@ static match_table_t tokens = {
2437         {Opt_notreelog, "notreelog"},
2438         {Opt_flushoncommit, "flushoncommit"},
2439         {Opt_ratio, "metadata_ratio=%d"},
2440 +       {Opt_tag, "tag"},
2441 +       {Opt_notag, "notag"},
2442 +       {Opt_tagid, "tagid=%u"},
2443         {Opt_err, NULL},
2444  };
2445  
2446 @@ -257,6 +261,22 @@ int btrfs_parse_options(struct btrfs_roo
2447                                        info->metadata_ratio);
2448                         }
2449                         break;
2450 +#ifndef CONFIG_TAGGING_NONE
2451 +               case Opt_tag:
2452 +                       printk(KERN_INFO "btrfs: use tagging\n");
2453 +                       btrfs_set_opt(info->mount_opt, TAGGED);
2454 +                       break;
2455 +               case Opt_notag:
2456 +                       printk(KERN_INFO "btrfs: disabled tagging\n");
2457 +                       btrfs_clear_opt(info->mount_opt, TAGGED);
2458 +                       break;
2459 +#endif
2460 +#ifdef CONFIG_PROPAGATE
2461 +               case Opt_tagid:
2462 +                       /* use args[0] */
2463 +                       btrfs_set_opt(info->mount_opt, TAGGED);
2464 +                       break;
2465 +#endif
2466                 default:
2467                         break;
2468                 }
2469 @@ -568,6 +588,12 @@ static int btrfs_remount(struct super_bl
2470         if (ret)
2471                 return -EINVAL;
2472  
2473 +       if (btrfs_test_opt(root, TAGGED) && !(sb->s_flags & MS_TAGGED)) {
2474 +               printk("btrfs: %s: tagging not permitted on remount.\n",
2475 +                       sb->s_id);
2476 +               return -EINVAL;
2477 +       }
2478 +
2479         if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
2480                 return 0;
2481  
2482 diff -NurpP --minimal linux-2.6.31.6/fs/char_dev.c linux-2.6.31.6-vs2.3.0.36.24/fs/char_dev.c
2483 --- linux-2.6.31.6/fs/char_dev.c        2009-09-10 15:26:21.000000000 +0200
2484 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/char_dev.c  2009-09-10 16:11:43.000000000 +0200
2485 @@ -20,6 +20,8 @@
2486  #include <linux/cdev.h>
2487  #include <linux/mutex.h>
2488  #include <linux/backing-dev.h>
2489 +#include <linux/vs_context.h>
2490 +#include <linux/vs_device.h>
2491  
2492  #include "internal.h"
2493  
2494 @@ -357,14 +359,21 @@ static int chrdev_open(struct inode *ino
2495         struct cdev *p;
2496         struct cdev *new = NULL;
2497         int ret = 0;
2498 +       dev_t mdev;
2499 +
2500 +       if (!vs_map_chrdev(inode->i_rdev, &mdev, DATTR_OPEN))
2501 +               return -EPERM;
2502 +       inode->i_mdev = mdev;
2503  
2504         spin_lock(&cdev_lock);
2505         p = inode->i_cdev;
2506         if (!p) {
2507                 struct kobject *kobj;
2508                 int idx;
2509 +
2510                 spin_unlock(&cdev_lock);
2511 -               kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
2512 +
2513 +               kobj = kobj_lookup(cdev_map, mdev, &idx);
2514                 if (!kobj)
2515                         return -ENXIO;
2516                 new = container_of(kobj, struct cdev, kobj);
2517 diff -NurpP --minimal linux-2.6.31.6/fs/dcache.c linux-2.6.31.6-vs2.3.0.36.24/fs/dcache.c
2518 --- linux-2.6.31.6/fs/dcache.c  2009-09-10 15:26:21.000000000 +0200
2519 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/dcache.c    2009-09-10 16:11:43.000000000 +0200
2520 @@ -32,6 +32,7 @@
2521  #include <linux/swap.h>
2522  #include <linux/bootmem.h>
2523  #include <linux/fs_struct.h>
2524 +#include <linux/vs_limit.h>
2525  #include "internal.h"
2526  
2527  int sysctl_vfs_cache_pressure __read_mostly = 100;
2528 @@ -229,6 +230,8 @@ repeat:
2529                 return;
2530         }
2531  
2532 +       vx_dentry_dec(dentry);
2533 +
2534         /*
2535          * AV: ->d_delete() is _NOT_ allowed to block now.
2536          */
2537 @@ -320,6 +323,7 @@ static inline struct dentry * __dget_loc
2538  {
2539         atomic_inc(&dentry->d_count);
2540         dentry_lru_del_init(dentry);
2541 +       vx_dentry_inc(dentry);
2542         return dentry;
2543  }
2544  
2545 @@ -918,6 +922,9 @@ struct dentry *d_alloc(struct dentry * p
2546         struct dentry *dentry;
2547         char *dname;
2548  
2549 +       if (!vx_dentry_avail(1))
2550 +               return NULL;
2551 +
2552         dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
2553         if (!dentry)
2554                 return NULL;
2555 @@ -963,6 +970,7 @@ struct dentry *d_alloc(struct dentry * p
2556         if (parent)
2557                 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
2558         dentry_stat.nr_dentry++;
2559 +       vx_dentry_inc(dentry);
2560         spin_unlock(&dcache_lock);
2561  
2562         return dentry;
2563 @@ -1406,6 +1414,7 @@ struct dentry * __d_lookup(struct dentry
2564                 }
2565  
2566                 atomic_inc(&dentry->d_count);
2567 +               vx_dentry_inc(dentry);
2568                 found = dentry;
2569                 spin_unlock(&dentry->d_lock);
2570                 break;
2571 diff -NurpP --minimal linux-2.6.31.6/fs/devpts/inode.c linux-2.6.31.6-vs2.3.0.36.24/fs/devpts/inode.c
2572 --- linux-2.6.31.6/fs/devpts/inode.c    2009-09-10 15:26:21.000000000 +0200
2573 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/devpts/inode.c      2009-09-10 16:11:43.000000000 +0200
2574 @@ -19,12 +19,12 @@
2575  #include <linux/tty.h>
2576  #include <linux/mutex.h>
2577  #include <linux/idr.h>
2578 +#include <linux/magic.h>
2579  #include <linux/devpts_fs.h>
2580  #include <linux/parser.h>
2581  #include <linux/fsnotify.h>
2582  #include <linux/seq_file.h>
2583 -
2584 -#define DEVPTS_SUPER_MAGIC 0x1cd1
2585 +#include <linux/vs_base.h>
2586  
2587  #define DEVPTS_DEFAULT_MODE 0600
2588  /*
2589 @@ -36,6 +36,20 @@
2590  #define DEVPTS_DEFAULT_PTMX_MODE 0000
2591  #define PTMX_MINOR     2
2592  
2593 +static int devpts_permission(struct inode *inode, int mask)
2594 +{
2595 +       int ret = -EACCES;
2596 +
2597 +       /* devpts is xid tagged */
2598 +       if (vx_check((xid_t)inode->i_tag, VS_WATCH_P | VS_IDENT))
2599 +               ret = generic_permission(inode, mask, NULL);
2600 +       return ret;
2601 +}
2602 +
2603 +static struct inode_operations devpts_file_inode_operations = {
2604 +       .permission     = devpts_permission,
2605 +};
2606 +
2607  extern int pty_limit;                  /* Config limit on Unix98 ptys */
2608  static DEFINE_MUTEX(allocated_ptys_lock);
2609  
2610 @@ -263,6 +277,25 @@ static int devpts_show_options(struct se
2611         return 0;
2612  }
2613  
2614 +static int devpts_filter(struct dentry *de)
2615 +{
2616 +       /* devpts is xid tagged */
2617 +       return vx_check((xid_t)de->d_inode->i_tag, VS_WATCH_P | VS_IDENT);
2618 +}
2619 +
2620 +static int devpts_readdir(struct file * filp, void * dirent, filldir_t filldir)
2621 +{
2622 +       return dcache_readdir_filter(filp, dirent, filldir, devpts_filter);
2623 +}
2624 +
2625 +static struct file_operations devpts_dir_operations = {
2626 +       .open           = dcache_dir_open,
2627 +       .release        = dcache_dir_close,
2628 +       .llseek         = dcache_dir_lseek,
2629 +       .read           = generic_read_dir,
2630 +       .readdir        = devpts_readdir,
2631 +};
2632 +
2633  static const struct super_operations devpts_sops = {
2634         .statfs         = simple_statfs,
2635         .remount_fs     = devpts_remount,
2636 @@ -302,12 +335,15 @@ devpts_fill_super(struct super_block *s,
2637         inode = new_inode(s);
2638         if (!inode)
2639                 goto free_fsi;
2640 +
2641         inode->i_ino = 1;
2642         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
2643         inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
2644         inode->i_op = &simple_dir_inode_operations;
2645 -       inode->i_fop = &simple_dir_operations;
2646 +       inode->i_fop = &devpts_dir_operations;
2647         inode->i_nlink = 2;
2648 +       /* devpts is xid tagged */
2649 +       inode->i_tag = (tag_t)vx_current_xid();
2650  
2651         s->s_root = d_alloc_root(inode);
2652         if (s->s_root)
2653 @@ -498,6 +534,9 @@ int devpts_pty_new(struct inode *ptmx_in
2654         inode->i_gid = opts->setgid ? opts->gid : current_fsgid();
2655         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
2656         init_special_inode(inode, S_IFCHR|opts->mode, device);
2657 +       /* devpts is xid tagged */
2658 +       inode->i_tag = (tag_t)vx_current_xid();
2659 +       inode->i_op = &devpts_file_inode_operations;
2660         inode->i_private = tty;
2661         tty->driver_data = inode;
2662  
2663 diff -NurpP --minimal linux-2.6.31.6/fs/exec.c linux-2.6.31.6-vs2.3.0.36.24/fs/exec.c
2664 --- linux-2.6.31.6/fs/exec.c    2009-09-10 15:26:21.000000000 +0200
2665 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/exec.c      2009-09-10 16:11:43.000000000 +0200
2666 @@ -249,7 +249,9 @@ static int __bprm_mm_init(struct linux_b
2667         if (err)
2668                 goto err;
2669  
2670 -       mm->stack_vm = mm->total_vm = 1;
2671 +       mm->total_vm = 0;
2672 +       vx_vmpages_inc(mm);
2673 +       mm->stack_vm = 1;
2674         up_write(&mm->mmap_sem);
2675         bprm->p = vma->vm_end - sizeof(void *);
2676         return 0;
2677 @@ -1471,7 +1473,7 @@ static int format_corename(char *corenam
2678                         /* UNIX time of coredump */
2679                         case 't': {
2680                                 struct timeval tv;
2681 -                               do_gettimeofday(&tv);
2682 +                               vx_gettimeofday(&tv);
2683                                 rc = snprintf(out_ptr, out_end - out_ptr,
2684                                               "%lu", tv.tv_sec);
2685                                 if (rc > out_end - out_ptr)
2686 diff -NurpP --minimal linux-2.6.31.6/fs/ext2/balloc.c linux-2.6.31.6-vs2.3.0.36.24/fs/ext2/balloc.c
2687 --- linux-2.6.31.6/fs/ext2/balloc.c     2009-06-11 17:13:03.000000000 +0200
2688 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ext2/balloc.c       2009-09-10 16:11:43.000000000 +0200
2689 @@ -701,7 +701,6 @@ ext2_try_to_allocate(struct super_block 
2690                         start = 0;
2691                 end = EXT2_BLOCKS_PER_GROUP(sb);
2692         }
2693 -
2694         BUG_ON(start > EXT2_BLOCKS_PER_GROUP(sb));
2695  
2696  repeat:
2697 diff -NurpP --minimal linux-2.6.31.6/fs/ext2/ext2.h linux-2.6.31.6-vs2.3.0.36.24/fs/ext2/ext2.h
2698 --- linux-2.6.31.6/fs/ext2/ext2.h       2009-09-10 15:26:21.000000000 +0200
2699 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ext2/ext2.h 2009-10-07 01:23:12.000000000 +0200
2700 @@ -131,6 +131,7 @@ extern int ext2_fiemap(struct inode *ino
2701  int __ext2_write_begin(struct file *file, struct address_space *mapping,
2702                 loff_t pos, unsigned len, unsigned flags,
2703                 struct page **pagep, void **fsdata);
2704 +extern int ext2_sync_flags(struct inode *, int, int);
2705  
2706  /* ioctl.c */
2707  extern long ext2_ioctl(struct file *, unsigned int, unsigned long);
2708 diff -NurpP --minimal linux-2.6.31.6/fs/ext2/file.c linux-2.6.31.6-vs2.3.0.36.24/fs/ext2/file.c
2709 --- linux-2.6.31.6/fs/ext2/file.c       2009-09-10 15:26:21.000000000 +0200
2710 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ext2/file.c 2009-10-07 01:03:12.000000000 +0200
2711 @@ -87,4 +87,5 @@ const struct inode_operations ext2_file_
2712         .setattr        = ext2_setattr,
2713         .permission     = ext2_permission,
2714         .fiemap         = ext2_fiemap,
2715 +       .sync_flags     = ext2_sync_flags,
2716  };
2717 diff -NurpP --minimal linux-2.6.31.6/fs/ext2/ialloc.c linux-2.6.31.6-vs2.3.0.36.24/fs/ext2/ialloc.c
2718 --- linux-2.6.31.6/fs/ext2/ialloc.c     2009-06-11 17:13:03.000000000 +0200
2719 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ext2/ialloc.c       2009-10-12 05:05:34.000000000 +0200
2720 @@ -17,6 +17,7 @@
2721  #include <linux/backing-dev.h>
2722  #include <linux/buffer_head.h>
2723  #include <linux/random.h>
2724 +#include <linux/vs_tag.h>
2725  #include "ext2.h"
2726  #include "xattr.h"
2727  #include "acl.h"
2728 @@ -560,6 +561,7 @@ got:
2729         } else
2730                 inode->i_gid = current_fsgid();
2731         inode->i_mode = mode;
2732 +       inode->i_tag = dx_current_fstag(sb);
2733  
2734         inode->i_ino = ino;
2735         inode->i_blocks = 0;
2736 diff -NurpP --minimal linux-2.6.31.6/fs/ext2/inode.c linux-2.6.31.6-vs2.3.0.36.24/fs/ext2/inode.c
2737 --- linux-2.6.31.6/fs/ext2/inode.c      2009-09-10 15:26:21.000000000 +0200
2738 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ext2/inode.c        2009-10-06 19:45:13.000000000 +0200
2739 @@ -33,6 +33,7 @@
2740  #include <linux/mpage.h>
2741  #include <linux/fiemap.h>
2742  #include <linux/namei.h>
2743 +#include <linux/vs_tag.h>
2744  #include "ext2.h"
2745  #include "acl.h"
2746  #include "xip.h"
2747 @@ -1038,7 +1039,7 @@ void ext2_truncate(struct inode *inode)
2748                 return;
2749         if (ext2_inode_is_fast_symlink(inode))
2750                 return;
2751 -       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2752 +       if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
2753                 return;
2754  
2755         blocksize = inode->i_sb->s_blocksize;
2756 @@ -1176,36 +1177,61 @@ void ext2_set_inode_flags(struct inode *
2757  {
2758         unsigned int flags = EXT2_I(inode)->i_flags;
2759  
2760 -       inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
2761 +       inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK |
2762 +               S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
2763 +
2764 +
2765 +       if (flags & EXT2_IMMUTABLE_FL)
2766 +               inode->i_flags |= S_IMMUTABLE;
2767 +       if (flags & EXT2_IXUNLINK_FL)
2768 +               inode->i_flags |= S_IXUNLINK;
2769 +
2770         if (flags & EXT2_SYNC_FL)
2771                 inode->i_flags |= S_SYNC;
2772         if (flags & EXT2_APPEND_FL)
2773                 inode->i_flags |= S_APPEND;
2774 -       if (flags & EXT2_IMMUTABLE_FL)
2775 -               inode->i_flags |= S_IMMUTABLE;
2776         if (flags & EXT2_NOATIME_FL)
2777                 inode->i_flags |= S_NOATIME;
2778         if (flags & EXT2_DIRSYNC_FL)
2779                 inode->i_flags |= S_DIRSYNC;
2780 +
2781 +       inode->i_vflags &= ~(V_BARRIER | V_COW);
2782 +
2783 +       if (flags & EXT2_BARRIER_FL)
2784 +               inode->i_vflags |= V_BARRIER;
2785 +       if (flags & EXT2_COW_FL)
2786 +               inode->i_vflags |= V_COW;
2787  }
2788  
2789  /* Propagate flags from i_flags to EXT2_I(inode)->i_flags */
2790  void ext2_get_inode_flags(struct ext2_inode_info *ei)
2791  {
2792         unsigned int flags = ei->vfs_inode.i_flags;
2793 +       unsigned int vflags = ei->vfs_inode.i_vflags;
2794 +
2795 +       ei->i_flags &= ~(EXT2_SYNC_FL | EXT2_APPEND_FL |
2796 +                       EXT2_IMMUTABLE_FL | EXT2_IXUNLINK_FL |
2797 +                       EXT2_NOATIME_FL | EXT2_DIRSYNC_FL |
2798 +                       EXT2_BARRIER_FL | EXT2_COW_FL);
2799 +
2800 +       if (flags & S_IMMUTABLE)
2801 +               ei->i_flags |= EXT2_IMMUTABLE_FL;
2802 +       if (flags & S_IXUNLINK)
2803 +               ei->i_flags |= EXT2_IXUNLINK_FL;
2804  
2805 -       ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL|
2806 -                       EXT2_IMMUTABLE_FL|EXT2_NOATIME_FL|EXT2_DIRSYNC_FL);
2807         if (flags & S_SYNC)
2808                 ei->i_flags |= EXT2_SYNC_FL;
2809         if (flags & S_APPEND)
2810                 ei->i_flags |= EXT2_APPEND_FL;
2811 -       if (flags & S_IMMUTABLE)
2812 -               ei->i_flags |= EXT2_IMMUTABLE_FL;
2813         if (flags & S_NOATIME)
2814                 ei->i_flags |= EXT2_NOATIME_FL;
2815         if (flags & S_DIRSYNC)
2816                 ei->i_flags |= EXT2_DIRSYNC_FL;
2817 +
2818 +       if (vflags & V_BARRIER)
2819 +               ei->i_flags |= EXT2_BARRIER_FL;
2820 +       if (vflags & V_COW)
2821 +               ei->i_flags |= EXT2_COW_FL;
2822  }
2823  
2824  struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
2825 @@ -1215,6 +1241,8 @@ struct inode *ext2_iget (struct super_bl
2826         struct ext2_inode *raw_inode;
2827         struct inode *inode;
2828         long ret = -EIO;
2829 +       uid_t uid;
2830 +       gid_t gid;
2831         int n;
2832  
2833         inode = iget_locked(sb, ino);
2834 @@ -1233,12 +1261,17 @@ struct inode *ext2_iget (struct super_bl
2835         }
2836  
2837         inode->i_mode = le16_to_cpu(raw_inode->i_mode);
2838 -       inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
2839 -       inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
2840 +       uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
2841 +       gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
2842         if (!(test_opt (inode->i_sb, NO_UID32))) {
2843 -               inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2844 -               inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
2845 +               uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2846 +               gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
2847         }
2848 +       inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
2849 +       inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
2850 +       inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid,
2851 +               le16_to_cpu(raw_inode->i_raw_tag));
2852 +
2853         inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
2854         inode->i_size = le32_to_cpu(raw_inode->i_size);
2855         inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
2856 @@ -1336,8 +1369,8 @@ int ext2_write_inode(struct inode *inode
2857         struct ext2_inode_info *ei = EXT2_I(inode);
2858         struct super_block *sb = inode->i_sb;
2859         ino_t ino = inode->i_ino;
2860 -       uid_t uid = inode->i_uid;
2861 -       gid_t gid = inode->i_gid;
2862 +       uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag);
2863 +       gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag);
2864         struct buffer_head * bh;
2865         struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh);
2866         int n;
2867 @@ -1373,6 +1406,9 @@ int ext2_write_inode(struct inode *inode
2868                 raw_inode->i_uid_high = 0;
2869                 raw_inode->i_gid_high = 0;
2870         }
2871 +#ifdef CONFIG_TAGGING_INTERN
2872 +       raw_inode->i_raw_tag = cpu_to_le16(inode->i_tag);
2873 +#endif
2874         raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
2875         raw_inode->i_size = cpu_to_le32(inode->i_size);
2876         raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
2877 @@ -1454,7 +1490,8 @@ int ext2_setattr(struct dentry *dentry, 
2878         if (error)
2879                 return error;
2880         if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
2881 -           (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
2882 +           (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) ||
2883 +           (iattr->ia_valid & ATTR_TAG && iattr->ia_tag != inode->i_tag)) {
2884                 error = vfs_dq_transfer(inode, iattr) ? -EDQUOT : 0;
2885                 if (error)
2886                         return error;
2887 diff -NurpP --minimal linux-2.6.31.6/fs/ext2/ioctl.c linux-2.6.31.6-vs2.3.0.36.24/fs/ext2/ioctl.c
2888 --- linux-2.6.31.6/fs/ext2/ioctl.c      2009-09-10 15:26:21.000000000 +0200
2889 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ext2/ioctl.c        2009-10-07 01:01:20.000000000 +0200
2890 @@ -17,6 +17,16 @@
2891  #include <asm/uaccess.h>
2892  
2893  
2894 +int ext2_sync_flags(struct inode *inode, int flags, int vflags)
2895 +{      /* Install new flags and vflags on @inode; always returns 0. */
2896 +       inode->i_flags = flags;         /* plain overwrite, no masking here */
2897 +       inode->i_vflags = vflags;
2898 +       ext2_get_inode_flags(EXT2_I(inode));    /* mirror both into the ext2-private i_flags */
2899 +       inode->i_ctime = CURRENT_TIME_SEC;
2900 +       mark_inode_dirty(inode);
2901 +       return 0;
2902 +}
2903 +
2904  long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
2905  {
2906         struct inode *inode = filp->f_dentry->d_inode;
2907 @@ -51,6 +61,11 @@ long ext2_ioctl(struct file *filp, unsig
2908  
2909                 flags = ext2_mask_flags(inode->i_mode, flags);
2910  
2911 +               if (IS_BARRIER(inode)) {
2912 +                       vxwprintk_task(1, "messing with the barrier.");
2913 +                       return -EACCES;
2914 +               }
2915 +
2916                 mutex_lock(&inode->i_mutex);
2917                 /* Is it quota file? Do not allow user to mess with it */
2918                 if (IS_NOQUOTA(inode)) {
2919 @@ -66,7 +81,9 @@ long ext2_ioctl(struct file *filp, unsig
2920                  *
2921                  * This test looks nicer. Thanks to Pauline Middelink
2922                  */
2923 -               if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
2924 +               if ((oldflags & EXT2_IMMUTABLE_FL) ||
2925 +                       ((flags ^ oldflags) & (EXT2_APPEND_FL |
2926 +                       EXT2_IMMUTABLE_FL | EXT2_IXUNLINK_FL))) {
2927                         if (!capable(CAP_LINUX_IMMUTABLE)) {
2928                                 mutex_unlock(&inode->i_mutex);
2929                                 ret = -EPERM;
2930 @@ -74,7 +91,7 @@ long ext2_ioctl(struct file *filp, unsig
2931                         }
2932                 }
2933  
2934 -               flags = flags & EXT2_FL_USER_MODIFIABLE;
2935 +               flags &= EXT2_FL_USER_MODIFIABLE;
2936                 flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE;
2937                 ei->i_flags = flags;
2938                 mutex_unlock(&inode->i_mutex);
2939 diff -NurpP --minimal linux-2.6.31.6/fs/ext2/namei.c linux-2.6.31.6-vs2.3.0.36.24/fs/ext2/namei.c
2940 --- linux-2.6.31.6/fs/ext2/namei.c      2009-09-10 15:26:21.000000000 +0200
2941 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ext2/namei.c        2009-10-07 01:08:06.000000000 +0200
2942 @@ -31,6 +31,7 @@
2943   */
2944  
2945  #include <linux/pagemap.h>
2946 +#include <linux/vs_tag.h>
2947  #include "ext2.h"
2948  #include "xattr.h"
2949  #include "acl.h"
2950 @@ -74,6 +75,7 @@ static struct dentry *ext2_lookup(struct
2951                                 return ERR_PTR(-EIO);
2952                         } else {
2953                                 return ERR_CAST(inode);
2954                         }
2955                 }
2956 +               dx_propagate_tag(nd, inode);    /* placed after the error returns so it is reachable */
2957         }
2958 @@ -401,6 +403,7 @@ const struct inode_operations ext2_dir_i
2959  #endif
2960         .setattr        = ext2_setattr,
2961         .permission     = ext2_permission,
2962 +       .sync_flags     = ext2_sync_flags,
2963  };
2964  
2965  const struct inode_operations ext2_special_inode_operations = {
2966 diff -NurpP --minimal linux-2.6.31.6/fs/ext2/super.c linux-2.6.31.6-vs2.3.0.36.24/fs/ext2/super.c
2967 --- linux-2.6.31.6/fs/ext2/super.c      2009-09-10 15:26:21.000000000 +0200
2968 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ext2/super.c        2009-10-06 22:47:11.000000000 +0200
2969 @@ -382,7 +382,8 @@ enum {
2970         Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug,
2971         Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr,
2972         Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota,
2973 -       Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation
2974 +       Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation,
2975 +       Opt_tag, Opt_notag, Opt_tagid
2976  };
2977  
2978  static const match_table_t tokens = {
2979 @@ -410,6 +411,9 @@ static const match_table_t tokens = {
2980         {Opt_acl, "acl"},
2981         {Opt_noacl, "noacl"},
2982         {Opt_xip, "xip"},
2983 +       {Opt_tag, "tag"},
2984 +       {Opt_notag, "notag"},
2985 +       {Opt_tagid, "tagid=%u"},
2986         {Opt_grpquota, "grpquota"},
2987         {Opt_ignore, "noquota"},
2988         {Opt_quota, "quota"},
2989 @@ -480,6 +484,20 @@ static int parse_options (char * options
2990                 case Opt_nouid32:
2991                         set_opt (sbi->s_mount_opt, NO_UID32);
2992                         break;
2993 +#ifndef CONFIG_TAGGING_NONE
2994 +               case Opt_tag:
2995 +                       set_opt (sbi->s_mount_opt, TAGGED);
2996 +                       break;
2997 +               case Opt_notag:
2998 +                       clear_opt (sbi->s_mount_opt, TAGGED);
2999 +                       break;
3000 +#endif
3001 +#ifdef CONFIG_PROPAGATE
3002 +               case Opt_tagid:
3003 +                       /* use args[0] */
3004 +                       set_opt (sbi->s_mount_opt, TAGGED);
3005 +                       break;
3006 +#endif
3007                 case Opt_nocheck:
3008                         clear_opt (sbi->s_mount_opt, CHECK);
3009                         break;
3010 @@ -829,6 +847,8 @@ static int ext2_fill_super(struct super_
3011         if (!parse_options ((char *) data, sbi))
3012                 goto failed_mount;
3013  
3014 +       if (EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_TAGGED)
3015 +               sb->s_flags |= MS_TAGGED;
3016         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3017                 ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
3018                  MS_POSIXACL : 0);
3019 @@ -1175,6 +1195,14 @@ static int ext2_remount (struct super_bl
3020                 goto restore_opts;
3021         }
3022  
3023 +       if ((sbi->s_mount_opt & EXT2_MOUNT_TAGGED) &&
3024 +               !(sb->s_flags & MS_TAGGED)) {
3025 +               printk("EXT2-fs: %s: tagging not permitted on remount.\n",
3026 +                      sb->s_id);
3027 +               err = -EINVAL;
3028 +               goto restore_opts;
3029 +       }
3030 +
3031         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3032                 ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
3033  
3034 diff -NurpP --minimal linux-2.6.31.6/fs/ext3/file.c linux-2.6.31.6-vs2.3.0.36.24/fs/ext3/file.c
3035 --- linux-2.6.31.6/fs/ext3/file.c       2009-06-11 17:13:03.000000000 +0200
3036 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ext3/file.c 2009-10-06 23:23:15.000000000 +0200
3037 @@ -139,5 +139,6 @@ const struct inode_operations ext3_file_
3038  #endif
3039         .permission     = ext3_permission,
3040         .fiemap         = ext3_fiemap,
3041 +       .sync_flags     = ext3_sync_flags,
3042  };
3043  
3044 diff -NurpP --minimal linux-2.6.31.6/fs/ext3/ialloc.c linux-2.6.31.6-vs2.3.0.36.24/fs/ext3/ialloc.c
3045 --- linux-2.6.31.6/fs/ext3/ialloc.c     2009-09-10 15:26:21.000000000 +0200
3046 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ext3/ialloc.c       2009-10-12 05:06:13.000000000 +0200
3047 @@ -23,6 +23,7 @@
3048  #include <linux/buffer_head.h>
3049  #include <linux/random.h>
3050  #include <linux/bitops.h>
3051 +#include <linux/vs_tag.h>
3052  
3053  #include <asm/byteorder.h>
3054  
3055 @@ -548,6 +549,7 @@ got:
3056         } else
3057                 inode->i_gid = current_fsgid();
3058         inode->i_mode = mode;
3059 +       inode->i_tag = dx_current_fstag(sb);
3060  
3061         inode->i_ino = ino;
3062         /* This is the optimal IO size (for stat), not the fs block size */
3063 diff -NurpP --minimal linux-2.6.31.6/fs/ext3/inode.c linux-2.6.31.6-vs2.3.0.36.24/fs/ext3/inode.c
3064 --- linux-2.6.31.6/fs/ext3/inode.c      2009-09-10 15:26:21.000000000 +0200
3065 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ext3/inode.c        2009-10-06 19:45:13.000000000 +0200
3066 @@ -38,6 +38,7 @@
3067  #include <linux/bio.h>
3068  #include <linux/fiemap.h>
3069  #include <linux/namei.h>
3070 +#include <linux/vs_tag.h>
3071  #include "xattr.h"
3072  #include "acl.h"
3073  
3074 @@ -2312,7 +2313,7 @@ static void ext3_free_branches(handle_t 
3075  
3076  int ext3_can_truncate(struct inode *inode)
3077  {
3078 -       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
3079 +       if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
3080                 return 0;
3081         if (S_ISREG(inode->i_mode))
3082                 return 1;
3083 @@ -2697,36 +2698,60 @@ void ext3_set_inode_flags(struct inode *
3084  {
3085         unsigned int flags = EXT3_I(inode)->i_flags;
3086  
3087 -       inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
3088 +       inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK |
3089 +               S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
3090 +
3091 +       if (flags & EXT3_IMMUTABLE_FL)
3092 +               inode->i_flags |= S_IMMUTABLE;
3093 +       if (flags & EXT3_IXUNLINK_FL)
3094 +               inode->i_flags |= S_IXUNLINK;
3095 +
3096         if (flags & EXT3_SYNC_FL)
3097                 inode->i_flags |= S_SYNC;
3098         if (flags & EXT3_APPEND_FL)
3099                 inode->i_flags |= S_APPEND;
3100 -       if (flags & EXT3_IMMUTABLE_FL)
3101 -               inode->i_flags |= S_IMMUTABLE;
3102         if (flags & EXT3_NOATIME_FL)
3103                 inode->i_flags |= S_NOATIME;
3104         if (flags & EXT3_DIRSYNC_FL)
3105                 inode->i_flags |= S_DIRSYNC;
3106 +
3107 +       inode->i_vflags &= ~(V_BARRIER | V_COW);
3108 +
3109 +       if (flags & EXT3_BARRIER_FL)
3110 +               inode->i_vflags |= V_BARRIER;
3111 +       if (flags & EXT3_COW_FL)
3112 +               inode->i_vflags |= V_COW;
3113  }
3114  
3115  /* Propagate flags from i_flags to EXT3_I(inode)->i_flags */
3116  void ext3_get_inode_flags(struct ext3_inode_info *ei)
3117  {
3118         unsigned int flags = ei->vfs_inode.i_flags;
3119 +       unsigned int vflags = ei->vfs_inode.i_vflags;
3120 +
3121 +       ei->i_flags &= ~(EXT3_SYNC_FL | EXT3_APPEND_FL |
3122 +                       EXT3_IMMUTABLE_FL | EXT3_IXUNLINK_FL |
3123 +                       EXT3_NOATIME_FL | EXT3_DIRSYNC_FL |
3124 +                       EXT3_BARRIER_FL | EXT3_COW_FL);
3125 +
3126 +       if (flags & S_IMMUTABLE)
3127 +               ei->i_flags |= EXT3_IMMUTABLE_FL;
3128 +       if (flags & S_IXUNLINK)
3129 +               ei->i_flags |= EXT3_IXUNLINK_FL;
3130  
3131 -       ei->i_flags &= ~(EXT3_SYNC_FL|EXT3_APPEND_FL|
3132 -                       EXT3_IMMUTABLE_FL|EXT3_NOATIME_FL|EXT3_DIRSYNC_FL);
3133         if (flags & S_SYNC)
3134                 ei->i_flags |= EXT3_SYNC_FL;
3135         if (flags & S_APPEND)
3136                 ei->i_flags |= EXT3_APPEND_FL;
3137 -       if (flags & S_IMMUTABLE)
3138 -               ei->i_flags |= EXT3_IMMUTABLE_FL;
3139         if (flags & S_NOATIME)
3140                 ei->i_flags |= EXT3_NOATIME_FL;
3141         if (flags & S_DIRSYNC)
3142                 ei->i_flags |= EXT3_DIRSYNC_FL;
3143 +
3144 +       if (vflags & V_BARRIER)
3145 +               ei->i_flags |= EXT3_BARRIER_FL;
3146 +       if (vflags & V_COW)
3147 +               ei->i_flags |= EXT3_COW_FL;
3148  }
3149  
3150  struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
3151 @@ -2738,6 +2763,8 @@ struct inode *ext3_iget(struct super_blo
3152         struct inode *inode;
3153         long ret;
3154         int block;
3155 +       uid_t uid;
3156 +       gid_t gid;
3157  
3158         inode = iget_locked(sb, ino);
3159         if (!inode)
3160 @@ -2754,12 +2781,17 @@ struct inode *ext3_iget(struct super_blo
3161         bh = iloc.bh;
3162         raw_inode = ext3_raw_inode(&iloc);
3163         inode->i_mode = le16_to_cpu(raw_inode->i_mode);
3164 -       inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
3165 -       inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
3166 +       uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
3167 +       gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
3168         if(!(test_opt (inode->i_sb, NO_UID32))) {
3169 -               inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
3170 -               inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
3171 +               uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
3172 +               gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
3173         }
3174 +       inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
3175 +       inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
3176 +       inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid,
3177 +               le16_to_cpu(raw_inode->i_raw_tag));
3178 +
3179         inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
3180         inode->i_size = le32_to_cpu(raw_inode->i_size);
3181         inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
3182 @@ -2890,6 +2922,8 @@ static int ext3_do_update_inode(handle_t
3183         struct ext3_inode *raw_inode = ext3_raw_inode(iloc);
3184         struct ext3_inode_info *ei = EXT3_I(inode);
3185         struct buffer_head *bh = iloc->bh;
3186 +       uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag);
3187 +       gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag);
3188         int err = 0, rc, block;
3189  
3190         /* For fields not not tracking in the in-memory inode,
3191 @@ -2900,29 +2934,32 @@ static int ext3_do_update_inode(handle_t
3192         ext3_get_inode_flags(ei);
3193         raw_inode->i_mode = cpu_to_le16(inode->i_mode);
3194         if(!(test_opt(inode->i_sb, NO_UID32))) {
3195 -               raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
3196 -               raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
3197 +               raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid));
3198 +               raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid));
3199  /*
3200   * Fix up interoperability with old kernels. Otherwise, old inodes get
3201   * re-used with the upper 16 bits of the uid/gid intact
3202   */
3203                 if(!ei->i_dtime) {
3204                         raw_inode->i_uid_high =
3205 -                               cpu_to_le16(high_16_bits(inode->i_uid));
3206 +                               cpu_to_le16(high_16_bits(uid));
3207                         raw_inode->i_gid_high =
3208 -                               cpu_to_le16(high_16_bits(inode->i_gid));
3209 +                               cpu_to_le16(high_16_bits(gid));
3210                 } else {
3211                         raw_inode->i_uid_high = 0;
3212                         raw_inode->i_gid_high = 0;
3213                 }
3214         } else {
3215                 raw_inode->i_uid_low =
3216 -                       cpu_to_le16(fs_high2lowuid(inode->i_uid));
3217 +                       cpu_to_le16(fs_high2lowuid(uid));
3218                 raw_inode->i_gid_low =
3219 -                       cpu_to_le16(fs_high2lowgid(inode->i_gid));
3220 +                       cpu_to_le16(fs_high2lowgid(gid));
3221                 raw_inode->i_uid_high = 0;
3222                 raw_inode->i_gid_high = 0;
3223         }
3224 +#ifdef CONFIG_TAGGING_INTERN
3225 +       raw_inode->i_raw_tag = cpu_to_le16(inode->i_tag);
3226 +#endif
3227         raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
3228         raw_inode->i_size = cpu_to_le32(ei->i_disksize);
3229         raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
3230 @@ -3074,7 +3111,8 @@ int ext3_setattr(struct dentry *dentry, 
3231                 return error;
3232  
3233         if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
3234 -               (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
3235 +               (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) ||
3236 +               (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) {
3237                 handle_t *handle;
3238  
3239                 /* (user+group)*(old+new) structure, inode write (sb,
3240 @@ -3096,6 +3134,8 @@ int ext3_setattr(struct dentry *dentry, 
3241                         inode->i_uid = attr->ia_uid;
3242                 if (attr->ia_valid & ATTR_GID)
3243                         inode->i_gid = attr->ia_gid;
3244 +               if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode))
3245 +                       inode->i_tag = attr->ia_tag;
3246                 error = ext3_mark_inode_dirty(handle, inode);
3247                 ext3_journal_stop(handle);
3248         }
3249 diff -NurpP --minimal linux-2.6.31.6/fs/ext3/ioctl.c linux-2.6.31.6-vs2.3.0.36.24/fs/ext3/ioctl.c
3250 --- linux-2.6.31.6/fs/ext3/ioctl.c      2009-06-11 17:13:03.000000000 +0200
3251 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ext3/ioctl.c        2009-10-07 00:08:00.000000000 +0200
3252 @@ -8,6 +8,7 @@
3253   */
3254  
3255  #include <linux/fs.h>
3256 +#include <linux/mount.h>
3257  #include <linux/jbd.h>
3258  #include <linux/capability.h>
3259  #include <linux/ext3_fs.h>
3260 @@ -17,6 +18,34 @@
3261  #include <linux/compat.h>
3262  #include <asm/uaccess.h>
3263  
3264 +/* Set i_flags/i_vflags on @inode and dirty it under a journal handle. */
3265 +int ext3_sync_flags(struct inode *inode, int flags, int vflags)
3266 +{
3267 +       handle_t *handle = NULL;
3268 +       struct ext3_iloc iloc;
3269 +       int err;
3270 +
3271 +       handle = ext3_journal_start(inode, 1);
3272 +       if (IS_ERR(handle))
3273 +               return PTR_ERR(handle); /* no handle was obtained, nothing to stop */
3274 +
3275 +       if (IS_SYNC(inode))
3276 +               handle->h_sync = 1;
3277 +       err = ext3_reserve_inode_write(handle, inode, &iloc);
3278 +       if (err)
3279 +               goto flags_err;         /* inode flags are left untouched on this path */
3280 +
3281 +       inode->i_flags = flags;         /* plain overwrite, no masking here */
3282 +       inode->i_vflags = vflags;
3283 +       ext3_get_inode_flags(EXT3_I(inode));    /* mirror both into the ext3-private i_flags */
3284 +       inode->i_ctime = CURRENT_TIME_SEC;
3285 +
3286 +       err = ext3_mark_iloc_dirty(handle, inode, &iloc);
3287 +flags_err:                             /* reached on success as well as on error */
3288 +       ext3_journal_stop(handle);
3289 +       return err;
3290 +}
3291 +
3292  long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
3293  {
3294         struct inode *inode = filp->f_dentry->d_inode;
3295 @@ -50,6 +79,11 @@ long ext3_ioctl(struct file *filp, unsig
3296  
3297                 flags = ext3_mask_flags(inode->i_mode, flags);
3298  
3299 +               if (IS_BARRIER(inode)) {
3300 +                       vxwprintk_task(1, "messing with the barrier.");
3301 +                       return -EACCES;
3302 +               }
3303 +
3304                 mutex_lock(&inode->i_mutex);
3305  
3306                 /* Is it quota file? Do not allow user to mess with it */
3307 @@ -68,7 +102,9 @@ long ext3_ioctl(struct file *filp, unsig
3308                  *
3309                  * This test looks nicer. Thanks to Pauline Middelink
3310                  */
3311 -               if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) {
3312 +               if ((oldflags & EXT3_IMMUTABLE_FL) ||
3313 +                       ((flags ^ oldflags) & (EXT3_APPEND_FL |
3314 +                       EXT3_IMMUTABLE_FL | EXT3_IXUNLINK_FL))) {
3315                         if (!capable(CAP_LINUX_IMMUTABLE))
3316                                 goto flags_out;
3317                 }
3318 @@ -93,7 +129,7 @@ long ext3_ioctl(struct file *filp, unsig
3319                 if (err)
3320                         goto flags_err;
3321  
3322 -               flags = flags & EXT3_FL_USER_MODIFIABLE;
3323 +               flags &= EXT3_FL_USER_MODIFIABLE;
3324                 flags |= oldflags & ~EXT3_FL_USER_MODIFIABLE;
3325                 ei->i_flags = flags;
3326  
3327 diff -NurpP --minimal linux-2.6.31.6/fs/ext3/namei.c linux-2.6.31.6-vs2.3.0.36.24/fs/ext3/namei.c
3328 --- linux-2.6.31.6/fs/ext3/namei.c      2009-06-11 17:13:03.000000000 +0200
3329 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ext3/namei.c        2009-10-06 23:24:04.000000000 +0200
3330 @@ -36,6 +36,7 @@
3331  #include <linux/quotaops.h>
3332  #include <linux/buffer_head.h>
3333  #include <linux/bio.h>
3334 +#include <linux/vs_tag.h>
3335  
3336  #include "namei.h"
3337  #include "xattr.h"
3338 @@ -912,6 +913,7 @@ restart:
3339                                 if (bh)
3340                                         ll_rw_block(READ_META, 1, &bh);
3341                         }
3342 +               dx_propagate_tag(nd, inode);    /* NOTE(review): nd/inode do not appear in this hunk's context - confirm placement */
3343                 }
3344                 if ((bh = bh_use[ra_ptr++]) == NULL)
3345                         goto next;
3346 @@ -2446,6 +2448,7 @@ const struct inode_operations ext3_dir_i
3347         .removexattr    = generic_removexattr,
3348  #endif
3349         .permission     = ext3_permission,
3350 +       .sync_flags     = ext3_sync_flags,
3351  };
3352  
3353  const struct inode_operations ext3_special_inode_operations = {
3354 diff -NurpP --minimal linux-2.6.31.6/fs/ext3/super.c linux-2.6.31.6-vs2.3.0.36.24/fs/ext3/super.c
3355 --- linux-2.6.31.6/fs/ext3/super.c      2009-09-10 15:26:21.000000000 +0200
3356 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ext3/super.c        2009-10-06 22:46:25.000000000 +0200
3357 @@ -787,7 +787,7 @@ enum {
3358         Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
3359         Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
3360         Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
3361 -       Opt_grpquota
3362 +       Opt_grpquota, Opt_tag, Opt_notag, Opt_tagid
3363  };
3364  
3365  static const match_table_t tokens = {
3366 @@ -840,6 +840,9 @@ static const match_table_t tokens = {
3367         {Opt_usrquota, "usrquota"},
3368         {Opt_barrier, "barrier=%u"},
3369         {Opt_resize, "resize"},
3370 +       {Opt_tag, "tag"},
3371 +       {Opt_notag, "notag"},
3372 +       {Opt_tagid, "tagid=%u"},
3373         {Opt_err, NULL},
3374  };
3375  
3376 @@ -932,6 +935,20 @@ static int parse_options (char *options,
3377                 case Opt_nouid32:
3378                         set_opt (sbi->s_mount_opt, NO_UID32);
3379                         break;
3380 +#ifndef CONFIG_TAGGING_NONE
3381 +               case Opt_tag:
3382 +                       set_opt (sbi->s_mount_opt, TAGGED);
3383 +                       break;
3384 +               case Opt_notag:
3385 +                       clear_opt (sbi->s_mount_opt, TAGGED);
3386 +                       break;
3387 +#endif
3388 +#ifdef CONFIG_PROPAGATE
3389 +               case Opt_tagid:
3390 +                       /* use args[0] */
3391 +                       set_opt (sbi->s_mount_opt, TAGGED);
3392 +                       break;
3393 +#endif
3394                 case Opt_nocheck:
3395                         clear_opt (sbi->s_mount_opt, CHECK);
3396                         break;
3397 @@ -1656,6 +1673,9 @@ static int ext3_fill_super (struct super
3398                             NULL, 0))
3399                 goto failed_mount;
3400  
3401 +       if (EXT3_SB(sb)->s_mount_opt & EXT3_MOUNT_TAGGED)
3402 +               sb->s_flags |= MS_TAGGED;
3403 +
3404         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3405                 ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
3406  
3407 @@ -2514,6 +2534,14 @@ static int ext3_remount (struct super_bl
3408         if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
3409                 ext3_abort(sb, __func__, "Abort forced by user");
3410  
3411 +       if ((sbi->s_mount_opt & EXT3_MOUNT_TAGGED) &&
3412 +               !(sb->s_flags & MS_TAGGED)) {
3413 +               printk("EXT3-fs: %s: tagging not permitted on remount.\n",
3414 +                       sb->s_id);
3415 +               err = -EINVAL;
3416 +               goto restore_opts;
3417 +       }
3418 +
3419         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3420                 ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
3421  
3422 diff -NurpP --minimal linux-2.6.31.6/fs/ext4/ext4.h linux-2.6.31.6-vs2.3.0.36.24/fs/ext4/ext4.h
3423 --- linux-2.6.31.6/fs/ext4/ext4.h       2009-09-10 15:26:21.000000000 +0200
3424 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ext4/ext4.h 2009-10-07 01:23:25.000000000 +0200
3425 @@ -252,8 +252,12 @@ struct flex_groups {
3426  #define EXT4_HUGE_FILE_FL               0x00040000 /* Set to each huge file */
3427  #define EXT4_EXTENTS_FL                        0x00080000 /* Inode uses extents */
3428  #define EXT4_EXT_MIGRATE               0x00100000 /* Inode is migrating */
3429 +#define EXT4_IXUNLINK_FL               0x08000000 /* Immutable invert on unlink */
3430  #define EXT4_RESERVED_FL               0x80000000 /* reserved for ext4 lib */
3431  
3432 +#define EXT4_BARRIER_FL                        0x04000000 /* Barrier for chroot() */
3433 +#define EXT4_COW_FL                    0x20000000 /* Copy on Write marker */
3434 +
3435  #define EXT4_FL_USER_VISIBLE           0x000BDFFF /* User visible flags */
3436  #define EXT4_FL_USER_MODIFIABLE                0x000B80FF /* User modifiable flags */
3437  
3438 @@ -423,7 +427,8 @@ struct ext4_inode {
3439                         __le16  l_i_file_acl_high;
3440                         __le16  l_i_uid_high;   /* these 2 fields */
3441                         __le16  l_i_gid_high;   /* were reserved2[0] */
3442 -                       __u32   l_i_reserved2;
3443 +                       __le16  l_i_tag;        /* Context Tag */
3444 +                       __u16   l_i_reserved2;
3445                 } linux2;
3446                 struct {
3447                         __le16  h_i_reserved1;  /* Obsoleted fragment number/size which are removed in ext4 */
3448 @@ -538,6 +543,7 @@ do {                                                                               \
3449  #define i_gid_low      i_gid
3450  #define i_uid_high     osd2.linux2.l_i_uid_high
3451  #define i_gid_high     osd2.linux2.l_i_gid_high
3452 +#define i_raw_tag      osd2.linux2.l_i_tag
3453  #define i_reserved2    osd2.linux2.l_i_reserved2
3454  
3455  #elif defined(__GNU__)
3456 @@ -694,6 +700,7 @@ struct ext4_inode_info {
3457  #define EXT4_MOUNT_QUOTA               0x80000 /* Some quota option set */
3458  #define EXT4_MOUNT_USRQUOTA            0x100000 /* "old" user quota */
3459  #define EXT4_MOUNT_GRPQUOTA            0x200000 /* "old" group quota */
3460 +#define EXT4_MOUNT_TAGGED              0x400000 /* Enable Context Tags */
3461  #define EXT4_MOUNT_JOURNAL_CHECKSUM    0x800000 /* Journal checksums */
3462  #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT        0x1000000 /* Journal Async Commit */
3463  #define EXT4_MOUNT_I_VERSION            0x2000000 /* i_version support */
3464 @@ -1655,6 +1662,7 @@ extern int ext4_get_blocks(handle_t *han
3465                            struct buffer_head *bh, int flags);
3466  extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3467                         __u64 start, __u64 len);
3468 +extern int ext4_sync_flags(struct inode *, int, int);
3469  /* move_extent.c */
3470  extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
3471                              __u64 start_orig, __u64 start_donor,
3472 diff -NurpP --minimal linux-2.6.31.6/fs/ext4/file.c linux-2.6.31.6-vs2.3.0.36.24/fs/ext4/file.c
3473 --- linux-2.6.31.6/fs/ext4/file.c       2009-09-10 15:26:21.000000000 +0200
3474 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ext4/file.c 2009-10-07 01:03:26.000000000 +0200
3475 @@ -210,5 +210,6 @@ const struct inode_operations ext4_file_
3476         .permission     = ext4_permission,
3477         .fallocate      = ext4_fallocate,
3478         .fiemap         = ext4_fiemap,
3479 +       .sync_flags     = ext4_sync_flags,
3480  };
3481  
3482 diff -NurpP --minimal linux-2.6.31.6/fs/ext4/ialloc.c linux-2.6.31.6-vs2.3.0.36.24/fs/ext4/ialloc.c
3483 --- linux-2.6.31.6/fs/ext4/ialloc.c     2009-09-10 15:26:21.000000000 +0200
3484 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ext4/ialloc.c       2009-10-12 05:06:42.000000000 +0200
3485 @@ -22,6 +22,7 @@
3486  #include <linux/random.h>
3487  #include <linux/bitops.h>
3488  #include <linux/blkdev.h>
3489 +#include <linux/vs_tag.h>
3490  #include <asm/byteorder.h>
3491  
3492  #include "ext4.h"
3493 @@ -995,6 +996,7 @@ got:
3494         } else
3495                 inode->i_gid = current_fsgid();
3496         inode->i_mode = mode;
3497 +       inode->i_tag = dx_current_fstag(sb);
3498  
3499         inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
3500         /* This is the optimal IO size (for stat), not the fs block size */
3501 diff -NurpP --minimal linux-2.6.31.6/fs/ext4/inode.c linux-2.6.31.6-vs2.3.0.36.24/fs/ext4/inode.c
3502 --- linux-2.6.31.6/fs/ext4/inode.c      2009-09-10 15:26:21.000000000 +0200
3503 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ext4/inode.c        2009-10-06 19:45:13.000000000 +0200
3504 @@ -37,6 +37,7 @@
3505  #include <linux/namei.h>
3506  #include <linux/uio.h>
3507  #include <linux/bio.h>
3508 +#include <linux/vs_tag.h>
3509  
3510  #include "ext4_jbd2.h"
3511  #include "xattr.h"
3512 @@ -3901,7 +3902,7 @@ static void ext4_free_branches(handle_t 
3513  
3514  int ext4_can_truncate(struct inode *inode)
3515  {
3516 -       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
3517 +       if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
3518                 return 0;
3519         if (S_ISREG(inode->i_mode))
3520                 return 1;
3521 @@ -4253,36 +4254,60 @@ void ext4_set_inode_flags(struct inode *
3522  {
3523         unsigned int flags = EXT4_I(inode)->i_flags;
3524  
3525 -       inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
3526 +       inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK |
3527 +               S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
3528 +
3529 +       if (flags & EXT4_IMMUTABLE_FL)
3530 +               inode->i_flags |= S_IMMUTABLE;
3531 +       if (flags & EXT4_IXUNLINK_FL)
3532 +               inode->i_flags |= S_IXUNLINK;
3533 +
3534         if (flags & EXT4_SYNC_FL)
3535                 inode->i_flags |= S_SYNC;
3536         if (flags & EXT4_APPEND_FL)
3537                 inode->i_flags |= S_APPEND;
3538 -       if (flags & EXT4_IMMUTABLE_FL)
3539 -               inode->i_flags |= S_IMMUTABLE;
3540         if (flags & EXT4_NOATIME_FL)
3541                 inode->i_flags |= S_NOATIME;
3542         if (flags & EXT4_DIRSYNC_FL)
3543                 inode->i_flags |= S_DIRSYNC;
3544 +
3545 +       inode->i_vflags &= ~(V_BARRIER | V_COW);
3546 +
3547 +       if (flags & EXT4_BARRIER_FL)
3548 +               inode->i_vflags |= V_BARRIER;
3549 +       if (flags & EXT4_COW_FL)
3550 +               inode->i_vflags |= V_COW;
3551  }
3552  
3553  /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
3554  void ext4_get_inode_flags(struct ext4_inode_info *ei)
3555  {
3556         unsigned int flags = ei->vfs_inode.i_flags;
3557 +       unsigned int vflags = ei->vfs_inode.i_vflags;
3558 +
3559 +       ei->i_flags &= ~(EXT4_SYNC_FL | EXT4_APPEND_FL |
3560 +                       EXT4_IMMUTABLE_FL | EXT4_IXUNLINK_FL |
3561 +                       EXT4_NOATIME_FL | EXT4_DIRSYNC_FL |
3562 +                       EXT4_BARRIER_FL | EXT4_COW_FL);
3563 +
3564 +       if (flags & S_IMMUTABLE)
3565 +               ei->i_flags |= EXT4_IMMUTABLE_FL;
3566 +       if (flags & S_IXUNLINK)
3567 +               ei->i_flags |= EXT4_IXUNLINK_FL;
3568  
3569 -       ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
3570 -                       EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|EXT4_DIRSYNC_FL);
3571         if (flags & S_SYNC)
3572                 ei->i_flags |= EXT4_SYNC_FL;
3573         if (flags & S_APPEND)
3574                 ei->i_flags |= EXT4_APPEND_FL;
3575 -       if (flags & S_IMMUTABLE)
3576 -               ei->i_flags |= EXT4_IMMUTABLE_FL;
3577         if (flags & S_NOATIME)
3578                 ei->i_flags |= EXT4_NOATIME_FL;
3579         if (flags & S_DIRSYNC)
3580                 ei->i_flags |= EXT4_DIRSYNC_FL;
3581 +
3582 +       if (vflags & V_BARRIER)
3583 +               ei->i_flags |= EXT4_BARRIER_FL;
3584 +       if (vflags & V_COW)
3585 +               ei->i_flags |= EXT4_COW_FL;
3586  }
3587  
3588  static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
3589 @@ -4317,6 +4342,8 @@ struct inode *ext4_iget(struct super_blo
3590         struct inode *inode;
3591         long ret;
3592         int block;
3593 +       uid_t uid;
3594 +       gid_t gid;
3595  
3596         inode = iget_locked(sb, ino);
3597         if (!inode)
3598 @@ -4332,12 +4359,16 @@ struct inode *ext4_iget(struct super_blo
3599         bh = iloc.bh;
3600         raw_inode = ext4_raw_inode(&iloc);
3601         inode->i_mode = le16_to_cpu(raw_inode->i_mode);
3602 -       inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
3603 -       inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
3604 +       uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
3605 +       gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
3606         if (!(test_opt(inode->i_sb, NO_UID32))) {
3607 -               inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
3608 -               inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
3609 +               uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
3610 +               gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
3611         }
3612 +       inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
3613 +       inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
3614 +       inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid,
3615 +               le16_to_cpu(raw_inode->i_raw_tag));
3616         inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
3617  
3618         ei->i_state = 0;
3619 @@ -4538,6 +4569,8 @@ static int ext4_do_update_inode(handle_t
3620         struct ext4_inode *raw_inode = ext4_raw_inode(iloc);
3621         struct ext4_inode_info *ei = EXT4_I(inode);
3622         struct buffer_head *bh = iloc->bh;
3623 +       uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag);
3624 +       gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag);
3625         int err = 0, rc, block;
3626  
3627         /* For fields not not tracking in the in-memory inode,
3628 @@ -4548,29 +4581,32 @@ static int ext4_do_update_inode(handle_t
3629         ext4_get_inode_flags(ei);
3630         raw_inode->i_mode = cpu_to_le16(inode->i_mode);
3631         if (!(test_opt(inode->i_sb, NO_UID32))) {
3632 -               raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
3633 -               raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
3634 +               raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid));
3635 +               raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid));
3636  /*
3637   * Fix up interoperability with old kernels. Otherwise, old inodes get
3638   * re-used with the upper 16 bits of the uid/gid intact
3639   */
3640                 if (!ei->i_dtime) {
3641                         raw_inode->i_uid_high =
3642 -                               cpu_to_le16(high_16_bits(inode->i_uid));
3643 +                               cpu_to_le16(high_16_bits(uid));
3644                         raw_inode->i_gid_high =
3645 -                               cpu_to_le16(high_16_bits(inode->i_gid));
3646 +                               cpu_to_le16(high_16_bits(gid));
3647                 } else {
3648                         raw_inode->i_uid_high = 0;
3649                         raw_inode->i_gid_high = 0;
3650                 }
3651         } else {
3652                 raw_inode->i_uid_low =
3653 -                       cpu_to_le16(fs_high2lowuid(inode->i_uid));
3654 +                       cpu_to_le16(fs_high2lowuid(uid));
3655                 raw_inode->i_gid_low =
3656 -                       cpu_to_le16(fs_high2lowgid(inode->i_gid));
3657 +                       cpu_to_le16(fs_high2lowgid(gid));
3658                 raw_inode->i_uid_high = 0;
3659                 raw_inode->i_gid_high = 0;
3660         }
3661 +#ifdef CONFIG_TAGGING_INTERN
3662 +       raw_inode->i_raw_tag = cpu_to_le16(inode->i_tag);
3663 +#endif
3664         raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
3665  
3666         EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
3667 @@ -4734,7 +4770,8 @@ int ext4_setattr(struct dentry *dentry, 
3668                 return error;
3669  
3670         if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
3671 -               (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
3672 +               (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) ||
3673 +               (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) {
3674                 handle_t *handle;
3675  
3676                 /* (user+group)*(old+new) structure, inode write (sb,
3677 @@ -4756,6 +4793,8 @@ int ext4_setattr(struct dentry *dentry, 
3678                         inode->i_uid = attr->ia_uid;
3679                 if (attr->ia_valid & ATTR_GID)
3680                         inode->i_gid = attr->ia_gid;
3681 +               if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode))
3682 +                       inode->i_tag = attr->ia_tag;
3683                 error = ext4_mark_inode_dirty(handle, inode);
3684                 ext4_journal_stop(handle);
3685         }
3686 diff -NurpP --minimal linux-2.6.31.6/fs/ext4/ioctl.c linux-2.6.31.6-vs2.3.0.36.24/fs/ext4/ioctl.c
3687 --- linux-2.6.31.6/fs/ext4/ioctl.c      2009-09-10 15:26:21.000000000 +0200
3688 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ext4/ioctl.c        2009-10-07 04:03:02.000000000 +0200
3689 @@ -14,10 +14,39 @@
3690  #include <linux/compat.h>
3691  #include <linux/mount.h>
3692  #include <linux/file.h>
3693 +#include <linux/vs_tag.h>
3694  #include <asm/uaccess.h>
3695  #include "ext4_jbd2.h"
3696  #include "ext4.h"
3697  
3698 +/* Install @flags/@vflags on @inode and journal the derived ext4 flags. */
3699 +int ext4_sync_flags(struct inode *inode, int flags, int vflags)
3700 +{
3701 +       handle_t *handle = NULL;
3702 +       struct ext4_iloc iloc;
3703 +       int err;
3704 +
3705 +       handle = ext4_journal_start(inode, 1);
3706 +       if (IS_ERR(handle))
3707 +               return PTR_ERR(handle); /* no handle was started, nothing to stop */
3708 +
3709 +       if (IS_SYNC(inode))
3710 +               ext4_handle_sync(handle);
3711 +       err = ext4_reserve_inode_write(handle, inode, &iloc);
3712 +       if (err)
3713 +               goto flags_err; /* in-memory flags are left untouched */
3714 +
3715 +       inode->i_flags = flags;
3716 +       inode->i_vflags = vflags;
3717 +       ext4_get_inode_flags(EXT4_I(inode)); /* fold i_flags/i_vflags into ei->i_flags */
3718 +       inode->i_ctime = ext4_current_time(inode);
3719 +
3720 +       err = ext4_mark_iloc_dirty(handle, inode, &iloc);
3721 +flags_err: /* reached on success and on failure: the handle is always stopped */
3722 +       ext4_journal_stop(handle);
3723 +       return err; /* 0 or the error from reserve/mark-dirty */
3724 +}
3725 +
3726  long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
3727  {
3728         struct inode *inode = filp->f_dentry->d_inode;
3729 @@ -50,6 +79,11 @@ long ext4_ioctl(struct file *filp, unsig
3730  
3731                 flags = ext4_mask_flags(inode->i_mode, flags);
3732  
3733 +               if (IS_BARRIER(inode)) {
3734 +                       vxwprintk_task(1, "messing with the barrier.");
3735 +                       return -EACCES;
3736 +               }
3737 +
3738                 err = -EPERM;
3739                 mutex_lock(&inode->i_mutex);
3740                 /* Is it quota file? Do not allow user to mess with it */
3741 @@ -67,7 +101,9 @@ long ext4_ioctl(struct file *filp, unsig
3742                  *
3743                  * This test looks nicer. Thanks to Pauline Middelink
3744                  */
3745 -               if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
3746 +               if ((oldflags & EXT4_IMMUTABLE_FL) ||
3747 +                       ((flags ^ oldflags) & (EXT4_APPEND_FL |
3748 +                       EXT4_IMMUTABLE_FL | EXT4_IXUNLINK_FL))) {
3749                         if (!capable(CAP_LINUX_IMMUTABLE))
3750                                 goto flags_out;
3751                 }
3752 diff -NurpP --minimal linux-2.6.31.6/fs/ext4/namei.c linux-2.6.31.6-vs2.3.0.36.24/fs/ext4/namei.c
3753 --- linux-2.6.31.6/fs/ext4/namei.c      2009-09-10 15:26:21.000000000 +0200
3754 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ext4/namei.c        2009-10-07 01:08:37.000000000 +0200
3755 @@ -34,6 +34,7 @@
3756  #include <linux/quotaops.h>
3757  #include <linux/buffer_head.h>
3758  #include <linux/bio.h>
3759 +#include <linux/vs_tag.h>
3760  #include "ext4.h"
3761  #include "ext4_jbd2.h"
3762  
3763 @@ -941,6 +942,7 @@ restart:
3764                                 if (bh)
3765                                         ll_rw_block(READ_META, 1, &bh);
3766                         }
3767 +               dx_propagate_tag(nd, inode);
3768                 }
3769                 if ((bh = bh_use[ra_ptr++]) == NULL)
3770                         goto next;
3771 @@ -2538,6 +2540,7 @@ const struct inode_operations ext4_dir_i
3772  #endif
3773         .permission     = ext4_permission,
3774         .fiemap         = ext4_fiemap,
3775 +       .sync_flags     = ext4_sync_flags,
3776  };
3777  
3778  const struct inode_operations ext4_special_inode_operations = {
3779 diff -NurpP --minimal linux-2.6.31.6/fs/ext4/super.c linux-2.6.31.6-vs2.3.0.36.24/fs/ext4/super.c
3780 --- linux-2.6.31.6/fs/ext4/super.c      2009-09-10 15:26:21.000000000 +0200
3781 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ext4/super.c        2009-10-06 22:47:27.000000000 +0200
3782 @@ -1057,7 +1057,8 @@ enum {
3783         Opt_usrquota, Opt_grpquota, Opt_i_version,
3784         Opt_stripe, Opt_delalloc, Opt_nodelalloc,
3785         Opt_block_validity, Opt_noblock_validity,
3786 -       Opt_inode_readahead_blks, Opt_journal_ioprio
3787 +       Opt_inode_readahead_blks, Opt_journal_ioprio,
3788 +       Opt_tag, Opt_notag, Opt_tagid
3789  };
3790  
3791  static const match_table_t tokens = {
3792 @@ -1123,6 +1124,9 @@ static const match_table_t tokens = {
3793         {Opt_auto_da_alloc, "auto_da_alloc=%u"},
3794         {Opt_auto_da_alloc, "auto_da_alloc"},
3795         {Opt_noauto_da_alloc, "noauto_da_alloc"},
3796 +       {Opt_tag, "tag"},
3797 +       {Opt_notag, "notag"},
3798 +       {Opt_tagid, "tagid=%u"},
3799         {Opt_err, NULL},
3800  };
3801  
3802 @@ -1220,6 +1224,20 @@ static int parse_options(char *options, 
3803                 case Opt_nouid32:
3804                         set_opt(sbi->s_mount_opt, NO_UID32);
3805                         break;
3806 +#ifndef CONFIG_TAGGING_NONE
3807 +               case Opt_tag:
3808 +                       set_opt (sbi->s_mount_opt, TAGGED);
3809 +                       break;
3810 +               case Opt_notag:
3811 +                       clear_opt (sbi->s_mount_opt, TAGGED);
3812 +                       break;
3813 +#endif
3814 +#ifdef CONFIG_PROPAGATE
3815 +               case Opt_tagid:
3816 +                       /* TODO: the tag id in args[0] is not parsed yet; only tagging is enabled */
3817 +                       set_opt (sbi->s_mount_opt, TAGGED);
3818 +                       break;
3819 +#endif
3820                 case Opt_debug:
3821                         set_opt(sbi->s_mount_opt, DEBUG);
3822                         break;
3823 @@ -2385,6 +2403,9 @@ static int ext4_fill_super(struct super_
3824                            &journal_ioprio, NULL, 0))
3825                 goto failed_mount;
3826  
3827 +       if (EXT4_SB(sb)->s_mount_opt & EXT4_MOUNT_TAGGED)
3828 +               sb->s_flags |= MS_TAGGED;
3829 +
3830         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3831                 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
3832  
3833 @@ -3441,6 +3462,14 @@ static int ext4_remount(struct super_blo
3834         if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
3835                 ext4_abort(sb, __func__, "Abort forced by user");
3836  
3837 +       if ((sbi->s_mount_opt & EXT4_MOUNT_TAGGED) &&
3838 +               !(sb->s_flags & MS_TAGGED)) {
3839 +               printk("EXT4-fs: %s: tagging not permitted on remount.\n",
3840 +                       sb->s_id);
3841 +               err = -EINVAL;
3842 +               goto restore_opts;
3843 +       }
3844 +
3845         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3846                 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
3847  
3848 diff -NurpP --minimal linux-2.6.31.6/fs/fcntl.c linux-2.6.31.6-vs2.3.0.36.24/fs/fcntl.c
3849 --- linux-2.6.31.6/fs/fcntl.c   2009-09-10 15:26:22.000000000 +0200
3850 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/fcntl.c     2009-09-10 17:00:41.000000000 +0200
3851 @@ -19,6 +19,7 @@
3852  #include <linux/signal.h>
3853  #include <linux/rcupdate.h>
3854  #include <linux/pid_namespace.h>
3855 +#include <linux/vs_limit.h>
3856  
3857  #include <asm/poll.h>
3858  #include <asm/siginfo.h>
3859 @@ -102,6 +103,8 @@ SYSCALL_DEFINE3(dup3, unsigned int, oldf
3860  
3861         if (tofree)
3862                 filp_close(tofree, files);
3863 +       else
3864 +               vx_openfd_inc(newfd);   /* fd was unused */
3865  
3866         return newfd;
3867  
3868 @@ -347,6 +350,8 @@ SYSCALL_DEFINE3(fcntl, unsigned int, fd,
3869         filp = fget(fd);
3870         if (!filp)
3871                 goto out;
3872 +       if (!vx_files_avail(1))
3873 +               goto out;
3874  
3875         err = security_file_fcntl(filp, cmd, arg);
3876         if (err) {
3877 diff -NurpP --minimal linux-2.6.31.6/fs/file.c linux-2.6.31.6-vs2.3.0.36.24/fs/file.c
3878 --- linux-2.6.31.6/fs/file.c    2008-12-25 00:26:37.000000000 +0100
3879 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/file.c      2009-09-10 16:11:43.000000000 +0200
3880 @@ -19,6 +19,7 @@
3881  #include <linux/spinlock.h>
3882  #include <linux/rcupdate.h>
3883  #include <linux/workqueue.h>
3884 +#include <linux/vs_limit.h>
3885  
3886  struct fdtable_defer {
3887         spinlock_t lock;
3888 @@ -367,6 +368,8 @@ struct files_struct *dup_fd(struct files
3889                 struct file *f = *old_fds++;
3890                 if (f) {
3891                         get_file(f);
3892 +                       /* TODO: sum it first for check and performance */
3893 +                       vx_openfd_inc(open_files - i);
3894                 } else {
3895                         /*
3896                          * The fd may be claimed in the fd bitmap but not yet
3897 @@ -475,6 +478,7 @@ repeat:
3898         else
3899                 FD_CLR(fd, fdt->close_on_exec);
3900         error = fd;
3901 +       vx_openfd_inc(fd);
3902  #if 1
3903         /* Sanity check */
3904         if (rcu_dereference(fdt->fd[fd]) != NULL) {
3905 diff -NurpP --minimal linux-2.6.31.6/fs/file_table.c linux-2.6.31.6-vs2.3.0.36.24/fs/file_table.c
3906 --- linux-2.6.31.6/fs/file_table.c      2009-09-10 15:26:22.000000000 +0200
3907 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/file_table.c        2009-09-10 16:11:43.000000000 +0200
3908 @@ -22,6 +22,8 @@
3909  #include <linux/fsnotify.h>
3910  #include <linux/sysctl.h>
3911  #include <linux/percpu_counter.h>
3912 +#include <linux/vs_limit.h>
3913 +#include <linux/vs_context.h>
3914  
3915  #include <asm/atomic.h>
3916  
3917 @@ -131,6 +133,8 @@ struct file *get_empty_filp(void)
3918         spin_lock_init(&f->f_lock);
3919         eventpoll_init_file(f);
3920         /* f->f_version: 0 */
3921 +       f->f_xid = vx_current_xid();
3922 +       vx_files_inc(f);
3923         return f;
3924  
3925  over:
3926 @@ -285,6 +289,8 @@ void __fput(struct file *file)
3927                 cdev_put(inode->i_cdev);
3928         fops_put(file->f_op);
3929         put_pid(file->f_owner.pid);
3930 +       vx_files_dec(file);
3931 +       file->f_xid = 0;
3932         file_kill(file);
3933         if (file->f_mode & FMODE_WRITE)
3934                 drop_file_write_access(file);
3935 @@ -352,6 +358,8 @@ void put_filp(struct file *file)
3936  {
3937         if (atomic_long_dec_and_test(&file->f_count)) {
3938                 security_file_free(file);
3939 +               vx_files_dec(file);
3940 +               file->f_xid = 0;
3941                 file_kill(file);
3942                 file_free(file);
3943         }
3944 diff -NurpP --minimal linux-2.6.31.6/fs/fs_struct.c linux-2.6.31.6-vs2.3.0.36.24/fs/fs_struct.c
3945 --- linux-2.6.31.6/fs/fs_struct.c       2009-06-11 17:13:04.000000000 +0200
3946 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/fs_struct.c 2009-09-10 16:11:43.000000000 +0200
3947 @@ -4,6 +4,7 @@
3948  #include <linux/path.h>
3949  #include <linux/slab.h>
3950  #include <linux/fs_struct.h>
3951 +#include <linux/vserver/global.h>
3952  
3953  /*
3954   * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
3955 @@ -77,6 +78,7 @@ void free_fs_struct(struct fs_struct *fs
3956  {
3957         path_put(&fs->root);
3958         path_put(&fs->pwd);
3959 +       atomic_dec(&vs_global_fs);
3960         kmem_cache_free(fs_cachep, fs);
3961  }
3962  
3963 @@ -112,6 +114,7 @@ struct fs_struct *copy_fs_struct(struct 
3964                 fs->pwd = old->pwd;
3965                 path_get(&old->pwd);
3966                 read_unlock(&old->lock);
3967 +               atomic_inc(&vs_global_fs);
3968         }
3969         return fs;
3970  }
3971 diff -NurpP --minimal linux-2.6.31.6/fs/gfs2/file.c linux-2.6.31.6-vs2.3.0.36.24/fs/gfs2/file.c
3972 --- linux-2.6.31.6/fs/gfs2/file.c       2009-09-10 15:26:22.000000000 +0200
3973 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/gfs2/file.c 2009-10-07 18:47:19.000000000 +0200
3974 @@ -133,6 +133,9 @@ static const u32 fsflags_to_gfs2[32] = {
3975         [7] = GFS2_DIF_NOATIME,
3976         [12] = GFS2_DIF_EXHASH,
3977         [14] = GFS2_DIF_INHERIT_JDATA,
3978 +       [27] = GFS2_DIF_IXUNLINK,
3979 +       [26] = GFS2_DIF_BARRIER,
3980 +       [29] = GFS2_DIF_COW,
3981  };
3982  
3983  static const u32 gfs2_to_fsflags[32] = {
3984 @@ -142,6 +145,9 @@ static const u32 gfs2_to_fsflags[32] = {
3985         [gfs2fl_NoAtime] = FS_NOATIME_FL,
3986         [gfs2fl_ExHash] = FS_INDEX_FL,
3987         [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL,
3988 +       [gfs2fl_IXUnlink] = FS_IXUNLINK_FL,
3989 +       [gfs2fl_Barrier] = FS_BARRIER_FL,
3990 +       [gfs2fl_Cow] = FS_COW_FL,
3991  };
3992  
3993  static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
3994 @@ -172,10 +178,16 @@ void gfs2_set_inode_flags(struct inode *
3995  {
3996         struct gfs2_inode *ip = GFS2_I(inode);
3997         unsigned int flags = inode->i_flags;
3998 +       unsigned int vflags = inode->i_vflags;
3999 +
4000 +       flags &= ~(S_IMMUTABLE | S_IXUNLINK |
4001 +               S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
4002  
4003 -       flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
4004         if (ip->i_diskflags & GFS2_DIF_IMMUTABLE)
4005                 flags |= S_IMMUTABLE;
4006 +       if (ip->i_diskflags & GFS2_DIF_IXUNLINK)
4007 +               flags |= S_IXUNLINK;
4008 +
4009         if (ip->i_diskflags & GFS2_DIF_APPENDONLY)
4010                 flags |= S_APPEND;
4011         if (ip->i_diskflags & GFS2_DIF_NOATIME)
4012 @@ -183,6 +195,43 @@ void gfs2_set_inode_flags(struct inode *
4013         if (ip->i_diskflags & GFS2_DIF_SYNC)
4014                 flags |= S_SYNC;
4015         inode->i_flags = flags;
4016 +
4017 +       vflags &= ~(V_BARRIER | V_COW);
4018 +
4019 +       if (ip->i_diskflags & GFS2_DIF_BARRIER)
4020 +               vflags |= V_BARRIER;
4021 +       if (ip->i_diskflags & GFS2_DIF_COW)
4022 +               vflags |= V_COW;
4023 +       inode->i_vflags = vflags;
4024 +}
4025 +
4026 +void gfs2_get_inode_flags(struct inode *inode) /* copy VFS i_flags/i_vflags bits into ip->i_diskflags */
4027 +{
4028 +       struct gfs2_inode *ip = GFS2_I(inode);
4029 +       unsigned int flags = inode->i_flags;
4030 +       unsigned int vflags = inode->i_vflags;
4031 +
4032 +       ip->i_diskflags &= ~(GFS2_DIF_APPENDONLY | /* clear every bit that is re-derived below */
4033 +                       GFS2_DIF_NOATIME | GFS2_DIF_SYNC |
4034 +                       GFS2_DIF_IMMUTABLE | GFS2_DIF_IXUNLINK |
4035 +                       GFS2_DIF_BARRIER | GFS2_DIF_COW);
4036 +
4037 +       if (flags & S_IMMUTABLE)
4038 +               ip->i_diskflags |= GFS2_DIF_IMMUTABLE;
4039 +       if (flags & S_IXUNLINK)
4040 +               ip->i_diskflags |= GFS2_DIF_IXUNLINK;
4041 +
4042 +       if (flags & S_APPEND)
4043 +               ip->i_diskflags |= GFS2_DIF_APPENDONLY;
4044 +       if (flags & S_NOATIME)
4045 +               ip->i_diskflags |= GFS2_DIF_NOATIME;
4046 +       if (flags & S_SYNC)
4047 +               ip->i_diskflags |= GFS2_DIF_SYNC;
4048 +
4049 +       if (vflags & V_BARRIER) /* the V_* bits are read from i_vflags, not i_flags */
4050 +               ip->i_diskflags |= GFS2_DIF_BARRIER;
4051 +       if (vflags & V_COW)
4052 +               ip->i_diskflags |= GFS2_DIF_COW;
4053  }
4054  
4055  /* Flags that can be set by user space */
4056 @@ -287,6 +336,37 @@ static int gfs2_set_flags(struct file *f
4057         return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA);
4058  }
4059  
4060 +int gfs2_sync_flags(struct inode *inode, int flags, int vflags) /* apply @flags/@vflags and update the dinode buffer in a transaction */
4061 +{
4062 +       struct gfs2_inode *ip = GFS2_I(inode);
4063 +       struct gfs2_sbd *sdp = GFS2_SB(inode);
4064 +       struct buffer_head *bh;
4065 +       struct gfs2_holder gh;
4066 +       int error;
4067 +
4068 +       error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); /* exclusive glock on the inode */
4069 +       if (error)
4070 +               return error; /* holder was not acquired, nothing to undo */
4071 +       error = gfs2_trans_begin(sdp, RES_DINODE, 0);
4072 +       if (error)
4073 +               goto out;
4074 +       error = gfs2_meta_inode_buffer(ip, &bh);
4075 +       if (error)
4076 +               goto out_trans_end;
4077 +       gfs2_trans_add_bh(ip->i_gl, bh, 1);
4078 +       inode->i_flags = flags;
4079 +       inode->i_vflags = vflags;
4080 +       gfs2_get_inode_flags(inode); /* refresh ip->i_diskflags from the new i_flags/i_vflags */
4081 +       gfs2_dinode_out(ip, bh->b_data);
4082 +       brelse(bh);
4083 +       gfs2_set_aops(inode);
4084 +out_trans_end: /* also reached on success */
4085 +       gfs2_trans_end(sdp);
4086 +out: /* every path that acquired the glock holder releases it here */
4087 +       gfs2_glock_dq_uninit(&gh);
4088 +       return error;
4089 +}
4090 +
4091  static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
4092  {
4093         switch(cmd) {
4094 diff -NurpP --minimal linux-2.6.31.6/fs/gfs2/inode.h linux-2.6.31.6-vs2.3.0.36.24/fs/gfs2/inode.h
4095 --- linux-2.6.31.6/fs/gfs2/inode.h      2009-09-10 15:26:22.000000000 +0200
4096 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/gfs2/inode.h        2009-10-07 18:16:33.000000000 +0200
4097 @@ -109,6 +109,7 @@ extern const struct file_operations gfs2
4098  extern const struct file_operations gfs2_dir_fops_nolock;
4099  
4100  extern void gfs2_set_inode_flags(struct inode *inode);
4101 +extern int gfs2_sync_flags(struct inode *inode, int flags, int vflags);
4102   
4103  #ifdef CONFIG_GFS2_FS_LOCKING_DLM
4104  extern const struct file_operations gfs2_file_fops;
4105 diff -NurpP --minimal linux-2.6.31.6/fs/gfs2/ops_inode.c linux-2.6.31.6-vs2.3.0.36.24/fs/gfs2/ops_inode.c
4106 --- linux-2.6.31.6/fs/gfs2/ops_inode.c  2009-09-10 15:26:22.000000000 +0200
4107 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/gfs2/ops_inode.c    2009-10-07 18:15:45.000000000 +0200
4108 @@ -1409,6 +1409,7 @@ const struct inode_operations gfs2_file_
4109         .listxattr = gfs2_listxattr,
4110         .removexattr = gfs2_removexattr,
4111         .fiemap = gfs2_fiemap,
4112 +       .sync_flags = gfs2_sync_flags,
4113  };
4114  
4115  const struct inode_operations gfs2_dir_iops = {
4116 @@ -1429,6 +1430,7 @@ const struct inode_operations gfs2_dir_i
4117         .listxattr = gfs2_listxattr,
4118         .removexattr = gfs2_removexattr,
4119         .fiemap = gfs2_fiemap,
4120 +       .sync_flags = gfs2_sync_flags,
4121  };
4122  
4123  const struct inode_operations gfs2_symlink_iops = {
4124 diff -NurpP --minimal linux-2.6.31.6/fs/hfsplus/ioctl.c linux-2.6.31.6-vs2.3.0.36.24/fs/hfsplus/ioctl.c
4125 --- linux-2.6.31.6/fs/hfsplus/ioctl.c   2008-12-25 00:26:37.000000000 +0100
4126 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/hfsplus/ioctl.c     2009-09-10 16:11:43.000000000 +0200
4127 @@ -17,6 +17,7 @@
4128  #include <linux/mount.h>
4129  #include <linux/sched.h>
4130  #include <linux/xattr.h>
4131 +#include <linux/mount.h>
4132  #include <asm/uaccess.h>
4133  #include "hfsplus_fs.h"
4134  
4135 diff -NurpP --minimal linux-2.6.31.6/fs/inode.c linux-2.6.31.6-vs2.3.0.36.24/fs/inode.c
4136 --- linux-2.6.31.6/fs/inode.c   2009-11-12 12:10:11.000000000 +0100
4137 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/inode.c     2009-10-05 23:35:52.000000000 +0200
4138 @@ -128,6 +128,9 @@ int inode_init_always(struct super_block
4139         struct address_space *const mapping = &inode->i_data;
4140  
4141         inode->i_sb = sb;
4142 +
4143 +       /* essential because of inode slab reuse */
4144 +       inode->i_tag = 0;
4145         inode->i_blkbits = sb->s_blocksize_bits;
4146         inode->i_flags = 0;
4147         atomic_set(&inode->i_count, 1);
4148 @@ -148,6 +151,7 @@ int inode_init_always(struct super_block
4149         inode->i_bdev = NULL;
4150         inode->i_cdev = NULL;
4151         inode->i_rdev = 0;
4152 +       inode->i_mdev = 0;
4153         inode->dirtied_when = 0;
4154  
4155         if (security_inode_alloc(inode))
4156 @@ -304,6 +308,8 @@ void __iget(struct inode *inode)
4157         inodes_stat.nr_unused--;
4158  }
4159  
4160 +EXPORT_SYMBOL_GPL(__iget);
4161 +
4162  /**
4163   * clear_inode - clear an inode
4164   * @inode: inode to clear
4165 @@ -1588,9 +1594,11 @@ void init_special_inode(struct inode *in
4166         if (S_ISCHR(mode)) {
4167                 inode->i_fop = &def_chr_fops;
4168                 inode->i_rdev = rdev;
4169 +               inode->i_mdev = rdev;
4170         } else if (S_ISBLK(mode)) {
4171                 inode->i_fop = &def_blk_fops;
4172                 inode->i_rdev = rdev;
4173 +               inode->i_mdev = rdev;
4174         } else if (S_ISFIFO(mode))
4175                 inode->i_fop = &def_fifo_fops;
4176         else if (S_ISSOCK(mode))
4177 diff -NurpP --minimal linux-2.6.31.6/fs/ioctl.c linux-2.6.31.6-vs2.3.0.36.24/fs/ioctl.c
4178 --- linux-2.6.31.6/fs/ioctl.c   2009-09-10 15:26:22.000000000 +0200
4179 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ioctl.c     2009-09-10 17:13:08.000000000 +0200
4180 @@ -16,6 +16,9 @@
4181  #include <linux/writeback.h>
4182  #include <linux/buffer_head.h>
4183  #include <linux/falloc.h>
4184 +#include <linux/proc_fs.h>
4185 +#include <linux/vserver/inode.h>
4186 +#include <linux/vs_tag.h>
4187  
4188  #include <asm/ioctls.h>
4189  
4190 diff -NurpP --minimal linux-2.6.31.6/fs/ioprio.c linux-2.6.31.6-vs2.3.0.36.24/fs/ioprio.c
4191 --- linux-2.6.31.6/fs/ioprio.c  2009-03-24 14:22:26.000000000 +0100
4192 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ioprio.c    2009-09-10 16:11:43.000000000 +0200
4193 @@ -26,6 +26,7 @@
4194  #include <linux/syscalls.h>
4195  #include <linux/security.h>
4196  #include <linux/pid_namespace.h>
4197 +#include <linux/vs_base.h>
4198  
4199  int set_task_ioprio(struct task_struct *task, int ioprio)
4200  {
4201 @@ -123,6 +124,8 @@ SYSCALL_DEFINE3(ioprio_set, int, which, 
4202                         else
4203                                 pgrp = find_vpid(who);
4204                         do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
4205 +                               if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
4206 +                                       continue;
4207                                 ret = set_task_ioprio(p, ioprio);
4208                                 if (ret)
4209                                         break;
4210 @@ -212,6 +215,8 @@ SYSCALL_DEFINE2(ioprio_get, int, which, 
4211                         else
4212                                 pgrp = find_vpid(who);
4213                         do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
4214 +                               if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
4215 +                                       continue;
4216                                 tmpio = get_task_ioprio(p);
4217                                 if (tmpio < 0)
4218                                         continue;
4219 diff -NurpP --minimal linux-2.6.31.6/fs/jfs/acl.c linux-2.6.31.6-vs2.3.0.36.24/fs/jfs/acl.c
4220 --- linux-2.6.31.6/fs/jfs/acl.c 2009-09-10 15:26:22.000000000 +0200
4221 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/jfs/acl.c   2009-09-10 16:11:43.000000000 +0200
4222 @@ -221,7 +221,8 @@ int jfs_setattr(struct dentry *dentry, s
4223                 return rc;
4224  
4225         if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
4226 -           (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
4227 +           (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) ||
4228 +           (iattr->ia_valid & ATTR_TAG && iattr->ia_tag != inode->i_tag)) {
4229                 if (vfs_dq_transfer(inode, iattr))
4230                         return -EDQUOT;
4231         }
4232 diff -NurpP --minimal linux-2.6.31.6/fs/jfs/file.c linux-2.6.31.6-vs2.3.0.36.24/fs/jfs/file.c
4233 --- linux-2.6.31.6/fs/jfs/file.c        2008-12-25 00:26:37.000000000 +0100
4234 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/jfs/file.c  2009-10-07 01:05:32.000000000 +0200
4235 @@ -98,6 +98,7 @@ const struct inode_operations jfs_file_i
4236         .setattr        = jfs_setattr,
4237         .permission     = jfs_permission,
4238  #endif
4239 +       .sync_flags     = jfs_sync_flags,
4240  };
4241  
4242  const struct file_operations jfs_file_operations = {
4243 diff -NurpP --minimal linux-2.6.31.6/fs/jfs/ioctl.c linux-2.6.31.6-vs2.3.0.36.24/fs/jfs/ioctl.c
4244 --- linux-2.6.31.6/fs/jfs/ioctl.c       2008-12-25 00:26:37.000000000 +0100
4245 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/jfs/ioctl.c 2009-10-07 04:09:15.000000000 +0200
4246 @@ -11,6 +11,7 @@
4247  #include <linux/mount.h>
4248  #include <linux/time.h>
4249  #include <linux/sched.h>
4250 +#include <linux/mount.h>
4251  #include <asm/current.h>
4252  #include <asm/uaccess.h>
4253  
4254 @@ -52,6 +53,16 @@ static long jfs_map_ext2(unsigned long f
4255  }
4256  
4257  
4258 +int jfs_sync_flags(struct inode *inode, int flags, int vflags)
4259 +{      /* sync_flags inode operation for JFS: store the given flag words in the inode and mirror them into JFS mode2 */
4260 +       inode->i_flags = flags;
4261 +       inode->i_vflags = vflags;
4262 +       jfs_get_inode_flags(JFS_IP(inode));     /* rebuilds the flag bits in mode2 from i_flags/i_vflags set just above */
4263 +       inode->i_ctime = CURRENT_TIME_SEC;
4264 +       mark_inode_dirty(inode);
4265 +       return 0;       /* the only return path; always 0 */
4266 +}
4267 +
4268  long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
4269  {
4270         struct inode *inode = filp->f_dentry->d_inode;
4271 @@ -85,6 +96,11 @@ long jfs_ioctl(struct file *filp, unsign
4272                 if (!S_ISDIR(inode->i_mode))
4273                         flags &= ~JFS_DIRSYNC_FL;
4274  
4275 +               if (IS_BARRIER(inode)) {        /* bail out before any flag is changed on a barrier inode */
4276 +                       vxwprintk_task(1, "messing with the barrier.");
4277 +                       return -EACCES; /* NOTE(review): returns directly, whereas the IS_NOQUOTA check below sets err -- confirm no cleanup is skipped */
4278 +               }
4279 +
4280                 /* Is it quota file? Do not allow user to mess with it */
4281                 if (IS_NOQUOTA(inode)) {
4282                         err = -EPERM;
4283 @@ -102,8 +118,8 @@ long jfs_ioctl(struct file *filp, unsign
4284                  * the relevant capability.
4285                  */
4286                 if ((oldflags & JFS_IMMUTABLE_FL) ||
4287 -                       ((flags ^ oldflags) &
4288 -                       (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) {
4289 +                       ((flags ^ oldflags) & (JFS_APPEND_FL |
4290 +                       JFS_IMMUTABLE_FL | JFS_IXUNLINK_FL))) {
4291                         if (!capable(CAP_LINUX_IMMUTABLE)) {
4292                                 mutex_unlock(&inode->i_mutex);
4293                                 err = -EPERM;
4294 @@ -111,7 +127,7 @@ long jfs_ioctl(struct file *filp, unsign
4295                         }
4296                 }
4297  
4298 -               flags = flags & JFS_FL_USER_MODIFIABLE;
4299 +               flags &= JFS_FL_USER_MODIFIABLE;
4300                 flags |= oldflags & ~JFS_FL_USER_MODIFIABLE;
4301                 jfs_inode->mode2 = flags;
4302  
4303 diff -NurpP --minimal linux-2.6.31.6/fs/jfs/jfs_dinode.h linux-2.6.31.6-vs2.3.0.36.24/fs/jfs/jfs_dinode.h
4304 --- linux-2.6.31.6/fs/jfs/jfs_dinode.h  2008-12-25 00:26:37.000000000 +0100
4305 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/jfs/jfs_dinode.h    2009-09-10 16:11:43.000000000 +0200
4306 @@ -161,9 +161,13 @@ struct dinode {
4307  
4308  #define JFS_APPEND_FL          0x01000000 /* writes to file may only append */
4309  #define JFS_IMMUTABLE_FL       0x02000000 /* Immutable file */
4310 +#define JFS_IXUNLINK_FL                0x08000000 /* Immutable invert on unlink */
4311  
4312 -#define JFS_FL_USER_VISIBLE    0x03F80000
4313 -#define JFS_FL_USER_MODIFIABLE 0x03F80000
4314 +#define JFS_BARRIER_FL         0x04000000 /* Barrier for chroot() */
4315 +#define JFS_COW_FL             0x20000000 /* Copy on Write marker */
4316 +
4317 +#define JFS_FL_USER_VISIBLE    0x07F80000
4318 +#define JFS_FL_USER_MODIFIABLE 0x07F80000
4319  #define JFS_FL_INHERIT         0x03C80000
4320  
4321  /* These are identical to EXT[23]_IOC_GETFLAGS/SETFLAGS */
4322 diff -NurpP --minimal linux-2.6.31.6/fs/jfs/jfs_filsys.h linux-2.6.31.6-vs2.3.0.36.24/fs/jfs/jfs_filsys.h
4323 --- linux-2.6.31.6/fs/jfs/jfs_filsys.h  2008-12-25 00:26:37.000000000 +0100
4324 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/jfs/jfs_filsys.h    2009-09-10 16:11:43.000000000 +0200
4325 @@ -263,6 +263,7 @@
4326  #define JFS_NAME_MAX   255
4327  #define JFS_PATH_MAX   BPSIZE
4328  
4329 +#define JFS_TAGGED             0x00800000      /* Context Tagging */
4330  
4331  /*
4332   *     file system state (superblock state)
4333 diff -NurpP --minimal linux-2.6.31.6/fs/jfs/jfs_imap.c linux-2.6.31.6-vs2.3.0.36.24/fs/jfs/jfs_imap.c
4334 --- linux-2.6.31.6/fs/jfs/jfs_imap.c    2009-09-10 15:26:22.000000000 +0200
4335 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/jfs/jfs_imap.c      2009-09-10 16:11:43.000000000 +0200
4336 @@ -45,6 +45,7 @@
4337  #include <linux/buffer_head.h>
4338  #include <linux/pagemap.h>
4339  #include <linux/quotaops.h>
4340 +#include <linux/vs_tag.h>
4341  
4342  #include "jfs_incore.h"
4343  #include "jfs_inode.h"
4344 @@ -3059,6 +3060,8 @@ static int copy_from_dinode(struct dinod
4345  {
4346         struct jfs_inode_info *jfs_ip = JFS_IP(ip);
4347         struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
4348 +       uid_t uid;
4349 +       gid_t gid;
4350  
4351         jfs_ip->fileset = le32_to_cpu(dip->di_fileset);
4352         jfs_ip->mode2 = le32_to_cpu(dip->di_mode);
4353 @@ -3079,14 +3082,18 @@ static int copy_from_dinode(struct dinod
4354         }
4355         ip->i_nlink = le32_to_cpu(dip->di_nlink);
4356  
4357 -       jfs_ip->saved_uid = le32_to_cpu(dip->di_uid);
4358 +       uid = le32_to_cpu(dip->di_uid);
4359 +       gid = le32_to_cpu(dip->di_gid);
4360 +       ip->i_tag = INOTAG_TAG(DX_TAG(ip), uid, gid, 0);
4361 +
4362 +       jfs_ip->saved_uid = INOTAG_UID(DX_TAG(ip), uid, gid);
4363         if (sbi->uid == -1)
4364                 ip->i_uid = jfs_ip->saved_uid;
4365         else {
4366                 ip->i_uid = sbi->uid;
4367         }
4368  
4369 -       jfs_ip->saved_gid = le32_to_cpu(dip->di_gid);
4370 +       jfs_ip->saved_gid = INOTAG_GID(DX_TAG(ip), uid, gid);
4371         if (sbi->gid == -1)
4372                 ip->i_gid = jfs_ip->saved_gid;
4373         else {
4374 @@ -3151,14 +3158,12 @@ static void copy_to_dinode(struct dinode
4375         dip->di_size = cpu_to_le64(ip->i_size);
4376         dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks));
4377         dip->di_nlink = cpu_to_le32(ip->i_nlink);
4378 -       if (sbi->uid == -1)
4379 -               dip->di_uid = cpu_to_le32(ip->i_uid);
4380 -       else
4381 -               dip->di_uid = cpu_to_le32(jfs_ip->saved_uid);
4382 -       if (sbi->gid == -1)
4383 -               dip->di_gid = cpu_to_le32(ip->i_gid);
4384 -       else
4385 -               dip->di_gid = cpu_to_le32(jfs_ip->saved_gid);
4386 +
4387 +       dip->di_uid = cpu_to_le32(TAGINO_UID(DX_TAG(ip),
4388 +               (sbi->uid == -1) ? ip->i_uid : jfs_ip->saved_uid, ip->i_tag));
4389 +       dip->di_gid = cpu_to_le32(TAGINO_GID(DX_TAG(ip),
4390 +               (sbi->gid == -1) ? ip->i_gid : jfs_ip->saved_gid, ip->i_tag));
4391 +
4392         jfs_get_inode_flags(jfs_ip);
4393         /*
4394          * mode2 is only needed for storing the higher order bits.
4395 diff -NurpP --minimal linux-2.6.31.6/fs/jfs/jfs_inode.c linux-2.6.31.6-vs2.3.0.36.24/fs/jfs/jfs_inode.c
4396 --- linux-2.6.31.6/fs/jfs/jfs_inode.c   2009-06-11 17:13:05.000000000 +0200
4397 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/jfs/jfs_inode.c     2009-10-12 05:07:16.000000000 +0200
4398 @@ -18,6 +18,7 @@
4399  
4400  #include <linux/fs.h>
4401  #include <linux/quotaops.h>
4402 +#include <linux/vs_tag.h>
4403  #include "jfs_incore.h"
4404  #include "jfs_inode.h"
4405  #include "jfs_filsys.h"
4406 @@ -30,29 +31,46 @@ void jfs_set_inode_flags(struct inode *i
4407  {
4408         unsigned int flags = JFS_IP(inode)->mode2;
4409  
4410 -       inode->i_flags &= ~(S_IMMUTABLE | S_APPEND |
4411 -               S_NOATIME | S_DIRSYNC | S_SYNC);
4412 +       inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK |
4413 +               S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
4414  
4415         if (flags & JFS_IMMUTABLE_FL)
4416                 inode->i_flags |= S_IMMUTABLE;
4417 +       if (flags & JFS_IXUNLINK_FL)
4418 +               inode->i_flags |= S_IXUNLINK;
4419 +
4420 +       if (flags & JFS_SYNC_FL)
4421 +               inode->i_flags |= S_SYNC;
4422         if (flags & JFS_APPEND_FL)
4423                 inode->i_flags |= S_APPEND;
4424         if (flags & JFS_NOATIME_FL)
4425                 inode->i_flags |= S_NOATIME;
4426         if (flags & JFS_DIRSYNC_FL)
4427                 inode->i_flags |= S_DIRSYNC;
4428 -       if (flags & JFS_SYNC_FL)
4429 -               inode->i_flags |= S_SYNC;
4430 +
4431 +       inode->i_vflags &= ~(V_BARRIER | V_COW);
4432 +
4433 +       if (flags & JFS_BARRIER_FL)
4434 +               inode->i_vflags |= V_BARRIER;
4435 +       if (flags & JFS_COW_FL)
4436 +               inode->i_vflags |= V_COW;
4437  }
4438  
4439  void jfs_get_inode_flags(struct jfs_inode_info *jfs_ip)
4440  {
4441         unsigned int flags = jfs_ip->vfs_inode.i_flags;
4442 +       unsigned int vflags = jfs_ip->vfs_inode.i_vflags;
4443 +
4444 +       jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_IXUNLINK_FL |
4445 +                          JFS_APPEND_FL | JFS_NOATIME_FL |
4446 +                          JFS_DIRSYNC_FL | JFS_SYNC_FL |
4447 +                          JFS_BARRIER_FL | JFS_COW_FL);
4448  
4449 -       jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_APPEND_FL | JFS_NOATIME_FL |
4450 -                          JFS_DIRSYNC_FL | JFS_SYNC_FL);
4451         if (flags & S_IMMUTABLE)
4452                 jfs_ip->mode2 |= JFS_IMMUTABLE_FL;
4453 +       if (flags & S_IXUNLINK)
4454 +               jfs_ip->mode2 |= JFS_IXUNLINK_FL;
4455 +
4456         if (flags & S_APPEND)
4457                 jfs_ip->mode2 |= JFS_APPEND_FL;
4458         if (flags & S_NOATIME)
4459 @@ -61,6 +79,11 @@ void jfs_get_inode_flags(struct jfs_inod
4460                 jfs_ip->mode2 |= JFS_DIRSYNC_FL;
4461         if (flags & S_SYNC)
4462                 jfs_ip->mode2 |= JFS_SYNC_FL;
4463 +
4464 +       if (vflags & V_BARRIER)
4465 +               jfs_ip->mode2 |= JFS_BARRIER_FL;
4466 +       if (vflags & V_COW)
4467 +               jfs_ip->mode2 |= JFS_COW_FL;
4468  }
4469  
4470  /*
4471 @@ -105,6 +128,7 @@ struct inode *ialloc(struct inode *paren
4472                         mode |= S_ISGID;
4473         } else
4474                 inode->i_gid = current_fsgid();
4475 +       inode->i_tag = dx_current_fstag(sb);
4476  
4477         /*
4478          * New inodes need to save sane values on disk when
4479 diff -NurpP --minimal linux-2.6.31.6/fs/jfs/jfs_inode.h linux-2.6.31.6-vs2.3.0.36.24/fs/jfs/jfs_inode.h
4480 --- linux-2.6.31.6/fs/jfs/jfs_inode.h   2009-06-11 17:13:05.000000000 +0200
4481 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/jfs/jfs_inode.h     2009-10-07 01:25:26.000000000 +0200
4482 @@ -39,6 +39,7 @@ extern struct dentry *jfs_fh_to_dentry(s
4483  extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid,
4484         int fh_len, int fh_type);
4485  extern void jfs_set_inode_flags(struct inode *);
4486 +extern int jfs_sync_flags(struct inode *, int, int);
4487  extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
4488  
4489  extern const struct address_space_operations jfs_aops;
4490 diff -NurpP --minimal linux-2.6.31.6/fs/jfs/namei.c linux-2.6.31.6-vs2.3.0.36.24/fs/jfs/namei.c
4491 --- linux-2.6.31.6/fs/jfs/namei.c       2009-06-11 17:13:05.000000000 +0200
4492 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/jfs/namei.c 2009-10-07 01:09:16.000000000 +0200
4493 @@ -21,6 +21,7 @@
4494  #include <linux/ctype.h>
4495  #include <linux/quotaops.h>
4496  #include <linux/exportfs.h>
4497 +#include <linux/vs_tag.h>
4498  #include "jfs_incore.h"
4499  #include "jfs_superblock.h"
4500  #include "jfs_inode.h"
4501 @@ -1476,6 +1477,7 @@ static struct dentry *jfs_lookup(struct 
4502                 return ERR_CAST(ip);
4503         }
4504  
4505 +       dx_propagate_tag(nd, ip);
4506         dentry = d_splice_alias(ip, dentry);
4507  
4508         if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2))
4509 @@ -1545,6 +1547,7 @@ const struct inode_operations jfs_dir_in
4510         .setattr        = jfs_setattr,
4511         .permission     = jfs_permission,
4512  #endif
4513 +       .sync_flags     = jfs_sync_flags,
4514  };
4515  
4516  const struct file_operations jfs_dir_operations = {
4517 diff -NurpP --minimal linux-2.6.31.6/fs/jfs/super.c linux-2.6.31.6-vs2.3.0.36.24/fs/jfs/super.c
4518 --- linux-2.6.31.6/fs/jfs/super.c       2009-09-10 15:26:22.000000000 +0200
4519 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/jfs/super.c 2009-09-10 17:10:55.000000000 +0200
4520 @@ -194,7 +194,8 @@ static void jfs_put_super(struct super_b
4521  enum {
4522         Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize,
4523         Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota,
4524 -       Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask
4525 +       Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask,
4526 +       Opt_tag, Opt_notag, Opt_tagid
4527  };
4528  
4529  static const match_table_t tokens = {
4530 @@ -204,6 +205,10 @@ static const match_table_t tokens = {
4531         {Opt_resize, "resize=%u"},
4532         {Opt_resize_nosize, "resize"},
4533         {Opt_errors, "errors=%s"},
4534 +       {Opt_tag, "tag"},
4535 +       {Opt_notag, "notag"},
4536 +       {Opt_tagid, "tagid=%u"},
4537 +       {Opt_tag, "tagxid"},
4538         {Opt_ignore, "noquota"},
4539         {Opt_ignore, "quota"},
4540         {Opt_usrquota, "usrquota"},
4541 @@ -338,6 +343,20 @@ static int parse_options(char *options, 
4542                         }
4543                         break;
4544                 }
4545 +#ifndef CONFIG_TAGGING_NONE
4546 +               case Opt_tag:           /* "tag" / "tagxid": enable context tagging */
4547 +                       *flag |= JFS_TAGGED;
4548 +                       break;
4549 +               case Opt_notag:         /* "notag": disable context tagging */
4550 +                       *flag &= ~JFS_TAGGED;   /* clear only the tagging bit; every other mount flag is kept */
4551 +                       break;
4552 +#endif
4553 +#ifdef CONFIG_PROPAGATE
4554 +               case Opt_tagid:
4555 +                       /* use args[0] */
4556 +                       *flag |= JFS_TAGGED;
4557 +                       break;
4558 +#endif
4559                 default:
4560                         printk("jfs: Unrecognized mount option \"%s\" "
4561                                         " or missing value\n", p);
4562 @@ -369,6 +388,12 @@ static int jfs_remount(struct super_bloc
4563         if (!parse_options(data, sb, &newLVSize, &flag)) {
4564                 return -EINVAL;
4565         }
4566 +       if ((flag & JFS_TAGGED) && !(sb->s_flags & MS_TAGGED)) {
4567 +               printk(KERN_ERR "JFS: %s: tagging not permitted on remount.\n",
4568 +                       sb->s_id);
4569 +               return -EINVAL;
4570 +       }
4571 +
4572         lock_kernel();
4573         if (newLVSize) {
4574                 if (sb->s_flags & MS_RDONLY) {
4575 @@ -452,6 +477,9 @@ static int jfs_fill_super(struct super_b
4576  #ifdef CONFIG_JFS_POSIX_ACL
4577         sb->s_flags |= MS_POSIXACL;
4578  #endif
4579 +       /* map mount option tagxid */
4580 +       if (sbi->flag & JFS_TAGGED)
4581 +               sb->s_flags |= MS_TAGGED;
4582  
4583         if (newLVSize) {
4584                 printk(KERN_ERR "resize option for remount only\n");
4585 diff -NurpP --minimal linux-2.6.31.6/fs/libfs.c linux-2.6.31.6-vs2.3.0.36.24/fs/libfs.c
4586 --- linux-2.6.31.6/fs/libfs.c   2009-09-10 15:26:22.000000000 +0200
4587 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/libfs.c     2009-09-10 16:11:43.000000000 +0200
4588 @@ -127,7 +127,8 @@ static inline unsigned char dt_type(stru
4589   * both impossible due to the lock on directory.
4590   */
4591  
4592 -int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
4593 +static inline int do_dcache_readdir_filter(struct file *filp,
4594 +       void *dirent, filldir_t filldir, int (*filter)(struct dentry *dentry))
4595  {
4596         struct dentry *dentry = filp->f_path.dentry;
4597         struct dentry *cursor = filp->private_data;
4598 @@ -160,6 +161,8 @@ int dcache_readdir(struct file * filp, v
4599                                 next = list_entry(p, struct dentry, d_u.d_child);
4600                                 if (d_unhashed(next) || !next->d_inode)
4601                                         continue;
4602 +                               if (filter && !filter(next))
4603 +                                       continue;
4604  
4605                                 spin_unlock(&dcache_lock);
4606                                 if (filldir(dirent, next->d_name.name, 
4607 @@ -178,6 +181,18 @@ int dcache_readdir(struct file * filp, v
4608         return 0;
4609  }
4610  
4611 +int dcache_readdir(struct file *filp, void *dirent, filldir_t filldir)
4612 +{      /* unfiltered readdir: passes a NULL filter, so the helper's filter test never skips an entry */
4613 +       return do_dcache_readdir_filter(filp, dirent, filldir, NULL);
4614 +}
4615 +
4616 +int dcache_readdir_filter(struct file *filp, void *dirent, filldir_t filldir,
4617 +       int (*filter)(struct dentry *))         /* dentries for which filter() returns 0 are skipped by the helper */
4618 +{      /* exported entry point around the static inline do_dcache_readdir_filter() */
4619 +       return do_dcache_readdir_filter(filp, dirent, filldir, filter);
4620 +}
4621 +
4622 +
4623  ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
4624  {
4625         return -EISDIR;
4626 @@ -836,6 +851,7 @@ EXPORT_SYMBOL(dcache_dir_close);
4627  EXPORT_SYMBOL(dcache_dir_lseek);
4628  EXPORT_SYMBOL(dcache_dir_open);
4629  EXPORT_SYMBOL(dcache_readdir);
4630 +EXPORT_SYMBOL(dcache_readdir_filter);
4631  EXPORT_SYMBOL(generic_read_dir);
4632  EXPORT_SYMBOL(get_sb_pseudo);
4633  EXPORT_SYMBOL(simple_write_begin);
4634 diff -NurpP --minimal linux-2.6.31.6/fs/locks.c linux-2.6.31.6-vs2.3.0.36.24/fs/locks.c
4635 --- linux-2.6.31.6/fs/locks.c   2009-09-10 15:26:22.000000000 +0200
4636 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/locks.c     2009-09-10 16:11:43.000000000 +0200
4637 @@ -127,6 +127,8 @@
4638  #include <linux/time.h>
4639  #include <linux/rcupdate.h>
4640  #include <linux/pid_namespace.h>
4641 +#include <linux/vs_base.h>
4642 +#include <linux/vs_limit.h>
4643  
4644  #include <asm/uaccess.h>
4645  
4646 @@ -148,6 +150,8 @@ static struct kmem_cache *filelock_cache
4647  /* Allocate an empty lock structure. */
4648  static struct file_lock *locks_alloc_lock(void)
4649  {
4650 +       if (!vx_locks_avail(1))
4651 +               return NULL;
4652         return kmem_cache_alloc(filelock_cache, GFP_KERNEL);
4653  }
4654  
4655 @@ -174,6 +178,7 @@ static void locks_free_lock(struct file_
4656         BUG_ON(!list_empty(&fl->fl_block));
4657         BUG_ON(!list_empty(&fl->fl_link));
4658  
4659 +       vx_locks_dec(fl);
4660         locks_release_private(fl);
4661         kmem_cache_free(filelock_cache, fl);
4662  }
4663 @@ -194,6 +199,7 @@ void locks_init_lock(struct file_lock *f
4664         fl->fl_start = fl->fl_end = 0;
4665         fl->fl_ops = NULL;
4666         fl->fl_lmops = NULL;
4667 +       fl->fl_xid = -1;
4668  }
4669  
4670  EXPORT_SYMBOL(locks_init_lock);
4671 @@ -248,6 +254,7 @@ void locks_copy_lock(struct file_lock *n
4672         new->fl_file = fl->fl_file;
4673         new->fl_ops = fl->fl_ops;
4674         new->fl_lmops = fl->fl_lmops;
4675 +       new->fl_xid = fl->fl_xid;
4676  
4677         locks_copy_private(new, fl);
4678  }
4679 @@ -286,6 +293,11 @@ static int flock_make_lock(struct file *
4680         fl->fl_flags = FL_FLOCK;
4681         fl->fl_type = type;
4682         fl->fl_end = OFFSET_MAX;
4683 +
4684 +       vxd_assert(filp->f_xid == vx_current_xid(),
4685 +               "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
4686 +       fl->fl_xid = filp->f_xid;
4687 +       vx_locks_inc(fl);
4688         
4689         *lock = fl;
4690         return 0;
4691 @@ -451,6 +463,7 @@ static int lease_init(struct file *filp,
4692  
4693         fl->fl_owner = current->files;
4694         fl->fl_pid = current->tgid;
4695 +       fl->fl_xid = vx_current_xid();
4696  
4697         fl->fl_file = filp;
4698         fl->fl_flags = FL_LEASE;
4699 @@ -470,6 +483,11 @@ static struct file_lock *lease_alloc(str
4700         if (fl == NULL)
4701                 return ERR_PTR(error);
4702  
4703 +       fl->fl_xid = vx_current_xid();
4704 +       if (filp)
4705 +               vxd_assert(filp->f_xid == fl->fl_xid,
4706 +                       "f_xid(%d) == fl_xid(%d)", filp->f_xid, fl->fl_xid);
4707 +       vx_locks_inc(fl);
4708         error = lease_init(filp, type, fl);
4709         if (error) {
4710                 locks_free_lock(fl);
4711 @@ -770,6 +788,7 @@ static int flock_lock_file(struct file *
4712         if (found)
4713                 cond_resched_bkl();
4714  
4715 +       new_fl->fl_xid = -1;
4716  find_conflict:
4717         for_each_lock(inode, before) {
4718                 struct file_lock *fl = *before;
4719 @@ -790,6 +809,7 @@ find_conflict:
4720                 goto out;
4721         locks_copy_lock(new_fl, request);
4722         locks_insert_lock(before, new_fl);
4723 +       vx_locks_inc(new_fl);
4724         new_fl = NULL;
4725         error = 0;
4726  
4727 @@ -800,7 +820,8 @@ out:
4728         return error;
4729  }
4730  
4731 -static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
4732 +static int __posix_lock_file(struct inode *inode, struct file_lock *request,
4733 +       struct file_lock *conflock, xid_t xid)
4734  {
4735         struct file_lock *fl;
4736         struct file_lock *new_fl = NULL;
4737 @@ -810,6 +831,8 @@ static int __posix_lock_file(struct inod
4738         struct file_lock **before;
4739         int error, added = 0;
4740  
4741 +       vxd_assert(xid == vx_current_xid(),
4742 +               "xid(%d) == current(%d)", xid, vx_current_xid());
4743         /*
4744          * We may need two file_lock structures for this operation,
4745          * so we get them in advance to avoid races.
4746 @@ -820,7 +843,11 @@ static int __posix_lock_file(struct inod
4747             (request->fl_type != F_UNLCK ||
4748              request->fl_start != 0 || request->fl_end != OFFSET_MAX)) {
4749                 new_fl = locks_alloc_lock();
4750 +               if (new_fl) { new_fl->fl_xid = xid;     /* locks_alloc_lock() returns NULL on allocation failure or when vx_locks_avail() refuses */
4751 +                       vx_locks_inc(new_fl); }         /* tag and account only a lock that really exists */
4752                 new_fl2 = locks_alloc_lock();
4753 +               if (new_fl2) { new_fl2->fl_xid = xid;   /* same NULL guard for the second spare lock */
4754 +                       vx_locks_inc(new_fl2); }
4755         }
4756  
4757         lock_kernel();
4758 @@ -1019,7 +1046,8 @@ static int __posix_lock_file(struct inod
4759  int posix_lock_file(struct file *filp, struct file_lock *fl,
4760                         struct file_lock *conflock)
4761  {
4762 -       return __posix_lock_file(filp->f_path.dentry->d_inode, fl, conflock);
4763 +       return __posix_lock_file(filp->f_path.dentry->d_inode,
4764 +               fl, conflock, filp->f_xid);
4765  }
4766  EXPORT_SYMBOL(posix_lock_file);
4767  
4768 @@ -1109,7 +1137,7 @@ int locks_mandatory_area(int read_write,
4769         fl.fl_end = offset + count - 1;
4770  
4771         for (;;) {
4772 -               error = __posix_lock_file(inode, &fl, NULL);
4773 +               error = __posix_lock_file(inode, &fl, NULL, filp->f_xid);
4774                 if (error != FILE_LOCK_DEFERRED)
4775                         break;
4776                 error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
4777 @@ -1424,6 +1452,7 @@ int generic_setlease(struct file *filp, 
4778  
4779         locks_copy_lock(new_fl, lease);
4780         locks_insert_lock(before, new_fl);
4781 +       vx_locks_inc(new_fl);
4782  
4783         *flp = new_fl;
4784         return 0;
4785 @@ -1779,6 +1808,11 @@ int fcntl_setlk(unsigned int fd, struct 
4786         if (file_lock == NULL)
4787                 return -ENOLCK;
4788  
4789 +       vxd_assert(filp->f_xid == vx_current_xid(),
4790 +               "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
4791 +       file_lock->fl_xid = filp->f_xid;
4792 +       vx_locks_inc(file_lock);
4793 +
4794         /*
4795          * This might block, so we do it before checking the inode.
4796          */
4797 @@ -1897,6 +1931,11 @@ int fcntl_setlk64(unsigned int fd, struc
4798         if (file_lock == NULL)
4799                 return -ENOLCK;
4800  
4801 +       vxd_assert(filp->f_xid == vx_current_xid(),
4802 +               "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
4803 +       file_lock->fl_xid = filp->f_xid;
4804 +       vx_locks_inc(file_lock);
4805 +
4806         /*
4807          * This might block, so we do it before checking the inode.
4808          */
4809 @@ -2162,8 +2201,11 @@ static int locks_show(struct seq_file *f
4810  
4811         lock_get_status(f, fl, (long)f->private, "");
4812  
4813 -       list_for_each_entry(bfl, &fl->fl_block, fl_block)
4814 +       list_for_each_entry(bfl, &fl->fl_block, fl_block) {     /* walk the entries queued on fl->fl_block */
4815 +               if (!vx_check(fl->fl_xid, VS_WATCH_P | VS_IDENT))       /* NOTE(review): tests fl, not the iterated bfl, so the result is the same for every entry -- confirm intended */
4816 +                       continue;
4817                 lock_get_status(f, bfl, (long)f->private, " ->");
4818 +       }
4819  
4820         f->private++;
4821         return 0;
4822 diff -NurpP --minimal linux-2.6.31.6/fs/namei.c linux-2.6.31.6-vs2.3.0.36.24/fs/namei.c
4823 --- linux-2.6.31.6/fs/namei.c   2009-11-12 12:10:11.000000000 +0100
4824 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/namei.c     2009-10-15 03:49:19.000000000 +0200
4825 @@ -33,6 +33,14 @@
4826  #include <linux/fcntl.h>
4827  #include <linux/device_cgroup.h>
4828  #include <linux/fs_struct.h>
4829 +#include <linux/proc_fs.h>
4830 +#include <linux/vserver/inode.h>
4831 +#include <linux/vs_base.h>
4832 +#include <linux/vs_tag.h>
4833 +#include <linux/vs_cowbl.h>
4834 +#include <linux/vs_device.h>
4835 +#include <linux/vs_context.h>
4836 +#include <linux/pid_namespace.h>
4837  #include <asm/uaccess.h>
4838  
4839  #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
4840 @@ -169,6 +177,77 @@ void putname(const char *name)
4841  EXPORT_SYMBOL(putname);
4842  #endif
4843  
4844 +static inline int dx_barrier(struct inode *inode)
4845 +{      /* returns 1 (after logging a warning) when inode is a barrier and vx_check(0, VS_ADMIN | VS_WATCH) fails; otherwise 0 */
4846 +       if (IS_BARRIER(inode) && !vx_check(0, VS_ADMIN | VS_WATCH)) {
4847 +               vxwprintk_task(1, "did hit the barrier.");
4848 +               return 1;
4849 +       }
4850 +       return 0;
4851 +}
4852 +
4853 +static int __dx_permission(struct inode *inode, int mask)
4854 +{      /* returns 0 to allow and -EACCES to deny; the rule applied depends on the superblock magic of the inode */
4855 +       if (dx_barrier(inode))  /* a barrier hit denies access on every filesystem */
4856 +               return -EACCES;
4857 +
4858 +       if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) {
4859 +               /* devpts is xid tagged */
4860 +               if (S_ISDIR(inode->i_mode) ||
4861 +                   vx_check((xid_t)inode->i_tag, VS_IDENT | VS_WATCH_P))
4862 +                       return 0;       /* directories, or an i_tag that passes vx_check(), are allowed; anything else falls through to out */
4863 +       }
4864 +       else if (inode->i_sb->s_magic == PROC_SUPER_MAGIC) {
4865 +               struct proc_dir_entry *de = PDE(inode);
4866 +
4867 +               if (de && !vx_hide_check(0, de->vx_flags))      /* entry fails the hide check against its vx_flags: deny */
4868 +                       goto out;
4869 +
4870 +               if ((mask & (MAY_WRITE | MAY_APPEND))) {        /* write/append requests get extra checks */
4871 +                       struct pid *pid;
4872 +                       struct task_struct *tsk;
4873 +
4874 +                       if (vx_check(0, VS_ADMIN | VS_WATCH_P) ||
4875 +                           vx_flags(VXF_STATE_SETUP, 0))
4876 +                               return 0;       /* allowed outright when either test passes */
4877 +
4878 +                       pid = PROC_I(inode)->pid;
4879 +                       if (!pid)       /* no pid attached to this proc inode: deny */
4880 +                               goto out;
4881 +
4882 +                       tsk = pid_task(pid, PIDTYPE_PID);
4883 +                       vxdprintk(VXD_CBIT(tag, 0), "accessing %p[#%u]",
4884 +                                 tsk, (tsk ? vx_task_xid(tsk) : 0));
4885 +                       if (tsk && vx_check(vx_task_xid(tsk), VS_IDENT | VS_WATCH_P))
4886 +                               return 0;       /* the task's xid passes vx_check(); a missing task falls through to out */
4887 +               }
4888 +               else {
4889 +                       /* FIXME: Should we block some entries here? */
4890 +                       return 0;
4891 +               }
4892 +       }
4893 +       else {  /* every other filesystem: decided by the inode tag */
4894 +               if (dx_notagcheck(inode->i_sb) ||
4895 +                   dx_check(inode->i_tag, DX_HOSTID | DX_ADMIN | DX_WATCH |
4896 +                            DX_IDENT))
4897 +                       return 0;       /* allowed when dx_notagcheck() or dx_check() succeeds */
4898 +       }
4899 +
4900 +out:   /* reached by every path that did not return 0 above */
4901 +       return -EACCES;
4902 +}
4903 +
4904 +int dx_permission(struct inode *inode, int mask)
4905 +{      /* wrapper around __dx_permission() that logs a warning on denial and returns the result unchanged */
4906 +       int ret = __dx_permission(inode, mask);
4907 +       if (unlikely(ret)) {    /* non-zero means access was denied */
4908 +               vxwprintk_task(1, "denied %x access to %s:%p[#%d,%lu]",
4909 +                       mask, inode->i_sb->s_id, inode, inode->i_tag,
4910 +                       inode->i_ino);
4911 +       }
4912 +       return ret;
4913 +}
4914 +
4915  
4916  /**
4917   * generic_permission  -  check for access rights on a Posix-like filesystem
4918 @@ -255,10 +334,14 @@ int inode_permission(struct inode *inode
4919                 /*
4920                  * Nobody gets write access to an immutable file.
4921                  */
4922 -               if (IS_IMMUTABLE(inode))
4923 +               if (IS_IMMUTABLE(inode) && !IS_COW(inode))
4924                         return -EACCES;
4925         }
4926  
4927 +       retval = dx_permission(inode, mask);
4928 +       if (retval)
4929 +               return retval;
4930 +
4931         if (inode->i_op->permission)
4932                 retval = inode->i_op->permission(inode, mask);
4933         else
4934 @@ -434,6 +517,8 @@ static int exec_permission_lite(struct i
4935  {
4936         umode_t mode = inode->i_mode;
4937  
4938 +       if (dx_barrier(inode))
4939 +               return -EACCES;
4940         if (inode->i_op->permission)
4941                 return -EAGAIN;
4942  
4943 @@ -756,7 +841,8 @@ static __always_inline void follow_dotdo
4944  
4945                 if (nd->path.dentry == nd->root.dentry &&
4946                     nd->path.mnt == nd->root.mnt) {
4947 -                       break;
4948 +                       /* for sane '/' avoid follow_mount() */
4949 +                       return;
4950                 }
4951                 spin_lock(&dcache_lock);
4952                 if (nd->path.dentry != nd->path.mnt->mnt_root) {
4953 @@ -792,16 +878,30 @@ static int do_lookup(struct nameidata *n
4954  {
4955         struct vfsmount *mnt = nd->path.mnt;
4956         struct dentry *dentry = __d_lookup(nd->path.dentry, name);
4957 +       struct inode *inode;
4958  
4959         if (!dentry)
4960                 goto need_lookup;
4961         if (dentry->d_op && dentry->d_op->d_revalidate)
4962                 goto need_revalidate;
4963 +       inode = dentry->d_inode;
4964 +       if (!inode)
4965 +               goto done;
4966 +
4967 +       if (__dx_permission(inode, MAY_ACCESS))
4968 +               goto hidden;
4969 +
4970  done:
4971         path->mnt = mnt;
4972         path->dentry = dentry;
4973         __follow_mount(path);
4974         return 0;
4975 +hidden:
4976 +       vxwprintk_task(1, "did lookup hidden %s:%p[#%d,%lu] »%s/%.*s«.",
4977 +               inode->i_sb->s_id, inode, inode->i_tag, inode->i_ino,
4978 +               vxd_path(&nd->path), name->len, name->name);
4979 +       dput(dentry);
4980 +       return -ENOENT;
4981  
4982  need_lookup:
4983         dentry = real_lookup(nd->path.dentry, name, nd);
4984 @@ -1389,7 +1489,7 @@ static int may_delete(struct inode *dir,
4985         if (IS_APPEND(dir))
4986                 return -EPERM;
4987         if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
4988 -           IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
4989 +               IS_IXORUNLINK(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
4990                 return -EPERM;
4991         if (isdir) {
4992                 if (!S_ISDIR(victim->d_inode->i_mode))
4993 @@ -1529,6 +1629,14 @@ int may_open(struct path *path, int acc_
4994                 break;
4995         }
4996  
4997 +#ifdef CONFIG_VSERVER_COWBL
4998 +       if (IS_COW(inode) && (flag & FMODE_WRITE)) {
4999 +               if (IS_COW_LINK(inode))
5000 +                       return -EMLINK;
5001 +               inode->i_flags &= ~(S_IXUNLINK|S_IMMUTABLE);
5002 +               mark_inode_dirty(inode);
5003 +       }
5004 +#endif
5005         error = inode_permission(inode, acc_mode);
5006         if (error)
5007                 return error;
5008 @@ -1677,7 +1785,11 @@ struct file *do_filp_open(int dfd, const
5009         int count = 0;
5010         int will_write;
5011         int flag = open_to_namei_flags(open_flag);
5012 -
5013 +#ifdef CONFIG_VSERVER_COWBL
5014 +       int rflag = flag;
5015 +       int rmode = mode;
5016 +restart:
5017 +#endif
5018         if (!acc_mode)
5019                 acc_mode = MAY_OPEN | ACC_MODE(flag);
5020  
5021 @@ -1825,6 +1937,25 @@ ok:
5022                         goto exit;
5023         }
5024         error = may_open(&nd.path, acc_mode, flag);
5025 +#ifdef CONFIG_VSERVER_COWBL
5026 +       if (error == -EMLINK) {
5027 +               struct dentry *dentry;
5028 +               dentry = cow_break_link(pathname);
5029 +               if (IS_ERR(dentry)) {
5030 +                       error = PTR_ERR(dentry);
5031 +                       goto exit_cow;
5032 +               }
5033 +               dput(dentry);
5034 +               if (will_write)
5035 +                       mnt_drop_write(nd.path.mnt);
5036 +               release_open_intent(&nd);
5037 +               path_put(&nd.path);
5038 +               flag = rflag;
5039 +               mode = rmode;
5040 +               goto restart;
5041 +       }
5042 +exit_cow:
5043 +#endif
5044         if (error) {
5045                 if (will_write)
5046                         mnt_drop_write(nd.path.mnt);
5047 @@ -1987,9 +2118,17 @@ int vfs_mknod(struct inode *dir, struct 
5048         if (error)
5049                 return error;
5050  
5051 -       if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
5052 +       if (!(S_ISCHR(mode) || S_ISBLK(mode)))
5053 +               goto okay;
5054 +
5055 +       if (!capable(CAP_MKNOD))
5056                 return -EPERM;
5057  
5058 +       if (S_ISCHR(mode) && !vs_chrdev_perm(dev, DATTR_CREATE))
5059 +               return -EPERM;
5060 +       if (S_ISBLK(mode) && !vs_blkdev_perm(dev, DATTR_CREATE))
5061 +               return -EPERM;
5062 +okay:
5063         if (!dir->i_op->mknod)
5064                 return -EPERM;
5065  
5066 @@ -2456,7 +2595,7 @@ int vfs_link(struct dentry *old_dentry, 
5067         /*
5068          * A link to an append-only or immutable file cannot be created.
5069          */
5070 -       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
5071 +       if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
5072                 return -EPERM;
5073         if (!dir->i_op->link)
5074                 return -EPERM;
5075 @@ -2829,6 +2968,219 @@ int vfs_follow_link(struct nameidata *nd
5076         return __vfs_follow_link(nd, link);
5077  }
5078  
5079 +
5080 +#ifdef CONFIG_VSERVER_COWBL
5081 +
5082 +#include <linux/file.h>
5083 +
5084 +static inline
5085 +long do_cow_splice(struct file *in, struct file *out, size_t len)
5086 +{
5087 +       loff_t ppos = 0;
5088 +
5089 +       return do_splice_direct(in, &ppos, out, len, 0);
5090 +}
5091 +
5092 +struct dentry *cow_break_link(const char *pathname)
5093 +{
5094 +       int ret, mode, pathlen, redo = 0;
5095 +       struct nameidata old_nd, dir_nd;
5096 +       struct path old_path, new_path;
5097 +       struct dentry *dir, *res = NULL;
5098 +       struct file *old_file;
5099 +       struct file *new_file;
5100 +       char *to, *path, pad='\251';
5101 +       loff_t size;
5102 +
5103 +       vxdprintk(VXD_CBIT(misc, 1), "cow_break_link(»%s«)", pathname);
5104 +       path = kmalloc(PATH_MAX, GFP_KERNEL);
5105 +       ret = -ENOMEM;
5106 +       if (!path)
5107 +               goto out;
5108 +
5109 +       /* old_nd will have refs to dentry and mnt */
5110 +       ret = path_lookup(pathname, LOOKUP_FOLLOW, &old_nd);
5111 +       vxdprintk(VXD_CBIT(misc, 2), "path_lookup(old): %d", ret);
5112 +       if (ret < 0)
5113 +               goto out_free_path;
5114 +
5115 +       old_path = old_nd.path;
5116 +       mode = old_path.dentry->d_inode->i_mode;
5117 +
5118 +       to = d_path(&old_path, path, PATH_MAX-2);
5119 +       pathlen = strlen(to);
5120 +       vxdprintk(VXD_CBIT(misc, 2), "old path »%s« [»%.*s«:%d]", to,
5121 +               old_path.dentry->d_name.len, old_path.dentry->d_name.name,
5122 +               old_path.dentry->d_name.len);
5123 +
5124 +       to[pathlen + 1] = 0;
5125 +retry:
5126 +       to[pathlen] = pad--;
5127 +       ret = -EMLINK;
5128 +       if (pad <= '\240')
5129 +               goto out_rel_old;
5130 +
5131 +       vxdprintk(VXD_CBIT(misc, 1), "temp copy »%s«", to);
5132 +       /* dir_nd will have refs to dentry and mnt */
5133 +       ret = path_lookup(to,
5134 +               LOOKUP_PARENT | LOOKUP_OPEN | LOOKUP_CREATE, &dir_nd);
5135 +       vxdprintk(VXD_CBIT(misc, 2),
5136 +               "path_lookup(new): %d", ret);
5137 +       if (ret < 0)
5138 +               goto retry;
5139 +
5140 +       /* this puppy downs the inode mutex */
5141 +       new_path.dentry = lookup_create(&dir_nd, 0);
5142 +       if (!new_path.dentry || IS_ERR(new_path.dentry)) {
5143 +               vxdprintk(VXD_CBIT(misc, 2),
5144 +                       "lookup_create(new): %p", new_path.dentry);
5145 +               mutex_unlock(&dir_nd.path.dentry->d_inode->i_mutex);
5146 +               path_put(&dir_nd.path);
5147 +               goto retry;
5148 +       }
5149 +       vxdprintk(VXD_CBIT(misc, 2),
5150 +               "lookup_create(new): %p [»%.*s«:%d]", new_path.dentry,
5151 +               new_path.dentry->d_name.len, new_path.dentry->d_name.name,
5152 +               new_path.dentry->d_name.len);
5153 +       dir = dir_nd.path.dentry;
5154 +
5155 +       ret = vfs_create(dir_nd.path.dentry->d_inode, new_path.dentry, mode, &dir_nd);
5156 +       vxdprintk(VXD_CBIT(misc, 2),
5157 +               "vfs_create(new): %d", ret);
5158 +       if (ret == -EEXIST) {
5159 +               mutex_unlock(&dir->d_inode->i_mutex);
5160 +               dput(new_path.dentry);
5161 +               path_put(&dir_nd.path);
5162 +               goto retry;
5163 +       }
5164 +       else if (ret < 0)
5165 +               goto out_unlock_new;
5166 +
5167 +       /* drop out early, ret passes ENOENT */
5168 +       ret = -ENOENT;
5169 +       if ((redo = d_unhashed(old_path.dentry)))
5170 +               goto out_unlock_new;
5171 +
5172 +       new_path.mnt = dir_nd.path.mnt;
5173 +       dget(old_path.dentry);
5174 +       mntget(old_path.mnt);
5175 +       /* this one cleans up the dentry/mnt in case of failure */
5176 +       old_file = dentry_open(old_path.dentry, old_path.mnt,
5177 +               O_RDONLY, current_cred());
5178 +       vxdprintk(VXD_CBIT(misc, 2),
5179 +               "dentry_open(old): %p", old_file);
5180 +       if (!old_file || IS_ERR(old_file)) {
5181 +               res = IS_ERR(old_file) ? (void *) old_file : res;
5182 +               goto out_unlock_new;
5183 +       }
5184 +
5185 +       dget(new_path.dentry);
5186 +       mntget(new_path.mnt);
5187 +       /* this one cleans up the dentry/mnt in case of failure */
5188 +       new_file = dentry_open(new_path.dentry, new_path.mnt,
5189 +               O_WRONLY, current_cred());
5190 +       vxdprintk(VXD_CBIT(misc, 2),
5191 +               "dentry_open(new): %p", new_file);
5192 +
5193 +       ret = IS_ERR(new_file) ? PTR_ERR(new_file) : -ENOENT;
5194 +       if (!new_file || IS_ERR(new_file))
5195 +               goto out_fput_old;
5196 +
5197 +       size = i_size_read(old_file->f_dentry->d_inode);
5198 +       ret = do_cow_splice(old_file, new_file, size);
5199 +       vxdprintk(VXD_CBIT(misc, 2), "do_splice_direct: %d", ret);
5200 +       if (ret < 0) {
5201 +               goto out_fput_both;
5202 +       } else if (ret < size) {
5203 +               ret = -ENOSPC;
5204 +               goto out_fput_both;
5205 +       } else {
5206 +               struct inode *old_inode = old_path.dentry->d_inode;
5207 +               struct inode *new_inode = new_path.dentry->d_inode;
5208 +               struct iattr attr = {
5209 +                       .ia_uid = old_inode->i_uid,
5210 +                       .ia_gid = old_inode->i_gid,
5211 +                       .ia_valid = ATTR_UID | ATTR_GID
5212 +                       };
5213 +
5214 +               ret = inode_setattr(new_inode, &attr);
5215 +               if (ret)
5216 +                       goto out_fput_both;
5217 +       }
5218 +
5219 +       mutex_lock(&old_path.dentry->d_inode->i_sb->s_vfs_rename_mutex);
5220 +
5221 +       /* drop out late */
5222 +       ret = -ENOENT;
5223 +       if ((redo = d_unhashed(old_path.dentry)))
5224 +               goto out_unlock;
5225 +
5226 +       vxdprintk(VXD_CBIT(misc, 2),
5227 +               "vfs_rename: [»%*s«:%d] -> [»%*s«:%d]",
5228 +               new_path.dentry->d_name.len, new_path.dentry->d_name.name,
5229 +               new_path.dentry->d_name.len,
5230 +               old_path.dentry->d_name.len, old_path.dentry->d_name.name,
5231 +               old_path.dentry->d_name.len);
5232 +       ret = vfs_rename(dir_nd.path.dentry->d_inode, new_path.dentry,
5233 +               old_nd.path.dentry->d_parent->d_inode, old_path.dentry);
5234 +       vxdprintk(VXD_CBIT(misc, 2), "vfs_rename: %d", ret);
5235 +       res = new_path.dentry;
5236 +
5237 +out_unlock:
5238 +       mutex_unlock(&old_path.dentry->d_inode->i_sb->s_vfs_rename_mutex);
5239 +
5240 +out_fput_both:
5241 +       vxdprintk(VXD_CBIT(misc, 3),
5242 +               "fput(new_file=%p[#%ld])", new_file,
5243 +               atomic_long_read(&new_file->f_count));
5244 +       fput(new_file);
5245 +
5246 +out_fput_old:
5247 +       vxdprintk(VXD_CBIT(misc, 3),
5248 +               "fput(old_file=%p[#%ld])", old_file,
5249 +               atomic_long_read(&old_file->f_count));
5250 +       fput(old_file);
5251 +
5252 +out_unlock_new:
5253 +       mutex_unlock(&dir->d_inode->i_mutex);
5254 +       if (!ret)
5255 +               goto out_redo;
5256 +
5257 +       /* error path cleanup */
5258 +       vfs_unlink(dir->d_inode, new_path.dentry);
5259 +       dput(new_path.dentry);
5260 +
5261 +out_redo:
5262 +       if (!redo)
5263 +               goto out_rel_both;
5264 +       /* lookup dentry once again */
5265 +       path_put(&old_nd.path);
5266 +       ret = path_lookup(pathname, LOOKUP_FOLLOW, &old_nd);
5267 +       if (ret)
5268 +               goto out_rel_both;
5269 +
5270 +       new_path.dentry = old_nd.path.dentry;
5271 +       vxdprintk(VXD_CBIT(misc, 2),
5272 +               "path_lookup(redo): %p [»%.*s«:%d]", new_path.dentry,
5273 +               new_path.dentry->d_name.len, new_path.dentry->d_name.name,
5274 +               new_path.dentry->d_name.len);
5275 +       dget(new_path.dentry);
5276 +       res = new_path.dentry;
5277 +
5278 +out_rel_both:
5279 +       path_put(&dir_nd.path);
5280 +out_rel_old:
5281 +       path_put(&old_nd.path);
5282 +out_free_path:
5283 +       kfree(path);
5284 +out:
5285 +       if (ret)
5286 +               res = ERR_PTR(ret);
5287 +       return res;
5288 +}
5289 +
5290 +#endif
5291 +
5292  /* get the link contents into pagecache */
5293  static char *page_getlink(struct dentry * dentry, struct page **ppage)
5294  {
5295 diff -NurpP --minimal linux-2.6.31.6/fs/namespace.c linux-2.6.31.6-vs2.3.0.36.24/fs/namespace.c
5296 --- linux-2.6.31.6/fs/namespace.c       2009-09-10 15:26:22.000000000 +0200
5297 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/namespace.c 2009-09-10 16:11:43.000000000 +0200
5298 @@ -29,6 +29,11 @@
5299  #include <linux/log2.h>
5300  #include <linux/idr.h>
5301  #include <linux/fs_struct.h>
5302 +#include <linux/vs_base.h>
5303 +#include <linux/vs_context.h>
5304 +#include <linux/vs_tag.h>
5305 +#include <linux/vserver/space.h>
5306 +#include <linux/vserver/global.h>
5307  #include <asm/uaccess.h>
5308  #include <asm/unistd.h>
5309  #include "pnode.h"
5310 @@ -567,6 +572,7 @@ static struct vfsmount *clone_mnt(struct
5311                 mnt->mnt_root = dget(root);
5312                 mnt->mnt_mountpoint = mnt->mnt_root;
5313                 mnt->mnt_parent = mnt;
5314 +               mnt->mnt_tag = old->mnt_tag;
5315  
5316                 if (flag & CL_SLAVE) {
5317                         list_add(&mnt->mnt_slave, &old->mnt_slave_list);
5318 @@ -661,6 +667,31 @@ static inline void mangle(struct seq_fil
5319         seq_escape(m, s, " \t\n\\");
5320  }
5321  
5322 +static int mnt_is_reachable(struct vfsmount *mnt)
5323 +{
5324 +       struct path root;
5325 +       struct dentry *point;
5326 +       int ret;
5327 +
5328 +       if (mnt == mnt->mnt_ns->root)
5329 +               return 1;
5330 +
5331 +       spin_lock(&vfsmount_lock);
5332 +       root = current->fs->root;
5333 +       point = root.dentry;
5334 +
5335 +       while ((mnt != mnt->mnt_parent) && (mnt != root.mnt)) {
5336 +               point = mnt->mnt_mountpoint;
5337 +               mnt = mnt->mnt_parent;
5338 +       }
5339 +
5340 +       ret = (mnt == root.mnt) && is_subdir(point, root.dentry);
5341 +
5342 +       spin_unlock(&vfsmount_lock);
5343 +
5344 +       return ret;
5345 +}
5346 +
5347  /*
5348   * Simple .show_options callback for filesystems which don't want to
5349   * implement more complex mount option showing.
5350 @@ -748,6 +779,8 @@ static int show_sb_opts(struct seq_file 
5351                 { MS_SYNCHRONOUS, ",sync" },
5352                 { MS_DIRSYNC, ",dirsync" },
5353                 { MS_MANDLOCK, ",mand" },
5354 +               { MS_TAGGED, ",tag" },
5355 +               { MS_NOTAGCHECK, ",notagcheck" },
5356                 { 0, NULL }
5357         };
5358         const struct proc_fs_info *fs_infop;
5359 @@ -795,10 +828,20 @@ static int show_vfsmnt(struct seq_file *
5360         int err = 0;
5361         struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
5362  
5363 -       mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
5364 -       seq_putc(m, ' ');
5365 -       seq_path(m, &mnt_path, " \t\n\\");
5366 -       seq_putc(m, ' ');
5367 +       if (vx_flags(VXF_HIDE_MOUNT, 0))
5368 +               return SEQ_SKIP;
5369 +       if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P))
5370 +               return SEQ_SKIP;
5371 +
5372 +       if (!vx_check(0, VS_ADMIN|VS_WATCH) &&
5373 +               mnt == current->fs->root.mnt) {
5374 +               seq_puts(m, "/dev/root / ");
5375 +       } else {
5376 +               mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
5377 +               seq_putc(m, ' ');
5378 +               seq_path(m, &mnt_path, " \t\n\\");
5379 +               seq_putc(m, ' ');
5380 +       }
5381         show_type(m, mnt->mnt_sb);
5382         seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
5383         err = show_sb_opts(m, mnt->mnt_sb);
5384 @@ -828,6 +871,11 @@ static int show_mountinfo(struct seq_fil
5385         struct path root = p->root;
5386         int err = 0;
5387  
5388 +       if (vx_flags(VXF_HIDE_MOUNT, 0))
5389 +               return SEQ_SKIP;
5390 +       if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P))
5391 +               return SEQ_SKIP;
5392 +
5393         seq_printf(m, "%i %i %u:%u ", mnt->mnt_id, mnt->mnt_parent->mnt_id,
5394                    MAJOR(sb->s_dev), MINOR(sb->s_dev));
5395         seq_dentry(m, mnt->mnt_root, " \t\n\\");
5396 @@ -886,17 +934,27 @@ static int show_vfsstat(struct seq_file 
5397         struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
5398         int err = 0;
5399  
5400 -       /* device */
5401 -       if (mnt->mnt_devname) {
5402 -               seq_puts(m, "device ");
5403 -               mangle(m, mnt->mnt_devname);
5404 -       } else
5405 -               seq_puts(m, "no device");
5406 +       if (vx_flags(VXF_HIDE_MOUNT, 0))
5407 +               return SEQ_SKIP;
5408 +       if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P))
5409 +               return SEQ_SKIP;
5410  
5411 -       /* mount point */
5412 -       seq_puts(m, " mounted on ");
5413 -       seq_path(m, &mnt_path, " \t\n\\");
5414 -       seq_putc(m, ' ');
5415 +       if (!vx_check(0, VS_ADMIN|VS_WATCH) &&
5416 +               mnt == current->fs->root.mnt) {
5417 +               seq_puts(m, "device /dev/root mounted on / ");
5418 +       } else {
5419 +               /* device */
5420 +               if (mnt->mnt_devname) {
5421 +                       seq_puts(m, "device ");
5422 +                       mangle(m, mnt->mnt_devname);
5423 +               } else
5424 +                       seq_puts(m, "no device");
5425 +
5426 +               /* mount point */
5427 +               seq_puts(m, " mounted on ");
5428 +               seq_path(m, &mnt_path, " \t\n\\");
5429 +               seq_putc(m, ' ');
5430 +       }
5431  
5432         /* file system type */
5433         seq_puts(m, "with fstype ");
5434 @@ -1130,7 +1188,7 @@ SYSCALL_DEFINE2(umount, char __user *, n
5435                 goto dput_and_out;
5436  
5437         retval = -EPERM;
5438 -       if (!capable(CAP_SYS_ADMIN))
5439 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
5440                 goto dput_and_out;
5441  
5442         retval = do_umount(path.mnt, flags);
5443 @@ -1156,7 +1214,7 @@ SYSCALL_DEFINE1(oldumount, char __user *
5444  
5445  static int mount_is_safe(struct path *path)
5446  {
5447 -       if (capable(CAP_SYS_ADMIN))
5448 +       if (vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
5449                 return 0;
5450         return -EPERM;
5451  #ifdef notyet
5452 @@ -1420,7 +1478,7 @@ static int do_change_type(struct path *p
5453         int type = flag & ~MS_REC;
5454         int err = 0;
5455  
5456 -       if (!capable(CAP_SYS_ADMIN))
5457 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_NAMESPACE))
5458                 return -EPERM;
5459  
5460         if (path->dentry != path->mnt->mnt_root)
5461 @@ -1447,11 +1505,13 @@ static int do_change_type(struct path *p
5462   * do loopback mount.
5463   */
5464  static int do_loopback(struct path *path, char *old_name,
5465 -                               int recurse)
5466 +       tag_t tag, unsigned long flags, int mnt_flags)
5467  {
5468         struct path old_path;
5469         struct vfsmount *mnt = NULL;
5470         int err = mount_is_safe(path);
5471 +       int recurse = flags & MS_REC;
5472 +
5473         if (err)
5474                 return err;
5475         if (!old_name || !*old_name)
5476 @@ -1485,6 +1545,7 @@ static int do_loopback(struct path *path
5477                 spin_unlock(&vfsmount_lock);
5478                 release_mounts(&umount_list);
5479         }
5480 +       mnt->mnt_flags = mnt_flags;
5481  
5482  out:
5483         up_write(&namespace_sem);
5484 @@ -1515,12 +1576,12 @@ static int change_mount_flags(struct vfs
5485   * on it - tough luck.
5486   */
5487  static int do_remount(struct path *path, int flags, int mnt_flags,
5488 -                     void *data)
5489 +       void *data, xid_t xid)
5490  {
5491         int err;
5492         struct super_block *sb = path->mnt->mnt_sb;
5493  
5494 -       if (!capable(CAP_SYS_ADMIN))
5495 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_REMOUNT))
5496                 return -EPERM;
5497  
5498         if (!check_mnt(path->mnt))
5499 @@ -1562,7 +1623,7 @@ static int do_move_mount(struct path *pa
5500         struct path old_path, parent_path;
5501         struct vfsmount *p;
5502         int err = 0;
5503 -       if (!capable(CAP_SYS_ADMIN))
5504 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
5505                 return -EPERM;
5506         if (!old_name || !*old_name)
5507                 return -EINVAL;
5508 @@ -1644,7 +1705,7 @@ static int do_new_mount(struct path *pat
5509                 return -EINVAL;
5510  
5511         /* we need capabilities... */
5512 -       if (!capable(CAP_SYS_ADMIN))
5513 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
5514                 return -EPERM;
5515  
5516         lock_kernel();
5517 @@ -1891,6 +1952,7 @@ long do_mount(char *dev_name, char *dir_
5518         struct path path;
5519         int retval = 0;
5520         int mnt_flags = 0;
5521 +       tag_t tag = 0;
5522  
5523         /* Discard magic */
5524         if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
5525 @@ -1910,6 +1972,12 @@ long do_mount(char *dev_name, char *dir_
5526         if (!(flags & MS_NOATIME))
5527                 mnt_flags |= MNT_RELATIME;
5528  
5529 +       if (dx_parse_tag(data_page, &tag, 1, &mnt_flags, &flags)) {
5530 +               /* FIXME: bind and re-mounts get the tag flag? */
5531 +               if (flags & (MS_BIND|MS_REMOUNT))
5532 +                       flags |= MS_TAGID;
5533 +       }
5534 +
5535         /* Separate the per-mountpoint flags */
5536         if (flags & MS_NOSUID)
5537                 mnt_flags |= MNT_NOSUID;
5538 @@ -1926,6 +1994,8 @@ long do_mount(char *dev_name, char *dir_
5539         if (flags & MS_RDONLY)
5540                 mnt_flags |= MNT_READONLY;
5541  
5542 +       if (!capable(CAP_SYS_ADMIN))
5543 +               mnt_flags |= MNT_NODEV;
5544         flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
5545                    MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
5546                    MS_STRICTATIME);
5547 @@ -1942,9 +2012,9 @@ long do_mount(char *dev_name, char *dir_
5548  
5549         if (flags & MS_REMOUNT)
5550                 retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
5551 -                                   data_page);
5552 +                                   data_page, tag);
5553         else if (flags & MS_BIND)
5554 -               retval = do_loopback(&path, dev_name, flags & MS_REC);
5555 +               retval = do_loopback(&path, dev_name, tag, flags, mnt_flags);
5556         else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
5557                 retval = do_change_type(&path, flags);
5558         else if (flags & MS_MOVE)
5559 @@ -2023,6 +2093,7 @@ static struct mnt_namespace *dup_mnt_ns(
5560                 q = next_mnt(q, new_ns->root);
5561         }
5562         up_write(&namespace_sem);
5563 +       atomic_inc(&vs_global_mnt_ns);
5564  
5565         if (rootmnt)
5566                 mntput(rootmnt);
5567 @@ -2165,9 +2236,10 @@ SYSCALL_DEFINE2(pivot_root, const char _
5568         down_write(&namespace_sem);
5569         mutex_lock(&old.dentry->d_inode->i_mutex);
5570         error = -EINVAL;
5571 -       if (IS_MNT_SHARED(old.mnt) ||
5572 +       if ((IS_MNT_SHARED(old.mnt) ||
5573                 IS_MNT_SHARED(new.mnt->mnt_parent) ||
5574 -               IS_MNT_SHARED(root.mnt->mnt_parent))
5575 +               IS_MNT_SHARED(root.mnt->mnt_parent)) &&
5576 +               !vx_flags(VXF_STATE_SETUP, 0))
5577                 goto out2;
5578         if (!check_mnt(root.mnt))
5579                 goto out2;
5580 @@ -2303,6 +2375,7 @@ void put_mnt_ns(struct mnt_namespace *ns
5581         spin_unlock(&vfsmount_lock);
5582         up_write(&namespace_sem);
5583         release_mounts(&umount_list);
5584 +       atomic_dec(&vs_global_mnt_ns);
5585         kfree(ns);
5586  }
5587  EXPORT_SYMBOL(put_mnt_ns);
5588 diff -NurpP --minimal linux-2.6.31.6/fs/nfs/client.c linux-2.6.31.6-vs2.3.0.36.24/fs/nfs/client.c
5589 --- linux-2.6.31.6/fs/nfs/client.c      2009-11-12 12:10:11.000000000 +0100
5590 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/nfs/client.c        2009-11-12 12:26:38.000000000 +0100
5591 @@ -732,6 +732,9 @@ static int nfs_init_server_rpcclient(str
5592         if (server->flags & NFS_MOUNT_SOFT)
5593                 server->client->cl_softrtry = 1;
5594  
5595 +       server->client->cl_tag = 0;
5596 +       if (server->flags & NFS_MOUNT_TAGGED)
5597 +               server->client->cl_tag = 1;
5598         return 0;
5599  }
5600  
5601 @@ -899,6 +902,10 @@ static void nfs_server_set_fsinfo(struct
5602                 server->acdirmin = server->acdirmax = 0;
5603         }
5604  
5605 +       /* FIXME: needs fsinfo
5606 +       if (server->flags & NFS_MOUNT_TAGGED)
5607 +               sb->s_flags |= MS_TAGGED;       */
5608 +
5609         server->maxfilesize = fsinfo->maxfilesize;
5610  
5611         /* We're airborne Set socket buffersize */
5612 diff -NurpP --minimal linux-2.6.31.6/fs/nfs/dir.c linux-2.6.31.6-vs2.3.0.36.24/fs/nfs/dir.c
5613 --- linux-2.6.31.6/fs/nfs/dir.c 2009-11-12 12:10:11.000000000 +0100
5614 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/nfs/dir.c   2009-11-12 12:26:38.000000000 +0100
5615 @@ -33,6 +33,7 @@
5616  #include <linux/namei.h>
5617  #include <linux/mount.h>
5618  #include <linux/sched.h>
5619 +#include <linux/vs_tag.h>
5620  
5621  #include "nfs4_fs.h"
5622  #include "delegation.h"
5623 @@ -949,6 +950,7 @@ static struct dentry *nfs_lookup(struct 
5624         if (IS_ERR(res))
5625                 goto out_unblock_sillyrename;
5626  
5627 +       dx_propagate_tag(nd, inode);
5628  no_entry:
5629         res = d_materialise_unique(dentry, inode);
5630         if (res != NULL) {
5631 diff -NurpP --minimal linux-2.6.31.6/fs/nfs/inode.c linux-2.6.31.6-vs2.3.0.36.24/fs/nfs/inode.c
5632 --- linux-2.6.31.6/fs/nfs/inode.c       2009-09-10 15:26:23.000000000 +0200
5633 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/nfs/inode.c 2009-09-10 16:11:43.000000000 +0200
5634 @@ -36,6 +36,7 @@
5635  #include <linux/vfs.h>
5636  #include <linux/inet.h>
5637  #include <linux/nfs_xdr.h>
5638 +#include <linux/vs_tag.h>
5639  
5640  #include <asm/system.h>
5641  #include <asm/uaccess.h>
5642 @@ -278,6 +279,8 @@ nfs_fhget(struct super_block *sb, struct
5643         if (inode->i_state & I_NEW) {
5644                 struct nfs_inode *nfsi = NFS_I(inode);
5645                 unsigned long now = jiffies;
5646 +               uid_t uid;
5647 +               gid_t gid;
5648  
5649                 /* We set i_ino for the few things that still rely on it,
5650                  * such as stat(2) */
5651 @@ -321,8 +324,8 @@ nfs_fhget(struct super_block *sb, struct
5652                 nfsi->change_attr = 0;
5653                 inode->i_size = 0;
5654                 inode->i_nlink = 0;
5655 -               inode->i_uid = -2;
5656 -               inode->i_gid = -2;
5657 +               uid = -2;
5658 +               gid = -2;
5659                 inode->i_blocks = 0;
5660                 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
5661  
5662 @@ -341,9 +344,9 @@ nfs_fhget(struct super_block *sb, struct
5663                 if (fattr->valid & NFS_ATTR_FATTR_NLINK)
5664                         inode->i_nlink = fattr->nlink;
5665                 if (fattr->valid & NFS_ATTR_FATTR_OWNER)
5666 -                       inode->i_uid = fattr->uid;
5667 +                       uid = fattr->uid;
5668                 if (fattr->valid & NFS_ATTR_FATTR_GROUP)
5669 -                       inode->i_gid = fattr->gid;
5670 +                       gid = fattr->gid;
5671                 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
5672                         inode->i_blocks = fattr->du.nfs2.blocks;
5673                 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
5674 @@ -352,6 +355,11 @@ nfs_fhget(struct super_block *sb, struct
5675                          */
5676                         inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
5677                 }
5678 +               inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
5679 +               inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
5680 +               inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, 0);
5681 +                               /* maybe fattr->xid someday */
5682 +
5683                 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
5684                 nfsi->attrtimeo_timestamp = now;
5685                 nfsi->access_cache = RB_ROOT;
5686 @@ -492,6 +500,8 @@ void nfs_setattr_update_inode(struct ino
5687                         inode->i_uid = attr->ia_uid;
5688                 if ((attr->ia_valid & ATTR_GID) != 0)
5689                         inode->i_gid = attr->ia_gid;
5690 +               if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode))
5691 +                       inode->i_tag = attr->ia_tag;
5692                 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
5693                 spin_unlock(&inode->i_lock);
5694         }
5695 @@ -902,6 +912,9 @@ static int nfs_check_inode_attributes(st
5696         struct nfs_inode *nfsi = NFS_I(inode);
5697         loff_t cur_size, new_isize;
5698         unsigned long invalid = 0;
5699 +       uid_t uid;
5700 +       gid_t gid;
5701 +       tag_t tag;
5702  
5703  
5704         /* Has the inode gone and changed behind our back? */
5705 @@ -925,13 +938,18 @@ static int nfs_check_inode_attributes(st
5706                         invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
5707         }
5708  
5709 +       uid = INOTAG_UID(DX_TAG(inode), fattr->uid, fattr->gid);
5710 +       gid = INOTAG_GID(DX_TAG(inode), fattr->uid, fattr->gid);
5711 +       tag = INOTAG_TAG(DX_TAG(inode), fattr->uid, fattr->gid, 0);
5712 +
5713         /* Have any file permissions changed? */
5714         if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
5715                 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
5716 -       if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && inode->i_uid != fattr->uid)
5717 +       if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && inode->i_uid != uid)
5718                 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
5719 -       if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && inode->i_gid != fattr->gid)
5720 +       if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && inode->i_gid != gid)
5721                 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
5722 +               /* uid/gid: tag-decoded server values; maybe check i_tag too? */
5723  
5724         /* Has the link count changed? */
5725         if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink)
5726 @@ -1145,6 +1163,9 @@ static int nfs_update_inode(struct inode
5727         loff_t cur_isize, new_isize;
5728         unsigned long invalid = 0;
5729         unsigned long now = jiffies;
5730 +       uid_t uid;
5731 +       gid_t gid;
5732 +       tag_t tag;
5733  
5734         dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
5735                         __func__, inode->i_sb->s_id, inode->i_ino,
5736 @@ -1233,6 +1254,9 @@ static int nfs_update_inode(struct inode
5737                 }
5738         }
5739  
5740 +       uid = INOTAG_UID(DX_TAG(inode), fattr->uid, fattr->gid);
5741 +       gid = INOTAG_GID(DX_TAG(inode), fattr->uid, fattr->gid);
5742 +       tag = INOTAG_TAG(DX_TAG(inode), fattr->uid, fattr->gid, 0);
5743  
5744         if (fattr->valid & NFS_ATTR_FATTR_ATIME)
5745                 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
5746 @@ -1244,18 +1268,22 @@ static int nfs_update_inode(struct inode
5747                 }
5748         }
5749         if (fattr->valid & NFS_ATTR_FATTR_OWNER) {
5750 -               if (inode->i_uid != fattr->uid) {
5751 +               if (inode->i_uid != uid) {
5752                         invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
5753 -                       inode->i_uid = fattr->uid;
5754 +                       inode->i_uid = uid;
5755                 }
5756         }
5757         if (fattr->valid & NFS_ATTR_FATTR_GROUP) {
5758 -               if (inode->i_gid != fattr->gid) {
5759 +               if (inode->i_gid != gid) {
5760                         invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
5761 -                       inode->i_gid = fattr->gid;
5762 +                       inode->i_gid = gid;
5763                 }
5764         }
5765  
5766 +       inode->i_uid = uid;
5767 +       inode->i_gid = gid;
5768 +       inode->i_tag = tag;
5769 +
5770         if (fattr->valid & NFS_ATTR_FATTR_NLINK) {
5771                 if (inode->i_nlink != fattr->nlink) {
5772                         invalid |= NFS_INO_INVALID_ATTR;
5773 diff -NurpP --minimal linux-2.6.31.6/fs/nfs/nfs3xdr.c linux-2.6.31.6-vs2.3.0.36.24/fs/nfs/nfs3xdr.c
5774 --- linux-2.6.31.6/fs/nfs/nfs3xdr.c     2009-06-11 17:13:06.000000000 +0200
5775 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/nfs/nfs3xdr.c       2009-09-10 16:11:43.000000000 +0200
5776 @@ -22,6 +22,7 @@
5777  #include <linux/nfs3.h>
5778  #include <linux/nfs_fs.h>
5779  #include <linux/nfsacl.h>
5780 +#include <linux/vs_tag.h>
5781  #include "internal.h"
5782  
5783  #define NFSDBG_FACILITY                NFSDBG_XDR
5784 @@ -177,7 +178,7 @@ xdr_decode_fattr(__be32 *p, struct nfs_f
5785  }
5786  
5787  static inline __be32 *
5788 -xdr_encode_sattr(__be32 *p, struct iattr *attr)
5789 +xdr_encode_sattr(__be32 *p, struct iattr *attr, int tag)
5790  {
5791         if (attr->ia_valid & ATTR_MODE) {
5792                 *p++ = xdr_one;
5793 @@ -185,15 +186,17 @@ xdr_encode_sattr(__be32 *p, struct iattr
5794         } else {
5795                 *p++ = xdr_zero;
5796         }
5797 -       if (attr->ia_valid & ATTR_UID) {
5798 +       if (attr->ia_valid & ATTR_UID ||
5799 +               (tag && (attr->ia_valid & ATTR_TAG))) {
5800                 *p++ = xdr_one;
5801 -               *p++ = htonl(attr->ia_uid);
5802 +               *p++ = htonl(TAGINO_UID(tag, attr->ia_uid, attr->ia_tag));
5803         } else {
5804                 *p++ = xdr_zero;
5805         }
5806 -       if (attr->ia_valid & ATTR_GID) {
5807 +       if (attr->ia_valid & ATTR_GID ||
5808 +               (tag && (attr->ia_valid & ATTR_TAG))) {
5809                 *p++ = xdr_one;
5810 -               *p++ = htonl(attr->ia_gid);
5811 +               *p++ = htonl(TAGINO_GID(tag, attr->ia_gid, attr->ia_tag));
5812         } else {
5813                 *p++ = xdr_zero;
5814         }
5815 @@ -280,7 +283,8 @@ static int
5816  nfs3_xdr_sattrargs(struct rpc_rqst *req, __be32 *p, struct nfs3_sattrargs *args)
5817  {
5818         p = xdr_encode_fhandle(p, args->fh);
5819 -       p = xdr_encode_sattr(p, args->sattr);
5820 +       p = xdr_encode_sattr(p, args->sattr,
5821 +               req->rq_task->tk_client->cl_tag);
5822         *p++ = htonl(args->guard);
5823         if (args->guard)
5824                 p = xdr_encode_time3(p, &args->guardtime);
5825 @@ -385,7 +389,8 @@ nfs3_xdr_createargs(struct rpc_rqst *req
5826                 *p++ = args->verifier[0];
5827                 *p++ = args->verifier[1];
5828         } else
5829 -               p = xdr_encode_sattr(p, args->sattr);
5830 +               p = xdr_encode_sattr(p, args->sattr,
5831 +                       req->rq_task->tk_client->cl_tag);
5832  
5833         req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
5834         return 0;
5835 @@ -399,7 +404,8 @@ nfs3_xdr_mkdirargs(struct rpc_rqst *req,
5836  {
5837         p = xdr_encode_fhandle(p, args->fh);
5838         p = xdr_encode_array(p, args->name, args->len);
5839 -       p = xdr_encode_sattr(p, args->sattr);
5840 +       p = xdr_encode_sattr(p, args->sattr,
5841 +               req->rq_task->tk_client->cl_tag);
5842         req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
5843         return 0;
5844  }
5845 @@ -412,7 +418,8 @@ nfs3_xdr_symlinkargs(struct rpc_rqst *re
5846  {
5847         p = xdr_encode_fhandle(p, args->fromfh);
5848         p = xdr_encode_array(p, args->fromname, args->fromlen);
5849 -       p = xdr_encode_sattr(p, args->sattr);
5850 +       p = xdr_encode_sattr(p, args->sattr,
5851 +               req->rq_task->tk_client->cl_tag);
5852         *p++ = htonl(args->pathlen);
5853         req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
5854  
5855 @@ -430,7 +437,8 @@ nfs3_xdr_mknodargs(struct rpc_rqst *req,
5856         p = xdr_encode_fhandle(p, args->fh);
5857         p = xdr_encode_array(p, args->name, args->len);
5858         *p++ = htonl(args->type);
5859 -       p = xdr_encode_sattr(p, args->sattr);
5860 +       p = xdr_encode_sattr(p, args->sattr,
5861 +               req->rq_task->tk_client->cl_tag);
5862         if (args->type == NF3CHR || args->type == NF3BLK) {
5863                 *p++ = htonl(MAJOR(args->rdev));
5864                 *p++ = htonl(MINOR(args->rdev));
5865 diff -NurpP --minimal linux-2.6.31.6/fs/nfs/nfsroot.c linux-2.6.31.6-vs2.3.0.36.24/fs/nfs/nfsroot.c
5866 --- linux-2.6.31.6/fs/nfs/nfsroot.c     2009-09-10 15:26:23.000000000 +0200
5867 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/nfs/nfsroot.c       2009-09-10 16:11:43.000000000 +0200
5868 @@ -122,12 +122,12 @@ static int mount_port __initdata = 0;             /
5869  enum {
5870         /* Options that take integer arguments */
5871         Opt_port, Opt_rsize, Opt_wsize, Opt_timeo, Opt_retrans, Opt_acregmin,
5872 -       Opt_acregmax, Opt_acdirmin, Opt_acdirmax,
5873 +       Opt_acregmax, Opt_acdirmin, Opt_acdirmax, Opt_tagid,
5874         /* Options that take no arguments */
5875         Opt_soft, Opt_hard, Opt_intr,
5876         Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, 
5877         Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp,
5878 -       Opt_acl, Opt_noacl,
5879 +       Opt_acl, Opt_noacl, Opt_tag, Opt_notag,
5880         /* Error token */
5881         Opt_err
5882  };
5883 @@ -164,6 +164,9 @@ static const match_table_t tokens __init
5884         {Opt_tcp, "tcp"},
5885         {Opt_acl, "acl"},
5886         {Opt_noacl, "noacl"},
5887 +       {Opt_tag, "tag"},
5888 +       {Opt_notag, "notag"},
5889 +       {Opt_tagid, "tagid=%u"},
5890         {Opt_err, NULL}
5891         
5892  };
5893 @@ -275,6 +278,20 @@ static int __init root_nfs_parse(char *n
5894                         case Opt_noacl:
5895                                 nfs_data.flags |= NFS_MOUNT_NOACL;
5896                                 break;
5897 +#ifndef CONFIG_TAGGING_NONE
5898 +                       case Opt_tag:
5899 +                               nfs_data.flags |= NFS_MOUNT_TAGGED;
5900 +                               break;
5901 +                       case Opt_notag:
5902 +                               nfs_data.flags &= ~NFS_MOUNT_TAGGED;
5903 +                               break;
5904 +#endif
5905 +#ifdef CONFIG_PROPAGATE
5906 +                       case Opt_tagid:
5907 +                               /* use args[0] */
5908 +                               nfs_data.flags |= NFS_MOUNT_TAGGED;
5909 +                               break;
5910 +#endif
5911                         default:
5912                                 printk(KERN_WARNING "Root-NFS: unknown "
5913                                         "option: %s\n", p);
5914 diff -NurpP --minimal linux-2.6.31.6/fs/nfs/super.c linux-2.6.31.6-vs2.3.0.36.24/fs/nfs/super.c
5915 --- linux-2.6.31.6/fs/nfs/super.c       2009-11-12 12:10:11.000000000 +0100
5916 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/nfs/super.c 2009-11-12 12:26:38.000000000 +0100
5917 @@ -53,6 +53,7 @@
5918  #include <linux/nfs_xdr.h>
5919  #include <linux/magic.h>
5920  #include <linux/parser.h>
5921 +#include <linux/vs_tag.h>
5922  
5923  #include <asm/system.h>
5924  #include <asm/uaccess.h>
5925 @@ -546,6 +547,7 @@ static void nfs_show_mount_options(struc
5926                 { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
5927                 { NFS_MOUNT_UNSHARED, ",nosharecache", "" },
5928                 { NFS_MOUNT_NORESVPORT, ",noresvport", "" },
5929 +               { NFS_MOUNT_TAGGED, ",tag", "" },
5930                 { 0, NULL, NULL }
5931         };
5932         const struct proc_nfs_info *nfs_infop;
5933 diff -NurpP --minimal linux-2.6.31.6/fs/nfsd/auth.c linux-2.6.31.6-vs2.3.0.36.24/fs/nfsd/auth.c
5934 --- linux-2.6.31.6/fs/nfsd/auth.c       2009-03-24 14:22:26.000000000 +0100
5935 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/nfsd/auth.c 2009-09-10 16:11:43.000000000 +0200
5936 @@ -10,6 +10,7 @@
5937  #include <linux/sunrpc/svcauth.h>
5938  #include <linux/nfsd/nfsd.h>
5939  #include <linux/nfsd/export.h>
5940 +#include <linux/vs_tag.h>
5941  #include "auth.h"
5942  
5943  int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
5944 @@ -42,6 +43,9 @@ int nfsd_setuser(struct svc_rqst *rqstp,
5945  
5946         new->fsuid = rqstp->rq_cred.cr_uid;
5947         new->fsgid = rqstp->rq_cred.cr_gid;
5948 +       /* FIXME: this desperately needs a tag :)
5949 +       new->xid = (xid_t)INOTAG_TAG(DX_TAG_NFSD, cred.cr_uid, cred.cr_gid, 0);
5950 +                       */
5951  
5952         rqgi = rqstp->rq_cred.cr_group_info;
5953  
5954 diff -NurpP --minimal linux-2.6.31.6/fs/nfsd/nfs3xdr.c linux-2.6.31.6-vs2.3.0.36.24/fs/nfsd/nfs3xdr.c
5955 --- linux-2.6.31.6/fs/nfsd/nfs3xdr.c    2009-09-10 15:26:23.000000000 +0200
5956 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/nfsd/nfs3xdr.c      2009-09-10 16:11:43.000000000 +0200
5957 @@ -21,6 +21,7 @@
5958  #include <linux/sunrpc/svc.h>
5959  #include <linux/nfsd/nfsd.h>
5960  #include <linux/nfsd/xdr3.h>
5961 +#include <linux/vs_tag.h>
5962  #include "auth.h"
5963  
5964  #define NFSDDBG_FACILITY               NFSDDBG_XDR
5965 @@ -108,6 +109,8 @@ static __be32 *
5966  decode_sattr3(__be32 *p, struct iattr *iap)
5967  {
5968         u32     tmp;
5969 +       uid_t   uid = 0;
5970 +       gid_t   gid = 0;
5971  
5972         iap->ia_valid = 0;
5973  
5974 @@ -117,12 +120,15 @@ decode_sattr3(__be32 *p, struct iattr *i
5975         }
5976         if (*p++) {
5977                 iap->ia_valid |= ATTR_UID;
5978 -               iap->ia_uid = ntohl(*p++);
5979 +               uid = ntohl(*p++);
5980         }
5981         if (*p++) {
5982                 iap->ia_valid |= ATTR_GID;
5983 -               iap->ia_gid = ntohl(*p++);
5984 +               gid = ntohl(*p++);
5985         }
5986 +       iap->ia_uid = INOTAG_UID(DX_TAG_NFSD, uid, gid);
5987 +       iap->ia_gid = INOTAG_GID(DX_TAG_NFSD, uid, gid);
5988 +       iap->ia_tag = INOTAG_TAG(DX_TAG_NFSD, uid, gid, 0);
5989         if (*p++) {
5990                 u64     newsize;
5991  
5992 @@ -178,8 +184,12 @@ encode_fattr3(struct svc_rqst *rqstp, __
5993         *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
5994         *p++ = htonl((u32) stat->mode);
5995         *p++ = htonl((u32) stat->nlink);
5996 -       *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid));
5997 -       *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid));
5998 +       *p++ = htonl((u32) nfsd_ruid(rqstp,
5999 +               TAGINO_UID(0 /* FIXME: DX_TAG(dentry->d_inode) */,
6000 +               stat->uid, stat->tag)));
6001 +       *p++ = htonl((u32) nfsd_rgid(rqstp,
6002 +               TAGINO_GID(0 /* FIXME: DX_TAG(dentry->d_inode) */,
6003 +               stat->gid, stat->tag)));
6004         if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) {
6005                 p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
6006         } else {
6007 diff -NurpP --minimal linux-2.6.31.6/fs/nfsd/nfs4xdr.c linux-2.6.31.6-vs2.3.0.36.24/fs/nfsd/nfs4xdr.c
6008 --- linux-2.6.31.6/fs/nfsd/nfs4xdr.c    2009-09-10 15:26:23.000000000 +0200
6009 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/nfsd/nfs4xdr.c      2009-09-10 16:11:43.000000000 +0200
6010 @@ -57,6 +57,7 @@
6011  #include <linux/nfs4_acl.h>
6012  #include <linux/sunrpc/gss_api.h>
6013  #include <linux/sunrpc/svcauth_gss.h>
6014 +#include <linux/vs_tag.h>
6015  
6016  #define NFSDDBG_FACILITY               NFSDDBG_XDR
6017  
6018 @@ -2052,14 +2053,18 @@ out_acl:
6019                 WRITE32(stat.nlink);
6020         }
6021         if (bmval1 & FATTR4_WORD1_OWNER) {
6022 -               status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen);
6023 +               status = nfsd4_encode_user(rqstp,
6024 +                       TAGINO_UID(DX_TAG(dentry->d_inode),
6025 +                       stat.uid, stat.tag), &p, &buflen);
6026                 if (status == nfserr_resource)
6027                         goto out_resource;
6028                 if (status)
6029                         goto out;
6030         }
6031         if (bmval1 & FATTR4_WORD1_OWNER_GROUP) {
6032 -               status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen);
6033 +               status = nfsd4_encode_group(rqstp,
6034 +                       TAGINO_GID(DX_TAG(dentry->d_inode),
6035 +                       stat.gid, stat.tag), &p, &buflen);
6036                 if (status == nfserr_resource)
6037                         goto out_resource;
6038                 if (status)
6039 diff -NurpP --minimal linux-2.6.31.6/fs/nfsd/nfsxdr.c linux-2.6.31.6-vs2.3.0.36.24/fs/nfsd/nfsxdr.c
6040 --- linux-2.6.31.6/fs/nfsd/nfsxdr.c     2008-12-25 00:26:37.000000000 +0100
6041 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/nfsd/nfsxdr.c       2009-09-10 16:11:43.000000000 +0200
6042 @@ -15,6 +15,7 @@
6043  #include <linux/nfsd/nfsd.h>
6044  #include <linux/nfsd/xdr.h>
6045  #include <linux/mm.h>
6046 +#include <linux/vs_tag.h>
6047  #include "auth.h"
6048  
6049  #define NFSDDBG_FACILITY               NFSDDBG_XDR
6050 @@ -98,6 +99,8 @@ static __be32 *
6051  decode_sattr(__be32 *p, struct iattr *iap)
6052  {
6053         u32     tmp, tmp1;
6054 +       uid_t   uid = 0;
6055 +       gid_t   gid = 0;
6056  
6057         iap->ia_valid = 0;
6058  
6059 @@ -111,12 +114,15 @@ decode_sattr(__be32 *p, struct iattr *ia
6060         }
6061         if ((tmp = ntohl(*p++)) != (u32)-1) {
6062                 iap->ia_valid |= ATTR_UID;
6063 -               iap->ia_uid = tmp;
6064 +               uid = tmp;
6065         }
6066         if ((tmp = ntohl(*p++)) != (u32)-1) {
6067                 iap->ia_valid |= ATTR_GID;
6068 -               iap->ia_gid = tmp;
6069 +               gid = tmp;
6070         }
6071 +       iap->ia_uid = INOTAG_UID(DX_TAG_NFSD, uid, gid);
6072 +       iap->ia_gid = INOTAG_GID(DX_TAG_NFSD, uid, gid);
6073 +       iap->ia_tag = INOTAG_TAG(DX_TAG_NFSD, uid, gid, 0);
6074         if ((tmp = ntohl(*p++)) != (u32)-1) {
6075                 iap->ia_valid |= ATTR_SIZE;
6076                 iap->ia_size = tmp;
6077 @@ -161,8 +167,10 @@ encode_fattr(struct svc_rqst *rqstp, __b
6078         *p++ = htonl(nfs_ftypes[type >> 12]);
6079         *p++ = htonl((u32) stat->mode);
6080         *p++ = htonl((u32) stat->nlink);
6081 -       *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid));
6082 -       *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid));
6083 +       *p++ = htonl((u32) nfsd_ruid(rqstp,
6084 +               TAGINO_UID(DX_TAG(dentry->d_inode), stat->uid, stat->tag)));
6085 +       *p++ = htonl((u32) nfsd_rgid(rqstp,
6086 +               TAGINO_GID(DX_TAG(dentry->d_inode), stat->gid, stat->tag)));
6087  
6088         if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) {
6089                 *p++ = htonl(NFS_MAXPATHLEN);
6090 diff -NurpP --minimal linux-2.6.31.6/fs/ocfs2/dlm/dlmfs.c linux-2.6.31.6-vs2.3.0.36.24/fs/ocfs2/dlm/dlmfs.c
6091 --- linux-2.6.31.6/fs/ocfs2/dlm/dlmfs.c 2009-03-24 14:22:27.000000000 +0100
6092 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ocfs2/dlm/dlmfs.c   2009-09-10 16:11:43.000000000 +0200
6093 @@ -43,6 +43,7 @@
6094  #include <linux/init.h>
6095  #include <linux/string.h>
6096  #include <linux/backing-dev.h>
6097 +#include <linux/vs_tag.h>
6098  
6099  #include <asm/uaccess.h>
6100  
6101 @@ -341,6 +342,7 @@ static struct inode *dlmfs_get_root_inod
6102                 inode->i_mode = mode;
6103                 inode->i_uid = current_fsuid();
6104                 inode->i_gid = current_fsgid();
6105 +               inode->i_tag = dx_current_fstag(sb);
6106                 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
6107                 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
6108                 inc_nlink(inode);
6109 @@ -366,6 +368,7 @@ static struct inode *dlmfs_get_inode(str
6110         inode->i_mode = mode;
6111         inode->i_uid = current_fsuid();
6112         inode->i_gid = current_fsgid();
6113 +       inode->i_tag = dx_current_fstag(sb);
6114         inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
6115         inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
6116  
6117 diff -NurpP --minimal linux-2.6.31.6/fs/ocfs2/dlmglue.c linux-2.6.31.6-vs2.3.0.36.24/fs/ocfs2/dlmglue.c
6118 --- linux-2.6.31.6/fs/ocfs2/dlmglue.c   2009-09-10 15:26:23.000000000 +0200
6119 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ocfs2/dlmglue.c     2009-09-10 16:11:43.000000000 +0200
6120 @@ -1960,6 +1960,7 @@ static void __ocfs2_stuff_meta_lvb(struc
6121         lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
6122         lvb->lvb_iuid      = cpu_to_be32(inode->i_uid);
6123         lvb->lvb_igid      = cpu_to_be32(inode->i_gid);
6124 +       lvb->lvb_itag      = cpu_to_be16(inode->i_tag);
6125         lvb->lvb_imode     = cpu_to_be16(inode->i_mode);
6126         lvb->lvb_inlink    = cpu_to_be16(inode->i_nlink);
6127         lvb->lvb_iatime_packed  =
6128 @@ -2014,6 +2015,7 @@ static void ocfs2_refresh_inode_from_lvb
6129  
6130         inode->i_uid     = be32_to_cpu(lvb->lvb_iuid);
6131         inode->i_gid     = be32_to_cpu(lvb->lvb_igid);
6132 +       inode->i_tag     = be16_to_cpu(lvb->lvb_itag);
6133         inode->i_mode    = be16_to_cpu(lvb->lvb_imode);
6134         inode->i_nlink   = be16_to_cpu(lvb->lvb_inlink);
6135         ocfs2_unpack_timespec(&inode->i_atime,
6136 diff -NurpP --minimal linux-2.6.31.6/fs/ocfs2/dlmglue.h linux-2.6.31.6-vs2.3.0.36.24/fs/ocfs2/dlmglue.h
6137 --- linux-2.6.31.6/fs/ocfs2/dlmglue.h   2009-09-10 15:26:23.000000000 +0200
6138 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ocfs2/dlmglue.h     2009-09-10 16:11:43.000000000 +0200
6139 @@ -46,7 +46,8 @@ struct ocfs2_meta_lvb {
6140         __be16       lvb_inlink;
6141         __be32       lvb_iattr;
6142         __be32       lvb_igeneration;
6143 -       __be32       lvb_reserved2;
6144 +       __be16       lvb_itag;
6145 +       __be16       lvb_reserved2;
6146  };
6147  
6148  #define OCFS2_QINFO_LVB_VERSION 1
6149 diff -NurpP --minimal linux-2.6.31.6/fs/ocfs2/file.c linux-2.6.31.6-vs2.3.0.36.24/fs/ocfs2/file.c
6150 --- linux-2.6.31.6/fs/ocfs2/file.c      2009-09-10 15:26:23.000000000 +0200
6151 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ocfs2/file.c        2009-10-06 19:45:13.000000000 +0200
6152 @@ -914,13 +914,15 @@ int ocfs2_setattr(struct dentry *dentry,
6153                 mlog(0, "uid change: %d\n", attr->ia_uid);
6154         if (attr->ia_valid & ATTR_GID)
6155                 mlog(0, "gid change: %d\n", attr->ia_gid);
6156 +       if (attr->ia_valid & ATTR_TAG)
6157 +               mlog(0, "tag change: %d\n", attr->ia_tag);
6158         if (attr->ia_valid & ATTR_SIZE)
6159                 mlog(0, "size change...\n");
6160         if (attr->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME))
6161                 mlog(0, "time change...\n");
6162  
6163  #define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \
6164 -                          | ATTR_GID | ATTR_UID | ATTR_MODE)
6165 +                          | ATTR_GID | ATTR_UID | ATTR_TAG | ATTR_MODE)
6166         if (!(attr->ia_valid & OCFS2_VALID_ATTRS)) {
6167                 mlog(0, "can't handle attrs: 0x%x\n", attr->ia_valid);
6168                 return 0;
6169 diff -NurpP --minimal linux-2.6.31.6/fs/ocfs2/inode.c linux-2.6.31.6-vs2.3.0.36.24/fs/ocfs2/inode.c
6170 --- linux-2.6.31.6/fs/ocfs2/inode.c     2009-09-10 15:26:23.000000000 +0200
6171 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ocfs2/inode.c       2009-10-06 19:45:13.000000000 +0200
6172 @@ -29,6 +29,7 @@
6173  #include <linux/highmem.h>
6174  #include <linux/pagemap.h>
6175  #include <linux/quotaops.h>
6176 +#include <linux/vs_tag.h>
6177  
6178  #include <asm/byteorder.h>
6179  
6180 @@ -78,11 +79,13 @@ void ocfs2_set_inode_flags(struct inode 
6181  {
6182         unsigned int flags = OCFS2_I(inode)->ip_attr;
6183  
6184 -       inode->i_flags &= ~(S_IMMUTABLE |
6185 +       inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK |
6186                 S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
6187  
6188         if (flags & OCFS2_IMMUTABLE_FL)
6189                 inode->i_flags |= S_IMMUTABLE;
6190 +       if (flags & OCFS2_IXUNLINK_FL)
6191 +               inode->i_flags |= S_IXUNLINK;
6192  
6193         if (flags & OCFS2_SYNC_FL)
6194                 inode->i_flags |= S_SYNC;
6195 @@ -92,25 +95,44 @@ void ocfs2_set_inode_flags(struct inode 
6196                 inode->i_flags |= S_NOATIME;
6197         if (flags & OCFS2_DIRSYNC_FL)
6198                 inode->i_flags |= S_DIRSYNC;
6199 +
6200 +       inode->i_vflags &= ~(V_BARRIER | V_COW);
6201 +
6202 +       if (flags & OCFS2_BARRIER_FL)
6203 +               inode->i_vflags |= V_BARRIER;
6204 +       if (flags & OCFS2_COW_FL)
6205 +               inode->i_vflags |= V_COW;
6206  }
6207  
6208  /* Propagate flags from i_flags to OCFS2_I(inode)->ip_attr */
6209  void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi)
6210  {
6211         unsigned int flags = oi->vfs_inode.i_flags;
6212 +       unsigned int vflags = oi->vfs_inode.i_vflags;
6213 +
6214 +       oi->ip_attr &= ~(OCFS2_SYNC_FL | OCFS2_APPEND_FL |
6215 +                       OCFS2_IMMUTABLE_FL | OCFS2_IXUNLINK_FL |
6216 +                       OCFS2_NOATIME_FL | OCFS2_DIRSYNC_FL |
6217 +                       OCFS2_BARRIER_FL | OCFS2_COW_FL);
6218 +
6219 +       if (flags & S_IMMUTABLE)
6220 +               oi->ip_attr |= OCFS2_IMMUTABLE_FL;
6221 +       if (flags & S_IXUNLINK)
6222 +               oi->ip_attr |= OCFS2_IXUNLINK_FL;
6223  
6224 -       oi->ip_attr &= ~(OCFS2_SYNC_FL|OCFS2_APPEND_FL|
6225 -                       OCFS2_IMMUTABLE_FL|OCFS2_NOATIME_FL|OCFS2_DIRSYNC_FL);
6226         if (flags & S_SYNC)
6227                 oi->ip_attr |= OCFS2_SYNC_FL;
6228         if (flags & S_APPEND)
6229                 oi->ip_attr |= OCFS2_APPEND_FL;
6230 -       if (flags & S_IMMUTABLE)
6231 -               oi->ip_attr |= OCFS2_IMMUTABLE_FL;
6232         if (flags & S_NOATIME)
6233                 oi->ip_attr |= OCFS2_NOATIME_FL;
6234         if (flags & S_DIRSYNC)
6235                 oi->ip_attr |= OCFS2_DIRSYNC_FL;
6236 +
6237 +       if (vflags & V_BARRIER)
6238 +               oi->ip_attr |= OCFS2_BARRIER_FL;
6239 +       if (vflags & V_COW)
6240 +               oi->ip_attr |= OCFS2_COW_FL;
6241  }
6242  
6243  struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno)
6244 @@ -245,6 +267,8 @@ void ocfs2_populate_inode(struct inode *
6245         struct super_block *sb;
6246         struct ocfs2_super *osb;
6247         int use_plocks = 1;
6248 +       uid_t uid;
6249 +       gid_t gid;
6250  
6251         mlog_entry("(0x%p, size:%llu)\n", inode,
6252                    (unsigned long long)le64_to_cpu(fe->i_size));
6253 @@ -276,8 +300,12 @@ void ocfs2_populate_inode(struct inode *
6254         inode->i_generation = le32_to_cpu(fe->i_generation);
6255         inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
6256         inode->i_mode = le16_to_cpu(fe->i_mode);
6257 -       inode->i_uid = le32_to_cpu(fe->i_uid);
6258 -       inode->i_gid = le32_to_cpu(fe->i_gid);
6259 +       uid = le32_to_cpu(fe->i_uid);
6260 +       gid = le32_to_cpu(fe->i_gid);
6261 +       inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
6262 +       inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
6263 +       inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid,
6264 +               /* no on-disk tag field is read yet; 0 is a placeholder */ 0);
6265  
6266         /* Fast symlinks will have i_size but no allocated clusters. */
6267         if (S_ISLNK(inode->i_mode) && !fe->i_clusters)
6268 diff -NurpP --minimal linux-2.6.31.6/fs/ocfs2/inode.h linux-2.6.31.6-vs2.3.0.36.24/fs/ocfs2/inode.h
6269 --- linux-2.6.31.6/fs/ocfs2/inode.h     2009-06-11 17:13:07.000000000 +0200
6270 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ocfs2/inode.h       2009-10-07 01:26:15.000000000 +0200
6271 @@ -152,6 +152,7 @@ struct buffer_head *ocfs2_bread(struct i
6272  
6273  void ocfs2_set_inode_flags(struct inode *inode);
6274  void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi);
6275 +int ocfs2_sync_flags(struct inode *inode, int, int);
6276  
6277  static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode)
6278  {
6279 diff -NurpP --minimal linux-2.6.31.6/fs/ocfs2/ioctl.c linux-2.6.31.6-vs2.3.0.36.24/fs/ocfs2/ioctl.c
6280 --- linux-2.6.31.6/fs/ocfs2/ioctl.c     2009-09-10 15:26:23.000000000 +0200
6281 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ocfs2/ioctl.c       2009-10-07 04:20:18.000000000 +0200
6282 @@ -41,7 +41,41 @@ static int ocfs2_get_inode_attr(struct i
6283         return status;
6284  }
6285  
6286 -static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
6287 +int ocfs2_sync_flags(struct inode *inode, int flags, int vflags)
6288 +{
6289 +       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6290 +       struct buffer_head *bh = NULL;
6291 +       handle_t *handle = NULL;
6292 +       int status;
6293 +
6294 +       status = ocfs2_inode_lock(inode, &bh, 1);
6295 +       if (status < 0) {
6296 +               mlog_errno(status);
6297 +               return status;
6298 +       }
6299 +       handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
6300 +       if (IS_ERR(handle)) {
6301 +               status = PTR_ERR(handle);
6302 +               mlog_errno(status);
6303 +               goto bail_unlock;
6304 +       }
6305 +       /* overwrite both flag words, then fold them into ip_attr */
6306 +       inode->i_flags = flags;
6307 +       inode->i_vflags = vflags;
6308 +       ocfs2_get_inode_flags(OCFS2_I(inode));
6309 +       /* dirty the inode under this handle; a failure is only logged */
6310 +       status = ocfs2_mark_inode_dirty(handle, inode, bh);
6311 +       if (status < 0)
6312 +               mlog_errno(status);
6313 +       /* commit even if marking dirty failed; status carries the error */
6314 +       ocfs2_commit_trans(osb, handle);
6315 +bail_unlock:
6316 +       ocfs2_inode_unlock(inode, 1);
6317 +       brelse(bh);
6318 +       return status;
6319 +}
6320 +
6321 +int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
6322                                 unsigned mask)
6323  {
6324         struct ocfs2_inode_info *ocfs2_inode = OCFS2_I(inode);
6325 @@ -66,6 +100,11 @@ static int ocfs2_set_inode_attr(struct i
6326         if (!S_ISDIR(inode->i_mode))
6327                 flags &= ~OCFS2_DIRSYNC_FL;
6328  
6329 +       if (IS_BARRIER(inode)) {
6330 +               vxwprintk_task(1, "messing with the barrier.");
6331 +               goto bail_unlock;
6332 +       }
6333 +
6334         handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
6335         if (IS_ERR(handle)) {
6336                 status = PTR_ERR(handle);
6337 @@ -107,6 +146,7 @@ bail:
6338         return status;
6339  }
6340  
6341 +
6342  long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
6343  {
6344         struct inode *inode = filp->f_path.dentry->d_inode;
6345 diff -NurpP --minimal linux-2.6.31.6/fs/ocfs2/namei.c linux-2.6.31.6-vs2.3.0.36.24/fs/ocfs2/namei.c
6346 --- linux-2.6.31.6/fs/ocfs2/namei.c     2009-09-10 15:26:23.000000000 +0200
6347 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ocfs2/namei.c       2009-10-06 19:45:13.000000000 +0200
6348 @@ -41,6 +41,7 @@
6349  #include <linux/slab.h>
6350  #include <linux/highmem.h>
6351  #include <linux/quotaops.h>
6352 +#include <linux/vs_tag.h>
6353  
6354  #define MLOG_MASK_PREFIX ML_NAMEI
6355  #include <cluster/masklog.h>
6356 @@ -478,6 +479,7 @@ static int ocfs2_mknod_locked(struct ocf
6357         u64 fe_blkno = 0;
6358         u16 suballoc_bit;
6359         u16 feat;
6360 +       tag_t tag;
6361  
6362         mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry,
6363                    inode->i_mode, (unsigned long)dev, dentry->d_name.len,
6364 @@ -524,8 +526,11 @@ static int ocfs2_mknod_locked(struct ocf
6365         fe->i_blkno = cpu_to_le64(fe_blkno);
6366         fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
6367         fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
6368 -       fe->i_uid = cpu_to_le32(inode->i_uid);
6369 -       fe->i_gid = cpu_to_le32(inode->i_gid);
6370 +
6371 +       tag = dx_current_fstag(osb->sb);
6372 +       fe->i_uid = cpu_to_le32(TAGINO_UID(DX_TAG(inode), inode->i_uid, tag));
6373 +       fe->i_gid = cpu_to_le32(TAGINO_GID(DX_TAG(inode), inode->i_gid, tag));
6374 +       inode->i_tag = tag;
6375         fe->i_mode = cpu_to_le16(inode->i_mode);
6376         if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
6377                 fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
6378 diff -NurpP --minimal linux-2.6.31.6/fs/ocfs2/ocfs2_fs.h linux-2.6.31.6-vs2.3.0.36.24/fs/ocfs2/ocfs2_fs.h
6379 --- linux-2.6.31.6/fs/ocfs2/ocfs2_fs.h  2009-06-11 17:13:07.000000000 +0200
6380 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ocfs2/ocfs2_fs.h    2009-09-10 16:11:43.000000000 +0200
6381 @@ -225,18 +225,23 @@
6382  #define OCFS2_INDEXED_DIR_FL   (0x0008)
6383  
6384  /* Inode attributes, keep in sync with EXT2 */
6385 -#define OCFS2_SECRM_FL         (0x00000001)    /* Secure deletion */
6386 -#define OCFS2_UNRM_FL          (0x00000002)    /* Undelete */
6387 -#define OCFS2_COMPR_FL         (0x00000004)    /* Compress file */
6388 -#define OCFS2_SYNC_FL          (0x00000008)    /* Synchronous updates */
6389 -#define OCFS2_IMMUTABLE_FL     (0x00000010)    /* Immutable file */
6390 -#define OCFS2_APPEND_FL                (0x00000020)    /* writes to file may only append */
6391 -#define OCFS2_NODUMP_FL                (0x00000040)    /* do not dump file */
6392 -#define OCFS2_NOATIME_FL       (0x00000080)    /* do not update atime */
6393 -#define OCFS2_DIRSYNC_FL       (0x00010000)    /* dirsync behaviour (directories only) */
6394 +#define OCFS2_SECRM_FL         FS_SECRM_FL     /* Secure deletion */
6395 +#define OCFS2_UNRM_FL          FS_UNRM_FL      /* Undelete */
6396 +#define OCFS2_COMPR_FL         FS_COMPR_FL     /* Compress file */
6397 +#define OCFS2_SYNC_FL          FS_SYNC_FL      /* Synchronous updates */
6398 +#define OCFS2_IMMUTABLE_FL     FS_IMMUTABLE_FL /* Immutable file */
6399 +#define OCFS2_APPEND_FL                FS_APPEND_FL    /* writes to file may only append */
6400 +#define OCFS2_NODUMP_FL                FS_NODUMP_FL    /* do not dump file */
6401 +#define OCFS2_NOATIME_FL       FS_NOATIME_FL   /* do not update atime */
6402  
6403 -#define OCFS2_FL_VISIBLE       (0x000100FF)    /* User visible flags */
6404 -#define OCFS2_FL_MODIFIABLE    (0x000100FF)    /* User modifiable flags */
6405 +#define OCFS2_DIRSYNC_FL       FS_DIRSYNC_FL   /* dirsync behaviour (directories only) */
6406 +#define OCFS2_IXUNLINK_FL      FS_IXUNLINK_FL  /* Immutable invert on unlink */
6407 +
6408 +#define OCFS2_BARRIER_FL       FS_BARRIER_FL   /* Barrier for chroot() */
6409 +#define OCFS2_COW_FL           FS_COW_FL       /* Copy on Write marker */
6410 +
6411 +#define OCFS2_FL_VISIBLE       (0x010300FF)    /* User visible flags */
6412 +#define OCFS2_FL_MODIFIABLE    (0x010300FF)    /* User modifiable flags */
6413  
6414  /*
6415   * Extent record flags (e_node.leaf.flags)
6416 diff -NurpP --minimal linux-2.6.31.6/fs/ocfs2/ocfs2.h linux-2.6.31.6-vs2.3.0.36.24/fs/ocfs2/ocfs2.h
6417 --- linux-2.6.31.6/fs/ocfs2/ocfs2.h     2009-09-10 15:26:23.000000000 +0200
6418 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ocfs2/ocfs2.h       2009-09-10 16:11:43.000000000 +0200
6419 @@ -222,6 +222,7 @@ enum ocfs2_mount_options
6420         OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* POSIX access control lists */
6421         OCFS2_MOUNT_USRQUOTA = 1 << 9, /* We support user quotas */
6422         OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */
6423 +       OCFS2_MOUNT_TAGGED = 1 << 11, /* use tagging */
6424  };
6425  
6426  #define OCFS2_OSB_SOFT_RO                      0x0001
6427 diff -NurpP --minimal linux-2.6.31.6/fs/ocfs2/super.c linux-2.6.31.6-vs2.3.0.36.24/fs/ocfs2/super.c
6428 --- linux-2.6.31.6/fs/ocfs2/super.c     2009-09-10 15:26:23.000000000 +0200
6429 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/ocfs2/super.c       2009-10-06 22:50:39.000000000 +0200
6430 @@ -173,6 +173,7 @@ enum {
6431         Opt_noacl,
6432         Opt_usrquota,
6433         Opt_grpquota,
6434 +       Opt_tag, Opt_notag, Opt_tagid,
6435         Opt_err,
6436  };
6437  
6438 @@ -199,6 +200,9 @@ static const match_table_t tokens = {
6439         {Opt_noacl, "noacl"},
6440         {Opt_usrquota, "usrquota"},
6441         {Opt_grpquota, "grpquota"},
6442 +       {Opt_tag, "tag"},
6443 +       {Opt_notag, "notag"},
6444 +       {Opt_tagid, "tagid=%u"},
6445         {Opt_err, NULL}
6446  };
6447  
6448 @@ -605,6 +609,13 @@ static int ocfs2_remount(struct super_bl
6449                 goto out;
6450         }
6451  
6452 +       if ((osb->s_mount_opt & OCFS2_MOUNT_TAGGED) !=
6453 +           (parsed_options.mount_opt & OCFS2_MOUNT_TAGGED)) {
6454 +               ret = -EINVAL;
6455 +               mlog(ML_ERROR, "Cannot change tagging on remount\n");
6456 +               goto out;
6457 +       }
6458 +
6459         if ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) !=
6460             (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL)) {
6461                 ret = -EINVAL;
6462 @@ -1146,6 +1157,9 @@ static int ocfs2_fill_super(struct super
6463  
6464         ocfs2_complete_mount_recovery(osb);
6465  
6466 +       if (osb->s_mount_opt & OCFS2_MOUNT_TAGGED)
6467 +               sb->s_flags |= MS_TAGGED;
6468 +
6469         if (ocfs2_mount_local(osb))
6470                 snprintf(nodestr, sizeof(nodestr), "local");
6471         else
6472 @@ -1424,6 +1438,20 @@ static int ocfs2_parse_options(struct su
6473                         printk(KERN_INFO "ocfs2 (no)acl options not supported\n");
6474                         break;
6475  #endif
6476 +#ifndef CONFIG_TAGGING_NONE
6477 +               case Opt_tag:
6478 +                       mopt->mount_opt |= OCFS2_MOUNT_TAGGED;
6479 +                       break;
6480 +               case Opt_notag:
6481 +                       mopt->mount_opt &= ~OCFS2_MOUNT_TAGGED;
6482 +                       break;
6483 +#endif
6484 +#ifdef CONFIG_PROPAGATE
6485 +               case Opt_tagid:
6486 +                       /* TODO: the tag id in args[0] is not evaluated yet */
6487 +                       mopt->mount_opt |= OCFS2_MOUNT_TAGGED;
6488 +                       break;
6489 +#endif
6490                 default:
6491                         mlog(ML_ERROR,
6492                              "Unrecognized mount option \"%s\" "
6493 diff -NurpP --minimal linux-2.6.31.6/fs/open.c linux-2.6.31.6-vs2.3.0.36.24/fs/open.c
6494 --- linux-2.6.31.6/fs/open.c    2009-09-10 15:26:23.000000000 +0200
6495 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/open.c      2009-10-15 03:54:42.000000000 +0200
6496 @@ -30,22 +30,30 @@
6497  #include <linux/audit.h>
6498  #include <linux/falloc.h>
6499  #include <linux/fs_struct.h>
6500 +#include <linux/vs_base.h>
6501 +#include <linux/vs_limit.h>
6502 +#include <linux/vs_tag.h>
6503 +#include <linux/vs_cowbl.h>
6504  
6505  int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
6506  {
6507         int retval = -ENODEV;
6508  
6509         if (dentry) {
6510 +               struct super_block *sb = dentry->d_sb;
6511 +
6512                 retval = -ENOSYS;
6513 -               if (dentry->d_sb->s_op->statfs) {
6514 +               if (sb->s_op->statfs) {
6515                         memset(buf, 0, sizeof(*buf));
6516                         retval = security_sb_statfs(dentry);
6517                         if (retval)
6518                                 return retval;
6519 -                       retval = dentry->d_sb->s_op->statfs(dentry, buf);
6520 +                       retval = sb->s_op->statfs(dentry, buf);
6521                         if (retval == 0 && buf->f_frsize == 0)
6522                                 buf->f_frsize = buf->f_bsize;
6523                 }
6524 +               if (!vx_check(0, VS_ADMIN|VS_WATCH))
6525 +                       vx_vsi_statfs(sb, buf);
6526         }
6527         return retval;
6528  }
6529 @@ -639,6 +647,10 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, cons
6530         error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
6531         if (error)
6532                 goto out;
6533 +
6534 +       error = cow_check_and_break(&path);
6535 +       if (error)
6536 +               goto dput_and_out;
6537         inode = path.dentry->d_inode;
6538  
6539         error = mnt_want_write(path.mnt);
6540 @@ -672,11 +684,11 @@ static int chown_common(struct dentry * 
6541         newattrs.ia_valid =  ATTR_CTIME;
6542         if (user != (uid_t) -1) {
6543                 newattrs.ia_valid |= ATTR_UID;
6544 -               newattrs.ia_uid = user;
6545 +               newattrs.ia_uid = dx_map_uid(user);
6546         }
6547         if (group != (gid_t) -1) {
6548                 newattrs.ia_valid |= ATTR_GID;
6549 -               newattrs.ia_gid = group;
6550 +               newattrs.ia_gid = dx_map_gid(group);
6551         }
6552         if (!S_ISDIR(inode->i_mode))
6553                 newattrs.ia_valid |=
6554 @@ -699,7 +711,11 @@ SYSCALL_DEFINE3(chown, const char __user
6555         error = mnt_want_write(path.mnt);
6556         if (error)
6557                 goto out_release;
6558 -       error = chown_common(path.dentry, user, group);
6559 +#ifdef CONFIG_VSERVER_COWBL
6560 +       error = cow_check_and_break(&path);
6561 +       if (!error)
6562 +#endif
6563 +               error = chown_common(path.dentry, user, group);
6564         mnt_drop_write(path.mnt);
6565  out_release:
6566         path_put(&path);
6567 @@ -724,7 +740,11 @@ SYSCALL_DEFINE5(fchownat, int, dfd, cons
6568         error = mnt_want_write(path.mnt);
6569         if (error)
6570                 goto out_release;
6571 -       error = chown_common(path.dentry, user, group);
6572 +#ifdef CONFIG_VSERVER_COWBL
6573 +       error = cow_check_and_break(&path);
6574 +       if (!error)
6575 +#endif
6576 +               error = chown_common(path.dentry, user, group);
6577         mnt_drop_write(path.mnt);
6578  out_release:
6579         path_put(&path);
6580 @@ -743,7 +763,11 @@ SYSCALL_DEFINE3(lchown, const char __use
6581         error = mnt_want_write(path.mnt);
6582         if (error)
6583                 goto out_release;
6584 -       error = chown_common(path.dentry, user, group);
6585 +#ifdef CONFIG_VSERVER_COWBL
6586 +       error = cow_check_and_break(&path);
6587 +       if (!error)
6588 +#endif
6589 +               error = chown_common(path.dentry, user, group);
6590         mnt_drop_write(path.mnt);
6591  out_release:
6592         path_put(&path);
6593 @@ -987,6 +1011,7 @@ static void __put_unused_fd(struct files
6594         __FD_CLR(fd, fdt->open_fds);
6595         if (fd < files->next_fd)
6596                 files->next_fd = fd;
6597 +       vx_openfd_dec(fd);
6598  }
6599  
6600  void put_unused_fd(unsigned int fd)
6601 diff -NurpP --minimal linux-2.6.31.6/fs/proc/array.c linux-2.6.31.6-vs2.3.0.36.24/fs/proc/array.c
6602 --- linux-2.6.31.6/fs/proc/array.c      2009-06-11 17:13:07.000000000 +0200
6603 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/proc/array.c        2009-09-10 16:11:43.000000000 +0200
6604 @@ -82,6 +82,8 @@
6605  #include <linux/pid_namespace.h>
6606  #include <linux/ptrace.h>
6607  #include <linux/tracehook.h>
6608 +#include <linux/vs_context.h>
6609 +#include <linux/vs_network.h>
6610  
6611  #include <asm/pgtable.h>
6612  #include <asm/processor.h>
6613 @@ -138,8 +140,9 @@ static const char *task_state_array[] = 
6614         "D (disk sleep)",       /*  2 */
6615         "T (stopped)",          /*  4 */
6616         "T (tracing stop)",     /*  8 */
6617 -       "Z (zombie)",           /* 16 */
6618 -       "X (dead)"              /* 32 */
6619 +       "H (on hold)",          /* 16 */
6620 +       "Z (zombie)",           /* 32 */
6621 +       "X (dead)",             /* 64 */
6622  };
6623  
6624  static inline const char *get_task_state(struct task_struct *tsk)
6625 @@ -166,6 +169,9 @@ static inline void task_state(struct seq
6626         rcu_read_lock();
6627         ppid = pid_alive(p) ?
6628                 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
6629 +       if (unlikely(vx_current_initpid(p->pid)))
6630 +               ppid = 0;
6631 +
6632         tpid = 0;
6633         if (pid_alive(p)) {
6634                 struct task_struct *tracer = tracehook_tracer_task(p);
6635 @@ -281,7 +287,7 @@ static inline void task_sig(struct seq_f
6636  }
6637  
6638  static void render_cap_t(struct seq_file *m, const char *header,
6639 -                       kernel_cap_t *a)
6640 +                       struct vx_info *vxi, kernel_cap_t *a)
6641  {
6642         unsigned __capi;
6643  
6644 @@ -306,10 +312,11 @@ static inline void task_cap(struct seq_f
6645         cap_bset        = cred->cap_bset;
6646         rcu_read_unlock();
6647  
6648 -       render_cap_t(m, "CapInh:\t", &cap_inheritable);
6649 -       render_cap_t(m, "CapPrm:\t", &cap_permitted);
6650 -       render_cap_t(m, "CapEff:\t", &cap_effective);
6651 -       render_cap_t(m, "CapBnd:\t", &cap_bset);
6652 +       /* FIXME: maybe move the p->vx_info masking to __task_cred() ? */
6653 +       render_cap_t(m, "CapInh:\t", p->vx_info, &cap_inheritable);
6654 +       render_cap_t(m, "CapPrm:\t", p->vx_info, &cap_permitted);
6655 +       render_cap_t(m, "CapEff:\t", p->vx_info, &cap_effective);
6656 +       render_cap_t(m, "CapBnd:\t", p->vx_info, &cap_bset);
6657  }
6658  
6659  static inline void task_context_switch_counts(struct seq_file *m,
6660 @@ -321,6 +328,42 @@ static inline void task_context_switch_c
6661                         p->nivcsw);
6662  }
6663  
6664 +int proc_pid_nsproxy(struct seq_file *m, struct pid_namespace *ns,
6665 +                       struct pid *pid, struct task_struct *task)
6666 +{
6667 +       struct nsproxy *init = init_task.nsproxy, *nsp;
6668 +
6669 +       /* a foreign nsproxy is RCU protected and NULL once the task exited */
6670 +       rcu_read_lock();
6671 +       nsp = rcu_dereference(task->nsproxy);
6672 +       if (nsp)
6673 +               seq_printf(m,   "Proxy:\t%p(%c)\n"
6674 +                       "Count:\t%u\n"
6675 +                       "uts:\t%p(%c)\n"
6676 +                       "ipc:\t%p(%c)\n"
6677 +                       "mnt:\t%p(%c)\n"
6678 +                       "pid:\t%p(%c)\n"
6679 +                       "net:\t%p(%c)\n",
6680 +                       nsp, (nsp == init ? 'I' : '-'), atomic_read(&nsp->count),
6681 +                       nsp->uts_ns, (nsp->uts_ns == init->uts_ns ? 'I' : '-'),
6682 +                       nsp->ipc_ns, (nsp->ipc_ns == init->ipc_ns ? 'I' : '-'),
6683 +                       nsp->mnt_ns, (nsp->mnt_ns == init->mnt_ns ? 'I' : '-'),
6684 +                       nsp->pid_ns, (nsp->pid_ns == init->pid_ns ? 'I' : '-'),
6685 +                       nsp->net_ns, (nsp->net_ns == init->net_ns ? 'I' : '-'));
6686 +       rcu_read_unlock();
6687 +       return 0;
6688 +}
6689 +
6690 +void task_vs_id(struct seq_file *m, struct task_struct *task)
6691 +{
6692 +       if (task_vx_flags(task, VXF_HIDE_VINFO, 0))     /* print nothing */
6693 +               return;
6694 +       /* append the "VxID:" and "NxID:" lines for this task to m */
6695 +       seq_printf(m, "VxID: %d\n", vx_task_xid(task));
6696 +       seq_printf(m, "NxID: %d\n", nx_task_nid(task));
6697 +}
6698 +
6699 +
6700  int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
6701                         struct pid *pid, struct task_struct *task)
6702  {
6703 @@ -336,6 +379,7 @@ int proc_pid_status(struct seq_file *m, 
6704         task_sig(m, task);
6705         task_cap(m, task);
6706         cpuset_task_status_allowed(m, task);
6707 +       task_vs_id(m, task);
6708  #if defined(CONFIG_S390)
6709         task_show_regs(m, task);
6710  #endif
6711 @@ -452,6 +496,17 @@ static int do_task_stat(struct seq_file 
6712         /* convert nsec -> ticks */
6713         start_time = nsec_to_clock_t(start_time);
6714  
6715 +       /* fixup start time for virt uptime */
6716 +       if (vx_flags(VXF_VIRT_UPTIME, 0)) {
6717 +               unsigned long long bias =
6718 +                       current->vx_info->cvirt.bias_clock;
6719 +
6720 +               if (start_time > bias)
6721 +                       start_time -= bias;
6722 +               else
6723 +                       start_time = 0;
6724 +       }
6725 +
6726         seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \
6727  %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
6728  %lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n",
6729 diff -NurpP --minimal linux-2.6.31.6/fs/proc/base.c linux-2.6.31.6-vs2.3.0.36.24/fs/proc/base.c
6730 --- linux-2.6.31.6/fs/proc/base.c       2009-09-10 15:26:23.000000000 +0200
6731 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/proc/base.c 2009-10-03 01:56:26.000000000 +0200
6732 @@ -81,6 +81,8 @@
6733  #include <linux/elf.h>
6734  #include <linux/pid_namespace.h>
6735  #include <linux/fs_struct.h>
6736 +#include <linux/vs_context.h>
6737 +#include <linux/vs_network.h>
6738  #include "internal.h"
6739  
6740  /* NOTE:
6741 @@ -1032,10 +1034,14 @@ static ssize_t oom_adjust_write(struct f
6742         task = get_proc_task(file->f_path.dentry->d_inode);
6743         if (!task)
6744                 return -ESRCH;
6745 -       if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) {
6746 +       if (oom_adjust < task->oomkilladj &&
6747 +               !vx_capable(CAP_SYS_RESOURCE, VXC_OOM_ADJUST)) {
6748                 put_task_struct(task);
6749                 return -EACCES;
6750         }
6751 +       /* prevent guest processes from circumventing the oom killer */
6752 +       if (vx_current_xid() && (oom_adjust == OOM_DISABLE))
6753 +               oom_adjust = OOM_ADJUST_MIN;
6754         task->oomkilladj = oom_adjust;
6755         put_task_struct(task);
6756         if (end - buffer == 0)
6757 @@ -1074,7 +1080,7 @@ static ssize_t proc_loginuid_write(struc
6758         ssize_t length;
6759         uid_t loginuid;
6760  
6761 -       if (!capable(CAP_AUDIT_CONTROL))
6762 +       if (!vx_capable(CAP_AUDIT_CONTROL, VXC_AUDIT_CONTROL))
6763                 return -EPERM;
6764  
6765         if (current != pid_task(proc_pid(inode), PIDTYPE_PID))
6766 @@ -1441,6 +1447,8 @@ static struct inode *proc_pid_make_inode
6767                 inode->i_gid = cred->egid;
6768                 rcu_read_unlock();
6769         }
6770 +       /* procfs is xid tagged */
6771 +       inode->i_tag = (tag_t)vx_task_xid(task);
6772         security_task_to_inode(task, inode);
6773  
6774  out:
6775 @@ -1991,6 +1999,13 @@ static struct dentry *proc_pident_lookup
6776         if (!task)
6777                 goto out_no_task;
6778  
6779 +       /* TODO: maybe we can come up with a generic approach? */
6780 +       if (task_vx_flags(task, VXF_HIDE_VINFO, 0) &&
6781 +               (dentry->d_name.len == 5) &&
6782 +               (!memcmp(dentry->d_name.name, "vinfo", 5) ||
6783 +               !memcmp(dentry->d_name.name, "ninfo", 5)))
6784 +               goto out;
6785 +
6786         /*
6787          * Yes, it does not scale. And it should not. Don't add
6788          * new entries into /proc/<tgid>/ without very good reasons.
6789 @@ -2382,7 +2397,7 @@ out_iput:
6790  static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
6791  {
6792         struct dentry *error;
6793 -       struct task_struct *task = get_proc_task(dir);
6794 +       struct task_struct *task = get_proc_task_real(dir);
6795         const struct pid_entry *p, *last;
6796  
6797         error = ERR_PTR(-ENOENT);
6798 @@ -2472,6 +2487,9 @@ static int proc_pid_personality(struct s
6799  static const struct file_operations proc_task_operations;
6800  static const struct inode_operations proc_task_inode_operations;
6801  
6802 +extern int proc_pid_vx_info(struct task_struct *, char *);
6803 +extern int proc_pid_nx_info(struct task_struct *, char *);
6804 +
6805  static const struct pid_entry tgid_base_stuff[] = {
6806         DIR("task",       S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
6807         DIR("fd",         S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
6808 @@ -2530,6 +2548,8 @@ static const struct pid_entry tgid_base_
6809  #ifdef CONFIG_CGROUPS
6810         REG("cgroup",  S_IRUGO, proc_cgroup_operations),
6811  #endif
6812 +       INF("vinfo",      S_IRUGO, proc_pid_vx_info),
6813 +       INF("ninfo",      S_IRUGO, proc_pid_nx_info),
6814         INF("oom_score",  S_IRUGO, proc_oom_score),
6815         REG("oom_adj",    S_IRUGO|S_IWUSR, proc_oom_adjust_operations),
6816  #ifdef CONFIG_AUDITSYSCALL
6817 @@ -2545,6 +2565,7 @@ static const struct pid_entry tgid_base_
6818  #ifdef CONFIG_TASK_IO_ACCOUNTING
6819         INF("io",       S_IRUGO, proc_tgid_io_accounting),
6820  #endif
6821 +       ONE("nsproxy",  S_IRUGO, proc_pid_nsproxy),
6822  };
6823  
6824  static int proc_tgid_base_readdir(struct file * filp,
6825 @@ -2741,7 +2762,7 @@ retry:
6826         iter.task = NULL;
6827         pid = find_ge_pid(iter.tgid, ns);
6828         if (pid) {
6829 -               iter.tgid = pid_nr_ns(pid, ns);
6830 +               iter.tgid = pid_unmapped_nr_ns(pid, ns);
6831                 iter.task = pid_task(pid, PIDTYPE_PID);
6832                 /* What we to know is if the pid we have find is the
6833                  * pid of a thread_group_leader.  Testing for task
6834 @@ -2771,7 +2792,7 @@ static int proc_pid_fill_cache(struct fi
6835         struct tgid_iter iter)
6836  {
6837         char name[PROC_NUMBUF];
6838 -       int len = snprintf(name, sizeof(name), "%d", iter.tgid);
6839 +       int len = snprintf(name, sizeof(name), "%d", vx_map_tgid(iter.tgid));
6840         return proc_fill_cache(filp, dirent, filldir, name, len,
6841                                 proc_pid_instantiate, iter.task, NULL);
6842  }
6843 @@ -2780,7 +2801,7 @@ static int proc_pid_fill_cache(struct fi
6844  int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
6845  {
6846         unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
6847 -       struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode);
6848 +       struct task_struct *reaper = get_proc_task_real(filp->f_path.dentry->d_inode);
6849         struct tgid_iter iter;
6850         struct pid_namespace *ns;
6851  
6852 @@ -2800,6 +2821,8 @@ int proc_pid_readdir(struct file * filp,
6853              iter.task;
6854              iter.tgid += 1, iter = next_tgid(ns, iter)) {
6855                 filp->f_pos = iter.tgid + TGID_OFFSET;
6856 +               if (!vx_proc_task_visible(iter.task))
6857 +                       continue;
6858                 if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) {
6859                         put_task_struct(iter.task);
6860                         goto out;
6861 @@ -2946,6 +2969,8 @@ static struct dentry *proc_task_lookup(s
6862         tid = name_to_int(dentry);
6863         if (tid == ~0U)
6864                 goto out;
6865 +       if (vx_current_initpid(tid))
6866 +               goto out;
6867  
6868         ns = dentry->d_sb->s_fs_info;
6869         rcu_read_lock();
6870 diff -NurpP --minimal linux-2.6.31.6/fs/proc/generic.c linux-2.6.31.6-vs2.3.0.36.24/fs/proc/generic.c
6871 --- linux-2.6.31.6/fs/proc/generic.c    2009-06-11 17:13:07.000000000 +0200
6872 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/proc/generic.c      2009-09-10 16:11:43.000000000 +0200
6873 @@ -20,6 +20,7 @@
6874  #include <linux/bitops.h>
6875  #include <linux/spinlock.h>
6876  #include <linux/completion.h>
6877 +#include <linux/vserver/inode.h>
6878  #include <asm/uaccess.h>
6879  
6880  #include "internal.h"
6881 @@ -425,6 +426,8 @@ struct dentry *proc_lookup_de(struct pro
6882         for (de = de->subdir; de ; de = de->next) {
6883                 if (de->namelen != dentry->d_name.len)
6884                         continue;
6885 +               if (!vx_hide_check(0, de->vx_flags))
6886 +                       continue;
6887                 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
6888                         unsigned int ino;
6889  
6890 @@ -433,6 +436,8 @@ struct dentry *proc_lookup_de(struct pro
6891                         spin_unlock(&proc_subdir_lock);
6892                         error = -EINVAL;
6893                         inode = proc_get_inode(dir->i_sb, ino, de);
6894 +                       if (inode)      /* proc_get_inode() may return NULL */
6895 +                               inode->i_tag = 0; /* belongs to the host */
6896                         goto out_unlock;
6897                 }
6898         }
6899 @@ -510,6 +515,8 @@ int proc_readdir_de(struct proc_dir_entr
6900  
6901                                 /* filldir passes info to user space */
6902                                 de_get(de);
6903 +                               if (!vx_hide_check(0, de->vx_flags))
6904 +                                       goto skip;
6905                                 spin_unlock(&proc_subdir_lock);
6906                                 if (filldir(dirent, de->name, de->namelen, filp->f_pos,
6907                                             de->low_ino, de->mode >> 12) < 0) {
6908 @@ -517,6 +524,7 @@ int proc_readdir_de(struct proc_dir_entr
6909                                         goto out;
6910                                 }
6911                                 spin_lock(&proc_subdir_lock);
6912 +                       skip:
6913                                 filp->f_pos++;
6914                                 next = de->next;
6915                                 de_put(de);
6916 @@ -631,6 +639,7 @@ static struct proc_dir_entry *__proc_cre
6917         ent->nlink = nlink;
6918         atomic_set(&ent->count, 1);
6919         ent->pde_users = 0;
6920 +       ent->vx_flags = IATTR_PROC_DEFAULT;
6921         spin_lock_init(&ent->pde_unload_lock);
6922         ent->pde_unload_completion = NULL;
6923         INIT_LIST_HEAD(&ent->pde_openers);
6924 @@ -654,7 +663,8 @@ struct proc_dir_entry *proc_symlink(cons
6925                                 kfree(ent->data);
6926                                 kfree(ent);
6927                                 ent = NULL;
6928 -                       }
6929 +                       } else
6930 +                               ent->vx_flags = IATTR_PROC_SYMLINK;
6931                 } else {
6932                         kfree(ent);
6933                         ent = NULL;
6934 diff -NurpP --minimal linux-2.6.31.6/fs/proc/inode.c linux-2.6.31.6-vs2.3.0.36.24/fs/proc/inode.c
6935 --- linux-2.6.31.6/fs/proc/inode.c      2009-06-11 17:13:07.000000000 +0200
6936 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/proc/inode.c        2009-09-10 16:11:43.000000000 +0200
6937 @@ -459,6 +459,8 @@ struct inode *proc_get_inode(struct supe
6938                         inode->i_uid = de->uid;
6939                         inode->i_gid = de->gid;
6940                 }
6941 +               if (de->vx_flags)
6942 +                       PROC_I(inode)->vx_flags = de->vx_flags;
6943                 if (de->size)
6944                         inode->i_size = de->size;
6945                 if (de->nlink)
6946 diff -NurpP --minimal linux-2.6.31.6/fs/proc/internal.h linux-2.6.31.6-vs2.3.0.36.24/fs/proc/internal.h
6947 --- linux-2.6.31.6/fs/proc/internal.h   2009-09-10 15:26:23.000000000 +0200
6948 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/proc/internal.h     2009-09-10 16:11:43.000000000 +0200
6949 @@ -10,6 +10,7 @@
6950   */
6951  
6952  #include <linux/proc_fs.h>
6953 +#include <linux/vs_pid.h>
6954  
6955  extern struct proc_dir_entry proc_root;
6956  #ifdef CONFIG_PROC_SYSCTL
6957 @@ -51,6 +52,9 @@ extern int proc_pid_status(struct seq_fi
6958                                 struct pid *pid, struct task_struct *task);
6959  extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
6960                                 struct pid *pid, struct task_struct *task);
6961 +extern int proc_pid_nsproxy(struct seq_file *m, struct pid_namespace *ns,
6962 +                               struct pid *pid, struct task_struct *task);
6963 +
6964  extern loff_t mem_lseek(struct file *file, loff_t offset, int orig);
6965  
6966  extern const struct file_operations proc_maps_operations;
6967 @@ -70,11 +74,16 @@ static inline struct pid *proc_pid(struc
6968         return PROC_I(inode)->pid;
6969  }
6970  
6971 -static inline struct task_struct *get_proc_task(struct inode *inode)
6972 +static inline struct task_struct *get_proc_task_real(struct inode *inode)
6973  {
6974         return get_pid_task(proc_pid(inode), PIDTYPE_PID);
6975  }
6976  
6977 +static inline struct task_struct *get_proc_task(struct inode *inode)
6978 +{
6979 +       return vx_get_proc_task(inode, proc_pid(inode));
6980 +}
6981 +
6982  static inline int proc_fd(struct inode *inode)
6983  {
6984         return PROC_I(inode)->fd;
6985 diff -NurpP --minimal linux-2.6.31.6/fs/proc/loadavg.c linux-2.6.31.6-vs2.3.0.36.24/fs/proc/loadavg.c
6986 --- linux-2.6.31.6/fs/proc/loadavg.c    2009-09-10 15:26:23.000000000 +0200
6987 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/proc/loadavg.c      2009-11-05 04:12:09.000000000 +0100
6988 @@ -12,15 +12,27 @@
6989  
6990  static int loadavg_proc_show(struct seq_file *m, void *v)
6991  {
6992 +       unsigned long running;
6993 +       unsigned int threads;
6994         unsigned long avnrun[3];
6995  
6996         get_avenrun(avnrun, FIXED_1/200, 0);
6997  
6998 +       if (vx_flags(VXF_VIRT_LOAD, 0)) {
6999 +               struct vx_info *vxi = current_vx_info();
7000 +
7001 +               running = atomic_read(&vxi->cvirt.nr_running);
7002 +               threads = atomic_read(&vxi->cvirt.nr_threads);
7003 +       } else {
7004 +               running = nr_running();
7005 +               threads = nr_threads;
7006 +       }
7007 +
7008         seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n",
7009                 LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]),
7010                 LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
7011                 LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
7012 -               nr_running(), nr_threads,
7013 +               running, threads,
7014                 task_active_pid_ns(current)->last_pid);
7015         return 0;
7016  }
7017 diff -NurpP --minimal linux-2.6.31.6/fs/proc/meminfo.c linux-2.6.31.6-vs2.3.0.36.24/fs/proc/meminfo.c
7018 --- linux-2.6.31.6/fs/proc/meminfo.c    2009-09-10 15:26:23.000000000 +0200
7019 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/proc/meminfo.c      2009-09-10 16:11:43.000000000 +0200
7020 @@ -41,7 +41,7 @@ static int meminfo_proc_show(struct seq_
7021  
7022         cached = global_page_state(NR_FILE_PAGES) -
7023                         total_swapcache_pages - i.bufferram;
7024 -       if (cached < 0)
7025 +       if (cached < 0 || vx_flags(VXF_VIRT_MEM, 0))
7026                 cached = 0;
7027  
7028         get_vmalloc_info(&vmi);
7029 diff -NurpP --minimal linux-2.6.31.6/fs/proc/root.c linux-2.6.31.6-vs2.3.0.36.24/fs/proc/root.c
7030 --- linux-2.6.31.6/fs/proc/root.c       2009-06-11 17:13:07.000000000 +0200
7031 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/proc/root.c 2009-09-10 16:11:43.000000000 +0200
7032 @@ -18,9 +18,14 @@
7033  #include <linux/bitops.h>
7034  #include <linux/mount.h>
7035  #include <linux/pid_namespace.h>
7036 +#include <linux/vserver/inode.h>
7037  
7038  #include "internal.h"
7039  
7040 +struct proc_dir_entry *proc_virtual;
7041 +
7042 +extern void proc_vx_init(void);
7043 +
7044  static int proc_test_super(struct super_block *sb, void *data)
7045  {
7046         return sb->s_fs_info == data;
7047 @@ -136,6 +141,7 @@ void __init proc_root_init(void)
7048  #endif
7049         proc_mkdir("bus", NULL);
7050         proc_sys_init();
7051 +       proc_vx_init();
7052  }
7053  
7054  static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
7055 @@ -203,6 +209,7 @@ struct proc_dir_entry proc_root = {
7056         .proc_iops      = &proc_root_inode_operations, 
7057         .proc_fops      = &proc_root_operations,
7058         .parent         = &proc_root,
7059 +       .vx_flags       = IATTR_ADMIN | IATTR_WATCH,
7060  };
7061  
7062  int pid_ns_prepare_proc(struct pid_namespace *ns)
7063 diff -NurpP --minimal linux-2.6.31.6/fs/proc/uptime.c linux-2.6.31.6-vs2.3.0.36.24/fs/proc/uptime.c
7064 --- linux-2.6.31.6/fs/proc/uptime.c     2009-11-12 12:10:11.000000000 +0100
7065 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/proc/uptime.c       2009-10-05 23:35:52.000000000 +0200
7066 @@ -4,22 +4,22 @@
7067  #include <linux/sched.h>
7068  #include <linux/seq_file.h>
7069  #include <linux/time.h>
7070 -#include <linux/kernel_stat.h>
7071 +#include <linux/vserver/cvirt.h>
7072  #include <asm/cputime.h>
7073  
7074  static int uptime_proc_show(struct seq_file *m, void *v)
7075  {
7076         struct timespec uptime;
7077         struct timespec idle;
7078 -       int i;
7079 -       cputime_t idletime = cputime_zero;
7080 -
7081 -       for_each_possible_cpu(i)
7082 -               idletime = cputime64_add(idletime, kstat_cpu(i).cpustat.idle);
7083 +       cputime_t idletime = cputime_add(init_task.utime, init_task.stime);
7084  
7085         do_posix_clock_monotonic_gettime(&uptime);
7086         monotonic_to_bootbased(&uptime);
7087         cputime_to_timespec(idletime, &idle);
7088 +
7089 +       if (vx_flags(VXF_VIRT_UPTIME, 0))
7090 +               vx_vsi_uptime(&uptime, &idle);
7091 +
7092         seq_printf(m, "%lu.%02lu %lu.%02lu\n",
7093                         (unsigned long) uptime.tv_sec,
7094                         (uptime.tv_nsec / (NSEC_PER_SEC / 100)),
7095 diff -NurpP --minimal linux-2.6.31.6/fs/quota/quota.c linux-2.6.31.6-vs2.3.0.36.24/fs/quota/quota.c
7096 --- linux-2.6.31.6/fs/quota/quota.c     2009-09-10 15:26:24.000000000 +0200
7097 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/quota/quota.c       2009-09-10 16:11:43.000000000 +0200
7098 @@ -18,6 +18,7 @@
7099  #include <linux/capability.h>
7100  #include <linux/quotaops.h>
7101  #include <linux/types.h>
7102 +#include <linux/vs_context.h>
7103  
7104  /* Check validity of generic quotactl commands */
7105  static int generic_quotactl_valid(struct super_block *sb, int type, int cmd,
7106 @@ -83,11 +84,11 @@ static int generic_quotactl_valid(struct
7107         if (cmd == Q_GETQUOTA) {
7108                 if (((type == USRQUOTA && current_euid() != id) ||
7109                      (type == GRPQUOTA && !in_egroup_p(id))) &&
7110 -                   !capable(CAP_SYS_ADMIN))
7111 +                   !vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
7112                         return -EPERM;
7113         }
7114         else if (cmd != Q_GETFMT && cmd != Q_SYNC && cmd != Q_GETINFO)
7115 -               if (!capable(CAP_SYS_ADMIN))
7116 +               if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
7117                         return -EPERM;
7118  
7119         return 0;
7120 @@ -135,10 +136,10 @@ static int xqm_quotactl_valid(struct sup
7121         if (cmd == Q_XGETQUOTA) {
7122                 if (((type == XQM_USRQUOTA && current_euid() != id) ||
7123                      (type == XQM_GRPQUOTA && !in_egroup_p(id))) &&
7124 -                    !capable(CAP_SYS_ADMIN))
7125 +                    !vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
7126                         return -EPERM;
7127         } else if (cmd != Q_XGETQSTAT && cmd != Q_XQUOTASYNC) {
7128 -               if (!capable(CAP_SYS_ADMIN))
7129 +               if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
7130                         return -EPERM;
7131         }
7132  
7133 @@ -351,6 +352,46 @@ static int do_quotactl(struct super_bloc
7134         return 0;
7135  }
7136  
7137 +#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE)
7138 +
7139 +#include <linux/vroot.h>
7140 +#include <linux/major.h>
7141 +#include <linux/module.h>
7142 +#include <linux/kallsyms.h>
7143 +#include <linux/vserver/debug.h>
7144 +
7145 +static vroot_grb_func *vroot_get_real_bdev = NULL;
7146 +
7147 +static spinlock_t vroot_grb_lock = SPIN_LOCK_UNLOCKED;
7148 +
7149 +int register_vroot_grb(vroot_grb_func *func) {
7150 +       int ret = -EBUSY;
7151 +
7152 +       spin_lock(&vroot_grb_lock);
7153 +       if (!vroot_get_real_bdev) {
7154 +               vroot_get_real_bdev = func;
7155 +               ret = 0;
7156 +       }
7157 +       spin_unlock(&vroot_grb_lock);
7158 +       return ret;
7159 +}
7160 +EXPORT_SYMBOL(register_vroot_grb);
7161 +
7162 +int unregister_vroot_grb(vroot_grb_func *func) {
7163 +       int ret = -EINVAL;
7164 +
7165 +       spin_lock(&vroot_grb_lock);
7166 +       if (vroot_get_real_bdev) {
7167 +               vroot_get_real_bdev = NULL;
7168 +               ret = 0;
7169 +       }
7170 +       spin_unlock(&vroot_grb_lock);
7171 +       return ret;
7172 +}
7173 +EXPORT_SYMBOL(unregister_vroot_grb);
7174 +
7175 +#endif
7176 +
7177  /*
7178   * look up a superblock on which quota ops will be performed
7179   * - use the name of a block device to find the superblock thereon
7180 @@ -368,6 +409,22 @@ static struct super_block *quotactl_bloc
7181         putname(tmp);
7182         if (IS_ERR(bdev))
7183                 return ERR_CAST(bdev);
7184 +#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE)
7185 +       if (bdev && bdev->bd_inode &&
7186 +                       imajor(bdev->bd_inode) == VROOT_MAJOR) {
7187 +               struct block_device *bdnew = (void *)-EINVAL;
7188 +
7189 +               if (vroot_get_real_bdev)
7190 +                       bdnew = vroot_get_real_bdev(bdev);
7191 +               else
7192 +                       vxdprintk(VXD_CBIT(misc, 0),
7193 +                                       "vroot_get_real_bdev not set");
7194 +               bdput(bdev);
7195 +               if (IS_ERR(bdnew))
7196 +                       return ERR_PTR(PTR_ERR(bdnew));
7197 +               bdev = bdnew;
7198 +       }
7199 +#endif
7200         sb = get_super(bdev);
7201         bdput(bdev);
7202         if (!sb)
7203 diff -NurpP --minimal linux-2.6.31.6/fs/reiserfs/file.c linux-2.6.31.6-vs2.3.0.36.24/fs/reiserfs/file.c
7204 --- linux-2.6.31.6/fs/reiserfs/file.c   2009-06-11 17:13:08.000000000 +0200
7205 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/reiserfs/file.c     2009-10-07 01:04:14.000000000 +0200
7206 @@ -307,4 +307,5 @@ const struct inode_operations reiserfs_f
7207         .listxattr = reiserfs_listxattr,
7208         .removexattr = reiserfs_removexattr,
7209         .permission = reiserfs_permission,
7210 +       .sync_flags = reiserfs_sync_flags,
7211  };
7212 diff -NurpP --minimal linux-2.6.31.6/fs/reiserfs/inode.c linux-2.6.31.6-vs2.3.0.36.24/fs/reiserfs/inode.c
7213 --- linux-2.6.31.6/fs/reiserfs/inode.c  2009-09-10 15:26:24.000000000 +0200
7214 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/reiserfs/inode.c    2009-10-06 19:45:13.000000000 +0200
7215 @@ -18,6 +18,7 @@
7216  #include <linux/writeback.h>
7217  #include <linux/quotaops.h>
7218  #include <linux/swap.h>
7219 +#include <linux/vs_tag.h>
7220  
7221  int reiserfs_commit_write(struct file *f, struct page *page,
7222                           unsigned from, unsigned to);
7223 @@ -1117,6 +1118,8 @@ static void init_inode(struct inode *ino
7224         struct buffer_head *bh;
7225         struct item_head *ih;
7226         __u32 rdev;
7227 +       uid_t uid;
7228 +       gid_t gid;
7229         //int version = ITEM_VERSION_1;
7230  
7231         bh = PATH_PLAST_BUFFER(path);
7232 @@ -1138,12 +1141,13 @@ static void init_inode(struct inode *ino
7233                     (struct stat_data_v1 *)B_I_PITEM(bh, ih);
7234                 unsigned long blocks;
7235  
7236 +               uid = sd_v1_uid(sd);
7237 +               gid = sd_v1_gid(sd);
7238 +
7239                 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
7240                 set_inode_sd_version(inode, STAT_DATA_V1);
7241                 inode->i_mode = sd_v1_mode(sd);
7242                 inode->i_nlink = sd_v1_nlink(sd);
7243 -               inode->i_uid = sd_v1_uid(sd);
7244 -               inode->i_gid = sd_v1_gid(sd);
7245                 inode->i_size = sd_v1_size(sd);
7246                 inode->i_atime.tv_sec = sd_v1_atime(sd);
7247                 inode->i_mtime.tv_sec = sd_v1_mtime(sd);
7248 @@ -1185,11 +1189,12 @@ static void init_inode(struct inode *ino
7249                 // (directories and symlinks)
7250                 struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih);
7251  
7252 +               uid    = sd_v2_uid(sd);
7253 +               gid    = sd_v2_gid(sd);
7254 +
7255                 inode->i_mode = sd_v2_mode(sd);
7256                 inode->i_nlink = sd_v2_nlink(sd);
7257 -               inode->i_uid = sd_v2_uid(sd);
7258                 inode->i_size = sd_v2_size(sd);
7259 -               inode->i_gid = sd_v2_gid(sd);
7260                 inode->i_mtime.tv_sec = sd_v2_mtime(sd);
7261                 inode->i_atime.tv_sec = sd_v2_atime(sd);
7262                 inode->i_ctime.tv_sec = sd_v2_ctime(sd);
7263 @@ -1219,6 +1224,10 @@ static void init_inode(struct inode *ino
7264                 sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode);
7265         }
7266  
7267 +       inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
7268 +       inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
7269 +       inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, 0);
7270 +
7271         pathrelse(path);
7272         if (S_ISREG(inode->i_mode)) {
7273                 inode->i_op = &reiserfs_file_inode_operations;
7274 @@ -1241,13 +1250,15 @@ static void init_inode(struct inode *ino
7275  static void inode2sd(void *sd, struct inode *inode, loff_t size)
7276  {
7277         struct stat_data *sd_v2 = (struct stat_data *)sd;
7278 +       uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag);
7279 +       gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag);
7280         __u16 flags;
7281  
7282 +       set_sd_v2_uid(sd_v2, uid);
7283 +       set_sd_v2_gid(sd_v2, gid);
7284         set_sd_v2_mode(sd_v2, inode->i_mode);
7285         set_sd_v2_nlink(sd_v2, inode->i_nlink);
7286 -       set_sd_v2_uid(sd_v2, inode->i_uid);
7287         set_sd_v2_size(sd_v2, size);
7288 -       set_sd_v2_gid(sd_v2, inode->i_gid);
7289         set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec);
7290         set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec);
7291         set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec);
7292 @@ -2828,14 +2839,19 @@ int reiserfs_commit_write(struct file *f
7293  void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode)
7294  {
7295         if (reiserfs_attrs(inode->i_sb)) {
7296 -               if (sd_attrs & REISERFS_SYNC_FL)
7297 -                       inode->i_flags |= S_SYNC;
7298 -               else
7299 -                       inode->i_flags &= ~S_SYNC;
7300                 if (sd_attrs & REISERFS_IMMUTABLE_FL)
7301                         inode->i_flags |= S_IMMUTABLE;
7302                 else
7303                         inode->i_flags &= ~S_IMMUTABLE;
7304 +               if (sd_attrs & REISERFS_IXUNLINK_FL)
7305 +                       inode->i_flags |= S_IXUNLINK;
7306 +               else
7307 +                       inode->i_flags &= ~S_IXUNLINK;
7308 +
7309 +               if (sd_attrs & REISERFS_SYNC_FL)
7310 +                       inode->i_flags |= S_SYNC;
7311 +               else
7312 +                       inode->i_flags &= ~S_SYNC;
7313                 if (sd_attrs & REISERFS_APPEND_FL)
7314                         inode->i_flags |= S_APPEND;
7315                 else
7316 @@ -2848,6 +2864,15 @@ void sd_attrs_to_i_attrs(__u16 sd_attrs,
7317                         REISERFS_I(inode)->i_flags |= i_nopack_mask;
7318                 else
7319                         REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
7320 +
7321 +               if (sd_attrs & REISERFS_BARRIER_FL)
7322 +                       inode->i_vflags |= V_BARRIER;
7323 +               else
7324 +                       inode->i_vflags &= ~V_BARRIER;
7325 +               if (sd_attrs & REISERFS_COW_FL)
7326 +                       inode->i_vflags |= V_COW;
7327 +               else
7328 +                       inode->i_vflags &= ~V_COW;
7329         }
7330  }
7331  
7332 @@ -2858,6 +2883,11 @@ void i_attrs_to_sd_attrs(struct inode *i
7333                         *sd_attrs |= REISERFS_IMMUTABLE_FL;
7334                 else
7335                         *sd_attrs &= ~REISERFS_IMMUTABLE_FL;
7336 +               if (inode->i_flags & S_IXUNLINK)
7337 +                       *sd_attrs |= REISERFS_IXUNLINK_FL;
7338 +               else
7339 +                       *sd_attrs &= ~REISERFS_IXUNLINK_FL;
7340 +
7341                 if (inode->i_flags & S_SYNC)
7342                         *sd_attrs |= REISERFS_SYNC_FL;
7343                 else
7344 @@ -2870,6 +2900,15 @@ void i_attrs_to_sd_attrs(struct inode *i
7345                         *sd_attrs |= REISERFS_NOTAIL_FL;
7346                 else
7347                         *sd_attrs &= ~REISERFS_NOTAIL_FL;
7348 +
7349 +               if (inode->i_vflags & V_BARRIER)
7350 +                       *sd_attrs |= REISERFS_BARRIER_FL;
7351 +               else
7352 +                       *sd_attrs &= ~REISERFS_BARRIER_FL;
7353 +               if (inode->i_vflags & V_COW)
7354 +                       *sd_attrs |= REISERFS_COW_FL;
7355 +               else
7356 +                       *sd_attrs &= ~REISERFS_COW_FL;
7357         }
7358  }
7359  
7360 @@ -3090,9 +3129,11 @@ int reiserfs_setattr(struct dentry *dent
7361         }
7362  
7363         error = inode_change_ok(inode, attr);
7364 +
7365         if (!error) {
7366                 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
7367 -                   (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
7368 +                   (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) ||
7369 +                   (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) {
7370                         error = reiserfs_chown_xattrs(inode, attr);
7371  
7372                         if (!error) {
7373 @@ -3122,6 +3163,9 @@ int reiserfs_setattr(struct dentry *dent
7374                                         inode->i_uid = attr->ia_uid;
7375                                 if (attr->ia_valid & ATTR_GID)
7376                                         inode->i_gid = attr->ia_gid;
7377 +                               if ((attr->ia_valid & ATTR_TAG) &&
7378 +                                       IS_TAGGED(inode))
7379 +                                       inode->i_tag = attr->ia_tag;
7380                                 mark_inode_dirty(inode);
7381                                 error =
7382                                     journal_end(&th, inode->i_sb, jbegin_count);
7383 diff -NurpP --minimal linux-2.6.31.6/fs/reiserfs/ioctl.c linux-2.6.31.6-vs2.3.0.36.24/fs/reiserfs/ioctl.c
7384 --- linux-2.6.31.6/fs/reiserfs/ioctl.c  2009-06-11 17:13:08.000000000 +0200
7385 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/reiserfs/ioctl.c    2009-10-12 03:53:52.000000000 +0200
7386 @@ -7,11 +7,27 @@
7387  #include <linux/mount.h>
7388  #include <linux/reiserfs_fs.h>
7389  #include <linux/time.h>
7390 +#include <linux/mount.h>
7391  #include <asm/uaccess.h>
7392  #include <linux/pagemap.h>
7393  #include <linux/smp_lock.h>
7394  #include <linux/compat.h>
7395  
7396 +
7397 +int reiserfs_sync_flags(struct inode *inode, int flags, int vflags)
7398 +{
7399 +       __u16 sd_attrs = 0;
7400 +
7401 +       inode->i_flags = flags;
7402 +       inode->i_vflags = vflags;
7403 +
7404 +       i_attrs_to_sd_attrs(inode, &sd_attrs);
7405 +       REISERFS_I(inode)->i_attrs = sd_attrs;
7406 +       inode->i_ctime = CURRENT_TIME_SEC;
7407 +       mark_inode_dirty(inode);
7408 +       return 0;
7409 +}
7410 +
7411  /*
7412  ** reiserfs_ioctl - handler for ioctl for inode
7413  ** supported commands:
7414 @@ -23,7 +39,7 @@
7415  int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
7416                    unsigned long arg)
7417  {
7418 -       unsigned int flags;
7419 +       unsigned int flags, oldflags;
7420         int err = 0;
7421  
7422         switch (cmd) {
7423 @@ -43,6 +59,7 @@ int reiserfs_ioctl(struct inode *inode, 
7424  
7425                 flags = REISERFS_I(inode)->i_attrs;
7426                 i_attrs_to_sd_attrs(inode, (__u16 *) & flags);
7427 +               flags &= REISERFS_FL_USER_VISIBLE;
7428                 return put_user(flags, (int __user *)arg);
7429         case REISERFS_IOC_SETFLAGS:{
7430                         if (!reiserfs_attrs(inode->i_sb))
7431 @@ -60,6 +77,10 @@ int reiserfs_ioctl(struct inode *inode, 
7432                                 err = -EFAULT;
7433                                 goto setflags_out;
7434                         }
7435 +                       if (IS_BARRIER(inode)) {
7436 +                               vxwprintk_task(1, "messing with the barrier.");
7437 +                               return -EACCES;
7438 +                       }
7439                         /*
7440                          * Is it quota file? Do not allow user to mess with it
7441                          */
7442 @@ -84,6 +105,10 @@ int reiserfs_ioctl(struct inode *inode, 
7443                                         goto setflags_out;
7444                                 }
7445                         }
7446 +
7447 +                       oldflags = REISERFS_I(inode)->i_attrs;
7448 +                       flags &= REISERFS_FL_USER_MODIFIABLE;
7449 +                       flags |= oldflags & ~REISERFS_FL_USER_MODIFIABLE;
7450                         sd_attrs_to_i_attrs(flags, inode);
7451                         REISERFS_I(inode)->i_attrs = flags;
7452                         inode->i_ctime = CURRENT_TIME_SEC;
7453 diff -NurpP --minimal linux-2.6.31.6/fs/reiserfs/namei.c linux-2.6.31.6-vs2.3.0.36.24/fs/reiserfs/namei.c
7454 --- linux-2.6.31.6/fs/reiserfs/namei.c  2009-06-11 17:13:08.000000000 +0200
7455 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/reiserfs/namei.c    2009-10-07 01:09:49.000000000 +0200
7456 @@ -17,6 +17,7 @@
7457  #include <linux/reiserfs_acl.h>
7458  #include <linux/reiserfs_xattr.h>
7459  #include <linux/quotaops.h>
7460 +#include <linux/vs_tag.h>
7461  
7462  #define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; }
7463  #define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i);
7464 @@ -354,6 +355,7 @@ static struct dentry *reiserfs_lookup(st
7465         if (retval == IO_ERROR) {
7466                 return ERR_PTR(-EIO);
7467         }
7468 +               dx_propagate_tag(nd, inode);
7469  
7470         return d_splice_alias(inode, dentry);
7471  }
7472 @@ -570,6 +572,7 @@ static int new_inode_init(struct inode *
7473         } else {
7474                 inode->i_gid = current_fsgid();
7475         }
7476 +       inode->i_tag = dx_current_fstag(inode->i_sb);
7477         vfs_dq_init(inode);
7478         return 0;
7479  }
7480 @@ -1515,6 +1518,7 @@ const struct inode_operations reiserfs_d
7481         .listxattr = reiserfs_listxattr,
7482         .removexattr = reiserfs_removexattr,
7483         .permission = reiserfs_permission,
7484 +       .sync_flags = reiserfs_sync_flags,
7485  };
7486  
7487  /*
7488 diff -NurpP --minimal linux-2.6.31.6/fs/reiserfs/super.c linux-2.6.31.6-vs2.3.0.36.24/fs/reiserfs/super.c
7489 --- linux-2.6.31.6/fs/reiserfs/super.c  2009-09-10 15:26:24.000000000 +0200
7490 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/reiserfs/super.c    2009-10-07 03:27:01.000000000 +0200
7491 @@ -884,6 +884,14 @@ static int reiserfs_parse_options(struct
7492                 {"user_xattr",.setmask = 1 << REISERFS_UNSUPPORTED_OPT},
7493                 {"nouser_xattr",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT},
7494  #endif
7495 +#ifndef CONFIG_TAGGING_NONE
7496 +               {"tagxid",.setmask = 1 << REISERFS_TAGGED},
7497 +               {"tag",.setmask = 1 << REISERFS_TAGGED},
7498 +               {"notag",.clrmask = 1 << REISERFS_TAGGED},
7499 +#endif
7500 +#ifdef CONFIG_PROPAGATE
7501 +               {"tag",.arg_required = 'T',.values = NULL},
7502 +#endif
7503  #ifdef CONFIG_REISERFS_FS_POSIX_ACL
7504                 {"acl",.setmask = 1 << REISERFS_POSIXACL},
7505                 {"noacl",.clrmask = 1 << REISERFS_POSIXACL},
7506 @@ -1190,6 +1198,14 @@ static int reiserfs_remount(struct super
7507         handle_quota_files(s, qf_names, &qfmt);
7508  #endif
7509  
7510 +       if ((mount_options & (1 << REISERFS_TAGGED)) &&
7511 +               !(s->s_flags & MS_TAGGED)) {
7512 +               reiserfs_warning(s, "super-vs01",
7513 +                       "reiserfs: tagging not permitted on remount.");
7514 +               err = -EINVAL;
7515 +               goto out_err;
7516 +       }
7517 +
7518         handle_attrs(s);
7519  
7520         /* Add options that are safe here */
7521 @@ -1652,6 +1668,10 @@ static int reiserfs_fill_super(struct su
7522                 goto error;
7523         }
7524  
7525 +       /* map mount option tagxid */
7526 +       if (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TAGGED))
7527 +               s->s_flags |= MS_TAGGED;
7528 +
7529         rs = SB_DISK_SUPER_BLOCK(s);
7530         /* Let's do basic sanity check to verify that underlying device is not
7531            smaller than the filesystem. If the check fails then abort and scream,
7532 diff -NurpP --minimal linux-2.6.31.6/fs/reiserfs/xattr.c linux-2.6.31.6-vs2.3.0.36.24/fs/reiserfs/xattr.c
7533 --- linux-2.6.31.6/fs/reiserfs/xattr.c  2009-09-10 15:26:24.000000000 +0200
7534 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/reiserfs/xattr.c    2009-09-10 16:11:43.000000000 +0200
7535 @@ -39,6 +39,7 @@
7536  #include <linux/namei.h>
7537  #include <linux/errno.h>
7538  #include <linux/fs.h>
7539 +#include <linux/mount.h>
7540  #include <linux/file.h>
7541  #include <linux/pagemap.h>
7542  #include <linux/xattr.h>
7543 diff -NurpP --minimal linux-2.6.31.6/fs/stat.c linux-2.6.31.6-vs2.3.0.36.24/fs/stat.c
7544 --- linux-2.6.31.6/fs/stat.c    2009-06-11 17:13:08.000000000 +0200
7545 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/stat.c      2009-09-10 16:11:43.000000000 +0200
7546 @@ -26,6 +26,7 @@ void generic_fillattr(struct inode *inod
7547         stat->nlink = inode->i_nlink;
7548         stat->uid = inode->i_uid;
7549         stat->gid = inode->i_gid;
7550 +       stat->tag = inode->i_tag;
7551         stat->rdev = inode->i_rdev;
7552         stat->atime = inode->i_atime;
7553         stat->mtime = inode->i_mtime;
7554 diff -NurpP --minimal linux-2.6.31.6/fs/super.c linux-2.6.31.6-vs2.3.0.36.24/fs/super.c
7555 --- linux-2.6.31.6/fs/super.c   2009-09-10 15:26:24.000000000 +0200
7556 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/super.c     2009-09-10 17:00:57.000000000 +0200
7557 @@ -37,6 +37,9 @@
7558  #include <linux/kobject.h>
7559  #include <linux/mutex.h>
7560  #include <linux/file.h>
7561 +#include <linux/devpts_fs.h>
7562 +#include <linux/proc_fs.h>
7563 +#include <linux/vs_context.h>
7564  #include <asm/uaccess.h>
7565  #include "internal.h"
7566  
7567 @@ -859,12 +862,18 @@ struct vfsmount *
7568  vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
7569  {
7570         struct vfsmount *mnt;
7571 +       struct super_block *sb;
7572         char *secdata = NULL;
7573         int error;
7574  
7575         if (!type)
7576                 return ERR_PTR(-ENODEV);
7577  
7578 +       error = -EPERM;
7579 +       if ((type->fs_flags & FS_BINARY_MOUNTDATA) &&
7580 +               !vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT))
7581 +               goto out;
7582 +
7583         error = -ENOMEM;
7584         mnt = alloc_vfsmnt(name);
7585         if (!mnt)
7586 @@ -883,9 +892,17 @@ vfs_kern_mount(struct file_system_type *
7587         error = type->get_sb(type, flags, name, data, mnt);
7588         if (error < 0)
7589                 goto out_free_secdata;
7590 -       BUG_ON(!mnt->mnt_sb);
7591  
7592 -       error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
7593 +       sb = mnt->mnt_sb;
7594 +       BUG_ON(!sb);
7595 +
7596 +       error = -EPERM;
7597 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT) && !sb->s_bdev &&
7598 +               (sb->s_magic != PROC_SUPER_MAGIC) &&
7599 +               (sb->s_magic != DEVPTS_SUPER_MAGIC))
7600 +               goto out_sb;
7601 +
7602 +       error = security_sb_kern_mount(sb, flags, secdata);
7603         if (error)
7604                 goto out_sb;
7605  
7606 diff -NurpP --minimal linux-2.6.31.6/fs/sysfs/mount.c linux-2.6.31.6-vs2.3.0.36.24/fs/sysfs/mount.c
7607 --- linux-2.6.31.6/fs/sysfs/mount.c     2009-06-11 17:13:08.000000000 +0200
7608 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/sysfs/mount.c       2009-09-10 16:11:43.000000000 +0200
7609 @@ -47,7 +47,7 @@ static int sysfs_fill_super(struct super
7610  
7611         sb->s_blocksize = PAGE_CACHE_SIZE;
7612         sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
7613 -       sb->s_magic = SYSFS_MAGIC;
7614 +       sb->s_magic = SYSFS_SUPER_MAGIC;
7615         sb->s_op = &sysfs_ops;
7616         sb->s_time_gran = 1;
7617         sysfs_sb = sb;
7618 diff -NurpP --minimal linux-2.6.31.6/fs/utimes.c linux-2.6.31.6-vs2.3.0.36.24/fs/utimes.c
7619 --- linux-2.6.31.6/fs/utimes.c  2009-03-24 14:22:37.000000000 +0100
7620 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/utimes.c    2009-09-10 16:11:43.000000000 +0200
7621 @@ -8,6 +8,8 @@
7622  #include <linux/stat.h>
7623  #include <linux/utime.h>
7624  #include <linux/syscalls.h>
7625 +#include <linux/mount.h>
7626 +#include <linux/vs_cowbl.h>
7627  #include <asm/uaccess.h>
7628  #include <asm/unistd.h>
7629  
7630 diff -NurpP --minimal linux-2.6.31.6/fs/xattr.c linux-2.6.31.6-vs2.3.0.36.24/fs/xattr.c
7631 --- linux-2.6.31.6/fs/xattr.c   2009-09-10 15:26:24.000000000 +0200
7632 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/xattr.c     2009-09-10 16:11:43.000000000 +0200
7633 @@ -18,6 +18,7 @@
7634  #include <linux/module.h>
7635  #include <linux/fsnotify.h>
7636  #include <linux/audit.h>
7637 +#include <linux/mount.h>
7638  #include <asm/uaccess.h>
7639  
7640  
7641 diff -NurpP --minimal linux-2.6.31.6/fs/xfs/linux-2.6/xfs_ioctl.c linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/linux-2.6/xfs_ioctl.c
7642 --- linux-2.6.31.6/fs/xfs/linux-2.6/xfs_ioctl.c 2009-09-10 15:26:24.000000000 +0200
7643 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/linux-2.6/xfs_ioctl.c   2009-10-07 15:57:55.000000000 +0200
7644 @@ -34,7 +34,6 @@
7645  #include "xfs_dir2_sf.h"
7646  #include "xfs_dinode.h"
7647  #include "xfs_inode.h"
7648 -#include "xfs_ioctl.h"
7649  #include "xfs_btree.h"
7650  #include "xfs_ialloc.h"
7651  #include "xfs_rtalloc.h"
7652 @@ -742,6 +741,10 @@ xfs_merge_ioc_xflags(
7653                 xflags |= XFS_XFLAG_IMMUTABLE;
7654         else
7655                 xflags &= ~XFS_XFLAG_IMMUTABLE;
7656 +       if (flags & FS_IXUNLINK_FL)
7657 +               xflags |= XFS_XFLAG_IXUNLINK;
7658 +       else
7659 +               xflags &= ~XFS_XFLAG_IXUNLINK;
7660         if (flags & FS_APPEND_FL)
7661                 xflags |= XFS_XFLAG_APPEND;
7662         else
7663 @@ -770,6 +773,8 @@ xfs_di2lxflags(
7664  
7665         if (di_flags & XFS_DIFLAG_IMMUTABLE)
7666                 flags |= FS_IMMUTABLE_FL;
7667 +       if (di_flags & XFS_DIFLAG_IXUNLINK)
7668 +               flags |= FS_IXUNLINK_FL;
7669         if (di_flags & XFS_DIFLAG_APPEND)
7670                 flags |= FS_APPEND_FL;
7671         if (di_flags & XFS_DIFLAG_SYNC)
7672 @@ -828,6 +833,8 @@ xfs_set_diflags(
7673         di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
7674         if (xflags & XFS_XFLAG_IMMUTABLE)
7675                 di_flags |= XFS_DIFLAG_IMMUTABLE;
7676 +       if (xflags & XFS_XFLAG_IXUNLINK)
7677 +               di_flags |= XFS_DIFLAG_IXUNLINK;
7678         if (xflags & XFS_XFLAG_APPEND)
7679                 di_flags |= XFS_DIFLAG_APPEND;
7680         if (xflags & XFS_XFLAG_SYNC)
7681 @@ -870,6 +877,10 @@ xfs_diflags_to_linux(
7682                 inode->i_flags |= S_IMMUTABLE;
7683         else
7684                 inode->i_flags &= ~S_IMMUTABLE;
7685 +       if (xflags & XFS_XFLAG_IXUNLINK)
7686 +               inode->i_flags |= S_IXUNLINK;
7687 +       else
7688 +               inode->i_flags &= ~S_IXUNLINK;
7689         if (xflags & XFS_XFLAG_APPEND)
7690                 inode->i_flags |= S_APPEND;
7691         else
7692 @@ -1346,10 +1357,18 @@ xfs_file_ioctl(
7693         case XFS_IOC_FSGETXATTRA:
7694                 return xfs_ioc_fsgetxattr(ip, 1, arg);
7695         case XFS_IOC_FSSETXATTR:
7696 +               if (IS_BARRIER(inode)) {
7697 +                       vxwprintk_task(1, "messing with the barrier.");
7698 +                       return -XFS_ERROR(EACCES);
7699 +               }
7700                 return xfs_ioc_fssetxattr(ip, filp, arg);
7701         case XFS_IOC_GETXFLAGS:
7702                 return xfs_ioc_getxflags(ip, arg);
7703         case XFS_IOC_SETXFLAGS:
7704 +               if (IS_BARRIER(inode)) {
7705 +                       vxwprintk_task(1, "messing with the barrier.");
7706 +                       return -XFS_ERROR(EACCES);
7707 +               }
7708                 return xfs_ioc_setxflags(ip, filp, arg);
7709  
7710         case XFS_IOC_FSSETDM: {
7711 diff -NurpP --minimal linux-2.6.31.6/fs/xfs/linux-2.6/xfs_ioctl.h linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/linux-2.6/xfs_ioctl.h
7712 --- linux-2.6.31.6/fs/xfs/linux-2.6/xfs_ioctl.h 2009-03-24 14:22:37.000000000 +0100
7713 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/linux-2.6/xfs_ioctl.h   2009-10-07 15:59:45.000000000 +0200
7714 @@ -70,6 +70,12 @@ xfs_handle_to_dentry(
7715         void __user             *uhandle,
7716         u32                     hlen);
7717  
7718 +extern int
7719 +xfs_sync_flags(
7720 +       struct inode            *inode,
7721 +       int                     flags,
7722 +       int                     vflags);
7723 +
7724  extern long
7725  xfs_file_ioctl(
7726         struct file             *filp,
7727 diff -NurpP --minimal linux-2.6.31.6/fs/xfs/linux-2.6/xfs_iops.c linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/linux-2.6/xfs_iops.c
7728 --- linux-2.6.31.6/fs/xfs/linux-2.6/xfs_iops.c  2009-09-10 15:26:24.000000000 +0200
7729 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/linux-2.6/xfs_iops.c    2009-10-07 05:23:00.000000000 +0200
7730 @@ -36,6 +36,7 @@
7731  #include "xfs_attr_sf.h"
7732  #include "xfs_dinode.h"
7733  #include "xfs_inode.h"
7734 +#include "xfs_ioctl.h"
7735  #include "xfs_bmap.h"
7736  #include "xfs_btree.h"
7737  #include "xfs_ialloc.h"
7738 @@ -56,6 +57,7 @@
7739  #include <linux/security.h>
7740  #include <linux/falloc.h>
7741  #include <linux/fiemap.h>
7742 +#include <linux/vs_tag.h>
7743  
7744  /*
7745   * Bring the atime in the XFS inode uptodate.
7746 @@ -513,6 +515,7 @@ xfs_vn_getattr(
7747         stat->nlink = ip->i_d.di_nlink;
7748         stat->uid = ip->i_d.di_uid;
7749         stat->gid = ip->i_d.di_gid;
7750 +       stat->tag = ip->i_d.di_tag;
7751         stat->ino = ip->i_ino;
7752         stat->atime = inode->i_atime;
7753         stat->mtime.tv_sec = ip->i_d.di_mtime.t_sec;
7754 @@ -706,6 +709,7 @@ static const struct inode_operations xfs
7755         .listxattr              = xfs_vn_listxattr,
7756         .fallocate              = xfs_vn_fallocate,
7757         .fiemap                 = xfs_vn_fiemap,
7758 +       .sync_flags             = xfs_sync_flags,
7759  };
7760  
7761  static const struct inode_operations xfs_dir_inode_operations = {
7762 @@ -731,6 +735,7 @@ static const struct inode_operations xfs
7763         .getxattr               = generic_getxattr,
7764         .removexattr            = generic_removexattr,
7765         .listxattr              = xfs_vn_listxattr,
7766 +       .sync_flags             = xfs_sync_flags,
7767  };
7768  
7769  static const struct inode_operations xfs_dir_ci_inode_operations = {
7770 @@ -780,6 +785,10 @@ xfs_diflags_to_iflags(
7771                 inode->i_flags |= S_IMMUTABLE;
7772         else
7773                 inode->i_flags &= ~S_IMMUTABLE;
7774 +       if (ip->i_d.di_flags & XFS_DIFLAG_IXUNLINK)
7775 +               inode->i_flags |= S_IXUNLINK;
7776 +       else
7777 +               inode->i_flags &= ~S_IXUNLINK;
7778         if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
7779                 inode->i_flags |= S_APPEND;
7780         else
7781 @@ -792,6 +801,15 @@ xfs_diflags_to_iflags(
7782                 inode->i_flags |= S_NOATIME;
7783         else
7784                 inode->i_flags &= ~S_NOATIME;
7785 +
7786 +       if (ip->i_d.di_vflags & XFS_DIVFLAG_BARRIER)
7787 +               inode->i_vflags |= V_BARRIER;
7788 +       else
7789 +               inode->i_vflags &= ~V_BARRIER;
7790 +       if (ip->i_d.di_vflags & XFS_DIVFLAG_COW)
7791 +               inode->i_vflags |= V_COW;
7792 +       else
7793 +               inode->i_vflags &= ~V_COW;
7794  }
7795  
7796  /*
7797 @@ -820,6 +838,7 @@ xfs_setup_inode(
7798         inode->i_nlink  = ip->i_d.di_nlink;
7799         inode->i_uid    = ip->i_d.di_uid;
7800         inode->i_gid    = ip->i_d.di_gid;
7801 +       inode->i_tag    = ip->i_d.di_tag;
7802  
7803         switch (inode->i_mode & S_IFMT) {
7804         case S_IFBLK:
7805 diff -NurpP --minimal linux-2.6.31.6/fs/xfs/linux-2.6/xfs_linux.h linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/linux-2.6/xfs_linux.h
7806 --- linux-2.6.31.6/fs/xfs/linux-2.6/xfs_linux.h 2009-09-10 15:26:24.000000000 +0200
7807 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/linux-2.6/xfs_linux.h   2009-09-10 16:11:43.000000000 +0200
7808 @@ -119,6 +119,7 @@
7809  
7810  #define current_cpu()          (raw_smp_processor_id())
7811  #define current_pid()          (current->pid)
7812 +#define current_fstag(cred,vp) (dx_current_fstag((vp)->i_sb))
7813  #define current_test_flags(f)  (current->flags & (f))
7814  #define current_set_flags_nested(sp, f)                \
7815                 (*(sp) = current->flags, current->flags |= (f))
7816 diff -NurpP --minimal linux-2.6.31.6/fs/xfs/linux-2.6/xfs_super.c linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/linux-2.6/xfs_super.c
7817 --- linux-2.6.31.6/fs/xfs/linux-2.6/xfs_super.c 2009-09-10 15:26:24.000000000 +0200
7818 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/linux-2.6/xfs_super.c   2009-09-10 17:01:53.000000000 +0200
7819 @@ -117,6 +117,9 @@ mempool_t *xfs_ioend_pool;
7820  #define MNTOPT_DMAPI   "dmapi"         /* DMI enabled (DMAPI / XDSM) */
7821  #define MNTOPT_XDSM    "xdsm"          /* DMI enabled (DMAPI / XDSM) */
7822  #define MNTOPT_DMI     "dmi"           /* DMI enabled (DMAPI / XDSM) */
7823 +#define MNTOPT_TAGXID  "tagxid"        /* context tagging for inodes */
7824 +#define MNTOPT_TAGGED  "tag"           /* context tagging for inodes */
7825 +#define MNTOPT_NOTAGTAG        "notag"         /* do not use context tagging */
7826  
7827  /*
7828   * Table driven mount option parser.
7829 @@ -125,10 +128,14 @@ mempool_t *xfs_ioend_pool;
7830   * in the future, too.
7831   */
7832  enum {
7833 +       Opt_tag, Opt_notag,
7834         Opt_barrier, Opt_nobarrier, Opt_err
7835  };
7836  
7837  static const match_table_t tokens = {
7838 +       {Opt_tag, "tagxid"},
7839 +       {Opt_tag, "tag"},
7840 +       {Opt_notag, "notag"},
7841         {Opt_barrier, "barrier"},
7842         {Opt_nobarrier, "nobarrier"},
7843         {Opt_err, NULL}
7844 @@ -382,6 +389,19 @@ xfs_parseargs(
7845                 } else if (!strcmp(this_char, "irixsgid")) {
7846                         cmn_err(CE_WARN,
7847         "XFS: irixsgid is now a sysctl(2) variable, option is deprecated.");
7848 +#ifndef CONFIG_TAGGING_NONE
7849 +               } else if (!strcmp(this_char, MNTOPT_TAGGED)) {
7850 +                       mp->m_flags |= XFS_MOUNT_TAGGED;
7851 +               } else if (!strcmp(this_char, MNTOPT_NOTAGTAG)) {
7852 +                       mp->m_flags &= ~XFS_MOUNT_TAGGED;
7853 +               } else if (!strcmp(this_char, MNTOPT_TAGXID)) {
7854 +                       mp->m_flags |= XFS_MOUNT_TAGGED;
7855 +#endif
7856 +#ifdef CONFIG_PROPAGATE
7857 +               } else if (!strcmp(this_char, MNTOPT_TAGGED)) {
7858 +                       /* use value */
7859 +                       mp->m_flags |= XFS_MOUNT_TAGGED;
7860 +#endif
7861                 } else {
7862                         cmn_err(CE_WARN,
7863                                 "XFS: unknown mount option [%s].", this_char);
7864 @@ -1244,6 +1264,16 @@ xfs_fs_remount(
7865                 case Opt_nobarrier:
7866                         mp->m_flags &= ~XFS_MOUNT_BARRIER;
7867                         break;
7868 +               case Opt_tag:
7869 +                       if (!(sb->s_flags & MS_TAGGED)) {
7870 +                               printk(KERN_INFO
7871 +                                       "XFS: %s: tagging not permitted on remount.\n",
7872 +                                       sb->s_id);
7873 +                               return -EINVAL;
7874 +                       }
7875 +                       break;
7876 +               case Opt_notag:
7877 +                       break;
7878                 default:
7879                         /*
7880                          * Logically we would return an error here to prevent
7881 @@ -1451,6 +1481,9 @@ xfs_fs_fill_super(
7882  
7883         XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname);
7884  
7885 +       if (mp->m_flags & XFS_MOUNT_TAGGED)
7886 +               sb->s_flags |= MS_TAGGED;
7887 +
7888         sb->s_magic = XFS_SB_MAGIC;
7889         sb->s_blocksize = mp->m_sb.sb_blocksize;
7890         sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
7891 diff -NurpP --minimal linux-2.6.31.6/fs/xfs/xfs_dinode.h linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/xfs_dinode.h
7892 --- linux-2.6.31.6/fs/xfs/xfs_dinode.h  2009-06-11 17:13:09.000000000 +0200
7893 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/xfs_dinode.h    2009-09-10 16:11:43.000000000 +0200
7894 @@ -50,7 +50,9 @@ typedef struct xfs_dinode {
7895         __be32          di_gid;         /* owner's group id */
7896         __be32          di_nlink;       /* number of links to file */
7897         __be16          di_projid;      /* owner's project id */
7898 -       __u8            di_pad[8];      /* unused, zeroed space */
7899 +       __be16          di_tag;         /* context tagging */
7900 +       __be16          di_vflags;      /* vserver specific flags */
7901 +       __u8            di_pad[4];      /* unused, zeroed space */
7902         __be16          di_flushiter;   /* incremented on flush */
7903         xfs_timestamp_t di_atime;       /* time last accessed */
7904         xfs_timestamp_t di_mtime;       /* time last modified */
7905 @@ -183,6 +185,8 @@ static inline void xfs_dinode_put_rdev(s
7906  #define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */
7907  #define XFS_DIFLAG_NODEFRAG_BIT     13 /* do not reorganize/defragment */
7908  #define XFS_DIFLAG_FILESTREAM_BIT   14  /* use filestream allocator */
7909 +#define XFS_DIFLAG_IXUNLINK_BIT     15 /* Immutable invert on unlink */
7910 +
7911  #define XFS_DIFLAG_REALTIME      (1 << XFS_DIFLAG_REALTIME_BIT)
7912  #define XFS_DIFLAG_PREALLOC      (1 << XFS_DIFLAG_PREALLOC_BIT)
7913  #define XFS_DIFLAG_NEWRTBM       (1 << XFS_DIFLAG_NEWRTBM_BIT)
7914 @@ -198,6 +202,7 @@ static inline void xfs_dinode_put_rdev(s
7915  #define XFS_DIFLAG_EXTSZINHERIT  (1 << XFS_DIFLAG_EXTSZINHERIT_BIT)
7916  #define XFS_DIFLAG_NODEFRAG      (1 << XFS_DIFLAG_NODEFRAG_BIT)
7917  #define XFS_DIFLAG_FILESTREAM    (1 << XFS_DIFLAG_FILESTREAM_BIT)
7918 +#define XFS_DIFLAG_IXUNLINK      (1 << XFS_DIFLAG_IXUNLINK_BIT)
7919  
7920  #ifdef CONFIG_XFS_RT
7921  #define XFS_IS_REALTIME_INODE(ip) ((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME)
7922 @@ -210,6 +215,10 @@ static inline void xfs_dinode_put_rdev(s
7923          XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
7924          XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \
7925          XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
7926 -        XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM)
7927 +        XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM | \
7928 +        XFS_DIFLAG_IXUNLINK)
7929 +
7930 +#define XFS_DIVFLAG_BARRIER    0x01
7931 +#define XFS_DIVFLAG_COW                0x02
7932  
7933  #endif /* __XFS_DINODE_H__ */
7934 diff -NurpP --minimal linux-2.6.31.6/fs/xfs/xfs_fs.h linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/xfs_fs.h
7935 --- linux-2.6.31.6/fs/xfs/xfs_fs.h      2009-09-10 15:26:24.000000000 +0200
7936 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/xfs_fs.h        2009-09-10 16:11:43.000000000 +0200
7937 @@ -67,6 +67,9 @@ struct fsxattr {
7938  #define XFS_XFLAG_EXTSZINHERIT 0x00001000      /* inherit inode extent size */
7939  #define XFS_XFLAG_NODEFRAG     0x00002000      /* do not defragment */
7940  #define XFS_XFLAG_FILESTREAM   0x00004000      /* use filestream allocator */
7941 +#define XFS_XFLAG_IXUNLINK     0x00008000      /* immutable invert on unlink */
7942 +#define XFS_XFLAG_BARRIER      0x10000000      /* chroot() barrier */
7943 +#define XFS_XFLAG_COW          0x20000000      /* copy on write mark */
7944  #define XFS_XFLAG_HASATTR      0x80000000      /* no DIFLAG for this   */
7945  
7946  /*
7947 @@ -292,7 +295,8 @@ typedef struct xfs_bstat {
7948         __s32           bs_extents;     /* number of extents            */
7949         __u32           bs_gen;         /* generation count             */
7950         __u16           bs_projid;      /* project id                   */
7951 -       unsigned char   bs_pad[14];     /* pad space, unused            */
7952 +       __u16           bs_tag;         /* context tagging              */
7953 +       unsigned char   bs_pad[12];     /* pad space, unused            */
7954         __u32           bs_dmevmask;    /* DMIG event mask              */
7955         __u16           bs_dmstate;     /* DMIG state info              */
7956         __u16           bs_aextents;    /* attribute number of extents  */
7957 diff -NurpP --minimal linux-2.6.31.6/fs/xfs/xfs_ialloc.c linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/xfs_ialloc.c
7958 --- linux-2.6.31.6/fs/xfs/xfs_ialloc.c  2009-06-11 17:13:09.000000000 +0200
7959 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/xfs_ialloc.c    2009-09-10 16:11:43.000000000 +0200
7960 @@ -41,7 +41,6 @@
7961  #include "xfs_error.h"
7962  #include "xfs_bmap.h"
7963  
7964 -
7965  /*
7966   * Allocation group level functions.
7967   */
7968 diff -NurpP --minimal linux-2.6.31.6/fs/xfs/xfs_inode.c linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/xfs_inode.c
7969 --- linux-2.6.31.6/fs/xfs/xfs_inode.c   2009-09-10 15:26:24.000000000 +0200
7970 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/xfs_inode.c     2009-09-10 16:11:43.000000000 +0200
7971 @@ -249,6 +249,7 @@ xfs_inotobp(
7972         return 0;
7973  }
7974  
7975 +#include <linux/vs_tag.h>
7976  
7977  /*
7978   * This routine is called to map an inode to the buffer containing
7979 @@ -654,15 +655,25 @@ xfs_iformat_btree(
7980  void
7981  xfs_dinode_from_disk(
7982         xfs_icdinode_t          *to,
7983 -       xfs_dinode_t            *from)
7984 +       xfs_dinode_t            *from,
7985 +       int tagged)
7986  {
7987 +       uint32_t uid, gid, tag;
7988 +
7989         to->di_magic = be16_to_cpu(from->di_magic);
7990         to->di_mode = be16_to_cpu(from->di_mode);
7991         to->di_version = from ->di_version;
7992         to->di_format = from->di_format;
7993         to->di_onlink = be16_to_cpu(from->di_onlink);
7994 -       to->di_uid = be32_to_cpu(from->di_uid);
7995 -       to->di_gid = be32_to_cpu(from->di_gid);
7996 +
7997 +       uid = be32_to_cpu(from->di_uid);
7998 +       gid = be32_to_cpu(from->di_gid);
7999 +       tag = be16_to_cpu(from->di_tag);
8000 +
8001 +       to->di_uid = INOTAG_UID(tagged, uid, gid);
8002 +       to->di_gid = INOTAG_GID(tagged, uid, gid);
8003 +       to->di_tag = INOTAG_TAG(tagged, uid, gid, tag);
8004 +
8005         to->di_nlink = be32_to_cpu(from->di_nlink);
8006         to->di_projid = be16_to_cpu(from->di_projid);
8007         memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
8008 @@ -683,21 +694,26 @@ xfs_dinode_from_disk(
8009         to->di_dmevmask = be32_to_cpu(from->di_dmevmask);
8010         to->di_dmstate  = be16_to_cpu(from->di_dmstate);
8011         to->di_flags    = be16_to_cpu(from->di_flags);
8012 +       to->di_vflags   = be16_to_cpu(from->di_vflags);
8013         to->di_gen      = be32_to_cpu(from->di_gen);
8014  }
8015  
8016  void
8017  xfs_dinode_to_disk(
8018         xfs_dinode_t            *to,
8019 -       xfs_icdinode_t          *from)
8020 +       xfs_icdinode_t          *from,
8021 +       int tagged)
8022  {
8023         to->di_magic = cpu_to_be16(from->di_magic);
8024         to->di_mode = cpu_to_be16(from->di_mode);
8025         to->di_version = from ->di_version;
8026         to->di_format = from->di_format;
8027         to->di_onlink = cpu_to_be16(from->di_onlink);
8028 -       to->di_uid = cpu_to_be32(from->di_uid);
8029 -       to->di_gid = cpu_to_be32(from->di_gid);
8030 +
8031 +       to->di_uid = cpu_to_be32(TAGINO_UID(tagged, from->di_uid, from->di_tag));
8032 +       to->di_gid = cpu_to_be32(TAGINO_GID(tagged, from->di_gid, from->di_tag));
8033 +       to->di_tag = cpu_to_be16(TAGINO_TAG(tagged, from->di_tag));
8034 +
8035         to->di_nlink = cpu_to_be32(from->di_nlink);
8036         to->di_projid = cpu_to_be16(from->di_projid);
8037         memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
8038 @@ -718,12 +734,14 @@ xfs_dinode_to_disk(
8039         to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
8040         to->di_dmstate = cpu_to_be16(from->di_dmstate);
8041         to->di_flags = cpu_to_be16(from->di_flags);
8042 +       to->di_vflags = cpu_to_be16(from->di_vflags);
8043         to->di_gen = cpu_to_be32(from->di_gen);
8044  }
8045  
8046  STATIC uint
8047  _xfs_dic2xflags(
8048 -       __uint16_t              di_flags)
8049 +       __uint16_t              di_flags,
8050 +       __uint16_t              di_vflags)
8051  {
8052         uint                    flags = 0;
8053  
8054 @@ -734,6 +752,8 @@ _xfs_dic2xflags(
8055                         flags |= XFS_XFLAG_PREALLOC;
8056                 if (di_flags & XFS_DIFLAG_IMMUTABLE)
8057                         flags |= XFS_XFLAG_IMMUTABLE;
8058 +               if (di_flags & XFS_DIFLAG_IXUNLINK)
8059 +                       flags |= XFS_XFLAG_IXUNLINK;
8060                 if (di_flags & XFS_DIFLAG_APPEND)
8061                         flags |= XFS_XFLAG_APPEND;
8062                 if (di_flags & XFS_DIFLAG_SYNC)
8063 @@ -758,6 +778,10 @@ _xfs_dic2xflags(
8064                         flags |= XFS_XFLAG_FILESTREAM;
8065         }
8066  
8067 +       if (di_vflags & XFS_DIVFLAG_BARRIER)    /* on-disk vflag -> xflag */
8068 +               flags |= XFS_XFLAG_BARRIER;     /* xflags mask: XFS_XFLAG_*, not FS_*_FL */
8069 +       if (di_vflags & XFS_DIVFLAG_COW)        /* on-disk vflag -> xflag */
8070 +               flags |= XFS_XFLAG_COW;         /* xflags mask: XFS_XFLAG_*, not FS_*_FL */
8071         return flags;
8072  }
8073  
8074 @@ -767,7 +791,7 @@ xfs_ip2xflags(
8075  {
8076         xfs_icdinode_t          *dic = &ip->i_d;
8077  
8078 -       return _xfs_dic2xflags(dic->di_flags) |
8079 +       return _xfs_dic2xflags(dic->di_flags, dic->di_vflags) |
8080                                 (XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0);
8081  }
8082  
8083 @@ -775,7 +799,8 @@ uint
8084  xfs_dic2xflags(
8085         xfs_dinode_t            *dip)
8086  {
8087 -       return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) |
8088 +       return _xfs_dic2xflags(be16_to_cpu(dip->di_flags),
8089 +                               be16_to_cpu(dip->di_vflags)) |
8090                                 (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
8091  }
8092  
8093 @@ -811,7 +836,6 @@ xfs_iread(
8094         if (error)
8095                 return error;
8096         dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
8097 -
8098         /*
8099          * If we got something that isn't an inode it means someone
8100          * (nfs or dmi) has a stale handle.
8101 @@ -836,7 +860,8 @@ xfs_iread(
8102          * Otherwise, just get the truly permanent information.
8103          */
8104         if (dip->di_mode) {
8105 -               xfs_dinode_from_disk(&ip->i_d, dip);
8106 +               xfs_dinode_from_disk(&ip->i_d, dip,
8107 +                       mp->m_flags & XFS_MOUNT_TAGGED);
8108                 error = xfs_iformat(ip, dip);
8109                 if (error)  {
8110  #ifdef DEBUG
8111 @@ -1036,6 +1061,7 @@ xfs_ialloc(
8112         ASSERT(ip->i_d.di_nlink == nlink);
8113         ip->i_d.di_uid = current_fsuid();
8114         ip->i_d.di_gid = current_fsgid();
8115 +       ip->i_d.di_tag = current_fstag(cr, &ip->i_vnode);
8116         ip->i_d.di_projid = prid;
8117         memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
8118  
8119 @@ -1096,6 +1122,7 @@ xfs_ialloc(
8120         ip->i_d.di_dmevmask = 0;
8121         ip->i_d.di_dmstate = 0;
8122         ip->i_d.di_flags = 0;
8123 +       ip->i_d.di_vflags = 0;
8124         flags = XFS_ILOG_CORE;
8125         switch (mode & S_IFMT) {
8126         case S_IFIFO:
8127 @@ -2172,6 +2199,7 @@ xfs_ifree(
8128         }
8129         ip->i_d.di_mode = 0;            /* mark incore inode as free */
8130         ip->i_d.di_flags = 0;
8131 +       ip->i_d.di_vflags = 0;
8132         ip->i_d.di_dmevmask = 0;
8133         ip->i_d.di_forkoff = 0;         /* mark the attr fork not in use */
8134         ip->i_df.if_ext_max =
8135 @@ -3139,7 +3167,8 @@ xfs_iflush_int(
8136          * because if the inode is dirty at all the core must
8137          * be.
8138          */
8139 -       xfs_dinode_to_disk(dip, &ip->i_d);
8140 +       xfs_dinode_to_disk(dip, &ip->i_d,
8141 +               mp->m_flags & XFS_MOUNT_TAGGED);
8142  
8143         /* Wrap, we never let the log put out DI_MAX_FLUSH */
8144         if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
8145 diff -NurpP --minimal linux-2.6.31.6/fs/xfs/xfs_inode.h linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/xfs_inode.h
8146 --- linux-2.6.31.6/fs/xfs/xfs_inode.h   2009-09-10 15:26:24.000000000 +0200
8147 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/xfs_inode.h     2009-09-10 16:11:43.000000000 +0200
8148 @@ -135,7 +135,9 @@ typedef struct xfs_icdinode {
8149         __uint32_t      di_gid;         /* owner's group id */
8150         __uint32_t      di_nlink;       /* number of links to file */
8151         __uint16_t      di_projid;      /* owner's project id */
8152 -       __uint8_t       di_pad[8];      /* unused, zeroed space */
8153 +       __uint16_t      di_tag;         /* context tagging */
8154 +       __uint16_t      di_vflags;      /* vserver specific flags */
8155 +       __uint8_t       di_pad[4];      /* unused, zeroed space */
8156         __uint16_t      di_flushiter;   /* incremented on flush */
8157         xfs_ictimestamp_t di_atime;     /* time last accessed */
8158         xfs_ictimestamp_t di_mtime;     /* time last modified */
8159 @@ -573,9 +575,9 @@ int         xfs_itobp(struct xfs_mount *, struc
8160  int            xfs_iread(struct xfs_mount *, struct xfs_trans *,
8161                           struct xfs_inode *, xfs_daddr_t, uint);
8162  void           xfs_dinode_from_disk(struct xfs_icdinode *,
8163 -                                    struct xfs_dinode *);
8164 +                                    struct xfs_dinode *, int);
8165  void           xfs_dinode_to_disk(struct xfs_dinode *,
8166 -                                  struct xfs_icdinode *);
8167 +                                  struct xfs_icdinode *, int);
8168  void           xfs_idestroy_fork(struct xfs_inode *, int);
8169  void           xfs_idata_realloc(struct xfs_inode *, int, int);
8170  void           xfs_iroot_realloc(struct xfs_inode *, int, int);
8171 diff -NurpP --minimal linux-2.6.31.6/fs/xfs/xfs_itable.c linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/xfs_itable.c
8172 --- linux-2.6.31.6/fs/xfs/xfs_itable.c  2009-06-11 17:13:09.000000000 +0200
8173 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/xfs_itable.c    2009-09-10 16:11:43.000000000 +0200
8174 @@ -82,6 +82,7 @@ xfs_bulkstat_one_iget(
8175         buf->bs_mode = dic->di_mode;
8176         buf->bs_uid = dic->di_uid;
8177         buf->bs_gid = dic->di_gid;
8178 +       buf->bs_tag = dic->di_tag;
8179         buf->bs_size = dic->di_size;
8180         /*
8181          * We are reading the atime from the Linux inode because the
8182 diff -NurpP --minimal linux-2.6.31.6/fs/xfs/xfs_log_recover.c linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/xfs_log_recover.c
8183 --- linux-2.6.31.6/fs/xfs/xfs_log_recover.c     2009-09-10 15:26:24.000000000 +0200
8184 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/xfs_log_recover.c       2009-09-10 16:11:43.000000000 +0200
8185 @@ -2467,7 +2467,8 @@ xlog_recover_do_inode_trans(
8186         }
8187  
8188         /* The core is in in-core format */
8189 -       xfs_dinode_to_disk(dip, (xfs_icdinode_t *)item->ri_buf[1].i_addr);
8190 +       xfs_dinode_to_disk(dip, (xfs_icdinode_t *)item->ri_buf[1].i_addr,
8191 +               mp->m_flags & XFS_MOUNT_TAGGED);
8192  
8193         /* the rest is in on-disk format */
8194         if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) {
8195 diff -NurpP --minimal linux-2.6.31.6/fs/xfs/xfs_mount.h linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/xfs_mount.h
8196 --- linux-2.6.31.6/fs/xfs/xfs_mount.h   2009-09-10 15:26:24.000000000 +0200
8197 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/xfs_mount.h     2009-09-10 16:11:43.000000000 +0200
8198 @@ -283,6 +283,7 @@ typedef struct xfs_mount {
8199                                                    allocator */
8200  #define XFS_MOUNT_NOATTR2      (1ULL << 25)    /* disable use of attr2 format */
8201  
8202 +#define XFS_MOUNT_TAGGED       (1ULL << 31)    /* context tagging */
8203  
8204  /*
8205   * Default minimum read and write sizes.
8206 diff -NurpP --minimal linux-2.6.31.6/fs/xfs/xfs_vnodeops.c linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/xfs_vnodeops.c
8207 --- linux-2.6.31.6/fs/xfs/xfs_vnodeops.c        2009-09-10 15:26:24.000000000 +0200
8208 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/xfs_vnodeops.c  2009-10-07 15:57:06.000000000 +0200
8209 @@ -54,6 +54,80 @@
8210  #include "xfs_filestream.h"
8211  #include "xfs_vnodeops.h"
8212  
8213 +
8214 +STATIC void
8215 +xfs_get_inode_flags(
8216 +       xfs_inode_t     *ip)
8217 +{
8218 +       /* Mirror the VFS inode's flag bits into the in-core dinode. */
8219 +       struct inode    *vfs_inode = VFS_I(ip);
8220 +       unsigned int    vfs_flags = vfs_inode->i_flags;
8221 +       unsigned int    vfs_vflags = vfs_inode->i_vflags;
8222 +       __uint16_t      di_flags = ip->i_d.di_flags;
8223 +       __uint16_t      di_vflags = ip->i_d.di_vflags;
8224 +
8225 +       /* Drop the mirrored bits, then re-add those currently set. */
8226 +       di_flags &= ~(XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_IXUNLINK);
8227 +       di_vflags &= ~(XFS_DIVFLAG_BARRIER | XFS_DIVFLAG_COW);
8228 +       if (vfs_flags & S_IMMUTABLE)
8229 +               di_flags |= XFS_DIFLAG_IMMUTABLE;
8230 +       if (vfs_flags & S_IXUNLINK)
8231 +               di_flags |= XFS_DIFLAG_IXUNLINK;
8232 +       if (vfs_vflags & V_BARRIER)
8233 +               di_vflags |= XFS_DIVFLAG_BARRIER;
8234 +       if (vfs_vflags & V_COW)
8235 +               di_vflags |= XFS_DIVFLAG_COW;
8236 +
8237 +       ip->i_d.di_flags = di_flags;
8238 +       ip->i_d.di_vflags = di_vflags;
8239 +}
8240 +/* Set inode flags/vflags, mirror them into the XFS inode, and log it. */
8241 +int
8242 +xfs_sync_flags(
8243 +       struct inode            *inode,
8244 +       int                     flags,
8245 +       int                     vflags)
8246 +{
8247 +       struct xfs_inode        *ip = XFS_I(inode);
8248 +       struct xfs_mount        *mp = ip->i_mount;
8249 +       struct xfs_trans        *tp;
8250 +       unsigned int            lock_flags = 0; /* nonzero once ip is locked */
8251 +       int                     code;
8252 +
8253 +       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
8254 +       code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
8255 +       if (code)
8256 +               goto error_out;         /* inode not locked yet on this path */
8257 +
8258 +       lock_flags = XFS_ILOCK_EXCL;
8259 +       xfs_ilock(ip, lock_flags);
8260 +
8261 +       xfs_trans_ijoin(tp, ip, lock_flags);
8262 +       xfs_trans_ihold(tp, ip);        /* presumably keeps ip locked past commit */
8263 +
8264 +       inode->i_flags = flags;         /* replaces the whole VFS flag word */
8265 +       inode->i_vflags = vflags;
8266 +       xfs_get_inode_flags(ip);        /* copy mirrored bits into ip->i_d */
8267 +
8268 +       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
8269 +       xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
8270 +
8271 +       XFS_STATS_INC(xs_ig_attrchg);
8272 +
8273 +       if (mp->m_flags & XFS_MOUNT_WSYNC)
8274 +               xfs_trans_set_sync(tp);
8275 +       code = xfs_trans_commit(tp, 0);
8276 +       xfs_iunlock(ip, lock_flags);    /* explicit unlock after the commit */
8277 +       return code;                    /* result of xfs_trans_commit() */
8278 +
8279 +error_out:
8280 +       xfs_trans_cancel(tp, 0);
8281 +       if (lock_flags)                 /* 0 here: the only goto precedes locking */
8282 +               xfs_iunlock(ip, lock_flags);
8283 +       return code;                    /* error from xfs_trans_reserve() */
8284 +}
8285 +
8286 +
8287  int
8288  xfs_setattr(
8289         struct xfs_inode        *ip,
8290 @@ -69,6 +143,7 @@ xfs_setattr(
8291         uint                    commit_flags=0;
8292         uid_t                   uid=0, iuid=0;
8293         gid_t                   gid=0, igid=0;
8294 +       tag_t                   tag=0, itag=0;
8295         int                     timeflags = 0;
8296         struct xfs_dquot        *udqp, *gdqp, *olddquot1, *olddquot2;
8297         int                     need_iolock = 1;
8298 @@ -165,7 +240,7 @@ xfs_setattr(
8299         /*
8300          * Change file ownership.  Must be the owner or privileged.
8301          */
8302 -       if (mask & (ATTR_UID|ATTR_GID)) {
8303 +       if (mask & (ATTR_UID|ATTR_GID|ATTR_TAG)) {
8304                 /*
8305                  * These IDs could have changed since we last looked at them.
8306                  * But, we're assured that if the ownership did change
8307 @@ -174,8 +249,10 @@ xfs_setattr(
8308                  */
8309                 iuid = ip->i_d.di_uid;
8310                 igid = ip->i_d.di_gid;
8311 +               itag = ip->i_d.di_tag;
8312                 gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
8313                 uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
8314 +               tag = (mask & ATTR_TAG) ? iattr->ia_tag : itag;
8315  
8316                 /*
8317                  * Do a quota reservation only if uid/gid is actually
8318 @@ -183,7 +260,8 @@ xfs_setattr(
8319                  */
8320                 if (XFS_IS_QUOTA_RUNNING(mp) &&
8321                     ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
8322 -                    (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
8323 +                    (XFS_IS_GQUOTA_ON(mp) && igid != gid) ||
8324 +                    (XFS_IS_GQUOTA_ON(mp) && itag != tag))) {
8325                         ASSERT(tp);
8326                         code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
8327                                                 capable(CAP_FOWNER) ?
8328 @@ -336,7 +414,7 @@ xfs_setattr(
8329         /*
8330          * Change file ownership.  Must be the owner or privileged.
8331          */
8332 -       if (mask & (ATTR_UID|ATTR_GID)) {
8333 +       if (mask & (ATTR_UID|ATTR_GID|ATTR_TAG)) {
8334                 /*
8335                  * CAP_FSETID overrides the following restrictions:
8336                  *
8337 @@ -352,6 +430,10 @@ xfs_setattr(
8338                  * Change the ownerships and register quota modifications
8339                  * in the transaction.
8340                  */
8341 +               if (itag != tag) {
8342 +                       ip->i_d.di_tag = tag;
8343 +                       inode->i_tag = tag;
8344 +               }
8345                 if (iuid != uid) {
8346                         if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
8347                                 ASSERT(mask & ATTR_UID);
8348 diff -NurpP --minimal linux-2.6.31.6/fs/xfs/xfs_vnodeops.h linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/xfs_vnodeops.h
8349 --- linux-2.6.31.6/fs/xfs/xfs_vnodeops.h        2009-09-10 15:26:24.000000000 +0200
8350 +++ linux-2.6.31.6-vs2.3.0.36.24/fs/xfs/xfs_vnodeops.h  2009-09-10 16:11:43.000000000 +0200
8351 @@ -14,6 +14,7 @@ struct xfs_inode;
8352  struct xfs_iomap;
8353  
8354  
8355 +int xfs_sync_xflags(struct xfs_inode *ip);
8356  int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
8357  #define        XFS_ATTR_DMI            0x01    /* invocation from a DMI function */
8358  #define        XFS_ATTR_NONBLOCK       0x02    /* return EAGAIN if operation would block */
8359 diff -NurpP --minimal linux-2.6.31.6/include/asm-generic/tlb.h linux-2.6.31.6-vs2.3.0.36.24/include/asm-generic/tlb.h
8360 --- linux-2.6.31.6/include/asm-generic/tlb.h    2009-09-10 15:26:24.000000000 +0200
8361 +++ linux-2.6.31.6-vs2.3.0.36.24/include/asm-generic/tlb.h      2009-09-10 16:11:43.000000000 +0200
8362 @@ -14,6 +14,7 @@
8363  #define _ASM_GENERIC__TLB_H
8364  
8365  #include <linux/swap.h>
8366 +#include <linux/vs_memory.h>
8367  #include <asm/pgalloc.h>
8368  #include <asm/tlbflush.h>
8369  
8370 diff -NurpP --minimal linux-2.6.31.6/include/linux/capability.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/capability.h
8371 --- linux-2.6.31.6/include/linux/capability.h   2009-06-11 17:13:13.000000000 +0200
8372 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/capability.h     2009-09-10 16:11:43.000000000 +0200
8373 @@ -285,6 +285,7 @@ struct cpu_vfs_cap_data {
8374     arbitrary SCSI commands */
8375  /* Allow setting encryption key on loopback filesystem */
8376  /* Allow setting zone reclaim policy */
8377 +/* Allow the selection of a security context */
8378  
8379  #define CAP_SYS_ADMIN        21
8380  
8381 @@ -357,7 +358,13 @@ struct cpu_vfs_cap_data {
8382  
8383  #define CAP_MAC_ADMIN        33
8384  
8385 -#define CAP_LAST_CAP         CAP_MAC_ADMIN
8386 +/* Allow context manipulations */
8387 +/* Allow changing context info on files */
8388 +
8389 +#define CAP_CONTEXT         34
8390 +
8391 +
8392 +#define CAP_LAST_CAP         CAP_CONTEXT
8393  
8394  #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
8395  
8396 diff -NurpP --minimal linux-2.6.31.6/include/linux/devpts_fs.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/devpts_fs.h
8397 --- linux-2.6.31.6/include/linux/devpts_fs.h    2008-12-25 00:26:37.000000000 +0100
8398 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/devpts_fs.h      2009-09-10 16:11:43.000000000 +0200
8399 @@ -45,5 +45,4 @@ static inline void devpts_pty_kill(struc
8400  
8401  #endif
8402  
8403 -
8404  #endif /* _LINUX_DEVPTS_FS_H */
8405 diff -NurpP --minimal linux-2.6.31.6/include/linux/ext2_fs.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/ext2_fs.h
8406 --- linux-2.6.31.6/include/linux/ext2_fs.h      2009-03-24 14:22:41.000000000 +0100
8407 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/ext2_fs.h        2009-10-12 05:20:23.000000000 +0200
8408 @@ -189,8 +189,12 @@ struct ext2_group_desc
8409  #define EXT2_NOTAIL_FL                 FS_NOTAIL_FL    /* file tail should not be merged */
8410  #define EXT2_DIRSYNC_FL                        FS_DIRSYNC_FL   /* dirsync behaviour (directories only) */
8411  #define EXT2_TOPDIR_FL                 FS_TOPDIR_FL    /* Top of directory hierarchies*/
8412 +#define EXT2_IXUNLINK_FL               FS_IXUNLINK_FL  /* Immutable invert on unlink */
8413  #define EXT2_RESERVED_FL               FS_RESERVED_FL  /* reserved for ext2 lib */
8414  
8415 +#define EXT2_BARRIER_FL                        FS_BARRIER_FL   /* Barrier for chroot() */
8416 +#define EXT2_COW_FL                    FS_COW_FL       /* Copy on Write marker */
8417 +
8418  #define EXT2_FL_USER_VISIBLE           FS_FL_USER_VISIBLE      /* User visible flags */
8419  #define EXT2_FL_USER_MODIFIABLE                FS_FL_USER_MODIFIABLE   /* User modifiable flags */
8420  
8421 @@ -274,7 +278,8 @@ struct ext2_inode {
8422                         __u16   i_pad1;
8423                         __le16  l_i_uid_high;   /* these 2 fields    */
8424                         __le16  l_i_gid_high;   /* were reserved2[0] */
8425 -                       __u32   l_i_reserved2;
8426 +                       __le16  l_i_tag;        /* Context Tag */
8427 +                       __u16   l_i_reserved2;
8428                 } linux2;
8429                 struct {
8430                         __u8    h_i_frag;       /* Fragment number */
8431 @@ -303,6 +308,7 @@ struct ext2_inode {
8432  #define i_gid_low      i_gid
8433  #define i_uid_high     osd2.linux2.l_i_uid_high
8434  #define i_gid_high     osd2.linux2.l_i_gid_high
8435 +#define i_raw_tag      osd2.linux2.l_i_tag
8436  #define i_reserved2    osd2.linux2.l_i_reserved2
8437  #endif
8438  
8439 @@ -347,6 +353,7 @@ struct ext2_inode {
8440  #define EXT2_MOUNT_USRQUOTA            0x020000  /* user quota */
8441  #define EXT2_MOUNT_GRPQUOTA            0x040000  /* group quota */
8442  #define EXT2_MOUNT_RESERVATION         0x080000  /* Preallocation */
8443 +#define EXT2_MOUNT_TAGGED              (1<<24)   /* Enable Context Tags */
8444  
8445  
8446  #define clear_opt(o, opt)              o &= ~EXT2_MOUNT_##opt
8447 diff -NurpP --minimal linux-2.6.31.6/include/linux/ext3_fs.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/ext3_fs.h
8448 --- linux-2.6.31.6/include/linux/ext3_fs.h      2009-09-10 15:26:25.000000000 +0200
8449 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/ext3_fs.h        2009-10-12 05:20:40.000000000 +0200
8450 @@ -173,10 +173,14 @@ struct ext3_group_desc
8451  #define EXT3_NOTAIL_FL                 0x00008000 /* file tail should not be merged */
8452  #define EXT3_DIRSYNC_FL                        0x00010000 /* dirsync behaviour (directories only) */
8453  #define EXT3_TOPDIR_FL                 0x00020000 /* Top of directory hierarchies*/
8454 +#define EXT3_IXUNLINK_FL               0x08000000 /* Immutable invert on unlink */
8455  #define EXT3_RESERVED_FL               0x80000000 /* reserved for ext3 lib */
8456  
8457 -#define EXT3_FL_USER_VISIBLE           0x0003DFFF /* User visible flags */
8458 -#define EXT3_FL_USER_MODIFIABLE                0x000380FF /* User modifiable flags */
8459 +#define EXT3_BARRIER_FL                        0x04000000 /* Barrier for chroot() */
8460 +#define EXT3_COW_FL                    0x20000000 /* Copy on Write marker */
8461 +
8462 +#define EXT3_FL_USER_VISIBLE           0x0103DFFF /* User visible flags */
8463 +#define EXT3_FL_USER_MODIFIABLE                0x010380FF /* User modifiable flags */
8464  
8465  /* Flags that should be inherited by new inodes from their parent. */
8466  #define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\
8467 @@ -320,7 +324,8 @@ struct ext3_inode {
8468                         __u16   i_pad1;
8469                         __le16  l_i_uid_high;   /* these 2 fields    */
8470                         __le16  l_i_gid_high;   /* were reserved2[0] */
8471 -                       __u32   l_i_reserved2;
8472 +                       __le16  l_i_tag;        /* Context Tag */
8473 +                       __u16   l_i_reserved2;
8474                 } linux2;
8475                 struct {
8476                         __u8    h_i_frag;       /* Fragment number */
8477 @@ -351,6 +356,7 @@ struct ext3_inode {
8478  #define i_gid_low      i_gid
8479  #define i_uid_high     osd2.linux2.l_i_uid_high
8480  #define i_gid_high     osd2.linux2.l_i_gid_high
8481 +#define i_raw_tag      osd2.linux2.l_i_tag
8482  #define i_reserved2    osd2.linux2.l_i_reserved2
8483  
8484  #elif defined(__GNU__)
8485 @@ -414,6 +420,7 @@ struct ext3_inode {
8486  #define EXT3_MOUNT_GRPQUOTA            0x200000 /* "old" group quota */
8487  #define EXT3_MOUNT_DATA_ERR_ABORT      0x400000 /* Abort on file data write
8488                                                   * error in ordered mode */
8489 +#define EXT3_MOUNT_TAGGED              (1<<24) /* Enable Context Tags */
8490  
8491  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
8492  #ifndef _LINUX_EXT2_FS_H
8493 @@ -892,6 +899,7 @@ extern void ext3_get_inode_flags(struct 
8494  extern void ext3_set_aops(struct inode *inode);
8495  extern int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
8496                        u64 start, u64 len);
8497 +extern int ext3_sync_flags(struct inode *, int, int);
8498  
8499  /* ioctl.c */
8500  extern long ext3_ioctl(struct file *, unsigned int, unsigned long);
8501 diff -NurpP --minimal linux-2.6.31.6/include/linux/fs.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/fs.h
8502 --- linux-2.6.31.6/include/linux/fs.h   2009-09-10 15:26:25.000000000 +0200
8503 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/fs.h     2009-10-06 23:35:53.000000000 +0200
8504 @@ -205,6 +205,9 @@ struct inodes_stat_t {
8505  #define MS_KERNMOUNT   (1<<22) /* this is a kern_mount call */
8506  #define MS_I_VERSION   (1<<23) /* Update inode I_version field */
8507  #define MS_STRICTATIME (1<<24) /* Always perform atime updates */
8508 +#define MS_TAGGED      (1<<25) /* use generic inode tagging */
8509 +#define MS_TAGID       (1<<26) /* use specific tag for this mount */
8510 +#define MS_NOTAGCHECK  (1<<27) /* don't check tags */
8511  #define MS_ACTIVE      (1<<30)
8512  #define MS_NOUSER      (1<<31)
8513  
8514 @@ -231,6 +234,14 @@ struct inodes_stat_t {
8515  #define S_NOCMTIME     128     /* Do not update file c/mtime */
8516  #define S_SWAPFILE     256     /* Do not truncate: swapon got its bmaps */
8517  #define S_PRIVATE      512     /* Inode is fs-internal */
8518 +#define S_IXUNLINK     1024    /* Immutable Invert on unlink */
8519 +
8520 +/* Linux-VServer related Inode flags */
8521 +
8522 +#define V_VALID                1
8523 +#define V_XATTR                2
8524 +#define V_BARRIER      4       /* Barrier for chroot() */
8525 +#define V_COW          8       /* Copy on Write */
8526  
8527  /*
8528   * Note that nosuid etc flags are inode-specific: setting some file-system
8529 @@ -253,12 +264,15 @@ struct inodes_stat_t {
8530  #define IS_DIRSYNC(inode)      (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \
8531                                         ((inode)->i_flags & (S_SYNC|S_DIRSYNC)))
8532  #define IS_MANDLOCK(inode)     __IS_FLG(inode, MS_MANDLOCK)
8533 -#define IS_NOATIME(inode)   __IS_FLG(inode, MS_RDONLY|MS_NOATIME)
8534 -#define IS_I_VERSION(inode)   __IS_FLG(inode, MS_I_VERSION)
8535 +#define IS_NOATIME(inode)      __IS_FLG(inode, MS_RDONLY|MS_NOATIME)
8536 +#define IS_I_VERSION(inode)    __IS_FLG(inode, MS_I_VERSION)
8537 +#define IS_TAGGED(inode)       __IS_FLG(inode, MS_TAGGED)
8538  
8539  #define IS_NOQUOTA(inode)      ((inode)->i_flags & S_NOQUOTA)
8540  #define IS_APPEND(inode)       ((inode)->i_flags & S_APPEND)
8541  #define IS_IMMUTABLE(inode)    ((inode)->i_flags & S_IMMUTABLE)
8542 +#define IS_IXUNLINK(inode)     ((inode)->i_flags & S_IXUNLINK)
8543 +#define IS_IXORUNLINK(inode)   ((IS_IXUNLINK(inode) ? S_IMMUTABLE : 0) ^ IS_IMMUTABLE(inode))
8544  #define IS_POSIXACL(inode)     __IS_FLG(inode, MS_POSIXACL)
8545  
8546  #define IS_DEADDIR(inode)      ((inode)->i_flags & S_DEAD)
8547 @@ -266,6 +280,16 @@ struct inodes_stat_t {
8548  #define IS_SWAPFILE(inode)     ((inode)->i_flags & S_SWAPFILE)
8549  #define IS_PRIVATE(inode)      ((inode)->i_flags & S_PRIVATE)
8550  
8551 +#define IS_BARRIER(inode)      (S_ISDIR((inode)->i_mode) && ((inode)->i_vflags & V_BARRIER))
8552 +
8553 +#ifdef CONFIG_VSERVER_COWBL
8554 +#  define IS_COW(inode)                (IS_IXUNLINK(inode) && IS_IMMUTABLE(inode))
8555 +#  define IS_COW_LINK(inode)   (S_ISREG((inode)->i_mode) && ((inode)->i_nlink > 1))
8556 +#else
8557 +#  define IS_COW(inode)                (0)
8558 +#  define IS_COW_LINK(inode)   (0)
8559 +#endif
8560 +
8561  /* the read-only stuff doesn't really belong here, but any other place is
8562     probably as bad and I don't want to create yet another include file. */
8563  
8564 @@ -343,11 +367,14 @@ struct inodes_stat_t {
8565  #define FS_TOPDIR_FL                   0x00020000 /* Top of directory hierarchies*/
8566  #define FS_EXTENT_FL                   0x00080000 /* Extents */
8567  #define FS_DIRECTIO_FL                 0x00100000 /* Use direct i/o */
8568 +#define FS_IXUNLINK_FL                 0x08000000 /* Immutable invert on unlink */
8569  #define FS_RESERVED_FL                 0x80000000 /* reserved for ext2 lib */
8570  
8571 -#define FS_FL_USER_VISIBLE             0x0003DFFF /* User visible flags */
8572 -#define FS_FL_USER_MODIFIABLE          0x000380FF /* User modifiable flags */
8573 +#define FS_BARRIER_FL                  0x04000000 /* Barrier for chroot() */
8574 +#define FS_COW_FL                      0x20000000 /* Copy on Write marker */
8575  
8576 +#define FS_FL_USER_VISIBLE             0x0103DFFF /* User visible flags */
8577 +#define FS_FL_USER_MODIFIABLE          0x010380FF /* User modifiable flags */
8578  
8579  #define SYNC_FILE_RANGE_WAIT_BEFORE    1
8580  #define SYNC_FILE_RANGE_WRITE          2
8581 @@ -429,6 +456,7 @@ typedef void (dio_iodone_t)(struct kiocb
8582  #define ATTR_KILL_PRIV (1 << 14)
8583  #define ATTR_OPEN      (1 << 15) /* Truncating from open(O_TRUNC) */
8584  #define ATTR_TIMES_SET (1 << 16)
8585 +#define ATTR_TAG       (1 << 17)
8586  
8587  /*
8588   * This is the Inode Attributes structure, used for notify_change().  It
8589 @@ -444,6 +472,7 @@ struct iattr {
8590         umode_t         ia_mode;
8591         uid_t           ia_uid;
8592         gid_t           ia_gid;
8593 +       tag_t           ia_tag;
8594         loff_t          ia_size;
8595         struct timespec ia_atime;
8596         struct timespec ia_mtime;
8597 @@ -457,6 +486,9 @@ struct iattr {
8598         struct file     *ia_file;
8599  };
8600  
8601 +#define ATTR_FLAG_BARRIER      512     /* Barrier for chroot() */
8602 +#define ATTR_FLAG_IXUNLINK     1024    /* Immutable invert on unlink */
8603 +
8604  /*
8605   * Includes for diskquotas.
8606   */
8607 @@ -723,7 +755,9 @@ struct inode {
8608         unsigned int            i_nlink;
8609         uid_t                   i_uid;
8610         gid_t                   i_gid;
8611 +       tag_t                   i_tag;
8612         dev_t                   i_rdev;
8613 +       dev_t                   i_mdev;         /* device number reported by imajor()/iminor() */
8614         u64                     i_version;
8615         loff_t                  i_size;
8616  #ifdef __NEED_I_SIZE_ORDERED
8617 @@ -770,7 +804,8 @@ struct inode {
8618         unsigned long           i_state;
8619         unsigned long           dirtied_when;   /* jiffies of first dirtying */
8620  
8621 -       unsigned int            i_flags;
8622 +       unsigned short          i_flags;
8623 +       unsigned short          i_vflags;       /* V_* bits; IS_BARRIER() tests V_BARRIER here */
8624  
8625         atomic_t                i_writecount;
8626  #ifdef CONFIG_SECURITY
8627 @@ -858,12 +893,12 @@ static inline void i_size_write(struct i
8628  
8629  static inline unsigned iminor(const struct inode *inode)
8630  {
8631 -       return MINOR(inode->i_rdev);
8632 +       return MINOR(inode->i_mdev);
8633  }
8634  
8635  static inline unsigned imajor(const struct inode *inode)
8636  {
8637 -       return MAJOR(inode->i_rdev);
8638 +       return MAJOR(inode->i_mdev);
8639  }
8640  
8641  extern struct block_device *I_BDEV(struct inode *inode);
8642 @@ -922,6 +957,7 @@ struct file {
8643         loff_t                  f_pos;
8644         struct fown_struct      f_owner;
8645         const struct cred       *f_cred;
8646 +       xid_t                   f_xid;
8647         struct file_ra_state    f_ra;
8648  
8649         u64                     f_version;
8650 @@ -1063,6 +1099,7 @@ struct file_lock {
8651         struct file *fl_file;
8652         loff_t fl_start;
8653         loff_t fl_end;
8654 +       xid_t fl_xid;
8655  
8656         struct fasync_struct *  fl_fasync; /* for lease break notifications */
8657         unsigned long fl_break_time;    /* for nonblocking lease breaks */
8658 @@ -1534,6 +1571,7 @@ struct inode_operations {
8659         ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
8660         ssize_t (*listxattr) (struct dentry *, char *, size_t);
8661         int (*removexattr) (struct dentry *, const char *);
8662 +       int (*sync_flags) (struct inode *, int, int);
8663         void (*truncate_range)(struct inode *, loff_t, loff_t);
8664         long (*fallocate)(struct inode *inode, int mode, loff_t offset,
8665                           loff_t len);
8666 @@ -1554,6 +1592,7 @@ extern ssize_t vfs_readv(struct file *, 
8667                 unsigned long, loff_t *);
8668  extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
8669                 unsigned long, loff_t *);
8670 +ssize_t vfs_sendfile(struct file *, struct file *, loff_t *, size_t, loff_t);
8671  
8672  struct super_operations {
8673         struct inode *(*alloc_inode)(struct super_block *sb);
8674 @@ -2328,6 +2367,7 @@ extern int dcache_dir_open(struct inode 
8675  extern int dcache_dir_close(struct inode *, struct file *);
8676  extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
8677  extern int dcache_readdir(struct file *, void *, filldir_t);
8678 +extern int dcache_readdir_filter(struct file *, void *, filldir_t, int (*)(struct dentry *));
8679  extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *);
8680  extern int simple_statfs(struct dentry *, struct kstatfs *);
8681  extern int simple_link(struct dentry *, struct inode *, struct dentry *);
8682 diff -NurpP --minimal linux-2.6.31.6/include/linux/gfs2_ondisk.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/gfs2_ondisk.h
8683 --- linux-2.6.31.6/include/linux/gfs2_ondisk.h  2009-03-24 14:22:41.000000000 +0100
8684 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/gfs2_ondisk.h    2009-10-07 18:20:44.000000000 +0200
8685 @@ -235,6 +235,9 @@ enum {
8686         gfs2fl_NoAtime          = 7,
8687         gfs2fl_Sync             = 8,
8688         gfs2fl_System           = 9,
8689 +       gfs2fl_IXUnlink         = 16,
8690 +       gfs2fl_Barrier          = 17,
8691 +       gfs2fl_Cow              = 18,
8692         gfs2fl_TruncInProg      = 29,
8693         gfs2fl_InheritDirectio  = 30,
8694         gfs2fl_InheritJdata     = 31,
8695 @@ -251,6 +254,9 @@ enum {
8696  #define GFS2_DIF_NOATIME               0x00000080
8697  #define GFS2_DIF_SYNC                  0x00000100
8698  #define GFS2_DIF_SYSTEM                        0x00000200 /* New in gfs2 */
8699 +#define GFS2_DIF_IXUNLINK              0x00010000
8700 +#define GFS2_DIF_BARRIER               0x00020000
8701 +#define GFS2_DIF_COW                   0x00040000
8702  #define GFS2_DIF_TRUNC_IN_PROG         0x20000000 /* New in gfs2 */
8703  #define GFS2_DIF_INHERIT_DIRECTIO      0x40000000
8704  #define GFS2_DIF_INHERIT_JDATA         0x80000000
8705 diff -NurpP --minimal linux-2.6.31.6/include/linux/if_tun.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/if_tun.h
8706 --- linux-2.6.31.6/include/linux/if_tun.h       2009-09-10 15:26:25.000000000 +0200
8707 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/if_tun.h 2009-09-10 16:11:43.000000000 +0200
8708 @@ -48,6 +48,7 @@
8709  #define TUNGETIFF      _IOR('T', 210, unsigned int)
8710  #define TUNGETSNDBUF   _IOR('T', 211, int)
8711  #define TUNSETSNDBUF   _IOW('T', 212, int)
8712 +#define TUNSETNID     _IOW('T', 215, int)
8713  
8714  /* TUNSETIFF ifr flags */
8715  #define IFF_TUN                0x0001
8716 diff -NurpP --minimal linux-2.6.31.6/include/linux/init_task.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/init_task.h
8717 --- linux-2.6.31.6/include/linux/init_task.h    2009-09-10 15:26:25.000000000 +0200
8718 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/init_task.h      2009-09-10 17:13:45.000000000 +0200
8719 @@ -173,6 +173,10 @@ extern struct cred init_cred;
8720         INIT_LOCKDEP                                                    \
8721         INIT_FTRACE_GRAPH                                               \
8722         INIT_TRACE_RECURSION                                            \
8723 +       .xid            = 0,                                            \
8724 +       .vx_info        = NULL,                                         \
8725 +       .nid            = 0,                                            \
8726 +       .nx_info        = NULL,                                         \
8727  }
8728  
8729  
8730 diff -NurpP --minimal linux-2.6.31.6/include/linux/interrupt.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/interrupt.h
8731 --- linux-2.6.31.6/include/linux/interrupt.h    2009-09-10 15:26:25.000000000 +0200
8732 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/interrupt.h      2009-09-10 16:11:43.000000000 +0200
8733 @@ -9,8 +9,8 @@
8734  #include <linux/cpumask.h>
8735  #include <linux/irqreturn.h>
8736  #include <linux/irqnr.h>
8737 -#include <linux/hardirq.h>
8738  #include <linux/sched.h>
8739 +#include <linux/hardirq.h>
8740  #include <linux/irqflags.h>
8741  #include <linux/smp.h>
8742  #include <linux/percpu.h>
8743 diff -NurpP --minimal linux-2.6.31.6/include/linux/ipc.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/ipc.h
8744 --- linux-2.6.31.6/include/linux/ipc.h  2008-12-25 00:26:37.000000000 +0100
8745 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/ipc.h    2009-09-10 16:11:43.000000000 +0200
8746 @@ -93,6 +93,7 @@ struct kern_ipc_perm
8747         key_t           key;
8748         uid_t           uid;
8749         gid_t           gid;
8750 +       xid_t           xid;
8751         uid_t           cuid;
8752         gid_t           cgid;
8753         mode_t          mode; 
8754 diff -NurpP --minimal linux-2.6.31.6/include/linux/Kbuild linux-2.6.31.6-vs2.3.0.36.24/include/linux/Kbuild
8755 --- linux-2.6.31.6/include/linux/Kbuild 2009-09-10 15:26:24.000000000 +0200
8756 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/Kbuild   2009-09-10 16:11:43.000000000 +0200
8757 @@ -376,5 +376,8 @@ unifdef-y += xattr.h
8758  unifdef-y += xfrm.h
8759  
8760  objhdr-y += version.h
8761 +
8762 +header-y += vserver/
8763  header-y += wimax.h
8764  header-y += wimax/
8765 +
8766 diff -NurpP --minimal linux-2.6.31.6/include/linux/loop.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/loop.h
8767 --- linux-2.6.31.6/include/linux/loop.h 2009-09-10 15:26:25.000000000 +0200
8768 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/loop.h   2009-09-10 16:11:43.000000000 +0200
8769 @@ -45,6 +45,7 @@ struct loop_device {
8770         struct loop_func_table *lo_encryption;
8771         __u32           lo_init[2];
8772         uid_t           lo_key_owner;   /* Who set the key */
8773 +       xid_t           lo_xid;
8774         int             (*ioctl)(struct loop_device *, int cmd, 
8775                                  unsigned long arg); 
8776  
8777 diff -NurpP --minimal linux-2.6.31.6/include/linux/magic.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/magic.h
8778 --- linux-2.6.31.6/include/linux/magic.h        2009-09-10 15:26:25.000000000 +0200
8779 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/magic.h  2009-09-10 16:11:43.000000000 +0200
8780 @@ -3,7 +3,7 @@
8781  
8782  #define ADFS_SUPER_MAGIC       0xadf5
8783  #define AFFS_SUPER_MAGIC       0xadff
8784 -#define AFS_SUPER_MAGIC                0x5346414F
8785 +#define AFS_SUPER_MAGIC                0x5346414F
8786  #define AUTOFS_SUPER_MAGIC     0x0187
8787  #define CODA_SUPER_MAGIC       0x73757245
8788  #define CRAMFS_MAGIC           0x28cd3d45      /* some random number */
8789 @@ -36,6 +36,7 @@
8790  #define NFS_SUPER_MAGIC                0x6969
8791  #define OPENPROM_SUPER_MAGIC   0x9fa1
8792  #define PROC_SUPER_MAGIC       0x9fa0
8793 +#define DEVPTS_SUPER_MAGIC     0x1cd1
8794  #define QNX4_SUPER_MAGIC       0x002f          /* qnx4 fs detection */
8795  
8796  #define REISERFS_SUPER_MAGIC   0x52654973      /* used by gcc */
8797 diff -NurpP --minimal linux-2.6.31.6/include/linux/major.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/major.h
8798 --- linux-2.6.31.6/include/linux/major.h        2009-09-10 15:26:25.000000000 +0200
8799 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/major.h  2009-09-10 16:11:43.000000000 +0200
8800 @@ -15,6 +15,7 @@
8801  #define HD_MAJOR               IDE0_MAJOR
8802  #define PTY_SLAVE_MAJOR                3
8803  #define TTY_MAJOR              4
8804 +#define VROOT_MAJOR            4       /* NOTE(review): same value as TTY_MAJOR above; confirm the two are registered in different (char vs. block) number spaces */
8805  #define TTYAUX_MAJOR           5
8806  #define LP_MAJOR               6
8807  #define VCS_MAJOR              7
8808 diff -NurpP --minimal linux-2.6.31.6/include/linux/mm_types.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/mm_types.h
8809 --- linux-2.6.31.6/include/linux/mm_types.h     2009-09-10 15:26:25.000000000 +0200
8810 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/mm_types.h       2009-09-10 16:11:43.000000000 +0200
8811 @@ -244,6 +244,7 @@ struct mm_struct {
8812  
8813         /* Architecture-specific MM context */
8814         mm_context_t context;
8815 +       struct vx_info *mm_vx_info;
8816  
8817         /* Swap token stuff */
8818         /*
8819 diff -NurpP --minimal linux-2.6.31.6/include/linux/mount.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/mount.h
8820 --- linux-2.6.31.6/include/linux/mount.h        2009-09-10 15:26:25.000000000 +0200
8821 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/mount.h  2009-09-10 17:14:39.000000000 +0200
8822 @@ -36,6 +36,9 @@ struct mnt_namespace;
8823  #define MNT_UNBINDABLE 0x2000  /* if the vfsmount is a unbindable mount */
8824  #define MNT_PNODE_MASK 0x3000  /* propagation flag mask */
8825  
8826 +#define MNT_TAGID      0x10000
8827 +#define MNT_NOTAG      0x20000
8828 +
8829  struct vfsmount {
8830         struct list_head mnt_hash;
8831         struct vfsmount *mnt_parent;    /* fs we are mounted on */
8832 @@ -70,6 +73,7 @@ struct vfsmount {
8833  #else
8834         int mnt_writers;
8835  #endif
8836 +       tag_t mnt_tag;                  /* tagging used for vfsmount */
8837  };
8838  
8839  static inline int *get_mnt_writers_ptr(struct vfsmount *mnt)
8840 diff -NurpP --minimal linux-2.6.31.6/include/linux/net.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/net.h
8841 --- linux-2.6.31.6/include/linux/net.h  2009-06-11 17:13:15.000000000 +0200
8842 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/net.h    2009-09-10 16:11:43.000000000 +0200
8843 @@ -68,6 +68,7 @@ struct net;
8844  #define SOCK_NOSPACE           2
8845  #define SOCK_PASSCRED          3
8846  #define SOCK_PASSSEC           4
8847 +#define SOCK_USER_SOCKET       5
8848  
8849  #ifndef ARCH_HAS_SOCKET_TYPES
8850  /**
8851 diff -NurpP --minimal linux-2.6.31.6/include/linux/nfs_mount.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/nfs_mount.h
8852 --- linux-2.6.31.6/include/linux/nfs_mount.h    2009-03-24 14:22:43.000000000 +0100
8853 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/nfs_mount.h      2009-09-10 16:11:43.000000000 +0200
8854 @@ -63,7 +63,8 @@ struct nfs_mount_data {
8855  #define NFS_MOUNT_SECFLAVOUR   0x2000  /* 5 */
8856  #define NFS_MOUNT_NORDIRPLUS   0x4000  /* 5 */
8857  #define NFS_MOUNT_UNSHARED     0x8000  /* 5 */
8858 -#define NFS_MOUNT_FLAGMASK     0xFFFF
8859 +#define NFS_MOUNT_TAGGED       0x01000000      /* context tagging; kept clear of the internal-use flags, which start at 0x10000 (NFS_MOUNT_LOOKUP_CACHE_NONEG) */
8860 +#define NFS_MOUNT_FLAGMASK     0x0100FFFF      /* user-settable bits: the original 0xFFFF plus NFS_MOUNT_TAGGED */
8861  
8862  /* The following are for internal use only */
8863  #define NFS_MOUNT_LOOKUP_CACHE_NONEG   0x10000
8864 diff -NurpP --minimal linux-2.6.31.6/include/linux/nsproxy.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/nsproxy.h
8865 --- linux-2.6.31.6/include/linux/nsproxy.h      2009-06-11 17:13:17.000000000 +0200
8866 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/nsproxy.h        2009-09-10 16:11:43.000000000 +0200
8867 @@ -3,6 +3,7 @@
8868  
8869  #include <linux/spinlock.h>
8870  #include <linux/sched.h>
8871 +#include <linux/vserver/debug.h>
8872  
8873  struct mnt_namespace;
8874  struct uts_namespace;
8875 @@ -63,22 +64,33 @@ static inline struct nsproxy *task_nspro
8876  }
8877  
8878  int copy_namespaces(unsigned long flags, struct task_struct *tsk);
8879 +struct nsproxy *copy_nsproxy(struct nsproxy *orig);
8880  void exit_task_namespaces(struct task_struct *tsk);
8881  void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
8882  void free_nsproxy(struct nsproxy *ns);
8883  int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
8884         struct fs_struct *);
8885  
8886 -static inline void put_nsproxy(struct nsproxy *ns)
8887 +#define        get_nsproxy(n)  __get_nsproxy(n, __FILE__, __LINE__)
8888 +
8889 +static inline void __get_nsproxy(struct nsproxy *ns,
8890 +       const char *_file, int _line)
8891  {
8892 -       if (atomic_dec_and_test(&ns->count)) {
8893 -               free_nsproxy(ns);
8894 -       }
8895 +       vxlprintk(VXD_CBIT(space, 0), "get_nsproxy(%p[%u])",
8896 +               ns, atomic_read(&ns->count), _file, _line);
8897 +       atomic_inc(&ns->count);
8898  }
8899  
8900 -static inline void get_nsproxy(struct nsproxy *ns)
8901 +#define        put_nsproxy(n)  __put_nsproxy(n, __FILE__, __LINE__)
8902 +
8903 +static inline void __put_nsproxy(struct nsproxy *ns,
8904 +       const char *_file, int _line)
8905  {
8906 -       atomic_inc(&ns->count);
8907 +       vxlprintk(VXD_CBIT(space, 0), "put_nsproxy(%p[%u])",
8908 +               ns, atomic_read(&ns->count), _file, _line);
8909 +       if (atomic_dec_and_test(&ns->count)) {
8910 +               free_nsproxy(ns);
8911 +       }
8912  }
8913  
8914  #ifdef CONFIG_CGROUP_NS
8915 diff -NurpP --minimal linux-2.6.31.6/include/linux/pid.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/pid.h
8916 --- linux-2.6.31.6/include/linux/pid.h  2009-03-24 14:22:43.000000000 +0100
8917 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/pid.h    2009-09-10 16:11:43.000000000 +0200
8918 @@ -8,7 +8,8 @@ enum pid_type
8919         PIDTYPE_PID,
8920         PIDTYPE_PGID,
8921         PIDTYPE_SID,
8922 -       PIDTYPE_MAX
8923 +       PIDTYPE_MAX,
8924 +       PIDTYPE_REALPID         /* == PIDTYPE_MAX + 1, i.e. outside any range bounded by PIDTYPE_MAX */
8925  };
8926  
8927  /*
8928 @@ -160,6 +161,7 @@ static inline pid_t pid_nr(struct pid *p
8929  }
8930  
8931  pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns);
8932 +pid_t pid_unmapped_nr_ns(struct pid *pid, struct pid_namespace *ns);
8933  pid_t pid_vnr(struct pid *pid);
8934  
8935  #define do_each_pid_task(pid, type, task)                              \
8936 diff -NurpP --minimal linux-2.6.31.6/include/linux/proc_fs.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/proc_fs.h
8937 --- linux-2.6.31.6/include/linux/proc_fs.h      2009-09-10 15:26:26.000000000 +0200
8938 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/proc_fs.h        2009-09-10 16:11:43.000000000 +0200
8939 @@ -56,6 +56,7 @@ struct proc_dir_entry {
8940         nlink_t nlink;
8941         uid_t uid;
8942         gid_t gid;
8943 +       int vx_flags;
8944         loff_t size;
8945         const struct inode_operations *proc_iops;
8946         /*
8947 @@ -240,12 +241,18 @@ static inline void kclist_add(struct kco
8948  extern void kclist_add(struct kcore_list *, void *, size_t);
8949  #endif
8950  
8951 +struct vx_info;
8952 +struct nx_info;
8953 +
8954  union proc_op {
8955         int (*proc_get_link)(struct inode *, struct path *);
8956         int (*proc_read)(struct task_struct *task, char *page);
8957         int (*proc_show)(struct seq_file *m,
8958                 struct pid_namespace *ns, struct pid *pid,
8959                 struct task_struct *task);
8960 +       int (*proc_vs_read)(char *page);
8961 +       int (*proc_vxi_read)(struct vx_info *vxi, char *page);
8962 +       int (*proc_nxi_read)(struct nx_info *nxi, char *page);
8963  };
8964  
8965  struct ctl_table_header;
8966 @@ -253,6 +260,7 @@ struct ctl_table;
8967  
8968  struct proc_inode {
8969         struct pid *pid;
8970 +       int vx_flags;
8971         int fd;
8972         union proc_op op;
8973         struct proc_dir_entry *pde;
8974 diff -NurpP --minimal linux-2.6.31.6/include/linux/quotaops.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/quotaops.h
8975 --- linux-2.6.31.6/include/linux/quotaops.h     2009-09-10 15:26:26.000000000 +0200
8976 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/quotaops.h       2009-10-12 02:07:55.000000000 +0200
8977 @@ -8,6 +8,7 @@
8978  #define _LINUX_QUOTAOPS_
8979  
8980  #include <linux/fs.h>
8981 +#include <linux/vs_dlimit.h>
8982  
8983  static inline struct quota_info *sb_dqopt(struct super_block *sb)
8984  {
8985 @@ -154,10 +155,14 @@ static inline void vfs_dq_init(struct in
8986   * a transaction (deadlocks possible otherwise) */
8987  static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr)
8988  {
8989 +       if (dl_alloc_space(inode, nr))  /* dl_* charge is taken first; rolled back with dl_free_space() below if the quota call returns NO_QUOTA */
8990 +               return 1;
8991         if (sb_any_quota_active(inode->i_sb)) {
8992                 /* Used space is updated in alloc_space() */
8993 -               if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA)
8994 +               if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA) {
8995 +                       dl_free_space(inode, nr);
8996                         return 1;
8997 +               }
8998         }
8999         else
9000                 inode_add_bytes(inode, nr);
9001 @@ -174,10 +179,14 @@ static inline int vfs_dq_prealloc_space(
9002  
9003  static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr)
9004  {
9005 +       if (dl_alloc_space(inode, nr))
9006 +               return 1;
9007         if (sb_any_quota_active(inode->i_sb)) {
9008                 /* Used space is updated in alloc_space() */
9009 -               if (inode->i_sb->dq_op->alloc_space(inode, nr, 0) == NO_QUOTA)
9010 +               if (inode->i_sb->dq_op->alloc_space(inode, nr, 0) == NO_QUOTA) {
9011 +                       dl_free_space(inode, nr);
9012                         return 1;
9013 +               }
9014         }
9015         else
9016                 inode_add_bytes(inode, nr);
9017 @@ -194,20 +203,28 @@ static inline int vfs_dq_alloc_space(str
9018  
9019  static inline int vfs_dq_reserve_space(struct inode *inode, qsize_t nr)
9020  {
9021 +       if (dl_reserve_space(inode, nr))
9022 +               return 1;
9023         if (sb_any_quota_active(inode->i_sb)) {
9024                 /* Used space is updated in alloc_space() */
9025 -               if (inode->i_sb->dq_op->reserve_space(inode, nr, 0) == NO_QUOTA)
9026 +               if (inode->i_sb->dq_op->reserve_space(inode, nr, 0) == NO_QUOTA) {
9027 +                       dl_release_space(inode, nr);
9028                         return 1;
9029 +               }
9030         }
9031         return 0;
9032  }
9033  
9034  static inline int vfs_dq_alloc_inode(struct inode *inode)
9035  {
9036 +       if (dl_alloc_inode(inode))
9037 +               return 1;
9038         if (sb_any_quota_active(inode->i_sb)) {
9039                 vfs_dq_init(inode);
9040 -               if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA)
9041 +               if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
9042 +                       dl_free_inode(inode);
9043                         return 1;
9044 +               }
9045         }
9046         return 0;
9047  }
9048 @@ -217,9 +234,13 @@ static inline int vfs_dq_alloc_inode(str
9049   */
9050  static inline int vfs_dq_claim_space(struct inode *inode, qsize_t nr)
9051  {
9052 +       if (dl_claim_space(inode, nr))
9053 +               return 1;
9054         if (sb_any_quota_active(inode->i_sb)) {
9055 -               if (inode->i_sb->dq_op->claim_space(inode, nr) == NO_QUOTA)
9056 +               if (inode->i_sb->dq_op->claim_space(inode, nr) == NO_QUOTA) {
9057 +                       dl_release_space(inode, nr);
9058                         return 1;
9059 +               }
9060         } else
9061                 inode_add_bytes(inode, nr);
9062  
9063 @@ -235,6 +256,7 @@ void vfs_dq_release_reservation_space(st
9064  {
9065         if (sb_any_quota_active(inode->i_sb))
9066                 inode->i_sb->dq_op->release_rsv(inode, nr);
9067 +       dl_release_space(inode, nr);
9068  }
9069  
9070  static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
9071 @@ -243,6 +265,7 @@ static inline void vfs_dq_free_space_nod
9072                 inode->i_sb->dq_op->free_space(inode, nr);
9073         else
9074                 inode_sub_bytes(inode, nr);
9075 +       dl_free_space(inode, nr);
9076  }
9077  
9078  static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
9079 @@ -255,6 +278,7 @@ static inline void vfs_dq_free_inode(str
9080  {
9081         if (sb_any_quota_active(inode->i_sb))
9082                 inode->i_sb->dq_op->free_inode(inode, 1);
9083 +       dl_free_inode(inode);
9084  }
9085  
9086  /* Cannot be called inside a transaction */
9087 @@ -358,6 +382,8 @@ static inline int vfs_dq_transfer(struct
9088  
9089  static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr)
9090  {
9091 +       if (dl_alloc_space(inode, nr))
9092 +               return 1;
9093         inode_add_bytes(inode, nr);
9094         return 0;
9095  }
9096 @@ -371,6 +397,8 @@ static inline int vfs_dq_prealloc_space(
9097  
9098  static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr)
9099  {
9100 +       if (dl_alloc_space(inode, nr))
9101 +               return 1;
9102         inode_add_bytes(inode, nr);
9103         return 0;
9104  }
9105 @@ -384,22 +412,28 @@ static inline int vfs_dq_alloc_space(str
9106  
9107  static inline int vfs_dq_reserve_space(struct inode *inode, qsize_t nr)
9108  {
9109 +       if (dl_reserve_space(inode, nr))
9110 +               return 1;
9111         return 0;
9112  }
9113  
9114  static inline int vfs_dq_claim_space(struct inode *inode, qsize_t nr)
9115  {
9116 +       if (dl_claim_space(inode, nr))  /* NOTE(review): vfs_dq_alloc_space() is called next, and its _nodirty variant also calls dl_alloc_space() - confirm nr is not charged twice */
9117 +               return 1;
9118         return vfs_dq_alloc_space(inode, nr);
9119  }
9120  
9121  static inline
9122  int vfs_dq_release_reservation_space(struct inode *inode, qsize_t nr)
9123  {
9124 +       dl_release_space(inode, nr);
9125         return 0;
9126  }
9127  
9128  static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
9129  {
9130 +       dl_free_space(inode, nr);
9131         inode_sub_bytes(inode, nr);
9132  }
9133  
9134 diff -NurpP --minimal linux-2.6.31.6/include/linux/reiserfs_fs.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/reiserfs_fs.h
9135 --- linux-2.6.31.6/include/linux/reiserfs_fs.h  2009-09-10 15:26:26.000000000 +0200
9136 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/reiserfs_fs.h    2009-10-07 01:27:41.000000000 +0200
9137 @@ -899,6 +899,11 @@ struct stat_data_v1 {
9138  #define REISERFS_COMPR_FL     FS_COMPR_FL
9139  #define REISERFS_NOTAIL_FL    FS_NOTAIL_FL
9140  
9141 +/* reiserfs sd_attrs is only 16 bit wide, so these flags are stored >> 16 */
9142 +#define REISERFS_IXUNLINK_FL  (FS_IXUNLINK_FL >> 16)
9143 +#define REISERFS_BARRIER_FL   (FS_BARRIER_FL >> 16)
9144 +#define REISERFS_COW_FL       (FS_COW_FL >> 16)
9145 +
9146  /* persistent flags that file inherits from the parent directory */
9147  #define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL |        \
9148                                 REISERFS_SYNC_FL |      \
9149 @@ -908,6 +913,9 @@ struct stat_data_v1 {
9150                                 REISERFS_COMPR_FL |     \
9151                                 REISERFS_NOTAIL_FL )
9152  
9153 +#define REISERFS_FL_USER_VISIBLE       0x80FF
9154 +#define REISERFS_FL_USER_MODIFIABLE    0x80FF
9155 +
9156  /* Stat Data on disk (reiserfs version of UFS disk inode minus the
9157     address blocks) */
9158  struct stat_data {
9159 @@ -1989,6 +1997,7 @@ static inline void reiserfs_update_sd(st
9160  void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode);
9161  void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs);
9162  int reiserfs_setattr(struct dentry *dentry, struct iattr *attr);
9163 +int reiserfs_sync_flags(struct inode *inode, int, int);
9164  
9165  /* namei.c */
9166  void set_de_name_and_namelen(struct reiserfs_dir_entry *de);
9167 diff -NurpP --minimal linux-2.6.31.6/include/linux/reiserfs_fs_sb.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/reiserfs_fs_sb.h
9168 --- linux-2.6.31.6/include/linux/reiserfs_fs_sb.h       2009-09-10 15:26:26.000000000 +0200
9169 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/reiserfs_fs_sb.h 2009-09-10 16:11:43.000000000 +0200
9170 @@ -456,6 +456,7 @@ enum reiserfs_mount_options {
9171         REISERFS_EXPOSE_PRIVROOT,
9172         REISERFS_BARRIER_NONE,
9173         REISERFS_BARRIER_FLUSH,
9174 +       REISERFS_TAGGED,
9175  
9176         /* Actions on error */
9177         REISERFS_ERROR_PANIC,
9178 diff -NurpP --minimal linux-2.6.31.6/include/linux/sched.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/sched.h
9179 --- linux-2.6.31.6/include/linux/sched.h        2009-09-10 15:26:26.000000000 +0200
9180 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/sched.h  2009-10-06 04:39:26.000000000 +0200
9181 @@ -383,25 +383,28 @@ extern void arch_unmap_area_topdown(stru
9182   * The mm counters are not protected by its page_table_lock,
9183   * so must be incremented atomically.
9184   */
9185 -#define set_mm_counter(mm, member, value) atomic_long_set(&(mm)->_##member, value)
9186 -#define get_mm_counter(mm, member) ((unsigned long)atomic_long_read(&(mm)->_##member))
9187 -#define add_mm_counter(mm, member, value) atomic_long_add(value, &(mm)->_##member)
9188 -#define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member)
9189 -#define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member)
9190 +#define __set_mm_counter(mm, member, value) \
9191 +       atomic_long_set(&(mm)->_##member, value)
9192 +#define get_mm_counter(mm, member) \
9193 +       ((unsigned long)atomic_long_read(&(mm)->_##member))
9194  
9195  #else  /* !USE_SPLIT_PTLOCKS */
9196  /*
9197   * The mm counters are protected by its page_table_lock,
9198   * so can be incremented directly.
9199   */
9200 -#define set_mm_counter(mm, member, value) (mm)->_##member = (value)
9201 +#define __set_mm_counter(mm, member, value) (mm)->_##member = (value)
9202  #define get_mm_counter(mm, member) ((mm)->_##member)
9203 -#define add_mm_counter(mm, member, value) (mm)->_##member += (value)
9204 -#define inc_mm_counter(mm, member) (mm)->_##member++
9205 -#define dec_mm_counter(mm, member) (mm)->_##member--
9206  
9207  #endif /* !USE_SPLIT_PTLOCKS */
9208  
9209 +#define set_mm_counter(mm, member, value) \
9210 +       vx_ ## member ## pages_sub((mm), (get_mm_counter(mm, member) - (value)))
9211 +#define add_mm_counter(mm, member, value) \
9212 +       vx_ ## member ## pages_add((mm), (value))
9213 +#define inc_mm_counter(mm, member) vx_ ## member ## pages_inc((mm))
9214 +#define dec_mm_counter(mm, member) vx_ ## member ## pages_dec((mm))
9215 +
9216  #define get_mm_rss(mm)                                 \
9217         (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
9218  #define update_hiwater_rss(mm) do {                    \
9219 @@ -1024,7 +1027,7 @@ struct sched_domain;
9220  struct sched_class {
9221         const struct sched_class *next;
9222  
9223 -       void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
9224 +       int (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
9225         void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
9226         void (*yield_task) (struct rq *rq);
9227  
9228 @@ -1124,6 +1127,7 @@ struct sched_entity {
9229         u64                     nr_failed_migrations_affine;
9230         u64                     nr_failed_migrations_running;
9231         u64                     nr_failed_migrations_hot;
9232 +       u64                     nr_failed_migrations_throttled;
9233         u64                     nr_forced_migrations;
9234         u64                     nr_forced2_migrations;
9235  
9236 @@ -1136,6 +1140,12 @@ struct sched_entity {
9237         u64                     nr_wakeups_affine_attempts;
9238         u64                     nr_wakeups_passive;
9239         u64                     nr_wakeups_idle;
9240 +#ifdef CONFIG_CFS_HARD_LIMITS
9241 +       u64                     throttle_start;
9242 +       u64                     throttle_max;
9243 +       u64                     throttle_count;
9244 +       u64                     throttle_sum;
9245 +#endif
9246  #endif
9247  
9248  #ifdef CONFIG_FAIR_GROUP_SCHED
9249 @@ -1335,6 +1345,14 @@ struct task_struct {
9250  #endif
9251         seccomp_t seccomp;
9252  
9253 +/* vserver context data */
9254 +       struct vx_info *vx_info;
9255 +       struct nx_info *nx_info;
9256 +
9257 +       xid_t xid;
9258 +       nid_t nid;
9259 +       tag_t tag;
9260 +
9261  /* Thread group tracking */
9262         u32 parent_exec_id;
9263         u32 self_exec_id;
9264 @@ -1559,6 +1577,11 @@ struct pid_namespace;
9265  pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
9266                         struct pid_namespace *ns);
9267  
9268 +#include <linux/vserver/base.h>
9269 +#include <linux/vserver/context.h>
9270 +#include <linux/vserver/debug.h>
9271 +#include <linux/vserver/pid.h>
9272 +
9273  static inline pid_t task_pid_nr(struct task_struct *tsk)
9274  {
9275         return tsk->pid;
9276 @@ -1572,7 +1595,8 @@ static inline pid_t task_pid_nr_ns(struc
9277  
9278  static inline pid_t task_pid_vnr(struct task_struct *tsk)
9279  {
9280 -       return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
9281 +       // return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
9282 +       return vx_map_pid(__task_pid_nr_ns(tsk, PIDTYPE_PID, NULL));
9283  }
9284  
9285  
9286 @@ -1585,7 +1609,7 @@ pid_t task_tgid_nr_ns(struct task_struct
9287  
9288  static inline pid_t task_tgid_vnr(struct task_struct *tsk)
9289  {
9290 -       return pid_vnr(task_tgid(tsk));
9291 +       return vx_map_tgid(pid_vnr(task_tgid(tsk)));
9292  }
9293  
9294  
9295 diff -NurpP --minimal linux-2.6.31.6/include/linux/shmem_fs.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/shmem_fs.h
9296 --- linux-2.6.31.6/include/linux/shmem_fs.h     2009-09-10 15:26:26.000000000 +0200
9297 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/shmem_fs.h       2009-09-10 16:11:43.000000000 +0200
9298 @@ -8,6 +8,9 @@
9299  
9300  #define SHMEM_NR_DIRECT 16
9301  
9302 +#define TMPFS_SUPER_MAGIC      0x01021994
9303 +
9304 +
9305  struct shmem_inode_info {
9306         spinlock_t              lock;
9307         unsigned long           flags;
9308 diff -NurpP --minimal linux-2.6.31.6/include/linux/stat.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/stat.h
9309 --- linux-2.6.31.6/include/linux/stat.h 2008-12-25 00:26:37.000000000 +0100
9310 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/stat.h   2009-09-10 16:11:43.000000000 +0200
9311 @@ -66,6 +66,7 @@ struct kstat {
9312         unsigned int    nlink;
9313         uid_t           uid;
9314         gid_t           gid;
9315 +       tag_t           tag;
9316         dev_t           rdev;
9317         loff_t          size;
9318         struct timespec  atime;
9319 diff -NurpP --minimal linux-2.6.31.6/include/linux/sunrpc/auth.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/sunrpc/auth.h
9320 --- linux-2.6.31.6/include/linux/sunrpc/auth.h  2008-12-25 00:26:37.000000000 +0100
9321 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/sunrpc/auth.h    2009-09-10 16:11:43.000000000 +0200
9322 @@ -25,6 +25,7 @@
9323  struct auth_cred {
9324         uid_t   uid;
9325         gid_t   gid;
9326 +       tag_t   tag;
9327         struct group_info *group_info;
9328         unsigned char machine_cred : 1;
9329  };
9330 diff -NurpP --minimal linux-2.6.31.6/include/linux/sunrpc/clnt.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/sunrpc/clnt.h
9331 --- linux-2.6.31.6/include/linux/sunrpc/clnt.h  2009-09-10 15:26:26.000000000 +0200
9332 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/sunrpc/clnt.h    2009-09-10 16:11:43.000000000 +0200
9333 @@ -43,7 +43,8 @@ struct rpc_clnt {
9334         unsigned int            cl_softrtry : 1,/* soft timeouts */
9335                                 cl_discrtry : 1,/* disconnect before retry */
9336                                 cl_autobind : 1,/* use getport() */
9337 -                               cl_chatty   : 1;/* be verbose */
9338 +                               cl_chatty   : 1,/* be verbose */
9339 +                               cl_tag      : 1;/* context tagging */
9340  
9341         struct rpc_rtt *        cl_rtt;         /* RTO estimator data */
9342         const struct rpc_timeout *cl_timeout;   /* Timeout strategy */
9343 diff -NurpP --minimal linux-2.6.31.6/include/linux/syscalls.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/syscalls.h
9344 --- linux-2.6.31.6/include/linux/syscalls.h     2009-09-10 15:26:26.000000000 +0200
9345 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/syscalls.h       2009-09-10 16:11:43.000000000 +0200
9346 @@ -428,6 +428,8 @@ asmlinkage long sys_symlink(const char _
9347  asmlinkage long sys_unlink(const char __user *pathname);
9348  asmlinkage long sys_rename(const char __user *oldname,
9349                                 const char __user *newname);
9350 +asmlinkage long sys_copyfile(const char __user *from, const char __user *to,
9351 +                               umode_t mode);
9352  asmlinkage long sys_chmod(const char __user *filename, mode_t mode);
9353  asmlinkage long sys_fchmod(unsigned int fd, mode_t mode);
9354  
9355 diff -NurpP --minimal linux-2.6.31.6/include/linux/sysctl.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/sysctl.h
9356 --- linux-2.6.31.6/include/linux/sysctl.h       2009-06-11 17:13:18.000000000 +0200
9357 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/sysctl.h 2009-09-10 16:11:43.000000000 +0200
9358 @@ -70,6 +70,7 @@ enum
9359         CTL_ABI=9,              /* Binary emulation */
9360         CTL_CPU=10,             /* CPU stuff (speed scaling, etc) */
9361         CTL_ARLAN=254,          /* arlan wireless driver */
9362 +       CTL_VSERVER=4242,       /* Linux-VServer debug */
9363         CTL_S390DBF=5677,       /* s390 debug */
9364         CTL_SUNRPC=7249,        /* sunrpc debug */
9365         CTL_PM=9899,            /* frv power management */
9366 @@ -104,6 +105,7 @@ enum
9367  
9368         KERN_PANIC=15,          /* int: panic timeout */
9369         KERN_REALROOTDEV=16,    /* real root device to mount after initrd */
9370 +       KERN_VSHELPER=17,       /* string: path to vshelper policy agent */
9371  
9372         KERN_SPARC_REBOOT=21,   /* reboot command on Sparc */
9373         KERN_CTLALTDEL=22,      /* int: allow ctl-alt-del to reboot */
9374 diff -NurpP --minimal linux-2.6.31.6/include/linux/sysfs.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/sysfs.h
9375 --- linux-2.6.31.6/include/linux/sysfs.h        2008-12-25 00:26:37.000000000 +0100
9376 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/sysfs.h  2009-09-10 16:11:43.000000000 +0200
9377 @@ -17,6 +17,8 @@
9378  #include <linux/list.h>
9379  #include <asm/atomic.h>
9380  
9381 +#define SYSFS_SUPER_MAGIC      0x62656572
9382 +
9383  struct kobject;
9384  struct module;
9385  
9386 diff -NurpP --minimal linux-2.6.31.6/include/linux/time.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/time.h
9387 --- linux-2.6.31.6/include/linux/time.h 2009-09-10 15:26:26.000000000 +0200
9388 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/time.h   2009-09-10 16:11:43.000000000 +0200
9389 @@ -205,6 +205,9 @@ static __always_inline void timespec_add
9390         a->tv_sec += __iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns);
9391         a->tv_nsec = ns;
9392  }
9393 +
9394 +#include <linux/vs_time.h>
9395 +
9396  #endif /* __KERNEL__ */
9397  
9398  #define NFDBITS                        __NFDBITS
9399 diff -NurpP --minimal linux-2.6.31.6/include/linux/types.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/types.h
9400 --- linux-2.6.31.6/include/linux/types.h        2009-09-10 15:26:26.000000000 +0200
9401 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/types.h  2009-09-10 16:11:43.000000000 +0200
9402 @@ -37,6 +37,9 @@ typedef __kernel_uid32_t      uid_t;
9403  typedef __kernel_gid32_t       gid_t;
9404  typedef __kernel_uid16_t        uid16_t;
9405  typedef __kernel_gid16_t        gid16_t;
9406 +typedef unsigned int           xid_t;
9407 +typedef unsigned int           nid_t;
9408 +typedef unsigned int           tag_t;
9409  
9410  typedef unsigned long          uintptr_t;
9411  
9412 diff -NurpP --minimal linux-2.6.31.6/include/linux/vroot.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vroot.h
9413 --- linux-2.6.31.6/include/linux/vroot.h        1970-01-01 01:00:00.000000000 +0100
9414 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vroot.h  2009-09-10 16:11:43.000000000 +0200
9415 @@ -0,0 +1,51 @@
9416 +
9417 +/*
9418 + * include/linux/vroot.h
9419 + *
9420 + * written by Herbert Pötzl, 9/11/2002
9421 + * ported to 2.6 by Herbert Pötzl, 30/12/2004
9422 + *
9423 + * Copyright (C) 2002-2007 by Herbert Pötzl.
9424 + * Redistribution of this file is permitted under the
9425 + * GNU General Public License.
9426 + */
9427 +
9428 +#ifndef _LINUX_VROOT_H
9429 +#define _LINUX_VROOT_H
9430 +
9431 +
9432 +#ifdef __KERNEL__
9433 +
9434 +/* Possible states of device */
9435 +enum {
9436 +       Vr_unbound,
9437 +       Vr_bound,
9438 +};
9439 +
9440 +struct vroot_device {
9441 +       int             vr_number;
9442 +       int             vr_refcnt;
9443 +
9444 +       struct semaphore        vr_ctl_mutex;
9445 +       struct block_device    *vr_device;
9446 +       int                     vr_state;
9447 +};
9448 +
9449 +
9450 +typedef struct block_device *(vroot_grb_func)(struct block_device *);
9451 +
9452 +extern int register_vroot_grb(vroot_grb_func *);
9453 +extern int unregister_vroot_grb(vroot_grb_func *);
9454 +
9455 +#endif /* __KERNEL__ */
9456 +
9457 +#define MAX_VROOT_DEFAULT      8
9458 +
9459 +/*
9460 + * IOCTL commands --- we will commandeer 0x56 ('V')
9461 + */
9462 +
9463 +#define VROOT_SET_DEV          0x5600
9464 +#define VROOT_CLR_DEV          0x5601
9465 +
9466 +#endif /* _LINUX_VROOT_H */
9467 diff -NurpP --minimal linux-2.6.31.6/include/linux/vs_base.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_base.h
9468 --- linux-2.6.31.6/include/linux/vs_base.h      1970-01-01 01:00:00.000000000 +0100
9469 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_base.h        2009-09-10 16:11:43.000000000 +0200
9470 @@ -0,0 +1,10 @@
9471 +#ifndef _VS_BASE_H
9472 +#define _VS_BASE_H
9473 +
9474 +#include "vserver/base.h"
9475 +#include "vserver/check.h"
9476 +#include "vserver/debug.h"
9477 +
9478 +#else
9479 +#warning duplicate inclusion
9480 +#endif
9481 diff -NurpP --minimal linux-2.6.31.6/include/linux/vs_context.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_context.h
9482 --- linux-2.6.31.6/include/linux/vs_context.h   1970-01-01 01:00:00.000000000 +0100
9483 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_context.h     2009-09-30 02:28:59.000000000 +0200
9484 @@ -0,0 +1,242 @@
9485 +#ifndef _VS_CONTEXT_H
9486 +#define _VS_CONTEXT_H
9487 +
9488 +#include "vserver/base.h"
9489 +#include "vserver/check.h"
9490 +#include "vserver/context.h"
9491 +#include "vserver/history.h"
9492 +#include "vserver/debug.h"
9493 +
9494 +#include <linux/sched.h>
9495 +
9496 +
9497 +#define get_vx_info(i) __get_vx_info(i, __FILE__, __LINE__, __HERE__)
9498 +
9499 +static inline struct vx_info *__get_vx_info(struct vx_info *vxi,
9500 +       const char *_file, int _line, void *_here)
9501 +{
9502 +       if (!vxi)
9503 +               return NULL;
9504 +
9505 +       vxlprintk(VXD_CBIT(xid, 2), "get_vx_info(%p[#%d.%d])",
9506 +               vxi, vxi ? vxi->vx_id : 0,
9507 +               vxi ? atomic_read(&vxi->vx_usecnt) : 0,
9508 +               _file, _line);
9509 +       __vxh_get_vx_info(vxi, _here);
9510 +
9511 +       atomic_inc(&vxi->vx_usecnt);
9512 +       return vxi;
9513 +}
9514 +
9515 +
9516 +extern void free_vx_info(struct vx_info *);
9517 +
9518 +#define put_vx_info(i) __put_vx_info(i, __FILE__, __LINE__, __HERE__)
9519 +
9520 +static inline void __put_vx_info(struct vx_info *vxi,
9521 +       const char *_file, int _line, void *_here)
9522 +{
9523 +       if (!vxi)
9524 +               return;
9525 +
9526 +       vxlprintk(VXD_CBIT(xid, 2), "put_vx_info(%p[#%d.%d])",
9527 +               vxi, vxi ? vxi->vx_id : 0,
9528 +               vxi ? atomic_read(&vxi->vx_usecnt) : 0,
9529 +               _file, _line);
9530 +       __vxh_put_vx_info(vxi, _here);
9531 +
9532 +       if (atomic_dec_and_test(&vxi->vx_usecnt))
9533 +               free_vx_info(vxi);
9534 +}
9535 +
9536 +
9537 +#define init_vx_info(p, i) \
9538 +       __init_vx_info(p, i, __FILE__, __LINE__, __HERE__)
9539 +
9540 +static inline void __init_vx_info(struct vx_info **vxp, struct vx_info *vxi,
9541 +       const char *_file, int _line, void *_here)
9542 +{
9543 +       if (vxi) {
9544 +               vxlprintk(VXD_CBIT(xid, 3),
9545 +                       "init_vx_info(%p[#%d.%d])",
9546 +                       vxi, vxi ? vxi->vx_id : 0,
9547 +                       vxi ? atomic_read(&vxi->vx_usecnt) : 0,
9548 +                       _file, _line);
9549 +               __vxh_init_vx_info(vxi, vxp, _here);
9550 +
9551 +               atomic_inc(&vxi->vx_usecnt);
9552 +       }
9553 +       *vxp = vxi;
9554 +}
9555 +
9556 +
9557 +#define set_vx_info(p, i) \
9558 +       __set_vx_info(p, i, __FILE__, __LINE__, __HERE__)
9559 +
9560 +static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi,
9561 +       const char *_file, int _line, void *_here)
9562 +{
9563 +       struct vx_info *vxo;
9564 +
9565 +       if (!vxi)
9566 +               return;
9567 +
9568 +       vxlprintk(VXD_CBIT(xid, 3), "set_vx_info(%p[#%d.%d])",
9569 +               vxi, vxi ? vxi->vx_id : 0,
9570 +               vxi ? atomic_read(&vxi->vx_usecnt) : 0,
9571 +               _file, _line);
9572 +       __vxh_set_vx_info(vxi, vxp, _here);
9573 +
9574 +       atomic_inc(&vxi->vx_usecnt);
9575 +       vxo = xchg(vxp, vxi);
9576 +       BUG_ON(vxo);
9577 +}
9578 +
9579 +
9580 +#define clr_vx_info(p) __clr_vx_info(p, __FILE__, __LINE__, __HERE__)
9581 +
9582 +static inline void __clr_vx_info(struct vx_info **vxp,
9583 +       const char *_file, int _line, void *_here)
9584 +{
9585 +       struct vx_info *vxo;
9586 +
9587 +       vxo = xchg(vxp, NULL);
9588 +       if (!vxo)
9589 +               return;
9590 +
9591 +       vxlprintk(VXD_CBIT(xid, 3), "clr_vx_info(%p[#%d.%d])",
9592 +               vxo, vxo ? vxo->vx_id : 0,
9593 +               vxo ? atomic_read(&vxo->vx_usecnt) : 0,
9594 +               _file, _line);
9595 +       __vxh_clr_vx_info(vxo, vxp, _here);
9596 +
9597 +       if (atomic_dec_and_test(&vxo->vx_usecnt))
9598 +               free_vx_info(vxo);
9599 +}
9600 +
9601 +
9602 +#define claim_vx_info(v, p) \
9603 +       __claim_vx_info(v, p, __FILE__, __LINE__, __HERE__)
9604 +
9605 +static inline void __claim_vx_info(struct vx_info *vxi,
9606 +       struct task_struct *task,
9607 +       const char *_file, int _line, void *_here)
9608 +{
9609 +       vxlprintk(VXD_CBIT(xid, 3), "claim_vx_info(%p[#%d.%d.%d]) %p",
9610 +               vxi, vxi ? vxi->vx_id : 0,
9611 +               vxi ? atomic_read(&vxi->vx_usecnt) : 0,
9612 +               vxi ? atomic_read(&vxi->vx_tasks) : 0,
9613 +               task, _file, _line);
9614 +       __vxh_claim_vx_info(vxi, task, _here);
9615 +
9616 +       atomic_inc(&vxi->vx_tasks);
9617 +}
9618 +
9619 +
9620 +extern void unhash_vx_info(struct vx_info *);
9621 +
9622 +#define release_vx_info(v, p) \
9623 +       __release_vx_info(v, p, __FILE__, __LINE__, __HERE__)
9624 +
9625 +static inline void __release_vx_info(struct vx_info *vxi,
9626 +       struct task_struct *task,
9627 +       const char *_file, int _line, void *_here)
9628 +{
9629 +       vxlprintk(VXD_CBIT(xid, 3), "release_vx_info(%p[#%d.%d.%d]) %p",
9630 +               vxi, vxi ? vxi->vx_id : 0,
9631 +               vxi ? atomic_read(&vxi->vx_usecnt) : 0,
9632 +               vxi ? atomic_read(&vxi->vx_tasks) : 0,
9633 +               task, _file, _line);
9634 +       __vxh_release_vx_info(vxi, task, _here);
9635 +
9636 +       might_sleep();
9637 +
9638 +       if (atomic_dec_and_test(&vxi->vx_tasks))
9639 +               unhash_vx_info(vxi);
9640 +}
9641 +
9642 +
9643 +#define task_get_vx_info(p) \
9644 +       __task_get_vx_info(p, __FILE__, __LINE__, __HERE__)
9645 +
9646 +static inline struct vx_info *__task_get_vx_info(struct task_struct *p,
9647 +       const char *_file, int _line, void *_here)
9648 +{
9649 +       struct vx_info *vxi;
9650 +
9651 +       task_lock(p);
9652 +       vxlprintk(VXD_CBIT(xid, 5), "task_get_vx_info(%p)",
9653 +               p, _file, _line);
9654 +       vxi = __get_vx_info(p->vx_info, _file, _line, _here);
9655 +       task_unlock(p);
9656 +       return vxi;
9657 +}
9658 +
9659 +
9660 +static inline void __wakeup_vx_info(struct vx_info *vxi)
9661 +{
9662 +       if (waitqueue_active(&vxi->vx_wait))
9663 +               wake_up_interruptible(&vxi->vx_wait);
9664 +}
9665 +
9666 +
9667 +#define enter_vx_info(v, s) __enter_vx_info(v, s, __FILE__, __LINE__)
9668 +
9669 +static inline void __enter_vx_info(struct vx_info *vxi,
9670 +       struct vx_info_save *vxis, const char *_file, int _line)
9671 +{
9672 +       vxlprintk(VXD_CBIT(xid, 5), "enter_vx_info(%p[#%d],%p) %p[#%d,%p]",
9673 +               vxi, vxi ? vxi->vx_id : 0, vxis, current,
9674 +               current->xid, current->vx_info, _file, _line);
9675 +       vxis->vxi = xchg(&current->vx_info, vxi);
9676 +       vxis->xid = current->xid;
9677 +       current->xid = vxi ? vxi->vx_id : 0;
9678 +}
9679 +
9680 +#define leave_vx_info(s) __leave_vx_info(s, __FILE__, __LINE__)
9681 +
9682 +static inline void __leave_vx_info(struct vx_info_save *vxis,
9683 +       const char *_file, int _line)
9684 +{
9685 +       vxlprintk(VXD_CBIT(xid, 5), "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]",
9686 +               vxis, vxis->xid, vxis->vxi, current,
9687 +               current->xid, current->vx_info, _file, _line);
9688 +       (void)xchg(&current->vx_info, vxis->vxi);
9689 +       current->xid = vxis->xid;
9690 +}
9691 +
9692 +
9693 +static inline void __enter_vx_admin(struct vx_info_save *vxis)
9694 +{
9695 +       vxis->vxi = xchg(&current->vx_info, NULL);
9696 +       vxis->xid = xchg(&current->xid, (xid_t)0);
9697 +}
9698 +
9699 +static inline void __leave_vx_admin(struct vx_info_save *vxis)
9700 +{
9701 +       (void)xchg(&current->xid, vxis->xid);
9702 +       (void)xchg(&current->vx_info, vxis->vxi);
9703 +}
9704 +
9705 +#define task_is_init(p) \
9706 +       __task_is_init(p, __FILE__, __LINE__, __HERE__)
9707 +
9708 +static inline int __task_is_init(struct task_struct *p,
9709 +       const char *_file, int _line, void *_here)
9710 +{
9711 +       int is_init = is_global_init(p);
9712 +
9713 +       task_lock(p);
9714 +       if (p->vx_info)
9715 +               is_init = p->vx_info->vx_initpid == p->pid;
9716 +       task_unlock(p);
9717 +       return is_init;
9718 +}
9719 +
9720 +extern void exit_vx_info(struct task_struct *, int);
9721 +extern void exit_vx_info_early(struct task_struct *, int);
9722 +
9723 +
9724 +#else
9725 +#warning duplicate inclusion
9726 +#endif
9727 diff -NurpP --minimal linux-2.6.31.6/include/linux/vs_cowbl.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_cowbl.h
9728 --- linux-2.6.31.6/include/linux/vs_cowbl.h     1970-01-01 01:00:00.000000000 +0100
9729 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_cowbl.h       2009-09-10 16:11:43.000000000 +0200
9730 @@ -0,0 +1,47 @@
9731 +#ifndef _VS_COWBL_H
9732 +#define _VS_COWBL_H
9733 +
9734 +#include <linux/fs.h>
9735 +#include <linux/dcache.h>
9736 +#include <linux/namei.h>
9737 +
9738 +extern struct dentry *cow_break_link(const char *pathname);
9739 +
9740 +static inline int cow_check_and_break(struct path *path)
9741 +{
9742 +       struct inode *inode = path->dentry->d_inode;
9743 +       int error = 0;
9744 +
9745 +       /* do we need this check? */
9746 +       if (IS_RDONLY(inode))
9747 +               return -EROFS;
9748 +
9749 +       if (IS_COW(inode)) {
9750 +               if (IS_COW_LINK(inode)) {
9751 +                       struct dentry *new_dentry, *old_dentry = path->dentry;
9752 +                       char *pp, *buf;
9753 +
9754 +                       buf = kmalloc(PATH_MAX, GFP_KERNEL);
9755 +                       if (!buf)
9756 +                               return -ENOMEM;
9757 +                       pp = d_path(path, buf, PATH_MAX); /* may be ERR_PTR */
9758 +                       new_dentry = IS_ERR(pp) ? ERR_CAST(pp) :
9759 +                               cow_break_link(pp);
9760 +                       kfree(buf);
9761 +                       if (!IS_ERR(new_dentry)) {
9762 +                               path->dentry = new_dentry;
9763 +                               dput(old_dentry);
9764 +                       } else
9765 +                               error = PTR_ERR(new_dentry);
9766 +               } else {
9767 +                       inode->i_flags &= ~(S_IXUNLINK | S_IMMUTABLE);
9768 +                       inode->i_ctime = CURRENT_TIME;
9769 +                       mark_inode_dirty(inode);
9770 +               }
9771 +       }
9772 +       return error;
9773 +}
9774 +
9775 +#else
9776 +#warning duplicate inclusion
9777 +#endif
9778 diff -NurpP --minimal linux-2.6.31.6/include/linux/vs_cvirt.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_cvirt.h
9779 --- linux-2.6.31.6/include/linux/vs_cvirt.h     1970-01-01 01:00:00.000000000 +0100
9780 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_cvirt.h       2009-09-10 16:11:43.000000000 +0200
9781 @@ -0,0 +1,50 @@
9782 +#ifndef _VS_CVIRT_H
9783 +#define _VS_CVIRT_H
9784 +
9785 +#include "vserver/cvirt.h"
9786 +#include "vserver/context.h"
9787 +#include "vserver/base.h"
9788 +#include "vserver/check.h"
9789 +#include "vserver/debug.h"
9790 +
9791 +
9792 +static inline void vx_activate_task(struct task_struct *p)
9793 +{
9794 +       struct vx_info *vxi;
9795 +
9796 +       if ((vxi = p->vx_info)) {
9797 +               vx_update_load(vxi);
9798 +               atomic_inc(&vxi->cvirt.nr_running);
9799 +       }
9800 +}
9801 +
9802 +static inline void vx_deactivate_task(struct task_struct *p)
9803 +{
9804 +       struct vx_info *vxi;
9805 +
9806 +       if ((vxi = p->vx_info)) {
9807 +               vx_update_load(vxi);
9808 +               atomic_dec(&vxi->cvirt.nr_running);
9809 +       }
9810 +}
9811 +
9812 +static inline void vx_uninterruptible_inc(struct task_struct *p)
9813 +{
9814 +       struct vx_info *vxi;
9815 +
9816 +       if ((vxi = p->vx_info))
9817 +               atomic_inc(&vxi->cvirt.nr_uninterruptible);
9818 +}
9819 +
9820 +static inline void vx_uninterruptible_dec(struct task_struct *p)
9821 +{
9822 +       struct vx_info *vxi;
9823 +
9824 +       if ((vxi = p->vx_info))
9825 +               atomic_dec(&vxi->cvirt.nr_uninterruptible);
9826 +}
9827 +
9828 +
9829 +#else
9830 +#warning duplicate inclusion
9831 +#endif
9832 diff -NurpP --minimal linux-2.6.31.6/include/linux/vs_device.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_device.h
9833 --- linux-2.6.31.6/include/linux/vs_device.h    1970-01-01 01:00:00.000000000 +0100
9834 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_device.h      2009-09-10 16:11:43.000000000 +0200
9835 @@ -0,0 +1,45 @@
9836 +#ifndef _VS_DEVICE_H
9837 +#define _VS_DEVICE_H
9838 +
9839 +#include "vserver/base.h"
9840 +#include "vserver/device.h"
9841 +#include "vserver/debug.h"
9842 +
9843 +
9844 +#ifdef CONFIG_VSERVER_DEVICE
9845 +
9846 +int vs_map_device(struct vx_info *, dev_t, dev_t *, umode_t);
9847 +
9848 +#define vs_device_perm(v, d, m, p) \
9849 +       ((vs_map_device(current_vx_info(), d, NULL, m) & (p)) == (p))
9850 +
9851 +#else
9852 +
9853 +static inline
9854 +int vs_map_device(struct vx_info *vxi,
9855 +       dev_t device, dev_t *target, umode_t mode)
9856 +{
9857 +       if (target)
9858 +               *target = device;
9859 +       return ~0;
9860 +}
9861 +
9862 +#define vs_device_perm(v, d, m, p) ((p) == (p))
9863 +
9864 +#endif
9865 +
9866 +
9867 +#define vs_map_chrdev(d, t, p) \
9868 +       ((vs_map_device(current_vx_info(), d, t, S_IFCHR) & (p)) == (p))
9869 +#define vs_map_blkdev(d, t, p) \
9870 +       ((vs_map_device(current_vx_info(), d, t, S_IFBLK) & (p)) == (p))
9871 +
9872 +#define vs_chrdev_perm(d, p) \
9873 +       vs_device_perm(current_vx_info(), d, S_IFCHR, p)
9874 +#define vs_blkdev_perm(d, p) \
9875 +       vs_device_perm(current_vx_info(), d, S_IFBLK, p)
9876 +
9877 +
9878 +#else
9879 +#warning duplicate inclusion
9880 +#endif
9881 diff -NurpP --minimal linux-2.6.31.6/include/linux/vs_dlimit.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_dlimit.h
9882 --- linux-2.6.31.6/include/linux/vs_dlimit.h    1970-01-01 01:00:00.000000000 +0100
9883 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_dlimit.h      2009-10-12 02:50:50.000000000 +0200
9884 @@ -0,0 +1,216 @@
9885 +#ifndef _VS_DLIMIT_H
9886 +#define _VS_DLIMIT_H
9887 +
9888 +#include <linux/fs.h>
9889 +
9890 +#include "vserver/dlimit.h"
9891 +#include "vserver/base.h"
9892 +#include "vserver/debug.h"
9893 +
9894 +
9895 +#define get_dl_info(i) __get_dl_info(i, __FILE__, __LINE__)
9896 +
9897 +static inline struct dl_info *__get_dl_info(struct dl_info *dli,
9898 +       const char *_file, int _line)
9899 +{
9900 +       if (!dli)
9901 +               return NULL;
9902 +       vxlprintk(VXD_CBIT(dlim, 4), "get_dl_info(%p[#%d.%d])",
9903 +               dli, dli ? dli->dl_tag : 0,
9904 +               dli ? atomic_read(&dli->dl_usecnt) : 0,
9905 +               _file, _line);
9906 +       atomic_inc(&dli->dl_usecnt);
9907 +       return dli;
9908 +}
9909 +
9910 +
9911 +#define free_dl_info(i) \
9912 +       call_rcu(&(i)->dl_rcu, rcu_free_dl_info)
9913 +
9914 +#define put_dl_info(i) __put_dl_info(i, __FILE__, __LINE__)
9915 +/* Drop a reference on dli (NULL is ignored); the last put calls free_dl_info(). */
9916 +static inline void __put_dl_info(struct dl_info *dli,
9917 +       const char *_file, int _line)
9918 +{
9919 +       if (!dli)
9920 +               return;
9921 +       vxlprintk(VXD_CBIT(dlim, 4), "put_dl_info(%p[#%d.%d])",
9922 +               dli, dli ? dli->dl_tag : 0,
9923 +               dli ? atomic_read(&dli->dl_usecnt) : 0,
9924 +               _file, _line);  /* logs the use count before the decrement */
9925 +       if (atomic_dec_and_test(&dli->dl_usecnt))
9926 +               free_dl_info(dli);      /* count reached zero */
9927 +}
9928 +
9929 +
9930 +#define __dlimit_char(d)       ((d) ? '*' : ' ')
9931 +/* Charge nr bytes to the (sb, tag) limit; returns 1 if that would exceed dl_space_total, else 0. */
9932 +static inline int __dl_alloc_space(struct super_block *sb,
9933 +       tag_t tag, dlsize_t nr, const char *file, int line)
9934 +{
9935 +       struct dl_info *dli = NULL;
9936 +       int ret = 0;
9937 +
9938 +       if (nr == 0)    /* nothing to charge; only the debug line below runs */
9939 +               goto out;
9940 +       dli = locate_dl_info(sb, tag);
9941 +       if (!dli)       /* no dl_info found for (sb, tag): allocation is allowed */
9942 +               goto out;
9943 +
9944 +       spin_lock(&dli->dl_lock);
9945 +       ret = (dli->dl_space_used + nr > dli->dl_space_total);  /* NOTE(review): sum may wrap if these are unsigned -- confirm */
9946 +       if (!ret)
9947 +               dli->dl_space_used += nr;
9948 +       spin_unlock(&dli->dl_lock);
9949 +       put_dl_info(dli);       /* presumably pairs with a ref taken by locate_dl_info() -- confirm */
9950 +out:
9951 +       vxlprintk(VXD_CBIT(dlim, 1),
9952 +               "ALLOC (%p,#%d)%c %lld bytes (%d)",
9953 +               sb, tag, __dlimit_char(dli), (long long)nr,
9954 +               ret, file, line);       /* '*' marks that a dl_info was found */
9955 +       return ret;
9956 +}
9957 +/* Give nr bytes back to the (sb, tag) limit; dl_space_used is clamped at 0. */
9958 +static inline void __dl_free_space(struct super_block *sb,
9959 +       tag_t tag, dlsize_t nr, const char *_file, int _line)
9960 +{
9961 +       struct dl_info *dli = NULL;
9962 +
9963 +       if (nr == 0)    /* nothing to release; only the debug line below runs */
9964 +               goto out;
9965 +       dli = locate_dl_info(sb, tag);
9966 +       if (!dli)       /* no dl_info found for (sb, tag) */
9967 +               goto out;
9968 +
9969 +       spin_lock(&dli->dl_lock);
9970 +       if (dli->dl_space_used > nr)
9971 +               dli->dl_space_used -= nr;
9972 +       else
9973 +               dli->dl_space_used = 0; /* releasing at least as much as is charged */
9974 +       spin_unlock(&dli->dl_lock);
9975 +       put_dl_info(dli);       /* presumably pairs with a ref taken by locate_dl_info() -- confirm */
9976 +out:
9977 +       vxlprintk(VXD_CBIT(dlim, 1),
9978 +               "FREE  (%p,#%d)%c %lld bytes",
9979 +               sb, tag, __dlimit_char(dli), (long long)nr,
9980 +               _file, _line);
9981 +}
9982 +/* Charge one inode to the (sb, tag) limit; returns 1 if dl_inodes_total is already reached, else 0. */
9983 +static inline int __dl_alloc_inode(struct super_block *sb,
9984 +       tag_t tag, const char *_file, int _line)
9985 +{
9986 +       struct dl_info *dli;
9987 +       int ret = 0;
9988 +
9989 +       dli = locate_dl_info(sb, tag);
9990 +       if (!dli)       /* no dl_info found for (sb, tag): allocation is allowed */
9991 +               goto out;
9992 +
9993 +       spin_lock(&dli->dl_lock);
9994 +       ret = (dli->dl_inodes_used >= dli->dl_inodes_total);
9995 +       if (!ret)
9996 +               dli->dl_inodes_used++;
9997 +       spin_unlock(&dli->dl_lock);
9998 +       put_dl_info(dli);       /* presumably pairs with a ref taken by locate_dl_info() -- confirm */
9999 +out:
10000 +       vxlprintk(VXD_CBIT(dlim, 0),
10001 +               "ALLOC (%p,#%d)%c inode (%d)",
10002 +               sb, tag, __dlimit_char(dli), ret, _file, _line);
10003 +       return ret;
10004 +}
10005 +/* Release one inode from the (sb, tag) limit; dl_inodes_used is not taken below 0. */
10006 +static inline void __dl_free_inode(struct super_block *sb,
10007 +       tag_t tag, const char *_file, int _line)
10008 +{
10009 +       struct dl_info *dli;
10010 +
10011 +       dli = locate_dl_info(sb, tag);
10012 +       if (!dli)       /* no dl_info found for (sb, tag) */
10013 +               goto out;
10014 +
10015 +       spin_lock(&dli->dl_lock);
10016 +       if (dli->dl_inodes_used > 1)
10017 +               dli->dl_inodes_used--;
10018 +       else
10019 +               dli->dl_inodes_used = 0;        /* count was 1 or less: result is 0 */
10020 +       spin_unlock(&dli->dl_lock);
10021 +       put_dl_info(dli);       /* presumably pairs with a ref taken by locate_dl_info() -- confirm */
10022 +out:
10023 +       vxlprintk(VXD_CBIT(dlim, 0),
10024 +               "FREE  (%p,#%d)%c inode",
10025 +               sb, tag, __dlimit_char(dli), _file, _line);
10026 +}
10027 +/* Cap *free_blocks / *root_blocks (either may be NULL) to what the (sb, tag) limit allows. */
10028 +static inline void __dl_adjust_block(struct super_block *sb, tag_t tag,
10029 +       unsigned long long *free_blocks, unsigned long long *root_blocks,
10030 +       const char *_file, int _line)
10031 +{
10032 +       struct dl_info *dli;
10033 +       uint64_t broot, bfree;
10034 +
10035 +       dli = locate_dl_info(sb, tag);
10036 +       if (!dli)       /* no dl_info found for (sb, tag): leave the values alone */
10037 +               return;
10038 +
10039 +       spin_lock(&dli->dl_lock);
10040 +       broot = (dli->dl_space_total -
10041 +               (dli->dl_space_total >> 10) * dli->dl_nrlmult)
10042 +               >> sb->s_blocksize_bits;        /* bytes -> filesystem blocks */
10043 +       bfree = (dli->dl_space_total - dli->dl_space_used)
10044 +                       >> sb->s_blocksize_bits;
10045 +       spin_unlock(&dli->dl_lock);
10046 +
10047 +       vxlprintk(VXD_CBIT(dlim, 2),    /* must not dereference NULL outputs */
10048 +               "ADJUST: %lld,%lld on %lld,%lld [mult=%d]",
10049 +               (long long)bfree, (long long)broot,
10050 +               free_blocks ? *free_blocks : 0, root_blocks ? *root_blocks : 0,
10051 +               dli->dl_nrlmult, _file, _line);
10052 +       if (free_blocks) {
10053 +               if (*free_blocks > bfree)
10054 +                       *free_blocks = bfree;
10055 +       }
10056 +       if (root_blocks) {
10057 +               if (*root_blocks > broot)
10058 +                       *root_blocks = broot;
10059 +       }
10060 +       put_dl_info(dli);       /* presumably pairs with a ref taken by locate_dl_info() -- confirm */
10061 +}
10062 +
10063 +#define dl_prealloc_space(in, bytes) \
10064 +       __dl_alloc_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \
10065 +               __FILE__, __LINE__ )
10066 +
10067 +#define dl_alloc_space(in, bytes) \
10068 +       __dl_alloc_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \
10069 +               __FILE__, __LINE__ )
10070 +
10071 +#define dl_reserve_space(in, bytes) \
10072 +       __dl_alloc_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \
10073 +               __FILE__, __LINE__ )
10074 +
10075 +#define dl_claim_space(in, bytes) (0)
10076 +
10077 +#define dl_release_space(in, bytes) \
10078 +       __dl_free_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \
10079 +               __FILE__, __LINE__ )
10080 +
10081 +#define dl_free_space(in, bytes) \
10082 +       __dl_free_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \
10083 +               __FILE__, __LINE__ )
10084 +
10085 +
10086 +
10087 +#define dl_alloc_inode(in) \
10088 +       __dl_alloc_inode((in)->i_sb, (in)->i_tag, __FILE__, __LINE__ )
10089 +
10090 +#define dl_free_inode(in) \
10091 +       __dl_free_inode((in)->i_sb, (in)->i_tag, __FILE__, __LINE__ )
10092 +
10093 +
10094 +#define dl_adjust_block(sb, tag, fb, rb) \
10095 +       __dl_adjust_block(sb, tag, fb, rb, __FILE__, __LINE__ )
10096 +
10097 +
10098 +#else
10099 +#warning duplicate inclusion
10100 +#endif
10101 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/base.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/base.h
10102 --- linux-2.6.31.6/include/linux/vserver/base.h 1970-01-01 01:00:00.000000000 +0100
10103 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/base.h   2009-11-05 04:16:31.000000000 +0100
10104 @@ -0,0 +1,159 @@
10105 +#ifndef _VX_BASE_H
10106 +#define _VX_BASE_H
10107 +
10108 +
10109 +/* context state changes */
10110 +
10111 +enum {
10112 +       VSC_STARTUP = 1,
10113 +       VSC_SHUTDOWN,
10114 +
10115 +       VSC_NETUP,
10116 +       VSC_NETDOWN,
10117 +};
10118 +
10119 +
10120 +
10121 +#define vx_task_xid(t) ((t)->xid)
10122 +
10123 +#define vx_current_xid() vx_task_xid(current)
10124 +
10125 +#define current_vx_info() (current->vx_info)
10126 +
10127 +
10128 +#define nx_task_nid(t) ((t)->nid)
10129 +
10130 +#define nx_current_nid() nx_task_nid(current)
10131 +
10132 +#define current_nx_info() (current->nx_info)
10133 +
10134 +
10135 +/* generic flag merging */
10136 +/* vs_check_flags: nonzero iff (v & m) != f */
10137 +#define vs_check_flags(v, m, f)        (((v) & (m)) ^ (f))
10138 +/* vs_mask_flags: v with the bits selected by m replaced by those of f */
10139 +#define vs_mask_flags(v, f, m) (((v) & ~(m)) | ((f) & (m)))
10140 +/* vs_mask_mask: v with the bits selected by m kept only where f also has them */
10141 +#define vs_mask_mask(v, f, m)  (((v) & ~(m)) | ((v) & (f) & (m)))
10142 +/* vs_check_bit: nonzero iff bit n of v is set (mask built as long long) */
10143 +#define vs_check_bit(v, n)     ((v) & (1LL << (n)))
10144 +
10145 +
10146 +/* context flags */
10147 +
10148 +#define __vx_flags(v)  ((v) ? (v)->vx_flags : 0)
10149 +
10150 +#define vx_current_flags()     __vx_flags(current_vx_info())
10151 +
10152 +#define vx_info_flags(v, m, f) \
10153 +       vs_check_flags(__vx_flags(v), m, f)
10154 +
10155 +#define task_vx_flags(t, m, f) \
10156 +       ((t) && vx_info_flags((t)->vx_info, m, f))
10157 +
10158 +#define vx_flags(m, f) vx_info_flags(current_vx_info(), m, f)
10159 +
10160 +
10161 +/* context caps */
10162 +
10163 +#define __vx_ccaps(v)  ((v) ? (v)->vx_ccaps : 0)
10164 +
10165 +#define vx_current_ccaps()     __vx_ccaps(current_vx_info())
10166 +
10167 +#define vx_info_ccaps(v, c)    (__vx_ccaps(v) & (c))
10168 +
10169 +#define vx_ccaps(c)    vx_info_ccaps(current_vx_info(), (c))
10170 +
10171 +
10172 +
10173 +/* network flags */
10174 +
10175 +#define __nx_flags(n)  ((n) ? (n)->nx_flags : 0)
10176 +
10177 +#define nx_current_flags()     __nx_flags(current_nx_info())
10178 +
10179 +#define nx_info_flags(n, m, f) \
10180 +       vs_check_flags(__nx_flags(n), m, f)
10181 +
10182 +#define task_nx_flags(t, m, f) \
10183 +       ((t) && nx_info_flags((t)->nx_info, m, f))
10184 +
10185 +#define nx_flags(m, f) nx_info_flags(current_nx_info(), m, f)
10186 +
10187 +
10188 +/* network caps */
10189 +
10190 +#define __nx_ncaps(n)  ((n) ? (n)->nx_ncaps : 0)
10191 +
10192 +#define nx_current_ncaps()     __nx_ncaps(current_nx_info())
10193 +
10194 +#define nx_info_ncaps(n, c)    (__nx_ncaps(n) & (c))
10195 +
10196 +#define nx_ncaps(c)    nx_info_ncaps(current_nx_info(), c)
10197 +
10198 +
10199 +/* context mask capabilities */
10200 +
10201 +#define __vx_mcaps(v)  ((v) ? (v)->vx_ccaps >> 32UL : ~0 )
10202 +
10203 +#define vx_info_mcaps(v, c)    (__vx_mcaps(v) & (c))
10204 +
10205 +#define vx_mcaps(c)    vx_info_mcaps(current_vx_info(), c)
10206 +
10207 +
10208 +/* context bcap mask */
10209 +
10210 +#define __vx_bcaps(v)          ((v)->vx_bcaps)
10211 +
10212 +#define vx_current_bcaps()     __vx_bcaps(current_vx_info())
10213 +
10214 +
10215 +/* mask given bcaps */
10216 +
10217 +#define vx_info_mbcaps(v, c)   ((v) ? cap_intersect(__vx_bcaps(v), c) : c)
10218 +
10219 +#define vx_mbcaps(c)           vx_info_mbcaps(current_vx_info(), c)
10220 +
10221 +
10222 +/* masked cap_bset */
10223 +
10224 +#define vx_info_cap_bset(v)    vx_info_mbcaps(v, current->cap_bset)
10225 +
10226 +#define vx_current_cap_bset()  vx_info_cap_bset(current_vx_info())
10227 +
10228 +#if 0
10229 +#define vx_info_mbcap(v, b) \
10230 +       (!vx_info_flags(v, VXF_STATE_SETUP, 0) ? \
10231 +       vx_info_bcaps(v, b) : (b))
10232 +
10233 +#define task_vx_mbcap(t, b) \
10234 +       vx_info_mbcap((t)->vx_info, (t)->b)
10235 +
10236 +#define vx_mbcap(b)    task_vx_mbcap(current, b)
10237 +#endif
10238 +
10239 +#define vx_cap_raised(v, c, f) cap_raised(vx_info_mbcaps(v, c), f)
10240 +
10241 +#define vx_capable(b, c) (capable(b) || \
10242 +       (cap_raised(current_cap(), b) && vx_ccaps(c)))
10243 +
10244 +#define nx_capable(b, c) (capable(b) || \
10245 +       (cap_raised(current_cap(), b) && nx_ncaps(c)))
10246 +
10247 +#define vx_task_initpid(t, n) \
10248 +       ((t)->vx_info && \
10249 +       ((t)->vx_info->vx_initpid == (n)))
10250 +
10251 +#define vx_current_initpid(n)  vx_task_initpid(current, n)
10252 +
10253 +
10254 +#define __vx_state(v)  ((v) ? ((v)->vx_state) : 0)
10255 +
10256 +#define vx_info_state(v, m)    (__vx_state(v) & (m))
10257 +
10258 +
10259 +#define __nx_state(n)  ((n) ? ((n)->nx_state) : 0)
10260 +
10261 +#define nx_info_state(n, m)    (__nx_state(n) & (m))
10262 +
10263 +#endif
10264 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/cacct_cmd.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/cacct_cmd.h
10265 --- linux-2.6.31.6/include/linux/vserver/cacct_cmd.h    1970-01-01 01:00:00.000000000 +0100
10266 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/cacct_cmd.h      2009-09-10 16:11:43.000000000 +0200
10267 @@ -0,0 +1,23 @@
10268 +#ifndef _VX_CACCT_CMD_H
10269 +#define _VX_CACCT_CMD_H
10270 +
10271 +
10272 +/* socket accounting statistics command */
10273 +
10274 +#define VCMD_sock_stat         VC_CMD(VSTAT, 5, 0)
10275 +
10276 +struct vcmd_sock_stat_v0 {
10277 +       uint32_t field;
10278 +       uint32_t count[3];
10279 +       uint64_t total[3];
10280 +};
10281 +
10282 +
10283 +#ifdef __KERNEL__
10284 +
10285 +#include <linux/compiler.h>
10286 +
10287 +extern int vc_sock_stat(struct vx_info *, void __user *);
10288 +
10289 +#endif /* __KERNEL__ */
10290 +#endif /* _VX_CACCT_CMD_H */
10291 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/cacct_def.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/cacct_def.h
10292 --- linux-2.6.31.6/include/linux/vserver/cacct_def.h    1970-01-01 01:00:00.000000000 +0100
10293 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/cacct_def.h      2009-09-10 16:11:43.000000000 +0200
10294 @@ -0,0 +1,43 @@
10295 +#ifndef _VX_CACCT_DEF_H
10296 +#define _VX_CACCT_DEF_H
10297 +
10298 +#include <asm/atomic.h>
10299 +#include <linux/vserver/cacct.h>
10300 +
10301 +
10302 +struct _vx_sock_acc {
10303 +       atomic_long_t count;
10304 +       atomic_long_t total;
10305 +};
10306 +
10307 +/* context sub struct */
10308 +
10309 +struct _vx_cacct {
10310 +       struct _vx_sock_acc sock[VXA_SOCK_SIZE][3];
10311 +       atomic_t slab[8];
10312 +       atomic_t page[6][8];
10313 +};
10314 +
10315 +#ifdef CONFIG_VSERVER_DEBUG
10316 +/* Debug helper: printk the count/total pair of every cacct->sock[type][slot] entry. */
10317 +static inline void __dump_vx_cacct(struct _vx_cacct *cacct)
10318 +{
10319 +       int i, j;
10320 +
10321 +       printk("\t_vx_cacct:");
10322 +       for (i = 0; i < VXA_SOCK_SIZE; i++) {   /* bound follows the array's first dimension */
10323 +               struct _vx_sock_acc *ptr = cacct->sock[i];
10324 +
10325 +               printk("\t [%d] =", i);
10326 +               for (j = 0; j < 3; j++) {       /* second dimension of sock[][3] */
10327 +                       printk(" [%d] = %8lu, %8lu", j,
10328 +                               atomic_long_read(&ptr[j].count),
10329 +                               atomic_long_read(&ptr[j].total));
10330 +               }
10331 +               printk("\n");
10332 +       }
10333 +}
10334 +
10335 +#endif
10336 +
10337 +#endif /* _VX_CACCT_DEF_H */
10338 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/cacct.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/cacct.h
10339 --- linux-2.6.31.6/include/linux/vserver/cacct.h        1970-01-01 01:00:00.000000000 +0100
10340 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/cacct.h  2009-09-10 16:11:43.000000000 +0200
10341 @@ -0,0 +1,15 @@
10342 +#ifndef _VX_CACCT_H
10343 +#define _VX_CACCT_H
10344 +
10345 +
10346 +enum sock_acc_field {
10347 +       VXA_SOCK_UNSPEC = 0,
10348 +       VXA_SOCK_UNIX,
10349 +       VXA_SOCK_INET,
10350 +       VXA_SOCK_INET6,
10351 +       VXA_SOCK_PACKET,
10352 +       VXA_SOCK_OTHER,
10353 +       VXA_SOCK_SIZE   /* array size */
10354 +};
10355 +
10356 +#endif /* _VX_CACCT_H */
10357 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/cacct_int.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/cacct_int.h
10358 --- linux-2.6.31.6/include/linux/vserver/cacct_int.h    1970-01-01 01:00:00.000000000 +0100
10359 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/cacct_int.h      2009-09-10 16:11:43.000000000 +0200
10360 @@ -0,0 +1,21 @@
10361 +#ifndef _VX_CACCT_INT_H
10362 +#define _VX_CACCT_INT_H
10363 +
10364 +
10365 +#ifdef __KERNEL__
10366 +/* Return the 'count' value stored in cacct->sock[type][pos]. */
10367 +static inline unsigned long
10368 +vx_sock_count(struct _vx_cacct *cacct, int type, int pos)
10369 +{
10370 +       return atomic_long_read(&(cacct->sock[type] + pos)->count);
10371 +}
10372 +
10373 +/* Return the 'total' value stored in cacct->sock[type][pos]. */
10374 +static inline unsigned long
10375 +vx_sock_total(struct _vx_cacct *cacct, int type, int pos)
10376 +{
10377 +       return atomic_long_read(&(cacct->sock[type] + pos)->total);
10378 +}
10379 +
10380 +#endif /* __KERNEL__ */
10381 +#endif /* _VX_CACCT_INT_H */
10382 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/check.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/check.h
10383 --- linux-2.6.31.6/include/linux/vserver/check.h        1970-01-01 01:00:00.000000000 +0100
10384 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/check.h  2009-09-10 16:11:43.000000000 +0200
10385 @@ -0,0 +1,89 @@
10386 +#ifndef _VS_CHECK_H
10387 +#define _VS_CHECK_H
10388 +
10389 +
10390 +#define MAX_S_CONTEXT  65535   /* Arbitrary limit */
10391 +
10392 +#ifdef CONFIG_VSERVER_DYNAMIC_IDS
10393 +#define MIN_D_CONTEXT  49152   /* dynamic contexts start here */
10394 +#else
10395 +#define MIN_D_CONTEXT  65536
10396 +#endif
10397 +
10398 +/* check conditions */
10399 +
10400 +#define VS_ADMIN       0x0001
10401 +#define VS_WATCH       0x0002
10402 +#define VS_HIDE                0x0004
10403 +#define VS_HOSTID      0x0008
10404 +
10405 +#define VS_IDENT       0x0010
10406 +#define VS_EQUIV       0x0020
10407 +#define VS_PARENT      0x0040
10408 +#define VS_CHILD       0x0080
10409 +
10410 +#define VS_ARG_MASK    0x00F0
10411 +
10412 +#define VS_DYNAMIC     0x0100
10413 +#define VS_STATIC      0x0200
10414 +
10415 +#define VS_ATR_MASK    0x0F00
10416 +
10417 +#ifdef CONFIG_VSERVER_PRIVACY
10418 +#define VS_ADMIN_P     (0)
10419 +#define VS_WATCH_P     (0)
10420 +#else
10421 +#define VS_ADMIN_P     VS_ADMIN
10422 +#define VS_WATCH_P     VS_WATCH
10423 +#endif
10424 +
10425 +#define VS_HARDIRQ     0x1000
10426 +#define VS_SOFTIRQ     0x2000
10427 +#define VS_IRQ         0x4000
10428 +
10429 +#define VS_IRQ_MASK    0xF000
10430 +
10431 +#include <linux/hardirq.h>
10432 +
10433 +/*
10434 + * check current context id (cid) for ADMIN (0) / WATCH (1) and optionally
10435 + * against the supplied id, per the VS_* bits in mode; returns nonzero on match
10436 + */
10437 +static inline int __vs_check(int cid, int id, unsigned int mode)
10438 +{
10439 +       if (mode & VS_ARG_MASK) {       /* only VS_IDENT is evaluated here */
10440 +               if ((mode & VS_IDENT) && (id == cid))
10441 +                       return 1;
10442 +       }
10443 +       if (mode & VS_ATR_MASK) {
10444 +               if ((mode & VS_DYNAMIC) &&
10445 +                       (id >= MIN_D_CONTEXT) &&
10446 +                       (id <= MAX_S_CONTEXT))  /* empty range when MIN_D_CONTEXT is 65536 */
10447 +                       return 1;
10448 +               if ((mode & VS_STATIC) &&
10449 +                       (id > 1) && (id < MIN_D_CONTEXT))       /* ids 0 and 1 are excluded */
10450 +                       return 1;
10451 +       }
10452 +       if (mode & VS_IRQ_MASK) {       /* match based on the execution context */
10453 +               if ((mode & VS_IRQ) && unlikely(in_interrupt()))
10454 +                       return 1;
10455 +               if ((mode & VS_HARDIRQ) && unlikely(in_irq()))
10456 +                       return 1;
10457 +               if ((mode & VS_SOFTIRQ) && unlikely(in_softirq()))
10458 +                       return 1;
10459 +       }
10460 +       return (((mode & VS_ADMIN) && (cid == 0)) ||
10461 +               ((mode & VS_WATCH) && (cid == 1)) ||
10462 +               ((mode & VS_HOSTID) && (id == 0)));     /* HOSTID tests the argument, not cid */
10463 +}
10464 +
10465 +#define vx_check(c, m) __vs_check(vx_current_xid(), c, (m) | VS_IRQ)
10466 +
10467 +#define vx_weak_check(c, m)    ((m) ? vx_check(c, m) : 1)
10468 +
10469 +
10470 +#define nx_check(c, m) __vs_check(nx_current_nid(), c, m)
10471 +
10472 +#define nx_weak_check(c, m)    ((m) ? nx_check(c, m) : 1)
10473 +
10474 +#endif
10475 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/context_cmd.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/context_cmd.h
10476 --- linux-2.6.31.6/include/linux/vserver/context_cmd.h  1970-01-01 01:00:00.000000000 +0100
10477 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/context_cmd.h    2009-09-10 16:11:43.000000000 +0200
10478 @@ -0,0 +1,128 @@
10479 +#ifndef _VX_CONTEXT_CMD_H
10480 +#define _VX_CONTEXT_CMD_H
10481 +
10482 +
10483 +/* vinfo commands */
10484 +
10485 +#define VCMD_task_xid          VC_CMD(VINFO, 1, 0)
10486 +
10487 +#ifdef __KERNEL__
10488 +extern int vc_task_xid(uint32_t);
10489 +
10490 +#endif /* __KERNEL__ */
10491 +
10492 +#define VCMD_vx_info           VC_CMD(VINFO, 5, 0)
10493 +
10494 +struct vcmd_vx_info_v0 {
10495 +       uint32_t xid;
10496 +       uint32_t initpid;
10497 +       /* more to come */
10498 +};
10499 +
10500 +#ifdef __KERNEL__
10501 +extern int vc_vx_info(struct vx_info *, void __user *);
10502 +
10503 +#endif /* __KERNEL__ */
10504 +
10505 +#define VCMD_ctx_stat          VC_CMD(VSTAT, 0, 0)
10506 +
10507 +struct vcmd_ctx_stat_v0 {
10508 +       uint32_t usecnt;
10509 +       uint32_t tasks;
10510 +       /* more to come */
10511 +};
10512 +
10513 +#ifdef __KERNEL__
10514 +extern int vc_ctx_stat(struct vx_info *, void __user *);
10515 +
10516 +#endif /* __KERNEL__ */
10517 +
10518 +/* context commands */
10519 +
10520 +#define VCMD_ctx_create_v0     VC_CMD(VPROC, 1, 0)
10521 +#define VCMD_ctx_create                VC_CMD(VPROC, 1, 1)
10522 +
10523 +struct vcmd_ctx_create {
10524 +       uint64_t flagword;
10525 +};
10526 +
10527 +#define VCMD_ctx_migrate_v0    VC_CMD(PROCMIG, 1, 0)
10528 +#define VCMD_ctx_migrate       VC_CMD(PROCMIG, 1, 1)
10529 +
10530 +struct vcmd_ctx_migrate {
10531 +       uint64_t flagword;
10532 +};
10533 +
10534 +#ifdef __KERNEL__
10535 +extern int vc_ctx_create(uint32_t, void __user *);
10536 +extern int vc_ctx_migrate(struct vx_info *, void __user *);
10537 +
10538 +#endif /* __KERNEL__ */
10539 +
10540 +
10541 +/* flag commands */
10542 +
10543 +#define VCMD_get_cflags                VC_CMD(FLAGS, 1, 0)
10544 +#define VCMD_set_cflags                VC_CMD(FLAGS, 2, 0)
10545 +
10546 +struct vcmd_ctx_flags_v0 {
10547 +       uint64_t flagword;
10548 +       uint64_t mask;
10549 +};
10550 +
10551 +#ifdef __KERNEL__
10552 +extern int vc_get_cflags(struct vx_info *, void __user *);
10553 +extern int vc_set_cflags(struct vx_info *, void __user *);
10554 +
10555 +#endif /* __KERNEL__ */
10556 +
10557 +
10558 +/* context caps commands */
10559 +
10560 +#define VCMD_get_ccaps         VC_CMD(FLAGS, 3, 1)
10561 +#define VCMD_set_ccaps         VC_CMD(FLAGS, 4, 1)
10562 +
10563 +struct vcmd_ctx_caps_v1 {
10564 +       uint64_t ccaps;
10565 +       uint64_t cmask;
10566 +};
10567 +
10568 +#ifdef __KERNEL__
10569 +extern int vc_get_ccaps(struct vx_info *, void __user *);
10570 +extern int vc_set_ccaps(struct vx_info *, void __user *);
10571 +
10572 +#endif /* __KERNEL__ */
10573 +
10574 +
10575 +/* bcaps commands */
10576 +
10577 +#define VCMD_get_bcaps         VC_CMD(FLAGS, 9, 0)
10578 +#define VCMD_set_bcaps         VC_CMD(FLAGS, 10, 0)
10579 +
10580 +struct vcmd_bcaps {
10581 +       uint64_t bcaps;
10582 +       uint64_t bmask;
10583 +};
10584 +
10585 +#ifdef __KERNEL__
10586 +extern int vc_get_bcaps(struct vx_info *, void __user *);
10587 +extern int vc_set_bcaps(struct vx_info *, void __user *);
10588 +
10589 +#endif /* __KERNEL__ */
10590 +
10591 +
10592 +/* OOM badness */
10593 +
10594 +#define VCMD_get_badness       VC_CMD(MEMCTRL, 5, 0)
10595 +#define VCMD_set_badness       VC_CMD(MEMCTRL, 6, 0)
10596 +
10597 +struct vcmd_badness_v0 {
10598 +       int64_t bias;
10599 +};
10600 +
10601 +#ifdef __KERNEL__
10602 +extern int vc_get_badness(struct vx_info *, void __user *);
10603 +extern int vc_set_badness(struct vx_info *, void __user *);
10604 +
10605 +#endif /* __KERNEL__ */
10606 +#endif /* _VX_CONTEXT_CMD_H */
10607 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/context.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/context.h
10608 --- linux-2.6.31.6/include/linux/vserver/context.h      1970-01-01 01:00:00.000000000 +0100
10609 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/context.h        2009-10-03 01:55:20.000000000 +0200
10610 @@ -0,0 +1,182 @@
10611 +#ifndef _VX_CONTEXT_H
10612 +#define _VX_CONTEXT_H
10613 +
10614 +#include <linux/types.h>
10615 +#include <linux/capability.h>
10616 +
10617 +
10618 +/* context flags */
10619 +
10620 +#define VXF_INFO_SCHED         0x00000002
10621 +#define VXF_INFO_NPROC         0x00000004
10622 +#define VXF_INFO_PRIVATE       0x00000008
10623 +
10624 +#define VXF_INFO_INIT          0x00000010
10625 +#define VXF_INFO_HIDE          0x00000020
10626 +#define VXF_INFO_ULIMIT                0x00000040
10627 +#define VXF_INFO_NSPACE                0x00000080
10628 +
10629 +#define VXF_SCHED_HARD         0x00000100
10630 +#define VXF_SCHED_PRIO         0x00000200
10631 +#define VXF_SCHED_PAUSE                0x00000400
10632 +
10633 +#define VXF_VIRT_MEM           0x00010000
10634 +#define VXF_VIRT_UPTIME                0x00020000
10635 +#define VXF_VIRT_CPU           0x00040000
10636 +#define VXF_VIRT_LOAD          0x00080000
10637 +#define VXF_VIRT_TIME          0x00100000
10638 +
10639 +#define VXF_HIDE_MOUNT         0x01000000
10640 +/* was VXF_HIDE_NETIF          0x02000000 */
10641 +#define VXF_HIDE_VINFO         0x04000000
10642 +
10643 +#define VXF_STATE_SETUP                (1ULL << 32)
10644 +#define VXF_STATE_INIT         (1ULL << 33)
10645 +#define VXF_STATE_ADMIN                (1ULL << 34)
10646 +
10647 +#define VXF_SC_HELPER          (1ULL << 36)
10648 +#define VXF_REBOOT_KILL                (1ULL << 37)
10649 +#define VXF_PERSISTENT         (1ULL << 38)
10650 +
10651 +#define VXF_FORK_RSS           (1ULL << 48)
10652 +#define VXF_PROLIFIC           (1ULL << 49)
10653 +
10654 +#define VXF_IGNEG_NICE         (1ULL << 52)
10655 +
10656 +#define VXF_ONE_TIME           (0x0007ULL << 32)
10657 +
10658 +#define VXF_INIT_SET           (VXF_STATE_SETUP | VXF_STATE_INIT | VXF_STATE_ADMIN)
10659 +
10660 +
10661 +/* context migration */
10662 +
10663 +#define VXM_SET_INIT           0x00000001
10664 +#define VXM_SET_REAPER         0x00000002
10665 +
10666 +/* context caps */
10667 +
10668 +#define VXC_CAP_MASK           0x00000000
10669 +
10670 +#define VXC_SET_UTSNAME                0x00000001
10671 +#define VXC_SET_RLIMIT         0x00000002
10672 +#define VXC_FS_SECURITY                0x00000004
10673 +
10674 +/* was VXC_RAW_ICMP            0x00000100 */
10675 +#define VXC_SYSLOG             0x00001000
10676 +#define VXC_OOM_ADJUST         0x00002000
10677 +#define VXC_AUDIT_CONTROL      0x00004000
10678 +
10679 +#define VXC_SECURE_MOUNT       0x00010000
10680 +#define VXC_SECURE_REMOUNT     0x00020000
10681 +#define VXC_BINARY_MOUNT       0x00040000
10682 +
10683 +#define VXC_QUOTA_CTL          0x00100000
10684 +#define VXC_ADMIN_MAPPER       0x00200000
10685 +#define VXC_ADMIN_CLOOP                0x00400000
10686 +
10687 +#define VXC_KTHREAD            0x01000000
10688 +#define VXC_NAMESPACE          0x02000000
10689 +
10690 +
10691 +#ifdef __KERNEL__
10692 +
10693 +#include <linux/list.h>
10694 +#include <linux/spinlock.h>
10695 +#include <linux/rcupdate.h>
10696 +
10697 +#include "limit_def.h"
10698 +#include "sched_def.h"
10699 +#include "cvirt_def.h"
10700 +#include "cacct_def.h"
10701 +#include "device_def.h"
10702 +
10703 +#define VX_SPACES      2
10704 +
10705 +struct _vx_info_pc {
10706 +       struct _vx_sched_pc sched_pc;
10707 +       struct _vx_cvirt_pc cvirt_pc;
10708 +};
10709 +
10710 +struct vx_info {
10711 +       struct hlist_node vx_hlist;             /* linked list of contexts */
10712 +       xid_t vx_id;                            /* context id */
10713 +       atomic_t vx_usecnt;                     /* usage count */
10714 +       atomic_t vx_tasks;                      /* tasks count */
10715 +       struct vx_info *vx_parent;              /* parent context */
10716 +       int vx_state;                           /* context state */
10717 +
10718 +       unsigned long vx_nsmask[VX_SPACES];     /* assignment mask */
10719 +       struct nsproxy *vx_nsproxy[VX_SPACES];  /* private namespaces */
10720 +       struct fs_struct *vx_fs[VX_SPACES];     /* private namespace fs */
10721 +
10722 +       uint64_t vx_flags;                      /* context flags */
10723 +       uint64_t vx_ccaps;                      /* context caps (vserver) */
10724 +       kernel_cap_t vx_bcaps;                  /* bounding caps (system) */
10725 +       // kernel_cap_t vx_cap_bset;            /* the guest's bset */
10726 +
10727 +       struct task_struct *vx_reaper;          /* guest reaper process */
10728 +       pid_t vx_initpid;                       /* PID of guest init */
10729 +       int64_t vx_badness_bias;                /* OOM points bias */
10730 +
10731 +       struct _vx_limit limit;                 /* vserver limits */
10732 +       struct _vx_sched sched;                 /* vserver scheduler */
10733 +       struct _vx_cvirt cvirt;                 /* virtual/bias stuff */
10734 +       struct _vx_cacct cacct;                 /* context accounting */
10735 +
10736 +       struct _vx_device dmap;                 /* default device map targets */
10737 +
10738 +#ifndef CONFIG_SMP
10739 +       struct _vx_info_pc info_pc;             /* per cpu data */
10740 +#else
10741 +       struct _vx_info_pc *ptr_pc;             /* per cpu array */
10742 +#endif
10743 +
10744 +       wait_queue_head_t vx_wait;              /* context exit waitqueue */
10745 +       int reboot_cmd;                         /* last sys_reboot() cmd */
10746 +       int exit_code;                          /* last process exit code */
10747 +
10748 +       char vx_name[65];                       /* vserver name */
10749 +};
10750 +
10751 +#ifndef CONFIG_SMP
10752 +#define        vx_ptr_pc(vxi)          (&(vxi)->info_pc)
10753 +#define        vx_per_cpu(vxi, v, id)  vx_ptr_pc(vxi)->v
10754 +#else
10755 +#define        vx_ptr_pc(vxi)          ((vxi)->ptr_pc)
10756 +#define        vx_per_cpu(vxi, v, id)  per_cpu_ptr(vx_ptr_pc(vxi), id)->v
10757 +#endif
10758 +
10759 +#define        vx_cpu(vxi, v)          vx_per_cpu(vxi, v, smp_processor_id())
10760 +
10761 +
10762 +struct vx_info_save {
10763 +       struct vx_info *vxi;
10764 +       xid_t xid;
10765 +};
10766 +
10767 +
10768 +/* status flags */
10769 +
10770 +#define VXS_HASHED     0x0001
10771 +#define VXS_PAUSED     0x0010
10772 +#define VXS_SHUTDOWN   0x0100
10773 +#define VXS_HELPER     0x1000
10774 +#define VXS_RELEASED   0x8000
10775 +
10776 +
10777 +extern void claim_vx_info(struct vx_info *, struct task_struct *);
10778 +extern void release_vx_info(struct vx_info *, struct task_struct *);
10779 +
10780 +extern struct vx_info *lookup_vx_info(int);
10781 +extern struct vx_info *lookup_or_create_vx_info(int);
10782 +
10783 +extern int get_xid_list(int, unsigned int *, int);
10784 +extern int xid_is_hashed(xid_t);
10785 +
10786 +extern int vx_migrate_task(struct task_struct *, struct vx_info *, int);
10787 +
10788 +extern long vs_state_change(struct vx_info *, unsigned int);
10789 +
10790 +
10791 +#endif /* __KERNEL__ */
10792 +#endif /* _VX_CONTEXT_H */
10793 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/cvirt_cmd.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/cvirt_cmd.h
10794 --- linux-2.6.31.6/include/linux/vserver/cvirt_cmd.h    1970-01-01 01:00:00.000000000 +0100
10795 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/cvirt_cmd.h      2009-09-10 16:11:43.000000000 +0200
10796 @@ -0,0 +1,53 @@
10797 +#ifndef _VX_CVIRT_CMD_H
10798 +#define _VX_CVIRT_CMD_H
10799 +
10800 +
10801 +/* virtual host info name commands */
10802 +
10803 +#define VCMD_set_vhi_name      VC_CMD(VHOST, 1, 0)
10804 +#define VCMD_get_vhi_name      VC_CMD(VHOST, 2, 0)
10805 +
10806 +struct vcmd_vhi_name_v0 {
10807 +       uint32_t field;
10808 +       char name[65];
10809 +};
10810 +
10811 +
10812 +enum vhi_name_field {
10813 +       VHIN_CONTEXT = 0,
10814 +       VHIN_SYSNAME,
10815 +       VHIN_NODENAME,
10816 +       VHIN_RELEASE,
10817 +       VHIN_VERSION,
10818 +       VHIN_MACHINE,
10819 +       VHIN_DOMAINNAME,
10820 +};
10821 +
10822 +
10823 +#ifdef __KERNEL__
10824 +
10825 +#include <linux/compiler.h>
10826 +
10827 +extern int vc_set_vhi_name(struct vx_info *, void __user *);
10828 +extern int vc_get_vhi_name(struct vx_info *, void __user *);
10829 +
10830 +#endif /* __KERNEL__ */
10831 +
10832 +#define VCMD_virt_stat         VC_CMD(VSTAT, 3, 0)
10833 +
10834 +struct vcmd_virt_stat_v0 {
10835 +       uint64_t offset;
10836 +       uint64_t uptime;
10837 +       uint32_t nr_threads;
10838 +       uint32_t nr_running;
10839 +       uint32_t nr_uninterruptible;
10840 +       uint32_t nr_onhold;
10841 +       uint32_t nr_forks;
10842 +       uint32_t load[3];
10843 +};
10844 +
10845 +#ifdef __KERNEL__
10846 +extern int vc_virt_stat(struct vx_info *, void __user *);
10847 +
10848 +#endif /* __KERNEL__ */
10849 +#endif /* _VX_CVIRT_CMD_H */
10850 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/cvirt_def.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/cvirt_def.h
10851 --- linux-2.6.31.6/include/linux/vserver/cvirt_def.h    1970-01-01 01:00:00.000000000 +0100
10852 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/cvirt_def.h      2009-09-10 16:11:43.000000000 +0200
10853 @@ -0,0 +1,80 @@
10854 +#ifndef _VX_CVIRT_DEF_H
10855 +#define _VX_CVIRT_DEF_H
10856 +
10857 +#include <linux/jiffies.h>
10858 +#include <linux/spinlock.h>
10859 +#include <linux/wait.h>
10860 +#include <linux/time.h>
10861 +#include <asm/atomic.h>
10862 +
10863 +
10864 +struct _vx_usage_stat {
10865 +       uint64_t user;
10866 +       uint64_t nice;
10867 +       uint64_t system;
10868 +       uint64_t softirq;
10869 +       uint64_t irq;
10870 +       uint64_t idle;
10871 +       uint64_t iowait;
10872 +};
10873 +
10874 +struct _vx_syslog {
10875 +       wait_queue_head_t log_wait;
10876 +       spinlock_t logbuf_lock;         /* lock for the log buffer */
10877 +
10878 +       unsigned long log_start;        /* next char to be read by syslog() */
10879 +       unsigned long con_start;        /* next char to be sent to consoles */
10880 +       unsigned long log_end;  /* most-recently-written-char + 1 */
10881 +       unsigned long logged_chars;     /* #chars since last read+clear operation */
10882 +
10883 +       char log_buf[1024];
10884 +};
10885 +
10886 +
10887 +/* context sub struct */
10888 +
10889 +struct _vx_cvirt {
10890 +       atomic_t nr_threads;            /* number of current threads */
10891 +       atomic_t nr_running;            /* number of running threads */
10892 +       atomic_t nr_uninterruptible;    /* number of uninterruptible threads */
10893 +
10894 +       atomic_t nr_onhold;             /* processes on hold */
10895 +       uint32_t onhold_last;           /* jiffies when put on hold */
10896 +
10897 +       struct timeval bias_tv;         /* time offset to the host */
10898 +       struct timespec bias_idle;
10899 +       struct timespec bias_uptime;    /* context creation point */
10900 +       uint64_t bias_clock;            /* offset in clock_t */
10901 +
10902 +       spinlock_t load_lock;           /* lock for the load averages */
10903 +       atomic_t load_updates;          /* nr of load updates done so far */
10904 +       uint32_t load_last;             /* last time load was calculated */
10905 +       uint32_t load[3];               /* load averages 1,5,15 */
10906 +
10907 +       atomic_t total_forks;           /* number of forks so far */
10908 +
10909 +       struct _vx_syslog syslog;
10910 +};
10911 +
10912 +struct _vx_cvirt_pc {
10913 +       struct _vx_usage_stat cpustat;
10914 +};
10915 +
10916 +
10917 +#ifdef CONFIG_VSERVER_DEBUG
10918 +
10919 +static inline void __dump_vx_cvirt(struct _vx_cvirt *cvirt)     /* printk a few counters of *cvirt */
10920 +{
10921 +       printk("\t_vx_cvirt:\n");
10922 +       printk("\t threads: %4d, %4d, %4d, %4d\n",      /* order: threads, running, uninterruptible, onhold */
10923 +               atomic_read(&cvirt->nr_threads),
10924 +               atomic_read(&cvirt->nr_running),
10925 +               atomic_read(&cvirt->nr_uninterruptible),
10926 +               atomic_read(&cvirt->nr_onhold));
10927 +       /* TODO: onhold_last, the bias_* and load* members and syslog are not dumped */
10928 +       printk("\t total_forks = %d\n", atomic_read(&cvirt->total_forks));
10929 +}
10930 +
10931 +#endif
10932 +
10933 +#endif /* _VX_CVIRT_DEF_H */
10934 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/cvirt.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/cvirt.h
10935 --- linux-2.6.31.6/include/linux/vserver/cvirt.h        1970-01-01 01:00:00.000000000 +0100
10936 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/cvirt.h  2009-09-10 16:11:43.000000000 +0200
10937 @@ -0,0 +1,20 @@
10938 +#ifndef _VX_CVIRT_H
10939 +#define _VX_CVIRT_H
10940 +
10941 +
10942 +#ifdef __KERNEL__
10943 +
10944 +struct timespec;
10945 +
10946 +void vx_vsi_uptime(struct timespec *, struct timespec *);
10947 +
10948 +
10949 +struct vx_info;
10950 +
10951 +void vx_update_load(struct vx_info *);
10952 +
10953 +
10954 +int vx_do_syslog(int, char __user *, int);
10955 +
10956 +#endif /* __KERNEL__ */
10957 +#endif /* _VX_CVIRT_H */
10958 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/debug_cmd.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/debug_cmd.h
10959 --- linux-2.6.31.6/include/linux/vserver/debug_cmd.h    1970-01-01 01:00:00.000000000 +0100
10960 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/debug_cmd.h      2009-09-10 16:11:43.000000000 +0200
10961 @@ -0,0 +1,58 @@
10962 +#ifndef _VX_DEBUG_CMD_H
10963 +#define _VX_DEBUG_CMD_H
10964 +
10965 +
10966 +/* debug commands */
10967 +
10968 +#define VCMD_dump_history      VC_CMD(DEBUG, 1, 0)
10969 +
10970 +#define VCMD_read_history      VC_CMD(DEBUG, 5, 0)
10971 +#define VCMD_read_monitor      VC_CMD(DEBUG, 6, 0)
10972 +
10973 +struct  vcmd_read_history_v0 {
10974 +       uint32_t index;
10975 +       uint32_t count;
10976 +       char __user *data;
10977 +};
10978 +
10979 +struct  vcmd_read_monitor_v0 {
10980 +       uint32_t index;
10981 +       uint32_t count;
10982 +       char __user *data;
10983 +};
10984 +
10985 +
10986 +#ifdef __KERNEL__
10987 +
10988 +#ifdef CONFIG_COMPAT
10989 +
10990 +#include <asm/compat.h>
10991 +
10992 +struct vcmd_read_history_v0_x32 {
10993 +       uint32_t index;
10994 +       uint32_t count;
10995 +       compat_uptr_t data_ptr;
10996 +};
10997 +
10998 +struct vcmd_read_monitor_v0_x32 {
10999 +       uint32_t index;
11000 +       uint32_t count;
11001 +       compat_uptr_t data_ptr;
11002 +};
11003 +
11004 +#endif  /* CONFIG_COMPAT */
11005 +
11006 +extern int vc_dump_history(uint32_t);
11007 +
11008 +extern int vc_read_history(uint32_t, void __user *);
11009 +extern int vc_read_monitor(uint32_t, void __user *);
11010 +
11011 +#ifdef CONFIG_COMPAT
11012 +
11013 +extern int vc_read_history_x32(uint32_t, void __user *);
11014 +extern int vc_read_monitor_x32(uint32_t, void __user *);
11015 +
11016 +#endif  /* CONFIG_COMPAT */
11017 +
11018 +#endif /* __KERNEL__ */
11019 +#endif /* _VX_DEBUG_CMD_H */
11020 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/debug.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/debug.h
11021 --- linux-2.6.31.6/include/linux/vserver/debug.h        1970-01-01 01:00:00.000000000 +0100
11022 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/debug.h  2009-09-10 16:11:43.000000000 +0200
11023 @@ -0,0 +1,127 @@
11024 +#ifndef _VX_DEBUG_H
11025 +#define _VX_DEBUG_H
11026 +
11027 +
11028 +#define VXD_CBIT(n, m) (vx_debug_ ## n & (1U << (m)))  /* bit m of vx_debug_<n>; 1U keeps m == 31 well defined */
11029 +#define VXD_CMIN(n, m) (vx_debug_ ## n > (m))          /* vx_debug_<n> is above level m */
11030 +#define VXD_MASK(n, m) (vx_debug_ ## n & (m))          /* bits of vx_debug_<n> selected by mask m */
11031 +
11032 +#define VXD_DEV(d)     (d), (d)->bd_inode->i_ino,              \
11033 +                       imajor((d)->bd_inode), iminor((d)->bd_inode)
11034 +#define VXF_DEV                "%p[%lu,%d:%d]"
11035 +
11036 +
11037 +#define vxd_path(p)    /* hands p to d_path() */               \
11038 +       ({ static char _buffer[PATH_MAX]; /* one static buffer per use site, overwritten on every evaluation */ \
11039 +          d_path(p, _buffer, sizeof(_buffer)); })
11040 +
11041 +#define vxd_cond_path(n)       /* "<null>" when n is NULL */   \
11042 +       ((n) ? vxd_path(&(n)->path) : "<null>" )
11043 +
11044 +
11045 +#ifdef CONFIG_VSERVER_DEBUG
11046 +
11047 +extern unsigned int vx_debug_switch;
11048 +extern unsigned int vx_debug_xid;
11049 +extern unsigned int vx_debug_nid;
11050 +extern unsigned int vx_debug_tag;
11051 +extern unsigned int vx_debug_net;
11052 +extern unsigned int vx_debug_limit;
11053 +extern unsigned int vx_debug_cres;
11054 +extern unsigned int vx_debug_dlim;
11055 +extern unsigned int vx_debug_quota;
11056 +extern unsigned int vx_debug_cvirt;
11057 +extern unsigned int vx_debug_space;
11058 +extern unsigned int vx_debug_misc;
11059 +
11060 +
11061 +#define VX_LOGLEVEL    "vxD: "
11062 +#define VX_PROC_FMT    "%p: "
11063 +#define VX_PROCESS     current
11064 +
11065 +#define vxdprintk(c, f, x...)                                  \
11066 +       do {                                                    \
11067 +               if (c)                                          \
11068 +                       printk(VX_LOGLEVEL VX_PROC_FMT f "\n",  \
11069 +                               VX_PROCESS , ##x);              \
11070 +       } while (0)
11071 +
11072 +#define vxlprintk(c, f, x...)                                  \
11073 +       do {                                                    \
11074 +               if (c)                                          \
11075 +                       printk(VX_LOGLEVEL f " @%s:%d\n", x);   \
11076 +       } while (0)
11077 +
11078 +#define vxfprintk(c, f, x...)                                  \
11079 +       do {                                                    \
11080 +               if (c)                                          \
11081 +                       printk(VX_LOGLEVEL f " %s@%s:%d\n", x); \
11082 +       } while (0)
11083 +
11084 +
11085 +struct vx_info;
11086 +
11087 +void dump_vx_info(struct vx_info *, int);
11088 +void dump_vx_info_inactive(int);
11089 +
11090 +#else  /* CONFIG_VSERVER_DEBUG */
11091 +
11092 +#define vx_debug_switch 0
11093 +#define vx_debug_xid   0
11094 +#define vx_debug_nid   0
11095 +#define vx_debug_tag   0
11096 +#define vx_debug_net   0
11097 +#define vx_debug_limit 0
11098 +#define vx_debug_cres  0
11099 +#define vx_debug_dlim  0
11100 +#define vx_debug_cvirt 0
11101 +
11102 +#define vxdprintk(x...) do { } while (0)
11103 +#define vxlprintk(x...) do { } while (0)
11104 +#define vxfprintk(x...) do { } while (0)
11105 +
11106 +#endif /* CONFIG_VSERVER_DEBUG */
11107 +
11108 +
11109 +#ifdef CONFIG_VSERVER_WARN
11110 +
11111 +#define VX_WARNLEVEL   KERN_WARNING "vxW: "
11112 +#define VX_WARN_TASK   "[»%s«,%u:#%u|%u|%u] "
11113 +#define VX_WARN_XID    "[xid #%u] "
11114 +#define VX_WARN_NID    "[nid #%u] "
11115 +#define VX_WARN_TAG    "[tag #%u] "
11116 +
11117 +#define vxwprintk(c, f, x...)                                  \
11118 +       do {                                                    \
11119 +               if (c)                                          \
11120 +                       printk(VX_WARNLEVEL f "\n", ##x);       \
11121 +       } while (0)
11122 +
11123 +#else  /* CONFIG_VSERVER_WARN */
11124 +
11125 +#define vxwprintk(x...) do { } while (0)
11126 +
11127 +#endif /* CONFIG_VSERVER_WARN */
11128 +
11129 +#define vxwprintk_task(c, f, x...)                             \
11130 +       vxwprintk(c, VX_WARN_TASK f,                            \
11131 +               current->comm, current->pid,                    \
11132 +               current->xid, current->nid, current->tag, ##x)
11133 +#define vxwprintk_xid(c, f, x...)                              \
11134 +       vxwprintk(c, VX_WARN_XID f, current->xid, x)
11135 +#define vxwprintk_nid(c, f, x...)                              \
11136 +       vxwprintk(c, VX_WARN_NID f, current->nid, x)
11137 +#define vxwprintk_tag(c, f, x...)                              \
11138 +       vxwprintk(c, VX_WARN_TAG f, current->tag, x)
11139 +
11140 +#ifdef CONFIG_VSERVER_DEBUG    /* debug build: assertions are live */
11141 +#define vxd_assert_lock(l)     assert_spin_locked(l)
11142 +#define vxd_assert(c, f, x...) vxlprintk(!(c), /* report-only: logs when c is false */ \
11143 +       "assertion [" f "] failed.", ##x, __FILE__, __LINE__)
11144 +#else  /* !CONFIG_VSERVER_DEBUG: both expand to nothing, c is not evaluated */
11145 +#define vxd_assert_lock(l)     do { } while (0)
11146 +#define vxd_assert(c, f, x...) do { } while (0)
11147 +#endif
11148 +
11149 +
11150 +#endif /* _VX_DEBUG_H */
11151 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/device_cmd.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/device_cmd.h
11152 --- linux-2.6.31.6/include/linux/vserver/device_cmd.h   1970-01-01 01:00:00.000000000 +0100
11153 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/device_cmd.h     2009-09-10 16:11:43.000000000 +0200
11154 @@ -0,0 +1,44 @@
11155 +#ifndef _VX_DEVICE_CMD_H
11156 +#define _VX_DEVICE_CMD_H
11157 +
11158 +
11159 +/*  device vserver commands */
11160 +
11161 +#define VCMD_set_mapping       VC_CMD(DEVICE, 1, 0)
11162 +#define VCMD_unset_mapping     VC_CMD(DEVICE, 2, 0)
11163 +
11164 +struct vcmd_set_mapping_v0 {
11165 +       const char __user *device;
11166 +       const char __user *target;
11167 +       uint32_t flags;
11168 +};
11169 +
11170 +
11171 +#ifdef __KERNEL__
11172 +
11173 +#ifdef CONFIG_COMPAT
11174 +
11175 +#include <asm/compat.h>
11176 +
11177 +struct vcmd_set_mapping_v0_x32 {
11178 +       compat_uptr_t device_ptr;
11179 +       compat_uptr_t target_ptr;
11180 +       uint32_t flags;
11181 +};
11182 +
11183 +#endif /* CONFIG_COMPAT */
11184 +
11185 +#include <linux/compiler.h>
11186 +
11187 +extern int vc_set_mapping(struct vx_info *, void __user *);
11188 +extern int vc_unset_mapping(struct vx_info *, void __user *);
11189 +
11190 +#ifdef CONFIG_COMPAT
11191 +
11192 +extern int vc_set_mapping_x32(struct vx_info *, void __user *);
11193 +extern int vc_unset_mapping_x32(struct vx_info *, void __user *);
11194 +
11195 +#endif /* CONFIG_COMPAT */
11196 +
11197 +#endif /* __KERNEL__ */
11198 +#endif /* _VX_DEVICE_CMD_H */
11199 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/device_def.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/device_def.h
11200 --- linux-2.6.31.6/include/linux/vserver/device_def.h   1970-01-01 01:00:00.000000000 +0100
11201 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/device_def.h     2009-09-10 16:11:43.000000000 +0200
11202 @@ -0,0 +1,17 @@
11203 +#ifndef _VX_DEVICE_DEF_H
11204 +#define _VX_DEVICE_DEF_H
11205 +
11206 +#include <linux/types.h>
11207 +
11208 +struct vx_dmap_target {
11209 +       dev_t target;
11210 +       uint32_t flags;
11211 +};
11212 +
11213 +struct _vx_device {
11214 +#ifdef CONFIG_VSERVER_DEVICE
11215 +       struct vx_dmap_target targets[2];
11216 +#endif
11217 +};
11218 +
11219 +#endif /* _VX_DEVICE_DEF_H */
11220 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/device.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/device.h
11221 --- linux-2.6.31.6/include/linux/vserver/device.h       1970-01-01 01:00:00.000000000 +0100
11222 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/device.h 2009-09-10 16:11:43.000000000 +0200
11223 @@ -0,0 +1,15 @@
11224 +#ifndef _VX_DEVICE_H
11225 +#define _VX_DEVICE_H
11226 +
11227 +
11228 +#define DATTR_CREATE   0x00000001
11229 +#define DATTR_OPEN     0x00000002
11230 +
11231 +#define DATTR_REMAP    0x00000010
11232 +
11233 +#define DATTR_MASK     0x00000013
11234 +
11235 +
11236 +#else  /* _VX_DEVICE_H */
11237 +#warning duplicate inclusion
11238 +#endif /* _VX_DEVICE_H */
11239 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/dlimit_cmd.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/dlimit_cmd.h
11240 --- linux-2.6.31.6/include/linux/vserver/dlimit_cmd.h   1970-01-01 01:00:00.000000000 +0100
11241 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/dlimit_cmd.h     2009-09-10 16:11:43.000000000 +0200
11242 @@ -0,0 +1,74 @@
11243 +#ifndef _VX_DLIMIT_CMD_H
11244 +#define _VX_DLIMIT_CMD_H
11245 +
11246 +
11247 +/*  dlimit vserver commands */
11248 +
11249 +#define VCMD_add_dlimit                VC_CMD(DLIMIT, 1, 0)
11250 +#define VCMD_rem_dlimit                VC_CMD(DLIMIT, 2, 0)
11251 +
11252 +#define VCMD_set_dlimit                VC_CMD(DLIMIT, 5, 0)
11253 +#define VCMD_get_dlimit                VC_CMD(DLIMIT, 6, 0)
11254 +
11255 +struct vcmd_ctx_dlimit_base_v0 {
11256 +       const char __user *name;
11257 +       uint32_t flags;
11258 +};
11259 +
11260 +struct vcmd_ctx_dlimit_v0 {
11261 +       const char __user *name;
11262 +       uint32_t space_used;                    /* used space in kbytes */
11263 +       uint32_t space_total;                   /* maximum space in kbytes */
11264 +       uint32_t inodes_used;                   /* used inodes */
11265 +       uint32_t inodes_total;                  /* maximum inodes */
11266 +       uint32_t reserved;                      /* reserved for root in % */
11267 +       uint32_t flags;
11268 +};
11269 +
11270 +#define CDLIM_UNSET            ((uint32_t)0UL)
11271 +#define CDLIM_INFINITY         ((uint32_t)~0UL)
11272 +#define CDLIM_KEEP             ((uint32_t)~1UL)
11273 +
11274 +#ifdef __KERNEL__
11275 +
11276 +#ifdef CONFIG_COMPAT
11277 +
11278 +#include <asm/compat.h>
11279 +
11280 +struct vcmd_ctx_dlimit_base_v0_x32 {
11281 +       compat_uptr_t name_ptr;
11282 +       uint32_t flags;
11283 +};
11284 +
11285 +struct vcmd_ctx_dlimit_v0_x32 {
11286 +       compat_uptr_t name_ptr;
11287 +       uint32_t space_used;                    /* used space in kbytes */
11288 +       uint32_t space_total;                   /* maximum space in kbytes */
11289 +       uint32_t inodes_used;                   /* used inodes */
11290 +       uint32_t inodes_total;                  /* maximum inodes */
11291 +       uint32_t reserved;                      /* reserved for root in % */
11292 +       uint32_t flags;
11293 +};
11294 +
11295 +#endif /* CONFIG_COMPAT */
11296 +
11297 +#include <linux/compiler.h>
11298 +
11299 +extern int vc_add_dlimit(uint32_t, void __user *);
11300 +extern int vc_rem_dlimit(uint32_t, void __user *);
11301 +
11302 +extern int vc_set_dlimit(uint32_t, void __user *);
11303 +extern int vc_get_dlimit(uint32_t, void __user *);
11304 +
11305 +#ifdef CONFIG_COMPAT
11306 +
11307 +extern int vc_add_dlimit_x32(uint32_t, void __user *);
11308 +extern int vc_rem_dlimit_x32(uint32_t, void __user *);
11309 +
11310 +extern int vc_set_dlimit_x32(uint32_t, void __user *);
11311 +extern int vc_get_dlimit_x32(uint32_t, void __user *);
11312 +
11313 +#endif /* CONFIG_COMPAT */
11314 +
11315 +#endif /* __KERNEL__ */
11316 +#endif /* _VX_DLIMIT_CMD_H */
11317 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/dlimit.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/dlimit.h
11318 --- linux-2.6.31.6/include/linux/vserver/dlimit.h       1970-01-01 01:00:00.000000000 +0100
11319 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/dlimit.h 2009-09-10 16:11:43.000000000 +0200
11320 @@ -0,0 +1,54 @@
11321 +#ifndef _VX_DLIMIT_H
11322 +#define _VX_DLIMIT_H
11323 +
11324 +#include "switch.h"
11325 +
11326 +
11327 +#ifdef __KERNEL__
11328 +
11329 +/*      keep in sync with CDLIM_INFINITY       */
11330 +
11331 +#define DLIM_INFINITY          (~0ULL)
11332 +
11333 +#include <linux/spinlock.h>
11334 +#include <linux/rcupdate.h>
11335 +
11336 +struct super_block;
11337 +
11338 +struct dl_info {
11339 +       struct hlist_node dl_hlist;             /* linked list of contexts */
11340 +       struct rcu_head dl_rcu;                 /* the rcu head */
11341 +       tag_t dl_tag;                           /* context tag */
11342 +       atomic_t dl_usecnt;                     /* usage count */
11343 +       atomic_t dl_refcnt;                     /* reference count */
11344 +
11345 +       struct super_block *dl_sb;              /* associated superblock */
11346 +
11347 +       spinlock_t dl_lock;                     /* protect the values */
11348 +
11349 +       unsigned long long dl_space_used;       /* used space in bytes */
11350 +       unsigned long long dl_space_total;      /* maximum space in bytes */
11351 +       unsigned long dl_inodes_used;           /* used inodes */
11352 +       unsigned long dl_inodes_total;          /* maximum inodes */
11353 +
11354 +       unsigned int dl_nrlmult;                /* non root limit mult */
11355 +};
11356 +
11357 +struct rcu_head;
11358 +
11359 +extern void rcu_free_dl_info(struct rcu_head *);
11360 +extern void unhash_dl_info(struct dl_info *);
11361 +
11362 +extern struct dl_info *locate_dl_info(struct super_block *, tag_t);
11363 +
11364 +
11365 +struct kstatfs;
11366 +
11367 +extern void vx_vsi_statfs(struct super_block *, struct kstatfs *);
11368 +
11369 +typedef uint64_t dlsize_t;
11370 +
11371 +#endif /* __KERNEL__ */
11372 +#else  /* _VX_DLIMIT_H */
11373 +#warning duplicate inclusion
11374 +#endif /* _VX_DLIMIT_H */
11375 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/global.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/global.h
11376 --- linux-2.6.31.6/include/linux/vserver/global.h       1970-01-01 01:00:00.000000000 +0100
11377 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/global.h 2009-09-10 16:11:43.000000000 +0200
11378 @@ -0,0 +1,19 @@
11379 +#ifndef _VX_GLOBAL_H
11380 +#define _VX_GLOBAL_H
11381 +#include <linux/types.h>       /* atomic_t, so this header does not rely on include order */
11382 +
11383 +extern atomic_t vx_global_ctotal;
11384 +extern atomic_t vx_global_cactive;
11385 +
11386 +extern atomic_t nx_global_ctotal;
11387 +extern atomic_t nx_global_cactive;
11388 +
11389 +extern atomic_t vs_global_nsproxy;
11390 +extern atomic_t vs_global_fs;
11391 +extern atomic_t vs_global_mnt_ns;
11392 +extern atomic_t vs_global_uts_ns;
11393 +extern atomic_t vs_global_user_ns;
11394 +extern atomic_t vs_global_pid_ns;
11395 +
11396 +
11397 +#endif /* _VX_GLOBAL_H */
11398 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/history.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/history.h
11399 --- linux-2.6.31.6/include/linux/vserver/history.h      1970-01-01 01:00:00.000000000 +0100
11400 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/history.h        2009-09-10 16:11:43.000000000 +0200
11401 @@ -0,0 +1,197 @@
11402 +#ifndef _VX_HISTORY_H
11403 +#define _VX_HISTORY_H
11404 +
11405 +
11406 +enum {
11407 +       VXH_UNUSED = 0,
11408 +       VXH_THROW_OOPS = 1,
11409 +
11410 +       VXH_GET_VX_INFO,
11411 +       VXH_PUT_VX_INFO,
11412 +       VXH_INIT_VX_INFO,
11413 +       VXH_SET_VX_INFO,
11414 +       VXH_CLR_VX_INFO,
11415 +       VXH_CLAIM_VX_INFO,
11416 +       VXH_RELEASE_VX_INFO,
11417 +       VXH_ALLOC_VX_INFO,
11418 +       VXH_DEALLOC_VX_INFO,
11419 +       VXH_HASH_VX_INFO,
11420 +       VXH_UNHASH_VX_INFO,
11421 +       VXH_LOC_VX_INFO,
11422 +       VXH_LOOKUP_VX_INFO,
11423 +       VXH_CREATE_VX_INFO,
11424 +};
11425 +
11426 +struct _vxhe_vxi {
11427 +       struct vx_info *ptr;
11428 +       unsigned xid;
11429 +       unsigned usecnt;
11430 +       unsigned tasks;
11431 +};
11432 +
11433 +struct _vxhe_set_clr {
11434 +       void *data;
11435 +};
11436 +
11437 +struct _vxhe_loc_lookup {
11438 +       unsigned arg;
11439 +};
11440 +
11441 +struct _vx_hist_entry {
11442 +       void *loc;
11443 +       unsigned short seq;
11444 +       unsigned short type;
11445 +       struct _vxhe_vxi vxi;
11446 +       union {
11447 +               struct _vxhe_set_clr sc;
11448 +               struct _vxhe_loc_lookup ll;
11449 +       };
11450 +};
11451 +
11452 +#ifdef CONFIG_VSERVER_HISTORY
11453 +
11454 +extern unsigned volatile int vxh_active;
11455 +
11456 +struct _vx_hist_entry *vxh_advance(void *loc);
11457 +
11458 +
11459 +static inline  /* snapshot vxi (pointer, use count, task count, xid) into a history entry */
11460 +void   __vxh_copy_vxi(struct _vx_hist_entry *entry, struct vx_info *vxi)
11461 +{
11462 +       entry->vxi.ptr = vxi;           /* stored even when NULL */
11463 +       if (vxi) {                      /* NULL vxi: usecnt/tasks/xid keep whatever the entry held before */
11464 +               entry->vxi.usecnt = atomic_read(&vxi->vx_usecnt);
11465 +               entry->vxi.tasks = atomic_read(&vxi->vx_tasks);
11466 +               entry->vxi.xid = vxi->vx_id;
11467 +       }
11468 +}
11469 +
11470 +
11471 +#define        __HERE__ current_text_addr()
11472 +
11473 +#define __VXH_BODY(__type, __data, __here)     /* common body of the __vxh_* recorders */ \
11474 +       struct _vx_hist_entry *entry;           \
11475 +       /* the entry is obtained and filled with preemption disabled */ \
11476 +       preempt_disable();                      \
11477 +       entry = vxh_advance(__here);            \
11478 +       __data;         /* caller-supplied payload statement(s); may use 'entry' */ \
11479 +       entry->type = __type;   /* type is written after the payload */ \
11480 +       preempt_enable();
11481 +
11482 +
11483 +       /* pass vxi only: the entry carries just the vxi snapshot */
11484 +
11485 +#define __VXH_SMPL     /* payload for __VXH_BODY; expects 'entry' and 'vxi' in scope */ \
11486 +       __vxh_copy_vxi(entry, vxi)
11487 +
11488 +static inline
11489 +void   __vxh_smpl(struct vx_info *vxi, int __type, void *__here)
11490 +{
11491 +       __VXH_BODY(__type, __VXH_SMPL, __here)
11492 +}
11493 +
11494 +       /* pass vxi and data (void *): data goes into the sc member of the entry union */
11495 +
11496 +#define __VXH_DATA     /* payload for __VXH_BODY; expects 'entry', 'vxi', 'data' in scope */ \
11497 +       __vxh_copy_vxi(entry, vxi);             \
11498 +       entry->sc.data = data
11499 +
11500 +static inline
11501 +void   __vxh_data(struct vx_info *vxi, void *data,
11502 +                       int __type, void *__here)
11503 +{
11504 +       __VXH_BODY(__type, __VXH_DATA, __here)
11505 +}
11506 +
11507 +       /* pass vxi and arg (long); arg is stored in the unsigned ll.arg, so it is narrowed where long is wider */
11508 +
11509 +#define __VXH_LONG     /* payload for __VXH_BODY; expects 'entry', 'vxi', 'arg' in scope */ \
11510 +       __vxh_copy_vxi(entry, vxi);             \
11511 +       entry->ll.arg = arg
11512 +
11513 +static inline
11514 +void   __vxh_long(struct vx_info *vxi, long arg,
11515 +                       int __type, void *__here)
11516 +{
11517 +       __VXH_BODY(__type, __VXH_LONG, __here)
11518 +}
11519 +
11520 +
11521 +static inline  /* record a VXH_THROW_OOPS entry, then clear vxh_active */
11522 +void   __vxh_throw_oops(void *__here)
11523 +{
11524 +       __VXH_BODY(VXH_THROW_OOPS, {}, __here); /* {} = empty payload, no vxi is copied */
11525 +       /* prevent further acquisition */
11526 +       vxh_active = 0;
11527 +}
11528 +
11529 +
11530 +#define vxh_throw_oops()       __vxh_throw_oops(__HERE__)      /* all wrappers below expand to a bare call; the caller writes the ';' */
11531 +
11532 +#define __vxh_get_vx_info(v, h)        __vxh_smpl(v, VXH_GET_VX_INFO, h)
11533 +#define __vxh_put_vx_info(v, h)        __vxh_smpl(v, VXH_PUT_VX_INFO, h)
11534 +
11535 +#define __vxh_init_vx_info(v, d, h) \
11536 +       __vxh_data(v, d, VXH_INIT_VX_INFO, h)
11537 +#define __vxh_set_vx_info(v, d, h) \
11538 +       __vxh_data(v, d, VXH_SET_VX_INFO, h)
11539 +#define __vxh_clr_vx_info(v, d, h) \
11540 +       __vxh_data(v, d, VXH_CLR_VX_INFO, h)
11541 +
11542 +#define __vxh_claim_vx_info(v, d, h) \
11543 +       __vxh_data(v, d, VXH_CLAIM_VX_INFO, h)
11544 +#define __vxh_release_vx_info(v, d, h) \
11545 +       __vxh_data(v, d, VXH_RELEASE_VX_INFO, h)
11546 +
11547 +#define vxh_alloc_vx_info(v) \
11548 +       __vxh_smpl(v, VXH_ALLOC_VX_INFO, __HERE__)
11549 +#define vxh_dealloc_vx_info(v) \
11550 +       __vxh_smpl(v, VXH_DEALLOC_VX_INFO, __HERE__)
11551 +
11552 +#define vxh_hash_vx_info(v) \
11553 +       __vxh_smpl(v, VXH_HASH_VX_INFO, __HERE__)
11554 +#define vxh_unhash_vx_info(v) \
11555 +       __vxh_smpl(v, VXH_UNHASH_VX_INFO, __HERE__)
11556 +
11557 +#define vxh_loc_vx_info(v, l) \
11558 +       __vxh_long(v, l, VXH_LOC_VX_INFO, __HERE__)
11559 +#define vxh_lookup_vx_info(v, l) \
11560 +       __vxh_long(v, l, VXH_LOOKUP_VX_INFO, __HERE__)
11561 +#define vxh_create_vx_info(v, l) \
11562 +       __vxh_long(v, l, VXH_CREATE_VX_INFO, __HERE__)
11563 +
11564 +extern void vxh_dump_history(void);
11565 +
11566 +
11567 +#else  /* CONFIG_VSERVER_HISTORY */
11568 +
11569 +#define        __HERE__        0
11570 +
11571 +#define vxh_throw_oops()               do { } while (0)
11572 +
11573 +#define __vxh_get_vx_info(v, h)                do { } while (0)
11574 +#define __vxh_put_vx_info(v, h)                do { } while (0)
11575 +
11576 +#define __vxh_init_vx_info(v, d, h)    do { } while (0)
11577 +#define __vxh_set_vx_info(v, d, h)     do { } while (0)
11578 +#define __vxh_clr_vx_info(v, d, h)     do { } while (0)
11579 +
11580 +#define __vxh_claim_vx_info(v, d, h)   do { } while (0)
11581 +#define __vxh_release_vx_info(v, d, h) do { } while (0)
11582 +
11583 +#define vxh_alloc_vx_info(v)           do { } while (0)
11584 +#define vxh_dealloc_vx_info(v)         do { } while (0)
11585 +
11586 +#define vxh_hash_vx_info(v)            do { } while (0)
11587 +#define vxh_unhash_vx_info(v)          do { } while (0)
11588 +
11589 +#define vxh_loc_vx_info(v, l)          do { } while (0)
11590 +#define vxh_lookup_vx_info(v, l)       do { } while (0)
11591 +#define vxh_create_vx_info(v, l)       do { } while (0)
11592 +
11593 +#define vxh_dump_history()             do { } while (0)
11594 +
11595 +
11596 +#endif /* CONFIG_VSERVER_HISTORY */
11597 +
11598 +#endif /* _VX_HISTORY_H */
11599 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/inode_cmd.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/inode_cmd.h
11600 --- linux-2.6.31.6/include/linux/vserver/inode_cmd.h    1970-01-01 01:00:00.000000000 +0100
11601 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/inode_cmd.h      2009-09-10 16:11:43.000000000 +0200
11602 @@ -0,0 +1,59 @@
11603 +#ifndef _VX_INODE_CMD_H
11604 +#define _VX_INODE_CMD_H
11605 +
11606 +
11607 +/*  inode vserver commands */
11608 +
11609 +#define VCMD_get_iattr         VC_CMD(INODE, 1, 1)
11610 +#define VCMD_set_iattr         VC_CMD(INODE, 2, 1)
11611 +
11612 +#define VCMD_fget_iattr                VC_CMD(INODE, 3, 0)
11613 +#define VCMD_fset_iattr                VC_CMD(INODE, 4, 0)
11614 +
11615 +struct vcmd_ctx_iattr_v1 {
11616 +       const char __user *name;
11617 +       uint32_t tag;
11618 +       uint32_t flags;
11619 +       uint32_t mask;
11620 +};
11621 +
11622 +struct vcmd_ctx_fiattr_v0 {
11623 +       uint32_t tag;
11624 +       uint32_t flags;
11625 +       uint32_t mask;
11626 +};
11627 +
11628 +
11629 +#ifdef __KERNEL__
11630 +
11631 +
11632 +#ifdef CONFIG_COMPAT
11633 +
11634 +#include <asm/compat.h>
11635 +
11636 +struct vcmd_ctx_iattr_v1_x32 {
11637 +       compat_uptr_t name_ptr;
11638 +       uint32_t tag;
11639 +       uint32_t flags;
11640 +       uint32_t mask;
11641 +};
11642 +
11643 +#endif /* CONFIG_COMPAT */
11644 +
11645 +#include <linux/compiler.h>
11646 +
11647 +extern int vc_get_iattr(void __user *);
11648 +extern int vc_set_iattr(void __user *);
11649 +
11650 +extern int vc_fget_iattr(uint32_t, void __user *);
11651 +extern int vc_fset_iattr(uint32_t, void __user *);
11652 +
11653 +#ifdef CONFIG_COMPAT
11654 +
11655 +extern int vc_get_iattr_x32(void __user *);
11656 +extern int vc_set_iattr_x32(void __user *);
11657 +
11658 +#endif /* CONFIG_COMPAT */
11659 +
11660 +#endif /* __KERNEL__ */
11661 +#endif /* _VX_INODE_CMD_H */
11662 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/inode.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/inode.h
11663 --- linux-2.6.31.6/include/linux/vserver/inode.h        1970-01-01 01:00:00.000000000 +0100
11664 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/inode.h  2009-10-06 23:34:11.000000000 +0200
11665 @@ -0,0 +1,39 @@
11666 +#ifndef _VX_INODE_H
11667 +#define _VX_INODE_H
11668 +
11669 +
11670 +#define IATTR_TAG      0x01000000
11671 +
11672 +#define IATTR_ADMIN    0x00000001
11673 +#define IATTR_WATCH    0x00000002
11674 +#define IATTR_HIDE     0x00000004
11675 +#define IATTR_FLAGS    0x00000007
11676 +
11677 +#define IATTR_BARRIER  0x00010000
11678 +#define IATTR_IXUNLINK 0x00020000
11679 +#define IATTR_IMMUTABLE 0x00040000
11680 +#define IATTR_COW      0x00080000
11681 +
11682 +#ifdef __KERNEL__
11683 +
11684 +
11685 +#ifdef CONFIG_VSERVER_PROC_SECURE
11686 +#define IATTR_PROC_DEFAULT     ( IATTR_ADMIN | IATTR_HIDE )
11687 +#define IATTR_PROC_SYMLINK     ( IATTR_ADMIN )
11688 +#else
11689 +#define IATTR_PROC_DEFAULT     ( IATTR_ADMIN )
11690 +#define IATTR_PROC_SYMLINK     ( IATTR_ADMIN )
11691 +#endif
11692 +
11693 +#define vx_hide_check(c, m)    (((m) & IATTR_HIDE) ? vx_check(c, m) : 1)
11694 +
11695 +#endif /* __KERNEL__ */
11696 +
11697 +/* inode ioctls */
11698 +
11699 +#define FIOC_GETXFLG   _IOR('x', 5, long)
11700 +#define FIOC_SETXFLG   _IOW('x', 6, long)
11701 +
11702 +#else  /* _VX_INODE_H */
11703 +#warning duplicate inclusion
11704 +#endif /* _VX_INODE_H */
11705 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/Kbuild linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/Kbuild
11706 --- linux-2.6.31.6/include/linux/vserver/Kbuild 1970-01-01 01:00:00.000000000 +0100
11707 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/Kbuild   2009-09-10 16:11:43.000000000 +0200
11708 @@ -0,0 +1,8 @@
11709 +
11710 +unifdef-y += context_cmd.h network_cmd.h space_cmd.h \
11711 +       cacct_cmd.h cvirt_cmd.h limit_cmd.h dlimit_cmd.h \
11712 +       inode_cmd.h tag_cmd.h sched_cmd.h signal_cmd.h \
11713 +       debug_cmd.h device_cmd.h
11714 +
11715 +unifdef-y += switch.h network.h monitor.h inode.h device.h
11716 +
11717 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/limit_cmd.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/limit_cmd.h
11718 --- linux-2.6.31.6/include/linux/vserver/limit_cmd.h    1970-01-01 01:00:00.000000000 +0100
11719 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/limit_cmd.h      2009-09-10 16:11:43.000000000 +0200
11720 @@ -0,0 +1,71 @@
11721 +#ifndef _VX_LIMIT_CMD_H
11722 +#define _VX_LIMIT_CMD_H
11723 +
11724 +
11725 +/*  rlimit vserver commands */
11726 +
11727 +#define VCMD_get_rlimit                VC_CMD(RLIMIT, 1, 0)
11728 +#define VCMD_set_rlimit                VC_CMD(RLIMIT, 2, 0)
11729 +#define VCMD_get_rlimit_mask   VC_CMD(RLIMIT, 3, 0)
11730 +#define VCMD_reset_hits                VC_CMD(RLIMIT, 7, 0)
11731 +#define VCMD_reset_minmax      VC_CMD(RLIMIT, 9, 0)
11732 +
11733 +struct vcmd_ctx_rlimit_v0 {
11734 +       uint32_t id;
11735 +       uint64_t minimum;
11736 +       uint64_t softlimit;
11737 +       uint64_t maximum;
11738 +};
11739 +
11740 +struct vcmd_ctx_rlimit_mask_v0 {
11741 +       uint32_t minimum;
11742 +       uint32_t softlimit;
11743 +       uint32_t maximum;
11744 +};
11745 +
11746 +#define VCMD_rlimit_stat       VC_CMD(VSTAT, 1, 0)
11747 +
11748 +struct vcmd_rlimit_stat_v0 {
11749 +       uint32_t id;
11750 +       uint32_t hits;
11751 +       uint64_t value;
11752 +       uint64_t minimum;
11753 +       uint64_t maximum;
11754 +};
11755 +
11756 +#define CRLIM_UNSET            (0ULL)
11757 +#define CRLIM_INFINITY         (~0ULL)
11758 +#define CRLIM_KEEP             (~1ULL)
11759 +
11760 +#ifdef __KERNEL__
11761 +
11762 +#ifdef CONFIG_IA32_EMULATION
11763 +/* Packed variant of vcmd_ctx_rlimit_v0 (same members, same order). */
11764 +struct vcmd_ctx_rlimit_v0_x32 {
11765 +       uint32_t id;
11766 +       uint64_t minimum;
11767 +       uint64_t softlimit;
11768 +       uint64_t maximum;
11769 +} __attribute__ ((packed));     /* no padding after id; presumably the 32-bit userspace layout -- TODO confirm */
11770 +
11771 +#endif /* CONFIG_IA32_EMULATION */
11772 +
11773 +#include <linux/compiler.h>
11774 +
11775 +extern int vc_get_rlimit_mask(uint32_t, void __user *);
11776 +extern int vc_get_rlimit(struct vx_info *, void __user *);
11777 +extern int vc_set_rlimit(struct vx_info *, void __user *);
11778 +extern int vc_reset_hits(struct vx_info *, void __user *);
11779 +extern int vc_reset_minmax(struct vx_info *, void __user *);
11780 +
11781 +extern int vc_rlimit_stat(struct vx_info *, void __user *);
11782 +
11783 +#ifdef CONFIG_IA32_EMULATION
11784 +
11785 +extern int vc_get_rlimit_x32(struct vx_info *, void __user *);
11786 +extern int vc_set_rlimit_x32(struct vx_info *, void __user *);
11787 +
11788 +#endif /* CONFIG_IA32_EMULATION */
11789 +
11790 +#endif /* __KERNEL__ */
11791 +#endif /* _VX_LIMIT_CMD_H */
11792 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/limit_def.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/limit_def.h
11793 --- linux-2.6.31.6/include/linux/vserver/limit_def.h    1970-01-01 01:00:00.000000000 +0100
11794 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/limit_def.h      2009-09-10 16:11:43.000000000 +0200
11795 @@ -0,0 +1,47 @@
11796 +#ifndef _VX_LIMIT_DEF_H
11797 +#define _VX_LIMIT_DEF_H
11798 +
11799 +#include <asm/atomic.h>
11800 +#include <asm/resource.h>
11801 +
11802 +#include "limit.h"
11803 +
11804 +
11805 +struct _vx_res_limit {
11806 +       rlim_t soft;            /* Context soft limit */
11807 +       rlim_t hard;            /* Context hard limit */
11808 +
11809 +       rlim_atomic_t rcur;     /* Current value */
11810 +       rlim_t rmin;            /* Context minimum */
11811 +       rlim_t rmax;            /* Context maximum */
11812 +
11813 +       atomic_t lhit;          /* Limit hits */
11814 +};
11815 +
11816 +/* context sub struct */
11817 +
11818 +struct _vx_limit {
11819 +       struct _vx_res_limit res[NUM_LIMITS];
11820 +};
11821 +
11822 +#ifdef CONFIG_VSERVER_DEBUG
11823 +/* Debug helper: printk one line per resource slot of *limit. */
11824 +static inline void __dump_vx_limit(struct _vx_limit *limit)
11825 +{
11826 +       int i;
11827 +
11828 +       printk("\t_vx_limit:"); /* header has no newline; first entry continues it */
11829 +       for (i = 0; i < NUM_LIMITS; i++) {
11830 +               printk("\t [%2d] = %8lu %8lu/%8lu, %8ld/%8ld, %8d\n",
11831 +                       i, (unsigned long)__rlim_get(limit, i), /* current value */
11832 +                       (unsigned long)__rlim_rmin(limit, i),   /* recorded minimum */
11833 +                       (unsigned long)__rlim_rmax(limit, i),   /* recorded maximum */
11834 +                       (long)__rlim_soft(limit, i),            /* soft limit */
11835 +                       (long)__rlim_hard(limit, i),            /* hard limit */
11836 +                       atomic_read(&__rlim_lhit(limit, i)));   /* limit hit counter */
11837 +       }
11838 +}
11839 +
11840 +#endif
11841 +
11842 +#endif /* _VX_LIMIT_DEF_H */
11843 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/limit.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/limit.h
11844 --- linux-2.6.31.6/include/linux/vserver/limit.h        1970-01-01 01:00:00.000000000 +0100
11845 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/limit.h  2009-09-10 16:11:43.000000000 +0200
11846 @@ -0,0 +1,70 @@
11847 +#ifndef _VX_LIMIT_H
11848 +#define _VX_LIMIT_H
11849 +
11850 +#define VLIMIT_NSOCK   16
11851 +#define VLIMIT_OPENFD  17
11852 +#define VLIMIT_ANON    18
11853 +#define VLIMIT_SHMEM   19
11854 +#define VLIMIT_SEMARY  20
11855 +#define VLIMIT_NSEMS   21
11856 +#define VLIMIT_DENTRY  22
11857 +#define VLIMIT_MAPPED  23
11858 +
11859 +
11860 +#ifdef __KERNEL__
11861 +
11862 +#define        VLIM_NOCHECK    ((1L << VLIMIT_DENTRY) | (1L << RLIMIT_RSS))
11863 +
11864 +/*     keep in sync with CRLIM_INFINITY */
11865 +
11866 +#define        VLIM_INFINITY   (~0ULL)
11867 +
11868 +#include <asm/atomic.h>
11869 +#include <asm/resource.h>
11870 +
11871 +#ifndef RLIM_INFINITY
11872 +#warning RLIM_INFINITY is undefined
11873 +#endif
11874 +
11875 +#define __rlim_val(l, r, v)    ((l)->res[r].v)
11876 +
11877 +#define __rlim_soft(l, r)      __rlim_val(l, r, soft)
11878 +#define __rlim_hard(l, r)      __rlim_val(l, r, hard)
11879 +
11880 +#define __rlim_rcur(l, r)      __rlim_val(l, r, rcur)
11881 +#define __rlim_rmin(l, r)      __rlim_val(l, r, rmin)
11882 +#define __rlim_rmax(l, r)      __rlim_val(l, r, rmax)
11883 +
11884 +#define __rlim_lhit(l, r)      __rlim_val(l, r, lhit)
11885 +#define __rlim_hit(l, r)       atomic_inc(&__rlim_lhit(l, r))
11886 +
11887 +typedef atomic_long_t rlim_atomic_t;
11888 +typedef unsigned long rlim_t;
11889 +
11890 +#define __rlim_get(l, r)       atomic_long_read(&__rlim_rcur(l, r))
11891 +#define __rlim_set(l, r, v)    atomic_long_set(&__rlim_rcur(l, r), v)
11892 +#define __rlim_inc(l, r)       atomic_long_inc(&__rlim_rcur(l, r))
11893 +#define __rlim_dec(l, r)       atomic_long_dec(&__rlim_rcur(l, r))
11894 +#define __rlim_add(l, r, v)    atomic_long_add(v, &__rlim_rcur(l, r))
11895 +#define __rlim_sub(l, r, v)    atomic_long_sub(v, &__rlim_rcur(l, r))
11896 +
11897 +
11898 +#if    (RLIM_INFINITY == VLIM_INFINITY)
11899 +#define        VX_VLIM(r) ((long long)(long)(r))
11900 +#define        VX_RLIM(v) ((rlim_t)(v))
11901 +#else
11902 +#define        VX_VLIM(r) (((r) == RLIM_INFINITY) \
11903 +               ? VLIM_INFINITY : (long long)(r))
11904 +#define        VX_RLIM(v) (((v) == VLIM_INFINITY) \
11905 +               ? RLIM_INFINITY : (rlim_t)(v))
11906 +#endif
11907 +
11908 +struct sysinfo;
11909 +
11910 +void vx_vsi_meminfo(struct sysinfo *);
11911 +void vx_vsi_swapinfo(struct sysinfo *);
11912 +
11913 +#define NUM_LIMITS     24
11914 +
11915 +#endif /* __KERNEL__ */
11916 +#endif /* _VX_LIMIT_H */
11917 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/limit_int.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/limit_int.h
11918 --- linux-2.6.31.6/include/linux/vserver/limit_int.h    1970-01-01 01:00:00.000000000 +0100
11919 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/limit_int.h      2009-09-10 16:11:43.000000000 +0200
11920 @@ -0,0 +1,198 @@
11921 +#ifndef _VX_LIMIT_INT_H
11922 +#define _VX_LIMIT_INT_H
11923 +
11924 +#include "context.h"
11925 +
11926 +#ifdef __KERNEL__
11927 +
11928 +#define VXD_RCRES_COND(r)      VXD_CBIT(cres, r)
11929 +#define VXD_RLIMIT_COND(r)     VXD_CBIT(limit, r)
11930 +
11931 +extern const char *vlimit_name[NUM_LIMITS];
11932 +/* Count resource res of vxi up (dir > 0) or down by one; no-op if !vxi. */
11933 +static inline void __vx_acc_cres(struct vx_info *vxi,
11934 +       int res, int dir, void *_data, char *_file, int _line)
11935 +{
11936 +       if (VXD_RCRES_COND(res))        /* trace runs before the NULL check below */
11937 +               vxlprintk(1, "vx_acc_cres[%5d,%s,%2d]: %5ld%s (%p)",
11938 +                       (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
11939 +                       (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
11940 +                       (dir > 0) ? "++" : "--", _data, _file, _line);
11941 +       if (!vxi)       /* no context: nothing to account */
11942 +               return;
11943 +
11944 +       if (dir > 0)
11945 +               __rlim_inc(&vxi->limit, res);
11946 +       else            /* dir <= 0 counts down */
11947 +               __rlim_dec(&vxi->limit, res);
11948 +}
11949 +/* Add amount (may be negative) to resource res of vxi; no-op if !vxi. */
11950 +static inline void __vx_add_cres(struct vx_info *vxi,
11951 +       int res, int amount, void *_data, char *_file, int _line)
11952 +{
11953 +       if (VXD_RCRES_COND(res))        /* trace runs even for amount == 0 or !vxi */
11954 +               vxlprintk(1, "vx_add_cres[%5d,%s,%2d]: %5ld += %5d (%p)",
11955 +                       (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
11956 +                       (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
11957 +                       amount, _data, _file, _line);
11958 +       if (amount == 0)        /* skip the atomic update when nothing changes */
11959 +               return;
11960 +       if (!vxi)
11961 +               return;
11962 +       __rlim_add(&vxi->limit, res, amount);
11963 +}
11964 +/* Raise rmax of res to value if value is larger; returns 1 if raised, else 0. */
11965 +static inline
11966 +int __vx_cres_adjust_max(struct _vx_limit *limit, int res, rlim_t value)
11967 +{
11968 +       int cond = (value > __rlim_rmax(limit, res));
11969 +
11970 +       if (cond)
11971 +               __rlim_rmax(limit, res) = value;        /* plain store, not atomic */
11972 +       return cond;
11973 +}
11974 +/* Lower rmin of res to value if value is smaller; returns 1 if lowered, else 0. */
11975 +static inline
11976 +int __vx_cres_adjust_min(struct _vx_limit *limit, int res, rlim_t value)
11977 +{
11978 +       int cond = (value < __rlim_rmin(limit, res));
11979 +
11980 +       if (cond)
11981 +               __rlim_rmin(limit, res) = value;        /* plain store, not atomic */
11982 +       return cond;
11983 +}
11984 +/* Fold value into the recorded extremes of res: rmax first, else rmin. */
11985 +static inline
11986 +void __vx_cres_fixup(struct _vx_limit *limit, int res, rlim_t value)
11987 +{
11988 +       if (!__vx_cres_adjust_max(limit, res, value))   /* rmin only tried if rmax unchanged */
11989 +               __vx_cres_adjust_min(limit, res, value);
11990 +}
11991 +
11992 +
11993 +/*     return values (nonzero means num more units of res are allowed):
11994 +        -1 ... soft limit is RLIM_INFINITY or not exceeded
11995 +        +1 ... not checked (!vxi, num == 0), or over soft but within hard
11996 +         0 ... over hard limit (hit counter incremented)  */
11997 +
11998 +static inline int __vx_cres_avail(struct vx_info *vxi,
11999 +       int res, int num, char *_file, int _line)
12000 +{
12001 +       struct _vx_limit *limit;
12002 +       rlim_t value;
12003 +
12004 +       if (VXD_RLIMIT_COND(res))       /* trace runs before the NULL check below */
12005 +               vxlprintk(1, "vx_cres_avail[%5d,%s,%2d]: %5ld/%5ld > %5ld + %5d",
12006 +                       (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
12007 +                       (vxi ? (long)__rlim_soft(&vxi->limit, res) : -1),
12008 +                       (vxi ? (long)__rlim_hard(&vxi->limit, res) : -1),
12009 +                       (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
12010 +                       num, _file, _line);
12011 +       if (!vxi)       /* no context: always available */
12012 +               return 1;
12013 +
12014 +       limit = &vxi->limit;
12015 +       value = __rlim_get(limit, res);
12016 +
12017 +       if (!__vx_cres_adjust_max(limit, res, value))   /* same as __vx_cres_fixup() */
12018 +               __vx_cres_adjust_min(limit, res, value);
12019 +
12020 +       if (num == 0)   /* min/max bookkeeping only, no limit check */
12021 +               return 1;
12022 +
12023 +       if (__rlim_soft(limit, res) == RLIM_INFINITY)
12024 +               return -1;
12025 +       if (value + num <= __rlim_soft(limit, res))
12026 +               return -1;
12027 +
12028 +       if (__rlim_hard(limit, res) == RLIM_INFINITY)
12029 +               return 1;
12030 +       if (value + num <= __rlim_hard(limit, res))
12031 +               return 1;
12032 +
12033 +       __rlim_hit(limit, res); /* hard limit would be exceeded: count the hit */
12034 +       return 0;
12035 +}
12036 +
12037 +/* Resource list: [0] is the aggregate, the rest are summed into it; 0 ends it. */
12038 +static const int VLA_RSS[] = { RLIMIT_RSS, VLIMIT_ANON, VLIMIT_MAPPED, 0 };
12039 +/* Sum the current values of the resources in the 0-terminated array. */
12040 +static inline
12041 +rlim_t __vx_cres_array_sum(struct _vx_limit *limit, const int *array)
12042 +{
12043 +       rlim_t value, sum = 0;
12044 +       int res;
12045 +
12046 +       while ((res = *array++)) {      /* stops at 0, so resource id 0 cannot be listed */
12047 +               value = __rlim_get(limit, res);
12048 +               __vx_cres_fixup(limit, res, value);     /* side effect: updates rmin/rmax */
12049 +               sum += value;
12050 +       }
12051 +       return sum;
12052 +}
12053 +/* Set resource array[0] to the sum of array[1..]; returns that sum. */
12054 +static inline
12055 +rlim_t __vx_cres_array_fixup(struct _vx_limit *limit, const int *array)
12056 +{
12057 +       rlim_t value = __vx_cres_array_sum(limit, array + 1);
12058 +       int res = *array;
12059 +
12060 +       if (value == __rlim_get(limit, res))    /* already in sync: rmin/rmax untouched */
12061 +               return value;
12062 +
12063 +       __rlim_set(limit, res, value);
12064 +       /* now adjust min/max */
12065 +       if (!__vx_cres_adjust_max(limit, res, value))
12066 +               __vx_cres_adjust_min(limit, res, value);
12067 +
12068 +       return value;
12069 +}
12070 +/* Resync aggregate array[0] from array[1..], then __vx_cres_avail() on it. */
12071 +static inline int __vx_cres_array_avail(struct vx_info *vxi,
12072 +       const int *array, int num, char *_file, int _line)
12073 +{
12074 +       struct _vx_limit *limit;
12075 +       rlim_t value = 0;
12076 +       int res;
12077 +
12078 +       if (num == 0)   /* nothing requested: available, no resync done */
12079 +               return 1;
12080 +       if (!vxi)       /* no context: available */
12081 +               return 1;
12082 +
12083 +       limit = &vxi->limit;
12084 +       res = *array;   /* aggregate resource id */
12085 +       value = __vx_cres_array_sum(limit, array + 1);
12086 +
12087 +       __rlim_set(limit, res, value);  /* unconditional, unlike __vx_cres_array_fixup() */
12088 +       __vx_cres_fixup(limit, res, value);
12089 +
12090 +       return __vx_cres_avail(vxi, res, num, _file, _line);
12091 +}
12092 +
12093 +/* Refresh rmin/rmax of resource id (all if id <= 0) and clamp rmax to hard. */
12094 +static inline void vx_limit_fixup(struct _vx_limit *limit, int id)
12095 +{
12096 +       rlim_t value;
12097 +       int res;
12098 +
12099 +       /* complex resources first */
12100 +       if ((id < 0) || (id == RLIMIT_RSS))
12101 +               __vx_cres_array_fixup(limit, VLA_RSS);
12102 +
12103 +       for (res = 0; res < NUM_LIMITS; res++) {
12104 +               if ((id > 0) && (res != id))    /* note: id == 0 does not filter, walks all */
12105 +                       continue;
12106 +
12107 +               value = __rlim_get(limit, res);
12108 +               __vx_cres_fixup(limit, res, value);
12109 +
12110 +               /* not supposed to happen, maybe warn? */
12111 +               if (__rlim_rmax(limit, res) > __rlim_hard(limit, res))
12112 +                       __rlim_rmax(limit, res) = __rlim_hard(limit, res);
12113 +       }
12114 +}
12115 +
12116 +
12117 +#endif /* __KERNEL__ */
12118 +#endif /* _VX_LIMIT_INT_H */
12119 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/monitor.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/monitor.h
12120 --- linux-2.6.31.6/include/linux/vserver/monitor.h      1970-01-01 01:00:00.000000000 +0100
12121 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/monitor.h        2009-09-10 16:11:43.000000000 +0200
12122 @@ -0,0 +1,96 @@
12123 +#ifndef _VX_MONITOR_H
12124 +#define _VX_MONITOR_H
12125 +
12126 +#include <linux/types.h>
12127 +
12128 +enum {
12129 +       VXM_UNUSED = 0,
12130 +
12131 +       VXM_SYNC = 0x10,
12132 +
12133 +       VXM_UPDATE = 0x20,
12134 +       VXM_UPDATE_1,
12135 +       VXM_UPDATE_2,
12136 +
12137 +       VXM_RQINFO_1 = 0x24,
12138 +       VXM_RQINFO_2,
12139 +
12140 +       VXM_ACTIVATE = 0x40,
12141 +       VXM_DEACTIVATE,
12142 +       VXM_IDLE,
12143 +
12144 +       VXM_HOLD = 0x44,
12145 +       VXM_UNHOLD,
12146 +
12147 +       VXM_MIGRATE = 0x48,
12148 +       VXM_RESCHED,
12149 +
12150 +       /* all other bits are flags */
12151 +       VXM_SCHED = 0x80,
12152 +};
12153 +
12154 +struct _vxm_update_1 {
12155 +       uint32_t tokens_max;
12156 +       uint32_t fill_rate;
12157 +       uint32_t interval;
12158 +};
12159 +
12160 +struct _vxm_update_2 {
12161 +       uint32_t tokens_min;
12162 +       uint32_t fill_rate;
12163 +       uint32_t interval;
12164 +};
12165 +
12166 +struct _vxm_rqinfo_1 {
12167 +       uint16_t running;
12168 +       uint16_t onhold;
12169 +       uint16_t iowait;
12170 +       uint16_t uintr;
12171 +       uint32_t idle_tokens;
12172 +};
12173 +
12174 +struct _vxm_rqinfo_2 {
12175 +       uint32_t norm_time;
12176 +       uint32_t idle_time;
12177 +       uint32_t idle_skip;
12178 +};
12179 +
12180 +struct _vxm_sched {
12181 +       uint32_t tokens;
12182 +       uint32_t norm_time;
12183 +       uint32_t idle_time;
12184 +};
12185 +
12186 +struct _vxm_task {
12187 +       uint16_t pid;
12188 +       uint16_t state;
12189 +};
12190 +
12191 +struct _vxm_event {
12192 +       uint32_t jif;
12193 +       union {
12194 +               uint32_t seq;
12195 +               uint32_t sec;
12196 +       };
12197 +       union {
12198 +               uint32_t tokens;
12199 +               uint32_t nsec;
12200 +               struct _vxm_task tsk;
12201 +       };
12202 +};
12203 +
12204 +struct _vx_mon_entry {
12205 +       uint16_t type;
12206 +       uint16_t xid;
12207 +       union {
12208 +               struct _vxm_event ev;
12209 +               struct _vxm_sched sd;
12210 +               struct _vxm_update_1 u1;
12211 +               struct _vxm_update_2 u2;
12212 +               struct _vxm_rqinfo_1 q1;
12213 +               struct _vxm_rqinfo_2 q2;
12214 +       };
12215 +};
12216 +
12217 +
12218 +#endif /* _VX_MONITOR_H */
12219 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/network_cmd.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/network_cmd.h
12220 --- linux-2.6.31.6/include/linux/vserver/network_cmd.h  1970-01-01 01:00:00.000000000 +0100
12221 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/network_cmd.h    2009-09-10 16:11:43.000000000 +0200
12222 @@ -0,0 +1,150 @@
12223 +#ifndef _VX_NETWORK_CMD_H
12224 +#define _VX_NETWORK_CMD_H
12225 +
12226 +
12227 +/* vinfo commands */
12228 +
12229 +#define VCMD_task_nid          VC_CMD(VINFO, 2, 0)
12230 +
12231 +#ifdef __KERNEL__
12232 +extern int vc_task_nid(uint32_t);
12233 +
12234 +#endif /* __KERNEL__ */
12235 +
12236 +#define VCMD_nx_info           VC_CMD(VINFO, 6, 0)
12237 +
12238 +struct vcmd_nx_info_v0 {
12239 +       uint32_t nid;
12240 +       /* more to come */
12241 +};
12242 +
12243 +#ifdef __KERNEL__
12244 +extern int vc_nx_info(struct nx_info *, void __user *);
12245 +
12246 +#endif /* __KERNEL__ */
12247 +
12248 +#include <linux/in.h>
12249 +#include <linux/in6.h>
12250 +
12251 +#define VCMD_net_create_v0     VC_CMD(VNET, 1, 0)
12252 +#define VCMD_net_create                VC_CMD(VNET, 1, 1)
12253 +
12254 +struct  vcmd_net_create {
12255 +       uint64_t flagword;
12256 +};
12257 +
12258 +#define VCMD_net_migrate       VC_CMD(NETMIG, 1, 0)
12259 +
12260 +#define VCMD_net_add           VC_CMD(NETALT, 1, 0)
12261 +#define VCMD_net_remove                VC_CMD(NETALT, 2, 0)
12262 +
12263 +struct vcmd_net_addr_v0 {
12264 +       uint16_t type;
12265 +       uint16_t count;
12266 +       struct in_addr ip[4];
12267 +       struct in_addr mask[4];
12268 +};
12269 +
12270 +#define VCMD_net_add_ipv4      VC_CMD(NETALT, 1, 1)
12271 +#define VCMD_net_remove_ipv4   VC_CMD(NETALT, 2, 1)
12272 +
12273 +struct vcmd_net_addr_ipv4_v1 {
12274 +       uint16_t type;
12275 +       uint16_t flags;
12276 +       struct in_addr ip;
12277 +       struct in_addr mask;
12278 +};
12279 +
12280 +#define VCMD_net_add_ipv6      VC_CMD(NETALT, 3, 1)
12281 +#define VCMD_net_remove_ipv6   VC_CMD(NETALT, 4, 1)
12282 +
12283 +struct vcmd_net_addr_ipv6_v1 {
12284 +       uint16_t type;
12285 +       uint16_t flags;
12286 +       uint32_t prefix;
12287 +       struct in6_addr ip;
12288 +       struct in6_addr mask;
12289 +};
12290 +
12291 +#define VCMD_add_match_ipv4    VC_CMD(NETALT, 5, 0)
12292 +#define VCMD_get_match_ipv4    VC_CMD(NETALT, 6, 0)
12293 +
12294 +struct vcmd_match_ipv4_v0 {
12295 +       uint16_t type;
12296 +       uint16_t flags;
12297 +       uint16_t parent;
12298 +       uint16_t prefix;
12299 +       struct in_addr ip;
12300 +       struct in_addr ip2;
12301 +       struct in_addr mask;
12302 +};
12303 +
12304 +#define VCMD_add_match_ipv6    VC_CMD(NETALT, 7, 0)
12305 +#define VCMD_get_match_ipv6    VC_CMD(NETALT, 8, 0)
12306 +
12307 +struct vcmd_match_ipv6_v0 {
12308 +       uint16_t type;
12309 +       uint16_t flags;
12310 +       uint16_t parent;
12311 +       uint16_t prefix;
12312 +       struct in6_addr ip;
12313 +       struct in6_addr ip2;
12314 +       struct in6_addr mask;
12315 +};
12316 +
12317 +
12318 +#ifdef __KERNEL__
12319 +extern int vc_net_create(uint32_t, void __user *);
12320 +extern int vc_net_migrate(struct nx_info *, void __user *);
12321 +
12322 +extern int vc_net_add(struct nx_info *, void __user *);
12323 +extern int vc_net_remove(struct nx_info *, void __user *);
12324 +
12325 +extern int vc_net_add_ipv4(struct nx_info *, void __user *);
12326 +extern int vc_net_remove_ipv4(struct nx_info *, void __user *);
12327 +
12328 +extern int vc_net_add_ipv6(struct nx_info *, void __user *);
12329 +extern int vc_net_remove_ipv6(struct nx_info *, void __user *);
12330 +
12331 +extern int vc_add_match_ipv4(struct nx_info *, void __user *);
12332 +extern int vc_get_match_ipv4(struct nx_info *, void __user *);
12333 +
12334 +extern int vc_add_match_ipv6(struct nx_info *, void __user *);
12335 +extern int vc_get_match_ipv6(struct nx_info *, void __user *);
12336 +
12337 +#endif /* __KERNEL__ */
12338 +
12339 +
12340 +/* flag commands */
12341 +
12342 +#define VCMD_get_nflags                VC_CMD(FLAGS, 5, 0)
12343 +#define VCMD_set_nflags                VC_CMD(FLAGS, 6, 0)
12344 +
12345 +struct vcmd_net_flags_v0 {
12346 +       uint64_t flagword;
12347 +       uint64_t mask;
12348 +};
12349 +
12350 +#ifdef __KERNEL__
12351 +extern int vc_get_nflags(struct nx_info *, void __user *);
12352 +extern int vc_set_nflags(struct nx_info *, void __user *);
12353 +
12354 +#endif /* __KERNEL__ */
12355 +
12356 +
12357 +/* network caps commands */
12358 +
12359 +#define VCMD_get_ncaps         VC_CMD(FLAGS, 7, 0)
12360 +#define VCMD_set_ncaps         VC_CMD(FLAGS, 8, 0)
12361 +
12362 +struct vcmd_net_caps_v0 {
12363 +       uint64_t ncaps;
12364 +       uint64_t cmask;
12365 +};
12366 +
12367 +#ifdef __KERNEL__
12368 +extern int vc_get_ncaps(struct nx_info *, void __user *);
12369 +extern int vc_set_ncaps(struct nx_info *, void __user *);
12370 +
12371 +#endif /* __KERNEL__ */
12372 +#endif /* _VX_NETWORK_CMD_H */
12373 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/network.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/network.h
12374 --- linux-2.6.31.6/include/linux/vserver/network.h      1970-01-01 01:00:00.000000000 +0100
12375 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/network.h        2009-09-10 16:11:43.000000000 +0200
12376 @@ -0,0 +1,146 @@
12377 +#ifndef _VX_NETWORK_H
12378 +#define _VX_NETWORK_H
12379 +
12380 +#include <linux/types.h>
12381 +
12382 +
12383 +#define MAX_N_CONTEXT  65535   /* Arbitrary limit */
12384 +
12385 +
12386 +/* network flags */
12387 +
12388 +#define NXF_INFO_PRIVATE       0x00000008
12389 +
12390 +#define NXF_SINGLE_IP          0x00000100
12391 +#define NXF_LBACK_REMAP                0x00000200
12392 +#define NXF_LBACK_ALLOW                0x00000400
12393 +
12394 +#define NXF_HIDE_NETIF         0x02000000
12395 +#define NXF_HIDE_LBACK         0x04000000
12396 +
12397 +#define NXF_STATE_SETUP                (1ULL << 32)
12398 +#define NXF_STATE_ADMIN                (1ULL << 34)
12399 +
12400 +#define NXF_SC_HELPER          (1ULL << 36)
12401 +#define NXF_PERSISTENT         (1ULL << 38)
12402 +
12403 +#define NXF_ONE_TIME           (0x0005ULL << 32)
12404 +
12405 +
12406 +#define        NXF_INIT_SET            (__nxf_init_set())
12407 +/* Flag word behind NXF_INIT_SET; optional bits depend on kernel config. */
12408 +static inline uint64_t __nxf_init_set(void) {
12409 +       return    NXF_STATE_ADMIN       /* always set */
12410 +#ifdef CONFIG_VSERVER_AUTO_LBACK
12411 +               | NXF_LBACK_REMAP
12412 +               | NXF_HIDE_LBACK
12413 +#endif
12414 +#ifdef CONFIG_VSERVER_AUTO_SINGLE
12415 +               | NXF_SINGLE_IP
12416 +#endif
12417 +               | NXF_HIDE_NETIF;       /* always set */
12418 +}
12419 +
12420 +
12421 +/* network caps */
12422 +
12423 +#define NXC_TUN_CREATE         0x00000001
12424 +
12425 +#define NXC_RAW_ICMP           0x00000100
12426 +
12427 +
12428 +/* address types */
12429 +
12430 +#define NXA_TYPE_IPV4          0x0001
12431 +#define NXA_TYPE_IPV6          0x0002
12432 +
12433 +#define NXA_TYPE_NONE          0x0000
12434 +#define NXA_TYPE_ANY           0x00FF
12435 +
12436 +#define NXA_TYPE_ADDR          0x0010
12437 +#define NXA_TYPE_MASK          0x0020
12438 +#define NXA_TYPE_RANGE         0x0040
12439 +
12440 +#define NXA_MASK_ALL           (NXA_TYPE_ADDR | NXA_TYPE_MASK | NXA_TYPE_RANGE)
12441 +
12442 +#define NXA_MOD_BCAST          0x0100
12443 +#define NXA_MOD_LBACK          0x0200
12444 +
12445 +#define NXA_LOOPBACK           0x1000
12446 +
12447 +#define NXA_MASK_BIND          (NXA_MASK_ALL | NXA_MOD_BCAST | NXA_MOD_LBACK)
12448 +#define NXA_MASK_SHOW          (NXA_MASK_ALL | NXA_LOOPBACK)
12449 +
12450 +#ifdef __KERNEL__
12451 +
12452 +#include <linux/list.h>
12453 +#include <linux/spinlock.h>
12454 +#include <linux/rcupdate.h>
12455 +#include <linux/in.h>
12456 +#include <linux/in6.h>
12457 +#include <asm/atomic.h>
12458 +
12459 +struct nx_addr_v4 {
12460 +       struct nx_addr_v4 *next;
12461 +       struct in_addr ip[2];
12462 +       struct in_addr mask;
12463 +       uint16_t type;
12464 +       uint16_t flags;
12465 +};
12466 +
12467 +struct nx_addr_v6 {
12468 +       struct nx_addr_v6 *next;
12469 +       struct in6_addr ip;
12470 +       struct in6_addr mask;
12471 +       uint32_t prefix;
12472 +       uint16_t type;
12473 +       uint16_t flags;
12474 +};
12475 +
12476 +struct nx_info {
12477 +       struct hlist_node nx_hlist;     /* linked list of nxinfos */
12478 +       nid_t nx_id;                    /* vnet id */
12479 +       atomic_t nx_usecnt;             /* usage count */
12480 +       atomic_t nx_tasks;              /* tasks count */
12481 +       int nx_state;                   /* context state */
12482 +
12483 +       uint64_t nx_flags;              /* network flag word */
12484 +       uint64_t nx_ncaps;              /* network capabilities */
12485 +
12486 +       struct in_addr v4_lback;        /* Loopback address */
12487 +       struct in_addr v4_bcast;        /* Broadcast address */
12488 +       struct nx_addr_v4 v4;           /* First/Single ipv4 address */
12489 +#ifdef CONFIG_IPV6
12490 +       struct nx_addr_v6 v6;           /* First/Single ipv6 address */
12491 +#endif
12492 +       char nx_name[65];               /* network context name */
12493 +};
12494 +
12495 +
12496 +/* status flags */
12497 +
12498 +#define NXS_HASHED      0x0001
12499 +#define NXS_SHUTDOWN    0x0100
12500 +#define NXS_RELEASED    0x8000
12501 +
12502 +extern struct nx_info *lookup_nx_info(int);
12503 +
12504 +extern int get_nid_list(int, unsigned int *, int);
12505 +extern int nid_is_hashed(nid_t);
12506 +
12507 +extern int nx_migrate_task(struct task_struct *, struct nx_info *);
12508 +
12509 +extern long vs_net_change(struct nx_info *, unsigned int);
12510 +
12511 +struct sock;
12512 +
12513 +
12514 +#define NX_IPV4(n)     ((n)->v4.type != NXA_TYPE_NONE)
12515 +#ifdef  CONFIG_IPV6
12516 +#define NX_IPV6(n)     ((n)->v6.type != NXA_TYPE_NONE)
12517 +#else
12518 +#define NX_IPV6(n)     (0)
12519 +#endif
12520 +
12521 +#endif /* __KERNEL__ */
12522 +#endif /* _VX_NETWORK_H */
12523 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/percpu.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/percpu.h
12524 --- linux-2.6.31.6/include/linux/vserver/percpu.h       1970-01-01 01:00:00.000000000 +0100
12525 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/percpu.h 2009-09-10 16:11:43.000000000 +0200
12526 @@ -0,0 +1,14 @@
12527 +#ifndef _VX_PERCPU_H
12528 +#define _VX_PERCPU_H
12529 +
12530 +#include "cvirt_def.h"
12531 +#include "sched_def.h"
12532 +
12533 +struct _vx_percpu {
12534 +       struct _vx_cvirt_pc cvirt;
12535 +       struct _vx_sched_pc sched;
12536 +};
12537 +
12538 +#define        PERCPU_PERCTX   (sizeof(struct _vx_percpu))
12539 +
12540 +#endif /* _VX_PERCPU_H */
12541 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/pid.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/pid.h
12542 --- linux-2.6.31.6/include/linux/vserver/pid.h  1970-01-01 01:00:00.000000000 +0100
12543 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/pid.h    2009-11-05 04:17:16.000000000 +0100
12544 @@ -0,0 +1,51 @@
12545 +#ifndef _VSERVER_PID_H
12546 +#define _VSERVER_PID_H
12547 +
12548 +/* pid faking stuff */
12549 +
12550 +#define vx_info_map_pid(v, p) \
12551 +       __vx_info_map_pid((v), (p), __func__, __FILE__, __LINE__)
12552 +#define vx_info_map_tgid(v,p)  vx_info_map_pid(v,p)
12553 +#define vx_map_pid(p) vx_info_map_pid(current_vx_info(), p)
12554 +#define vx_map_tgid(p) vx_map_pid(p)
12555 +/* Map pid for vxi: vx_initpid becomes 1 if VXF_INFO_INIT is flagged, else pid. */
12556 +static inline int __vx_info_map_pid(struct vx_info *vxi, int pid,
12557 +       const char *func, const char *file, int line)
12558 +{
12559 +       if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) {
12560 +               vxfprintk(VXD_CBIT(cvirt, 2),
12561 +                       "vx_map_tgid: %p/%llx: %d -> %d",
12562 +                       vxi, (long long)vxi->vx_flags, pid,
12563 +                       (pid && pid == vxi->vx_initpid) ? 1 : pid,
12564 +                       func, file, line);
12565 +               if (pid == 0)   /* keeps pid 0 from matching a zero vx_initpid */
12566 +                       return 0;
12567 +               if (pid == vxi->vx_initpid)
12568 +                       return 1;
12569 +       }
12570 +       return pid;     /* all other pids pass through unchanged */
12571 +}
12572 +
12573 +#define vx_info_rmap_pid(v, p) \
12574 +       __vx_info_rmap_pid((v), (p), __func__, __FILE__, __LINE__)
12575 +#define vx_rmap_pid(p) vx_info_rmap_pid(current_vx_info(), p)
12576 +#define vx_rmap_tgid(p) vx_rmap_pid(p)
12577 +/* Reverse of __vx_info_map_pid(): pid 1 becomes vx_initpid when flagged. */
12578 +static inline int __vx_info_rmap_pid(struct vx_info *vxi, int pid,
12579 +       const char *func, const char *file, int line)
12580 +{
12581 +       if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) {
12582 +               vxfprintk(VXD_CBIT(cvirt, 2),
12583 +                       "vx_rmap_tgid: %p/%llx: %d -> %d",
12584 +                       vxi, (long long)vxi->vx_flags, pid,
12585 +                       (pid == 1) ? vxi->vx_initpid : pid,     /* log only; may differ from the returns below */
12586 +                       func, file, line);
12587 +               if ((pid == 1) && vxi->vx_initpid)      /* only if an init pid is set */
12588 +                       return vxi->vx_initpid;
12589 +               if (pid == vxi->vx_initpid)     /* the real init pid is not handed out */
12590 +                       return ~0U;             /* converted to the int return type */
12591 +       }
12592 +       return pid;
12593 +}
12594 +
12595 +#endif
12596 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/sched_cmd.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/sched_cmd.h
12597 --- linux-2.6.31.6/include/linux/vserver/sched_cmd.h    1970-01-01 01:00:00.000000000 +0100
12598 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/sched_cmd.h      2009-09-10 16:11:43.000000000 +0200
12599 @@ -0,0 +1,108 @@
12600 +#ifndef _VX_SCHED_CMD_H
12601 +#define _VX_SCHED_CMD_H
12602 +
12603 +
12604 +/*  sched vserver commands */
12605 +
12606 +#define VCMD_set_sched_v2      VC_CMD(SCHED, 1, 2)
12607 +#define VCMD_set_sched_v3      VC_CMD(SCHED, 1, 3)
12608 +#define VCMD_set_sched_v4      VC_CMD(SCHED, 1, 4)
12609 +
12610 +struct vcmd_set_sched_v2 {
12611 +       int32_t fill_rate;
12612 +       int32_t interval;
12613 +       int32_t tokens;
12614 +       int32_t tokens_min;
12615 +       int32_t tokens_max;
12616 +       uint64_t cpu_mask;
12617 +};
12618 +
12619 +struct vcmd_set_sched_v3 {
12620 +       uint32_t set_mask;
12621 +       int32_t fill_rate;
12622 +       int32_t interval;
12623 +       int32_t tokens;
12624 +       int32_t tokens_min;
12625 +       int32_t tokens_max;
12626 +       int32_t priority_bias;
12627 +};
12628 +
12629 +struct vcmd_set_sched_v4 {
12630 +       uint32_t set_mask;
12631 +       int32_t fill_rate;
12632 +       int32_t interval;
12633 +       int32_t tokens;
12634 +       int32_t tokens_min;
12635 +       int32_t tokens_max;
12636 +       int32_t prio_bias;
12637 +       int32_t cpu_id;
12638 +       int32_t bucket_id;
12639 +};
12640 +
12641 +#define VCMD_set_sched         VC_CMD(SCHED, 1, 5)
12642 +#define VCMD_get_sched         VC_CMD(SCHED, 2, 5)
12643 +
12644 +struct vcmd_sched_v5 {
12645 +       uint32_t mask;
12646 +       int32_t cpu_id;
12647 +       int32_t bucket_id;
12648 +       int32_t fill_rate[2];
12649 +       int32_t interval[2];
12650 +       int32_t tokens;
12651 +       int32_t tokens_min;
12652 +       int32_t tokens_max;
12653 +       int32_t prio_bias;
12654 +};
12655 +
12656 +#define VXSM_FILL_RATE         0x0001
12657 +#define VXSM_INTERVAL          0x0002
12658 +#define VXSM_FILL_RATE2                0x0004
12659 +#define VXSM_INTERVAL2         0x0008
12660 +#define VXSM_TOKENS            0x0010
12661 +#define VXSM_TOKENS_MIN                0x0020
12662 +#define VXSM_TOKENS_MAX                0x0040
12663 +#define VXSM_PRIO_BIAS         0x0100
12664 +
12665 +#define VXSM_IDLE_TIME         0x0200
12666 +#define VXSM_FORCE             0x0400
12667 +
12668 +#define        VXSM_V3_MASK            0x0173
12669 +#define        VXSM_SET_MASK           0x01FF
12670 +
12671 +#define VXSM_CPU_ID            0x1000
12672 +#define VXSM_BUCKET_ID         0x2000
12673 +
12674 +#define VXSM_MSEC              0x4000
12675 +
12676 +#define SCHED_KEEP             (-2)    /* only for v2 */
12677 +
12678 +#ifdef __KERNEL__
12679 +
12680 +#include <linux/compiler.h>
12681 +
12682 +extern int vc_set_sched_v2(struct vx_info *, void __user *);
12683 +extern int vc_set_sched_v3(struct vx_info *, void __user *);
12684 +extern int vc_set_sched_v4(struct vx_info *, void __user *);
12685 +extern int vc_set_sched(struct vx_info *, void __user *);
12686 +extern int vc_get_sched(struct vx_info *, void __user *);
12687 +
12688 +#endif /* __KERNEL__ */
12689 +
12690 +#define VCMD_sched_info                VC_CMD(SCHED, 3, 0)
12691 +
12692 +struct vcmd_sched_info {
12693 +       int32_t cpu_id;
12694 +       int32_t bucket_id;
12695 +       uint64_t user_msec;
12696 +       uint64_t sys_msec;
12697 +       uint64_t hold_msec;
12698 +       uint32_t token_usec;
12699 +       int32_t vavavoom;
12700 +};
12701 +
12702 +#ifdef __KERNEL__
12703 +
12704 +extern int vc_sched_info(struct vx_info *, void __user *);
12705 +
12706 +#endif /* __KERNEL__ */
12707 +#endif /* _VX_SCHED_CMD_H */
12708 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/sched_def.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/sched_def.h
12709 --- linux-2.6.31.6/include/linux/vserver/sched_def.h    1970-01-01 01:00:00.000000000 +0100
12710 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/sched_def.h      2009-09-10 16:11:43.000000000 +0200
12711 @@ -0,0 +1,68 @@
12712 +#ifndef _VX_SCHED_DEF_H
12713 +#define _VX_SCHED_DEF_H
12714 +
12715 +#include <linux/spinlock.h>
12716 +#include <linux/jiffies.h>
12717 +#include <linux/cpumask.h>
12718 +#include <asm/atomic.h>
12719 +#include <asm/param.h>
12720 +
12721 +
12722 +/* context sub struct */
12723 +
12724 +struct _vx_sched {
12725 +       spinlock_t tokens_lock;         /* lock for token bucket */
12726 +
12727 +       int tokens;                     /* number of CPU tokens */
12728 +       int fill_rate[2];               /* Fill rate: add X tokens... */
12729 +       int interval[2];                /* Divisor:   per Y jiffies   */
12730 +       int tokens_min;                 /* Limit:     minimum for unhold */
12731 +       int tokens_max;                 /* Limit:     no more than N tokens */
12732 +
12733 +       int prio_bias;                  /* bias offset for priority */
12734 +
12735 +       unsigned update_mask;           /* which features should be updated */
12736 +       cpumask_t update;               /* CPUs which should update */
12737 +};
12738 +
12739 +struct _vx_sched_pc {
12740 +       int tokens;                     /* number of CPU tokens */
12741 +       int flags;                      /* bucket flags */
12742 +
12743 +       int fill_rate[2];               /* Fill rate: add X tokens... */
12744 +       int interval[2];                /* Divisor:   per Y jiffies   */
12745 +       int tokens_min;                 /* Limit:     minimum for unhold */
12746 +       int tokens_max;                 /* Limit:     no more than N tokens */
12747 +
12748 +       int prio_bias;                  /* bias offset for priority */
12749 +       int vavavoom;                   /* last calculated vavavoom */
12750 +
12751 +       unsigned long norm_time;        /* last time accounted */
12752 +       unsigned long idle_time;        /* non linear time for fair sched */
12753 +       unsigned long token_time;       /* token time for accounting */
12754 +       unsigned long onhold;           /* jiffies when put on hold */
12755 +
12756 +       uint64_t user_ticks;            /* token tick events */
12757 +       uint64_t sys_ticks;             /* token tick events */
12758 +       uint64_t hold_ticks;            /* token ticks paused */
12759 +};
12760 +
12761 +
12762 +#define VXSF_ONHOLD    0x0001
12763 +#define VXSF_IDLE_TIME 0x0100
12764 +
12765 +#ifdef CONFIG_VSERVER_DEBUG
12766 +
12767 +static inline void __dump_vx_sched(struct _vx_sched *sched)
12768 +{
12769 +       printk("\t_vx_sched:\n");
12770 +       printk("\t tokens: %4d/%4d, %4d/%4d, %4d, %4d\n",
12771 +               sched->fill_rate[0], sched->interval[0],
12772 +               sched->fill_rate[1], sched->interval[1],
12773 +               sched->tokens_min, sched->tokens_max);
12774 +       printk("\t priority = %4d\n", sched->prio_bias);
12775 +}
12776 +
12777 +#endif
12778 +
12779 +#endif /* _VX_SCHED_DEF_H */
12780 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/sched.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/sched.h
12781 --- linux-2.6.31.6/include/linux/vserver/sched.h        1970-01-01 01:00:00.000000000 +0100
12782 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/sched.h  2009-09-10 16:11:43.000000000 +0200
12783 @@ -0,0 +1,26 @@
12784 +#ifndef _VX_SCHED_H
12785 +#define _VX_SCHED_H
12786 +
12787 +
12788 +#ifdef __KERNEL__
12789 +
12790 +struct timespec;
12791 +
12792 +void vx_vsi_uptime(struct timespec *, struct timespec *);
12793 +
12794 +
12795 +struct vx_info;
12796 +
12797 +void vx_update_load(struct vx_info *);
12798 +struct _vx_sched;    /* forward declarations: only pointers are used below */
12799 +struct _vx_sched_pc;
12800 +int vx_tokens_recalc(struct _vx_sched_pc *,
12801 +       unsigned long *, unsigned long *, int [2]);
12802 +
12803 +void vx_update_sched_param(struct _vx_sched *sched,
12804 +       struct _vx_sched_pc *sched_pc);
12805 +
12806 +#endif /* __KERNEL__ */
12807 +#else  /* _VX_SCHED_H */
12808 +#warning duplicate inclusion
12809 +#endif /* _VX_SCHED_H */
12810 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/signal_cmd.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/signal_cmd.h
12811 --- linux-2.6.31.6/include/linux/vserver/signal_cmd.h   1970-01-01 01:00:00.000000000 +0100
12812 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/signal_cmd.h     2009-09-10 16:11:43.000000000 +0200
12813 @@ -0,0 +1,43 @@
12814 +#ifndef _VX_SIGNAL_CMD_H
12815 +#define _VX_SIGNAL_CMD_H
12816 +
12817 +
12818 +/*  signalling vserver commands */
12819 +
12820 +#define VCMD_ctx_kill          VC_CMD(PROCTRL, 1, 0)
12821 +#define VCMD_wait_exit         VC_CMD(EVENT, 99, 0)
12822 +
12823 +struct vcmd_ctx_kill_v0 {
12824 +       int32_t pid;
12825 +       int32_t sig;
12826 +};
12827 +
12828 +struct vcmd_wait_exit_v0 {
12829 +       int32_t reboot_cmd;
12830 +       int32_t exit_code;
12831 +};
12832 +
12833 +#ifdef __KERNEL__
12834 +
12835 +extern int vc_ctx_kill(struct vx_info *, void __user *);
12836 +extern int vc_wait_exit(struct vx_info *, void __user *);
12837 +
12838 +#endif /* __KERNEL__ */
12839 +
12840 +/*  process alteration commands */
12841 +
12842 +#define VCMD_get_pflags                VC_CMD(PROCALT, 5, 0)
12843 +#define VCMD_set_pflags                VC_CMD(PROCALT, 6, 0)
12844 +
12845 +struct vcmd_pflags_v0 {
12846 +       uint32_t flagword;
12847 +       uint32_t mask;
12848 +};
12849 +
12850 +#ifdef __KERNEL__
12851 +
12852 +extern int vc_get_pflags(uint32_t pid, void __user *);
12853 +extern int vc_set_pflags(uint32_t pid, void __user *);
12854 +
12855 +#endif /* __KERNEL__ */
12856 +#endif /* _VX_SIGNAL_CMD_H */
12857 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/signal.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/signal.h
12858 --- linux-2.6.31.6/include/linux/vserver/signal.h       1970-01-01 01:00:00.000000000 +0100
12859 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/signal.h 2009-09-10 16:11:43.000000000 +0200
12860 @@ -0,0 +1,14 @@
12861 +#ifndef _VX_SIGNAL_H
12862 +#define _VX_SIGNAL_H
12863 +
12864 +
12865 +#ifdef __KERNEL__
12866 +
12867 +struct vx_info;
12868 +
12869 +int vx_info_kill(struct vx_info *, int, int);
12870 +
12871 +#endif /* __KERNEL__ */
12872 +#else  /* _VX_SIGNAL_H */
12873 +#warning duplicate inclusion
12874 +#endif /* _VX_SIGNAL_H */
12875 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/space_cmd.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/space_cmd.h
12876 --- linux-2.6.31.6/include/linux/vserver/space_cmd.h    1970-01-01 01:00:00.000000000 +0100
12877 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/space_cmd.h      2009-09-10 16:11:43.000000000 +0200
12878 @@ -0,0 +1,38 @@
12879 +#ifndef _VX_SPACE_CMD_H
12880 +#define _VX_SPACE_CMD_H
12881 +
12882 +
12883 +#define VCMD_enter_space_v0    VC_CMD(PROCALT, 1, 0)
12884 +#define VCMD_enter_space_v1    VC_CMD(PROCALT, 1, 1)
12885 +#define VCMD_enter_space       VC_CMD(PROCALT, 1, 2)
12886 +
12887 +#define VCMD_set_space_v0      VC_CMD(PROCALT, 3, 0)
12888 +#define VCMD_set_space_v1      VC_CMD(PROCALT, 3, 1)
12889 +#define VCMD_set_space         VC_CMD(PROCALT, 3, 2)
12890 +
12891 +#define VCMD_get_space_mask_v0 VC_CMD(PROCALT, 4, 0)
12892 +
12893 +#define VCMD_get_space_mask    VC_CMD(VSPACE, 0, 1)
12894 +#define VCMD_get_space_default VC_CMD(VSPACE, 1, 0)
12895 +
12896 +
12897 +struct vcmd_space_mask_v1 {
12898 +       uint64_t mask;
12899 +};
12900 +
12901 +struct vcmd_space_mask_v2 {
12902 +       uint64_t mask;
12903 +       uint32_t index;
12904 +};
12905 +
12906 +
12907 +#ifdef __KERNEL__
12908 +
12909 +extern int vc_enter_space_v1(struct vx_info *, void __user *);
12910 +extern int vc_set_space_v1(struct vx_info *, void __user *);
12911 +extern int vc_enter_space(struct vx_info *, void __user *);
12912 +extern int vc_set_space(struct vx_info *, void __user *);
12913 +extern int vc_get_space_mask(void __user *, int);
12914 +
12915 +#endif /* __KERNEL__ */
12916 +#endif /* _VX_SPACE_CMD_H */
12917 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/space.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/space.h
12918 --- linux-2.6.31.6/include/linux/vserver/space.h        1970-01-01 01:00:00.000000000 +0100
12919 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/space.h  2009-09-10 16:11:43.000000000 +0200
12920 @@ -0,0 +1,12 @@
12921 +#ifndef _VX_SPACE_H
12922 +#define _VX_SPACE_H
12923 +
12924 +#include <linux/types.h>
12925 +
12926 +struct vx_info;
12927 +
12928 +int vx_set_space(struct vx_info *vxi, unsigned long mask, unsigned index);
12929 +
12930 +#else  /* _VX_SPACE_H */
12931 +#warning duplicate inclusion
12932 +#endif /* _VX_SPACE_H */
12933 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/switch.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/switch.h
12934 --- linux-2.6.31.6/include/linux/vserver/switch.h       1970-01-01 01:00:00.000000000 +0100
12935 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/switch.h 2009-09-10 16:11:43.000000000 +0200
12936 @@ -0,0 +1,98 @@
12937 +#ifndef _VX_SWITCH_H
12938 +#define _VX_SWITCH_H
12939 +
12940 +#include <linux/types.h>
12941 +
12942 +
12943 +#define VC_CATEGORY(c)         (((c) >> 24) & 0x3F)
12944 +#define VC_COMMAND(c)          (((c) >> 16) & 0xFF)
12945 +#define VC_VERSION(c)          ((c) & 0xFFF)
12946 +
12947 +#define VC_CMD(c, i, v)                ((((VC_CAT_ ## c) & 0x3F) << 24) \
12948 +                               | (((i) & 0xFF) << 16) | ((v) & 0xFFF))
12949 +
12950 +/*
12951 +
12952 +  Syscall Matrix V2.8
12953 +
12954 +        |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL|
12955 +        |STATS  |DESTROY|ALTER  |CHANGE |LIMIT  |TEST   | |       |       |
12956 +        |INFO   |SETUP  |       |MOVE   |       |       | |       |       |
12957 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12958 +  SYSTEM |VERSION|VSETUP |VHOST  |       |       |       | |DEVICE |       |
12959 +  HOST   |     00|     01|     02|     03|     04|     05| |     06|     07|
12960 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12961 +  CPU    |       |VPROC  |PROCALT|PROCMIG|PROCTRL|       | |SCHED. |       |
12962 +  PROCESS|     08|     09|     10|     11|     12|     13| |     14|     15|
12963 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12964 +  MEMORY |       |       |       |       |MEMCTRL|       | |SWAP   |       |
12965 +        |     16|     17|     18|     19|     20|     21| |     22|     23|
12966 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12967 +  NETWORK|       |VNET   |NETALT |NETMIG |NETCTRL|       | |SERIAL |       |
12968 +        |     24|     25|     26|     27|     28|     29| |     30|     31|
12969 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12970 +  DISK   |       |       |       |TAGMIG |DLIMIT |       | |INODE  |       |
12971 +  VFS    |     32|     33|     34|     35|     36|     37| |     38|     39|
12972 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12973 +  OTHER  |VSTAT  |       |       |       |       |       | |VINFO  |       |
12974 +        |     40|     41|     42|     43|     44|     45| |     46|     47|
12975 +  =======+=======+=======+=======+=======+=======+=======+ +=======+=======+
12976 +  SPECIAL|EVENT  |       |       |       |FLAGS  |       | |VSPACE |       |
12977 +        |     48|     49|     50|     51|     52|     53| |     54|     55|
12978 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12979 +  SPECIAL|DEBUG  |       |       |       |RLIMIT |SYSCALL| |       |COMPAT |
12980 +        |     56|     57|     58|     59|     60|TEST 61| |     62|     63|
12981 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12982 +
12983 +*/
12984 +
12985 +#define VC_CAT_VERSION         0
12986 +
12987 +#define VC_CAT_VSETUP          1
12988 +#define VC_CAT_VHOST           2
12989 +
12990 +#define VC_CAT_DEVICE          6
12991 +
12992 +#define VC_CAT_VPROC           9
12993 +#define VC_CAT_PROCALT         10
12994 +#define VC_CAT_PROCMIG         11
12995 +#define VC_CAT_PROCTRL         12
12996 +
12997 +#define VC_CAT_SCHED           14
12998 +#define VC_CAT_MEMCTRL         20
12999 +
13000 +#define VC_CAT_VNET            25
13001 +#define VC_CAT_NETALT          26
13002 +#define VC_CAT_NETMIG          27
13003 +#define VC_CAT_NETCTRL         28
13004 +
13005 +#define VC_CAT_TAGMIG          35
13006 +#define VC_CAT_DLIMIT          36
13007 +#define VC_CAT_INODE           38
13008 +
13009 +#define VC_CAT_VSTAT           40
13010 +#define VC_CAT_VINFO           46
13011 +#define VC_CAT_EVENT           48
13012 +
13013 +#define VC_CAT_FLAGS           52
13014 +#define VC_CAT_VSPACE          54
13015 +#define VC_CAT_DEBUG           56
13016 +#define VC_CAT_RLIMIT          60
13017 +
13018 +#define VC_CAT_SYSTEST         61
13019 +#define VC_CAT_COMPAT          63
13020 +
13021 +/*  query version */
13022 +
13023 +#define VCMD_get_version       VC_CMD(VERSION, 0, 0)
13024 +#define VCMD_get_vci           VC_CMD(VERSION, 1, 0)
13025 +
13026 +
13027 +#ifdef __KERNEL__
13028 +
13029 +#include <linux/errno.h>
13030 +
13031 +#endif /* __KERNEL__ */
13032 +
13033 +#endif /* _VX_SWITCH_H */
13034 +
13035 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/tag_cmd.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/tag_cmd.h
13036 --- linux-2.6.31.6/include/linux/vserver/tag_cmd.h      1970-01-01 01:00:00.000000000 +0100
13037 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/tag_cmd.h        2009-09-10 16:11:43.000000000 +0200
13038 @@ -0,0 +1,22 @@
13039 +#ifndef _VX_TAG_CMD_H
13040 +#define _VX_TAG_CMD_H
13041 +
13042 +
13043 +/* vinfo commands */
13044 +
13045 +#define VCMD_task_tag          VC_CMD(VINFO, 3, 0)
13046 +
13047 +#ifdef __KERNEL__
13048 +extern int vc_task_tag(uint32_t);
13049 +
13050 +#endif /* __KERNEL__ */
13051 +
13052 +/* context commands */
13053 +
13054 +#define VCMD_tag_migrate       VC_CMD(TAGMIG, 1, 0)
13055 +
13056 +#ifdef __KERNEL__
13057 +extern int vc_tag_migrate(uint32_t);
13058 +
13059 +#endif /* __KERNEL__ */
13060 +#endif /* _VX_TAG_CMD_H */
13061 diff -NurpP --minimal linux-2.6.31.6/include/linux/vserver/tag.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/tag.h
13062 --- linux-2.6.31.6/include/linux/vserver/tag.h  1970-01-01 01:00:00.000000000 +0100
13063 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vserver/tag.h    2009-09-10 16:11:43.000000000 +0200
13064 @@ -0,0 +1,143 @@
13065 +#ifndef _DX_TAG_H
13066 +#define _DX_TAG_H
13067 +
13068 +#include <linux/types.h>
13069 +
13070 +
13071 +#define DX_TAG(in)     (IS_TAGGED(in))
13072 +
13073 +
13074 +#ifdef CONFIG_TAG_NFSD
13075 +#define DX_TAG_NFSD    1
13076 +#else
13077 +#define DX_TAG_NFSD    0
13078 +#endif
13079 +
13080 +
13081 +#ifdef CONFIG_TAGGING_NONE
13082 +
13083 +#define MAX_UID                0xFFFFFFFF
13084 +#define MAX_GID                0xFFFFFFFF
13085 +
13086 +#define INOTAG_TAG(cond, uid, gid, tag)        (0)
13087 +
13088 +#define TAGINO_UID(cond, uid, tag)     (uid)
13089 +#define TAGINO_GID(cond, gid, tag)     (gid)
13090 +
13091 +#endif
13092 +
13093 +
13094 +#ifdef CONFIG_TAGGING_GID16
13095 +
13096 +#define MAX_UID                0xFFFFFFFF
13097 +#define MAX_GID                0x0000FFFF
13098 +
13099 +#define INOTAG_TAG(cond, uid, gid, tag)        \
13100 +       ((cond) ? (((gid) >> 16) & 0xFFFF) : 0)
13101 +
13102 +#define TAGINO_UID(cond, uid, tag)     (uid)
13103 +#define TAGINO_GID(cond, gid, tag)     \
13104 +       ((cond) ? (((gid) & 0xFFFF) | ((tag) << 16)) : (gid))
13105 +
13106 +#endif
13107 +
13108 +
13109 +#ifdef CONFIG_TAGGING_ID24
13110 +
13111 +#define MAX_UID                0x00FFFFFF
13112 +#define MAX_GID                0x00FFFFFF
13113 +
13114 +#define INOTAG_TAG(cond, uid, gid, tag)        \
13115 +       ((cond) ? ((((uid) >> 16) & 0xFF00) | (((gid) >> 24) & 0xFF)) : 0)
13116 +
13117 +#define TAGINO_UID(cond, uid, tag)     \
13118 +       ((cond) ? (((uid) & 0xFFFFFF) | (((tag) & 0xFF00) << 16)) : (uid))
13119 +#define TAGINO_GID(cond, gid, tag)     \
13120 +       ((cond) ? (((gid) & 0xFFFFFF) | (((tag) & 0x00FF) << 24)) : (gid))
13121 +
13122 +#endif
13123 +
13124 +
13125 +#ifdef CONFIG_TAGGING_UID16
13126 +
13127 +#define MAX_UID                0x0000FFFF
13128 +#define MAX_GID                0xFFFFFFFF
13129 +
13130 +#define INOTAG_TAG(cond, uid, gid, tag)        \
13131 +       ((cond) ? (((uid) >> 16) & 0xFFFF) : 0)
13132 +
13133 +#define TAGINO_UID(cond, uid, tag)     \
13134 +       ((cond) ? (((uid) & 0xFFFF) | ((tag) << 16)) : (uid))
13135 +#define TAGINO_GID(cond, gid, tag)     (gid)
13136 +
13137 +#endif
13138 +
13139 +
13140 +#ifdef CONFIG_TAGGING_INTERN
13141 +
13142 +#define MAX_UID                0xFFFFFFFF
13143 +#define MAX_GID                0xFFFFFFFF
13144 +
13145 +#define INOTAG_TAG(cond, uid, gid, tag)        \
13146 +       ((cond) ? (tag) : 0)
13147 +
13148 +#define TAGINO_UID(cond, uid, tag)     (uid)
13149 +#define TAGINO_GID(cond, gid, tag)     (gid)
13150 +
13151 +#endif
13152 +
13153 +
13154 +#ifndef CONFIG_TAGGING_NONE
13155 +#define dx_current_fstag(sb)   \
13156 +       ((sb)->s_flags & MS_TAGGED ? dx_current_tag() : 0)
13157 +#else
13158 +#define dx_current_fstag(sb)   (0)
13159 +#endif
13160 +
13161 +#ifndef CONFIG_TAGGING_INTERN
13162 +#define TAGINO_TAG(cond, tag)  (0)
13163 +#else
13164 +#define TAGINO_TAG(cond, tag)  ((cond) ? (tag) : 0)
13165 +#endif
13166 +
13167 +#define INOTAG_UID(cond, uid, gid)     \
13168 +       ((cond) ? ((uid) & MAX_UID) : (uid))
13169 +#define INOTAG_GID(cond, uid, gid)     \
13170 +       ((cond) ? ((gid) & MAX_GID) : (gid))
13171 +
13172 +
13173 +static inline uid_t dx_map_uid(uid_t uid)
13174 +{
13175 +       /* ids above MAX_UID, other than -1, become -2; result is masked */
13176 +       uid_t mapped = ((uid != -1) && (uid > MAX_UID)) ? (uid_t)-2 : uid;
13177 +       return mapped & MAX_UID;
13178 +}
13179 +
13180 +static inline gid_t dx_map_gid(gid_t gid)
13181 +{
13182 +       /* ids above MAX_GID, other than -1, become -2; result is masked */
13183 +       gid_t mapped = ((gid != -1) && (gid > MAX_GID)) ? (gid_t)-2 : gid;
13184 +       return mapped & MAX_GID;
13185 +}
13186 +
13187 +struct peer_tag {
13188 +       int32_t xid;
13189 +       int32_t nid;
13190 +};
13191 +
13192 +#define dx_notagcheck(sb) ((sb) && ((sb)->s_flags & MS_NOTAGCHECK))
13193 +
13194 +int dx_parse_tag(char *string, tag_t *tag, int remove, int *mnt_flags,
13195 +                unsigned long *flags);
13196 +
13197 +#ifdef CONFIG_PROPAGATE
13198 +
13199 +void __dx_propagate_tag(struct nameidata *nd, struct inode *inode);
13200 +
13201 +#define dx_propagate_tag(n, i) __dx_propagate_tag(n, i)
13202 +
13203 +#else
13204 +#define dx_propagate_tag(n, i) do { } while (0)
13205 +#endif
13206 +
13207 +#endif /* _DX_TAG_H */
13208 diff -NurpP --minimal linux-2.6.31.6/include/linux/vs_inet6.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_inet6.h
13209 --- linux-2.6.31.6/include/linux/vs_inet6.h     1970-01-01 01:00:00.000000000 +0100
13210 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_inet6.h       2009-09-10 16:11:43.000000000 +0200
13211 @@ -0,0 +1,246 @@
13212 +#ifndef _VS_INET6_H
13213 +#define _VS_INET6_H
13214 +
13215 +#include "vserver/base.h"
13216 +#include "vserver/network.h"
13217 +#include "vserver/debug.h"
13218 +
13219 +#include <net/ipv6.h>
13220 +
13221 +#define NXAV6(a)       &(a)->ip, &(a)->mask, (a)->prefix, (a)->type
13222 +#define NXAV6_FMT      "[%pI6/%pI6/%d:%04x]"
13223 +
13224 +
13225 +#ifdef CONFIG_IPV6
13226 +
13227 +static inline
13228 +int v6_addr_match(struct nx_addr_v6 *nxa,
13229 +       const struct in6_addr *addr, uint16_t mask)
13230 +{
13231 +       int ret = 0;    /* stays 0 when no case below applies */
13232 +       /* non-zero result means addr matches nxa; (type & mask) picks the rule */
13233 +       switch (nxa->type & mask) {
13234 +       case NXA_TYPE_MASK:     /* the cmp helper is non-zero on MISMATCH */
13235 +               ret = !ipv6_masked_addr_cmp(&nxa->ip, &nxa->mask, addr);
13236 +               break;
13237 +       case NXA_TYPE_ADDR:     /* exact address equality */
13238 +               ret = ipv6_addr_equal(&nxa->ip, addr);
13239 +               break;
13240 +       case NXA_TYPE_ANY:      /* matches every address */
13241 +               ret = 1;
13242 +               break;
13243 +       }
13244 +       vxdprintk(VXD_CBIT(net, 0),
13245 +               "v6_addr_match(%p" NXAV6_FMT ",%pI6,%04x) = %d",
13246 +               nxa, NXAV6(nxa), addr, mask, ret);
13247 +       return ret;
13248 +}
13249 +
13250 +static inline
13251 +int v6_addr_in_nx_info(struct nx_info *nxi,
13252 +       const struct in6_addr *addr, uint16_t mask)
13253 +{
13254 +       struct nx_addr_v6 *cur;
13255 +       int found = 1;          /* a NULL nxi accepts every address */
13256 +
13257 +       if (nxi) {
13258 +               found = 0;
13259 +               /* scan the v6 chain until the first matching entry */
13260 +               for (cur = &nxi->v6; cur && !found; cur = cur->next)
13261 +                       found = !!v6_addr_match(cur, addr, mask);
13262 +       }
13263 +
13264 +       vxdprintk(VXD_CBIT(net, 0),
13265 +               "v6_addr_in_nx_info(%p[#%u],%pI6,%04x) = %d",
13266 +               nxi, nxi ? nxi->nx_id : 0, addr, mask, found);
13267 +       return found;
13268 +}
13269 +
13270 +static inline
13271 +int v6_nx_addr_match(struct nx_addr_v6 *nxa, struct nx_addr_v6 *addr, uint16_t mask)
13272 +{
13273 +       /* FIXME: needs full range checks */
13274 +       return v6_addr_match(nxa, &addr->ip, mask);
13275 +}
13276 +
13277 +static inline
13278 +int v6_nx_addr_in_nx_info(struct nx_info *nxi, struct nx_addr_v6 *nxa, uint16_t mask)
13279 +{
13280 +       struct nx_addr_v6 *cur = &nxi->v6;      /* nxi is dereferenced unchecked */
13281 +       int hit = 0;
13282 +       /* 1 if any chained entry matches nxa, else 0 */
13283 +       for (; cur && !hit; cur = cur->next)
13284 +               hit = !!v6_nx_addr_match(cur, nxa, mask);
13285 +       return hit;
13286 +}
13287 +
13288 +
13289 +/*
13290 + *     Check if a given address matches for a socket
13291 + *
13292 + *     nxi:            the socket's nx_info if any
13293 + *     addr:           to be verified address
13294 + */
13295 +static inline
13296 +int v6_sock_addr_match (
13297 +       struct nx_info *nxi,
13298 +       struct inet_sock *inet,
13299 +       struct in6_addr *addr)
13300 +{
13301 +       struct sock *sk = &inet->sk;
13302 +       struct in6_addr *saddr = inet6_rcv_saddr(sk);   /* NOTE(review): used unchecked below - confirm it cannot be NULL */
13303 +
13304 +       if (!ipv6_addr_any(addr) &&
13305 +               ipv6_addr_equal(saddr, addr))
13306 +               return 1;       /* addr is not the any-address and equals saddr */
13307 +       if (ipv6_addr_any(saddr))       /* saddr is the any-address: decide via nxi */
13308 +               return v6_addr_in_nx_info(nxi, addr, -1);       /* -1 becomes an all-ones uint16_t mask */
13309 +       return 0;
13310 +}
13311 +
13312 +/*
13313 + *     check if address is covered by socket
13314 + *
13315 + *     sk:     the socket to check against
13316 + *     addr:   the address in question (must be != 0)
13317 + */
13318 +
13319 +static inline
13320 +int __v6_addr_match_socket(const struct sock *sk, struct nx_addr_v6 *nxa)
13321 +{
13322 +       struct in6_addr *bound = inet6_rcv_saddr(sk);
13323 +       struct nx_info *ctx = sk->sk_nx_info;
13324 +
13325 +       vxdprintk(VXD_CBIT(net, 5),
13326 +               "__v6_addr_in_socket(%p," NXAV6_FMT ") %p:%pI6 %p;%lx",
13327 +               sk, NXAV6(nxa), ctx, bound, sk->sk_socket,
13328 +               (sk->sk_socket?sk->sk_socket->flags:0));
13329 +
13330 +       /* socket bound to a specific address: compare it directly */
13331 +       if (!ipv6_addr_any(bound))
13332 +               return v6_addr_match(nxa, bound, -1);
13333 +       /* any-address socket inside a context: check the context's list */
13334 +       if (ctx)
13335 +               return v6_nx_addr_in_nx_info(ctx, nxa, -1);
13336 +       return 1;       /* any-address socket without a context */
13337 +}
13338 +
13339 +
13340 +/* inet related checks and helpers */
13341 +
13342 +
13343 +struct in_ifaddr;
13344 +struct net_device;
13345 +struct sock;
13346 +
13347 +
13348 +#include <linux/netdevice.h>
13349 +#include <linux/inetdevice.h>
13350 +#include <net/inet_timewait_sock.h>
13351 +
13352 +
13353 +int dev_in_nx_info(struct net_device *, struct nx_info *);
13354 +int v6_dev_in_nx_info(struct net_device *, struct nx_info *);
13355 +int nx_v6_addr_conflict(struct nx_info *, struct nx_info *);
13356 +
13357 +
13358 +
13359 +static inline
13360 +int v6_ifa_in_nx_info(struct inet6_ifaddr *ifa, struct nx_info *nxi)
13361 +{
13362 +       /* no context: always inside; context but no ifa: never inside */
13363 +       if (!nxi || !ifa)
13364 +               return !nxi;
13365 +       /* otherwise test the interface address against the context */
13366 +       return v6_addr_in_nx_info(nxi, &ifa->addr, -1);
13367 +}
13368 +
13369 +static inline
13370 +int nx_v6_ifa_visible(struct nx_info *nxi, struct inet6_ifaddr *ifa)
13371 +{
13372 +       vxdprintk(VXD_CBIT(net, 1), "nx_v6_ifa_visible(%p[#%u],%p) %d",
13373 +               nxi, nxi ? nxi->nx_id : 0, ifa,
13374 +               nxi ? v6_ifa_in_nx_info(ifa, nxi) : 0);
13375 +
13376 +       /* visible whenever the NXF_HIDE_NETIF flag test fails */
13377 +       if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0))
13378 +               return 1;
13379 +       /* otherwise visible only if ifa lies inside the context */
13380 +       return v6_ifa_in_nx_info(ifa, nxi) ? 1 : 0;
13381 +}
13382 +
13383 +
13384 +struct nx_v6_sock_addr {
13385 +       struct in6_addr saddr;  /* Address used for validation */
13386 +       struct in6_addr baddr;  /* Address used for socket bind */
13387 +};
13388 +
13389 +static inline
13390 +int v6_map_sock_addr(struct inet_sock *inet, struct sockaddr_in6 *addr,
13391 +       struct nx_v6_sock_addr *nsa)
13392 +{
13393 +       // struct sock *sk = &inet->sk;
13394 +       // struct nx_info *nxi = sk->sk_nx_info;
13395 +       struct in6_addr saddr = addr->sin6_addr;        /* local copy of the caller-supplied address */
13396 +       struct in6_addr baddr = saddr;  /* bind address is the same value: no remapping here */
13397 +
13398 +       nsa->saddr = saddr;
13399 +       nsa->baddr = baddr;
13400 +       return 0;       /* the only return value; inet is not used */
13401 +}
13402 +
13403 +static inline
13404 +void v6_set_sock_addr(struct inet_sock *inet, struct nx_v6_sock_addr *nsa)
13405 +{
13406 +       // struct sock *sk = &inet->sk;
13407 +       // struct in6_addr *saddr = inet6_rcv_saddr(sk);
13408 +
13409 +       // *saddr = nsa->baddr;
13410 +       // inet->saddr = nsa->baddr;
13411 +}
13412 +
13413 +static inline
13414 +int nx_info_has_v6(struct nx_info *nxi)
13415 +{
13416 +       /* no context at all counts as having v6 */
13417 +       if (!nxi)
13418 +               return 1;
13419 +       /* with a context, the answer is whatever NX_IPV6() reports */
13420 +       return NX_IPV6(nxi) ? 1 : 0;
13421 +}
13422 +
13423 +#else /* CONFIG_IPV6 */
13424 +
13425 +static inline
13426 +int nx_v6_dev_visible(struct nx_info *n, struct net_device *d)
13427 +{
13428 +       return 1;
13429 +}
13430 +
13431 +
13432 +static inline
13433 +int nx_v6_addr_conflict(struct nx_info *n, uint32_t a, const struct sock *s)
13434 +{
13435 +       return 1;
13436 +}
13437 +
13438 +static inline
13439 +int v6_ifa_in_nx_info(struct in_ifaddr *a, struct nx_info *n)
13440 +{
13441 +       return 1;
13442 +}
13443 +
13444 +static inline
13445 +int nx_info_has_v6(struct nx_info *nxi)
13446 +{
13447 +       return 0;
13448 +}
13449 +
13450 +#endif /* CONFIG_IPV6 */
13451 +
13452 +#define current_nx_info_has_v6() \
13453 +       nx_info_has_v6(current_nx_info())
13454 +
13455 +#else
13456 +#warning duplicate inclusion
13457 +#endif
13458 diff -NurpP --minimal linux-2.6.31.6/include/linux/vs_inet.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_inet.h
13459 --- linux-2.6.31.6/include/linux/vs_inet.h      1970-01-01 01:00:00.000000000 +0100
13460 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_inet.h        2009-09-10 16:11:43.000000000 +0200
13461 @@ -0,0 +1,342 @@
13462 +#ifndef _VS_INET_H
13463 +#define _VS_INET_H
13464 +
13465 +#include "vserver/base.h"
13466 +#include "vserver/network.h"
13467 +#include "vserver/debug.h"
13468 +
13469 +#define IPI_LOOPBACK   htonl(INADDR_LOOPBACK)
13470 +
13471 +#define NXAV4(a)       NIPQUAD((a)->ip[0]), NIPQUAD((a)->ip[1]), \
13472 +                       NIPQUAD((a)->mask), (a)->type
13473 +#define NXAV4_FMT      "[" NIPQUAD_FMT "-" NIPQUAD_FMT "/" NIPQUAD_FMT ":%04x]"
13474 +
13475 +
13476 +static inline
13477 +int v4_addr_match(struct nx_addr_v4 *nxa, __be32 addr, uint16_t tmask)
13478 +{
13479 +       __be32 ip = nxa->ip[0].s_addr;
13480 +       __be32 mask = nxa->mask.s_addr;
13481 +       __be32 bcast = ip | ~mask;
13482 +       int ret = 0;    /* stays 0 when no case below applies */
13483 +       /* non-zero result means addr matches nxa; (type & tmask) picks the rule */
13484 +       switch (nxa->type & tmask) {
13485 +       case NXA_TYPE_MASK:
13486 +               ret = (ip == (addr & mask));
13487 +               break;
13488 +       case NXA_TYPE_ADDR:
13489 +               ret = 3;        /* result code for an exact address hit */
13490 +               if (addr == ip)
13491 +                       break;
13492 +               /* fall through to broadcast */
13493 +       case NXA_MOD_BCAST:
13494 +               ret = ((tmask & NXA_MOD_BCAST) && (addr == bcast));
13495 +               break;
13496 +       case NXA_TYPE_RANGE:    /* order in host byte order: __be32 does not sort */
13497 +               ret = ((ntohl(ip) <= ntohl(addr)) &&
13498 +                       (ntohl(nxa->ip[1].s_addr) > ntohl(addr)));
13499 +               break;
13500 +       case NXA_TYPE_ANY:
13501 +               ret = 2;
13502 +               break;
13503 +       }
13504 +
13505 +       vxdprintk(VXD_CBIT(net, 0),
13506 +               "v4_addr_match(%p" NXAV4_FMT "," NIPQUAD_FMT ",%04x) = %d",
13507 +               nxa, NXAV4(nxa), NIPQUAD(addr), tmask, ret);
13508 +       return ret;
13509 +}
13510 +
13511 +static inline
13512 +int v4_addr_in_nx_info(struct nx_info *nxi, __be32 addr, uint16_t tmask)
13513 +{
13514 +       struct nx_addr_v4 *nxa;
13515 +       int ret = 1;    /* result when there is no context (nxi == NULL) */
13516 +
13517 +       if (!nxi)
13518 +               goto out;
13519 +
13520 +       ret = 2;        /* each ret = N is the result if the check right after it hits */
13521 +       /* allow 127.0.0.1 when remapping lback */
13522 +       if ((tmask & NXA_LOOPBACK) &&
13523 +               (addr == IPI_LOOPBACK) &&
13524 +               nx_info_flags(nxi, NXF_LBACK_REMAP, 0))
13525 +               goto out;
13526 +       ret = 3;
13527 +       /* check for lback address */
13528 +       if ((tmask & NXA_MOD_LBACK) &&
13529 +               (nxi->v4_lback.s_addr == addr))
13530 +               goto out;
13531 +       ret = 4;
13532 +       /* check for broadcast address */
13533 +       if ((tmask & NXA_MOD_BCAST) &&
13534 +               (nxi->v4_bcast.s_addr == addr))
13535 +               goto out;
13536 +       ret = 5;
13537 +       /* check for v4 addresses */
13538 +       for (nxa = &nxi->v4; nxa; nxa = nxa->next)
13539 +               if (v4_addr_match(nxa, addr, tmask))
13540 +                       goto out;
13541 +       ret = 0;        /* none of the checks above matched */
13542 +out:
13543 +       vxdprintk(VXD_CBIT(net, 0),
13544 +               "v4_addr_in_nx_info(%p[#%u]," NIPQUAD_FMT ",%04x) = %d",
13545 +               nxi, nxi ? nxi->nx_id : 0, NIPQUAD(addr), tmask, ret);
13546 +       return ret;
13547 +}
13548 +/* Match entry nxa against another entry; only addr->ip[0] is looked at. */
13549 +static inline
13550 +int v4_nx_addr_match(struct nx_addr_v4 *nxa, struct nx_addr_v4 *addr, uint16_t mask)
13551 +{
13552 +       /* FIXME: needs full range checks */
13553 +       return v4_addr_match(nxa, addr->ip[0].s_addr, mask);
13554 +}
13555 +/* 1 if any v4 entry of nxi matches nxa, else 0; nxi is used without a NULL check. */
13556 +static inline
13557 +int v4_nx_addr_in_nx_info(struct nx_info *nxi, struct nx_addr_v4 *nxa, uint16_t mask)
13558 +{
13559 +       struct nx_addr_v4 *ptr;
13560 +
13561 +       for (ptr = &nxi->v4; ptr; ptr = ptr->next)
13562 +               if (v4_nx_addr_match(ptr, nxa, mask))
13563 +                       return 1;
13564 +       return 0;
13565 +}
13566 +
13567 +#include <net/inet_sock.h>
13568 +
13569 +/*
13570 + *     Check if a given address matches for a socket
13571 + *     (inet: the socket whose rcv_saddr is compared with addr)
13572 + *     nxi:            the socket's nx_info if any
13573 + *     addr:           to be verified address
13574 + */
13575 +static inline
13576 +int v4_sock_addr_match (
13577 +       struct nx_info *nxi,
13578 +       struct inet_sock *inet,
13579 +       __be32 addr)
13580 +{
13581 +       __be32 saddr = inet->rcv_saddr;
13582 +       __be32 bcast = nxi ? nxi->v4_bcast.s_addr : INADDR_BROADCAST;
13583 +
13584 +       if (addr && (saddr == addr || bcast == addr))
13585 +               return 1;
13586 +       if (!saddr)     /* rcv_saddr is 0: fall back to the nx_info check */
13587 +               return v4_addr_in_nx_info(nxi, addr, NXA_MASK_BIND);
13588 +       return 0;
13589 +}
13590 +
13591 +
13592 +/* inet related checks and helpers */
13593 +
13594 +
13595 +struct in_ifaddr;
13596 +struct net_device;
13597 +struct sock;
13598 +
13599 +#ifdef CONFIG_INET
13600 +
13601 +#include <linux/netdevice.h>
13602 +#include <linux/inetdevice.h>
13603 +#include <net/inet_sock.h>
13604 +#include <net/inet_timewait_sock.h>
13605 +
13606 +
13607 +int dev_in_nx_info(struct net_device *, struct nx_info *);
13608 +int v4_dev_in_nx_info(struct net_device *, struct nx_info *);
13609 +int nx_v4_addr_conflict(struct nx_info *, struct nx_info *);
13610 +
13611 +
13612 +/*
13613 + *     check if address is covered by socket
13614 + *
13615 + *     sk:     the socket to check against
13616 + *     nxa:    the address entry in question (must not be NULL)
13617 + */
13618 +
13619 +static inline
13620 +int __v4_addr_match_socket(const struct sock *sk, struct nx_addr_v4 *nxa)
13621 +{
13622 +       struct nx_info *nxi = sk->sk_nx_info;
13623 +       __be32 saddr = inet_rcv_saddr(sk);
13624 +
13625 +       vxdprintk(VXD_CBIT(net, 5),
13626 +               "__v4_addr_match_socket(%p," NXAV4_FMT ") %p:" NIPQUAD_FMT " %p;%lx",
13627 +               sk, NXAV4(nxa), nxi, NIPQUAD(saddr), sk->sk_socket,
13628 +               (sk->sk_socket?sk->sk_socket->flags:0));
13629 +
13630 +       if (saddr) {            /* direct address match */
13631 +               return v4_addr_match(nxa, saddr, -1);
13632 +       } else if (nxi) {       /* match against nx_info */
13633 +               return v4_nx_addr_in_nx_info(nxi, nxa, -1);
13634 +       } else {                /* unrestricted any socket */
13635 +               return 1;
13636 +       }
13637 +}
13638 +
13639 +
13640 +/* 1 unless the NXF_HIDE_NETIF flag check passes and dev_in_nx_info() rejects dev. */
13641 +static inline
13642 +int nx_dev_visible(struct nx_info *nxi, struct net_device *dev)
13643 +{
13644 +       vxdprintk(VXD_CBIT(net, 1), "nx_dev_visible(%p[#%u],%p »%s«) %d",
13645 +               nxi, nxi ? nxi->nx_id : 0, dev, dev->name,
13646 +               nxi ? dev_in_nx_info(dev, nxi) : 0);
13647 +
13648 +       if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0))
13649 +               return 1;
13650 +       if (dev_in_nx_info(dev, nxi))
13651 +               return 1;
13652 +       return 0;
13653 +}
13654 +
13655 +/* 1 if nxi is NULL, 0 if ifa is NULL, else ifa_local checked with NXA_MASK_SHOW. */
13656 +static inline
13657 +int v4_ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
13658 +{
13659 +       if (!nxi)
13660 +               return 1;
13661 +       if (!ifa)
13662 +               return 0;
13663 +       return v4_addr_in_nx_info(nxi, ifa->ifa_local, NXA_MASK_SHOW);
13664 +}
13665 +/* 1 unless the NXF_HIDE_NETIF flag check passes and v4_ifa_in_nx_info() rejects ifa. */
13666 +static inline
13667 +int nx_v4_ifa_visible(struct nx_info *nxi, struct in_ifaddr *ifa)
13668 +{
13669 +       vxdprintk(VXD_CBIT(net, 1), "nx_v4_ifa_visible(%p[#%u],%p) %d",
13670 +               nxi, nxi ? nxi->nx_id : 0, ifa,
13671 +               nxi ? v4_ifa_in_nx_info(ifa, nxi) : 0);
13672 +
13673 +       if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0))
13674 +               return 1;
13675 +       if (v4_ifa_in_nx_info(ifa, nxi))
13676 +               return 1;
13677 +       return 0;
13678 +}
13679 +
13680 +
13681 +struct nx_v4_sock_addr {
13682 +       __be32 saddr;   /* Address used for validation */
13683 +       __be32 baddr;   /* Address used for socket bind */
13684 +};
13685 +/* Check/remap a bind address for the socket's nx_info; fills *nsa, returns 0 or -EADDRNOTAVAIL. */
13686 +static inline
13687 +int v4_map_sock_addr(struct inet_sock *inet, struct sockaddr_in *addr,
13688 +       struct nx_v4_sock_addr *nsa)
13689 +{
13690 +       struct sock *sk = &inet->sk;
13691 +       struct nx_info *nxi = sk->sk_nx_info;
13692 +       __be32 saddr = addr->sin_addr.s_addr;
13693 +       __be32 baddr = saddr;   /* bind address defaults to the requested one */
13694 +
13695 +       vxdprintk(VXD_CBIT(net, 3),
13696 +               "inet_bind(%p)* %p,%p;%lx " NIPQUAD_FMT,
13697 +               sk, sk->sk_nx_info, sk->sk_socket,
13698 +               (sk->sk_socket ? sk->sk_socket->flags : 0),
13699 +               NIPQUAD(saddr));
13700 +
13701 +       if (nxi) {
13702 +               if (saddr == INADDR_ANY) {
13703 +                       if (nx_info_flags(nxi, NXF_SINGLE_IP, 0))
13704 +                               baddr = nxi->v4.ip[0].s_addr;   /* bind to the first v4 entry */
13705 +               } else if (saddr == IPI_LOOPBACK) {
13706 +                       if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0))
13707 +                               baddr = nxi->v4_lback.s_addr;   /* bind to the context lback */
13708 +               } else {        /* normal address bind */
13709 +                       if (!v4_addr_in_nx_info(nxi, saddr, NXA_MASK_BIND))
13710 +                               return -EADDRNOTAVAIL;
13711 +               }
13712 +       }
13713 +
13714 +       vxdprintk(VXD_CBIT(net, 3),
13715 +               "inet_bind(%p) " NIPQUAD_FMT ", " NIPQUAD_FMT,
13716 +               sk, NIPQUAD(saddr), NIPQUAD(baddr));
13717 +
13718 +       nsa->saddr = saddr;
13719 +       nsa->baddr = baddr;
13720 +       return 0;
13721 +}
13722 +/* Store nsa->baddr as both saddr and rcv_saddr of the socket. */
13723 +static inline
13724 +void v4_set_sock_addr(struct inet_sock *inet, struct nx_v4_sock_addr *nsa)
13725 +{
13726 +       inet->saddr = nsa->baddr;
13727 +       inet->rcv_saddr = nsa->baddr;
13728 +}
13729 +
13730 +
13731 +/*
13732 + *      helper to simplify inet_lookup_listener
13733 + *      (1 if addr == saddr != 0; nx_info check, or 1 without nxi, if !saddr; else 0)
13734 + *      nxi:   the socket's nx_info if any
13735 + *      addr:  to be verified address
13736 + *      saddr: socket address
13737 + */
13738 +static inline int v4_inet_addr_match (
13739 +       struct nx_info *nxi,
13740 +       __be32 addr,
13741 +       __be32 saddr)
13742 +{
13743 +       if (addr && (saddr == addr))
13744 +               return 1;
13745 +       if (!saddr)
13746 +               return nxi ? v4_addr_in_nx_info(nxi, addr, NXA_MASK_BIND) : 1;
13747 +       return 0;
13748 +}
13749 +/* Return IPI_LOOPBACK for the context's v4_lback when the NXF_HIDE_LBACK check passes, else addr. */
13750 +static inline __be32 nx_map_sock_lback(struct nx_info *nxi, __be32 addr)
13751 +{
13752 +       if (nx_info_flags(nxi, NXF_HIDE_LBACK, 0) &&
13753 +               (addr == nxi->v4_lback.s_addr))
13754 +               return IPI_LOOPBACK;
13755 +       return addr;
13756 +}
13757 +/* 1 if nxi is NULL, NX_IPV4(nxi) holds, or the NXF_LBACK_REMAP check passes; else 0. */
13758 +static inline
13759 +int nx_info_has_v4(struct nx_info *nxi)
13760 +{
13761 +       if (!nxi)
13762 +               return 1;
13763 +       if (NX_IPV4(nxi))
13764 +               return 1;
13765 +       if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0))
13766 +               return 1;
13767 +       return 0;
13768 +}
13769 +
13770 +#else /* CONFIG_INET */
13771 +/* !CONFIG_INET stub: always returns 1. */
13772 +static inline
13773 +int nx_dev_visible(struct nx_info *n, struct net_device *d)
13774 +{
13775 +       return 1;
13776 +}
13777 +/* !CONFIG_INET stub with the same prototype as the CONFIG_INET declaration; always returns 1. */
13778 +static inline
13779 +int nx_v4_addr_conflict(struct nx_info *n, struct nx_info *o)
13780 +{
13781 +       return 1;
13782 +}
13783 +/* !CONFIG_INET stub: always returns 1. */
13784 +static inline
13785 +int v4_ifa_in_nx_info(struct in_ifaddr *a, struct nx_info *n)
13786 +{
13787 +       return 1;
13788 +}
13789 +/* !CONFIG_INET stub: always returns 0. */
13790 +static inline
13791 +int nx_info_has_v4(struct nx_info *nxi)
13792 +{
13793 +       return 0;
13794 +}
13795 +
13796 +#endif /* CONFIG_INET */
13797 +
13798 +#define current_nx_info_has_v4() \
13799 +       nx_info_has_v4(current_nx_info())
13800 +
13801 +#else
13802 +// #warning duplicate inclusion
13803 +#endif
13804 diff -NurpP --minimal linux-2.6.31.6/include/linux/vs_limit.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_limit.h
13805 --- linux-2.6.31.6/include/linux/vs_limit.h     1970-01-01 01:00:00.000000000 +0100
13806 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_limit.h       2009-11-05 04:14:38.000000000 +0100
13807 @@ -0,0 +1,140 @@
13808 +#ifndef _VS_LIMIT_H
13809 +#define _VS_LIMIT_H
13810 +
13811 +#include "vserver/limit.h"
13812 +#include "vserver/base.h"
13813 +#include "vserver/context.h"
13814 +#include "vserver/debug.h"
13815 +#include "vserver/context.h"
13816 +#include "vserver/limit_int.h"
13817 +
13818 +/* Wrappers that reorder arguments for __vx_acc_cres/__vx_add_cres and append __FILE__/__LINE__. */
13819 +#define vx_acc_cres(v, d, p, r) \
13820 +       __vx_acc_cres(v, r, d, p, __FILE__, __LINE__)
13821 +
13822 +#define vx_acc_cres_cond(x, d, p, r) \
13823 +       __vx_acc_cres(((x) == vx_current_xid()) ? current_vx_info() : 0, \
13824 +       r, d, p, __FILE__, __LINE__)
13825 +
13826 +/* the _cond forms pass current_vx_info() only when x equals vx_current_xid(), else 0 */
13827 +#define vx_add_cres(v, a, p, r) \
13828 +       __vx_add_cres(v, r, a, p, __FILE__, __LINE__)
13829 +#define vx_sub_cres(v, a, p, r)                vx_add_cres(v, -(a), p, r)
13830 +
13831 +#define vx_add_cres_cond(x, a, p, r) \
13832 +       __vx_add_cres(((x) == vx_current_xid()) ? current_vx_info() : 0, \
13833 +       r, a, p, __FILE__, __LINE__)
13834 +#define vx_sub_cres_cond(x, a, p, r)   vx_add_cres_cond(x, -(a), p, r)
13835 +
13836 +
13837 +/* process and file limits */
13838 +
13839 +#define vx_nproc_inc(p) \
13840 +       vx_acc_cres((p)->vx_info, 1, p, RLIMIT_NPROC)
13841 +
13842 +#define vx_nproc_dec(p) \
13843 +       vx_acc_cres((p)->vx_info,-1, p, RLIMIT_NPROC)
13844 +
13845 +#define vx_files_inc(f) \
13846 +       vx_acc_cres_cond((f)->f_xid, 1, f, RLIMIT_NOFILE)
13847 +
13848 +#define vx_files_dec(f) \
13849 +       vx_acc_cres_cond((f)->f_xid,-1, f, RLIMIT_NOFILE)
13850 +
13851 +#define vx_locks_inc(l) \
13852 +       vx_acc_cres_cond((l)->fl_xid, 1, l, RLIMIT_LOCKS)
13853 +
13854 +#define vx_locks_dec(l) \
13855 +       vx_acc_cres_cond((l)->fl_xid,-1, l, RLIMIT_LOCKS)
13856 +
13857 +#define vx_openfd_inc(f) \
13858 +       vx_acc_cres(current_vx_info(), 1, (void *)(long)(f), VLIMIT_OPENFD)
13859 +
13860 +#define vx_openfd_dec(f) \
13861 +       vx_acc_cres(current_vx_info(),-1, (void *)(long)(f), VLIMIT_OPENFD)
13862 +
13863 +
13864 +#define vx_cres_avail(v, n, r) \
13865 +       __vx_cres_avail(v, r, n, __FILE__, __LINE__)
13866 +
13867 +
13868 +#define vx_nproc_avail(n) \
13869 +       vx_cres_avail(current_vx_info(), n, RLIMIT_NPROC)
13870 +
13871 +#define vx_files_avail(n) \
13872 +       vx_cres_avail(current_vx_info(), n, RLIMIT_NOFILE)
13873 +
13874 +#define vx_locks_avail(n) \
13875 +       vx_cres_avail(current_vx_info(), n, RLIMIT_LOCKS)
13876 +
13877 +#define vx_openfd_avail(n) \
13878 +       vx_cres_avail(current_vx_info(), n, VLIMIT_OPENFD)
13879 +
13880 +
13881 +/* dentry limits */
13882 +
13883 +#define vx_dentry_inc(d) do {                                          \
13884 +       if (atomic_read(&d->d_count) == 1)                              \
13885 +               vx_acc_cres(current_vx_info(), 1, d, VLIMIT_DENTRY);    \
13886 +       } while (0)
13887 +
13888 +#define vx_dentry_dec(d) do {                                          \
13889 +       if (atomic_read(&d->d_count) == 0)                              \
13890 +               vx_acc_cres(current_vx_info(),-1, d, VLIMIT_DENTRY);    \
13891 +       } while (0)
13892 +
13893 +#define vx_dentry_avail(n) \
13894 +       vx_cres_avail(current_vx_info(), n, VLIMIT_DENTRY)
13895 +
13896 +
13897 +/* socket limits */
13898 +
13899 +#define vx_sock_inc(s) \
13900 +       vx_acc_cres((s)->sk_vx_info, 1, s, VLIMIT_NSOCK)
13901 +
13902 +#define vx_sock_dec(s) \
13903 +       vx_acc_cres((s)->sk_vx_info,-1, s, VLIMIT_NSOCK)
13904 +
13905 +#define vx_sock_avail(n) \
13906 +       vx_cres_avail(current_vx_info(), n, VLIMIT_NSOCK)
13907 +
13908 +
13909 +/* ipc resource limits */
13910 +
13911 +#define vx_ipcmsg_add(v, u, a) \
13912 +       vx_add_cres(v, a, u, RLIMIT_MSGQUEUE)
13913 +
13914 +#define vx_ipcmsg_sub(v, u, a) \
13915 +       vx_sub_cres(v, a, u, RLIMIT_MSGQUEUE)
13916 +
13917 +#define vx_ipcmsg_avail(v, a) \
13918 +       vx_cres_avail(v, a, RLIMIT_MSGQUEUE)
13919 +
13920 +
13921 +#define vx_ipcshm_add(v, k, a) \
13922 +       vx_add_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM)
13923 +
13924 +#define vx_ipcshm_sub(v, k, a) \
13925 +       vx_sub_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM)
13926 +
13927 +#define vx_ipcshm_avail(v, a) \
13928 +       vx_cres_avail(v, a, VLIMIT_SHMEM)
13929 +
13930 +
13931 +#define vx_semary_inc(a) \
13932 +       vx_acc_cres(current_vx_info(), 1, a, VLIMIT_SEMARY)
13933 +
13934 +#define vx_semary_dec(a) \
13935 +       vx_acc_cres(current_vx_info(), -1, a, VLIMIT_SEMARY)
13936 +
13937 +
13938 +#define vx_nsems_add(a,n) \
13939 +       vx_add_cres(current_vx_info(), n, a, VLIMIT_NSEMS)
13940 +
13941 +#define vx_nsems_sub(a,n) \
13942 +       vx_sub_cres(current_vx_info(), n, a, VLIMIT_NSEMS)
13943 +
13944 +
13945 +#else
13946 +#warning duplicate inclusion
13947 +#endif
13948 diff -NurpP --minimal linux-2.6.31.6/include/linux/vs_memory.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_memory.h
13949 --- linux-2.6.31.6/include/linux/vs_memory.h    1970-01-01 01:00:00.000000000 +0100
13950 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_memory.h      2009-09-10 16:11:43.000000000 +0200
13951 @@ -0,0 +1,159 @@
13952 +#ifndef _VS_MEMORY_H
13953 +#define _VS_MEMORY_H
13954 +
13955 +#include "vserver/limit.h"
13956 +#include "vserver/base.h"
13957 +#include "vserver/context.h"
13958 +#include "vserver/debug.h"
13959 +#include "vserver/context.h"
13960 +#include "vserver/limit_int.h"
13961 +
13962 +
13963 +#define __acc_add_long(a, v)   (*(v) += (a))
13964 +#define __acc_inc_long(v)      (++*(v))
13965 +#define __acc_dec_long(v)      (--*(v))
13966 +
13967 +#if    NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
13968 +#define __acc_add_atomic(a, v) atomic_long_add(a, v)
13969 +#define __acc_inc_atomic(v)    atomic_long_inc(v)
13970 +#define __acc_dec_atomic(v)    atomic_long_dec(v)
13971 +#else  /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
13972 +#define __acc_add_atomic(a, v) __acc_add_long(a, v)
13973 +#define __acc_inc_atomic(v)    __acc_inc_long(v)
13974 +#define __acc_dec_atomic(v)    __acc_dec_long(v)
13975 +#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
13976 +
13977 +
13978 +#define vx_acc_page(m, d, v, r) do {                                   \
13979 +       if ((d) > 0)                                                    \
13980 +               __acc_inc_long(&(m)->v);                                \
13981 +       else                                                            \
13982 +               __acc_dec_long(&(m)->v);                                \
13983 +       __vx_acc_cres(m->mm_vx_info, r, d, m, __FILE__, __LINE__);      \
13984 +} while (0)
13985 +
13986 +#define vx_acc_page_atomic(m, d, v, r) do {                            \
13987 +       if ((d) > 0)                                                    \
13988 +               __acc_inc_atomic(&(m)->v);                              \
13989 +       else                                                            \
13990 +               __acc_dec_atomic(&(m)->v);                              \
13991 +       __vx_acc_cres(m->mm_vx_info, r, d, m, __FILE__, __LINE__);      \
13992 +} while (0)
13993 +
13994 +
13995 +#define vx_acc_pages(m, p, v, r) do {                                  \
13996 +       unsigned long __p = (p);                                        \
13997 +       __acc_add_long(__p, &(m)->v);                                   \
13998 +       __vx_add_cres(m->mm_vx_info, r, __p, m, __FILE__, __LINE__);    \
13999 +} while (0)
14000 +
14001 +#define vx_acc_pages_atomic(m, p, v, r) do {                           \
14002 +       unsigned long __p = (p);                                        \
14003 +       __acc_add_atomic(__p, &(m)->v);                                 \
14004 +       __vx_add_cres(m->mm_vx_info, r, __p, m, __FILE__, __LINE__);    \
14005 +} while (0)
14006 +
14007 +
14008 +
14009 +#define vx_acc_vmpage(m, d) \
14010 +       vx_acc_page(m, d, total_vm,  RLIMIT_AS)
14011 +#define vx_acc_vmlpage(m, d) \
14012 +       vx_acc_page(m, d, locked_vm, RLIMIT_MEMLOCK)
14013 +#define vx_acc_file_rsspage(m, d) \
14014 +       vx_acc_page_atomic(m, d, _file_rss, VLIMIT_MAPPED)
14015 +#define vx_acc_anon_rsspage(m, d) \
14016 +       vx_acc_page_atomic(m, d, _anon_rss, VLIMIT_ANON)
14017 +
14018 +#define vx_acc_vmpages(m, p) \
14019 +       vx_acc_pages(m, p, total_vm,  RLIMIT_AS)
14020 +#define vx_acc_vmlpages(m, p) \
14021 +       vx_acc_pages(m, p, locked_vm, RLIMIT_MEMLOCK)
14022 +#define vx_acc_file_rsspages(m, p) \
14023 +       vx_acc_pages_atomic(m, p, _file_rss, VLIMIT_MAPPED)
14024 +#define vx_acc_anon_rsspages(m, p) \
14025 +       vx_acc_pages_atomic(m, p, _anon_rss, VLIMIT_ANON)
14026 +
14027 +#define vx_pages_add(s, r, p)  __vx_add_cres(s, r, p, 0, __FILE__, __LINE__)
14028 +#define vx_pages_sub(s, r, p)  vx_pages_add(s, r, -(p))
14029 +
14030 +#define vx_vmpages_inc(m)              vx_acc_vmpage(m, 1)
14031 +#define vx_vmpages_dec(m)              vx_acc_vmpage(m, -1)
14032 +#define vx_vmpages_add(m, p)           vx_acc_vmpages(m, p)
14033 +#define vx_vmpages_sub(m, p)           vx_acc_vmpages(m, -(p))
14034 +
14035 +#define vx_vmlocked_inc(m)             vx_acc_vmlpage(m, 1)
14036 +#define vx_vmlocked_dec(m)             vx_acc_vmlpage(m, -1)
14037 +#define vx_vmlocked_add(m, p)          vx_acc_vmlpages(m, p)
14038 +#define vx_vmlocked_sub(m, p)          vx_acc_vmlpages(m, -(p))
14039 +
14040 +#define vx_file_rsspages_inc(m)                vx_acc_file_rsspage(m, 1)
14041 +#define vx_file_rsspages_dec(m)                vx_acc_file_rsspage(m, -1)
14042 +#define vx_file_rsspages_add(m, p)     vx_acc_file_rsspages(m, p)
14043 +#define vx_file_rsspages_sub(m, p)     vx_acc_file_rsspages(m, -(p))
14044 +
14045 +#define vx_anon_rsspages_inc(m)                vx_acc_anon_rsspage(m, 1)
14046 +#define vx_anon_rsspages_dec(m)                vx_acc_anon_rsspage(m, -1)
14047 +#define vx_anon_rsspages_add(m, p)     vx_acc_anon_rsspages(m, p)
14048 +#define vx_anon_rsspages_sub(m, p)     vx_acc_anon_rsspages(m, -(p))
14049 +
14050 +
14051 +#define vx_pages_avail(m, p, r) \
14052 +       __vx_cres_avail((m)->mm_vx_info, r, p, __FILE__, __LINE__)
14053 +
14054 +#define vx_vmpages_avail(m, p) vx_pages_avail(m, p, RLIMIT_AS)
14055 +#define vx_vmlocked_avail(m, p)        vx_pages_avail(m, p, RLIMIT_MEMLOCK)
14056 +#define vx_anon_avail(m, p)    vx_pages_avail(m, p, VLIMIT_ANON)
14057 +#define vx_mapped_avail(m, p)  vx_pages_avail(m, p, VLIMIT_MAPPED)
14058 +
14059 +#define vx_rss_avail(m, p) \
14060 +       __vx_cres_array_avail((m)->mm_vx_info, VLA_RSS, p, __FILE__, __LINE__)
14061 +
14062 +
14063 +enum {
14064 +       VXPT_UNKNOWN = 0,
14065 +       VXPT_ANON,
14066 +       VXPT_NONE,
14067 +       VXPT_FILE,
14068 +       VXPT_SWAP,
14069 +       VXPT_WRITE
14070 +};
14071 +
14072 +#if 0
14073 +#define        vx_page_fault(mm, vma, type, ret)
14074 +#else
14075 +/* Bump vxi->cacct.page[type][what] for mm's vx_info; does nothing without one. vma is unused. */
14076 +static inline
14077 +void __vx_page_fault(struct mm_struct *mm,
14078 +       struct vm_area_struct *vma, int type, int ret)
14079 +{
14080 +       struct vx_info *vxi = mm->mm_vx_info;
14081 +       int what;
14082 +/*
14083 +       static char *page_type[6] =
14084 +               { "UNKNOWN", "ANON", "NONE", "FILE", "SWAP", "WRITE" };
14085 +       static char *page_what[4] =
14086 +               { "FAULT_OOM", "FAULT_SIGBUS", "FAULT_MINOR", "FAULT_MAJOR" };
14087 +*/
14088 +
14089 +       if (!vxi)
14090 +               return;
14091 +
14092 +       what = (ret & 0x3);     /* low two bits of the fault result */
14093 +
14094 +/*     printk("[%d] page[%d][%d] %2x %s %s\n", vxi->vx_id,
14095 +               type, what, ret, page_type[type], page_what[what]);
14096 +*/
14097 +       if (ret & VM_FAULT_WRITE)
14098 +               what |= 0x4;    /* bit 2 marks VM_FAULT_WRITE */
14099 +       atomic_inc(&vxi->cacct.page[type][what]);
14100 +}
14101 +
14102 +#define        vx_page_fault(mm, vma, type, ret)       __vx_page_fault(mm, vma, type, ret)
14103 +#endif
14104 +
14105 +
14106 +extern unsigned long vx_badness(struct task_struct *task, struct mm_struct *mm);
14107 +
14108 +#else
14109 +#warning duplicate inclusion
14110 +#endif
14111 diff -NurpP --minimal linux-2.6.31.6/include/linux/vs_network.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_network.h
14112 --- linux-2.6.31.6/include/linux/vs_network.h   1970-01-01 01:00:00.000000000 +0100
14113 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_network.h     2009-09-10 16:11:43.000000000 +0200
14114 @@ -0,0 +1,169 @@
14115 +#ifndef _NX_VS_NETWORK_H
14116 +#define _NX_VS_NETWORK_H
14117 +
14118 +#include "vserver/context.h"
14119 +#include "vserver/network.h"
14120 +#include "vserver/base.h"
14121 +#include "vserver/check.h"
14122 +#include "vserver/debug.h"
14123 +
14124 +#include <linux/sched.h>
14125 +
14126 +
14127 +#define get_nx_info(i) __get_nx_info(i, __FILE__, __LINE__)
14128 +/* Take a reference: increments nxi->nx_usecnt and returns nxi; NULL in gives NULL out. */
14129 +static inline struct nx_info *__get_nx_info(struct nx_info *nxi,
14130 +       const char *_file, int _line)
14131 +{
14132 +       if (!nxi)
14133 +               return NULL;
14134 +
14135 +       vxlprintk(VXD_CBIT(nid, 2), "get_nx_info(%p[#%d.%d])",
14136 +               nxi, nxi ? nxi->nx_id : 0,
14137 +               nxi ? atomic_read(&nxi->nx_usecnt) : 0,
14138 +               _file, _line);
14139 +
14140 +       atomic_inc(&nxi->nx_usecnt);
14141 +       return nxi;
14142 +}
14143 +
14144 +
14145 +extern void free_nx_info(struct nx_info *);
14146 +
14147 +#define put_nx_info(i) __put_nx_info(i, __FILE__, __LINE__)
14148 +/* Drop a reference; calls free_nx_info() when nx_usecnt hits zero. NULL is ignored. */
14149 +static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line)
14150 +{
14151 +       if (!nxi)
14152 +               return;
14153 +
14154 +       vxlprintk(VXD_CBIT(nid, 2), "put_nx_info(%p[#%d.%d])",
14155 +               nxi, nxi ? nxi->nx_id : 0,
14156 +               nxi ? atomic_read(&nxi->nx_usecnt) : 0,
14157 +               _file, _line);
14158 +
14159 +       if (atomic_dec_and_test(&nxi->nx_usecnt))
14160 +               free_nx_info(nxi);
14161 +}
14162 +
14163 +
14164 +#define init_nx_info(p, i) __init_nx_info(p, i, __FILE__, __LINE__)
14165 +/* Store nxi in *nxp (plain assignment), taking a reference first when nxi is non-NULL. */
14166 +static inline void __init_nx_info(struct nx_info **nxp, struct nx_info *nxi,
14167 +               const char *_file, int _line)
14168 +{
14169 +       if (nxi) {
14170 +               vxlprintk(VXD_CBIT(nid, 3),
14171 +                       "init_nx_info(%p[#%d.%d])",
14172 +                       nxi, nxi ? nxi->nx_id : 0,
14173 +                       nxi ? atomic_read(&nxi->nx_usecnt) : 0,
14174 +                       _file, _line);
14175 +
14176 +               atomic_inc(&nxi->nx_usecnt);
14177 +       }
14178 +       *nxp = nxi;
14179 +}
14180 +
14181 +
14182 +#define set_nx_info(p, i) __set_nx_info(p, i, __FILE__, __LINE__)
14183 +/* Take a reference on nxi and xchg it into *nxp; BUG if *nxp was non-NULL. No-op for NULL nxi. */
14184 +static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi,
14185 +       const char *_file, int _line)
14186 +{
14187 +       struct nx_info *nxo;
14188 +
14189 +       if (!nxi)
14190 +               return;
14191 +
14192 +       vxlprintk(VXD_CBIT(nid, 3), "set_nx_info(%p[#%d.%d])",
14193 +               nxi, nxi ? nxi->nx_id : 0,
14194 +               nxi ? atomic_read(&nxi->nx_usecnt) : 0,
14195 +               _file, _line);
14196 +
14197 +       atomic_inc(&nxi->nx_usecnt);
14198 +       nxo = xchg(nxp, nxi);
14199 +       BUG_ON(nxo);
14200 +}
14201 +
14202 +#define clr_nx_info(p) __clr_nx_info(p, __FILE__, __LINE__)
14203 +/* xchg *nxp to NULL and drop the reference of the previous value, freeing it at zero. */
14204 +static inline void __clr_nx_info(struct nx_info **nxp,
14205 +       const char *_file, int _line)
14206 +{
14207 +       struct nx_info *nxo;
14208 +
14209 +       nxo = xchg(nxp, NULL);
14210 +       if (!nxo)
14211 +               return;
14212 +
14213 +       vxlprintk(VXD_CBIT(nid, 3), "clr_nx_info(%p[#%d.%d])",
14214 +               nxo, nxo ? nxo->nx_id : 0,
14215 +               nxo ? atomic_read(&nxo->nx_usecnt) : 0,
14216 +               _file, _line);
14217 +
14218 +       if (atomic_dec_and_test(&nxo->nx_usecnt))
14219 +               free_nx_info(nxo);
14220 +}
14221 +
14222 +
14223 +#define claim_nx_info(v, p) __claim_nx_info(v, p, __FILE__, __LINE__)
14224 +/* Increment nxi->nx_tasks; task is only used in the debug print. */
14225 +static inline void __claim_nx_info(struct nx_info *nxi,
14226 +       struct task_struct *task, const char *_file, int _line)
14227 +{
14228 +       vxlprintk(VXD_CBIT(nid, 3), "claim_nx_info(%p[#%d.%d.%d]) %p",
14229 +               nxi, nxi ? nxi->nx_id : 0,
14230 +               nxi?atomic_read(&nxi->nx_usecnt):0,
14231 +               nxi?atomic_read(&nxi->nx_tasks):0,
14232 +               task, _file, _line);
14233 +       /* NOTE(review): print tolerates NULL nxi, the inc below does not - confirm callers */
14234 +       atomic_inc(&nxi->nx_tasks);
14235 +}
14236 +
14237 +
14238 +extern void unhash_nx_info(struct nx_info *);
14239 +
14240 +#define release_nx_info(v, p) __release_nx_info(v, p, __FILE__, __LINE__)
14241 +/* Decrement nxi->nx_tasks and unhash_nx_info() at zero; calls might_sleep() first. */
14242 +static inline void __release_nx_info(struct nx_info *nxi,
14243 +       struct task_struct *task, const char *_file, int _line)
14244 +{
14245 +       vxlprintk(VXD_CBIT(nid, 3), "release_nx_info(%p[#%d.%d.%d]) %p",
14246 +               nxi, nxi ? nxi->nx_id : 0,
14247 +               nxi ? atomic_read(&nxi->nx_usecnt) : 0,
14248 +               nxi ? atomic_read(&nxi->nx_tasks) : 0,
14249 +               task, _file, _line);
14250 +
14251 +       might_sleep();
14252 +       /* NOTE(review): nxi is dereferenced unguarded below - confirm callers never pass NULL */
14253 +       if (atomic_dec_and_test(&nxi->nx_tasks))
14254 +               unhash_nx_info(nxi);
14255 +}
14256 +
14257 +
14258 +#define task_get_nx_info(i)    __task_get_nx_info(i, __FILE__, __LINE__)
14259 +/* With task_lock(p) held, take a reference on p->nx_info and return it (NULL if unset). */
14260 +static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p,
14261 +       const char *_file, int _line)
14262 +{
14263 +       struct nx_info *nxi;
14264 +
14265 +       task_lock(p);
14266 +       vxlprintk(VXD_CBIT(nid, 5), "task_get_nx_info(%p)",
14267 +               p, _file, _line);
14268 +       nxi = __get_nx_info(p->nx_info, _file, _line);
14269 +       task_unlock(p);
14270 +       return nxi;
14271 +}
14272 +
14273 +/* Call release_nx_info() on p->nx_info when it is set. */
14274 +static inline void exit_nx_info(struct task_struct *p)
14275 +{
14276 +       if (p->nx_info)
14277 +               release_nx_info(p->nx_info, p);
14278 +}
14279 +
14280 +
14281 +#else
14282 +#warning duplicate inclusion
14283 +#endif
14284 diff -NurpP --minimal linux-2.6.31.6/include/linux/vs_pid.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_pid.h
14285 --- linux-2.6.31.6/include/linux/vs_pid.h       1970-01-01 01:00:00.000000000 +0100
14286 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_pid.h 2009-09-10 16:11:43.000000000 +0200
14287 @@ -0,0 +1,95 @@
14288 +#ifndef _VS_PID_H
14289 +#define _VS_PID_H
14290 +
14291 +#include "vserver/base.h"
14292 +#include "vserver/check.h"
14293 +#include "vserver/context.h"
14294 +#include "vserver/debug.h"
14295 +#include "vserver/pid.h"
14296 +#include <linux/pid_namespace.h>
14297 +
14298 +
14299 +#define VXF_FAKE_INIT  (VXF_INFO_INIT | VXF_STATE_INIT)
14300 +/* 1 if task is pid 1 and the VXF_FAKE_INIT check fails, or vx_check() accepts its xid; else 0. */
14301 +static inline
14302 +int vx_proc_task_visible(struct task_struct *task)
14303 +{
14304 +       if ((task->pid == 1) &&
14305 +               !vx_flags(VXF_FAKE_INIT, VXF_FAKE_INIT))
14306 +               /* show a blend through init */
14307 +               goto visible;
14308 +       if (vx_check(vx_task_xid(task), VS_WATCH | VS_IDENT))
14309 +               goto visible;
14310 +       return 0;
14311 +visible:
14312 +       return 1;
14313 +}
14314 +
14315 +#define find_task_by_real_pid(pid) find_task_by_pid_ns(pid, &init_pid_ns)
14316 +
14317 +#if 0
14318 +
14319 +static inline
14320 +struct task_struct *vx_find_proc_task_by_pid(int pid)
14321 +{
14322 +       struct task_struct *task = find_task_by_real_pid(pid);
14323 +
14324 +       if (task && !vx_proc_task_visible(task)) {
14325 +               vxdprintk(VXD_CBIT(misc, 6),
14326 +                       "dropping task (find) %p[#%u,%u] for %p[#%u,%u]",
14327 +                       task, task->xid, task->pid,
14328 +                       current, current->xid, current->pid);
14329 +               task = NULL;
14330 +       }
14331 +       return task;
14332 +}
14333 +
14334 +#endif
14335 +/* get_pid_task() filtered by vx_proc_task_visible(); returns NULL if hidden. inode is unused. */
14336 +static inline
14337 +struct task_struct *vx_get_proc_task(struct inode *inode, struct pid *pid)
14338 +{
14339 +       struct task_struct *task = get_pid_task(pid, PIDTYPE_PID);
14340 +
14341 +       if (task && !vx_proc_task_visible(task)) {
14342 +               vxdprintk(VXD_CBIT(misc, 6),
14343 +                       "dropping task (get) %p[#%u,%u] for %p[#%u,%u]",
14344 +                       task, task->xid, task->pid,
14345 +                       current, current->xid, current->pid);
14346 +               put_task_struct(task);  /* give back the reference just obtained */
14347 +               task = NULL;
14348 +       }
14349 +       return task;
14350 +}
14351 +
14352 +#if 0
14353 +
14354 +static inline
14355 +struct task_struct *vx_child_reaper(struct task_struct *p)
14356 +{
14357 +       struct vx_info *vxi = p->vx_info;
14358 +       struct task_struct *reaper = child_reaper(p);
14359 +
14360 +       if (!vxi)
14361 +               goto out;
14362 +
14363 +       BUG_ON(!p->vx_info->vx_reaper);
14364 +
14365 +       /* child reaper for the guest reaper */
14366 +       if (vxi->vx_reaper == p)
14367 +               goto out;
14368 +
14369 +       reaper = vxi->vx_reaper;
14370 +out:
14371 +       vxdprintk(VXD_CBIT(xid, 7),
14372 +               "vx_child_reaper(%p[#%u,%u]) = %p[#%u,%u]",
14373 +               p, p->xid, p->pid, reaper, reaper->xid, reaper->pid);
14374 +       return reaper;
14375 +}
14376 +
14377 +#endif
14378 +
14379 +
14380 +#else
14381 +#warning duplicate inclusion
14382 +#endif
14383 diff -NurpP --minimal linux-2.6.31.6/include/linux/vs_sched.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_sched.h
14384 --- linux-2.6.31.6/include/linux/vs_sched.h     1970-01-01 01:00:00.000000000 +0100
14385 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_sched.h       2009-09-10 16:11:43.000000000 +0200
14386 @@ -0,0 +1,110 @@
14387 +#ifndef _VS_SCHED_H
14388 +#define _VS_SCHED_H
14389 +
14390 +#include "vserver/base.h"
14391 +#include "vserver/context.h"
14392 +#include "vserver/sched.h"
14393 +
14394 +
14395 +#define VAVAVOOM_RATIO          50
14396 +
14397 +#define MAX_PRIO_BIAS           20
14398 +#define MIN_PRIO_BIAS          -20
14399 +
14400 +
14401 +#ifdef CONFIG_VSERVER_HARDCPU
14402 +
14403 +/*
14404 + * vx_effective_vavavoom - token based priority adjustment
14405 + * @sched_pc: per-cpu scheduler data; tokens and tokens_max are read
14406 + * @max_prio: size of the priority range the result is scaled to
14407 + *
14408 + * Returns 0 while the token count is negative.  Otherwise, with
14409 + * d = tokens_max - tokens, the result is
14410 + *
14411 + *     max_prio * VAVAVOOM_RATIO / 100 * (d*d - tokens_max^2 / 4)
14412 + *                                     / tokens_max^2
14413 + *
14414 + * evaluated left to right in integer arithmetic.  A full bucket
14415 + * (d == 0) yields about a quarter of the scaled range below zero,
14416 + * an empty one three quarters above it, growing quadratically in
14417 + * between: -5 ... 0 ... +15 for max_prio == 40.
14418 + *
14419 + * Original design notes: combined with the -4 ... +4 sleep average
14420 + * bonus the total is -9 ... +19, i.e. ~50% of the 0...39 priority
14421 + * range, so that nice +19 interactive tasks do not preempt nice 0
14422 + * CPU hogs, and nice -20 CPU hogs are not preempted by nice 0 tasks
14423 + * unless that context is far exceeding its CPU allocation.
14424 + * NOTE(review): divides by tokens_max^2; assumes tokens_max != 0.
14425 + */
14426 +static inline
14427 +int vx_effective_vavavoom(struct _vx_sched_pc *sched_pc, int max_prio)
14428 +{
14429 +       int tokens = sched_pc->tokens;
14430 +       int limit, missing;
14431 +
14432 +       if (tokens < 0)
14433 +               return 0;
14434 +
14435 +       limit = sched_pc->tokens_max;
14436 +       missing = limit - tokens;
14437 +       limit *= limit;
14438 +       return max_prio * VAVAVOOM_RATIO / 100
14439 +               * (missing * missing - (limit >> 2)) / limit;
14440 +}
14441 +
14442 +
14443 +/* Hard CPU variant: add token vavavoom (if VXF_SCHED_PRIO) and prio_bias. */
14444 +static inline
14445 +int vx_adjust_prio(struct task_struct *p, int prio, int max_user)
14446 +{
14447 +       struct vx_info *vxi = p->vx_info;
14448 +       struct _vx_sched_pc *pc;
14449 +
14450 +       if (!vxi)
14451 +               return prio;
14452 +
14453 +       pc = &vx_cpu(vxi, sched_pc);
14454 +       if (vx_info_flags(vxi, VXF_SCHED_PRIO, 0)) {
14455 +               int boost = vx_effective_vavavoom(pc, max_user);
14456 +
14457 +               pc->vavavoom = boost;
14458 +               prio += boost;
14459 +       }
14460 +       return prio + pc->prio_bias;
14461 +}
14462 +
14463 +#else /* !CONFIG_VSERVER_HARDCPU */
14464 +
14465 +/* Without hard CPU limits only the per-cpu prio_bias is applied. */
14466 +static inline
14467 +int vx_adjust_prio(struct task_struct *p, int prio, int max_user)
14468 +{
14469 +       struct vx_info *vxi = p->vx_info;
14470 +
14471 +       /* tasks without a context keep @prio as is */
14472 +       return vxi ? prio + vx_cpu(vxi, sched_pc).prio_bias : prio;
14473 +}
14474 +
14475 +#endif /* CONFIG_VSERVER_HARDCPU */
14476 +
14477 +
14478 +/* Add @cputime to the context's per-cpu user_ticks; @nice is unused. */
14479 +static inline void vx_account_user(struct vx_info *vxi,
14480 +       cputime_t cputime, int nice)
14481 +{
14482 +       if (vxi)
14483 +               vx_cpu(vxi, sched_pc).user_ticks += cputime;
14484 +}
14485 +
14486 +/* Add @cputime to the context's per-cpu sys_ticks; @idle is unused. */
14487 +static inline void vx_account_system(struct vx_info *vxi,
14488 +       cputime_t cputime, int idle)
14489 +{
14490 +       if (vxi)
14491 +               vx_cpu(vxi, sched_pc).sys_ticks += cputime;
14492 +}
14493 +
14494 +#else
14495 +#warning duplicate inclusion
14496 +#endif
14497 diff -NurpP --minimal linux-2.6.31.6/include/linux/vs_socket.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_socket.h
14498 --- linux-2.6.31.6/include/linux/vs_socket.h    1970-01-01 01:00:00.000000000 +0100
14499 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_socket.h      2009-09-10 16:11:43.000000000 +0200
14500 @@ -0,0 +1,67 @@
14501 +#ifndef _VS_SOCKET_H
14502 +#define _VS_SOCKET_H
14503 +
14504 +#include "vserver/debug.h"
14505 +#include "vserver/base.h"
14506 +#include "vserver/cacct.h"
14507 +#include "vserver/context.h"
14508 +#include "vserver/tag.h"
14509 +
14510 +
14511 +/* socket accounting */
14512 +
14513 +#include <linux/socket.h>
14514 +
14515 +/* Map a protocol family onto its VXA_SOCK_* accounting index. */
14516 +static inline int vx_sock_type(int family)
14517 +{
14518 +       int slot = VXA_SOCK_OTHER;      /* anything not listed below */
14519 +
14520 +       if (family == PF_UNSPEC)
14521 +               slot = VXA_SOCK_UNSPEC;
14522 +       else if (family == PF_UNIX)
14523 +               slot = VXA_SOCK_UNIX;
14524 +       else if (family == PF_INET)
14525 +               slot = VXA_SOCK_INET;
14526 +       else if (family == PF_INET6)
14527 +               slot = VXA_SOCK_INET6;
14528 +       else if (family == PF_PACKET)
14529 +               slot = VXA_SOCK_PACKET;
14530 +       return slot;
14531 +}
14532 +
14533 +#define vx_acc_sock(v, f, p, s) \
14534 +       __vx_acc_sock(v, f, p, s, __FILE__, __LINE__)
14535 +/* Count one event of @size at [type][@pos]; @file/@line are not used here. */
14536 +static inline void __vx_acc_sock(struct vx_info *vxi,
14537 +       int family, int pos, int size, char *file, int line)
14538 +{
14539 +       int type = vx_sock_type(family);
14540 +
14541 +       if (!vxi)
14542 +               return;
14543 +       atomic_long_inc(&vxi->cacct.sock[type][pos].count);
14544 +       atomic_long_add(size, &vxi->cacct.sock[type][pos].total);
14545 +}
14546 +
14547 +#define vx_sock_recv(sk, s) \
14548 +       vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 0, s) /* pos 0 */
14549 +#define vx_sock_send(sk, s) \
14550 +       vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 1, s) /* pos 1 */
14551 +#define vx_sock_fail(sk, s) \
14552 +       vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, s) /* pos 2 */
14553 +
14554 +/* clear the socket's context id and its vx_info pointer */
14555 +#define sock_vx_init(s) do {           \
14556 +       (s)->sk_xid = 0;                \
14557 +       (s)->sk_vx_info = NULL;         \
14558 +       } while (0)
14559 +/* likewise for the network context id and the nx_info pointer */
14560 +#define sock_nx_init(s) do {           \
14561 +       (s)->sk_nid = 0;                \
14562 +       (s)->sk_nx_info = NULL;         \
14563 +       } while (0)
14564 +
14565 +#else
14566 +#warning duplicate inclusion
14567 +#endif
14568 diff -NurpP --minimal linux-2.6.31.6/include/linux/vs_tag.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_tag.h
14569 --- linux-2.6.31.6/include/linux/vs_tag.h       1970-01-01 01:00:00.000000000 +0100
14570 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_tag.h 2009-09-10 16:11:43.000000000 +0200
14571 @@ -0,0 +1,47 @@
14572 +#ifndef _VS_TAG_H
14573 +#define _VS_TAG_H
14574 +
14575 +#include <linux/vserver/tag.h>
14576 +
14577 +/* check conditions: mode bits tested by __dx_check() */
14578 +
14579 +#define DX_ADMIN       0x0001  /* pass if the checking tag is 0 */
14580 +#define DX_WATCH       0x0002  /* pass if the checking tag is 1 */
14581 +#define DX_HOSTID      0x0008  /* pass if the supplied tag is 0 */
14582 +
14583 +#define DX_IDENT       0x0010  /* pass if supplied tag == checking tag */
14584 +
14585 +#define DX_ARG_MASK    0x0010  /* gates the DX_IDENT comparison */
14586 +
14587 +/* tag of task @t, and of the current task */
14588 +#define dx_task_tag(t) ((t)->tag)
14589 +
14590 +#define dx_current_tag() dx_task_tag(current)
14591 +/* check the current task's tag against tag @c under mode @m */
14592 +#define dx_check(c, m) __dx_check(dx_current_tag(), c, m)
14593 +/* like dx_check(), but a zero mode always passes */
14594 +#define dx_weak_check(c, m)    ((m) ? dx_check(c, m) : 1)
14595 +
14596 +
14597 +/*
14598 + * Return 1 if tag @cid passes @mode: DX_IDENT matches @id == @cid,
14599 + * DX_ADMIN matches cid 0, DX_WATCH cid 1, DX_HOSTID matches @id == 0.
14600 + */
14601 +static inline int __dx_check(tag_t cid, tag_t id, unsigned int mode)
14602 +{
14603 +       if ((mode & DX_ARG_MASK) && (mode & DX_IDENT) && (id == cid))
14604 +               return 1;
14605 +       if ((mode & DX_ADMIN) && (cid == 0))
14606 +               return 1;
14607 +       if ((mode & DX_WATCH) && (cid == 1))
14608 +               return 1;
14609 +       return (mode & DX_HOSTID) && (id == 0);
14610 +}
14611 +
14612 +struct inode;
14613 +int dx_permission(struct inode *inode, int mask);
14614 +
14615 +
14616 +#else
14617 +#warning duplicate inclusion
14618 +#endif
14619 diff -NurpP --minimal linux-2.6.31.6/include/linux/vs_time.h linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_time.h
14620 --- linux-2.6.31.6/include/linux/vs_time.h      1970-01-01 01:00:00.000000000 +0100
14621 +++ linux-2.6.31.6-vs2.3.0.36.24/include/linux/vs_time.h        2009-09-10 16:11:43.000000000 +0200
14622 @@ -0,0 +1,19 @@
14623 +#ifndef _VS_TIME_H
14624 +#define _VS_TIME_H
14625 +
14626 +
14627 +/* time faking stuff: vx_* wrappers around get/settimeofday */
14628 +
14629 +#ifdef CONFIG_VSERVER_VTIME
14630 +/* out-of-line versions, defined outside this header */
14631 +extern void vx_gettimeofday(struct timeval *tv);
14632 +extern int vx_settimeofday(struct timespec *ts);
14633 +
14634 +#else  /* !CONFIG_VSERVER_VTIME: map straight onto do_*timeofday() */
14635 +#define        vx_gettimeofday(t)      do_gettimeofday(t)
14636 +#define        vx_settimeofday(t)      do_settimeofday(t)
14637 +#endif /* CONFIG_VSERVER_VTIME */
14638 +
14639 +#else
14640 +#warning duplicate inclusion
14641 +#endif
14642 diff -NurpP --minimal linux-2.6.31.6/include/net/addrconf.h linux-2.6.31.6-vs2.3.0.36.24/include/net/addrconf.h
14643 --- linux-2.6.31.6/include/net/addrconf.h       2009-06-11 17:13:18.000000000 +0200
14644 +++ linux-2.6.31.6-vs2.3.0.36.24/include/net/addrconf.h 2009-09-10 16:11:43.000000000 +0200
14645 @@ -84,7 +84,8 @@ extern int                    ipv6_dev_get_saddr(struct n
14646                                                struct net_device *dev,
14647                                                const struct in6_addr *daddr,
14648                                                unsigned int srcprefs,
14649 -                                              struct in6_addr *saddr);
14650 +                                              struct in6_addr *saddr,
14651 +                                              struct nx_info *nxi);
14652  extern int                     ipv6_get_lladdr(struct net_device *dev,
14653                                                 struct in6_addr *addr,
14654                                                 unsigned char banned_flags);
14655 diff -NurpP --minimal linux-2.6.31.6/include/net/af_unix.h linux-2.6.31.6-vs2.3.0.36.24/include/net/af_unix.h
14656 --- linux-2.6.31.6/include/net/af_unix.h        2008-12-25 00:26:37.000000000 +0100
14657 +++ linux-2.6.31.6-vs2.3.0.36.24/include/net/af_unix.h  2009-09-10 16:11:43.000000000 +0200
14658 @@ -4,6 +4,7 @@
14659  #include <linux/socket.h>
14660  #include <linux/un.h>
14661  #include <linux/mutex.h>
14662 +#include <linux/vs_base.h>
14663  #include <net/sock.h>
14664  
14665  extern void unix_inflight(struct file *fp);
14666 diff -NurpP --minimal linux-2.6.31.6/include/net/inet_timewait_sock.h linux-2.6.31.6-vs2.3.0.36.24/include/net/inet_timewait_sock.h
14667 --- linux-2.6.31.6/include/net/inet_timewait_sock.h     2009-09-10 15:26:27.000000000 +0200
14668 +++ linux-2.6.31.6-vs2.3.0.36.24/include/net/inet_timewait_sock.h       2009-09-10 16:11:43.000000000 +0200
14669 @@ -117,6 +117,10 @@ struct inet_timewait_sock {
14670  #define tw_hash                        __tw_common.skc_hash
14671  #define tw_prot                        __tw_common.skc_prot
14672  #define tw_net                 __tw_common.skc_net
14673 +#define tw_xid                 __tw_common.skc_xid
14674 +#define tw_vx_info             __tw_common.skc_vx_info
14675 +#define tw_nid                 __tw_common.skc_nid
14676 +#define tw_nx_info             __tw_common.skc_nx_info
14677         int                     tw_timeout;
14678         volatile unsigned char  tw_substate;
14679         /* 3 bits hole, try to pack */
14680 diff -NurpP --minimal linux-2.6.31.6/include/net/route.h linux-2.6.31.6-vs2.3.0.36.24/include/net/route.h
14681 --- linux-2.6.31.6/include/net/route.h  2009-09-10 15:26:27.000000000 +0200
14682 +++ linux-2.6.31.6-vs2.3.0.36.24/include/net/route.h    2009-11-05 03:48:31.000000000 +0100
14683 @@ -135,6 +135,9 @@ static inline void ip_rt_put(struct rtab
14684                 dst_release(&rt->u.dst);
14685  }
14686  
14687 +#include <linux/vs_base.h>
14688 +#include <linux/vs_inet.h>
14689 +
14690  #define IPTOS_RT_MASK  (IPTOS_TOS_MASK & ~3)
14691  
14692  extern const __u8 ip_tos2prio[16];
14693 @@ -144,6 +147,9 @@ static inline char rt_tos2priority(u8 to
14694         return ip_tos2prio[IPTOS_TOS(tos)>>1];
14695  }
14696  
14697 +extern int ip_v4_find_src(struct net *net, struct nx_info *,
14698 +       struct rtable **, struct flowi *);
14699 +
14700  static inline int ip_route_connect(struct rtable **rp, __be32 dst,
14701                                    __be32 src, u32 tos, int oif, u8 protocol,
14702                                    __be16 sport, __be16 dport, struct sock *sk,
14703 @@ -161,11 +167,24 @@ static inline int ip_route_connect(struc
14704  
14705         int err;
14706         struct net *net = sock_net(sk);
14707 +       struct nx_info *nx_info = current_nx_info();
14708  
14709         if (inet_sk(sk)->transparent)
14710                 fl.flags |= FLOWI_FLAG_ANYSRC;
14711  
14712 -       if (!dst || !src) {
14713 +       if (sk)
14714 +               nx_info = sk->sk_nx_info;
14715 +
14716 +       vxdprintk(VXD_CBIT(net, 4),
14717 +               "ip_route_connect(%p) %p,%p;%lx",
14718 +               sk, nx_info, sk->sk_socket,
14719 +               (sk->sk_socket?sk->sk_socket->flags:0));
14720 +
14721 +       err = ip_v4_find_src(net, nx_info, rp, &fl);
14722 +       if (err)
14723 +               return err;
14724 +
14725 +       if (!fl.fl4_dst || !fl.fl4_src) {
14726                 err = __ip_route_output_key(net, rp, &fl);
14727                 if (err)
14728                         return err;
14729 diff -NurpP --minimal linux-2.6.31.6/include/net/sock.h linux-2.6.31.6-vs2.3.0.36.24/include/net/sock.h
14730 --- linux-2.6.31.6/include/net/sock.h   2009-09-10 15:26:27.000000000 +0200
14731 +++ linux-2.6.31.6-vs2.3.0.36.24/include/net/sock.h     2009-09-10 17:15:39.000000000 +0200
14732 @@ -139,6 +139,10 @@ struct sock_common {
14733  #ifdef CONFIG_NET_NS
14734         struct net              *skc_net;
14735  #endif
14736 +       xid_t                   skc_xid;
14737 +       struct vx_info          *skc_vx_info;
14738 +       nid_t                   skc_nid;
14739 +       struct nx_info          *skc_nx_info;
14740  };
14741  
14742  /**
14743 @@ -225,6 +229,10 @@ struct sock {
14744  #define sk_bind_node           __sk_common.skc_bind_node
14745  #define sk_prot                        __sk_common.skc_prot
14746  #define sk_net                 __sk_common.skc_net
14747 +#define sk_xid                 __sk_common.skc_xid
14748 +#define sk_vx_info             __sk_common.skc_vx_info
14749 +#define sk_nid                 __sk_common.skc_nid
14750 +#define sk_nx_info             __sk_common.skc_nx_info
14751         kmemcheck_bitfield_begin(flags);
14752         unsigned char           sk_shutdown : 2,
14753                                 sk_no_check : 2,
14754 diff -NurpP --minimal linux-2.6.31.6/init/Kconfig linux-2.6.31.6-vs2.3.0.36.24/init/Kconfig
14755 --- linux-2.6.31.6/init/Kconfig 2009-09-10 15:26:27.000000000 +0200
14756 +++ linux-2.6.31.6-vs2.3.0.36.24/init/Kconfig   2009-10-06 04:38:47.000000000 +0200
14757 @@ -492,6 +492,19 @@ config CGROUP_SCHED
14758  
14759  endchoice
14760  
14761 +config CFS_HARD_LIMITS
14762 +       bool "Hard Limits for CFS Group Scheduler"
14763 +       depends on EXPERIMENTAL
14764 +       depends on FAIR_GROUP_SCHED && CGROUP_SCHED
14765 +       default n
14766 +       help
14767 +         This option enables hard limiting of CPU time obtained by
14768 +         a fair task group. Use this if you want to throttle a group of tasks
14769 +         based on its CPU usage. For more details refer to
14770 +         Documentation/scheduler/sched-cfs-hard-limits.txt
14771 +
14772 +         Say N if unsure.
14773 +
14774  menuconfig CGROUPS
14775         boolean "Control Group support"
14776         help
14777 diff -NurpP --minimal linux-2.6.31.6/init/main.c linux-2.6.31.6-vs2.3.0.36.24/init/main.c
14778 --- linux-2.6.31.6/init/main.c  2009-09-10 15:26:27.000000000 +0200
14779 +++ linux-2.6.31.6-vs2.3.0.36.24/init/main.c    2009-09-10 16:11:43.000000000 +0200
14780 @@ -69,6 +69,7 @@
14781  #include <linux/kmemcheck.h>
14782  #include <linux/kmemtrace.h>
14783  #include <trace/boot.h>
14784 +#include <linux/vserver/percpu.h>
14785  
14786  #include <asm/io.h>
14787  #include <asm/bugs.h>
14788 @@ -381,12 +382,14 @@ EXPORT_SYMBOL(__per_cpu_offset);
14789  
14790  static void __init setup_per_cpu_areas(void)
14791  {
14792 -       unsigned long size, i;
14793 +       unsigned long size, vspc, i;
14794         char *ptr;
14795         unsigned long nr_possible_cpus = num_possible_cpus();
14796  
14797 +       vspc = PERCPU_PERCTX * CONFIG_VSERVER_CONTEXTS;
14798 +
14799         /* Copy section for each CPU (we discard the original) */
14800 -       size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
14801 +       size = ALIGN(PERCPU_ENOUGH_ROOM + vspc, PAGE_SIZE);
14802         ptr = alloc_bootmem_pages(size * nr_possible_cpus);
14803  
14804         for_each_possible_cpu(i) {
14805 diff -NurpP --minimal linux-2.6.31.6/ipc/mqueue.c linux-2.6.31.6-vs2.3.0.36.24/ipc/mqueue.c
14806 --- linux-2.6.31.6/ipc/mqueue.c 2009-09-10 15:26:27.000000000 +0200
14807 +++ linux-2.6.31.6-vs2.3.0.36.24/ipc/mqueue.c   2009-09-10 16:31:20.000000000 +0200
14808 @@ -33,6 +33,8 @@
14809  #include <linux/pid.h>
14810  #include <linux/ipc_namespace.h>
14811  #include <linux/ima.h>
14812 +#include <linux/vs_context.h>
14813 +#include <linux/vs_limit.h>
14814  
14815  #include <net/sock.h>
14816  #include "util.h"
14817 @@ -66,6 +68,7 @@ struct mqueue_inode_info {
14818         struct sigevent notify;
14819         struct pid* notify_owner;
14820         struct user_struct *user;       /* user who created, for accounting */
14821 +       struct vx_info *vxi;
14822         struct sock *notify_sock;
14823         struct sk_buff *notify_cookie;
14824  
14825 @@ -125,6 +128,7 @@ static struct inode *mqueue_get_inode(st
14826                 if (S_ISREG(mode)) {
14827                         struct mqueue_inode_info *info;
14828                         struct task_struct *p = current;
14829 +                       struct vx_info *vxi = p->vx_info;
14830                         unsigned long mq_bytes, mq_msg_tblsz;
14831  
14832                         inode->i_fop = &mqueue_file_operations;
14833 @@ -139,6 +143,7 @@ static struct inode *mqueue_get_inode(st
14834                         info->notify_owner = NULL;
14835                         info->qsize = 0;
14836                         info->user = NULL;      /* set when all is ok */
14837 +                       info->vxi = NULL;
14838                         memset(&info->attr, 0, sizeof(info->attr));
14839                         info->attr.mq_maxmsg = ipc_ns->mq_msg_max;
14840                         info->attr.mq_msgsize = ipc_ns->mq_msgsize_max;
14841 @@ -153,22 +158,26 @@ static struct inode *mqueue_get_inode(st
14842                         spin_lock(&mq_lock);
14843                         if (u->mq_bytes + mq_bytes < u->mq_bytes ||
14844                             u->mq_bytes + mq_bytes >
14845 -                           p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur) {
14846 +                           p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur ||
14847 +                           !vx_ipcmsg_avail(vxi, mq_bytes)) {
14848                                 spin_unlock(&mq_lock);
14849                                 goto out_inode;
14850                         }
14851                         u->mq_bytes += mq_bytes;
14852 +                       vx_ipcmsg_add(vxi, u, mq_bytes);
14853                         spin_unlock(&mq_lock);
14854  
14855                         info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
14856                         if (!info->messages) {
14857                                 spin_lock(&mq_lock);
14858                                 u->mq_bytes -= mq_bytes;
14859 +                               vx_ipcmsg_sub(vxi, u, mq_bytes);
14860                                 spin_unlock(&mq_lock);
14861                                 goto out_inode;
14862                         }
14863                         /* all is ok */
14864                         info->user = get_uid(u);
14865 +                       info->vxi = get_vx_info(vxi);
14866                 } else if (S_ISDIR(mode)) {
14867                         inc_nlink(inode);
14868                         /* Some things misbehave if size == 0 on a directory */
14869 @@ -269,8 +278,11 @@ static void mqueue_delete_inode(struct i
14870                    (info->attr.mq_maxmsg * info->attr.mq_msgsize));
14871         user = info->user;
14872         if (user) {
14873 +               struct vx_info *vxi = info->vxi;
14874 +
14875                 spin_lock(&mq_lock);
14876                 user->mq_bytes -= mq_bytes;
14877 +               vx_ipcmsg_sub(vxi, user, mq_bytes);
14878                 /*
14879                  * get_ns_from_inode() ensures that the
14880                  * (ipc_ns = sb->s_fs_info) is either a valid ipc_ns
14881 @@ -280,6 +292,7 @@ static void mqueue_delete_inode(struct i
14882                 if (ipc_ns)
14883                         ipc_ns->mq_queues_count--;
14884                 spin_unlock(&mq_lock);
14885 +               put_vx_info(vxi);
14886                 free_uid(user);
14887         }
14888         if (ipc_ns)
14889 diff -NurpP --minimal linux-2.6.31.6/ipc/msg.c linux-2.6.31.6-vs2.3.0.36.24/ipc/msg.c
14890 --- linux-2.6.31.6/ipc/msg.c    2009-03-24 14:22:44.000000000 +0100
14891 +++ linux-2.6.31.6-vs2.3.0.36.24/ipc/msg.c      2009-09-10 16:11:43.000000000 +0200
14892 @@ -38,6 +38,7 @@
14893  #include <linux/rwsem.h>
14894  #include <linux/nsproxy.h>
14895  #include <linux/ipc_namespace.h>
14896 +#include <linux/vs_base.h>
14897  
14898  #include <asm/current.h>
14899  #include <asm/uaccess.h>
14900 @@ -190,6 +191,7 @@ static int newque(struct ipc_namespace *
14901  
14902         msq->q_perm.mode = msgflg & S_IRWXUGO;
14903         msq->q_perm.key = key;
14904 +       msq->q_perm.xid = vx_current_xid();
14905  
14906         msq->q_perm.security = NULL;
14907         retval = security_msg_queue_alloc(msq);
14908 diff -NurpP --minimal linux-2.6.31.6/ipc/namespace.c linux-2.6.31.6-vs2.3.0.36.24/ipc/namespace.c
14909 --- linux-2.6.31.6/ipc/namespace.c      2009-09-10 15:26:27.000000000 +0200
14910 +++ linux-2.6.31.6-vs2.3.0.36.24/ipc/namespace.c        2009-09-10 16:11:43.000000000 +0200
14911 @@ -11,6 +11,8 @@
14912  #include <linux/slab.h>
14913  #include <linux/fs.h>
14914  #include <linux/mount.h>
14915 +#include <linux/vs_base.h>
14916 +#include <linux/vserver/global.h>
14917  
14918  #include "util.h"
14919  
14920 diff -NurpP --minimal linux-2.6.31.6/ipc/sem.c linux-2.6.31.6-vs2.3.0.36.24/ipc/sem.c
14921 --- linux-2.6.31.6/ipc/sem.c    2009-09-10 15:26:27.000000000 +0200
14922 +++ linux-2.6.31.6-vs2.3.0.36.24/ipc/sem.c      2009-09-10 16:11:43.000000000 +0200
14923 @@ -83,6 +83,8 @@
14924  #include <linux/rwsem.h>
14925  #include <linux/nsproxy.h>
14926  #include <linux/ipc_namespace.h>
14927 +#include <linux/vs_base.h>
14928 +#include <linux/vs_limit.h>
14929  
14930  #include <asm/uaccess.h>
14931  #include "util.h"
14932 @@ -255,6 +257,7 @@ static int newary(struct ipc_namespace *
14933  
14934         sma->sem_perm.mode = (semflg & S_IRWXUGO);
14935         sma->sem_perm.key = key;
14936 +       sma->sem_perm.xid = vx_current_xid();
14937  
14938         sma->sem_perm.security = NULL;
14939         retval = security_sem_alloc(sma);
14940 @@ -270,6 +273,9 @@ static int newary(struct ipc_namespace *
14941                 return id;
14942         }
14943         ns->used_sems += nsems;
14944 +       /* FIXME: obsoleted? */
14945 +       vx_semary_inc(sma);
14946 +       vx_nsems_add(sma, nsems);
14947  
14948         sma->sem_base = (struct sem *) &sma[1];
14949         INIT_LIST_HEAD(&sma->sem_pending);
14950 @@ -546,6 +552,9 @@ static void freeary(struct ipc_namespace
14951         sem_unlock(sma);
14952  
14953         ns->used_sems -= sma->sem_nsems;
14954 +       /* FIXME: obsoleted? */
14955 +       vx_nsems_sub(sma, sma->sem_nsems);
14956 +       vx_semary_dec(sma);
14957         security_sem_free(sma);
14958         ipc_rcu_putref(sma);
14959  }
14960 diff -NurpP --minimal linux-2.6.31.6/ipc/shm.c linux-2.6.31.6-vs2.3.0.36.24/ipc/shm.c
14961 --- linux-2.6.31.6/ipc/shm.c    2009-11-12 12:10:11.000000000 +0100
14962 +++ linux-2.6.31.6-vs2.3.0.36.24/ipc/shm.c      2009-11-05 04:17:37.000000000 +0100
14963 @@ -40,6 +40,8 @@
14964  #include <linux/mount.h>
14965  #include <linux/ipc_namespace.h>
14966  #include <linux/ima.h>
14967 +#include <linux/vs_context.h>
14968 +#include <linux/vs_limit.h>
14969  
14970  #include <asm/uaccess.h>
14971  
14972 @@ -169,7 +171,12 @@ static void shm_open(struct vm_area_stru
14973   */
14974  static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
14975  {
14976 -       ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
14977 +       struct vx_info *vxi = lookup_vx_info(shp->shm_perm.xid);
14978 +       int numpages = (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
14979 +
14980 +       vx_ipcshm_sub(vxi, shp, numpages);
14981 +       ns->shm_tot -= numpages;
14982 +
14983         shm_rmid(ns, shp);
14984         shm_unlock(shp);
14985         if (!is_file_hugepages(shp->shm_file))
14986 @@ -179,6 +186,7 @@ static void shm_destroy(struct ipc_names
14987                                                 shp->mlock_user);
14988         fput (shp->shm_file);
14989         security_shm_free(shp);
14990 +       put_vx_info(vxi);
14991         ipc_rcu_putref(shp);
14992  }
14993  
14994 @@ -349,11 +357,15 @@ static int newseg(struct ipc_namespace *
14995         if (ns->shm_tot + numpages > ns->shm_ctlall)
14996                 return -ENOSPC;
14997  
14998 +       if (!vx_ipcshm_avail(current_vx_info(), numpages))
14999 +               return -ENOSPC;
15000 +
15001         shp = ipc_rcu_alloc(sizeof(*shp));
15002         if (!shp)
15003                 return -ENOMEM;
15004  
15005         shp->shm_perm.key = key;
15006 +       shp->shm_perm.xid = vx_current_xid();
15007         shp->shm_perm.mode = (shmflg & S_IRWXUGO);
15008         shp->mlock_user = NULL;
15009  
15010 @@ -407,6 +419,7 @@ static int newseg(struct ipc_namespace *
15011         ns->shm_tot += numpages;
15012         error = shp->shm_perm.id;
15013         shm_unlock(shp);
15014 +       vx_ipcshm_add(current_vx_info(), key, numpages);
15015         return error;
15016  
15017  no_id:
15018 diff -NurpP --minimal linux-2.6.31.6/kernel/capability.c linux-2.6.31.6-vs2.3.0.36.24/kernel/capability.c
15019 --- linux-2.6.31.6/kernel/capability.c  2009-03-24 14:22:44.000000000 +0100
15020 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/capability.c    2009-09-10 16:11:43.000000000 +0200
15021 @@ -14,6 +14,7 @@
15022  #include <linux/security.h>
15023  #include <linux/syscalls.h>
15024  #include <linux/pid_namespace.h>
15025 +#include <linux/vs_context.h>
15026  #include <asm/uaccess.h>
15027  #include "cred-internals.h"
15028  
15029 @@ -122,6 +123,7 @@ static int cap_validate_magic(cap_user_h
15030         return 0;
15031  }
15032  
15033 +
15034  /*
15035   * The only thing that can change the capabilities of the current
15036   * process is the current process. As such, we can't be in this code
15037 @@ -289,6 +291,8 @@ error:
15038         return ret;
15039  }
15040  
15041 +#include <linux/vserver/base.h>
15042 +
15043  /**
15044   * capable - Determine if the current task has a superior capability in effect
15045   * @cap: The capability to be tested for
15046 @@ -301,6 +305,9 @@ error:
15047   */
15048  int capable(int cap)
15049  {
15050 +       /* here for now so we don't require task locking */
15051 +       if (vs_check_bit(VXC_CAP_MASK, cap) && !vx_mcaps(1L << cap))
15052 +               return 0;
15053         if (unlikely(!cap_valid(cap))) {
15054                 printk(KERN_CRIT "capable() called with invalid cap=%u\n", cap);
15055                 BUG();
15056 diff -NurpP --minimal linux-2.6.31.6/kernel/compat.c linux-2.6.31.6-vs2.3.0.36.24/kernel/compat.c
15057 --- linux-2.6.31.6/kernel/compat.c      2009-09-10 15:26:27.000000000 +0200
15058 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/compat.c        2009-09-10 16:11:43.000000000 +0200
15059 @@ -902,7 +902,7 @@ asmlinkage long compat_sys_time(compat_t
15060         compat_time_t i;
15061         struct timeval tv;
15062  
15063 -       do_gettimeofday(&tv);
15064 +       vx_gettimeofday(&tv);
15065         i = tv.tv_sec;
15066  
15067         if (tloc) {
15068 @@ -927,7 +927,7 @@ asmlinkage long compat_sys_stime(compat_
15069         if (err)
15070                 return err;
15071  
15072 -       do_settimeofday(&tv);
15073 +       vx_settimeofday(&tv);
15074         return 0;
15075  }
15076  
15077 diff -NurpP --minimal linux-2.6.31.6/kernel/exit.c linux-2.6.31.6-vs2.3.0.36.24/kernel/exit.c
15078 --- linux-2.6.31.6/kernel/exit.c        2009-11-12 12:10:11.000000000 +0100
15079 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/exit.c  2009-10-15 03:49:19.000000000 +0200
15080 @@ -48,6 +48,10 @@
15081  #include <linux/fs_struct.h>
15082  #include <linux/init_task.h>
15083  #include <linux/perf_counter.h>
15084 +#include <linux/vs_limit.h>
15085 +#include <linux/vs_context.h>
15086 +#include <linux/vs_network.h>
15087 +#include <linux/vs_pid.h>
15088  #include <trace/events/sched.h>
15089  
15090  #include <asm/uaccess.h>
15091 @@ -488,9 +492,11 @@ static void close_files(struct files_str
15092                                         filp_close(file, files);
15093                                         cond_resched();
15094                                 }
15095 +                               vx_openfd_dec(i);
15096                         }
15097                         i++;
15098                         set >>= 1;
15099 +                       cond_resched();
15100                 }
15101         }
15102  }
15103 @@ -1007,10 +1013,15 @@ NORET_TYPE void do_exit(long code)
15104         if (tsk->splice_pipe)
15105                 __free_pipe_info(tsk->splice_pipe);
15106  
15107 +       /* needs to stay after exit_notify() */
15108 +       exit_vx_info(tsk, code);
15109 +       exit_nx_info(tsk);
15110 +
15111         preempt_disable();
15112         /* causes final put_task_struct in finish_task_switch(). */
15113         tsk->state = TASK_DEAD;
15114         schedule();
15115 +       printk("bad task: %p [%lx]\n", current, current->state);
15116         BUG();
15117         /* Avoid "noreturn function does return".  */
15118         for (;;)
15119 diff -NurpP --minimal linux-2.6.31.6/kernel/fork.c linux-2.6.31.6-vs2.3.0.36.24/kernel/fork.c
15120 --- linux-2.6.31.6/kernel/fork.c        2009-11-12 12:10:11.000000000 +0100
15121 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/fork.c  2009-11-05 04:17:43.000000000 +0100
15122 @@ -62,6 +62,10 @@
15123  #include <linux/fs_struct.h>
15124  #include <linux/magic.h>
15125  #include <linux/perf_counter.h>
15126 +#include <linux/vs_context.h>
15127 +#include <linux/vs_network.h>
15128 +#include <linux/vs_limit.h>
15129 +#include <linux/vs_memory.h>
15130  
15131  #include <asm/pgtable.h>
15132  #include <asm/pgalloc.h>
15133 @@ -141,6 +145,8 @@ void free_task(struct task_struct *tsk)
15134         prop_local_destroy_single(&tsk->dirties);
15135         free_thread_info(tsk->stack);
15136         rt_mutex_debug_task_free(tsk);
15137 +       clr_vx_info(&tsk->vx_info);
15138 +       clr_nx_info(&tsk->nx_info);
15139         ftrace_graph_exit_task(tsk);
15140         free_task_struct(tsk);
15141  }
15142 @@ -284,6 +290,8 @@ static int dup_mmap(struct mm_struct *mm
15143         mm->free_area_cache = oldmm->mmap_base;
15144         mm->cached_hole_size = ~0UL;
15145         mm->map_count = 0;
15146 +       __set_mm_counter(mm, file_rss, 0);
15147 +       __set_mm_counter(mm, anon_rss, 0);
15148         cpumask_clear(mm_cpumask(mm));
15149         mm->mm_rb = RB_ROOT;
15150         rb_link = &mm->mm_rb.rb_node;
15151 @@ -295,7 +303,7 @@ static int dup_mmap(struct mm_struct *mm
15152  
15153                 if (mpnt->vm_flags & VM_DONTCOPY) {
15154                         long pages = vma_pages(mpnt);
15155 -                       mm->total_vm -= pages;
15156 +                       vx_vmpages_sub(mm, pages);
15157                         vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
15158                                                                 -pages);
15159                         continue;
15160 @@ -428,8 +436,8 @@ static struct mm_struct * mm_init(struct
15161         mm->flags = (current->mm) ? current->mm->flags : default_dump_filter;
15162         mm->core_state = NULL;
15163         mm->nr_ptes = 0;
15164 -       set_mm_counter(mm, file_rss, 0);
15165 -       set_mm_counter(mm, anon_rss, 0);
15166 +       __set_mm_counter(mm, file_rss, 0);
15167 +       __set_mm_counter(mm, anon_rss, 0);
15168         spin_lock_init(&mm->page_table_lock);
15169         spin_lock_init(&mm->ioctx_lock);
15170         INIT_HLIST_HEAD(&mm->ioctx_list);
15171 @@ -440,6 +448,7 @@ static struct mm_struct * mm_init(struct
15172         if (likely(!mm_alloc_pgd(mm))) {
15173                 mm->def_flags = 0;
15174                 mmu_notifier_mm_init(mm);
15175 +               set_vx_info(&mm->mm_vx_info, p->vx_info);
15176                 return mm;
15177         }
15178  
15179 @@ -473,6 +482,7 @@ void __mmdrop(struct mm_struct *mm)
15180         mm_free_pgd(mm);
15181         destroy_context(mm);
15182         mmu_notifier_mm_destroy(mm);
15183 +       clr_vx_info(&mm->mm_vx_info);
15184         free_mm(mm);
15185  }
15186  EXPORT_SYMBOL_GPL(__mmdrop);
15187 @@ -605,6 +615,7 @@ struct mm_struct *dup_mm(struct task_str
15188                 goto fail_nomem;
15189  
15190         memcpy(mm, oldmm, sizeof(*mm));
15191 +       mm->mm_vx_info = NULL;
15192  
15193         /* Initializing for Swap token stuff */
15194         mm->token_priority = 0;
15195 @@ -638,6 +649,7 @@ fail_nocontext:
15196          * If init_new_context() failed, we cannot use mmput() to free the mm
15197          * because it calls destroy_context()
15198          */
15199 +       clr_vx_info(&mm->mm_vx_info);
15200         mm_free_pgd(mm);
15201         free_mm(mm);
15202         return NULL;
15203 @@ -946,6 +958,8 @@ static struct task_struct *copy_process(
15204         int retval;
15205         struct task_struct *p;
15206         int cgroup_callbacks_done = 0;
15207 +       struct vx_info *vxi;
15208 +       struct nx_info *nxi;
15209  
15210         if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
15211                 return ERR_PTR(-EINVAL);
15212 @@ -982,12 +996,28 @@ static struct task_struct *copy_process(
15213         DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
15214         DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
15215  #endif
15216 +       init_vx_info(&p->vx_info, current_vx_info());
15217 +       init_nx_info(&p->nx_info, current_nx_info());
15218 +
15219 +       /* check vserver memory */
15220 +       if (p->mm && !(clone_flags & CLONE_VM)) {
15221 +               if (vx_vmpages_avail(p->mm, p->mm->total_vm))
15222 +                       vx_pages_add(p->vx_info, RLIMIT_AS, p->mm->total_vm);
15223 +               else
15224 +                       goto bad_fork_free;
15225 +       }
15226 +       if (p->mm && vx_flags(VXF_FORK_RSS, 0)) {
15227 +               if (!vx_rss_avail(p->mm, get_mm_counter(p->mm, file_rss)))
15228 +                       goto bad_fork_cleanup_vm;
15229 +       }
15230         retval = -EAGAIN;
15231 +       if (!vx_nproc_avail(1))
15232 +               goto bad_fork_cleanup_vm;
15233         if (atomic_read(&p->real_cred->user->processes) >=
15234                         p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
15235                 if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
15236                     p->real_cred->user != INIT_USER)
15237 -                       goto bad_fork_free;
15238 +                       goto bad_fork_cleanup_vm;
15239         }
15240  
15241         retval = copy_creds(p, clone_flags);
15242 @@ -1260,6 +1290,18 @@ static struct task_struct *copy_process(
15243  
15244         total_forks++;
15245         spin_unlock(&current->sighand->siglock);
15246 +
15247 +       /* p is copy of current */
15248 +       vxi = p->vx_info;
15249 +       if (vxi) {
15250 +               claim_vx_info(vxi, p);
15251 +               atomic_inc(&vxi->cvirt.nr_threads);
15252 +               atomic_inc(&vxi->cvirt.total_forks);
15253 +               vx_nproc_inc(p);
15254 +       }
15255 +       nxi = p->nx_info;
15256 +       if (nxi)
15257 +               claim_nx_info(nxi, p);
15258         write_unlock_irq(&tasklist_lock);
15259         proc_fork_connector(p);
15260         cgroup_post_fork(p);
15261 @@ -1305,6 +1347,9 @@ bad_fork_cleanup_count:
15262         atomic_dec(&p->cred->user->processes);
15263         put_cred(p->real_cred);
15264         put_cred(p->cred);
15265 +bad_fork_cleanup_vm:
15266 +       if (p->mm && !(clone_flags & CLONE_VM))
15267 +               vx_pages_sub(p->vx_info, RLIMIT_AS, p->mm->total_vm);
15268  bad_fork_free:
15269         free_task(p);
15270  fork_out:
15271 diff -NurpP --minimal linux-2.6.31.6/kernel/kthread.c linux-2.6.31.6-vs2.3.0.36.24/kernel/kthread.c
15272 --- linux-2.6.31.6/kernel/kthread.c     2009-09-10 15:26:28.000000000 +0200
15273 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/kthread.c       2009-09-10 16:43:27.000000000 +0200
15274 @@ -14,6 +14,7 @@
15275  #include <linux/file.h>
15276  #include <linux/module.h>
15277  #include <linux/mutex.h>
15278 +#include <linux/vs_pid.h>
15279  #include <trace/events/sched.h>
15280  
15281  #define KTHREAD_NICE_LEVEL (-5)
15282 diff -NurpP --minimal linux-2.6.31.6/kernel/Makefile linux-2.6.31.6-vs2.3.0.36.24/kernel/Makefile
15283 --- linux-2.6.31.6/kernel/Makefile      2009-09-10 15:26:27.000000000 +0200
15284 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/Makefile        2009-09-10 16:11:43.000000000 +0200
15285 @@ -23,6 +23,7 @@ CFLAGS_REMOVE_cgroup-debug.o = -pg
15286  CFLAGS_REMOVE_sched_clock.o = -pg
15287  endif
15288  
15289 +obj-y += vserver/
15290  obj-$(CONFIG_FREEZER) += freezer.o
15291  obj-$(CONFIG_PROFILING) += profile.o
15292  obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
15293 diff -NurpP --minimal linux-2.6.31.6/kernel/nsproxy.c linux-2.6.31.6-vs2.3.0.36.24/kernel/nsproxy.c
15294 --- linux-2.6.31.6/kernel/nsproxy.c     2009-09-10 15:26:28.000000000 +0200
15295 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/nsproxy.c       2009-09-10 17:37:49.000000000 +0200
15296 @@ -19,6 +19,8 @@
15297  #include <linux/mnt_namespace.h>
15298  #include <linux/utsname.h>
15299  #include <linux/pid_namespace.h>
15300 +#include <linux/vserver/global.h>
15301 +#include <linux/vserver/debug.h>
15302  #include <net/net_namespace.h>
15303  #include <linux/ipc_namespace.h>
15304  
15305 @@ -31,8 +33,11 @@ static inline struct nsproxy *create_nsp
15306         struct nsproxy *nsproxy;
15307  
15308         nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
15309 -       if (nsproxy)
15310 +       if (nsproxy) {
15311                 atomic_set(&nsproxy->count, 1);
15312 +               atomic_inc(&vs_global_nsproxy);
15313 +       }
15314 +       vxdprintk(VXD_CBIT(space, 2), "create_nsproxy = %p[1]", nsproxy);
15315         return nsproxy;
15316  }
15317  
15318 @@ -41,41 +46,52 @@ static inline struct nsproxy *create_nsp
15319   * Return the newly created nsproxy.  Do not attach this to the task,
15320   * leave it to the caller to do proper locking and attach it to task.
15321   */
15322 -static struct nsproxy *create_new_namespaces(unsigned long flags,
15323 -                       struct task_struct *tsk, struct fs_struct *new_fs)
15324 +static struct nsproxy *unshare_namespaces(unsigned long flags,
15325 +                       struct nsproxy *orig, struct fs_struct *new_fs)
15326  {
15327         struct nsproxy *new_nsp;
15328         int err;
15329  
15330 +       vxdprintk(VXD_CBIT(space, 4),
15331 +               "unshare_namespaces(0x%08lx,%p,%p)",
15332 +               flags, orig, new_fs);
15333 +
15334         new_nsp = create_nsproxy();
15335         if (!new_nsp)
15336                 return ERR_PTR(-ENOMEM);
15337  
15338 -       new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs);
15339 +       new_nsp->mnt_ns = copy_mnt_ns(flags, orig->mnt_ns, new_fs);
15340         if (IS_ERR(new_nsp->mnt_ns)) {
15341                 err = PTR_ERR(new_nsp->mnt_ns);
15342                 goto out_ns;
15343         }
15344  
15345 -       new_nsp->uts_ns = copy_utsname(flags, tsk->nsproxy->uts_ns);
15346 +       new_nsp->uts_ns = copy_utsname(flags, orig->uts_ns);
15347         if (IS_ERR(new_nsp->uts_ns)) {
15348                 err = PTR_ERR(new_nsp->uts_ns);
15349                 goto out_uts;
15350         }
15351  
15352 -       new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns);
15353 +       new_nsp->ipc_ns = copy_ipcs(flags, orig->ipc_ns);
15354         if (IS_ERR(new_nsp->ipc_ns)) {
15355                 err = PTR_ERR(new_nsp->ipc_ns);
15356                 goto out_ipc;
15357         }
15358  
15359 -       new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk));
15360 +       new_nsp->pid_ns = copy_pid_ns(flags, orig->pid_ns);
15361         if (IS_ERR(new_nsp->pid_ns)) {
15362                 err = PTR_ERR(new_nsp->pid_ns);
15363                 goto out_pid;
15364         }
15365  
15366 -       new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns);
15367 +       /* disabled now?
15368 +       new_nsp->user_ns = copy_user_ns(flags, orig->user_ns);
15369 +       if (IS_ERR(new_nsp->user_ns)) {
15370 +               err = PTR_ERR(new_nsp->user_ns);
15371 +               goto out_user;
15372 +       } */
15373 +
15374 +       new_nsp->net_ns = copy_net_ns(flags, orig->net_ns);
15375         if (IS_ERR(new_nsp->net_ns)) {
15376                 err = PTR_ERR(new_nsp->net_ns);
15377                 goto out_net;
15378 @@ -100,6 +116,38 @@ out_ns:
15379         return ERR_PTR(err);
15380  }
15381  
15382 +static struct nsproxy *create_new_namespaces(unsigned long flags,
15383 +                       struct task_struct *tsk, struct fs_struct *new_fs)
15384 +{
15385 +       return unshare_namespaces(flags, tsk->nsproxy, new_fs);
15386 +}
15387 +
15388 +/*
15389 + * copies the nsproxy, setting refcount to 1, and grabbing a
15390 + * reference to all contained namespaces.
15391 + */
15392 +struct nsproxy *copy_nsproxy(struct nsproxy *orig)
15393 +{
15394 +       struct nsproxy *ns = create_nsproxy();
15395 +
15396 +       if (ns) {
15397 +               memcpy(ns, orig, sizeof(struct nsproxy));
15398 +               atomic_set(&ns->count, 1);
15399 +
15400 +               if (ns->mnt_ns)
15401 +                       get_mnt_ns(ns->mnt_ns);
15402 +               if (ns->uts_ns)
15403 +                       get_uts_ns(ns->uts_ns);
15404 +               if (ns->ipc_ns)
15405 +                       get_ipc_ns(ns->ipc_ns);
15406 +               if (ns->pid_ns)
15407 +                       get_pid_ns(ns->pid_ns);
15408 +               if (ns->net_ns)
15409 +                       get_net(ns->net_ns);
15410 +       }
15411 +       return ns;
15412 +}
15413 +
15414  /*
15415   * called from clone.  This now handles copy for nsproxy and all
15416   * namespaces therein.
15417 @@ -107,9 +155,12 @@ out_ns:
15418  int copy_namespaces(unsigned long flags, struct task_struct *tsk)
15419  {
15420         struct nsproxy *old_ns = tsk->nsproxy;
15421 -       struct nsproxy *new_ns;
15422 +       struct nsproxy *new_ns = NULL;
15423         int err = 0;
15424  
15425 +       vxdprintk(VXD_CBIT(space, 7), "copy_namespaces(0x%08lx,%p[%p])",
15426 +               flags, tsk, old_ns);
15427 +
15428         if (!old_ns)
15429                 return 0;
15430  
15431 @@ -119,7 +170,7 @@ int copy_namespaces(unsigned long flags,
15432                                 CLONE_NEWPID | CLONE_NEWNET)))
15433                 return 0;
15434  
15435 -       if (!capable(CAP_SYS_ADMIN)) {
15436 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_NAMESPACE)) {
15437                 err = -EPERM;
15438                 goto out;
15439         }
15440 @@ -146,6 +197,9 @@ int copy_namespaces(unsigned long flags,
15441  
15442  out:
15443         put_nsproxy(old_ns);
15444 +       vxdprintk(VXD_CBIT(space, 3),
15445 +               "copy_namespaces(0x%08lx,%p[%p]) = %d [%p]",
15446 +               flags, tsk, old_ns, err, new_ns);
15447         return err;
15448  }
15449  
15450 @@ -159,7 +213,9 @@ void free_nsproxy(struct nsproxy *ns)
15451                 put_ipc_ns(ns->ipc_ns);
15452         if (ns->pid_ns)
15453                 put_pid_ns(ns->pid_ns);
15454 -       put_net(ns->net_ns);
15455 +       if (ns->net_ns)
15456 +               put_net(ns->net_ns);
15457 +       atomic_dec(&vs_global_nsproxy);
15458         kmem_cache_free(nsproxy_cachep, ns);
15459  }
15460  
15461 @@ -172,11 +228,15 @@ int unshare_nsproxy_namespaces(unsigned 
15462  {
15463         int err = 0;
15464  
15465 +       vxdprintk(VXD_CBIT(space, 4),
15466 +               "unshare_nsproxy_namespaces(0x%08lx,[%p])",
15467 +               unshare_flags, current->nsproxy);
15468 +
15469         if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
15470                                CLONE_NEWNET)))
15471                 return 0;
15472  
15473 -       if (!capable(CAP_SYS_ADMIN))
15474 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_NAMESPACE))
15475                 return -EPERM;
15476  
15477         *new_nsp = create_new_namespaces(unshare_flags, current,
15478 diff -NurpP --minimal linux-2.6.31.6/kernel/pid.c linux-2.6.31.6-vs2.3.0.36.24/kernel/pid.c
15479 --- linux-2.6.31.6/kernel/pid.c 2009-09-10 15:26:28.000000000 +0200
15480 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/pid.c   2009-09-10 16:36:49.000000000 +0200
15481 @@ -36,6 +36,7 @@
15482  #include <linux/pid_namespace.h>
15483  #include <linux/init_task.h>
15484  #include <linux/syscalls.h>
15485 +#include <linux/vs_pid.h>
15486  
15487  #define pid_hashfn(nr, ns)     \
15488         hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
15489 @@ -305,7 +306,7 @@ EXPORT_SYMBOL_GPL(find_pid_ns);
15490  
15491  struct pid *find_vpid(int nr)
15492  {
15493 -       return find_pid_ns(nr, current->nsproxy->pid_ns);
15494 +       return find_pid_ns(vx_rmap_pid(nr), current->nsproxy->pid_ns);
15495  }
15496  EXPORT_SYMBOL_GPL(find_vpid);
15497  
15498 @@ -365,6 +366,9 @@ void transfer_pid(struct task_struct *ol
15499  struct task_struct *pid_task(struct pid *pid, enum pid_type type)
15500  {
15501         struct task_struct *result = NULL;
15502 +
15503 +       if (type == PIDTYPE_REALPID)
15504 +               type = PIDTYPE_PID;
15505         if (pid) {
15506                 struct hlist_node *first;
15507                 first = rcu_dereference(pid->tasks[type].first);
15508 @@ -380,7 +384,7 @@ EXPORT_SYMBOL(pid_task);
15509   */
15510  struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
15511  {
15512 -       return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
15513 +       return pid_task(find_pid_ns(vx_rmap_pid(nr), ns), PIDTYPE_PID);
15514  }
15515  
15516  struct task_struct *find_task_by_vpid(pid_t vnr)
15517 @@ -422,7 +426,7 @@ struct pid *find_get_pid(pid_t nr)
15518  }
15519  EXPORT_SYMBOL_GPL(find_get_pid);
15520  
15521 -pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
15522 +pid_t pid_unmapped_nr_ns(struct pid *pid, struct pid_namespace *ns)
15523  {
15524         struct upid *upid;
15525         pid_t nr = 0;
15526 @@ -435,6 +439,11 @@ pid_t pid_nr_ns(struct pid *pid, struct 
15527         return nr;
15528  }
15529  
15530 +pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
15531 +{
15532 +       return vx_map_pid(pid_unmapped_nr_ns(pid, ns));
15533 +}
15534 +
15535  pid_t pid_vnr(struct pid *pid)
15536  {
15537         return pid_nr_ns(pid, current->nsproxy->pid_ns);
15538 diff -NurpP --minimal linux-2.6.31.6/kernel/pid_namespace.c linux-2.6.31.6-vs2.3.0.36.24/kernel/pid_namespace.c
15539 --- linux-2.6.31.6/kernel/pid_namespace.c       2009-09-10 15:26:28.000000000 +0200
15540 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/pid_namespace.c 2009-09-10 16:11:43.000000000 +0200
15541 @@ -13,6 +13,7 @@
15542  #include <linux/syscalls.h>
15543  #include <linux/err.h>
15544  #include <linux/acct.h>
15545 +#include <linux/vserver/global.h>
15546  
15547  #define BITS_PER_PAGE          (PAGE_SIZE*8)
15548  
15549 @@ -86,6 +87,7 @@ static struct pid_namespace *create_pid_
15550                 goto out_free_map;
15551  
15552         kref_init(&ns->kref);
15553 +       atomic_inc(&vs_global_pid_ns);
15554         ns->level = level;
15555         ns->parent = get_pid_ns(parent_pid_ns);
15556  
15557 @@ -111,6 +113,7 @@ static void destroy_pid_namespace(struct
15558  
15559         for (i = 0; i < PIDMAP_ENTRIES; i++)
15560                 kfree(ns->pidmap[i].page);
15561 +       atomic_dec(&vs_global_pid_ns);
15562         kmem_cache_free(pid_ns_cachep, ns);
15563  }
15564  
15565 diff -NurpP --minimal linux-2.6.31.6/kernel/posix-timers.c linux-2.6.31.6-vs2.3.0.36.24/kernel/posix-timers.c
15566 --- linux-2.6.31.6/kernel/posix-timers.c        2009-09-10 15:26:28.000000000 +0200
15567 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/posix-timers.c  2009-10-24 23:47:04.000000000 +0200
15568 @@ -46,6 +46,7 @@
15569  #include <linux/wait.h>
15570  #include <linux/workqueue.h>
15571  #include <linux/module.h>
15572 +#include <linux/vs_context.h>
15573  
15574  /*
15575   * Management arrays for POSIX timers.  Timers are kept in slab memory
15576 @@ -328,6 +329,7 @@ int posix_timer_event(struct k_itimer *t
15577  {
15578         struct task_struct *task;
15579         int shared, ret = -1;
15580 +
15581         /*
15582          * FIXME: if ->sigq is queued we can race with
15583          * dequeue_signal()->do_schedule_next_timer().
15584 @@ -344,10 +346,18 @@ int posix_timer_event(struct k_itimer *t
15585         rcu_read_lock();
15586         task = pid_task(timr->it_pid, PIDTYPE_PID);
15587         if (task) {
15588 +               struct vx_info_save vxis;
15589 +               struct vx_info *vxi;
15590 +
15591 +               vxi = get_vx_info(task->vx_info);
15592 +               enter_vx_info(vxi, &vxis);
15593                 shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
15594                 ret = send_sigqueue(timr->sigq, task, shared);
15595 +               leave_vx_info(&vxis);
15596 +               put_vx_info(vxi);
15597         }
15598         rcu_read_unlock();
15599 +
15600         /* If we failed to send the signal the timer stops. */
15601         return ret > 0;
15602  }
15603 diff -NurpP --minimal linux-2.6.31.6/kernel/printk.c linux-2.6.31.6-vs2.3.0.36.24/kernel/printk.c
15604 --- linux-2.6.31.6/kernel/printk.c      2009-09-10 15:26:28.000000000 +0200
15605 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/printk.c        2009-09-10 16:11:43.000000000 +0200
15606 @@ -33,6 +33,7 @@
15607  #include <linux/bootmem.h>
15608  #include <linux/syscalls.h>
15609  #include <linux/kexec.h>
15610 +#include <linux/vs_cvirt.h>
15611  
15612  #include <asm/uaccess.h>
15613  
15614 @@ -270,18 +271,13 @@ int do_syslog(int type, char __user *buf
15615         unsigned i, j, limit, count;
15616         int do_clear = 0;
15617         char c;
15618 -       int error = 0;
15619 +       int error;
15620  
15621         error = security_syslog(type);
15622         if (error)
15623                 return error;
15624  
15625 -       switch (type) {
15626 -       case 0:         /* Close log */
15627 -               break;
15628 -       case 1:         /* Open log */
15629 -               break;
15630 -       case 2:         /* Read from log */
15631 +       if ((type >= 2) && (type <= 4)) {
15632                 error = -EINVAL;
15633                 if (!buf || len < 0)
15634                         goto out;
15635 @@ -292,6 +288,16 @@ int do_syslog(int type, char __user *buf
15636                         error = -EFAULT;
15637                         goto out;
15638                 }
15639 +       }
15640 +       if (!vx_check(0, VS_ADMIN|VS_WATCH))
15641 +               return vx_do_syslog(type, buf, len);
15642 +
15643 +       switch (type) {
15644 +       case 0:         /* Close log */
15645 +               break;
15646 +       case 1:         /* Open log */
15647 +               break;
15648 +       case 2:         /* Read from log */
15649                 error = wait_event_interruptible(log_wait,
15650                                                         (log_start - log_end));
15651                 if (error)
15652 @@ -316,16 +322,6 @@ int do_syslog(int type, char __user *buf
15653                 do_clear = 1;
15654                 /* FALL THRU */
15655         case 3:         /* Read last kernel messages */
15656 -               error = -EINVAL;
15657 -               if (!buf || len < 0)
15658 -                       goto out;
15659 -               error = 0;
15660 -               if (!len)
15661 -                       goto out;
15662 -               if (!access_ok(VERIFY_WRITE, buf, len)) {
15663 -                       error = -EFAULT;
15664 -                       goto out;
15665 -               }
15666                 count = len;
15667                 if (count > log_buf_len)
15668                         count = log_buf_len;
15669 diff -NurpP --minimal linux-2.6.31.6/kernel/ptrace.c linux-2.6.31.6-vs2.3.0.36.24/kernel/ptrace.c
15670 --- linux-2.6.31.6/kernel/ptrace.c      2009-09-10 15:26:28.000000000 +0200
15671 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/ptrace.c        2009-09-10 16:11:43.000000000 +0200
15672 @@ -22,6 +22,7 @@
15673  #include <linux/pid_namespace.h>
15674  #include <linux/syscalls.h>
15675  #include <linux/uaccess.h>
15676 +#include <linux/vs_context.h>
15677  
15678  
15679  /*
15680 @@ -151,6 +152,11 @@ int __ptrace_may_access(struct task_stru
15681                 dumpable = get_dumpable(task->mm);
15682         if (!dumpable && !capable(CAP_SYS_PTRACE))
15683                 return -EPERM;
15684 +       if (!vx_check(task->xid, VS_ADMIN_P|VS_IDENT))
15685 +               return -EPERM;
15686 +       if (!vx_check(task->xid, VS_IDENT) &&
15687 +               !task_vx_flags(task, VXF_STATE_ADMIN, 0))
15688 +               return -EACCES;
15689  
15690         return security_ptrace_may_access(task, mode);
15691  }
15692 @@ -618,6 +624,10 @@ SYSCALL_DEFINE4(ptrace, long, request, l
15693                 goto out;
15694         }
15695  
15696 +       ret = -EPERM;
15697 +       if (!vx_check(vx_task_xid(child), VS_WATCH_P | VS_IDENT))
15698 +               goto out_put_task_struct;
15699 +
15700         if (request == PTRACE_ATTACH) {
15701                 ret = ptrace_attach(child);
15702                 /*
15703 diff -NurpP --minimal linux-2.6.31.6/kernel/sched.c linux-2.6.31.6-vs2.3.0.36.24/kernel/sched.c
15704 --- linux-2.6.31.6/kernel/sched.c       2009-09-10 15:26:28.000000000 +0200
15705 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/sched.c 2009-11-05 04:17:49.000000000 +0100
15706 @@ -72,6 +72,8 @@
15707  #include <linux/debugfs.h>
15708  #include <linux/ctype.h>
15709  #include <linux/ftrace.h>
15710 +#include <linux/vs_sched.h>
15711 +#include <linux/vs_cvirt.h>
15712  
15713  #include <asm/tlb.h>
15714  #include <asm/irq_regs.h>
15715 @@ -262,6 +264,15 @@ static DEFINE_MUTEX(sched_domains_mutex)
15716  
15717  #include <linux/cgroup.h>
15718  
15719 +#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_CFS_HARD_LIMITS)
15720 +struct cfs_bandwidth {
15721 +       spinlock_t              cfs_runtime_lock;
15722 +       ktime_t                 cfs_period;
15723 +       u64                     cfs_runtime;
15724 +       struct hrtimer          cfs_period_timer;
15725 +};
15726 +#endif
15727 +
15728  struct cfs_rq;
15729  
15730  static LIST_HEAD(task_groups);
15731 @@ -282,6 +293,11 @@ struct task_group {
15732         /* runqueue "owned" by this group on each cpu */
15733         struct cfs_rq **cfs_rq;
15734         unsigned long shares;
15735 +#ifdef CONFIG_CFS_HARD_LIMITS
15736 +       struct cfs_bandwidth cfs_bandwidth;
15737 +       /* If set, throttle when the group exceeds its bandwidth */
15738 +       int hard_limit_enabled;
15739 +#endif
15740  #endif
15741  
15742  #ifdef CONFIG_RT_GROUP_SCHED
15743 @@ -477,6 +493,20 @@ struct cfs_rq {
15744         unsigned long rq_weight;
15745  #endif
15746  #endif
15747 +#ifdef CONFIG_CFS_HARD_LIMITS
15748 +       /* set when the group is throttled  on this cpu */
15749 +       int cfs_throttled;
15750 +
15751 +       /* runtime currently consumed by the group on this rq */
15752 +       u64 cfs_time;
15753 +
15754 +       /* runtime available to the group on this rq */
15755 +       u64 cfs_runtime;
15756 +#endif
15757 +       /*
15758 +        * Number of tasks at this hierarchy.
15759 +        */
15760 +       unsigned long nr_tasks_running;
15761  };
15762  
15763  /* Real-Time classes' related field in a runqueue: */
15764 @@ -661,6 +691,11 @@ struct rq {
15765         /* BKL stats */
15766         unsigned int bkl_count;
15767  #endif
15768 +       /*
15769 +        * Protects the cfs runtime related fields of all cfs_rqs under
15770 +        * this rq
15771 +        */
15772 +       spinlock_t runtime_lock;
15773  };
15774  
15775  static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
15776 @@ -1552,6 +1587,7 @@ update_group_shares_cpu(struct task_grou
15777         }
15778  }
15779  
15780 +static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq);
15781  /*
15782   * Re-compute the task group their per cpu shares over the given domain.
15783   * This needs to be done in a bottom-up fashion because the rq weight of a
15784 @@ -1569,9 +1605,11 @@ static int tg_shares_up(struct task_grou
15785                  * If there are currently no tasks on the cpu pretend there
15786                  * is one of average load so that when a new task gets to
15787                  * run here it will not get delayed by group starvation.
15788 +                * Also if the group is throttled on this cpu, pretend that
15789 +                * it has no tasks.
15790                  */
15791                 weight = tg->cfs_rq[i]->load.weight;
15792 -               if (!weight)
15793 +               if (!weight || cfs_rq_throttled(tg->cfs_rq[i]))
15794                         weight = NICE_0_LOAD;
15795  
15796                 tg->cfs_rq[i]->rq_weight = weight;
15797 @@ -1595,6 +1633,7 @@ static int tg_shares_up(struct task_grou
15798   * Compute the cpu's hierarchical load factor for each task group.
15799   * This needs to be done in a top-down fashion because the load of a child
15800   * group is a fraction of its parents load.
15801 + * A throttled group's h_load is set to 0.
15802   */
15803  static int tg_load_down(struct task_group *tg, void *data)
15804  {
15805 @@ -1603,6 +1642,8 @@ static int tg_load_down(struct task_grou
15806  
15807         if (!tg->parent) {
15808                 load = cpu_rq(cpu)->load.weight;
15809 +       } else if (cfs_rq_throttled(tg->cfs_rq[cpu])) {
15810 +               load = 0;
15811         } else {
15812                 load = tg->parent->cfs_rq[cpu]->h_load;
15813                 load *= tg->cfs_rq[cpu]->shares;
15814 @@ -1732,6 +1773,187 @@ static void cfs_rq_set_shares(struct cfs
15815  
15816  static void calc_load_account_active(struct rq *this_rq);
15817  
15818 +
15819 +#if defined(CONFIG_RT_GROUP_SCHED) || defined(CONFIG_FAIR_GROUP_SCHED)
15820 +
15821 +#ifdef CONFIG_SMP
15822 +static inline const struct cpumask *sched_bw_period_mask(void)
15823 +{
15824 +       return cpu_rq(smp_processor_id())->rd->span;
15825 +}
15826 +#else /* !CONFIG_SMP */
15827 +static inline const struct cpumask *sched_bw_period_mask(void)
15828 +{
15829 +       return cpu_online_mask;
15830 +}
15831 +#endif /* CONFIG_SMP */
15832 +
15833 +#else
15834 +static inline const struct cpumask *sched_bw_period_mask(void)
15835 +{
15836 +       return cpu_online_mask;
15837 +}
15838 +
15839 +#endif
15840 +
15841 +#ifdef CONFIG_FAIR_GROUP_SCHED
15842 +#ifdef CONFIG_CFS_HARD_LIMITS
15843 +
15844 +/*
15845 + * Runtime allowed for a cfs group before it is hard limited.
15846 + * default: Infinite which means no hard limiting.
15847 + */
15848 +u64 sched_cfs_runtime = RUNTIME_INF;
15849 +
15850 +/*
15851 + * period over which we hard limit the cfs group's bandwidth.
15852 + * default: 0.5s (the value is in microseconds; see global_cfs_period())
15853 + */
15854 +u64 sched_cfs_period = 500000;
15855 +
15856 +static inline u64 global_cfs_period(void)
15857 +{
15858 +       return sched_cfs_period * NSEC_PER_USEC;
15859 +}
15860 +
15861 +static inline u64 global_cfs_runtime(void)
15862 +{
15863 +       return RUNTIME_INF;
15864 +}
15865 +
15866 +int task_group_throttled(struct task_group *tg, int cpu);
15867 +void do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b);
15868 +
15869 +static inline int cfs_bandwidth_enabled(struct task_group *tg)
15870 +{
15871 +       return tg->hard_limit_enabled;
15872 +}
15873 +
15874 +static inline void rq_runtime_lock(struct rq *rq)
15875 +{
15876 +       spin_lock(&rq->runtime_lock);
15877 +}
15878 +
15879 +static inline void rq_runtime_unlock(struct rq *rq)
15880 +{
15881 +       spin_unlock(&rq->runtime_lock);
15882 +}
15883 +
15884 +/*
15885 + * Period timer callback: hand the owning cfs_bandwidth to
15886 + * do_sched_cfs_period_timer(), then re-arm one cfs_period later.
15887 + */
15888 +static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
15889 +{
15890 +       struct cfs_bandwidth *cfs_b =
15891 +               container_of(timer, struct cfs_bandwidth, cfs_period_timer);
15892 +
15893 +       do_sched_cfs_period_timer(cfs_b);
15894 +       hrtimer_add_expires_ns(timer, ktime_to_ns(cfs_b->cfs_period));
15895 +       return HRTIMER_RESTART;
15896 +}
15897 +
15898 +/*
15899 + * TODO: Check if this kind of timer setup is sufficient for cfs or
15900 + * should we do what rt is doing.
15901 + */
15902 +static void start_cfs_bandwidth(struct task_group *tg)
15903 +{
15904 +       struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
15905 +
15906 +       /*
15907 +        * Timer isn't setup for groups with infinite runtime or for groups
15908 +        * for which hard limiting isn't enabled.
15909 +        */
15910 +       if (!cfs_bandwidth_enabled(tg) || (cfs_b->cfs_runtime == RUNTIME_INF))
15911 +               return;
15912 +
15913 +       if (hrtimer_active(&cfs_b->cfs_period_timer))
15914 +               return;
15915 +
15916 +       hrtimer_start_range_ns(&cfs_b->cfs_period_timer, cfs_b->cfs_period,
15917 +                       0, HRTIMER_MODE_REL);
15918 +}
15919 +
15920 +static void init_cfs_bandwidth(struct task_group *tg)
15921 +{
15922 +       struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
15923 +
15924 +       cfs_b->cfs_period = ns_to_ktime(global_cfs_period());
15925 +       cfs_b->cfs_runtime = global_cfs_runtime();
15926 +
15927 +       spin_lock_init(&cfs_b->cfs_runtime_lock);
15928 +
15929 +       hrtimer_init(&cfs_b->cfs_period_timer,
15930 +                       CLOCK_MONOTONIC, HRTIMER_MODE_REL);
15931 +       cfs_b->cfs_period_timer.function = &sched_cfs_period_timer;
15932 +}
15933 +
15934 +static inline void destroy_cfs_bandwidth(struct task_group *tg)
15935 +{
15936 +       hrtimer_cancel(&tg->cfs_bandwidth.cfs_period_timer);
15937 +}
15938 +
15939 +static void init_cfs_hard_limits(struct cfs_rq *cfs_rq, struct task_group *tg)
15940 +{
15941 +       cfs_rq->cfs_time = 0;
15942 +       cfs_rq->cfs_throttled = 0;
15943 +       cfs_rq->cfs_runtime = tg->cfs_bandwidth.cfs_runtime;
15944 +       tg->hard_limit_enabled = 0;
15945 +}
15946 +
15947 +#else /* !CONFIG_CFS_HARD_LIMITS */
15948 +
15949 +static void init_cfs_bandwidth(struct task_group *tg)
15950 +{
15951 +       return;
15952 +}
15953 +
15954 +static inline void destroy_cfs_bandwidth(struct task_group *tg)
15955 +{
15956 +       return;
15957 +}
15958 +
15959 +static void init_cfs_hard_limits(struct cfs_rq *cfs_rq, struct task_group *tg)
15960 +{
15961 +       return;
15962 +}
15963 +
15964 +static inline void rq_runtime_lock(struct rq *rq)
15965 +{
15966 +       return;
15967 +}
15968 +
15969 +static inline void rq_runtime_unlock(struct rq *rq)
15970 +{
15971 +       return;
15972 +}
15973 +
15974 +#endif /* CONFIG_CFS_HARD_LIMITS */
15975 +#else /* !CONFIG_FAIR_GROUP_SCHED */
15976 +
15977 +static inline void rq_runtime_lock(struct rq *rq)
15978 +{
15979 +       return;
15980 +}
15981 +
15982 +static inline void rq_runtime_unlock(struct rq *rq)
15983 +{
15984 +       return;
15985 +}
15986 +
15987 +int task_group_throttled(struct task_group *tg, int cpu)
15988 +{
15989 +       return 0;
15990 +}
15991 +
15992 +static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
15993 +{
15994 +       return 0;
15995 +}
15996 +
15997 +#endif /* CONFIG_FAIR_GROUP_SCHED */
15998 +
15999  #include "sched_stats.h"
16000  #include "sched_idletask.c"
16001  #include "sched_fair.c"
16002 @@ -1781,14 +2003,17 @@ static void update_avg(u64 *avg, u64 sam
16003         *avg += diff >> 3;
16004  }
16005  
16006 -static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
16007 +static int enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
16008  {
16009 +       int ret;
16010 +
16011         if (wakeup)
16012                 p->se.start_runtime = p->se.sum_exec_runtime;
16013  
16014         sched_info_queued(p);
16015 -       p->sched_class->enqueue_task(rq, p, wakeup);
16016 +       ret = p->sched_class->enqueue_task(rq, p, wakeup);
16017         p->se.on_rq = 1;
16018 +       return ret;
16019  }
16020  
16021  static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
16022 @@ -1863,8 +2088,15 @@ static void activate_task(struct rq *rq,
16023         if (task_contributes_to_load(p))
16024                 rq->nr_uninterruptible--;
16025  
16026 -       enqueue_task(rq, p, wakeup);
16027 -       inc_nr_running(rq);
16028 +       /*
16029 +        * Increment rq->nr_running only if enqueue_task() succeeds.
16030 +        * enqueue_task() can fail when the task being activated belongs
16031 +        * to a throttled group. In this case, the task gets enqueued to
16032 +        * the throttled group and the group will be enqueued later when it
16033 +        * gets unthrottled. rq->nr_running gets incremented at that time.
16034 +        */
16035 +       if (!enqueue_task(rq, p, wakeup))
16036 +               inc_nr_running(rq);
16037  }
16038  
16039  /*
16040 @@ -2981,9 +3213,17 @@ EXPORT_SYMBOL(avenrun);
16041   */
16042  void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
16043  {
16044 -       loads[0] = (avenrun[0] + offset) << shift;
16045 -       loads[1] = (avenrun[1] + offset) << shift;
16046 -       loads[2] = (avenrun[2] + offset) << shift;
16047 +       if (vx_flags(VXF_VIRT_LOAD, 0)) {
16048 +               struct vx_info *vxi = current_vx_info();
16049 +
16050 +               loads[0] = (vxi->cvirt.load[0] + offset) << shift;
16051 +               loads[1] = (vxi->cvirt.load[1] + offset) << shift;
16052 +               loads[2] = (vxi->cvirt.load[2] + offset) << shift;
16053 +       } else {
16054 +               loads[0] = (avenrun[0] + offset) << shift;
16055 +               loads[1] = (avenrun[1] + offset) << shift;
16056 +               loads[2] = (avenrun[2] + offset) << shift;
16057 +       }
16058  }
16059  
16060  static unsigned long
16061 @@ -3201,6 +3441,7 @@ int can_migrate_task(struct task_struct 
16062          * 1) running (obviously), or
16063          * 2) cannot be migrated to this CPU due to cpus_allowed, or
16064          * 3) are cache-hot on their current CPU.
16065 +        * 4) end up in throttled task groups on this CPU.
16066          */
16067         if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) {
16068                 schedstat_inc(p, se.nr_failed_migrations_affine);
16069 @@ -3214,6 +3455,18 @@ int can_migrate_task(struct task_struct 
16070         }
16071  
16072         /*
16073 +        * Don't migrate the task if it belongs to a
16074 +        * - throttled group on its current cpu
16075 +        * - throttled group on this_cpu
16076 +        * - group whose hierarchy is throttled on this_cpu
16077 +        */
16078 +       if (cfs_rq_throttled(cfs_rq_of(&p->se)) ||
16079 +               task_group_throttled(task_group(p), this_cpu)) {
16080 +               schedstat_inc(p, se.nr_failed_migrations_throttled);
16081 +               return 0;
16082 +       }
16083 +
16084 +       /*
16085          * Aggressive migration if:
16086          * 1) task is cache cold, or
16087          * 2) too many balance attempts have failed.
16088 @@ -4912,16 +5165,19 @@ void account_user_time(struct task_struc
16089                        cputime_t cputime_scaled)
16090  {
16091         struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
16092 +       struct vx_info *vxi = p->vx_info;  /* p is _always_ current */
16093         cputime64_t tmp;
16094 +       int nice = (TASK_NICE(p) > 0);
16095  
16096         /* Add user time to process. */
16097         p->utime = cputime_add(p->utime, cputime);
16098         p->utimescaled = cputime_add(p->utimescaled, cputime_scaled);
16099 +       vx_account_user(vxi, cputime, nice);
16100         account_group_user_time(p, cputime);
16101  
16102         /* Add user time to cpustat. */
16103         tmp = cputime_to_cputime64(cputime);
16104 -       if (TASK_NICE(p) > 0)
16105 +       if (nice)
16106                 cpustat->nice = cputime64_add(cpustat->nice, tmp);
16107         else
16108                 cpustat->user = cputime64_add(cpustat->user, tmp);
16109 @@ -4967,6 +5223,7 @@ void account_system_time(struct task_str
16110                          cputime_t cputime, cputime_t cputime_scaled)
16111  {
16112         struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
16113 +       struct vx_info *vxi = p->vx_info;  /* p is _always_ current */
16114         cputime64_t tmp;
16115  
16116         if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
16117 @@ -4977,6 +5234,7 @@ void account_system_time(struct task_str
16118         /* Add system time to process. */
16119         p->stime = cputime_add(p->stime, cputime);
16120         p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
16121 +       vx_account_system(vxi, cputime, 0 /* do we have idle time? */);
16122         account_group_system_time(p, cputime);
16123  
16124         /* Add system time to cpustat. */
16125 @@ -5896,8 +6154,10 @@ void rt_mutex_setprio(struct task_struct
16126         oldprio = p->prio;
16127         on_rq = p->se.on_rq;
16128         running = task_current(rq, p);
16129 -       if (on_rq)
16130 +       if (on_rq) {
16131                 dequeue_task(rq, p, 0);
16132 +               dec_nr_running(rq);
16133 +       }
16134         if (running)
16135                 p->sched_class->put_prev_task(rq, p);
16136  
16137 @@ -5911,7 +6171,8 @@ void rt_mutex_setprio(struct task_struct
16138         if (running)
16139                 p->sched_class->set_curr_task(rq);
16140         if (on_rq) {
16141 -               enqueue_task(rq, p, 0);
16142 +               if (!enqueue_task(rq, p, 0))
16143 +                       inc_nr_running(rq);
16144  
16145                 check_class_changed(rq, p, prev_class, oldprio, running);
16146         }
16147 @@ -5945,8 +6206,10 @@ void set_user_nice(struct task_struct *p
16148                 goto out_unlock;
16149         }
16150         on_rq = p->se.on_rq;
16151 -       if (on_rq)
16152 +       if (on_rq) {
16153                 dequeue_task(rq, p, 0);
16154 +               dec_nr_running(rq);
16155 +       }
16156  
16157         p->static_prio = NICE_TO_PRIO(nice);
16158         set_load_weight(p);
16159 @@ -5955,7 +6218,8 @@ void set_user_nice(struct task_struct *p
16160         delta = p->prio - old_prio;
16161  
16162         if (on_rq) {
16163 -               enqueue_task(rq, p, 0);
16164 +               if (!enqueue_task(rq, p, 0))
16165 +                       inc_nr_running(rq);
16166                 /*
16167                  * If the task increased its priority or is running and
16168                  * lowered its priority, then reschedule its CPU:
16169 @@ -6012,7 +6276,7 @@ SYSCALL_DEFINE1(nice, int, increment)
16170                 nice = 19;
16171  
16172         if (increment < 0 && !can_nice(current, nice))
16173 -               return -EPERM;
16174 +               return vx_flags(VXF_IGNEG_NICE, 0) ? 0 : -EPERM;
16175  
16176         retval = security_task_setnice(current, nice);
16177         if (retval)
16178 @@ -9119,6 +9383,7 @@ static void init_tg_cfs_entry(struct tas
16179         struct rq *rq = cpu_rq(cpu);
16180         tg->cfs_rq[cpu] = cfs_rq;
16181         init_cfs_rq(cfs_rq, rq);
16182 +       init_cfs_hard_limits(cfs_rq, tg);
16183         cfs_rq->tg = tg;
16184         if (add)
16185                 list_add(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
16186 @@ -9248,6 +9513,10 @@ void __init sched_init(void)
16187  #endif /* CONFIG_USER_SCHED */
16188  #endif /* CONFIG_RT_GROUP_SCHED */
16189  
16190 +#ifdef CONFIG_FAIR_GROUP_SCHED
16191 +       init_cfs_bandwidth(&init_task_group);
16192 +#endif
16193 +
16194  #ifdef CONFIG_GROUP_SCHED
16195         list_add(&init_task_group.list, &task_groups);
16196         INIT_LIST_HEAD(&init_task_group.children);
16197 @@ -9264,6 +9533,7 @@ void __init sched_init(void)
16198  
16199                 rq = cpu_rq(i);
16200                 spin_lock_init(&rq->lock);
16201 +               spin_lock_init(&rq->runtime_lock);
16202                 rq->nr_running = 0;
16203                 rq->calc_load_active = 0;
16204                 rq->calc_load_update = jiffies + LOAD_FREQ;
16205 @@ -9537,6 +9807,7 @@ static void free_fair_sched_group(struct
16206  {
16207         int i;
16208  
16209 +       destroy_cfs_bandwidth(tg);
16210         for_each_possible_cpu(i) {
16211                 if (tg->cfs_rq)
16212                         kfree(tg->cfs_rq[i]);
16213 @@ -9563,6 +9834,7 @@ int alloc_fair_sched_group(struct task_g
16214         if (!tg->se)
16215                 goto err;
16216  
16217 +       init_cfs_bandwidth(tg);
16218         tg->shares = NICE_0_LOAD;
16219  
16220         for_each_possible_cpu(i) {
16221 @@ -9795,8 +10067,10 @@ void sched_move_task(struct task_struct 
16222         running = task_current(rq, tsk);
16223         on_rq = tsk->se.on_rq;
16224  
16225 -       if (on_rq)
16226 +       if (on_rq) {
16227                 dequeue_task(rq, tsk, 0);
16228 +               dec_nr_running(rq);
16229 +       }
16230         if (unlikely(running))
16231                 tsk->sched_class->put_prev_task(rq, tsk);
16232  
16233 @@ -9810,7 +10084,8 @@ void sched_move_task(struct task_struct 
16234         if (unlikely(running))
16235                 tsk->sched_class->set_curr_task(rq);
16236         if (on_rq)
16237 -               enqueue_task(rq, tsk, 0);
16238 +               if (!enqueue_task(rq, tsk, 0))
16239 +                       inc_nr_running(rq);
16240  
16241         task_rq_unlock(rq, &flags);
16242  }
16243 @@ -10257,6 +10532,134 @@ static u64 cpu_shares_read_u64(struct cg
16244  
16245         return (u64) tg->shares;
16246  }
16247 +
16248 +#ifdef CONFIG_CFS_HARD_LIMITS
16249 +
16250 +static int tg_set_cfs_bandwidth(struct task_group *tg,
16251 +               u64 cfs_period, u64 cfs_runtime)
16252 +{
16253 +       int i, err = 0;
16254 +
16255 +       spin_lock_irq(&tg->cfs_bandwidth.cfs_runtime_lock);
16256 +       tg->cfs_bandwidth.cfs_period = ns_to_ktime(cfs_period);
16257 +       tg->cfs_bandwidth.cfs_runtime = cfs_runtime;
16258 +
16259 +       for_each_possible_cpu(i) {
16260 +               struct cfs_rq *cfs_rq = tg->cfs_rq[i];
16261 +
16262 +               rq_runtime_lock(rq_of(cfs_rq));
16263 +               cfs_rq->cfs_runtime = cfs_runtime;
16264 +               rq_runtime_unlock(rq_of(cfs_rq));
16265 +       }
16266 +
16267 +       start_cfs_bandwidth(tg);
16268 +       spin_unlock_irq(&tg->cfs_bandwidth.cfs_runtime_lock);
16269 +       return err;
16270 +}
16271 +
16272 +int tg_set_cfs_runtime(struct task_group *tg, long cfs_runtime_us)
16273 +{
16274 +       u64 cfs_runtime, cfs_period;
16275 +
16276 +       cfs_period = ktime_to_ns(tg->cfs_bandwidth.cfs_period);
16277 +       cfs_runtime = (u64)cfs_runtime_us * NSEC_PER_USEC;
16278 +       if (cfs_runtime_us < 0)
16279 +               cfs_runtime = RUNTIME_INF;
16280 +
16281 +       return tg_set_cfs_bandwidth(tg, cfs_period, cfs_runtime);
16282 +}
16283 +
16284 +long tg_get_cfs_runtime(struct task_group *tg)
16285 +{
16286 +       u64 cfs_runtime_us;
16287 +
16288 +       if (tg->cfs_bandwidth.cfs_runtime == RUNTIME_INF)
16289 +               return -1;
16290 +
16291 +       cfs_runtime_us = tg->cfs_bandwidth.cfs_runtime;
16292 +       do_div(cfs_runtime_us, NSEC_PER_USEC);
16293 +       return cfs_runtime_us;
16294 +}
16295 +
16296 +int tg_set_cfs_period(struct task_group *tg, long cfs_period_us)
16297 +{
16298 +       u64 cfs_runtime, cfs_period;
16299 +
16300 +       cfs_period = (u64)cfs_period_us * NSEC_PER_USEC;
16301 +       cfs_runtime = tg->cfs_bandwidth.cfs_runtime;
16302 +
16303 +       if (cfs_period == 0)
16304 +               return -EINVAL;
16305 +
16306 +       return tg_set_cfs_bandwidth(tg, cfs_period, cfs_runtime);
16307 +}
16308 +
16309 +long tg_get_cfs_period(struct task_group *tg)
16310 +{
16311 +       u64 cfs_period_us;
16312 +
16313 +       cfs_period_us = ktime_to_ns(tg->cfs_bandwidth.cfs_period);
16314 +       do_div(cfs_period_us, NSEC_PER_USEC);
16315 +       return cfs_period_us;
16316 +}
16317 +
16318 +int tg_set_hard_limit_enabled(struct task_group *tg, u64 val)
16319 +{
16320 +       local_irq_disable();
16321 +       spin_lock(&tg->cfs_bandwidth.cfs_runtime_lock);
16322 +       if (val > 0) {
16323 +               tg->hard_limit_enabled = 1;
16324 +               start_cfs_bandwidth(tg);
16325 +               spin_unlock(&tg->cfs_bandwidth.cfs_runtime_lock);
16326 +       } else {
16327 +               destroy_cfs_bandwidth(tg);
16328 +               tg->hard_limit_enabled = 0;
16329 +               spin_unlock(&tg->cfs_bandwidth.cfs_runtime_lock);
16330 +               /*
16331 +                * Hard limiting is being disabled for this group.
16332 +                * Refresh runtimes and put the throttled entities
16333 +                * of the group back onto runqueue.
16334 +                */
16335 +               do_sched_cfs_period_timer(&tg->cfs_bandwidth);
16336 +       }
16337 +       local_irq_enable();
16338 +       return 0;
16339 +}
16340 +
16341 +static s64 cpu_cfs_runtime_read_s64(struct cgroup *cgrp, struct cftype *cft)
16342 +{
16343 +       return tg_get_cfs_runtime(cgroup_tg(cgrp));
16344 +}
16345 +
16346 +static int cpu_cfs_runtime_write_s64(struct cgroup *cgrp, struct cftype *cftype,
16347 +                               s64 cfs_runtime_us)
16348 +{
16349 +       return tg_set_cfs_runtime(cgroup_tg(cgrp), cfs_runtime_us);
16350 +}
16351 +
16352 +static u64 cpu_cfs_period_read_u64(struct cgroup *cgrp, struct cftype *cft)
16353 +{
16354 +       return tg_get_cfs_period(cgroup_tg(cgrp));
16355 +}
16356 +
16357 +static int cpu_cfs_period_write_u64(struct cgroup *cgrp, struct cftype *cftype,
16358 +                               u64 cfs_period_us)
16359 +{
16360 +       return tg_set_cfs_period(cgroup_tg(cgrp), cfs_period_us);
16361 +}
16362 +
16363 +static u64 cpu_cfs_hard_limit_read_u64(struct cgroup *cgrp, struct cftype *cft)
16364 +{
16365 +       return cfs_bandwidth_enabled(cgroup_tg(cgrp));
16366 +}
16367 +
16368 +static int cpu_cfs_hard_limit_write_u64(struct cgroup *cgrp,
16369 +               struct cftype *cftype, u64 val)
16370 +{
16371 +       return tg_set_hard_limit_enabled(cgroup_tg(cgrp), val);
16372 +}
16373 +
16374 +#endif /* CONFIG_CFS_HARD_LIMITS */
16375  #endif /* CONFIG_FAIR_GROUP_SCHED */
16376  
16377  #ifdef CONFIG_RT_GROUP_SCHED
16378 @@ -10290,6 +10693,23 @@ static struct cftype cpu_files[] = {
16379                 .read_u64 = cpu_shares_read_u64,
16380                 .write_u64 = cpu_shares_write_u64,
16381         },
16382 +#ifdef CONFIG_CFS_HARD_LIMITS
16383 +       {
16384 +               .name = "cfs_runtime_us",
16385 +               .read_s64 = cpu_cfs_runtime_read_s64,
16386 +               .write_s64 = cpu_cfs_runtime_write_s64,
16387 +       },
16388 +       {
16389 +               .name = "cfs_period_us",
16390 +               .read_u64 = cpu_cfs_period_read_u64,
16391 +               .write_u64 = cpu_cfs_period_write_u64,
16392 +       },
16393 +       {
16394 +               .name = "cfs_hard_limit",
16395 +               .read_u64 = cpu_cfs_hard_limit_read_u64,
16396 +               .write_u64 = cpu_cfs_hard_limit_write_u64,
16397 +       },
16398 +#endif /* CONFIG_CFS_HARD_LIMITS */
16399  #endif
16400  #ifdef CONFIG_RT_GROUP_SCHED
16401         {
16402 diff -NurpP --minimal linux-2.6.31.6/kernel/sched_debug.c linux-2.6.31.6-vs2.3.0.36.24/kernel/sched_debug.c
16403 --- linux-2.6.31.6/kernel/sched_debug.c 2009-09-10 15:26:28.000000000 +0200
16404 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/sched_debug.c   2009-10-06 04:39:26.000000000 +0200
16405 @@ -80,6 +80,11 @@ static void print_cfs_group_stats(struct
16406         PN(se->wait_max);
16407         PN(se->wait_sum);
16408         P(se->wait_count);
16409 +#ifdef CONFIG_CFS_HARD_LIMITS
16410 +       PN(se->throttle_max);
16411 +       PN(se->throttle_sum);
16412 +       P(se->throttle_count);
16413 +#endif
16414  #endif
16415         P(se->load.weight);
16416  #undef PN
16417 @@ -214,6 +219,18 @@ void print_cfs_rq(struct seq_file *m, in
16418  #ifdef CONFIG_SMP
16419         SEQ_printf(m, "  .%-30s: %lu\n", "shares", cfs_rq->shares);
16420  #endif
16421 +       SEQ_printf(m, "  .%-30s: %ld\n", "nr_tasks_running",
16422 +                       cfs_rq->nr_tasks_running);
16423 +#ifdef CONFIG_CFS_HARD_LIMITS
16424 +       spin_lock_irqsave(&rq->lock, flags);
16425 +       SEQ_printf(m, "  .%-30s: %d\n", "cfs_throttled",
16426 +                       cfs_rq->cfs_throttled);
16427 +       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "cfs_time",
16428 +                       SPLIT_NS(cfs_rq->cfs_time));
16429 +       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "cfs_runtime",
16430 +                       SPLIT_NS(cfs_rq->cfs_runtime));
16431 +       spin_unlock_irqrestore(&rq->lock, flags);
16432 +#endif
16433         print_cfs_group_stats(m, cpu, cfs_rq->tg);
16434  #endif
16435  }
16436 @@ -310,7 +327,7 @@ static int sched_debug_show(struct seq_f
16437         u64 now = ktime_to_ns(ktime_get());
16438         int cpu;
16439  
16440 -       SEQ_printf(m, "Sched Debug Version: v0.09, %s %.*s\n",
16441 +       SEQ_printf(m, "Sched Debug Version: v0.10, %s %.*s\n",
16442                 init_utsname()->release,
16443                 (int)strcspn(init_utsname()->version, " "),
16444                 init_utsname()->version);
16445 @@ -415,6 +432,7 @@ void proc_sched_show_task(struct task_st
16446         P(se.nr_failed_migrations_affine);
16447         P(se.nr_failed_migrations_running);
16448         P(se.nr_failed_migrations_hot);
16449 +       P(se.nr_failed_migrations_throttled);
16450         P(se.nr_forced_migrations);
16451         P(se.nr_forced2_migrations);
16452         P(se.nr_wakeups);
16453 @@ -489,6 +507,7 @@ void proc_sched_set_task(struct task_str
16454         p->se.nr_failed_migrations_affine       = 0;
16455         p->se.nr_failed_migrations_running      = 0;
16456         p->se.nr_failed_migrations_hot          = 0;
16457 +       p->se.nr_failed_migrations_throttled    = 0;
16458         p->se.nr_forced_migrations              = 0;
16459         p->se.nr_forced2_migrations             = 0;
16460         p->se.nr_wakeups                        = 0;
16461 diff -NurpP --minimal linux-2.6.31.6/kernel/sched_fair.c linux-2.6.31.6-vs2.3.0.36.24/kernel/sched_fair.c
16462 --- linux-2.6.31.6/kernel/sched_fair.c  2009-09-10 15:26:28.000000000 +0200
16463 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/sched_fair.c    2009-10-06 04:39:37.000000000 +0200
16464 @@ -186,6 +186,286 @@ find_matching_se(struct sched_entity **s
16465         }
16466  }
16467  
16468 +#ifdef CONFIG_CFS_HARD_LIMITS
16469 +
16470 +static inline void update_stats_throttle_start(struct cfs_rq *cfs_rq,
16471 +                       struct sched_entity *se)
16472 +{
16473 +       schedstat_set(se->throttle_start, rq_of(cfs_rq)->clock);
16474 +}
16475 +
16476 +static inline void update_stats_throttle_end(struct cfs_rq *cfs_rq,
16477 +                       struct sched_entity *se)
16478 +{
16479 +       schedstat_set(se->throttle_max, max(se->throttle_max,
16480 +                       rq_of(cfs_rq)->clock - se->throttle_start));
16481 +       schedstat_set(se->throttle_count, se->throttle_count + 1);
16482 +       schedstat_set(se->throttle_sum, se->throttle_sum +
16483 +                       rq_of(cfs_rq)->clock - se->throttle_start);
16484 +       schedstat_set(se->throttle_start, 0);
16485 +}
16486 +
16487 +static void double_rq_runtime_lock(struct rq *rq1, struct rq *rq2)
16488 +       __acquires(rq1->runtime_lock)
16489 +       __acquires(rq2->runtime_lock)
16490 +{
16491 +       BUG_ON(!irqs_disabled());
16492 +       if (rq1 == rq2) {
16493 +               spin_lock(&rq1->runtime_lock);
16494 +               __acquire(rq2->runtime_lock);   /* Fake it out ;) */
16495 +       } else {
16496 +               if (rq1 < rq2) {
16497 +                       spin_lock(&rq1->runtime_lock);
16498 +                       spin_lock_nested(&rq2->runtime_lock,
16499 +                                       SINGLE_DEPTH_NESTING);
16500 +               } else {
16501 +                       spin_lock(&rq2->runtime_lock);
16502 +                       spin_lock_nested(&rq1->runtime_lock,
16503 +                                       SINGLE_DEPTH_NESTING);
16504 +               }
16505 +       }
16506 +       update_rq_clock(rq1);
16507 +       update_rq_clock(rq2);
16508 +}
16509 +
16510 +static void double_rq_runtime_unlock(struct rq *rq1, struct rq *rq2)
16511 +       __releases(rq1->runtime_lock)
16512 +       __releases(rq2->runtime_lock)
16513 +{
16514 +       spin_unlock(&rq1->runtime_lock);
16515 +       if (rq1 != rq2)
16516 +               spin_unlock(&rq2->runtime_lock);
16517 +       else
16518 +               __release(rq2->runtime_lock);
16519 +}
16520 +
16521 +static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
16522 +{
16523 +       return cfs_rq->cfs_throttled;
16524 +}
16525 +
16526 +/*
16527 + * Ran out of runtime, check if we can borrow some from others
16528 + * instead of getting throttled right away.
16529 + */
16530 +static void do_cfs_balance_runtime(struct cfs_rq *cfs_rq)
16531 +{
16532 +       struct rq *rq = rq_of(cfs_rq);
16533 +       struct cfs_bandwidth *cfs_b = &cfs_rq->tg->cfs_bandwidth;
16534 +       const struct cpumask *span = sched_bw_period_mask();
16535 +       int i, weight;
16536 +       u64 cfs_period;
16537 +       struct task_group *tg = container_of(cfs_b, struct task_group,
16538 +                               cfs_bandwidth);
16539 +
16540 +       weight = cpumask_weight(span);
16541 +       spin_lock(&cfs_b->cfs_runtime_lock);
16542 +       cfs_period = ktime_to_ns(cfs_b->cfs_period);
16543 +
16544 +       for_each_cpu(i, span) {
16545 +               struct cfs_rq *borrow_cfs_rq = tg->cfs_rq[i];
16546 +               struct rq *borrow_rq = rq_of(borrow_cfs_rq);
16547 +               s64 diff;
16548 +
16549 +               if (borrow_cfs_rq == cfs_rq)
16550 +                       continue;
16551 +
16552 +               double_rq_runtime_lock(rq, borrow_rq);
16553 +               if (borrow_cfs_rq->cfs_runtime == RUNTIME_INF) {
16554 +                       double_rq_runtime_unlock(rq, borrow_rq);
16555 +                       continue;
16556 +               }
16557 +
16558 +               diff = borrow_cfs_rq->cfs_runtime - borrow_cfs_rq->cfs_time;
16559 +               if (diff > 0) {
16560 +                       diff = div_u64((u64)diff, weight);
16561 +                       if (cfs_rq->cfs_runtime + diff > cfs_period)
16562 +                               diff = cfs_period - cfs_rq->cfs_runtime;
16563 +                       borrow_cfs_rq->cfs_runtime -= diff;
16564 +                       cfs_rq->cfs_runtime += diff;
16565 +                       if (cfs_rq->cfs_runtime == cfs_period) {
16566 +                               double_rq_runtime_unlock(rq, borrow_rq);
16567 +                               break;
16568 +                       }
16569 +               }
16570 +               double_rq_runtime_unlock(rq, borrow_rq);
16571 +       }
16572 +       spin_unlock(&cfs_b->cfs_runtime_lock);
16573 +}
16574 +
16575 +/*
16576 + * Called with rq->runtime_lock held.
16577 + */
16578 +static void cfs_balance_runtime(struct cfs_rq *cfs_rq)
16579 +{
16580 +       struct rq *rq = rq_of(cfs_rq);
16581 +
16582 +       rq_runtime_unlock(rq);
16583 +       do_cfs_balance_runtime(cfs_rq);
16584 +       rq_runtime_lock(rq);
16585 +}
16586 +
16587 +/*
16588 + * Check if group entity exceeded its runtime. If so, mark the cfs_rq as
16589 + * throttled and mark the current task for rescheduling.
16590 + */
16591 +static void sched_cfs_runtime_exceeded(struct sched_entity *se,
16592 +       struct task_struct *tsk_curr, unsigned long delta_exec)
16593 +{
16594 +       struct cfs_rq *cfs_rq;
16595 +
16596 +       cfs_rq = group_cfs_rq(se);
16597 +
16598 +       if (!cfs_bandwidth_enabled(cfs_rq->tg))
16599 +               return;
16600 +
16601 +       if (cfs_rq->cfs_runtime == RUNTIME_INF)
16602 +               return;
16603 +
16604 +       cfs_rq->cfs_time += delta_exec;
16605 +
16606 +       if (cfs_rq_throttled(cfs_rq))
16607 +               return;
16608 +
16609 +       if (cfs_rq->cfs_time > cfs_rq->cfs_runtime)
16610 +               cfs_balance_runtime(cfs_rq);
16611 +
16612 +       if (cfs_rq->cfs_time > cfs_rq->cfs_runtime) {
16613 +               cfs_rq->cfs_throttled = 1;
16614 +               update_stats_throttle_start(cfs_rq, se);
16615 +               resched_task(tsk_curr);
16616 +       }
16617 +}
16618 +
16619 +/*
16620 + * Check if the entity is throttled.
16621 + */
16622 +static int entity_throttled(struct sched_entity *se)
16623 +{
16624 +       struct cfs_rq *cfs_rq;
16625 +
16626 +       /* Only group entities can be throttled */
16627 +       if (entity_is_task(se))
16628 +               return 0;
16629 +
16630 +       cfs_rq = group_cfs_rq(se);
16631 +       if (cfs_rq_throttled(cfs_rq))
16632 +               return 1;
16633 +       return 0;
16634 +}
16635 +
16636 +int task_group_throttled(struct task_group *tg, int cpu)
16637 +{
16638 +       struct sched_entity *se = tg->se[cpu];
16639 +
16640 +       for_each_sched_entity(se) {
16641 +               if (entity_throttled(se))
16642 +                       return 1;
16643 +       }
16644 +       return 0;
16645 +}
16646 +
16647 +static void enqueue_entity_locked(struct cfs_rq *cfs_rq,
16648 +               struct sched_entity *se, int wakeup);
16649 +static void add_cfs_rq_tasks_running(struct sched_entity *se,
16650 +               unsigned long count);
16651 +static void sub_cfs_rq_tasks_running(struct sched_entity *se,
16652 +               unsigned long count);
16653 +
16654 +static void enqueue_throttled_entity(struct rq *rq, struct sched_entity *se)
16655 +{
16656 +       unsigned long nr_tasks = 0;
16657 +       struct sched_entity *se_tmp = se;
16658 +       int throttled = 0;
16659 +
16660 +       for_each_sched_entity(se) {
16661 +               if (se->on_rq)
16662 +                       break;
16663 +
16664 +               if (entity_throttled(se)) {
16665 +                       throttled = 1;
16666 +                       break;
16667 +               }
16668 +
16669 +               enqueue_entity_locked(cfs_rq_of(se), se, 0);
16670 +               nr_tasks += group_cfs_rq(se)->nr_tasks_running;
16671 +       }
16672 +
16673 +       if (!nr_tasks)
16674 +               return;
16675 +
16676 +       /*
16677 +        * Add the number of tasks this entity has to
16678 +        * all of its parent entities.
16679 +        */
16680 +       add_cfs_rq_tasks_running(se_tmp, nr_tasks);
16681 +
16682 +       /*
16683 +        * Add the number of tasks this entity has to
16684 +        * this cpu's rq only if the entity got enqueued all the
16685 +        * way up without any throttled entity in the hierarchy.
16686 +        */
16687 +       if (!throttled)
16688 +               rq->nr_running += nr_tasks;
16689 +}
16690 +
16691 +/*
16692 + * Refresh runtimes of all cfs_rqs in this group, i.e.,
16693 + * refresh runtimes of the representative cfs_rq of this
16694 + * tg on all cpus. Enqueue any throttled entity back.
16695 + */
16696 +void do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b)
16697 +{
16698 +       int i;
16699 +       const struct cpumask *span = sched_bw_period_mask();
16700 +       struct task_group *tg = container_of(cfs_b, struct task_group,
16701 +                                       cfs_bandwidth);
16702 +       unsigned long flags;
16703 +
16704 +       for_each_cpu(i, span) {
16705 +               struct rq *rq = cpu_rq(i);
16706 +               struct cfs_rq *cfs_rq = tg->cfs_rq[i];
16707 +               struct sched_entity *se = tg->se[i];
16708 +
16709 +               spin_lock_irqsave(&rq->lock, flags);
16710 +               rq_runtime_lock(rq);
16711 +               cfs_rq->cfs_time = 0;
16712 +               if (cfs_rq_throttled(cfs_rq)) {
16713 +                       update_rq_clock(rq);
16714 +                       update_stats_throttle_end(cfs_rq, se);
16715 +                       cfs_rq->cfs_throttled = 0;
16716 +                       enqueue_throttled_entity(rq, se);
16717 +               }
16718 +               rq_runtime_unlock(rq);
16719 +               spin_unlock_irqrestore(&rq->lock, flags);
16720 +       }
16721 +}
16722 +
16723 +#else
16724 +
16725 +static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
16726 +{
16727 +       return 0;
16728 +}
16729 +
16730 +int task_group_throttled(struct task_group *tg, int cpu)
16731 +{
16732 +       return 0;
16733 +}
16734 +
16735 +static void sched_cfs_runtime_exceeded(struct sched_entity *se,
16736 +       struct task_struct *tsk_curr, unsigned long delta_exec)
16737 +{
16738 +       return;
16739 +}
16740 +
16741 +static int entity_throttled(struct sched_entity *se)
16742 +{
16743 +       return 0;
16744 +}
16745 +
16746 +#endif /* CONFIG_CFS_HARD_LIMITS */
16747 +
16748  #else  /* CONFIG_FAIR_GROUP_SCHED */
16749  
16750  static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
16751 @@ -241,8 +521,47 @@ find_matching_se(struct sched_entity **s
16752  {
16753  }
16754  
16755 +static void sched_cfs_runtime_exceeded(struct sched_entity *se,
16756 +       struct task_struct *tsk_curr, unsigned long delta_exec)
16757 +{
16758 +       return;
16759 +}
16760 +
16761 +static int entity_throttled(struct sched_entity *se)
16762 +{
16763 +       return 0;
16764 +}
16765 +
16766  #endif /* CONFIG_FAIR_GROUP_SCHED */
16767  
16768 +static void add_cfs_rq_tasks_running(struct sched_entity *se,
16769 +               unsigned long count)
16770 +{
16771 +       struct cfs_rq *cfs_rq;
16772 +
16773 +       for_each_sched_entity(se) {
16774 +               /*
16775 +                * If any entity in the hierarchy is throttled, don't
16776 +                * propagate the task count up since this entity isn't
16777 +                * on rq yet.
16778 +                */
16779 +               if (entity_throttled(se))
16780 +                       break;
16781 +               cfs_rq = cfs_rq_of(se);
16782 +               cfs_rq->nr_tasks_running += count;
16783 +       }
16784 +}
16785 +
16786 +static void sub_cfs_rq_tasks_running(struct sched_entity *se,
16787 +               unsigned long count)
16788 +{
16789 +       struct cfs_rq *cfs_rq;
16790 +
16791 +       for_each_sched_entity(se) {
16792 +               cfs_rq = cfs_rq_of(se);
16793 +               cfs_rq->nr_tasks_running -= count;
16794 +       }
16795 +}
16796  
16797  /**************************************************************
16798   * Scheduling class tree data structure manipulation methods:
16799 @@ -481,10 +800,12 @@ __update_curr(struct cfs_rq *cfs_rq, str
16800         update_min_vruntime(cfs_rq);
16801  }
16802  
16803 -static void update_curr(struct cfs_rq *cfs_rq)
16804 +static void update_curr_common(struct cfs_rq *cfs_rq)
16805  {
16806         struct sched_entity *curr = cfs_rq->curr;
16807 -       u64 now = rq_of(cfs_rq)->clock;
16808 +       struct rq *rq = rq_of(cfs_rq);
16809 +       struct task_struct *tsk_curr = rq->curr;
16810 +       u64 now = rq->clock;
16811         unsigned long delta_exec;
16812  
16813         if (unlikely(!curr))
16814 @@ -507,9 +828,23 @@ static void update_curr(struct cfs_rq *c
16815  
16816                 cpuacct_charge(curtask, delta_exec);
16817                 account_group_exec_runtime(curtask, delta_exec);
16818 +       } else {
16819 +               sched_cfs_runtime_exceeded(curr, tsk_curr, delta_exec);
16820         }
16821  }
16822  
16823 +static void update_curr(struct cfs_rq *cfs_rq)
16824 +{
16825 +       rq_runtime_lock(rq_of(cfs_rq));
16826 +       update_curr_common(cfs_rq);
16827 +       rq_runtime_unlock(rq_of(cfs_rq));
16828 +}
16829 +
16830 +static inline void update_curr_locked(struct cfs_rq *cfs_rq)
16831 +{
16832 +       update_curr_common(cfs_rq);
16833 +}
16834 +
16835  static inline void
16836  update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
16837  {
16838 @@ -713,13 +1048,9 @@ place_entity(struct cfs_rq *cfs_rq, stru
16839         se->vruntime = vruntime;
16840  }
16841  
16842 -static void
16843 -enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
16844 +static void enqueue_entity_common(struct cfs_rq *cfs_rq,
16845 +               struct sched_entity *se, int wakeup)
16846  {
16847 -       /*
16848 -        * Update run-time statistics of the 'current'.
16849 -        */
16850 -       update_curr(cfs_rq);
16851         account_entity_enqueue(cfs_rq, se);
16852  
16853         if (wakeup) {
16854 @@ -731,6 +1062,29 @@ enqueue_entity(struct cfs_rq *cfs_rq, st
16855         check_spread(cfs_rq, se);
16856         if (se != cfs_rq->curr)
16857                 __enqueue_entity(cfs_rq, se);
16858 +
16859 +       if (entity_is_task(se))
16860 +               vx_activate_task(task_of(se));
16861 +}
16862 +
16863 +static void enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
16864 +               int wakeup)
16865 +{
16866 +       /*
16867 +        * Update run-time statistics of the 'current'.
16868 +        */
16869 +       update_curr(cfs_rq);
16870 +       enqueue_entity_common(cfs_rq, se, wakeup);
16871 +}
16872 +
16873 +static void enqueue_entity_locked(struct cfs_rq *cfs_rq,
16874 +               struct sched_entity *se, int wakeup)
16875 +{
16876 +       /*
16877 +        * Update run-time statistics of the 'current'.
16878 +        */
16879 +       update_curr_locked(cfs_rq);
16880 +       enqueue_entity_common(cfs_rq, se, wakeup);
16881  }
16882  
16883  static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
16884 @@ -774,6 +1128,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
16885  
16886         if (se != cfs_rq->curr)
16887                 __dequeue_entity(cfs_rq, se);
16888 +       if (entity_is_task(se))
16889 +               vx_deactivate_task(task_of(se));
16890         account_entity_dequeue(cfs_rq, se);
16891         update_min_vruntime(cfs_rq);
16892  }
16893 @@ -844,8 +1200,40 @@ static struct sched_entity *pick_next_en
16894         return se;
16895  }
16896  
16897 +/*
16898 + * Called from put_prev_entity()
16899 + * If a group entity (@se) is found to be throttled, it will not be put back
16900 + * on @cfs_rq, which is equivalent to dequeuing it.
16901 + */
16902 +static void dequeue_throttled_entity(struct cfs_rq *cfs_rq,
16903 +               struct sched_entity *se)
16904 +{
16905 +       unsigned long nr_tasks = group_cfs_rq(se)->nr_tasks_running;
16906 +
16907 +       __clear_buddies(cfs_rq, se);
16908 +       account_entity_dequeue(cfs_rq, se);
16909 +       cfs_rq->curr = NULL;
16910 +
16911 +       if (!nr_tasks)
16912 +               return;
16913 +
16914 +       /*
16915 +        * Decrement the number of tasks this entity has from
16916 +        * all of its parent entities.
16917 +        */
16918 +       sub_cfs_rq_tasks_running(se, nr_tasks);
16919 +
16920 +       /*
16921 +        * Decrement the number of tasks this entity has from
16922 +        * this cpu's rq.
16923 +        */
16924 +       rq_of(cfs_rq)->nr_running -= nr_tasks;
16925 +}
16926 +
16927  static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
16928  {
16929 +       struct cfs_rq *gcfs_rq = group_cfs_rq(prev);
16930 +
16931         /*
16932          * If still on the runqueue then deactivate_task()
16933          * was not called and update_curr() has to be done:
16934 @@ -855,6 +1243,18 @@ static void put_prev_entity(struct cfs_r
16935  
16936         check_spread(cfs_rq, prev);
16937         if (prev->on_rq) {
16938 +               /*
16939 +                * If the group entity is throttled or if it has no
16940 +                * child entities, then don't enqueue it back.
16941 +                */
16942 +               rq_runtime_lock(rq_of(cfs_rq));
16943 +               if (entity_throttled(prev) ||
16944 +                       (gcfs_rq && !gcfs_rq->nr_running)) {
16945 +                       dequeue_throttled_entity(cfs_rq, prev);
16946 +                       rq_runtime_unlock(rq_of(cfs_rq));
16947 +                       return;
16948 +               }
16949 +               rq_runtime_unlock(rq_of(cfs_rq));
16950                 update_stats_wait_start(cfs_rq, prev);
16951                 /* Put 'current' back into the tree. */
16952                 __enqueue_entity(cfs_rq, prev);
16953 @@ -955,21 +1355,32 @@ static inline void hrtick_update(struct 
16954   * The enqueue_task method is called before nr_running is
16955   * increased. Here we update the fair scheduling stats and
16956   * then put the task into the rbtree:
16957 + * Don't enqueue a throttled entity further into the hierarchy.
16958   */
16959 -static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
16960 +static int enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
16961  {
16962         struct cfs_rq *cfs_rq;
16963         struct sched_entity *se = &p->se;
16964 +       int throttled = 0;
16965  
16966 +       rq_runtime_lock(rq);
16967         for_each_sched_entity(se) {
16968                 if (se->on_rq)
16969                         break;
16970 +               if (entity_throttled(se)) {
16971 +                       throttled = 1;
16972 +                       break;
16973 +               }
16974                 cfs_rq = cfs_rq_of(se);
16975 -               enqueue_entity(cfs_rq, se, wakeup);
16976 +               enqueue_entity_locked(cfs_rq, se, wakeup);
16977                 wakeup = 1;
16978         }
16979  
16980 +       add_cfs_rq_tasks_running(&p->se, 1);
16981 +       rq_runtime_unlock(rq);
16982 +
16983         hrtick_update(rq);
16984 +       return throttled;
16985  }
16986  
16987  /*
16988 @@ -991,6 +1402,7 @@ static void dequeue_task_fair(struct rq 
16989                 sleep = 1;
16990         }
16991  
16992 +       sub_cfs_rq_tasks_running(&p->se, 1);
16993         hrtick_update(rq);
16994  }
16995  
16996 @@ -1518,6 +1930,7 @@ static struct task_struct *pick_next_tas
16997  
16998         do {
16999                 se = pick_next_entity(cfs_rq);
17000 +
17001                 /*
17002                  * If se was a buddy, clear it so that it will have to earn
17003                  * the favour again.
17004 @@ -1627,9 +2040,9 @@ load_balance_fair(struct rq *this_rq, in
17005                 u64 rem_load, moved_load;
17006  
17007                 /*
17008 -                * empty group
17009 +                * empty group or a group with no h_load (throttled)
17010                  */
17011 -               if (!busiest_cfs_rq->task_weight)
17012 +               if (!busiest_cfs_rq->task_weight || !busiest_h_load)
17013                         continue;
17014  
17015                 rem_load = (u64)rem_load_move * busiest_weight;
17016 diff -NurpP --minimal linux-2.6.31.6/kernel/sched_rt.c linux-2.6.31.6-vs2.3.0.36.24/kernel/sched_rt.c
17017 --- linux-2.6.31.6/kernel/sched_rt.c    2009-09-10 15:26:28.000000000 +0200
17018 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/sched_rt.c      2009-10-06 04:39:02.000000000 +0200
17019 @@ -222,18 +222,6 @@ static int rt_se_boosted(struct sched_rt
17020         return p->prio != p->normal_prio;
17021  }
17022  
17023 -#ifdef CONFIG_SMP
17024 -static inline const struct cpumask *sched_rt_period_mask(void)
17025 -{
17026 -       return cpu_rq(smp_processor_id())->rd->span;
17027 -}
17028 -#else
17029 -static inline const struct cpumask *sched_rt_period_mask(void)
17030 -{
17031 -       return cpu_online_mask;
17032 -}
17033 -#endif
17034 -
17035  static inline
17036  struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
17037  {
17038 @@ -283,11 +271,6 @@ static inline int rt_rq_throttled(struct
17039         return rt_rq->rt_throttled;
17040  }
17041  
17042 -static inline const struct cpumask *sched_rt_period_mask(void)
17043 -{
17044 -       return cpu_online_mask;
17045 -}
17046 -
17047  static inline
17048  struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
17049  {
17050 @@ -505,7 +488,7 @@ static int do_sched_rt_period_timer(stru
17051         if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
17052                 return 1;
17053  
17054 -       span = sched_rt_period_mask();
17055 +       span = sched_bw_period_mask();
17056         for_each_cpu(i, span) {
17057                 int enqueue = 0;
17058                 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
17059 @@ -863,7 +846,7 @@ static void dequeue_rt_entity(struct sch
17060  /*
17061   * Adding/removing a task to/from a priority array:
17062   */
17063 -static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
17064 +static int enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
17065  {
17066         struct sched_rt_entity *rt_se = &p->rt;
17067  
17068 @@ -876,6 +859,7 @@ static void enqueue_task_rt(struct rq *r
17069                 enqueue_pushable_task(rq, p);
17070  
17071         inc_cpu_load(rq, p->se.load.weight);
17072 +       return 0;
17073  }
17074  
17075  static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
17076 diff -NurpP --minimal linux-2.6.31.6/kernel/signal.c linux-2.6.31.6-vs2.3.0.36.24/kernel/signal.c
17077 --- linux-2.6.31.6/kernel/signal.c      2009-09-10 15:26:28.000000000 +0200
17078 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/signal.c        2009-09-10 16:43:45.000000000 +0200
17079 @@ -27,6 +27,8 @@
17080  #include <linux/freezer.h>
17081  #include <linux/pid_namespace.h>
17082  #include <linux/nsproxy.h>
17083 +#include <linux/vs_context.h>
17084 +#include <linux/vs_pid.h>
17085  #include <trace/events/sched.h>
17086  
17087  #include <asm/param.h>
17088 @@ -598,6 +600,14 @@ static int check_kill_permission(int sig
17089         if (!valid_signal(sig))
17090                 return -EINVAL;
17091  
17092 +       if ((info != SEND_SIG_NOINFO) &&
17093 +               (is_si_special(info) || !SI_FROMUSER(info)))
17094 +               goto skip;
17095 +
17096 +       vxdprintk(VXD_CBIT(misc, 7),
17097 +               "check_kill_permission(%d,%p,%p[#%u,%u])",
17098 +               sig, info, t, vx_task_xid(t), t->pid);
17099 +
17100         if (info != SEND_SIG_NOINFO && (is_si_special(info) || SI_FROMKERNEL(info)))
17101                 return 0;
17102  
17103 @@ -625,6 +635,20 @@ static int check_kill_permission(int sig
17104                 }
17105         }
17106  
17107 +       error = -EPERM;
17108 +       if (t->pid == 1 && current->xid)
17109 +               return error;
17110 +
17111 +       error = -ESRCH;
17112 +       /* FIXME: we shouldn't return ESRCH ever, to avoid
17113 +                 loops, maybe ENOENT or EACCES? */
17114 +       if (!vx_check(vx_task_xid(t), VS_WATCH_P | VS_IDENT)) {
17115 +               vxdprintk(current->xid || VXD_CBIT(misc, 7),
17116 +                       "signal %d[%p] xid mismatch %p[#%u,%u] xid=#%u",
17117 +                       sig, info, t, vx_task_xid(t), t->pid, current->xid);
17118 +               return error;
17119 +       }
17120 +skip:
17121         return security_task_kill(t, info, sig, 0);
17122  }
17123  
17124 @@ -1112,7 +1136,7 @@ int kill_pid_info(int sig, struct siginf
17125         rcu_read_lock();
17126  retry:
17127         p = pid_task(pid, PIDTYPE_PID);
17128 -       if (p) {
17129 +       if (p && vx_check(vx_task_xid(p), VS_IDENT)) {
17130                 error = group_send_sig_info(sig, info, p);
17131                 if (unlikely(error == -ESRCH))
17132                         /*
17133 @@ -1151,7 +1175,7 @@ int kill_pid_info_as_uid(int sig, struct
17134  
17135         read_lock(&tasklist_lock);
17136         p = pid_task(pid, PIDTYPE_PID);
17137 -       if (!p) {
17138 +       if (!p || !vx_check(vx_task_xid(p), VS_IDENT)) {
17139                 ret = -ESRCH;
17140                 goto out_unlock;
17141         }
17142 @@ -1205,8 +1229,10 @@ static int kill_something_info(int sig, 
17143                 struct task_struct * p;
17144  
17145                 for_each_process(p) {
17146 -                       if (task_pid_vnr(p) > 1 &&
17147 -                                       !same_thread_group(p, current)) {
17148 +                       if (vx_check(vx_task_xid(p), VS_ADMIN|VS_IDENT) &&
17149 +                               task_pid_vnr(p) > 1 &&
17150 +                               !same_thread_group(p, current) &&
17151 +                               !vx_current_initpid(p->pid)) {
17152                                 int err = group_send_sig_info(sig, info, p);
17153                                 ++count;
17154                                 if (err != -EPERM)
17155 @@ -1892,6 +1918,11 @@ relock:
17156                                 !sig_kernel_only(signr))
17157                         continue;
17158  
17159 +               /* virtual init is protected against user signals */
17160 +               if ((info->si_code == SI_USER) &&
17161 +                       vx_current_initpid(current->pid))
17162 +                       continue;
17163 +
17164                 if (sig_kernel_stop(signr)) {
17165                         /*
17166                          * The default action is to stop all threads in
17167 diff -NurpP --minimal linux-2.6.31.6/kernel/softirq.c linux-2.6.31.6-vs2.3.0.36.24/kernel/softirq.c
17168 --- linux-2.6.31.6/kernel/softirq.c     2009-09-10 15:26:28.000000000 +0200
17169 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/softirq.c       2009-09-10 16:33:13.000000000 +0200
17170 @@ -24,6 +24,7 @@
17171  #include <linux/ftrace.h>
17172  #include <linux/smp.h>
17173  #include <linux/tick.h>
17174 +#include <linux/vs_context.h>
17175  
17176  #define CREATE_TRACE_POINTS
17177  #include <trace/events/irq.h>
17178 diff -NurpP --minimal linux-2.6.31.6/kernel/sys.c linux-2.6.31.6-vs2.3.0.36.24/kernel/sys.c
17179 --- linux-2.6.31.6/kernel/sys.c 2009-09-10 15:26:28.000000000 +0200
17180 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/sys.c   2009-10-06 03:52:09.000000000 +0200
17181 @@ -41,6 +41,7 @@
17182  #include <linux/syscalls.h>
17183  #include <linux/kprobes.h>
17184  #include <linux/user_namespace.h>
17185 +#include <linux/vs_pid.h>
17186  
17187  #include <asm/uaccess.h>
17188  #include <asm/io.h>
17189 @@ -130,7 +131,10 @@ static int set_one_prio(struct task_stru
17190                 goto out;
17191         }
17192         if (niceval < task_nice(p) && !can_nice(p, niceval)) {
17193 -               error = -EACCES;
17194 +               if (vx_flags(VXF_IGNEG_NICE, 0))
17195 +                       error = 0;
17196 +               else
17197 +                       error = -EACCES;
17198                 goto out;
17199         }
17200         no_nice = security_task_setnice(p, niceval);
17201 @@ -179,6 +183,8 @@ SYSCALL_DEFINE3(setpriority, int, which,
17202                         else
17203                                 pgrp = task_pgrp(current);
17204                         do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
17205 +                               if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
17206 +                                       continue;
17207                                 error = set_one_prio(p, niceval, error);
17208                         } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
17209                         break;
17210 @@ -240,6 +246,8 @@ SYSCALL_DEFINE2(getpriority, int, which,
17211                         else
17212                                 pgrp = task_pgrp(current);
17213                         do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
17214 +                               if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
17215 +                                       continue;
17216                                 niceval = 20 - task_nice(p);
17217                                 if (niceval > retval)
17218                                         retval = niceval;
17219 @@ -349,6 +357,9 @@ void kernel_power_off(void)
17220         machine_power_off();
17221  }
17222  EXPORT_SYMBOL_GPL(kernel_power_off);
17223 +
17224 +long vs_reboot(unsigned int, void __user *);
17225 +
17226  /*
17227   * Reboot system call: for obvious reasons only root may call it,
17228   * and even root needs to set up some magic numbers in the registers
17229 @@ -381,6 +392,9 @@ SYSCALL_DEFINE4(reboot, int, magic1, int
17230         if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
17231                 cmd = LINUX_REBOOT_CMD_HALT;
17232  
17233 +       if (!vx_check(0, VS_ADMIN|VS_WATCH))
17234 +               return vs_reboot(cmd, arg);
17235 +
17236         lock_kernel();
17237         switch (cmd) {
17238         case LINUX_REBOOT_CMD_RESTART:
17239 @@ -1131,7 +1145,7 @@ SYSCALL_DEFINE2(sethostname, char __user
17240         int errno;
17241         char tmp[__NEW_UTS_LEN];
17242  
17243 -       if (!capable(CAP_SYS_ADMIN))
17244 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SET_UTSNAME))
17245                 return -EPERM;
17246         if (len < 0 || len > __NEW_UTS_LEN)
17247                 return -EINVAL;
17248 @@ -1180,7 +1194,7 @@ SYSCALL_DEFINE2(setdomainname, char __us
17249         int errno;
17250         char tmp[__NEW_UTS_LEN];
17251  
17252 -       if (!capable(CAP_SYS_ADMIN))
17253 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SET_UTSNAME))
17254                 return -EPERM;
17255         if (len < 0 || len > __NEW_UTS_LEN)
17256                 return -EINVAL;
17257 @@ -1249,7 +1263,7 @@ SYSCALL_DEFINE2(setrlimit, unsigned int,
17258                 return -EINVAL;
17259         old_rlim = current->signal->rlim + resource;
17260         if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
17261 -           !capable(CAP_SYS_RESOURCE))
17262 +           !vx_capable(CAP_SYS_RESOURCE, VXC_SET_RLIMIT))
17263                 return -EPERM;
17264         if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
17265                 return -EPERM;
17266 diff -NurpP --minimal linux-2.6.31.6/kernel/sysctl.c linux-2.6.31.6-vs2.3.0.36.24/kernel/sysctl.c
17267 --- linux-2.6.31.6/kernel/sysctl.c      2009-09-10 15:26:28.000000000 +0200
17268 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/sysctl.c        2009-09-10 16:11:43.000000000 +0200
17269 @@ -119,6 +119,7 @@ static int ngroups_max = NGROUPS_MAX;
17270  extern char modprobe_path[];
17271  extern int modules_disabled;
17272  #endif
17273 +extern char vshelper_path[];
17274  #ifdef CONFIG_CHR_DEV_SG
17275  extern int sg_big_buff;
17276  #endif
17277 @@ -572,6 +573,15 @@ static struct ctl_table kern_table[] = {
17278                 .strategy       = &sysctl_string,
17279         },
17280  #endif
17281 +       {
17282 +               .ctl_name       = KERN_VSHELPER,
17283 +               .procname       = "vshelper",
17284 +               .data           = &vshelper_path,
17285 +               .maxlen         = 256,
17286 +               .mode           = 0644,
17287 +               .proc_handler   = &proc_dostring,
17288 +               .strategy       = &sysctl_string,
17289 +       },
17290  #ifdef CONFIG_CHR_DEV_SG
17291         {
17292                 .ctl_name       = KERN_SG_BIG_BUFF,
17293 diff -NurpP --minimal linux-2.6.31.6/kernel/sysctl_check.c linux-2.6.31.6-vs2.3.0.36.24/kernel/sysctl_check.c
17294 --- linux-2.6.31.6/kernel/sysctl_check.c        2009-06-11 17:13:26.000000000 +0200
17295 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/sysctl_check.c  2009-09-10 16:11:43.000000000 +0200
17296 @@ -39,6 +39,7 @@ static const struct trans_ctl_table tran
17297  
17298         { KERN_PANIC,                   "panic" },
17299         { KERN_REALROOTDEV,             "real-root-dev" },
17300 +       { KERN_VSHELPER,                "vshelper", },
17301  
17302         { KERN_SPARC_REBOOT,            "reboot-cmd" },
17303         { KERN_CTLALTDEL,               "ctrl-alt-del" },
17304 @@ -1217,6 +1218,22 @@ static const struct trans_ctl_table tran
17305         {}
17306  };
17307  
17308 +static struct trans_ctl_table trans_vserver_table[] = {
17309 +       { 1,    "debug_switch" },
17310 +       { 2,    "debug_xid" },
17311 +       { 3,    "debug_nid" },
17312 +       { 4,    "debug_tag" },
17313 +       { 5,    "debug_net" },
17314 +       { 6,    "debug_limit" },
17315 +       { 7,    "debug_cres" },
17316 +       { 8,    "debug_dlim" },
17317 +       { 9,    "debug_quota" },
17318 +       { 10,   "debug_cvirt" },
17319 +       { 11,   "debug_space" },
17320 +       { 12,   "debug_misc" },
17321 +       {}
17322 +};
17323 +
17324  static const struct trans_ctl_table trans_root_table[] = {
17325         { CTL_KERN,     "kernel",       trans_kern_table },
17326         { CTL_VM,       "vm",           trans_vm_table },
17327 @@ -1233,6 +1250,7 @@ static const struct trans_ctl_table tran
17328         { CTL_SUNRPC,   "sunrpc",       trans_sunrpc_table },
17329         { CTL_PM,       "pm",           trans_pm_table },
17330         { CTL_FRV,      "frv",          trans_frv_table },
17331 +       { CTL_VSERVER,  "vserver",      trans_vserver_table },
17332         {}
17333  };
17334  
17335 diff -NurpP --minimal linux-2.6.31.6/kernel/time.c linux-2.6.31.6-vs2.3.0.36.24/kernel/time.c
17336 --- linux-2.6.31.6/kernel/time.c        2009-03-24 14:22:45.000000000 +0100
17337 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/time.c  2009-09-10 16:11:43.000000000 +0200
17338 @@ -63,6 +63,7 @@ EXPORT_SYMBOL(sys_tz);
17339  SYSCALL_DEFINE1(time, time_t __user *, tloc)
17340  {
17341         time_t i = get_seconds();
17342 +/*     FIXME: do_gettimeofday(&tv) -> vx_gettimeofday(&tv) */
17343  
17344         if (tloc) {
17345                 if (put_user(i,tloc))
17346 @@ -93,7 +94,7 @@ SYSCALL_DEFINE1(stime, time_t __user *, 
17347         if (err)
17348                 return err;
17349  
17350 -       do_settimeofday(&tv);
17351 +       vx_settimeofday(&tv);
17352         return 0;
17353  }
17354  
17355 @@ -104,7 +105,7 @@ SYSCALL_DEFINE2(gettimeofday, struct tim
17356  {
17357         if (likely(tv != NULL)) {
17358                 struct timeval ktv;
17359 -               do_gettimeofday(&ktv);
17360 +               vx_gettimeofday(&ktv);
17361                 if (copy_to_user(tv, &ktv, sizeof(ktv)))
17362                         return -EFAULT;
17363         }
17364 @@ -179,7 +180,7 @@ int do_sys_settimeofday(struct timespec 
17365                 /* SMP safe, again the code in arch/foo/time.c should
17366                  * globally block out interrupts when it runs.
17367                  */
17368 -               return do_settimeofday(tv);
17369 +               return vx_settimeofday(tv);
17370         }
17371         return 0;
17372  }
17373 @@ -311,7 +312,7 @@ void getnstimeofday(struct timespec *tv)
17374  {
17375         struct timeval x;
17376  
17377 -       do_gettimeofday(&x);
17378 +       vx_gettimeofday(&x);
17379         tv->tv_sec = x.tv_sec;
17380         tv->tv_nsec = x.tv_usec * NSEC_PER_USEC;
17381  }
17382 diff -NurpP --minimal linux-2.6.31.6/kernel/timer.c linux-2.6.31.6-vs2.3.0.36.24/kernel/timer.c
17383 --- linux-2.6.31.6/kernel/timer.c       2009-09-10 15:26:28.000000000 +0200
17384 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/timer.c 2009-10-06 04:09:06.000000000 +0200
17385 @@ -39,6 +39,10 @@
17386  #include <linux/kallsyms.h>
17387  #include <linux/perf_counter.h>
17388  #include <linux/sched.h>
17389 +#include <linux/vs_base.h>
17390 +#include <linux/vs_cvirt.h>
17391 +#include <linux/vs_pid.h>
17392 +#include <linux/vserver/sched.h>
17393  
17394  #include <asm/uaccess.h>
17395  #include <asm/unistd.h>
17396 @@ -1214,12 +1218,6 @@ SYSCALL_DEFINE1(alarm, unsigned int, sec
17397  
17398  #endif
17399  
17400 -#ifndef __alpha__
17401 -
17402 -/*
17403 - * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
17404 - * should be moved into arch/i386 instead?
17405 - */
17406  
17407  /**
17408   * sys_getpid - return the thread group id of the current process
17409 @@ -1248,10 +1246,23 @@ SYSCALL_DEFINE0(getppid)
17410         rcu_read_lock();
17411         pid = task_tgid_vnr(current->real_parent);
17412         rcu_read_unlock();
17413 +       return vx_map_pid(pid);
17414 +}
17415  
17416 -       return pid;
17417 +#ifdef __alpha__
17418 +
17419 +/*
17420 + * The Alpha uses getxpid, getxuid, and getxgid instead.
17421 + */
17422 +
17423 +asmlinkage long do_getxpid(long *ppid)
17424 +{
17425 +       *ppid = sys_getppid();
17426 +       return sys_getpid();
17427  }
17428  
17429 +#else /* __alpha__ */
17430 +
17431  SYSCALL_DEFINE0(getuid)
17432  {
17433         /* Only we change this so SMP safe */
17434 diff -NurpP --minimal linux-2.6.31.6/kernel/user.c linux-2.6.31.6-vs2.3.0.36.24/kernel/user.c
17435 --- linux-2.6.31.6/kernel/user.c        2009-09-10 15:26:28.000000000 +0200
17436 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/user.c  2009-09-10 16:11:43.000000000 +0200
17437 @@ -251,10 +251,10 @@ static struct kobj_type uids_ktype = {
17438   *
17439   * See Documentation/scheduler/sched-design-CFS.txt for ramifications.
17440   */
17441 -static int uids_user_create(struct user_struct *up)
17442 +static int uids_user_create(struct user_namespace *ns, struct user_struct *up)
17443  {
17444         struct kobject *kobj = &up->kobj;
17445 -       int error;
17446 +       int error = 0;
17447  
17448         memset(kobj, 0, sizeof(struct kobject));
17449         if (up->user_ns != &init_user_ns)
17450 @@ -282,7 +282,7 @@ int __init uids_sysfs_init(void)
17451         if (!uids_kset)
17452                 return -ENOMEM;
17453  
17454 -       return uids_user_create(&root_user);
17455 +       return uids_user_create(NULL, &root_user);
17456  }
17457  
17458  /* delayed work function to remove sysfs directory for a user and free up
17459 @@ -353,7 +353,8 @@ static struct user_struct *uid_hash_find
17460  }
17461  
17462  int uids_sysfs_init(void) { return 0; }
17463 -static inline int uids_user_create(struct user_struct *up) { return 0; }
17464 +static inline int uids_user_create(struct user_namespace *ns,
17465 +       struct user_struct *up) { return 0; }
17466  static inline void uids_mutex_lock(void) { }
17467  static inline void uids_mutex_unlock(void) { }
17468  
17469 @@ -450,7 +451,7 @@ struct user_struct *alloc_uid(struct use
17470  
17471                 new->user_ns = get_user_ns(ns);
17472  
17473 -               if (uids_user_create(new))
17474 +               if (uids_user_create(ns, new))
17475                         goto out_destoy_sched;
17476  
17477                 /*
17478 diff -NurpP --minimal linux-2.6.31.6/kernel/user_namespace.c linux-2.6.31.6-vs2.3.0.36.24/kernel/user_namespace.c
17479 --- linux-2.6.31.6/kernel/user_namespace.c      2009-03-24 14:22:45.000000000 +0100
17480 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/user_namespace.c        2009-09-10 16:11:43.000000000 +0200
17481 @@ -10,6 +10,7 @@
17482  #include <linux/slab.h>
17483  #include <linux/user_namespace.h>
17484  #include <linux/cred.h>
17485 +#include <linux/vserver/global.h>
17486  
17487  /*
17488   * Create a new user namespace, deriving the creator from the user in the
17489 @@ -30,6 +31,7 @@ int create_user_ns(struct cred *new)
17490                 return -ENOMEM;
17491  
17492         kref_init(&ns->kref);
17493 +       atomic_inc(&vs_global_user_ns);
17494  
17495         for (n = 0; n < UIDHASH_SZ; ++n)
17496                 INIT_HLIST_HEAD(ns->uidhash_table + n);
17497 @@ -78,6 +80,8 @@ void free_user_ns(struct kref *kref)
17498         struct user_namespace *ns =
17499                 container_of(kref, struct user_namespace, kref);
17500  
17501 +       /* FIXME: maybe move into destroyer? */
17502 +       atomic_dec(&vs_global_user_ns);
17503         INIT_WORK(&ns->destroyer, free_user_ns_work);
17504         schedule_work(&ns->destroyer);
17505  }
17506 diff -NurpP --minimal linux-2.6.31.6/kernel/utsname.c linux-2.6.31.6-vs2.3.0.36.24/kernel/utsname.c
17507 --- linux-2.6.31.6/kernel/utsname.c     2009-09-10 15:26:28.000000000 +0200
17508 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/utsname.c       2009-09-10 16:44:37.000000000 +0200
17509 @@ -14,14 +14,17 @@
17510  #include <linux/utsname.h>
17511  #include <linux/err.h>
17512  #include <linux/slab.h>
17513 +#include <linux/vserver/global.h>
17514  
17515  static struct uts_namespace *create_uts_ns(void)
17516  {
17517         struct uts_namespace *uts_ns;
17518  
17519         uts_ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
17520 -       if (uts_ns)
17521 +       if (uts_ns) {
17522                 kref_init(&uts_ns->kref);
17523 +               atomic_inc(&vs_global_uts_ns);
17524 +       }
17525         return uts_ns;
17526  }
17527  
17528 @@ -71,5 +74,6 @@ void free_uts_ns(struct kref *kref)
17529         struct uts_namespace *ns;
17530  
17531         ns = container_of(kref, struct uts_namespace, kref);
17532 +       atomic_dec(&vs_global_uts_ns);
17533         kfree(ns);
17534  }
17535 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/cacct.c linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/cacct.c
17536 --- linux-2.6.31.6/kernel/vserver/cacct.c       1970-01-01 01:00:00.000000000 +0100
17537 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/cacct.c 2009-09-10 16:11:43.000000000 +0200
17538 @@ -0,0 +1,42 @@
17539 +/*
17540 + *  linux/kernel/vserver/cacct.c
17541 + *
17542 + *  Virtual Server: Context Accounting
17543 + *
17544 + *  Copyright (C) 2006-2007 Herbert Pötzl
17545 + *
17546 + *  V0.01  added accounting stats
17547 + *
17548 + */
17549 +
17550 +#include <linux/types.h>
17551 +#include <linux/vs_context.h>
17552 +#include <linux/vserver/cacct_cmd.h>
17553 +#include <linux/vserver/cacct_int.h>
17554 +
17555 +#include <asm/errno.h>
17556 +#include <asm/uaccess.h>
17557 +
17558 +
17559 +int vc_sock_stat(struct vx_info *vxi, void __user *data)
17560 +{
17561 +       struct vcmd_sock_stat_v0 vc_data;
17562 +       int j, field;
17563 +
17564 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17565 +               return -EFAULT;
17566 +
17567 +       field = vc_data.field;
17568 +       if ((field < 0) || (field >= VXA_SOCK_SIZE))
17569 +               return -EINVAL;
17570 +
17571 +       for (j = 0; j < 3; j++) {
17572 +               vc_data.count[j] = vx_sock_count(&vxi->cacct, field, j);
17573 +               vc_data.total[j] = vx_sock_total(&vxi->cacct, field, j);
17574 +       }
17575 +
17576 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17577 +               return -EFAULT;
17578 +       return 0;
17579 +}
17580 +
17581 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/cacct_init.h linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/cacct_init.h
17582 --- linux-2.6.31.6/kernel/vserver/cacct_init.h  1970-01-01 01:00:00.000000000 +0100
17583 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/cacct_init.h    2009-09-29 17:15:22.000000000 +0200
17584 @@ -0,0 +1,25 @@
17585 +
17586 +
17587 +static inline void vx_info_init_cacct(struct _vx_cacct *cacct)
17588 +{
17589 +       int i, j;
17590 +
17591 +
17592 +       for (i = 0; i < VXA_SOCK_SIZE; i++) {
17593 +               for (j = 0; j < 3; j++) {
17594 +                       atomic_long_set(&cacct->sock[i][j].count, 0);
17595 +                       atomic_long_set(&cacct->sock[i][j].total, 0);
17596 +               }
17597 +       }
17598 +       for (i = 0; i < 8; i++)
17599 +               atomic_set(&cacct->slab[i], 0);
17600 +       for (i = 0; i < 5; i++)
17601 +               for (j = 0; j < 4; j++)
17602 +                       atomic_set(&cacct->page[i][j], 0);
17603 +}
17604 +
17605 +static inline void vx_info_exit_cacct(struct _vx_cacct *cacct)
17606 +{
17607 +       return;
17608 +}
17609 +
17610 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/cacct_proc.h linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/cacct_proc.h
17611 --- linux-2.6.31.6/kernel/vserver/cacct_proc.h  1970-01-01 01:00:00.000000000 +0100
17612 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/cacct_proc.h    2009-09-10 16:11:43.000000000 +0200
17613 @@ -0,0 +1,53 @@
17614 +#ifndef _VX_CACCT_PROC_H
17615 +#define _VX_CACCT_PROC_H
17616 +
17617 +#include <linux/vserver/cacct_int.h>
17618 +
17619 +
17620 +#define VX_SOCKA_TOP   \
17621 +       "Type\t    recv #/bytes\t\t   send #/bytes\t\t    fail #/bytes\n"
17622 +
17623 +static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer)
17624 +{
17625 +       int i, j, length = 0;
17626 +       static char *type[VXA_SOCK_SIZE] = {
17627 +               "UNSPEC", "UNIX", "INET", "INET6", "PACKET", "OTHER"
17628 +       };
17629 +
17630 +       length += sprintf(buffer + length, VX_SOCKA_TOP);
17631 +       for (i = 0; i < VXA_SOCK_SIZE; i++) {
17632 +               length += sprintf(buffer + length, "%s:", type[i]);
17633 +               for (j = 0; j < 3; j++) {
17634 +                       length += sprintf(buffer + length,
17635 +                               "\t%10lu/%-10lu",
17636 +                               vx_sock_count(cacct, i, j),
17637 +                               vx_sock_total(cacct, i, j));
17638 +               }
17639 +               buffer[length++] = '\n';
17640 +       }
17641 +
17642 +       length += sprintf(buffer + length, "\n");
17643 +       length += sprintf(buffer + length,
17644 +               "slab:\t %8u %8u %8u %8u\n",
17645 +               atomic_read(&cacct->slab[1]),
17646 +               atomic_read(&cacct->slab[4]),
17647 +               atomic_read(&cacct->slab[0]),
17648 +               atomic_read(&cacct->slab[2]));
17649 +
17650 +       length += sprintf(buffer + length, "\n");
17651 +       for (i = 0; i < 5; i++) {
17652 +               length += sprintf(buffer + length,
17653 +                       "page[%d]: %8u %8u %8u %8u\t %8u %8u %8u %8u\n", i,
17654 +                       atomic_read(&cacct->page[i][0]),
17655 +                       atomic_read(&cacct->page[i][1]),
17656 +                       atomic_read(&cacct->page[i][2]),
17657 +                       atomic_read(&cacct->page[i][3]),
17658 +                       atomic_read(&cacct->page[i][4]),
17659 +                       atomic_read(&cacct->page[i][5]),
17660 +                       atomic_read(&cacct->page[i][6]),
17661 +                       atomic_read(&cacct->page[i][7]));
17662 +       }
17663 +       return length;
17664 +}
17665 +
17666 +#endif /* _VX_CACCT_PROC_H */
17667 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/context.c linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/context.c
17668 --- linux-2.6.31.6/kernel/vserver/context.c     1970-01-01 01:00:00.000000000 +0100
17669 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/context.c       2009-11-05 04:18:09.000000000 +0100
17670 @@ -0,0 +1,1032 @@
17671 +/*
17672 + *  linux/kernel/vserver/context.c
17673 + *
17674 + *  Virtual Server: Context Support
17675 + *
17676 + *  Copyright (C) 2003-2007  Herbert Pötzl
17677 + *
17678 + *  V0.01  context helper
17679 + *  V0.02  vx_ctx_kill syscall command
17680 + *  V0.03  replaced context_info calls
17681 + *  V0.04  redesign of struct (de)alloc
17682 + *  V0.05  rlimit basic implementation
17683 + *  V0.06  task_xid and info commands
17684 + *  V0.07  context flags and caps
17685 + *  V0.08  switch to RCU based hash
17686 + *  V0.09  revert to non RCU for now
17687 + *  V0.10  and back to working RCU hash
17688 + *  V0.11  and back to locking again
17689 + *  V0.12  referenced context store
17690 + *  V0.13  separate per cpu data
17691 + *  V0.14  changed vcmds to vxi arg
17692 + *  V0.15  added context stat
17693 + *  V0.16  have __create claim() the vxi
17694 + *  V0.17  removed older and legacy stuff
17695 + *
17696 + */
17697 +
17698 +#include <linux/slab.h>
17699 +#include <linux/types.h>
17700 +#include <linux/security.h>
17701 +#include <linux/pid_namespace.h>
17702 +
17703 +#include <linux/vserver/context.h>
17704 +#include <linux/vserver/network.h>
17705 +#include <linux/vserver/debug.h>
17706 +#include <linux/vserver/limit.h>
17707 +#include <linux/vserver/limit_int.h>
17708 +#include <linux/vserver/space.h>
17709 +#include <linux/init_task.h>
17710 +#include <linux/fs_struct.h>
17711 +
17712 +#include <linux/vs_context.h>
17713 +#include <linux/vs_limit.h>
17714 +#include <linux/vs_pid.h>
17715 +#include <linux/vserver/context_cmd.h>
17716 +
17717 +#include "cvirt_init.h"
17718 +#include "cacct_init.h"
17719 +#include "limit_init.h"
17720 +#include "sched_init.h"
17721 +
17722 +
17723 +atomic_t vx_global_ctotal      = ATOMIC_INIT(0);
17724 +atomic_t vx_global_cactive     = ATOMIC_INIT(0);
17725 +
17726 +
17727 +/*     now inactive context structures */
17728 +
17729 +static struct hlist_head vx_info_inactive = HLIST_HEAD_INIT;
17730 +
17731 +static spinlock_t vx_info_inactive_lock = SPIN_LOCK_UNLOCKED;
17732 +
17733 +
17734 +/*     __alloc_vx_info()
17735 +
17736 +       * allocate an initialized vx_info struct
17737 +       * doesn't make it visible (hash)                        */
17738 +
17739 +static struct vx_info *__alloc_vx_info(xid_t xid)
17740 +{
17741 +       struct vx_info *new = NULL;
17742 +       int cpu, index;
17743 +
17744 +       vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid);
17745 +
17746 +       /* would this benefit from a slab cache? */
17747 +       new = kmalloc(sizeof(struct vx_info), GFP_KERNEL);
17748 +       if (!new)
17749 +               return 0;
17750 +
17751 +       memset(new, 0, sizeof(struct vx_info));
17752 +#ifdef CONFIG_SMP
17753 +       new->ptr_pc = alloc_percpu(struct _vx_info_pc);
17754 +       if (!new->ptr_pc)
17755 +               goto error;
17756 +#endif
17757 +       new->vx_id = xid;
17758 +       INIT_HLIST_NODE(&new->vx_hlist);
17759 +       atomic_set(&new->vx_usecnt, 0);
17760 +       atomic_set(&new->vx_tasks, 0);
17761 +       new->vx_parent = NULL;
17762 +       new->vx_state = 0;
17763 +       init_waitqueue_head(&new->vx_wait);
17764 +
17765 +       /* prepare reaper */
17766 +       get_task_struct(init_pid_ns.child_reaper);
17767 +       new->vx_reaper = init_pid_ns.child_reaper;
17768 +       new->vx_badness_bias = 0;
17769 +
17770 +       /* rest of init goes here */
17771 +       vx_info_init_limit(&new->limit);
17772 +       vx_info_init_sched(&new->sched);
17773 +       vx_info_init_cvirt(&new->cvirt);
17774 +       vx_info_init_cacct(&new->cacct);
17775 +
17776 +       /* per cpu data structures */
17777 +       for_each_possible_cpu(cpu) {
17778 +               vx_info_init_sched_pc(
17779 +                       &vx_per_cpu(new, sched_pc, cpu), cpu);
17780 +               vx_info_init_cvirt_pc(
17781 +                       &vx_per_cpu(new, cvirt_pc, cpu), cpu);
17782 +       }
17783 +
17784 +       new->vx_flags = VXF_INIT_SET;
17785 +       cap_set_init_eff(new->vx_bcaps);
17786 +       new->vx_ccaps = 0;
17787 +       // new->vx_cap_bset = current->cap_bset;
17788 +
17789 +       new->reboot_cmd = 0;
17790 +       new->exit_code = 0;
17791 +
17792 +       // preconfig fs entries
17793 +       for (index = 0; index < VX_SPACES; index++) {
17794 +               write_lock(&init_fs.lock);
17795 +               init_fs.users++;
17796 +               write_unlock(&init_fs.lock);
17797 +               new->vx_fs[index] = &init_fs;
17798 +       }
17799 +
17800 +       vxdprintk(VXD_CBIT(xid, 0),
17801 +               "alloc_vx_info(%d) = %p", xid, new);
17802 +       vxh_alloc_vx_info(new);
17803 +       atomic_inc(&vx_global_ctotal);
17804 +       return new;
17805 +#ifdef CONFIG_SMP
17806 +error:
17807 +       kfree(new);
17808 +       return 0;
17809 +#endif
17810 +}
17811 +
17812 +/*     __dealloc_vx_info()
17813 +
17814 +       * final disposal of vx_info                             */
17815 +
17816 +static void __dealloc_vx_info(struct vx_info *vxi)
17817 +{
17818 +       struct vx_info_save vxis;
17819 +       int cpu;
17820 +
17821 +       vxdprintk(VXD_CBIT(xid, 0),
17822 +               "dealloc_vx_info(%p)", vxi);
17823 +       vxh_dealloc_vx_info(vxi);
17824 +
17825 +#ifdef CONFIG_VSERVER_WARN
17826 +       enter_vx_info(vxi, &vxis);
17827 +       vx_info_exit_limit(&vxi->limit);
17828 +       vx_info_exit_sched(&vxi->sched);
17829 +       vx_info_exit_cvirt(&vxi->cvirt);
17830 +       vx_info_exit_cacct(&vxi->cacct);
17831 +
17832 +       for_each_possible_cpu(cpu) {
17833 +               vx_info_exit_sched_pc(
17834 +                       &vx_per_cpu(vxi, sched_pc, cpu), cpu);
17835 +               vx_info_exit_cvirt_pc(
17836 +                       &vx_per_cpu(vxi, cvirt_pc, cpu), cpu);
17837 +       }
17838 +       leave_vx_info(&vxis);
17839 +#endif
17840 +
17841 +       vxi->vx_id = -1;
17842 +       vxi->vx_state |= VXS_RELEASED;
17843 +
17844 +#ifdef CONFIG_SMP
17845 +       free_percpu(vxi->ptr_pc);
17846 +#endif
17847 +       kfree(vxi);
17848 +       atomic_dec(&vx_global_ctotal);
17849 +}
17850 +
17851 +static void __shutdown_vx_info(struct vx_info *vxi)
17852 +{
17853 +       struct nsproxy *nsproxy;
17854 +       struct fs_struct *fs;
17855 +       int index, kill;
17856 +
17857 +       might_sleep();
17858 +
17859 +       vxi->vx_state |= VXS_SHUTDOWN;
17860 +       vs_state_change(vxi, VSC_SHUTDOWN);
17861 +
17862 +       for (index = 0; index < VX_SPACES; index++) {
17863 +               nsproxy = xchg(&vxi->vx_nsproxy[index], NULL);
17864 +               if (nsproxy)
17865 +                       put_nsproxy(nsproxy);
17866 +
17867 +               fs = xchg(&vxi->vx_fs[index], NULL);
17868 +               write_lock(&fs->lock);
17869 +               kill = !--fs->users;
17870 +               write_unlock(&fs->lock);
17871 +               if (kill)
17872 +                       free_fs_struct(fs);
17873 +       }
17874 +}
17875 +
17876 +/* exported stuff */
17877 +
17878 +void free_vx_info(struct vx_info *vxi)
17879 +{
17880 +       unsigned long flags;
17881 +       unsigned index;
17882 +
17883 +       /* check for reference counts first */
17884 +       BUG_ON(atomic_read(&vxi->vx_usecnt));
17885 +       BUG_ON(atomic_read(&vxi->vx_tasks));
17886 +
17887 +       /* context must not be hashed */
17888 +       BUG_ON(vx_info_state(vxi, VXS_HASHED));
17889 +
17890 +       /* context shutdown is mandatory */
17891 +       BUG_ON(!vx_info_state(vxi, VXS_SHUTDOWN));
17892 +
17893 +       /* nsproxy and fs check */
17894 +       for (index = 0; index < VX_SPACES; index++) {
17895 +               BUG_ON(vxi->vx_nsproxy[index]);
17896 +               BUG_ON(vxi->vx_fs[index]);
17897 +       }
17898 +
17899 +       spin_lock_irqsave(&vx_info_inactive_lock, flags);
17900 +       hlist_del(&vxi->vx_hlist);
17901 +       spin_unlock_irqrestore(&vx_info_inactive_lock, flags);
17902 +
17903 +       __dealloc_vx_info(vxi);
17904 +}
17905 +
17906 +
17907 +/*     hash table for vx_info hash */
17908 +
17909 +#define VX_HASH_SIZE   13
17910 +
17911 +static struct hlist_head vx_info_hash[VX_HASH_SIZE] =
17912 +       { [0 ... VX_HASH_SIZE-1] = HLIST_HEAD_INIT };
17913 +
17914 +static spinlock_t vx_info_hash_lock = SPIN_LOCK_UNLOCKED;
17915 +
17916 +
17917 +static inline unsigned int __hashval(xid_t xid)
17918 +{
17919 +       return (xid % VX_HASH_SIZE);
17920 +}
17921 +
17922 +
17923 +
17924 +/*     __hash_vx_info()
17925 +
17926 +       * add the vxi to the global hash table
17927 +       * requires the hash_lock to be held                     */
17928 +
17929 +static inline void __hash_vx_info(struct vx_info *vxi)
17930 +{
17931 +       struct hlist_head *head;
17932 +
17933 +       vxd_assert_lock(&vx_info_hash_lock);
17934 +       vxdprintk(VXD_CBIT(xid, 4),
17935 +               "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id);
17936 +       vxh_hash_vx_info(vxi);
17937 +
17938 +       /* context must not be hashed */
17939 +       BUG_ON(vx_info_state(vxi, VXS_HASHED));
17940 +
17941 +       vxi->vx_state |= VXS_HASHED;
17942 +       head = &vx_info_hash[__hashval(vxi->vx_id)];
17943 +       hlist_add_head(&vxi->vx_hlist, head);
17944 +       atomic_inc(&vx_global_cactive);
17945 +}
17946 +
17947 +/*     __unhash_vx_info()
17948 +
17949 +       * remove the vxi from the global hash table
17950 +       * requires the hash_lock to be held                     */
17951 +
17952 +static inline void __unhash_vx_info(struct vx_info *vxi)
17953 +{
17954 +       unsigned long flags;
17955 +
17956 +       vxd_assert_lock(&vx_info_hash_lock);
17957 +       vxdprintk(VXD_CBIT(xid, 4),
17958 +               "__unhash_vx_info: %p[#%d.%d.%d]", vxi, vxi->vx_id,
17959 +               atomic_read(&vxi->vx_usecnt), atomic_read(&vxi->vx_tasks));
17960 +       vxh_unhash_vx_info(vxi);
17961 +
17962 +       /* context must be hashed */
17963 +       BUG_ON(!vx_info_state(vxi, VXS_HASHED));
17964 +       /* but without tasks */
17965 +       BUG_ON(atomic_read(&vxi->vx_tasks));
17966 +
17967 +       vxi->vx_state &= ~VXS_HASHED;
17968 +       hlist_del_init(&vxi->vx_hlist);
17969 +       spin_lock_irqsave(&vx_info_inactive_lock, flags);
17970 +       hlist_add_head(&vxi->vx_hlist, &vx_info_inactive);
17971 +       spin_unlock_irqrestore(&vx_info_inactive_lock, flags);
17972 +       atomic_dec(&vx_global_cactive);
17973 +}
17974 +
17975 +
17976 +/*     __lookup_vx_info()
17977 +
17978 +       * requires the hash_lock to be held
17979 +       * doesn't increment the vx_usecnt                       */
17980 +
17981 +static inline struct vx_info *__lookup_vx_info(xid_t xid)
17982 +{
17983 +       struct hlist_head *head = &vx_info_hash[__hashval(xid)];
17984 +       struct hlist_node *pos;
17985 +       struct vx_info *vxi;
17986 +
17987 +       vxd_assert_lock(&vx_info_hash_lock);
17988 +       hlist_for_each(pos, head) {
17989 +               vxi = hlist_entry(pos, struct vx_info, vx_hlist);
17990 +
17991 +               if (vxi->vx_id == xid)
17992 +                       goto found;
17993 +       }
17994 +       vxi = NULL;
17995 +found:
17996 +       vxdprintk(VXD_CBIT(xid, 0),
17997 +               "__lookup_vx_info(#%u): %p[#%u]",
17998 +               xid, vxi, vxi ? vxi->vx_id : 0);
17999 +       vxh_lookup_vx_info(vxi, xid);
18000 +       return vxi;
18001 +}
18002 +
18003 +
18004 +/*     __create_vx_info()
18005 +
18006 +       * create the requested context
18007 +       * get(), claim() and hash it                            */
18008 +
18009 +static struct vx_info *__create_vx_info(int id)
18010 +{
18011 +       struct vx_info *new, *vxi = NULL;
18012 +
18013 +       vxdprintk(VXD_CBIT(xid, 1), "create_vx_info(%d)*", id);
18014 +
18015 +       if (!(new = __alloc_vx_info(id)))
18016 +               return ERR_PTR(-ENOMEM);
18017 +
18018 +       /* required to make dynamic xids unique */
18019 +       spin_lock(&vx_info_hash_lock);
18020 +
18021 +       /* static context requested */
18022 +       if ((vxi = __lookup_vx_info(id))) {
18023 +               vxdprintk(VXD_CBIT(xid, 0),
18024 +                       "create_vx_info(%d) = %p (already there)", id, vxi);
18025 +               if (vx_info_flags(vxi, VXF_STATE_SETUP, 0))
18026 +                       vxi = ERR_PTR(-EBUSY);
18027 +               else
18028 +                       vxi = ERR_PTR(-EEXIST);
18029 +               goto out_unlock;
18030 +       }
18031 +       /* new context */
18032 +       vxdprintk(VXD_CBIT(xid, 0),
18033 +               "create_vx_info(%d) = %p (new)", id, new);
18034 +       claim_vx_info(new, NULL);
18035 +       __hash_vx_info(get_vx_info(new));
18036 +       vxi = new, new = NULL;
18037 +
18038 +out_unlock:
18039 +       spin_unlock(&vx_info_hash_lock);
18040 +       vxh_create_vx_info(IS_ERR(vxi) ? NULL : vxi, id);
18041 +       if (new)
18042 +               __dealloc_vx_info(new);
18043 +       return vxi;
18044 +}
18045 +
18046 +
18047 +/*     exported stuff                                          */
18048 +
18049 +
18050 +void unhash_vx_info(struct vx_info *vxi)
18051 +{
18052 +       __shutdown_vx_info(vxi);
18053 +       spin_lock(&vx_info_hash_lock);
18054 +       __unhash_vx_info(vxi);
18055 +       spin_unlock(&vx_info_hash_lock);
18056 +       __wakeup_vx_info(vxi);
18057 +}
18058 +
18059 +
18060 +/*     lookup_vx_info()
18061 +
18062 +       * search for a vx_info and get() it
18063 +       * negative id means current                             */
18064 +
18065 +struct vx_info *lookup_vx_info(int id)
18066 +{
18067 +       struct vx_info *vxi = NULL;
18068 +
18069 +       if (id < 0) {
18070 +               vxi = get_vx_info(current_vx_info());
18071 +       } else if (id > 1) {
18072 +               spin_lock(&vx_info_hash_lock);
18073 +               vxi = get_vx_info(__lookup_vx_info(id));
18074 +               spin_unlock(&vx_info_hash_lock);
18075 +       }
18076 +       return vxi;
18077 +}
18078 +
18079 +/*     xid_is_hashed()
18080 +
18081 +       * verify that xid is still hashed                       */
18082 +
18083 +int xid_is_hashed(xid_t xid)
18084 +{
18085 +       int hashed;
18086 +
18087 +       spin_lock(&vx_info_hash_lock);
18088 +       hashed = (__lookup_vx_info(xid) != NULL);
18089 +       spin_unlock(&vx_info_hash_lock);
18090 +       return hashed;
18091 +}
18092 +
18093 +#ifdef CONFIG_PROC_FS
18094 +
18095 +/*     get_xid_list()
18096 +
18097 +       * get a subset of hashed xids for proc
18098 +       * assumes size is at least one                          */
18099 +
18100 +int get_xid_list(int index, unsigned int *xids, int size)
18101 +{
18102 +       int hindex, nr_xids = 0;
18103 +
18104 +       /* only show current and children */
18105 +       if (!vx_check(0, VS_ADMIN | VS_WATCH)) {
18106 +               if (index > 0)
18107 +                       return 0;
18108 +               xids[nr_xids] = vx_current_xid();
18109 +               return 1;
18110 +       }
18111 +
18112 +       for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) {
18113 +               struct hlist_head *head = &vx_info_hash[hindex];
18114 +               struct hlist_node *pos;
18115 +
18116 +               spin_lock(&vx_info_hash_lock);
18117 +               hlist_for_each(pos, head) {
18118 +                       struct vx_info *vxi;
18119 +
18120 +                       if (--index > 0)
18121 +                               continue;
18122 +
18123 +                       vxi = hlist_entry(pos, struct vx_info, vx_hlist);
18124 +                       xids[nr_xids] = vxi->vx_id;
18125 +                       if (++nr_xids >= size) {
18126 +                               spin_unlock(&vx_info_hash_lock);
18127 +                               goto out;
18128 +                       }
18129 +               }
18130 +               /* keep the lock time short */
18131 +               spin_unlock(&vx_info_hash_lock);
18132 +       }
18133 +out:
18134 +       return nr_xids;
18135 +}
18136 +#endif
18137 +
18138 +#ifdef CONFIG_VSERVER_DEBUG
18139 +
18140 +void   dump_vx_info_inactive(int level)
18141 +{
18142 +       struct hlist_node *entry, *next;
18143 +
18144 +       hlist_for_each_safe(entry, next, &vx_info_inactive) {
18145 +               struct vx_info *vxi =
18146 +                       list_entry(entry, struct vx_info, vx_hlist);
18147 +
18148 +               dump_vx_info(vxi, level);
18149 +       }
18150 +}
18151 +
18152 +#endif
18153 +
18154 +#if 0
18155 +int vx_migrate_user(struct task_struct *p, struct vx_info *vxi)
18156 +{
18157 +       struct user_struct *new_user, *old_user;
18158 +
18159 +       if (!p || !vxi)
18160 +               BUG();
18161 +
18162 +       if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0))
18163 +               return -EACCES;
18164 +
18165 +       new_user = alloc_uid(vxi->vx_id, p->uid);
18166 +       if (!new_user)
18167 +               return -ENOMEM;
18168 +
18169 +       old_user = p->user;
18170 +       if (new_user != old_user) {
18171 +               atomic_inc(&new_user->processes);
18172 +               atomic_dec(&old_user->processes);
18173 +               p->user = new_user;
18174 +       }
18175 +       free_uid(old_user);
18176 +       return 0;
18177 +}
18178 +#endif
18179 +
18180 +#if 0
18181 +void vx_mask_cap_bset(struct vx_info *vxi, struct task_struct *p)
18182 +{
18183 +       // p->cap_effective &= vxi->vx_cap_bset;
18184 +       p->cap_effective =
18185 +               cap_intersect(p->cap_effective, vxi->cap_bset);
18186 +       // p->cap_inheritable &= vxi->vx_cap_bset;
18187 +       p->cap_inheritable =
18188 +               cap_intersect(p->cap_inheritable, vxi->cap_bset);
18189 +       // p->cap_permitted &= vxi->vx_cap_bset;
18190 +       p->cap_permitted =
18191 +               cap_intersect(p->cap_permitted, vxi->cap_bset);
18192 +}
18193 +#endif
18194 +
18195 +
18196 +#include <linux/file.h>
18197 +#include <linux/fdtable.h>
18198 +
18199 +static int vx_openfd_task(struct task_struct *tsk)
18200 +{
18201 +       struct files_struct *files = tsk->files;
18202 +       struct fdtable *fdt;
18203 +       const unsigned long *bptr;
18204 +       int count, total;
18205 +
18206 +       /* no rcu_read_lock() because of spin_lock() */
18207 +       spin_lock(&files->file_lock);
18208 +       fdt = files_fdtable(files);
18209 +       bptr = fdt->open_fds->fds_bits;
18210 +       count = fdt->max_fds / (sizeof(unsigned long) * 8);
18211 +       for (total = 0; count > 0; count--) {
18212 +               if (*bptr)
18213 +                       total += hweight_long(*bptr);
18214 +               bptr++;
18215 +       }
18216 +       spin_unlock(&files->file_lock);
18217 +       return total;
18218 +}
18219 +
18220 +
18221 +/*     for *space compatibility */
18222 +
18223 +asmlinkage long sys_unshare(unsigned long);
18224 +
18225 +/*
18226 + *     migrate task to new context
18227 + *     gets vxi, puts old_vxi on change
18228 + *     optionally unshares namespaces (hack)
18229 + */
18230 +
18231 +int vx_migrate_task(struct task_struct *p, struct vx_info *vxi, int unshare)
18232 +{
18233 +       struct vx_info *old_vxi;
18234 +       int ret = 0;
18235 +
18236 +       if (!p || !vxi)
18237 +               BUG();
18238 +
18239 +       vxdprintk(VXD_CBIT(xid, 5),
18240 +               "vx_migrate_task(%p,%p[#%d.%d])", p, vxi,
18241 +               vxi->vx_id, atomic_read(&vxi->vx_usecnt));
18242 +
18243 +       if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0) &&
18244 +               !vx_info_flags(vxi, VXF_STATE_SETUP, 0))
18245 +               return -EACCES;
18246 +
18247 +       if (vx_info_state(vxi, VXS_SHUTDOWN))
18248 +               return -EFAULT;
18249 +
18250 +       old_vxi = task_get_vx_info(p);
18251 +       if (old_vxi == vxi)
18252 +               goto out;
18253 +
18254 +//     if (!(ret = vx_migrate_user(p, vxi))) {
18255 +       {
18256 +               int openfd;
18257 +
18258 +               task_lock(p);
18259 +               openfd = vx_openfd_task(p);
18260 +
18261 +               if (old_vxi) {
18262 +                       atomic_dec(&old_vxi->cvirt.nr_threads);
18263 +                       atomic_dec(&old_vxi->cvirt.nr_running);
18264 +                       __rlim_dec(&old_vxi->limit, RLIMIT_NPROC);
18265 +                       /* FIXME: what about the struct files here? */
18266 +                       __rlim_sub(&old_vxi->limit, VLIMIT_OPENFD, openfd);
18267 +                       /* account for the executable */
18268 +                       __rlim_dec(&old_vxi->limit, VLIMIT_DENTRY);
18269 +               }
18270 +               atomic_inc(&vxi->cvirt.nr_threads);
18271 +               atomic_inc(&vxi->cvirt.nr_running);
18272 +               __rlim_inc(&vxi->limit, RLIMIT_NPROC);
18273 +               /* FIXME: what about the struct files here? */
18274 +               __rlim_add(&vxi->limit, VLIMIT_OPENFD, openfd);
18275 +               /* account for the executable */
18276 +               __rlim_inc(&vxi->limit, VLIMIT_DENTRY);
18277 +
18278 +               if (old_vxi) {
18279 +                       release_vx_info(old_vxi, p);
18280 +                       clr_vx_info(&p->vx_info);
18281 +               }
18282 +               claim_vx_info(vxi, p);
18283 +               set_vx_info(&p->vx_info, vxi);
18284 +               p->xid = vxi->vx_id;
18285 +
18286 +               vxdprintk(VXD_CBIT(xid, 5),
18287 +                       "moved task %p into vxi:%p[#%d]",
18288 +                       p, vxi, vxi->vx_id);
18289 +
18290 +               // vx_mask_cap_bset(vxi, p);
18291 +               task_unlock(p);
18292 +
18293 +               /* hack for *spaces to provide compatibility */
18294 +               if (unshare) {
18295 +                       struct nsproxy *old_nsp, *new_nsp;
18296 +
18297 +                       ret = unshare_nsproxy_namespaces(
18298 +                               CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER,
18299 +                               &new_nsp, NULL);
18300 +                       if (ret)
18301 +                               goto out;
18302 +
18303 +                       old_nsp = xchg(&p->nsproxy, new_nsp);
18304 +                       vx_set_space(vxi,
18305 +                               CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER, 0);
18306 +                       put_nsproxy(old_nsp);
18307 +               }
18308 +       }
18309 +out:
18310 +       put_vx_info(old_vxi);
18311 +       return ret;
18312 +}
18313 +
18314 +int vx_set_reaper(struct vx_info *vxi, struct task_struct *p)
18315 +{
18316 +       struct task_struct *old_reaper;
18317 +
18318 +       if (!vxi)
18319 +               return -EINVAL;
18320 +
18321 +       vxdprintk(VXD_CBIT(xid, 6),
18322 +               "vx_set_reaper(%p[#%d],%p[#%d,%d])",
18323 +               vxi, vxi->vx_id, p, p->xid, p->pid);
18324 +
18325 +       old_reaper = vxi->vx_reaper;
18326 +       if (old_reaper == p)
18327 +               return 0;
18328 +
18329 +       /* set new child reaper */
18330 +       get_task_struct(p);
18331 +       vxi->vx_reaper = p;
18332 +       put_task_struct(old_reaper);
18333 +       return 0;
18334 +}
18335 +
18336 +int vx_set_init(struct vx_info *vxi, struct task_struct *p)
18337 +{
18338 +       if (!vxi)
18339 +               return -EINVAL;
18340 +
18341 +       vxdprintk(VXD_CBIT(xid, 6),
18342 +               "vx_set_init(%p[#%d],%p[#%d,%d,%d])",
18343 +               vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
18344 +
18345 +       vxi->vx_flags &= ~VXF_STATE_INIT;
18346 +       // vxi->vx_initpid = p->tgid;
18347 +       vxi->vx_initpid = p->pid;
18348 +       return 0;
18349 +}
18350 +
18351 +void vx_exit_init(struct vx_info *vxi, struct task_struct *p, int code)
18352 +{
18353 +       vxdprintk(VXD_CBIT(xid, 6),
18354 +               "vx_exit_init(%p[#%d],%p[#%d,%d,%d])",
18355 +               vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
18356 +
18357 +       vxi->exit_code = code;
18358 +       vxi->vx_initpid = 0;
18359 +}
18360 +
18361 +
18362 +void vx_set_persistent(struct vx_info *vxi)
18363 +{
18364 +       vxdprintk(VXD_CBIT(xid, 6),
18365 +               "vx_set_persistent(%p[#%d])", vxi, vxi->vx_id);
18366 +
18367 +       get_vx_info(vxi);
18368 +       claim_vx_info(vxi, NULL);
18369 +}
18370 +
18371 +void vx_clear_persistent(struct vx_info *vxi)
18372 +{
18373 +       vxdprintk(VXD_CBIT(xid, 6),
18374 +               "vx_clear_persistent(%p[#%d])", vxi, vxi->vx_id);
18375 +
18376 +       release_vx_info(vxi, NULL);
18377 +       put_vx_info(vxi);
18378 +}
18379 +
18380 +void vx_update_persistent(struct vx_info *vxi)
18381 +{
18382 +       if (vx_info_flags(vxi, VXF_PERSISTENT, 0))
18383 +               vx_set_persistent(vxi);
18384 +       else
18385 +               vx_clear_persistent(vxi);
18386 +}
18387 +
18388 +
18389 +/*     task must be current or locked          */
18390 +
18391 +void   exit_vx_info(struct task_struct *p, int code)
18392 +{
18393 +       struct vx_info *vxi = p->vx_info;
18394 +
18395 +       if (vxi) {
18396 +               atomic_dec(&vxi->cvirt.nr_threads);
18397 +               vx_nproc_dec(p);
18398 +
18399 +               vxi->exit_code = code;
18400 +               release_vx_info(vxi, p);
18401 +       }
18402 +}
18403 +
18404 +void   exit_vx_info_early(struct task_struct *p, int code)
18405 +{
18406 +       struct vx_info *vxi = p->vx_info;
18407 +
18408 +       if (vxi) {
18409 +               if (vxi->vx_initpid == p->pid)
18410 +                       vx_exit_init(vxi, p, code);
18411 +               if (vxi->vx_reaper == p)
18412 +                       vx_set_reaper(vxi, init_pid_ns.child_reaper);
18413 +       }
18414 +}
18415 +
18416 +
18417 +/* vserver syscall commands below here */
18418 +
18419 +/* task xid and vx_info functions */
18420 +
18421 +#include <asm/uaccess.h>
18422 +
18423 +
18424 +int vc_task_xid(uint32_t id)
18425 +{
18426 +       xid_t xid;
18427 +
18428 +       if (id) {
18429 +               struct task_struct *tsk;
18430 +
18431 +               read_lock(&tasklist_lock);
18432 +               tsk = find_task_by_real_pid(id);
18433 +               xid = (tsk) ? tsk->xid : -ESRCH;
18434 +               read_unlock(&tasklist_lock);
18435 +       } else
18436 +               xid = vx_current_xid();
18437 +       return xid;
18438 +}
18439 +
18440 +
18441 +int vc_vx_info(struct vx_info *vxi, void __user *data)
18442 +{
18443 +       struct vcmd_vx_info_v0 vc_data;
18444 +
18445 +       vc_data.xid = vxi->vx_id;
18446 +       vc_data.initpid = vxi->vx_initpid;
18447 +
18448 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18449 +               return -EFAULT;
18450 +       return 0;
18451 +}
18452 +
18453 +
18454 +int vc_ctx_stat(struct vx_info *vxi, void __user *data)
18455 +{
18456 +       struct vcmd_ctx_stat_v0 vc_data;
18457 +
18458 +       vc_data.usecnt = atomic_read(&vxi->vx_usecnt);
18459 +       vc_data.tasks = atomic_read(&vxi->vx_tasks);
18460 +
18461 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18462 +               return -EFAULT;
18463 +       return 0;
18464 +}
18465 +
18466 +
18467 +/* context functions */
18468 +/* create context @xid, start it up and migrate the caller into it; returns the xid or -errno */
18469 +int vc_ctx_create(uint32_t xid, void __user *data)
18470 +{
18471 +       struct vcmd_ctx_create vc_data = { .flagword = VXF_INIT_SET };
18472 +       struct vx_info *new_vxi;
18473 +       int ret;
18474 +
18475 +       if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))    /* @data is optional */
18476 +               return -EFAULT;
18477 +
18478 +       if ((xid > MAX_S_CONTEXT) || (xid < 2)) /* only ids 2..MAX_S_CONTEXT are accepted */
18479 +               return -EINVAL;
18480 +
18481 +       new_vxi = __create_vx_info(xid);
18482 +       if (IS_ERR(new_vxi))
18483 +               return PTR_ERR(new_vxi);
18484 +
18485 +       /* initial flags */
18486 +       new_vxi->vx_flags = vc_data.flagword;
18487 +
18488 +       ret = -ENOEXEC;
18489 +       if (vs_state_change(new_vxi, VSC_STARTUP))
18490 +               goto out;
18491 +
18492 +       ret = vx_migrate_task(current, new_vxi, (!data));       /* NOTE(review): meaning of the (!data) argument is not visible here */
18493 +       if (ret)
18494 +               goto out;
18495 +
18496 +       /* return context id on success */
18497 +       ret = new_vxi->vx_id;
18498 +
18499 +       /* get a reference for persistent contexts */
18500 +       if ((vc_data.flagword & VXF_PERSISTENT))
18501 +               vx_set_persistent(new_vxi);
18502 +out:   /* shared exit: taken on success as well as on failure */
18503 +       release_vx_info(new_vxi, NULL);
18504 +       put_vx_info(new_vxi);
18505 +       return ret;
18506 +}
18507 +
18508 +/* migrate the caller into @vxi; VXM_SET_INIT / VXM_SET_REAPER in the optional @data add vx_set_init() / vx_set_reaper() */
18509 +int vc_ctx_migrate(struct vx_info *vxi, void __user *data)
18510 +{
18511 +       struct vcmd_ctx_migrate vc_data = { .flagword = 0 };
18512 +       int ret;
18513 +
18514 +       if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))    /* no @data: flagword stays 0 */
18515 +               return -EFAULT;
18516 +
18517 +       ret = vx_migrate_task(current, vxi, 0);
18518 +       if (ret)
18519 +               return ret;
18520 +       if (vc_data.flagword & VXM_SET_INIT)
18521 +               ret = vx_set_init(vxi, current);
18522 +       if (ret)
18523 +               return ret;
18524 +       if (vc_data.flagword & VXM_SET_REAPER)
18525 +               ret = vx_set_reaper(vxi, current);
18526 +       return ret;
18527 +}
18528 +
18529 +/* copy the flag word of @vxi plus a mask computed by vs_mask_flags() to @data; 0 or -EFAULT */
18530 +int vc_get_cflags(struct vx_info *vxi, void __user *data)
18531 +{
18532 +       struct vcmd_ctx_flags_v0 vc_data;
18533 +
18534 +       vc_data.flagword = vxi->vx_flags;
18535 +
18536 +       /* special STATE flag handling */
18537 +       vc_data.mask = vs_mask_flags(~0ULL, vxi->vx_flags, VXF_ONE_TIME);
18538 +
18539 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18540 +               return -EFAULT;
18541 +       return 0;
18542 +}
18543 +/* change the flags of @vxi selected by the user-supplied mask; returns 0 or -errno */
18544 +int vc_set_cflags(struct vx_info *vxi, void __user *data)
18545 +{
18546 +       struct vcmd_ctx_flags_v0 vc_data;
18547 +       uint64_t mask, trigger;
18548 +
18549 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18550 +               return -EFAULT;
18551 +
18552 +       /* special STATE flag handling */
18553 +       mask = vs_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME);
18554 +       trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword);   /* masked bits that differ */
18555 +
18556 +       if (vxi == current_vx_info()) { /* init/reaper setup only for the caller's own context */
18557 +               /* if (trigger & VXF_STATE_SETUP)
18558 +                       vx_mask_cap_bset(vxi, current); */
18559 +               if (trigger & VXF_STATE_INIT) {
18560 +                       int ret;
18561 +
18562 +                       ret = vx_set_init(vxi, current);
18563 +                       if (ret)
18564 +                               return ret;     /* flags are left unmodified on failure */
18565 +                       ret = vx_set_reaper(vxi, current);
18566 +                       if (ret)
18567 +                               return ret;
18568 +               }
18569 +       }
18570 +
18571 +       vxi->vx_flags = vs_mask_flags(vxi->vx_flags,
18572 +               vc_data.flagword, mask);
18573 +       if (trigger & VXF_PERSISTENT)
18574 +               vx_update_persistent(vxi);
18575 +
18576 +       return 0;
18577 +}
18578 +
18579 +/* pack cap[0] (low half) and cap[1] (high half) of a kernel_cap_t into one 64-bit value */
18580 +static inline uint64_t caps_from_cap_t(kernel_cap_t c)
18581 +{
18582 +       uint64_t v = c.cap[0] | ((uint64_t)c.cap[1] << 32);
18583 +
18584 +       // printk("caps_from_cap_t(%08x:%08x) = %016llx\n", c.cap[1], c.cap[0], v);
18585 +       return v;
18586 +}
18587 +/* inverse of caps_from_cap_t(): split a 64-bit value into cap[0] (low) and cap[1] (high) */
18588 +static inline kernel_cap_t cap_t_from_caps(uint64_t v)
18589 +{
18590 +       kernel_cap_t c = __cap_empty_set;
18591 +
18592 +       c.cap[0] = v & 0xFFFFFFFF;
18593 +       c.cap[1] = (v >> 32) & 0xFFFFFFFF;
18594 +
18595 +       // printk("cap_t_from_caps(%016llx) = %08x:%08x\n", v, c.cap[1], c.cap[0]);
18596 +       return c;
18597 +}
18598 +
18599 +/* read vx_bcaps and/or vx_ccaps of @vxi; either output pointer may be NULL; always returns 0 */
18600 +static int do_get_caps(struct vx_info *vxi, uint64_t *bcaps, uint64_t *ccaps)
18601 +{
18602 +       if (bcaps)
18603 +               *bcaps = caps_from_cap_t(vxi->vx_bcaps);
18604 +       if (ccaps)
18605 +               *ccaps = vxi->vx_ccaps;
18606 +
18607 +       return 0;
18608 +}
18609 +/* copy vx_ccaps of @vxi, together with an all-ones cmask, to @data; returns 0 or -EFAULT */
18610 +int vc_get_ccaps(struct vx_info *vxi, void __user *data)
18611 +{
18612 +       struct vcmd_ctx_caps_v1 vc_data;
18613 +       int ret;
18614 +
18615 +       ret = do_get_caps(vxi, NULL, &vc_data.ccaps);
18616 +       if (ret)
18617 +               return ret;
18618 +       vc_data.cmask = ~0ULL;
18619 +
18620 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18621 +               return -EFAULT;
18622 +       return 0;
18623 +}
18624 +/* run vx_bcaps and vx_ccaps of @vxi through vs_mask_flags() with the given value/mask pairs; always returns 0 */
18625 +static int do_set_caps(struct vx_info *vxi,
18626 +       uint64_t bcaps, uint64_t bmask, uint64_t ccaps, uint64_t cmask)
18627 +{
18628 +       uint64_t bcold = caps_from_cap_t(vxi->vx_bcaps);
18629 +
18630 +#if 0
18631 +       printk("do_set_caps(%16llx, %16llx, %16llx, %16llx)\n",
18632 +               bcaps, bmask, ccaps, cmask);
18633 +#endif
18634 +       vxi->vx_bcaps = cap_t_from_caps(
18635 +               vs_mask_flags(bcold, bcaps, bmask));
18636 +       vxi->vx_ccaps = vs_mask_flags(vxi->vx_ccaps, ccaps, cmask);     /* NOTE(review): no locking visible around these stores */
18637 +
18638 +       return 0;
18639 +}
18640 +/* update vx_ccaps of @vxi from user data; the bcaps value and mask are both passed as 0 */
18641 +int vc_set_ccaps(struct vx_info *vxi, void __user *data)
18642 +{
18643 +       struct vcmd_ctx_caps_v1 vc_data;
18644 +
18645 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18646 +               return -EFAULT;
18647 +
18648 +       return do_set_caps(vxi, 0, 0, vc_data.ccaps, vc_data.cmask);
18649 +}
18650 +/* copy vx_bcaps of @vxi (as a 64-bit value) and an all-ones bmask to @data; returns 0 or -EFAULT */
18651 +int vc_get_bcaps(struct vx_info *vxi, void __user *data)
18652 +{
18653 +       struct vcmd_bcaps vc_data;
18654 +       int ret;
18655 +
18656 +       ret = do_get_caps(vxi, &vc_data.bcaps, NULL);
18657 +       if (ret)
18658 +               return ret;
18659 +       vc_data.bmask = ~0ULL;
18660 +
18661 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18662 +               return -EFAULT;
18663 +       return 0;
18664 +}
18665 +/* update vx_bcaps of @vxi from user data; the ccaps value and mask are both passed as 0 */
18666 +int vc_set_bcaps(struct vx_info *vxi, void __user *data)
18667 +{
18668 +       struct vcmd_bcaps vc_data;
18669 +
18670 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18671 +               return -EFAULT;
18672 +
18673 +       return do_set_caps(vxi, vc_data.bcaps, vc_data.bmask, 0, 0);
18674 +}
18675 +
18676 +/* copy vx_badness_bias of @vxi to @data; returns 0 or -EFAULT */
18677 +int vc_get_badness(struct vx_info *vxi, void __user *data)
18678 +{
18679 +       struct vcmd_badness_v0 vc_data;
18680 +
18681 +       vc_data.bias = vxi->vx_badness_bias;
18682 +
18683 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18684 +               return -EFAULT;
18685 +       return 0;
18686 +}
18687 +/* set vx_badness_bias of @vxi from user data; returns 0 or -EFAULT */
18688 +int vc_set_badness(struct vx_info *vxi, void __user *data)
18689 +{
18690 +       struct vcmd_badness_v0 vc_data;
18691 +
18692 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18693 +               return -EFAULT;
18694 +
18695 +       vxi->vx_badness_bias = vc_data.bias;    /* stored as given, no range check here */
18696 +       return 0;
18697 +}
18698 +
18699 +#include <linux/module.h>
18700 +
18701 +EXPORT_SYMBOL_GPL(free_vx_info);
18702 +
18703 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/cvirt.c linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/cvirt.c
18704 --- linux-2.6.31.6/kernel/vserver/cvirt.c       1970-01-01 01:00:00.000000000 +0100
18705 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/cvirt.c 2009-11-05 04:21:59.000000000 +0100
18706 @@ -0,0 +1,304 @@
18707 +/*
18708 + *  linux/kernel/vserver/cvirt.c
18709 + *
18710 + *  Virtual Server: Context Virtualization
18711 + *
18712 + *  Copyright (C) 2004-2007  Herbert Pötzl
18713 + *
18714 + *  V0.01  broken out from limit.c
18715 + *  V0.02  added utsname stuff
18716 + *  V0.03  changed vcmds to vxi arg
18717 + *
18718 + */
18719 +
18720 +#include <linux/types.h>
18721 +#include <linux/utsname.h>
18722 +#include <linux/vs_cvirt.h>
18723 +#include <linux/vserver/switch.h>
18724 +#include <linux/vserver/cvirt_cmd.h>
18725 +
18726 +#include <asm/uaccess.h>
18727 +
18728 +/* subtract the current context's biases from @uptime and, if non-NULL, @idle (both in place) */
18729 +void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle)
18730 +{
18731 +       struct vx_info *vxi = current_vx_info();        /* NOTE(review): dereferenced unchecked below */
18732 +
18733 +       set_normalized_timespec(uptime,
18734 +               uptime->tv_sec - vxi->cvirt.bias_uptime.tv_sec,
18735 +               uptime->tv_nsec - vxi->cvirt.bias_uptime.tv_nsec);
18736 +       if (!idle)
18737 +               return;
18738 +       set_normalized_timespec(idle,
18739 +               idle->tv_sec - vxi->cvirt.bias_idle.tv_sec,
18740 +               idle->tv_nsec - vxi->cvirt.bias_idle.tv_nsec);
18741 +       return;
18742 +}
18743 +/* returns utime + stime of init_task */
18744 +uint64_t vx_idle_jiffies(void)
18745 +{
18746 +       return init_task.utime + init_task.stime;
18747 +}
18748 +
18749 +
18750 +/* new load = ((delta * n << FSHIFT) + (wsize - delta) * load) / wsize, i.e. @n weighted by delta/wsize */
18751 +static inline uint32_t __update_loadavg(uint32_t load,
18752 +       int wsize, int delta, int n)
18753 +{
18754 +       unsigned long long calc, prev;
18755 +
18756 +       /* just set it to n */
18757 +       if (unlikely(delta >= wsize))
18758 +               return (n << FSHIFT);
18759 +
18760 +       calc = (unsigned long long)delta * n;   /* widen first: int * int could overflow */
18761 +       calc <<= FSHIFT;
18762 +       prev = (wsize - delta);
18763 +       prev *= load;
18764 +       calc += prev;
18765 +       do_div(calc, wsize);    /* 64-bit by 32-bit division, quotient left in calc */
18766 +       return calc;
18767 +}
18768 +
18769 +/* recompute the three load averages of @vxi (60s, 5min, 15min windows), at most once per 5*HZ jiffies */
18770 +void vx_update_load(struct vx_info *vxi)
18771 +{
18772 +       uint32_t now, last, delta;
18773 +       unsigned int nr_running, nr_uninterruptible;
18774 +       unsigned int total;
18775 +       unsigned long flags;
18776 +
18777 +       spin_lock_irqsave(&vxi->cvirt.load_lock, flags);
18778 +
18779 +       now = jiffies;
18780 +       last = vxi->cvirt.load_last;
18781 +       delta = now - last;     /* unsigned 32-bit difference of jiffies */
18782 +
18783 +       if (delta < 5*HZ)
18784 +               goto out;
18785 +
18786 +       nr_running = atomic_read(&vxi->cvirt.nr_running);
18787 +       nr_uninterruptible = atomic_read(&vxi->cvirt.nr_uninterruptible);
18788 +       total = nr_running + nr_uninterruptible;
18789 +
18790 +       vxi->cvirt.load[0] = __update_loadavg(vxi->cvirt.load[0],
18791 +               60*HZ, delta, total);
18792 +       vxi->cvirt.load[1] = __update_loadavg(vxi->cvirt.load[1],
18793 +               5*60*HZ, delta, total);
18794 +       vxi->cvirt.load[2] = __update_loadavg(vxi->cvirt.load[2],
18795 +               15*60*HZ, delta, total);
18796 +
18797 +       vxi->cvirt.load_last = now;
18798 +out:   /* load_updates counts calls, including those that skipped the recomputation */
18799 +       atomic_inc(&vxi->cvirt.load_updates);
18800 +       spin_unlock_irqrestore(&vxi->cvirt.load_lock, flags);
18801 +}
18802 +
18803 +
18804 +/*
18805 + * vx_do_syslog - syslog entry for the caller's context; command numbers as for do_syslog:
18806 + *
18807 + *      0 -- Close the log.  Currently a NOP.
18808 + *      1 -- Open the log. Currently a NOP.
18809 + *      2 -- Read from the log.
18810 + *      3 -- Read all messages remaining in the ring buffer.
18811 + *      4 -- Read and clear all messages remaining in the ring buffer
18812 + *      5 -- Clear ring buffer.
18813 + *      6 -- Disable printk's to console
18814 + *      7 -- Enable printk's to console
18815 + *      8 -- Set level of messages printed to console
18816 + *      9 -- Return number of unread characters in the log buffer
18817 + *     10 -- Return size of the log buffer
18818 + */
18819 +int vx_do_syslog(int type, char __user *buf, int len)  /* @buf and @len are not used below */
18820 +{
18821 +       int error = 0;
18822 +       int do_clear = 0;       /* set for type 4 but never read in this function */
18823 +       struct vx_info *vxi = current_vx_info();
18824 +       struct _vx_syslog *log;
18825 +
18826 +       if (!vxi)
18827 +               return -EINVAL;
18828 +       log = &vxi->cvirt.syslog;
18829 +
18830 +       switch (type) {
18831 +       case 0:         /* Close log */
18832 +       case 1:         /* Open log */
18833 +               break;
18834 +       case 2:         /* Read from log */
18835 +               error = wait_event_interruptible(log->log_wait,
18836 +                       (log->log_start - log->log_end));       /* wake condition: log_start != log_end */
18837 +               if (error)
18838 +                       break;
18839 +               spin_lock_irq(&log->logbuf_lock);       /* empty critical section: nothing is copied out */
18840 +               spin_unlock_irq(&log->logbuf_lock);
18841 +               break;
18842 +       case 4:         /* Read/clear last kernel messages */
18843 +               do_clear = 1;
18844 +               /* fall through */
18845 +       case 3:         /* Read last kernel messages */
18846 +               return 0;
18847 +
18848 +       case 5:         /* Clear ring buffer */
18849 +               return 0;
18850 +
18851 +       case 6:         /* Disable logging to console */
18852 +       case 7:         /* Enable logging to console */
18853 +       case 8:         /* Set level of messages printed to console */
18854 +               break;
18855 +
18856 +       case 9:         /* Number of chars in the log buffer */
18857 +               return 0;
18858 +       case 10:        /* Size of the log buffer */
18859 +               return 0;
18860 +       default:
18861 +               error = -EINVAL;
18862 +               break;
18863 +       }
18864 +       return error;
18865 +}
18866 +
18867 +
18868 +/* virtual host info names */
18869 +/* map a VHIN_* @id to the matching name buffer of @vxi; NULL for unknown ids or missing nsproxy/uts_ns */
18870 +static char *vx_vhi_name(struct vx_info *vxi, int id)
18871 +{
18872 +       struct nsproxy *nsproxy;
18873 +       struct uts_namespace *uts;
18874 +
18875 +       if (id == VHIN_CONTEXT)
18876 +               return vxi->vx_name;    /* the only name not taken from the uts namespace */
18877 +
18878 +       nsproxy = vxi->vx_nsproxy[0];
18879 +       if (!nsproxy)
18880 +               return NULL;
18881 +
18882 +       uts = nsproxy->uts_ns;
18883 +       if (!uts)
18884 +               return NULL;
18885 +
18886 +       switch (id) {
18887 +       case VHIN_SYSNAME:
18888 +               return uts->name.sysname;
18889 +       case VHIN_NODENAME:
18890 +               return uts->name.nodename;
18891 +       case VHIN_RELEASE:
18892 +               return uts->name.release;
18893 +       case VHIN_VERSION:
18894 +               return uts->name.version;
18895 +       case VHIN_MACHINE:
18896 +               return uts->name.machine;
18897 +       case VHIN_DOMAINNAME:
18898 +               return uts->name.domainname;
18899 +       default:
18900 +               return NULL;
18901 +       }
18902 +       return NULL;    /* not reached: every switch arm returns */
18903 +}
18904 +/* overwrite the name selected by the user-supplied field id; returns 0, -EFAULT or -EINVAL */
18905 +int vc_set_vhi_name(struct vx_info *vxi, void __user *data)
18906 +{
18907 +       struct vcmd_vhi_name_v0 vc_data;
18908 +       char *name;
18909 +
18910 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18911 +               return -EFAULT;
18912 +
18913 +       name = vx_vhi_name(vxi, vc_data.field);
18914 +       if (!name)
18915 +               return -EINVAL; /* unknown field id, or no uts namespace to write to */
18916 +       vc_data.name[64] = '\0';        /* user data may lack a terminator; never store an unterminated name */
18917 +       memcpy(name, vc_data.name, 65); /* NOTE(review): assumes 65-byte targets; no uts_sem taken -- confirm */
18918 +       return 0;
18919 +}
18920 +/* return the name selected by the field id in @data to user space; 0, -EFAULT or -EINVAL */
18921 +int vc_get_vhi_name(struct vx_info *vxi, void __user *data)
18922 +{
18923 +       struct vcmd_vhi_name_v0 vc_data;
18924 +       char *name;
18925 +
18926 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))    /* needed to learn vc_data.field */
18927 +               return -EFAULT;
18928 +
18929 +       name = vx_vhi_name(vxi, vc_data.field);
18930 +       if (!name)
18931 +               return -EINVAL;
18932 +
18933 +       memcpy(vc_data.name, name, 65);
18934 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18935 +               return -EFAULT;
18936 +       return 0;
18937 +}
18938 +
18939 +/* copy time offset, biased uptime, task counters and load values of @vxi to @data; 0 or -EFAULT */
18940 +int vc_virt_stat(struct vx_info *vxi, void __user *data)
18941 +{
18942 +       struct vcmd_virt_stat_v0 vc_data;
18943 +       struct _vx_cvirt *cvirt = &vxi->cvirt;
18944 +       struct timespec uptime;
18945 +
18946 +       do_posix_clock_monotonic_gettime(&uptime);
18947 +       set_normalized_timespec(&uptime,
18948 +               uptime.tv_sec - cvirt->bias_uptime.tv_sec,
18949 +               uptime.tv_nsec - cvirt->bias_uptime.tv_nsec);
18950 +
18951 +       vc_data.offset = timeval_to_ns(&cvirt->bias_tv);
18952 +       vc_data.uptime = timespec_to_ns(&uptime);
18953 +       vc_data.nr_threads = atomic_read(&cvirt->nr_threads);
18954 +       vc_data.nr_running = atomic_read(&cvirt->nr_running);
18955 +       vc_data.nr_uninterruptible = atomic_read(&cvirt->nr_uninterruptible);
18956 +       vc_data.nr_onhold = atomic_read(&cvirt->nr_onhold);
18957 +       vc_data.nr_forks = atomic_read(&cvirt->total_forks);
18958 +       vc_data.load[0] = cvirt->load[0];       /* copied without taking load_lock */
18959 +       vc_data.load[1] = cvirt->load[1];
18960 +       vc_data.load[2] = cvirt->load[2];
18961 +
18962 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18963 +               return -EFAULT;
18964 +       return 0;
18965 +}
18966 +
18967 +
18968 +#ifdef CONFIG_VSERVER_VTIME
18969 +
18970 +/* virtualized time base */
18971 +/* do_gettimeofday(), plus the current context's bias_tv when VXF_VIRT_TIME is set */
18972 +void vx_gettimeofday(struct timeval *tv)
18973 +{
18974 +       struct vx_info *vxi;
18975 +
18976 +       do_gettimeofday(tv);
18977 +       if (!vx_flags(VXF_VIRT_TIME, 0))
18978 +               return;
18979 +
18980 +       vxi = current_vx_info();
18981 +       tv->tv_sec += vxi->cvirt.bias_tv.tv_sec;
18982 +       tv->tv_usec += vxi->cvirt.bias_tv.tv_usec;
18983 +
18984 +       if (tv->tv_usec >= USEC_PER_SEC) {      /* one carry or borrow step brings tv_usec back in range */
18985 +               tv->tv_sec++;
18986 +               tv->tv_usec -= USEC_PER_SEC;
18987 +       } else if (tv->tv_usec < 0) {
18988 +               tv->tv_sec--;
18989 +               tv->tv_usec += USEC_PER_SEC;
18990 +       }
18991 +}
18992 +/* with VXF_VIRT_TIME: store (@ts - current time) as the context's bias_tv; otherwise do_settimeofday() */
18993 +int vx_settimeofday(struct timespec *ts)
18994 +{
18995 +       struct timeval tv;
18996 +       struct vx_info *vxi;
18997 +
18998 +       if (!vx_flags(VXF_VIRT_TIME, 0))
18999 +               return do_settimeofday(ts);
19000 +
19001 +       do_gettimeofday(&tv);
19002 +       vxi = current_vx_info();
19003 +       vxi->cvirt.bias_tv.tv_sec = ts->tv_sec - tv.tv_sec;
19004 +       vxi->cvirt.bias_tv.tv_usec =
19005 +               (ts->tv_nsec/NSEC_PER_USEC) - tv.tv_usec;       /* may be negative; not normalised here */
19006 +       return 0;
19007 +}
19008 +
19009 +#endif
19010 +
19011 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/cvirt_init.h linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/cvirt_init.h
19012 --- linux-2.6.31.6/kernel/vserver/cvirt_init.h  1970-01-01 01:00:00.000000000 +0100
19013 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/cvirt_init.h    2009-09-10 16:11:43.000000000 +0200
19014 @@ -0,0 +1,69 @@
19015 +
19016 +
19017 +extern uint64_t vx_idle_jiffies(void);
19018 +/* initialise @cvirt: current uptime/idle become the biases; counters, loads and syslog state are zeroed */
19019 +static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt)
19020 +{
19021 +       uint64_t idle_jiffies = vx_idle_jiffies();
19022 +       uint64_t nsuptime;
19023 +
19024 +       do_posix_clock_monotonic_gettime(&cvirt->bias_uptime);
19025 +       nsuptime = (unsigned long long)cvirt->bias_uptime.tv_sec
19026 +               * NSEC_PER_SEC + cvirt->bias_uptime.tv_nsec;
19027 +       cvirt->bias_clock = nsec_to_clock_t(nsuptime);
19028 +       cvirt->bias_tv.tv_sec = 0;      /* no time-of-day offset initially */
19029 +       cvirt->bias_tv.tv_usec = 0;
19030 +
19031 +       jiffies_to_timespec(idle_jiffies, &cvirt->bias_idle);
19032 +       atomic_set(&cvirt->nr_threads, 0);
19033 +       atomic_set(&cvirt->nr_running, 0);
19034 +       atomic_set(&cvirt->nr_uninterruptible, 0);
19035 +       atomic_set(&cvirt->nr_onhold, 0);
19036 +
19037 +       spin_lock_init(&cvirt->load_lock);
19038 +       cvirt->load_last = jiffies;
19039 +       atomic_set(&cvirt->load_updates, 0);
19040 +       cvirt->load[0] = 0;
19041 +       cvirt->load[1] = 0;
19042 +       cvirt->load[2] = 0;
19043 +       atomic_set(&cvirt->total_forks, 0);
19044 +
19045 +       spin_lock_init(&cvirt->syslog.logbuf_lock);
19046 +       init_waitqueue_head(&cvirt->syslog.log_wait);
19047 +       cvirt->syslog.log_start = 0;
19048 +       cvirt->syslog.log_end = 0;
19049 +       cvirt->syslog.con_start = 0;
19050 +       cvirt->syslog.logged_chars = 0;
19051 +}
19052 +/* per-cpu counterpart of vx_info_init_cvirt(); currently does nothing */
19053 +static inline
19054 +void vx_info_init_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu)
19055 +{
19056 +       // cvirt_pc->cpustat = { 0 };
19057 +}
19058 +/* pass each task counter of @cvirt to vxwprintk_xid() as condition (presumably warns if still non-zero) */
19059 +static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt)
19060 +{
19061 +       int value;
19062 +
19063 +       vxwprintk_xid((value = atomic_read(&cvirt->nr_threads)),
19064 +               "!!! cvirt: %p[nr_threads] = %d on exit.",
19065 +               cvirt, value);
19066 +       vxwprintk_xid((value = atomic_read(&cvirt->nr_running)),
19067 +               "!!! cvirt: %p[nr_running] = %d on exit.",
19068 +               cvirt, value);
19069 +       vxwprintk_xid((value = atomic_read(&cvirt->nr_uninterruptible)),
19070 +               "!!! cvirt: %p[nr_uninterruptible] = %d on exit.",
19071 +               cvirt, value);
19072 +       vxwprintk_xid((value = atomic_read(&cvirt->nr_onhold)),
19073 +               "!!! cvirt: %p[nr_onhold] = %d on exit.",
19074 +               cvirt, value);
19075 +       return;
19076 +}
19077 +/* per-cpu counterpart of vx_info_exit_cvirt(); nothing to check or release */
19078 +static inline
19079 +void vx_info_exit_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu)
19080 +{
19081 +       return;
19082 +}
19083 +
19084 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/cvirt_proc.h linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/cvirt_proc.h
19085 --- linux-2.6.31.6/kernel/vserver/cvirt_proc.h  1970-01-01 01:00:00.000000000 +0100
19086 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/cvirt_proc.h    2009-09-10 16:11:43.000000000 +0200
19087 @@ -0,0 +1,135 @@
19088 +#ifndef _VX_CVIRT_PROC_H
19089 +#define _VX_CVIRT_PROC_H
19090 +
19091 +#include <linux/nsproxy.h>
19092 +#include <linux/mnt_namespace.h>
19093 +#include <linux/ipc_namespace.h>
19094 +#include <linux/utsname.h>
19095 +#include <linux/ipc.h>
19096 +
19097 +/* format mnt/uts/ipc details of @nsproxy into @buffer; returns the number of characters written (0 if NULL) */
19098 +static inline
19099 +int vx_info_proc_nsproxy(struct nsproxy *nsproxy, char *buffer)
19100 +{
19101 +       struct mnt_namespace *ns;
19102 +       struct uts_namespace *uts;
19103 +       struct ipc_namespace *ipc;
19104 +       struct path path;
19105 +       char *pstr, *root;
19106 +       int length = 0; /* NOTE(review): sprintf() below is unbounded; @buffer size is not visible here */
19107 +
19108 +       if (!nsproxy)
19109 +               goto out;
19110 +
19111 +       length += sprintf(buffer + length,
19112 +               "NSProxy:\t%p [%p,%p,%p]\n",
19113 +               nsproxy, nsproxy->mnt_ns,
19114 +               nsproxy->uts_ns, nsproxy->ipc_ns);
19115 +
19116 +       ns = nsproxy->mnt_ns;
19117 +       if (!ns)
19118 +               goto skip_ns;
19119 +
19120 +       pstr = kmalloc(PATH_MAX, GFP_KERNEL);   /* scratch buffer for d_path() */
19121 +       if (!pstr)
19122 +               goto skip_ns;   /* on allocation failure the namespace lines are simply omitted */
19123 +
19124 +       path.mnt = ns->root;
19125 +       path.dentry = ns->root->mnt_root;
19126 +       root = d_path(&path, pstr, PATH_MAX - 2);
19127 +       length += sprintf(buffer + length,
19128 +               "Namespace:\t%p [#%u]\n"
19129 +               "RootPath:\t%s\n",
19130 +               ns, atomic_read(&ns->count),
19131 +               IS_ERR(root) ? "(unknown)" : root);     /* d_path() may return an ERR_PTR */
19132 +       kfree(pstr);
19133 +skip_ns:
19134 +
19135 +       uts = nsproxy->uts_ns;
19136 +       if (!uts)
19137 +               goto skip_uts;
19138 +
19139 +       length += sprintf(buffer + length,
19140 +               "SysName:\t%.*s\n"
19141 +               "NodeName:\t%.*s\n"
19142 +               "Release:\t%.*s\n"
19143 +               "Version:\t%.*s\n"
19144 +               "Machine:\t%.*s\n"
19145 +               "DomainName:\t%.*s\n",
19146 +               __NEW_UTS_LEN, uts->name.sysname,
19147 +               __NEW_UTS_LEN, uts->name.nodename,
19148 +               __NEW_UTS_LEN, uts->name.release,
19149 +               __NEW_UTS_LEN, uts->name.version,
19150 +               __NEW_UTS_LEN, uts->name.machine,
19151 +               __NEW_UTS_LEN, uts->name.domainname);
19152 +skip_uts:
19153 +
19154 +       ipc = nsproxy->ipc_ns;
19155 +       if (!ipc)
19156 +               goto skip_ipc;
19157 +
19158 +       length += sprintf(buffer + length,
19159 +               "SEMS:\t\t%d %d %d %d  %d\n"
19160 +               "MSG:\t\t%d %d %d\n"
19161 +               "SHM:\t\t%lu %lu  %d %d\n",
19162 +               ipc->sem_ctls[0], ipc->sem_ctls[1],
19163 +               ipc->sem_ctls[2], ipc->sem_ctls[3],
19164 +               ipc->used_sems,
19165 +               ipc->msg_ctlmax, ipc->msg_ctlmnb, ipc->msg_ctlmni,
19166 +               (unsigned long)ipc->shm_ctlmax,
19167 +               (unsigned long)ipc->shm_ctlall,
19168 +               ipc->shm_ctlmni, ipc->shm_tot);
19169 +skip_ipc:
19170 +out:
19171 +       return length;
19172 +}
19173 +
19174 +
19175 +#include <linux/sched.h>
19176 +
19177 +#define LOAD_INT(x) ((x) >> FSHIFT)
19178 +#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100)
19179 +/* format bias uptime, task counters and load averages of @cvirt into @buffer; returns characters written */
19180 +static inline
19181 +int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer)
19182 +{
19183 +       int length = 0;
19184 +       int a, b, c;
19185 +
19186 +       length += sprintf(buffer + length,
19187 +               "BiasUptime:\t%lu.%02lu\n",
19188 +               (unsigned long)cvirt->bias_uptime.tv_sec,
19189 +               (cvirt->bias_uptime.tv_nsec / (NSEC_PER_SEC / 100)));   /* nanoseconds -> hundredths */
19190 +
19191 +       a = cvirt->load[0] + (FIXED_1 / 200);   /* add half of 1/100 so the two printed decimals round */
19192 +       b = cvirt->load[1] + (FIXED_1 / 200);
19193 +       c = cvirt->load[2] + (FIXED_1 / 200);
19194 +       length += sprintf(buffer + length,
19195 +               "nr_threads:\t%d\n"
19196 +               "nr_running:\t%d\n"
19197 +               "nr_unintr:\t%d\n"
19198 +               "nr_onhold:\t%d\n"
19199 +               "load_updates:\t%d\n"
19200 +               "loadavg:\t%d.%02d %d.%02d %d.%02d\n"
19201 +               "total_forks:\t%d\n",
19202 +               atomic_read(&cvirt->nr_threads),
19203 +               atomic_read(&cvirt->nr_running),
19204 +               atomic_read(&cvirt->nr_uninterruptible),
19205 +               atomic_read(&cvirt->nr_onhold),
19206 +               atomic_read(&cvirt->load_updates),
19207 +               LOAD_INT(a), LOAD_FRAC(a),
19208 +               LOAD_INT(b), LOAD_FRAC(b),
19209 +               LOAD_INT(c), LOAD_FRAC(c),
19210 +               atomic_read(&cvirt->total_forks));
19211 +       return length;
19212 +}
19213 +/* per-cpu counterpart of vx_info_proc_cvirt(); writes nothing and returns 0 */
19214 +static inline
19215 +int vx_info_proc_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc,
19216 +       char *buffer, int cpu)
19217 +{
19218 +       int length = 0;
19219 +       return length;
19220 +}
19221 +
19222 +#endif /* _VX_CVIRT_PROC_H */
19223 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/debug.c linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/debug.c
19224 --- linux-2.6.31.6/kernel/vserver/debug.c       1970-01-01 01:00:00.000000000 +0100
19225 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/debug.c 2009-09-10 16:11:43.000000000 +0200
19226 @@ -0,0 +1,32 @@
19227 +/*
19228 + *  kernel/vserver/debug.c
19229 + *
19230 + *  Copyright (C) 2005-2007 Herbert Pötzl
19231 + *
19232 + *  V0.01  vx_info dump support
19233 + *
19234 + */
19235 +
19236 +#include <linux/module.h>
19237 +
19238 +#include <linux/vserver/context.h>
19239 +
19240 +/* printk a one-line summary of @vxi; for @level > 0 also call the limit/sched/cvirt/cacct dump helpers */
19241 +void   dump_vx_info(struct vx_info *vxi, int level)
19242 +{
19243 +       printk("vx_info %p[#%d, %d.%d, %4x]\n", vxi, vxi->vx_id,        /* id, usecnt.tasks, state */
19244 +               atomic_read(&vxi->vx_usecnt),
19245 +               atomic_read(&vxi->vx_tasks),
19246 +               vxi->vx_state);
19247 +       if (level > 0) {
19248 +               __dump_vx_limit(&vxi->limit);
19249 +               __dump_vx_sched(&vxi->sched);
19250 +               __dump_vx_cvirt(&vxi->cvirt);
19251 +               __dump_vx_cacct(&vxi->cacct);
19252 +       }
19253 +       printk("---\n");
19254 +}
19255 +
19256 +
19257 +EXPORT_SYMBOL_GPL(dump_vx_info);
19258 +
19259 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/device.c linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/device.c
19260 --- linux-2.6.31.6/kernel/vserver/device.c      1970-01-01 01:00:00.000000000 +0100
19261 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/device.c        2009-09-10 16:11:43.000000000 +0200
19262 @@ -0,0 +1,443 @@
19263 +/*
19264 + *  linux/kernel/vserver/device.c
19265 + *
19266 + *  Linux-VServer: Device Support
19267 + *
19268 + *  Copyright (C) 2006  Herbert Pötzl
19269 + *  Copyright (C) 2007  Daniel Hokka Zakrisson
19270 + *
19271 + *  V0.01  device mapping basics
19272 + *  V0.02  added defaults
19273 + *
19274 + */
19275 +
19276 +#include <linux/slab.h>
19277 +#include <linux/rcupdate.h>
19278 +#include <linux/fs.h>
19279 +#include <linux/namei.h>
19280 +#include <linux/hash.h>
19281 +
19282 +#include <asm/errno.h>
19283 +#include <asm/uaccess.h>
19284 +#include <linux/vserver/base.h>
19285 +#include <linux/vserver/debug.h>
19286 +#include <linux/vserver/context.h>
19287 +#include <linux/vserver/device.h>
19288 +#include <linux/vserver/device_cmd.h>
19289 +
19290 +
19291 +#define DMAP_HASH_BITS 4
19292 +
19293 +/* one device mapping entry: (xid, device) -> target, linked into dmap_main_hash via dm_hlist */
19294 +struct vs_mapping {
19295 +       union {
19296 +               struct hlist_node hlist;
19297 +               struct list_head list;  /* NOTE(review): no user of dm_list is visible in this part of the file */
19298 +       } u;
19299 +#define dm_hlist       u.hlist
19300 +#define dm_list                u.list
19301 +       xid_t xid;
19302 +       dev_t device;
19303 +       struct vx_dmap_target target;
19304 +};
19305 +
19306 +/* global device-map state: the hash table, the spinlock taken around updates, and the default targets */
19307 +static struct hlist_head dmap_main_hash[1 << DMAP_HASH_BITS];
19308 +
19309 +static DEFINE_SPINLOCK(dmap_main_hash_lock);   /* replaces the deprecated SPIN_LOCK_UNLOCKED initializer */
19310 +
19311 +static struct vx_dmap_target dmap_defaults[2] = {
19312 +       { .flags = DATTR_OPEN },        /* [0]: S_IFBLK, index as returned by __mode_to_default() */
19313 +       { .flags = DATTR_OPEN },        /* [1]: S_IFCHR */
19314 +};
19315 +
19316 +/* slab cache for struct vs_mapping objects, created once by the initcall below */
19317 +struct kmem_cache *dmap_cachep __read_mostly;
19318 +
19319 +int __init dmap_cache_init(void)
19320 +{
19321 +       dmap_cachep = kmem_cache_create("dmap_cache",
19322 +               sizeof(struct vs_mapping), 0,
19323 +               SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);   /* SLAB_PANIC: creation failure panics, hence no NULL check */
19324 +       return 0;
19325 +}
19326 +
19327 +__initcall(dmap_cache_init);
19328 +
19329 +/* hash a device number down to @bits bits using hash_long() */
19330 +static inline unsigned int __hashval(dev_t dev, int bits)
19331 +{
19332 +       return hash_long((unsigned long)dev, bits);
19333 +}
19334 +
19335 +
19336 +/*     __hash_mapping()
19337 + *     add the mapping to the global hash table, taking dmap_main_hash_lock; @vxi is only used for the debug message
19338 + */
19339 +static inline void __hash_mapping(struct vx_info *vxi, struct vs_mapping *vdm)
19340 +{
19341 +       spinlock_t *hash_lock = &dmap_main_hash_lock;
19342 +       struct hlist_head *head, *hash = dmap_main_hash;
19343 +       int device = vdm->device;
19344 +
19345 +       spin_lock(hash_lock);
19346 +       vxdprintk(VXD_CBIT(misc, 8), "__hash_mapping: %p[#%d] %08x:%08x",
19347 +               vxi, vxi ? vxi->vx_id : 0, device, vdm->target.target);
19348 +
19349 +       head = &hash[__hashval(device, DMAP_HASH_BITS)];        /* bucket is chosen by device number only */
19350 +       hlist_add_head(&vdm->dm_hlist, head);
19351 +       spin_unlock(hash_lock);
19352 +}
19353 +
19354 +/* index into the two-entry default target arrays: 0 for S_IFBLK, 1 for S_IFCHR, BUG() otherwise */
19355 +static inline int __mode_to_default(umode_t mode)
19356 +{
19357 +       switch (mode) {
19358 +       case S_IFBLK:
19359 +               return 0;
19360 +       case S_IFCHR:
19361 +               return 1;
19362 +       default:
19363 +               BUG();
19364 +       }
19365 +}
19366 +
19367 +
19368 +/*     __set_default()
19369 + *     set the default target for @mode: in @vxi's dmap if @vxi is non-NULL, else in the global dmap_defaults
19370 + */
19371 +static inline void __set_default(struct vx_info *vxi, umode_t mode,
19372 +       struct vx_dmap_target *vdmt)
19373 +{
19374 +       spinlock_t *hash_lock = &dmap_main_hash_lock;   /* the hash lock also covers the default slots */
19375 +       spin_lock(hash_lock);
19376 +
19377 +       if (vxi)
19378 +               vxi->dmap.targets[__mode_to_default(mode)] = *vdmt;
19379 +       else
19380 +               dmap_defaults[__mode_to_default(mode)] = *vdmt;
19381 +
19382 +
19383 +       spin_unlock(hash_lock);
19384 +
19385 +       vxdprintk(VXD_CBIT(misc, 8), "__set_default: %p[#%u] %08x %04x",
19386 +                 vxi, vxi ? vxi->vx_id : 0, vdmt->target, vdmt->flags);
19387 +}
19388 +
19389 +
19390 +/*     __remove_default()
19391 + *     clear @vxi's default for @mode (flags = 0), or reset the global one if @vxi is NULL; always returns 0
19392 + */
19393 +static inline int __remove_default(struct vx_info *vxi, umode_t mode)
19394 +{
19395 +       spinlock_t *hash_lock = &dmap_main_hash_lock;
19396 +       spin_lock(hash_lock);
19397 +
19398 +       if (vxi)
19399 +               vxi->dmap.targets[__mode_to_default(mode)].flags = 0;
19400 +       else    /* remove == reset */
19401 +               dmap_defaults[__mode_to_default(mode)].flags = DATTR_OPEN | mode;       /* NOTE(review): static initializer has no mode bits -- confirm intent */
19402 +
19403 +       spin_unlock(hash_lock);
19404 +       return 0;
19405 +}
19406 +
19407 +
19408 +/*     __find_mapping()
19409 + *     find the mapping for (xid, device, S_IFMT bits of mode) in the hash table
19410 + *     returns 1 with *local set, 0 with only *global (an xid 0 entry) set, else -ENOENT
19411 + *     caller must hold hash_lock; global may be NULL to skip the xid 0 fallback
19412 + */
19413 +static inline int __find_mapping(xid_t xid, dev_t device, umode_t mode,
19414 +       struct vs_mapping **local, struct vs_mapping **global)
19415 +{
19416 +       struct hlist_head *hash = dmap_main_hash;
19417 +       struct hlist_head *head = &hash[__hashval(device, DMAP_HASH_BITS)];
19418 +       struct hlist_node *pos;
19419 +       struct vs_mapping *vdm;
19420 +
19421 +       *local = NULL;
19422 +       if (global)
19423 +               *global = NULL;
19424 +
19425 +       hlist_for_each(pos, head) {
19426 +               vdm = hlist_entry(pos, struct vs_mapping, dm_hlist);
19427 +
19428 +               if ((vdm->device == device) &&
19429 +                       !((vdm->target.flags ^ mode) & S_IFMT)) {
19430 +                       if (vdm->xid == xid) {
19431 +                               *local = vdm;
19432 +                               return 1;
19433 +                       } else if (global && vdm->xid == 0)
19434 +                               *global = vdm;  /* remember it, keep scanning for an exact xid match */
19435 +               }
19436 +       }
19437 +
19438 +       if (global && *global)
19439 +               return 0;
19440 +       else
19441 +               return -ENOENT;
19442 +}
19443 +
19444 +
19445 +/*     __lookup_mapping()
19446 + *     resolve device for vxi; fills *target and *flags when non-NULL and returns
19447 + *     1 (context mapping), 2 (context default), 3 (global mapping), 4 (global default) */
19448 +static inline int __lookup_mapping(struct vx_info *vxi,
19449 +       dev_t device, dev_t *target, int *flags, umode_t mode)
19450 +{
19451 +       spinlock_t *hash_lock = &dmap_main_hash_lock;
19452 +       struct vs_mapping *vdm, *global;
19453 +       struct vx_dmap_target *vdmt;
19454 +       int ret = 0;
19455 +       xid_t xid = vxi ? vxi->vx_id : 0;       /* vxi is NULL-tested below, so never deref it blindly */
19456 +       int index;
19457 +
19458 +       spin_lock(hash_lock);
19459 +       if (__find_mapping(xid, device, mode, &vdm, &global) > 0) {
19460 +               ret = 1;
19461 +               vdmt = &vdm->target;
19462 +               goto found;
19463 +       }
19464 +
19465 +       index = __mode_to_default(mode);
19466 +       if (vxi && vxi->dmap.targets[index].flags) {
19467 +               ret = 2;
19468 +               vdmt = &vxi->dmap.targets[index];
19469 +       } else if (global) {
19470 +               ret = 3;
19471 +               vdmt = &global->target;
19472 +               goto found;
19473 +       } else {
19474 +               ret = 4;
19475 +               vdmt = &dmap_defaults[index];
19476 +       }
19477 +
19478 +found:
19479 +       if (target && (vdmt->flags & DATTR_REMAP))
19480 +               *target = vdmt->target;
19481 +       else if (target)
19482 +               *target = device;       /* no remap requested: device maps to itself */
19483 +       if (flags)
19484 +               *flags = vdmt->flags;
19485 +
19486 +       spin_unlock(hash_lock);
19487 +
19488 +       return ret;
19489 +}
19490 +
19491 +
19492 +/*     __remove_mapping()
19493 + *     unlink a mapping from the hash table and free it; returns the
19494 + *     __find_mapping() result: 1 when removed, -ENOENT when not found  */
19495 +static inline int __remove_mapping(struct vx_info *vxi, dev_t device,
19496 +       umode_t mode)
19497 +{
19498 +       spinlock_t *hash_lock = &dmap_main_hash_lock;
19499 +       struct vs_mapping *vdm = NULL;
19500 +       int ret = 0;
19501 +
19502 +       spin_lock(hash_lock);
19503 +
19504 +       ret = __find_mapping((vxi ? vxi->vx_id : 0), device, mode, &vdm,
19505 +               NULL);
19506 +       vxdprintk(VXD_CBIT(misc, 8), "__remove_mapping: %p[#%d] %08x %04x",
19507 +               vxi, vxi ? vxi->vx_id : 0, device, mode);
19508 +       if (ret < 0)
19509 +               goto out;
19510 +       hlist_del(&vdm->dm_hlist);
19511 +
19512 +out:
19513 +       spin_unlock(hash_lock);
19514 +       if (vdm)        /* still NULL when nothing was found */
19515 +               kmem_cache_free(dmap_cachep, vdm);
19516 +       return ret;
19517 +}
19518 +
19519 +/* Resolve device for context vxi and store the result in *target (if non-NULL).
19520 + * Returns the mapping flags masked with DATTR_MASK; without a context that is DATTR_MASK itself. */
19521 +int vs_map_device(struct vx_info *vxi,
19522 +       dev_t device, dev_t *target, umode_t mode)
19523 +{
19524 +       int ret, flags = DATTR_MASK;
19525 +
19526 +       if (!vxi) {
19527 +               if (target)
19528 +                       *target = device;       /* no context: identity mapping */
19529 +               goto out;
19530 +       }
19531 +       ret = __lookup_mapping(vxi, device, target, &flags, mode);
19532 +       vxdprintk(VXD_CBIT(misc, 8), "vs_map_device: %08x target: %08x flags: %04x mode: %04x mapped=%d",
19533 +               device, target ? *target : 0, flags, mode, ret);
19534 +out:
19535 +       return (flags & DATTR_MASK);
19536 +}
19537 +
19538 +/* Install a mapping: a non-zero device hashes a new vs_mapping entry (xid 0 when vxi is NULL),
19539 + * device == 0 replaces the default target for mode instead. Returns 0 or -ENOMEM. */
19540 +static int do_set_mapping(struct vx_info *vxi,
19541 +       dev_t device, dev_t target, int flags, umode_t mode)
19542 +{
19543 +       if (device) {
19544 +               struct vs_mapping *new;
19545 +
19546 +               new = kmem_cache_alloc(dmap_cachep, GFP_KERNEL);
19547 +               if (!new)
19548 +                       return -ENOMEM;
19549 +
19550 +               INIT_HLIST_NODE(&new->dm_hlist);
19551 +               new->device = device;
19552 +               new->target.target = target;
19553 +               new->target.flags = flags | mode;       /* file type bits are kept in flags */
19554 +               new->xid = (vxi ? vxi->vx_id : 0);
19555 +
19556 +               vxdprintk(VXD_CBIT(misc, 8), "do_set_mapping: %08x target: %08x flags: %04x", device, target, flags);
19557 +               __hash_mapping(vxi, new);
19558 +       } else {
19559 +               struct vx_dmap_target new = {
19560 +                       .target = target,
19561 +                       .flags = flags | mode,
19562 +               };
19563 +               __set_default(vxi, mode, &new);
19564 +       }
19565 +       return 0;
19566 +}
19567 +
19568 +/* Remove the mapping for device, or the default for mode when device is 0; target and flags are not used. */
19569 +static int do_unset_mapping(struct vx_info *vxi,
19570 +       dev_t device, dev_t target, int flags, umode_t mode)
19571 +{
19572 +       int ret = -EINVAL;
19573 +
19574 +       if (device) {
19575 +               ret = __remove_mapping(vxi, device, mode);
19576 +               if (ret < 0)
19577 +                       goto out;
19578 +       } else {
19579 +               ret = __remove_default(vxi, mode);
19580 +               if (ret < 0)
19581 +                       goto out;
19582 +       }
19583 +
19584 +out:
19585 +       return ret;     /* helper result is passed through unchanged */
19586 +}
19587 +
19588 +/* Resolve a user-space path to its inode's i_rdev and i_mode; returns 0 or the user_lpath() error. */
19589 +static inline int __user_device(const char __user *name, dev_t *dev,
19590 +       umode_t *mode)
19591 +{
19592 +       struct nameidata nd;
19593 +       int ret;
19594 +
19595 +       if (!name) {
19596 +               *dev = 0;       /* NULL path means "no device"; *mode is left untouched */
19597 +               return 0;
19598 +       }
19599 +       ret = user_lpath(name, &nd.path);
19600 +       if (ret)
19601 +               return ret;
19602 +       if (nd.path.dentry->d_inode) {  /* without an inode both outputs keep the caller's values */
19603 +               *dev = nd.path.dentry->d_inode->i_rdev;
19604 +               *mode = nd.path.dentry->d_inode->i_mode;
19605 +       }
19606 +       path_put(&nd.path);
19607 +       return 0;
19608 +}
19609 +/* Derive the file type (S_IFMT bits) of a mapping from device, else from target; -EINVAL if neither is set. */
19610 +static inline int __mapping_mode(dev_t device, dev_t target,
19611 +       umode_t device_mode, umode_t target_mode, umode_t *mode)
19612 +{
19613 +       if (device)
19614 +               *mode = device_mode & S_IFMT;
19615 +       else if (target)
19616 +               *mode = target_mode & S_IFMT;
19617 +       else
19618 +               return -EINVAL;
19619 +
19620 +       /* if both given, device and target mode have to match */
19621 +       if (device && target &&
19622 +               ((device_mode ^ target_mode) & S_IFMT))
19623 +               return -EINVAL;
19624 +       return 0;
19625 +}
19626 +
19627 +/* Common worker of the set/unset commands: resolve both user paths, derive the mode, dispatch on set. */
19628 +static inline int do_mapping(struct vx_info *vxi, const char __user *device_path,
19629 +       const char __user *target_path, int flags, int set)
19630 +{
19631 +       dev_t device = ~0, target = ~0; /* stay ~0 when a path resolves to no inode */
19632 +       umode_t device_mode = 0, target_mode = 0, mode;
19633 +       int ret;
19634 +
19635 +       ret = __user_device(device_path, &device, &device_mode);
19636 +       if (ret)
19637 +               return ret;
19638 +       ret = __user_device(target_path, &target, &target_mode);
19639 +       if (ret)
19640 +               return ret;
19641 +
19642 +       ret = __mapping_mode(device, target,
19643 +               device_mode, target_mode, &mode);
19644 +       if (ret)
19645 +               return ret;
19646 +
19647 +       if (set)
19648 +               return do_set_mapping(vxi, device, target,
19649 +                       flags, mode);
19650 +       else
19651 +               return do_unset_mapping(vxi, device, target,
19652 +                       flags, mode);
19653 +}
19654 +
19655 +/* Command entry: copy a vcmd_set_mapping_v0 from user space and install the mapping; -EFAULT on a bad pointer. */
19656 +int vc_set_mapping(struct vx_info *vxi, void __user *data)
19657 +{
19658 +       struct vcmd_set_mapping_v0 vc_data;
19659 +
19660 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19661 +               return -EFAULT;
19662 +
19663 +       return do_mapping(vxi, vc_data.device, vc_data.target,
19664 +               vc_data.flags, 1);
19665 +}
19666 +/* Command entry: copy a vcmd_set_mapping_v0 from user space and remove the mapping; -EFAULT on a bad pointer. */
19667 +int vc_unset_mapping(struct vx_info *vxi, void __user *data)
19668 +{
19669 +       struct vcmd_set_mapping_v0 vc_data;
19670 +
19671 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19672 +               return -EFAULT;
19673 +
19674 +       return do_mapping(vxi, vc_data.device, vc_data.target,
19675 +               vc_data.flags, 0);
19676 +}
19677 +
19678 +
19679 +#ifdef CONFIG_COMPAT
19680 +/* Compat entry for setting a mapping: the 32-bit user pointers are converted with compat_ptr(). */
19681 +int vc_set_mapping_x32(struct vx_info *vxi, void __user *data)
19682 +{
19683 +       struct vcmd_set_mapping_v0_x32 vc_data;
19684 +
19685 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19686 +               return -EFAULT;
19687 +
19688 +       return do_mapping(vxi, compat_ptr(vc_data.device_ptr),
19689 +               compat_ptr(vc_data.target_ptr), vc_data.flags, 1);
19690 +}
19691 +/* Compat entry for removing a mapping: the 32-bit user pointers are converted with compat_ptr(). */
19692 +int vc_unset_mapping_x32(struct vx_info *vxi, void __user *data)
19693 +{
19694 +       struct vcmd_set_mapping_v0_x32 vc_data;
19695 +
19696 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19697 +               return -EFAULT;
19698 +
19699 +       return do_mapping(vxi, compat_ptr(vc_data.device_ptr),
19700 +               compat_ptr(vc_data.target_ptr), vc_data.flags, 0);
19701 +}
19702 +
19703 +#endif /* CONFIG_COMPAT */
19704 +
19705 +
19706 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/dlimit.c linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/dlimit.c
19707 --- linux-2.6.31.6/kernel/vserver/dlimit.c      1970-01-01 01:00:00.000000000 +0100
19708 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/dlimit.c        2009-09-10 16:11:43.000000000 +0200
19709 @@ -0,0 +1,522 @@
19710 +/*
19711 + *  linux/kernel/vserver/dlimit.c
19712 + *
19713 + *  Virtual Server: Context Disk Limits
19714 + *
19715 + *  Copyright (C) 2004-2007  Herbert Pötzl
19716 + *
19717 + *  V0.01  initial version
19718 + *  V0.02  compat32 splitup
19719 + *
19720 + */
19721 +
19722 +#include <linux/statfs.h>
19723 +#include <linux/sched.h>
19724 +#include <linux/namei.h>
19725 +#include <linux/vs_tag.h>
19726 +#include <linux/vs_dlimit.h>
19727 +#include <linux/vserver/dlimit_cmd.h>
19728 +
19729 +#include <asm/uaccess.h>
19730 +
19731 +/*     __alloc_dl_info()
19732 +
19733 +       * allocate an initialized dl_info struct (NULL if out of memory)
19734 +       * doesn't make it visible (hash)                        */
19735 +
19736 +static struct dl_info *__alloc_dl_info(struct super_block *sb, tag_t tag)
19737 +{
19738 +       struct dl_info *new = NULL;
19739 +
19740 +       vxdprintk(VXD_CBIT(dlim, 5),
19741 +               "alloc_dl_info(%p,%d)*", sb, tag);
19742 +
19743 +       /* would this benefit from a slab cache? */
19744 +       new = kzalloc(sizeof(struct dl_info), GFP_KERNEL);
19745 +       if (!new)
19746 +               return NULL;
19747 +
19748 +       /* kzalloc() returned zeroed memory, only non-zero state is set up below */
19749 +       new->dl_tag = tag;
19750 +       new->dl_sb = sb;
19751 +       INIT_RCU_HEAD(&new->dl_rcu);
19752 +       INIT_HLIST_NODE(&new->dl_hlist);
19753 +       spin_lock_init(&new->dl_lock);
19754 +       atomic_set(&new->dl_refcnt, 0);
19755 +       atomic_set(&new->dl_usecnt, 0);
19756 +
19757 +       /* rest of init goes here */
19758 +
19759 +       vxdprintk(VXD_CBIT(dlim, 4),
19760 +               "alloc_dl_info(%p,%d) = %p", sb, tag, new);
19761 +       return new;
19762 +}
19763 +
19764 +/*     __dealloc_dl_info()
19765 +
19766 +       * final disposal of dl_info: poison, BUG on live counts, kfree  */
19767 +
19768 +static void __dealloc_dl_info(struct dl_info *dli)
19769 +{
19770 +       vxdprintk(VXD_CBIT(dlim, 4),
19771 +               "dealloc_dl_info(%p)", dli);
19772 +
19773 +       dli->dl_hlist.next = LIST_POISON1;
19774 +       dli->dl_tag = -1;
19775 +       dli->dl_sb = 0;
19776 +
19777 +       BUG_ON(atomic_read(&dli->dl_usecnt));   /* nobody may still use it */
19778 +       BUG_ON(atomic_read(&dli->dl_refcnt));   /* nobody may still reference it */
19779 +
19780 +       kfree(dli);
19781 +}
19782 +
19783 +
19784 +/*     hash table for dl_info entries, bucket chosen by __hashval(); writers hold dl_info_hash_lock */
19785 +
19786 +#define DL_HASH_SIZE   13
19787 +
19788 +struct hlist_head dl_info_hash[DL_HASH_SIZE];
19789 +
19790 +static DEFINE_SPINLOCK(dl_info_hash_lock);
19791 +
19792 +/* Bucket index: the tag XORed with the superblock address, modulo DL_HASH_SIZE. */
19793 +static inline unsigned int __hashval(struct super_block *sb, tag_t tag)
19794 +{
19795 +       return ((tag ^ (unsigned long)sb) % DL_HASH_SIZE);
19796 +}
19797 +
19798 +
19799 +
19800 +/*     __hash_dl_info()
19801 +
19802 +       * add the dli to the global hash table (calls get_dl_info() first)
19803 +       * requires the hash_lock to be held                     */
19804 +
19805 +static inline void __hash_dl_info(struct dl_info *dli)
19806 +{
19807 +       struct hlist_head *head;
19808 +
19809 +       vxdprintk(VXD_CBIT(dlim, 6),
19810 +               "__hash_dl_info: %p[#%d]", dli, dli->dl_tag);
19811 +       get_dl_info(dli);
19812 +       head = &dl_info_hash[__hashval(dli->dl_sb, dli->dl_tag)];
19813 +       hlist_add_head_rcu(&dli->dl_hlist, head);
19814 +}
19815 +
19816 +/*     __unhash_dl_info()
19817 +
19818 +       * remove the dli from the global hash table, then put_dl_info() it
19819 +       * requires the hash_lock to be held                     */
19820 +
19821 +static inline void __unhash_dl_info(struct dl_info *dli)
19822 +{
19823 +       vxdprintk(VXD_CBIT(dlim, 6),
19824 +               "__unhash_dl_info: %p[#%d]", dli, dli->dl_tag);
19825 +       hlist_del_rcu(&dli->dl_hlist);
19826 +       put_dl_info(dli);
19827 +}
19828 +
19829 +
19830 +/*     __lookup_dl_info()
19831 +
19832 +       * requires the rcu_read_lock()
19833 +       * doesn't increment the dl_refcnt; NULL if (sb, tag) is not hashed */
19834 +
19835 +static inline struct dl_info *__lookup_dl_info(struct super_block *sb, tag_t tag)
19836 +{
19837 +       struct hlist_head *head = &dl_info_hash[__hashval(sb, tag)];
19838 +       struct hlist_node *pos;
19839 +       struct dl_info *dli;
19840 +
19841 +       hlist_for_each_entry_rcu(dli, pos, head, dl_hlist) {
19842 +
19843 +               if (dli->dl_tag == tag && dli->dl_sb == sb) {
19844 +                       return dli;
19845 +               }
19846 +       }
19847 +       return NULL;
19848 +}
19849 +
19850 +/* Look up (sb, tag) under rcu_read_lock() and hand the hit to get_dl_info(); callers test the result for NULL. */
19851 +struct dl_info *locate_dl_info(struct super_block *sb, tag_t tag)
19852 +{
19853 +       struct dl_info *dli;
19854 +
19855 +       rcu_read_lock();
19856 +       dli = get_dl_info(__lookup_dl_info(sb, tag));   /* presumably NULL-safe; lookup may miss - TODO confirm */
19857 +       vxdprintk(VXD_CBIT(dlim, 7),
19858 +               "locate_dl_info(%p,#%d) = %p", sb, tag, dli);
19859 +       rcu_read_unlock();
19860 +       return dli;
19861 +}
19862 +/* Free the dl_info that embeds head (presumably an RCU callback) unless dl_usecnt is still non-zero. */
19863 +void rcu_free_dl_info(struct rcu_head *head)
19864 +{
19865 +       struct dl_info *dli = container_of(head, struct dl_info, dl_rcu);
19866 +       int usecnt, refcnt;
19867 +
19868 +       BUG_ON(!dli || !head);
19869 +
19870 +       usecnt = atomic_read(&dli->dl_usecnt);
19871 +       BUG_ON(usecnt < 0);
19872 +
19873 +       refcnt = atomic_read(&dli->dl_refcnt);
19874 +       BUG_ON(refcnt < 0);
19875 +
19876 +       vxdprintk(VXD_CBIT(dlim, 3),
19877 +               "rcu_free_dl_info(%p)", dli);
19878 +       if (!usecnt)
19879 +               __dealloc_dl_info(dli);
19880 +       else    /* still in use: the entry is only reported, not freed */
19881 +               printk("!!! rcu didn't free\n");
19882 +}
19883 +
19884 +
19885 +
19886 +/* Add (add != 0) or remove the dl_info of tag id on the filesystem holding name; flags is not used. */
19887 +static int do_addrem_dlimit(uint32_t id, const char __user *name,
19888 +       uint32_t flags, int add)
19889 +{
19890 +       struct path path;
19891 +       int ret;
19892 +
19893 +       ret = user_lpath(name, &path);
19894 +       if (!ret) {
19895 +               struct super_block *sb;
19896 +               struct dl_info *dli;
19897 +
19898 +               ret = -EINVAL;
19899 +               if (!path.dentry->d_inode)
19900 +                       goto out_release;
19901 +               if (!(sb = path.dentry->d_inode->i_sb))
19902 +                       goto out_release;
19903 +               if (add) {
19904 +                       ret = -ENOMEM;  /* allocation can fail: never hash a NULL entry */
19905 +                       if (!(dli = __alloc_dl_info(sb, id)))
19906 +                               goto out_release;
19907 +                       spin_lock(&dl_info_hash_lock);
19908 +                       ret = -EEXIST;
19909 +                       if (__lookup_dl_info(sb, id))
19910 +                               goto out_unlock;
19911 +                       __hash_dl_info(dli);
19912 +                       dli = NULL;     /* now owned by the hash, must not be freed below */
19913 +               } else {
19914 +                       spin_lock(&dl_info_hash_lock);
19915 +                       dli = __lookup_dl_info(sb, id);
19916 +
19917 +                       ret = -ESRCH;
19918 +                       if (!dli)
19919 +                               goto out_unlock;
19920 +                       __unhash_dl_info(dli);
19921 +               }
19922 +               ret = 0;
19923 +       out_unlock:
19924 +               spin_unlock(&dl_info_hash_lock);
19925 +               if (add && dli) /* entry already existed: drop the unused allocation */
19926 +                       __dealloc_dl_info(dli);
19927 +       out_release:
19928 +               path_put(&path);
19929 +       }
19930 +       return ret;
19931 +}
19932 +/* Command entry: copy a vcmd_ctx_dlimit_base_v0 from user space and add a disk limit entry for id. */
19933 +int vc_add_dlimit(uint32_t id, void __user *data)
19934 +{
19935 +       struct vcmd_ctx_dlimit_base_v0 vc_data;
19936 +
19937 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19938 +               return -EFAULT;
19939 +
19940 +       return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 1);
19941 +}
19942 +/* Command entry: copy a vcmd_ctx_dlimit_base_v0 from user space and remove the disk limit entry for id. */
19943 +int vc_rem_dlimit(uint32_t id, void __user *data)
19944 +{
19945 +       struct vcmd_ctx_dlimit_base_v0 vc_data;
19946 +
19947 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19948 +               return -EFAULT;
19949 +
19950 +       return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 0);
19951 +}
19952 +
19953 +#ifdef CONFIG_COMPAT
19954 +/* Compat entry for adding a disk limit: the 32-bit name pointer is converted with compat_ptr(). */
19955 +int vc_add_dlimit_x32(uint32_t id, void __user *data)
19956 +{
19957 +       struct vcmd_ctx_dlimit_base_v0_x32 vc_data;
19958 +
19959 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19960 +               return -EFAULT;
19961 +
19962 +       return do_addrem_dlimit(id,
19963 +               compat_ptr(vc_data.name_ptr), vc_data.flags, 1);
19964 +}
19965 +/* Compat entry for removing a disk limit: the 32-bit name pointer is converted with compat_ptr(). */
19966 +int vc_rem_dlimit_x32(uint32_t id, void __user *data)
19967 +{
19968 +       struct vcmd_ctx_dlimit_base_v0_x32 vc_data;
19969 +
19970 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19971 +               return -EFAULT;
19972 +
19973 +       return do_addrem_dlimit(id,
19974 +               compat_ptr(vc_data.name_ptr), vc_data.flags, 0);
19975 +}
19976 +
19977 +#endif /* CONFIG_COMPAT */
19978 +
19979 +/* Update the limits of tag id on the filesystem holding name; a CDLIM_KEEP value leaves that field alone. */
19980 +static inline
19981 +int do_set_dlimit(uint32_t id, const char __user *name,
19982 +       uint32_t space_used, uint32_t space_total,
19983 +       uint32_t inodes_used, uint32_t inodes_total,
19984 +       uint32_t reserved, uint32_t flags)
19985 +{
19986 +       struct path path;
19987 +       int ret;
19988 +
19989 +       ret = user_lpath(name, &path);
19990 +       if (!ret) {
19991 +               struct super_block *sb;
19992 +               struct dl_info *dli;
19993 +
19994 +               ret = -EINVAL;
19995 +               if (!path.dentry->d_inode)
19996 +                       goto out_release;
19997 +               if (!(sb = path.dentry->d_inode->i_sb))
19998 +                       goto out_release;
19999 +               if ((reserved != CDLIM_KEEP &&
20000 +                       reserved > 100) ||      /* reserved is a percentage */
20001 +                       (inodes_used != CDLIM_KEEP &&
20002 +                       inodes_used > inodes_total) ||
20003 +                       (space_used != CDLIM_KEEP &&
20004 +                       space_used > space_total))
20005 +                       goto out_release;
20006 +
20007 +               ret = -ESRCH;
20008 +               dli = locate_dl_info(sb, id);
20009 +               if (!dli)
20010 +                       goto out_release;
20011 +
20012 +               spin_lock(&dli->dl_lock);
20013 +
20014 +               if (inodes_used != CDLIM_KEEP)
20015 +                       dli->dl_inodes_used = inodes_used;
20016 +               if (inodes_total != CDLIM_KEEP)
20017 +                       dli->dl_inodes_total = inodes_total;
20018 +               if (space_used != CDLIM_KEEP) {
20019 +                       dli->dl_space_used = space_used;
20020 +                       dli->dl_space_used <<= 10;      /* stored scaled by 1024 */
20021 +               }
20022 +               if (space_total == CDLIM_INFINITY)
20023 +                       dli->dl_space_total = DLIM_INFINITY;
20024 +               else if (space_total != CDLIM_KEEP) {
20025 +                       dli->dl_space_total = space_total;
20026 +                       dli->dl_space_total <<= 10;     /* stored scaled by 1024 */
20027 +               }
20028 +               if (reserved != CDLIM_KEEP)     /* share left after the reserve, in 1/1024ths */
20029 +                       dli->dl_nrlmult = (1 << 10) * (100 - reserved) / 100;
20030 +
20031 +               spin_unlock(&dli->dl_lock);
20032 +
20033 +               put_dl_info(dli);
20034 +               ret = 0;
20035 +
20036 +       out_release:
20037 +               path_put(&path);
20038 +       }
20039 +       return ret;
20040 +}
20041 +/* Command entry: copy a vcmd_ctx_dlimit_v0 from user space and apply its values to tag id. */
20042 +int vc_set_dlimit(uint32_t id, void __user *data)
20043 +{
20044 +       struct vcmd_ctx_dlimit_v0 vc_data;
20045 +
20046 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
20047 +               return -EFAULT;
20048 +
20049 +       return do_set_dlimit(id, vc_data.name,
20050 +               vc_data.space_used, vc_data.space_total,
20051 +               vc_data.inodes_used, vc_data.inodes_total,
20052 +               vc_data.reserved, vc_data.flags);
20053 +}
20054 +
20055 +#ifdef CONFIG_COMPAT
20056 +/* Compat entry for setting disk limits: the 32-bit name pointer is converted with compat_ptr(). */
20057 +int vc_set_dlimit_x32(uint32_t id, void __user *data)
20058 +{
20059 +       struct vcmd_ctx_dlimit_v0_x32 vc_data;
20060 +
20061 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
20062 +               return -EFAULT;
20063 +
20064 +       return do_set_dlimit(id, compat_ptr(vc_data.name_ptr),
20065 +               vc_data.space_used, vc_data.space_total,
20066 +               vc_data.inodes_used, vc_data.inodes_total,
20067 +               vc_data.reserved, vc_data.flags);
20068 +}
20069 +
20070 +#endif /* CONFIG_COMPAT */
20071 +
20072 +/* Read the limits of tag id on the filesystem holding name into the output pointers; 0, -EINVAL or -ESRCH. */
20073 +static inline
20074 +int do_get_dlimit(uint32_t id, const char __user *name,
20075 +       uint32_t *space_used, uint32_t *space_total,
20076 +       uint32_t *inodes_used, uint32_t *inodes_total,
20077 +       uint32_t *reserved, uint32_t *flags)
20078 +{
20079 +       struct path path;
20080 +       int ret;
20081 +
20082 +       ret = user_lpath(name, &path);
20083 +       if (!ret) {
20084 +               struct super_block *sb;
20085 +               struct dl_info *dli;
20086 +
20087 +               ret = -EINVAL;
20088 +               if (!path.dentry->d_inode)
20089 +                       goto out_release;
20090 +               if (!(sb = path.dentry->d_inode->i_sb))
20091 +                       goto out_release;
20092 +
20093 +               ret = -ESRCH;
20094 +               dli = locate_dl_info(sb, id);
20095 +               if (!dli)
20096 +                       goto out_release;
20097 +
20098 +               spin_lock(&dli->dl_lock);
20099 +               *inodes_used = dli->dl_inodes_used;
20100 +               *inodes_total = dli->dl_inodes_total;
20101 +               *space_used = dli->dl_space_used >> 10; /* undo the 1024 scaling */
20102 +               if (dli->dl_space_total == DLIM_INFINITY)
20103 +                       *space_total = CDLIM_INFINITY;
20104 +               else
20105 +                       *space_total = dli->dl_space_total >> 10;
20106 +
20107 +               *reserved = 100 - ((dli->dl_nrlmult * 100 + 512) >> 10);        /* rounded percentage */
20108 +               spin_unlock(&dli->dl_lock);
20109 +
20110 +               put_dl_info(dli);
20111 +
20112 +               /* every output is filled in; *flags keeps the caller's value */
20113 +               ret = 0;
20114 +       out_release:
20115 +               path_put(&path);
20116 +       }
20117 +       return ret;
20118 +}
20119 +
20120 +/* Command entry: read the user's vcmd_ctx_dlimit_v0 (for the name), fill in the limits and copy it back. */
20121 +int vc_get_dlimit(uint32_t id, void __user *data)
20122 +{
20123 +       struct vcmd_ctx_dlimit_v0 vc_data;
20124 +       int ret;
20125 +
20126 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
20127 +               return -EFAULT;
20128 +
20129 +       ret = do_get_dlimit(id, vc_data.name,
20130 +               &vc_data.space_used, &vc_data.space_total,
20131 +               &vc_data.inodes_used, &vc_data.inodes_total,
20132 +               &vc_data.reserved, &vc_data.flags);
20133 +       if (ret)
20134 +               return ret;
20135 +
20136 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
20137 +               return -EFAULT;
20138 +       return 0;
20139 +}
20140 +
20141 +#ifdef CONFIG_COMPAT
20142 +/* Compat entry for reading disk limits: the 32-bit name pointer is converted with compat_ptr(). */
20143 +int vc_get_dlimit_x32(uint32_t id, void __user *data)
20144 +{
20145 +       struct vcmd_ctx_dlimit_v0_x32 vc_data;
20146 +       int ret;
20147 +
20148 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
20149 +               return -EFAULT;
20150 +
20151 +       ret = do_get_dlimit(id, compat_ptr(vc_data.name_ptr),
20152 +               &vc_data.space_used, &vc_data.space_total,
20153 +               &vc_data.inodes_used, &vc_data.inodes_total,
20154 +               &vc_data.reserved, &vc_data.flags);
20155 +       if (ret)
20156 +               return ret;
20157 +
20158 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
20159 +               return -EFAULT;
20160 +       return 0;
20161 +}
20162 +
20163 +#endif /* CONFIG_COMPAT */
20164 +
20165 +/* Clamp the statfs numbers in buf to the disk limit of the current tag on sb; no-op without a dl_info. */
20166 +void vx_vsi_statfs(struct super_block *sb, struct kstatfs *buf)
20167 +{
20168 +       struct dl_info *dli;
20169 +       __u64 blimit, bfree, bavail;
20170 +       __u32 ifree;
20171 +
20172 +       dli = locate_dl_info(sb, dx_current_tag());
20173 +       if (!dli)
20174 +               return;
20175 +
20176 +       spin_lock(&dli->dl_lock);
20177 +       if (dli->dl_inodes_total == (unsigned long)DLIM_INFINITY)
20178 +               goto no_ilim;
20179 +
20180 +       /* reduce max inodes available to limit */
20181 +       if (buf->f_files > dli->dl_inodes_total)
20182 +               buf->f_files = dli->dl_inodes_total;
20183 +       /* reduce free inodes to min; clamp at 0 so used > total cannot wrap */
20184 +       ifree = (dli->dl_inodes_total > dli->dl_inodes_used) ?
20185 +               dli->dl_inodes_total - dli->dl_inodes_used : 0;
20186 +       if (ifree < buf->f_ffree)
20187 +               buf->f_ffree = ifree;
20188 +
20189 +no_ilim:
20190 +       if (dli->dl_space_total == DLIM_INFINITY)
20191 +               goto no_blim;
20192 +
20193 +       blimit = dli->dl_space_total >> sb->s_blocksize_bits;
20194 +
20195 +       if (dli->dl_space_total < dli->dl_space_used)
20196 +               bfree = 0;
20197 +       else
20198 +               bfree = (dli->dl_space_total - dli->dl_space_used)
20199 +                       >> sb->s_blocksize_bits;
20200 +
20201 +       bavail = ((dli->dl_space_total >> 10) * dli->dl_nrlmult);       /* total * nrlmult / 1024 */
20202 +       if (bavail < dli->dl_space_used)
20203 +               bavail = 0;
20204 +       else
20205 +               bavail = (bavail - dli->dl_space_used)
20206 +                       >> sb->s_blocksize_bits;
20207 +
20208 +       /* reduce max space available to limit */
20209 +       if (buf->f_blocks > blimit)
20210 +               buf->f_blocks = blimit;
20211 +
20212 +       /* reduce free space to min */
20213 +       if (bfree < buf->f_bfree)
20214 +               buf->f_bfree = bfree;
20215 +
20216 +       /* reduce avail space to min */
20217 +       if (bavail < buf->f_bavail)
20218 +               buf->f_bavail = bavail;
20219 +
20220 +no_blim:
20221 +       spin_unlock(&dli->dl_lock);
20222 +       put_dl_info(dli);
20223 +
20224 +       return;
20225 +}
20226 +
20227 +#include <linux/module.h>
20228 +
20229 +EXPORT_SYMBOL_GPL(locate_dl_info);
20230 +EXPORT_SYMBOL_GPL(rcu_free_dl_info);
20231 +
20232 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/helper.c linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/helper.c
20233 --- linux-2.6.31.6/kernel/vserver/helper.c      1970-01-01 01:00:00.000000000 +0100
20234 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/helper.c        2009-11-05 04:22:08.000000000 +0100
20235 @@ -0,0 +1,199 @@
20236 +/*
20237 + *  linux/kernel/vserver/helper.c
20238 + *
20239 + *  Virtual Context Support
20240 + *
20241 + *  Copyright (C) 2004-2007  Herbert Pötzl
20242 + *
20243 + *  V0.01  basic helper
20244 + *
20245 + */
20246 +
20247 +#include <linux/kmod.h>
20248 +#include <linux/reboot.h>
20249 +#include <linux/vs_context.h>
20250 +#include <linux/vs_network.h>
20251 +#include <linux/vserver/signal.h>
20252 +
20253 +/* Path of the user-space helper binary run by do_vshelper() callers; passed as argv[0]. */
20254 +char vshelper_path[255] = "/sbin/vshelper";
20255 +
20256 +/* Run helper name via call_usermodehelper() and return its result; argv[1] and argv[2] are printed, so both must be set. */
20257 +static int do_vshelper(char *name, char *argv[], char *envp[], int sync)
20258 +{
20259 +       int ret;
20260 +
20261 +       if ((ret = call_usermodehelper(name, argv, envp, sync))) {
20262 +               printk( KERN_WARNING
20263 +                       "%s: (%s %s) returned %s with %d\n",
20264 +                       name, argv[1], argv[2],
20265 +                       sync ? "sync" : "async", ret);
20266 +       }
20267 +       vxdprintk(VXD_CBIT(switch, 4),
20268 +               "%s: (%s %s) returned %s with %d",
20269 +               name, argv[1], argv[2], sync ? "sync" : "async", ret);
20270 +       return ret;
20271 +}
20272 +
20273 +/*
20274 + *      vshelper path is set via /proc/sys
20275 + *      invoked by vserver sys_reboot(), with
20276 + *      the following arguments
20277 + *
20278 + *      argv [0] = vshelper_path;
20279 + *      argv [1] = action: "restart", "halt", "poweroff", ...
20280 + *      argv [2] = context identifier
20281 + *
20282 + *      envp [*] = HOME, TERM, PATH plus VS_UID, VS_PID and VS_CMD
20283 + */
20284 +/* Returns -EAGAIN while VXS_HELPER is already set, 0 for an unhandled cmd, -EPERM if the helper failed. */
20285 +long vs_reboot_helper(struct vx_info *vxi, int cmd, void __user *arg)
20286 +{
20287 +       char id_buf[16], cmd_buf[16];   /* "VS_CMD=%08x" needs exactly 16 bytes with its NUL */
20288 +       char uid_buf[24], pid_buf[24];  /* room for the prefix plus any %d value */
20289 +       int ret;
20290 +
20291 +       char *argv[] = {vshelper_path, NULL, id_buf, 0};
20292 +       char *envp[] = {"HOME=/", "TERM=linux",
20293 +                       "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
20294 +                       uid_buf, pid_buf, cmd_buf, 0};
20295 +
20296 +       if (vx_info_state(vxi, VXS_HELPER))
20297 +               return -EAGAIN;
20298 +       vxi->vx_state |= VXS_HELPER;
20299 +
20300 +       snprintf(id_buf, sizeof(id_buf), "%d", vxi->vx_id);
20301 +       /* snprintf() counts the NUL in its size, so pass the full buffer size */
20302 +       snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
20303 +       snprintf(uid_buf, sizeof(uid_buf), "VS_UID=%d", current_uid());
20304 +       snprintf(pid_buf, sizeof(pid_buf), "VS_PID=%d", current->pid);
20305 +
20306 +       switch (cmd) {
20307 +       case LINUX_REBOOT_CMD_RESTART:
20308 +               argv[1] = "restart";
20309 +               break;
20310 +
20311 +       case LINUX_REBOOT_CMD_HALT:
20312 +               argv[1] = "halt";
20313 +               break;
20314 +
20315 +       case LINUX_REBOOT_CMD_POWER_OFF:
20316 +               argv[1] = "poweroff";
20317 +               break;
20318 +
20319 +       case LINUX_REBOOT_CMD_SW_SUSPEND:
20320 +               argv[1] = "swsusp";
20321 +               break;
20322 +
20323 +       default:        /* nothing to run for other commands */
20324 +               vxi->vx_state &= ~VXS_HELPER;
20325 +               return 0;
20326 +       }
20327 +
20328 +       ret = do_vshelper(vshelper_path, argv, envp, 0);
20329 +       vxi->vx_state &= ~VXS_HELPER;
20330 +       __wakeup_vx_info(vxi);
20331 +       return (ret) ? -EPERM : 0;
20332 +}
20333 +
20334 +
20335 +long vs_reboot(unsigned int cmd, void __user *arg)
20336 +{
20337 +       struct vx_info *ctx = current_vx_info();
20338 +       long err;
20339 +
20340 +       vxdprintk(VXD_CBIT(misc, 5),
20341 +               "vs_reboot(%p[#%d],%d)",
20342 +               ctx, ctx ? ctx->vx_id : 0, cmd);
20343 +
20344 +       /* the helper gets the first shot; its failure ends the request */
20345 +       err = vs_reboot_helper(ctx, cmd, arg);
20346 +       if (err)
20347 +               return err;
20348 +
20349 +       ctx->reboot_cmd = cmd;
20350 +       /* without VXF_REBOOT_KILL, recording the command is all we do */
20351 +       if (!vx_info_flags(ctx, VXF_REBOOT_KILL, 0))
20352 +               return 0;
20353 +
20354 +       if (cmd == LINUX_REBOOT_CMD_RESTART ||
20355 +           cmd == LINUX_REBOOT_CMD_HALT ||
20356 +           cmd == LINUX_REBOOT_CMD_POWER_OFF) {
20357 +               vx_info_kill(ctx, 0, SIGKILL);
20358 +               vx_info_kill(ctx, 1, SIGKILL);
20359 +       }
20360 +       return 0;
20361 +}
20362 +
20363 +
20364 +/*
20365 + *      vs_state_change() - run vshelper for VSC_STARTUP / VSC_SHUTDOWN
20366 + *      argv [0] = vshelper_path;
20367 + *      argv [1] = action: "startup", "shutdown"
20368 + *      argv [2] = context identifier
20369 + *      envp [*] = HOME, TERM, PATH, VS_CMD; returns do_vshelper() result
20370 + */
20371 +
20372 +long vs_state_change(struct vx_info *vxi, unsigned int cmd)
20373 +{
20374 +       char id_buf[8], cmd_buf[16];    /* "VS_CMD=" + 8 hex digits + NUL */
20375 +       char *argv[] = {vshelper_path, NULL, id_buf, 0};
20376 +       char *envp[] = {"HOME=/", "TERM=linux",
20377 +                       "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0};
20378 +
20379 +       if (!vx_info_flags(vxi, VXF_SC_HELPER, 0))      /* helper not requested */
20380 +               return 0;
20381 +       /* pass the full buffer size: "size - 1" dropped the last hex digit */
20382 +       snprintf(id_buf, sizeof(id_buf), "%d", vxi->vx_id);
20383 +       snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
20384 +
20385 +       switch (cmd) {
20386 +       case VSC_STARTUP:
20387 +               argv[1] = "startup";
20388 +               break;
20389 +       case VSC_SHUTDOWN:
20390 +               argv[1] = "shutdown";
20391 +               break;
20392 +       default:        /* other commands are ignored */
20393 +               return 0;
20394 +       }
20395 +
20396 +       return do_vshelper(vshelper_path, argv, envp, 1);
20397 +}
20398 +
20399 +
20400 +/*
20401 + *      vs_net_change() - run vshelper for VSC_NETUP / VSC_NETDOWN
20402 + *      argv [0] = vshelper_path;
20403 + *      argv [1] = action: "netup", "netdown"
20404 + *      argv [2] = context identifier
20405 + *      envp [*] = HOME, TERM, PATH, VS_CMD; returns do_vshelper() result
20406 + */
20407 +
20408 +long vs_net_change(struct nx_info *nxi, unsigned int cmd)
20409 +{
20410 +       char id_buf[8], cmd_buf[16];    /* "VS_CMD=" + 8 hex digits + NUL */
20411 +       char *argv[] = {vshelper_path, NULL, id_buf, 0};
20412 +       char *envp[] = {"HOME=/", "TERM=linux",
20413 +                       "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0};
20414 +
20415 +       if (!nx_info_flags(nxi, NXF_SC_HELPER, 0))      /* helper not requested */
20416 +               return 0;
20417 +       /* pass the full buffer size: "size - 1" dropped the last hex digit */
20418 +       snprintf(id_buf, sizeof(id_buf), "%d", nxi->nx_id);
20419 +       snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
20420 +
20421 +       switch (cmd) {
20422 +       case VSC_NETUP:
20423 +               argv[1] = "netup";
20424 +               break;
20425 +       case VSC_NETDOWN:
20426 +               argv[1] = "netdown";
20427 +               break;
20428 +       default:        /* other commands are ignored */
20429 +               return 0;
20430 +       }
20431 +
20432 +       return do_vshelper(vshelper_path, argv, envp, 1);
20433 +}
20434 +
20435 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/history.c linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/history.c
20436 --- linux-2.6.31.6/kernel/vserver/history.c     1970-01-01 01:00:00.000000000 +0100
20437 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/history.c       2009-09-10 16:11:43.000000000 +0200
20438 @@ -0,0 +1,258 @@
20439 +/*
20440 + *  kernel/vserver/history.c
20441 + *
20442 + *  Virtual Context History Backtrace
20443 + *
20444 + *  Copyright (C) 2004-2007  Herbert Pötzl
20445 + *
20446 + *  V0.01  basic structure
20447 + *  V0.02  hash/unhash and trace
20448 + *  V0.03  preemption fixes
20449 + *
20450 + */
20451 +
20452 +#include <linux/module.h>
20453 +#include <asm/uaccess.h>
20454 +
20455 +#include <linux/vserver/context.h>
20456 +#include <linux/vserver/debug.h>
20457 +#include <linux/vserver/debug_cmd.h>
20458 +#include <linux/vserver/history.h>
20459 +
20460 +
20461 +#ifdef CONFIG_VSERVER_HISTORY
20462 +#define VXH_SIZE       CONFIG_VSERVER_HISTORY_SIZE
20463 +#else  /* history not configured: fall back to a fixed size */
20464 +#define VXH_SIZE       64
20465 +#endif
20466 +/* per-cpu history ring; entry[VXH_SIZE] is the spare slot vxh_advance() hands out while vxh_active is 0 */
20467 +struct _vx_history {
20468 +       unsigned int counter;   /* post-incremented by vxh_advance() for each recorded entry */
20469 +
20470 +       struct _vx_hist_entry entry[VXH_SIZE + 1];
20471 +};
20472 +
20473 +
20474 +DEFINE_PER_CPU(struct _vx_history, vx_history_buffer);
20475 +
20476 +unsigned volatile int vxh_active = 1;   /* set to 0 by the dump functions in this file */
20477 +
20478 +static atomic_t sequence = ATOMIC_INIT(0);      /* source of the seq stamp set in vxh_advance() */
20479 +
20480 +
20481 +/*     vxh_advance() - pick this cpu's next history slot and stamp
20482 +       it with a fresh sequence number and the given location
20483 +       * requires disabled preemption                          */
20484 +
20485 +struct _vx_hist_entry *vxh_advance(void *loc)
20486 +{
20487 +       unsigned int cpu = smp_processor_id();
20488 +       struct _vx_history *ring = &per_cpu(vx_history_buffer, cpu);
20489 +       struct _vx_hist_entry *slot = &ring->entry[VXH_SIZE];
20490 +
20491 +       /* the spare slot above is only kept while recording is off */
20492 +       if (vxh_active)
20493 +               slot = &ring->entry[ring->counter++ % VXH_SIZE];
20494 +
20495 +       slot->seq = atomic_inc_return(&sequence);
20496 +       slot->loc = loc;
20497 +       return slot;
20498 +}
20499 +
20500 +EXPORT_SYMBOL_GPL(vxh_advance);
20501 +
20502 +
20503 +#define VXH_LOC_FMTS   "(#%04x,*%d):%p"
20504 +/* printk arguments matching VXH_LOC_FMTS; relies on a variable named cpu being in scope */
20505 +#define VXH_LOC_ARGS(e)        (e)->seq, cpu, (e)->loc
20506 +
20507 +/* context part: pointer, then xid/usecnt/tasks (each printed as 0 when the pointer is NULL) */
20508 +#define VXH_VXI_FMTS   "%p[#%d,%d.%d]"
20509 +
20510 +#define VXH_VXI_ARGS(e)        (e)->vxi.ptr,                           \
20511 +                       (e)->vxi.ptr ? (e)->vxi.xid : 0,        \
20512 +                       (e)->vxi.ptr ? (e)->vxi.usecnt : 0,     \
20513 +                       (e)->vxi.ptr ? (e)->vxi.tasks : 0
20514 +/* print one history entry with printk(); entry types without a case below print nothing */
20515 +void   vxh_dump_entry(struct _vx_hist_entry *e, unsigned cpu)
20516 +{
20517 +       switch (e->type) {
20518 +       case VXH_THROW_OOPS:
20519 +               printk( VXH_LOC_FMTS " oops \n", VXH_LOC_ARGS(e));
20520 +               break;
20521 +
20522 +       case VXH_GET_VX_INFO:
20523 +       case VXH_PUT_VX_INFO:
20524 +               printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n",
20525 +                       VXH_LOC_ARGS(e),
20526 +                       (e->type == VXH_GET_VX_INFO) ? "get" : "put",
20527 +                       VXH_VXI_ARGS(e));
20528 +               break;
20529 +
20530 +       case VXH_INIT_VX_INFO:
20531 +       case VXH_SET_VX_INFO:
20532 +       case VXH_CLR_VX_INFO:
20533 +               printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n",
20534 +                       VXH_LOC_ARGS(e),
20535 +                       (e->type == VXH_INIT_VX_INFO) ? "init" :
20536 +                       ((e->type == VXH_SET_VX_INFO) ? "set" : "clr"),
20537 +                       VXH_VXI_ARGS(e), e->sc.data);
20538 +               break;
20539 +
20540 +       case VXH_CLAIM_VX_INFO:
20541 +       case VXH_RELEASE_VX_INFO:
20542 +               printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n",
20543 +                       VXH_LOC_ARGS(e),
20544 +                       (e->type == VXH_CLAIM_VX_INFO) ? "claim" : "release",
20545 +                       VXH_VXI_ARGS(e), e->sc.data);
20546 +               break;
20547 +
20548 +       case VXH_ALLOC_VX_INFO:
20549 +       case VXH_DEALLOC_VX_INFO:
20550 +               printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n",
20551 +                       VXH_LOC_ARGS(e),
20552 +                       (e->type == VXH_ALLOC_VX_INFO) ? "alloc" : "dealloc",
20553 +                       VXH_VXI_ARGS(e));
20554 +               break;
20555 +
20556 +       case VXH_HASH_VX_INFO:
20557 +       case VXH_UNHASH_VX_INFO:
20558 +               printk( VXH_LOC_FMTS " __%s_vx_info " VXH_VXI_FMTS "\n",
20559 +                       VXH_LOC_ARGS(e),
20560 +                       (e->type == VXH_HASH_VX_INFO) ? "hash" : "unhash",
20561 +                       VXH_VXI_ARGS(e));
20562 +               break;
20563 +
20564 +       case VXH_LOC_VX_INFO:
20565 +       case VXH_LOOKUP_VX_INFO:
20566 +       case VXH_CREATE_VX_INFO:
20567 +               printk( VXH_LOC_FMTS " __%s_vx_info [#%d] -> " VXH_VXI_FMTS "\n",
20568 +                       VXH_LOC_ARGS(e),
20569 +                       (e->type == VXH_CREATE_VX_INFO) ? "create" :
20570 +                       ((e->type == VXH_LOC_VX_INFO) ? "loc" : "lookup"),
20571 +                       e->ll.arg, VXH_VXI_ARGS(e));
20572 +               break;
20573 +       }
20574 +}
20575 +
20576 +static void __vxh_dump_history(void)
20577 +{
20578 +       unsigned int back, cpu;
20579 +
20580 +       printk("History:\tSEQ: %8x\tNR_CPUS: %d\n",
20581 +               atomic_read(&sequence), NR_CPUS);
20582 +       /* outer: distance back from each cpu's counter, inner: online cpus */
20583 +       for (back = 0; back < VXH_SIZE; back++) {
20584 +               for_each_online_cpu(cpu) {
20585 +                       struct _vx_history *ring;
20586 +                       unsigned int slot;
20587 +
20588 +                       ring = &per_cpu(vx_history_buffer, cpu);
20589 +                       slot = (ring->counter - back) % VXH_SIZE;
20590 +                       vxh_dump_entry(&ring->entry[slot], cpu);
20591 +               }
20592 +       }
20593 +}
20594 +/* switch history recording off and print every cpu's history; recording is not switched back on here */
20595 +void   vxh_dump_history(void)
20596 +{
20597 +       vxh_active = 0;         /* vxh_advance() now hands out the spare slot only */
20598 +#ifdef CONFIG_SMP
20599 +       local_irq_enable();     /* NOTE(review): presumably irqs must be on for smp_send_stop() - confirm */
20600 +       smp_send_stop();        /* NOTE(review): presumably halts the other cpus for good - confirm */
20601 +       local_irq_disable();
20602 +#endif
20603 +       __vxh_dump_history();
20604 +}
20605 +
20606 +
20607 +/* vserver syscall commands below here */
20608 +
20609 +/* vserver command: print the history with recording paused for the duration; always returns 0 */
20610 +int vc_dump_history(uint32_t id)
20611 +{
20612 +       vxh_active = 0;         /* pause recording; id is not used */
20613 +       __vxh_dump_history();
20614 +       vxh_active = 1;         /* resume recording */
20615 +
20616 +       return 0;
20617 +}
20618 +
20619 +/* copy up to *count entries of one cpu's history to userspace, starting at position *index */
20620 +int do_read_history(struct __user _vx_hist_entry *data,
20621 +       int cpu, uint32_t *index, uint32_t *count)
20622 +{
20623 +       int pos, ret = 0;
20624 +       struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu);
20625 +       int end = hist->counter;
20626 +       int start = end - VXH_SIZE + 2; /* oldest position still served */
20627 +       int idx = *index;
20628 +
20629 +       /* special case: get current pos */
20630 +       if (!*count) {
20631 +               *index = end;
20632 +               return 0;
20633 +       }
20634 +
20635 +       /* have we lost some data? */
20636 +       if (idx < start)
20637 +               idx = start;
20638 +
20639 +       for (pos = 0; (pos < *count) && (idx < end); pos++, idx++) {
20640 +               struct _vx_hist_entry *entry =
20641 +                       &hist->entry[idx % VXH_SIZE];
20642 +               /* copy_to_user() returns the bytes left over, not an errno */
20643 +               if (copy_to_user(&data[pos], entry, sizeof(*entry))) {
20644 +                       ret = -EFAULT;
20645 +                       break;
20646 +               }
20647 +       }
20648 +       /* save new index and count */
20649 +       *index = idx;
20650 +       *count = pos;
20651 +       return ret ? ret : (*index < end);      /* -EFAULT, 1 = more pending, 0 = done */
20652 +}
20653 +
20654 +int vc_read_history(uint32_t id, void __user *data)
20655 +{
20656 +       struct vcmd_read_history_v0 req;
20657 +       struct __user _vx_hist_entry *dst;
20658 +       int more;
20659 +
20660 +       if (id >= NR_CPUS)      /* id is passed on as the cpu to read */
20661 +               return -EINVAL;
20662 +       if (copy_from_user(&req, data, sizeof(req)))
20663 +               return -EFAULT;
20664 +
20665 +       dst = (struct __user _vx_hist_entry *)req.data;
20666 +       more = do_read_history(dst, id, &req.index, &req.count);
20667 +       /* hand the updated index/count back before reporting the result */
20668 +       if (copy_to_user(data, &req, sizeof(req)))
20669 +               return -EFAULT;
20670 +       return more;
20671 +}
20672 +
20673 +#ifdef CONFIG_COMPAT
20674 +
20675 +int vc_read_history_x32(uint32_t id, void __user *data)
20676 +{
20677 +       struct vcmd_read_history_v0_x32 req;
20678 +       struct __user _vx_hist_entry *dst;
20679 +       int more;
20680 +
20681 +       if (id >= NR_CPUS)      /* id is passed on as the cpu to read */
20682 +               return -EINVAL;
20683 +       if (copy_from_user(&req, data, sizeof(req)))
20684 +               return -EFAULT;
20685 +
20686 +       /* the compat layout carries the buffer address as data_ptr */
20687 +       dst = (struct __user _vx_hist_entry *)compat_ptr(req.data_ptr);
20688 +       more = do_read_history(dst, id, &req.index, &req.count);
20689 +
20690 +       if (copy_to_user(data, &req, sizeof(req)))
20691 +               return -EFAULT;
20692 +       return more;
20693 +}
20694 +
20695 +#endif /* CONFIG_COMPAT */
20696 +
20697 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/inet.c linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/inet.c
20698 --- linux-2.6.31.6/kernel/vserver/inet.c        1970-01-01 01:00:00.000000000 +0100
20699 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/inet.c  2009-09-10 16:11:43.000000000 +0200
20700 @@ -0,0 +1,225 @@
20701 +
20702 +#include <linux/in.h>
20703 +#include <linux/inetdevice.h>
20704 +#include <linux/vs_inet.h>
20705 +#include <linux/vs_inet6.h>
20706 +#include <linux/vserver/debug.h>
20707 +#include <net/route.h>
20708 +#include <net/addrconf.h>
20709 +
20710 +
20711 +int nx_v4_addr_conflict(struct nx_info *nxi1, struct nx_info *nxi2)
20712 +{
20713 +       struct nx_addr_v4 *addr;
20714 +       int clash = 1;
20715 +
20716 +       /* a missing or identical context always counts as a conflict */
20717 +       if (!nxi1 || !nxi2 || nxi1 == nxi2)
20718 +               goto report;
20719 +
20720 +       /* probe every address entry of nxi1 against nxi2 */
20721 +       for (addr = &nxi1->v4; addr; addr = addr->next)
20722 +               if (v4_nx_addr_in_nx_info(nxi2, addr, -1))
20723 +                       goto report;
20724 +
20725 +       /* no entry of nxi1 was found in nxi2 */
20726 +       clash = 0;
20727 +report:
20728 +       vxdprintk(VXD_CBIT(net, 2),
20729 +               "nx_v4_addr_conflict(%p,%p): %d",
20730 +               nxi1, nxi2, clash);
20731 +
20732 +       return clash;
20733 +}
20734 +
20735 +
20736 +#ifdef CONFIG_IPV6
20737 +
20738 +int nx_v6_addr_conflict(struct nx_info *nxi1, struct nx_info *nxi2)
20739 +{
20740 +       struct nx_addr_v6 *addr;
20741 +       int clash = 1;
20742 +
20743 +       /* a missing or identical context always counts as a conflict */
20744 +       if (!nxi1 || !nxi2 || nxi1 == nxi2)
20745 +               goto report;
20746 +
20747 +       /* probe every address entry of nxi1 against nxi2 */
20748 +       for (addr = &nxi1->v6; addr; addr = addr->next)
20749 +               if (v6_nx_addr_in_nx_info(nxi2, addr, -1))
20750 +                       goto report;
20751 +
20752 +       /* no entry of nxi1 was found in nxi2 */
20753 +       clash = 0;
20754 +report:
20755 +       vxdprintk(VXD_CBIT(net, 2),
20756 +               "nx_v6_addr_conflict(%p,%p): %d",
20757 +               nxi1, nxi2, clash);
20758 +
20759 +       return clash;
20760 +}
20761 +
20762 +#endif
20763 +
20764 +int v4_dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
20765 +{
20766 +       struct in_device *idev;
20767 +       struct in_ifaddr *ifa;
20768 +       int hit = 0;
20769 +
20770 +       /* no device, or no in_device behind it: nothing can match */
20771 +       if (!dev)
20772 +               return 0;
20773 +       idev = in_dev_get(dev);
20774 +       if (!idev)
20775 +               return 0;
20776 +
20777 +       /* stop at the first local address accepted for nxi */
20778 +       for (ifa = idev->ifa_list; ifa; ifa = ifa->ifa_next) {
20779 +               if (!v4_addr_in_nx_info(nxi, ifa->ifa_local, NXA_MASK_SHOW))
20780 +                       continue;
20781 +               hit = 1;
20782 +               break;
20783 +       }
20784 +       in_dev_put(idev);       /* pairs with in_dev_get() above */
20785 +
20786 +       return hit;
20787 +}
20788 +
20789 +
20790 +#ifdef CONFIG_IPV6
20791 +
20792 +int v6_dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
20793 +{
20794 +       struct inet6_dev *idev;
20795 +       struct inet6_ifaddr *ifa;
20796 +       int hit = 0;
20797 +
20798 +       /* no device, or no inet6_dev behind it: nothing can match */
20799 +       if (!dev)
20800 +               return 0;
20801 +       idev = in6_dev_get(dev);
20802 +       if (!idev)
20803 +               return 0;
20804 +
20805 +       /* stop at the first address accepted for nxi */
20806 +       for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) {
20807 +               if (!v6_addr_in_nx_info(nxi, &ifa->addr, -1))
20808 +                       continue;
20809 +               hit = 1;
20810 +               break;
20811 +       }
20812 +       in6_dev_put(idev);      /* pairs with in6_dev_get() above */
20813 +
20814 +       return hit;
20815 +}
20816 +
20817 +#endif
20818 +
20819 +int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
20820 +{
20821 +       int ret = 1;
20822 +
20823 +       if (!nxi)
20824 +               goto out;
20825 +       if (nxi->v4.type && v4_dev_in_nx_info(dev, nxi))
20826 +               goto out;
20827 +#ifdef CONFIG_IPV6
20828 +       ret = 2;
20829 +       if (nxi->v6.type && v6_dev_in_nx_info(dev, nxi))
20830 +               goto out;
20831 +#endif
20832 +       ret = 0;
20833 +out:
20834 +       vxdprintk(VXD_CBIT(net, 3),
20835 +               "dev_in_nx_info(%p,%p[#%d]) = %d",
20836 +               dev, nxi, nxi ? nxi->nx_id : 0, ret);
20837 +       return ret;
20838 +}
20839 +/* choose/validate the IPv4 source (and remap loopback) of flow fl for context nxi; returns 0 or -EPERM */
20840 +int ip_v4_find_src(struct net *net, struct nx_info *nxi,
20841 +       struct rtable **rp, struct flowi *fl)
20842 +{
20843 +       if (!nxi)       /* no network context: flow is left untouched */
20844 +               return 0;
20845 +
20846 +       /* FIXME: handle lback only case */
20847 +       if (!NX_IPV4(nxi))
20848 +               return -EPERM;
20849 +
20850 +       vxdprintk(VXD_CBIT(net, 4),
20851 +               "ip_v4_find_src(%p[#%u]) " NIPQUAD_FMT " -> " NIPQUAD_FMT,
20852 +               nxi, nxi ? nxi->nx_id : 0,
20853 +               NIPQUAD(fl->fl4_src), NIPQUAD(fl->fl4_dst));
20854 +
20855 +       /* single IP is unconditional */
20856 +       if (nx_info_flags(nxi, NXF_SINGLE_IP, 0) &&
20857 +               (fl->fl4_src == INADDR_ANY))
20858 +               fl->fl4_src = nxi->v4.ip[0].s_addr;
20859 +
20860 +       if (fl->fl4_src == INADDR_ANY) {
20861 +               struct nx_addr_v4 *ptr;
20862 +               __be32 found = 0;       /* shares its name with the label below */
20863 +               int err;
20864 +               /* first try: take whatever source the routing lookup picks */
20865 +               err = __ip_route_output_key(net, rp, fl);
20866 +               if (!err) {
20867 +                       found = (*rp)->rt_src;
20868 +                       ip_rt_put(*rp); /* only rt_src is used from the lookup result */
20869 +                       vxdprintk(VXD_CBIT(net, 4),
20870 +                               "ip_v4_find_src(%p[#%u]) rok[%u]: " NIPQUAD_FMT,
20871 +                               nxi, nxi ? nxi->nx_id : 0, fl->oif, NIPQUAD(found));
20872 +                       if (v4_addr_in_nx_info(nxi, found, NXA_MASK_BIND))
20873 +                               goto found;
20874 +               }
20875 +               /* second try: context addresses whose masked network matches found */
20876 +               for (ptr = &nxi->v4; ptr; ptr = ptr->next) {
20877 +                       __be32 primary = ptr->ip[0].s_addr;
20878 +                       __be32 mask = ptr->mask.s_addr;
20879 +                       __be32 neta = primary & mask;
20880 +
20881 +                       vxdprintk(VXD_CBIT(net, 4), "ip_v4_find_src(%p[#%u]) chk: "
20882 +                               NIPQUAD_FMT "/" NIPQUAD_FMT "/" NIPQUAD_FMT,
20883 +                               nxi, nxi ? nxi->nx_id : 0, NIPQUAD(primary),
20884 +                               NIPQUAD(mask), NIPQUAD(neta));
20885 +                       if ((found & mask) != neta)
20886 +                               continue;
20887 +                       /* redo the lookup with this entry's primary as source */
20888 +                       fl->fl4_src = primary;
20889 +                       err = __ip_route_output_key(net, rp, fl);
20890 +                       vxdprintk(VXD_CBIT(net, 4),
20891 +                               "ip_v4_find_src(%p[#%u]) rok[%u]: " NIPQUAD_FMT,
20892 +                               nxi, nxi ? nxi->nx_id : 0, fl->oif, NIPQUAD(primary));
20893 +                       if (!err) {
20894 +                               found = (*rp)->rt_src;
20895 +                               ip_rt_put(*rp);
20896 +                               if (found == primary)
20897 +                                       goto found;
20898 +                       }
20899 +               }
20900 +               /* still no source ip? */
20901 +               found = ipv4_is_loopback(fl->fl4_dst)
20902 +                       ? IPI_LOOPBACK : nxi->v4.ip[0].s_addr;
20903 +       found:
20904 +               /* assign src ip to flow */
20905 +               fl->fl4_src = found;
20906 +
20907 +       } else {        /* caller supplied a source: it must be bindable in nxi */
20908 +               if (!v4_addr_in_nx_info(nxi, fl->fl4_src, NXA_MASK_BIND))
20909 +                       return -EPERM;
20910 +       }
20911 +       /* loopback: rewrite to the context's v4_lback, or refuse unless NXF_LBACK_ALLOW */
20912 +       if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0)) {
20913 +               if (ipv4_is_loopback(fl->fl4_dst))
20914 +                       fl->fl4_dst = nxi->v4_lback.s_addr;
20915 +               if (ipv4_is_loopback(fl->fl4_src))
20916 +                       fl->fl4_src = nxi->v4_lback.s_addr;
20917 +       } else if (ipv4_is_loopback(fl->fl4_dst) &&
20918 +               !nx_info_flags(nxi, NXF_LBACK_ALLOW, 0))
20919 +               return -EPERM;
20920 +
20921 +       return 0;
20922 +}
20923 +
20924 +EXPORT_SYMBOL_GPL(ip_v4_find_src);
20925 +
20926 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/init.c linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/init.c
20927 --- linux-2.6.31.6/kernel/vserver/init.c        1970-01-01 01:00:00.000000000 +0100
20928 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/init.c  2009-09-10 16:11:43.000000000 +0200
20929 @@ -0,0 +1,45 @@
20930 +/*
20931 + *  linux/kernel/init.c
20932 + *
20933 + *  Virtual Server Init
20934 + *
20935 + *  Copyright (C) 2004-2007  Herbert Pötzl
20936 + *
20937 + *  V0.01  basic structure
20938 + *
20939 + */
20940 +
20941 +#include <linux/init.h>
20942 +
20943 +int    vserver_register_sysctl(void);
20944 +void   vserver_unregister_sysctl(void);
20945 +
20946 +
20947 +static int __init init_vserver(void)
20948 +{
20949 +#ifdef CONFIG_VSERVER_DEBUG
20950 +       /* debug builds register the vserver sysctl interface; the
20951 +        * result of the registration is not checked */
20952 +       vserver_register_sysctl();
20953 +#endif
20954 +       return 0;
20955 +}
20956 +
20957 +
20958 +static void __exit exit_vserver(void)
20959 +{
20960 +       /* counterpart of init_vserver(): unregister the sysctl
20961 +        * interface, which only debug builds registered */
20962 +#ifdef CONFIG_VSERVER_DEBUG
20963 +       vserver_unregister_sysctl();
20964 +#endif
20965 +}
20966 +
20967 +/* FIXME: GFP_ZONETYPES gone
20968 +long vx_slab[GFP_ZONETYPES]; */
20969 +long vx_area;
20970 +
20971 +
20972 +module_init(init_vserver);
20973 +module_exit(exit_vserver);
20974 +
20975 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/inode.c linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/inode.c
20976 --- linux-2.6.31.6/kernel/vserver/inode.c       1970-01-01 01:00:00.000000000 +0100
20977 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/inode.c 2009-10-09 20:55:03.000000000 +0200
20978 @@ -0,0 +1,433 @@
20979 +/*
20980 + *  linux/kernel/vserver/inode.c
20981 + *
20982 + *  Virtual Server: File System Support
20983 + *
20984 + *  Copyright (C) 2004-2007  Herbert Pötzl
20985 + *
20986 + *  V0.01  separated from vcontext V0.05
20987 + *  V0.02  moved to tag (instead of xid)
20988 + *
20989 + */
20990 +
20991 +#include <linux/tty.h>
20992 +#include <linux/proc_fs.h>
20993 +#include <linux/devpts_fs.h>
20994 +#include <linux/fs.h>
20995 +#include <linux/file.h>
20996 +#include <linux/mount.h>
20997 +#include <linux/parser.h>
20998 +#include <linux/namei.h>
20999 +#include <linux/vserver/inode.h>
21000 +#include <linux/vserver/inode_cmd.h>
21001 +#include <linux/vs_base.h>
21002 +#include <linux/vs_tag.h>
21003 +
21004 +#include <asm/uaccess.h>
21005 +
21006 +
21007 +static int __vc_get_iattr(struct inode *in, uint32_t *tag, uint32_t *flags, uint32_t *mask)
21008 +{
21009 +       struct proc_dir_entry *pde;
21010 +       unsigned long magic;
21011 +
21012 +       if (!in || !in->i_sb)
21013 +               return -ESRCH;
21014 +       magic = in->i_sb->s_magic;
21015 +
21016 +       /* report the attribute bits currently set on the inode */
21017 +       *flags = IATTR_TAG;
21018 +       if (IS_IMMUTABLE(in))
21019 +               *flags |= IATTR_IMMUTABLE;
21020 +       if (IS_IXUNLINK(in))
21021 +               *flags |= IATTR_IXUNLINK;
21022 +       if (IS_BARRIER(in))
21023 +               *flags |= IATTR_BARRIER;
21024 +       if (IS_COW(in))
21025 +               *flags |= IATTR_COW;
21026 +
21027 +       /* ... and which bits apply to this kind of inode */
21028 +       *mask = IATTR_IXUNLINK | IATTR_IMMUTABLE | IATTR_COW;
21029 +       if (S_ISDIR(in->i_mode))
21030 +               *mask |= IATTR_BARRIER;
21031 +
21032 +       /* devpts inodes report their tag regardless of IS_TAGGED() */
21033 +       if (IS_TAGGED(in) || magic == DEVPTS_SUPER_MAGIC) {
21034 +               *tag = in->i_tag;
21035 +               *mask |= IATTR_TAG;
21036 +       }
21037 +
21038 +       if (magic == PROC_SUPER_MAGIC) {
21039 +               pde = PROC_I(in)->pde;
21040 +
21041 +               /* check for specific inodes? */
21042 +               if (pde) {
21043 +                       *mask |= IATTR_FLAGS;
21044 +                       *flags |= (pde->vx_flags & IATTR_FLAGS);
21045 +               } else {
21046 +                       *flags |= (PROC_I(in)->vx_flags & IATTR_FLAGS);
21047 +               }
21048 +       }
21049 +
21050 +       return 0;
21051 +}
21052 +
21053 +int vc_get_iattr(void __user *data)
21054 +{
21055 +       struct vcmd_ctx_iattr_v1 req = { .tag = -1 };
21056 +       struct path where;
21057 +       int err;
21058 +
21059 +       if (copy_from_user(&req, data, sizeof(req)))
21060 +               return -EFAULT;
21061 +
21062 +       err = user_lpath(req.name, &where);
21063 +       if (err)
21064 +               return err;
21065 +       err = __vc_get_iattr(where.dentry->d_inode,
21066 +               &req.tag, &req.flags, &req.mask);
21067 +       path_put(&where);
21068 +       if (err)        /* nothing is written back on failure */
21069 +               return err;
21070 +
21071 +       if (copy_to_user(data, &req, sizeof(req)))
21072 +               return -EFAULT;
21073 +       return 0;
21074 +}
21075 +
21076 +#ifdef CONFIG_COMPAT
21077 +
21078 +int vc_get_iattr_x32(void __user *data)
21079 +{
21080 +       struct vcmd_ctx_iattr_v1_x32 req = { .tag = -1 };
21081 +       struct path where;
21082 +       int err;
21083 +
21084 +       if (copy_from_user(&req, data, sizeof(req)))
21085 +               return -EFAULT;
21086 +
21087 +       err = user_lpath(compat_ptr(req.name_ptr), &where);
21088 +       if (err)
21089 +               return err;
21090 +       err = __vc_get_iattr(where.dentry->d_inode,
21091 +               &req.tag, &req.flags, &req.mask);
21092 +       path_put(&where);
21093 +       if (err)        /* nothing is written back on failure */
21094 +               return err;
21095 +
21096 +       if (copy_to_user(data, &req, sizeof(req)))
21097 +               return -EFAULT;
21098 +       return 0;
21099 +}
21100 +
21101 +#endif /* CONFIG_COMPAT */
21102 +
21103 +/* report tag/flags/mask of the inode behind open descriptor fd; returns 0 or a negative errno */
21104 +int vc_fget_iattr(uint32_t fd, void __user *data)
21105 +{
21106 +       struct file *filp;
21107 +       struct vcmd_ctx_fiattr_v0 vc_data = { .tag = -1 };
21108 +       int ret;
21109 +
21110 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21111 +               return -EFAULT;
21112 +       filp = fget(fd);
21113 +       if (!filp)
21114 +               return -EBADF;
21115 +       if (!filp->f_dentry || !filp->f_dentry->d_inode) {
21116 +               fput(filp);     /* was leaked: fget() already took a reference */
21117 +               return -EBADF;
21118 +       }
21119 +       ret = __vc_get_iattr(filp->f_dentry->d_inode,
21120 +               &vc_data.tag, &vc_data.flags, &vc_data.mask);
21121 +       fput(filp);
21122 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))      /* done even if ret != 0 */
21123 +               ret = -EFAULT;
21124 +       return ret;
21125 +}
21126 +
21127 +/* apply the tag/flag changes selected by *mask to the inode behind de; returns 0 or a negative errno */
21128 +static int __vc_set_iattr(struct dentry *de, uint32_t *tag, uint32_t *flags, uint32_t *mask)
21129 +{
21130 +       struct inode *in = de->d_inode;
21131 +       int error = 0, is_proc = 0, has_tag = 0;
21132 +       struct iattr attr = { 0 };
21133 +
21134 +       if (!in || !in->i_sb)
21135 +               return -ESRCH;
21136 +       /* IATTR_FLAGS bits may only be changed on procfs inodes */
21137 +       is_proc = (in->i_sb->s_magic == PROC_SUPER_MAGIC);
21138 +       if ((*mask & IATTR_FLAGS) && !is_proc)
21139 +               return -EINVAL;
21140 +       /* a tag may only be set on tagged filesystems or on devpts */
21141 +       has_tag = IS_TAGGED(in) ||
21142 +               (in->i_sb->s_magic == DEVPTS_SUPER_MAGIC);
21143 +       if ((*mask & IATTR_TAG) && !has_tag)
21144 +               return -EINVAL;
21145 +
21146 +       mutex_lock(&in->i_mutex);
21147 +       if (*mask & IATTR_TAG) {        /* the tag itself is applied via setattr below */
21148 +               attr.ia_tag = *tag;
21149 +               attr.ia_valid |= ATTR_TAG;
21150 +       }
21151 +
21152 +       if (*mask & IATTR_FLAGS) {
21153 +               struct proc_dir_entry *entry = PROC_I(in)->pde;
21154 +               unsigned int iflags = PROC_I(in)->vx_flags;
21155 +               /* replace only the masked bits; stored before any later step can fail */
21156 +               iflags = (iflags & ~(*mask & IATTR_FLAGS))
21157 +                       | (*flags & IATTR_FLAGS);
21158 +               PROC_I(in)->vx_flags = iflags;
21159 +               if (entry)
21160 +                       entry->vx_flags = iflags;
21161 +       }
21162 +
21163 +       if (*mask & (IATTR_IMMUTABLE | IATTR_IXUNLINK |
21164 +               IATTR_BARRIER | IATTR_COW)) {
21165 +               int iflags = in->i_flags;
21166 +               int vflags = in->i_vflags;
21167 +
21168 +               if (*mask & IATTR_IMMUTABLE) {
21169 +                       if (*flags & IATTR_IMMUTABLE)
21170 +                               iflags |= S_IMMUTABLE;
21171 +                       else
21172 +                               iflags &= ~S_IMMUTABLE;
21173 +               }
21174 +               if (*mask & IATTR_IXUNLINK) {
21175 +                       if (*flags & IATTR_IXUNLINK)
21176 +                               iflags |= S_IXUNLINK;
21177 +                       else
21178 +                               iflags &= ~S_IXUNLINK;
21179 +               }
21180 +               if (S_ISDIR(in->i_mode) && (*mask & IATTR_BARRIER)) {
21181 +                       if (*flags & IATTR_BARRIER)
21182 +                               vflags |= V_BARRIER;
21183 +                       else
21184 +                               vflags &= ~V_BARRIER;
21185 +               }
21186 +               if (S_ISREG(in->i_mode) && (*mask & IATTR_COW)) {
21187 +                       if (*flags & IATTR_COW)
21188 +                               vflags |= V_COW;
21189 +                       else
21190 +                               vflags &= ~V_COW;
21191 +               }       /* the new values reach the inode only through ->sync_flags() */
21192 +               if (in->i_op && in->i_op->sync_flags) {
21193 +                       error = in->i_op->sync_flags(in, iflags, vflags);
21194 +                       if (error)
21195 +                               goto out;
21196 +               }
21197 +       }
21198 +
21199 +       if (attr.ia_valid) {    /* only ATTR_TAG can be set at this point */
21200 +               if (in->i_op && in->i_op->setattr)
21201 +                       error = in->i_op->setattr(de, &attr);
21202 +               else {
21203 +                       error = inode_change_ok(in, &attr);
21204 +                       if (!error)
21205 +                               error = inode_setattr(in, &attr);
21206 +               }
21207 +       }
21208 +
21209 +out:
21210 +       mutex_unlock(&in->i_mutex);
21211 +       return error;
21212 +}
21213 +
21214 +int vc_set_iattr(void __user *data)
21215 +{
21216 +       struct vcmd_ctx_iattr_v1 req;
21217 +       struct path where;
21218 +       int err;
21219 +
21220 +       if (!capable(CAP_LINUX_IMMUTABLE))
21221 +               return -EPERM;
21222 +       if (copy_from_user(&req, data, sizeof(req)))
21223 +               return -EFAULT;
21224 +
21225 +       err = user_lpath(req.name, &where);
21226 +       if (err)
21227 +               goto writeback;
21228 +       err = __vc_set_iattr(where.dentry, &req.tag, &req.flags, &req.mask);
21229 +       path_put(&where);
21230 +writeback:
21231 +       /* the request is copied back whether or not the update worked */
21232 +       if (copy_to_user(data, &req, sizeof(req)))
21233 +               return -EFAULT;
21234 +       return err;
21235 +}
21236 +
21237 +#ifdef CONFIG_COMPAT
21238 +
21239 +int vc_set_iattr_x32(void __user *data)
21240 +{
21241 +       struct path path;
21242 +       struct vcmd_ctx_iattr_v1_x32 vc_data;
21243 +       int ret;
21244 +
21245 +       if (!capable(CAP_LINUX_IMMUTABLE))
21246 +               return -EPERM;
21247 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21248 +               return -EFAULT;
21249 +
21250 +       ret = user_lpath(compat_ptr(vc_data.name_ptr), &path);
21251 +       if (!ret) {
21252 +               ret = __vc_set_iattr(path.dentry,
21253 +                       &vc_data.tag, &vc_data.flags, &vc_data.mask);
21254 +               path_put(&path);
21255 +       }
21256 +
21257 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
21258 +               ret = -EFAULT;
21259 +       return ret;
21260 +}
21261 +
21262 +#endif /* CONFIG_COMPAT */
21263 +/* Apply the tag/flags/mask from *data to the inode behind fd. */
21264 +int vc_fset_iattr(uint32_t fd, void __user *data)
21265 +{
21266 +       struct file *filp;
21267 +       struct vcmd_ctx_fiattr_v0 vc_data;
21268 +       int ret;
21269 +
21270 +       if (!capable(CAP_LINUX_IMMUTABLE))
21271 +               return -EPERM;
21272 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21273 +               return -EFAULT;
21274 +
21275 +       filp = fget(fd);
21276 +       if (!filp)
21277 +               return -EBADF;
21278 +       if (!filp->f_dentry || !filp->f_dentry->d_inode) {
21279 +               fput(filp);     /* do not leak the reference fget() took */
21280 +               return -EBADF;
21281 +       }
21282 +
21283 +       ret = __vc_set_iattr(filp->f_dentry, &vc_data.tag,
21284 +               &vc_data.flags, &vc_data.mask);
21285 +       fput(filp);
21286 +       return copy_to_user(data, &vc_data, sizeof(vc_data)) ? -EFAULT : ret;
21287 +}
21288 +
21289 +
21290 +enum { Opt_notagcheck, Opt_tag, Opt_notag, Opt_tagid, Opt_err };
21291 +
21292 +static match_table_t tokens = {
21293 +       {Opt_notagcheck, "notagcheck"},
21294 +#ifdef CONFIG_PROPAGATE
21295 +       {Opt_notag, "notag"},
21296 +       {Opt_tag, "tag"},
21297 +       {Opt_tagid, "tagid=%u"},
21298 +#endif
21299 +       {Opt_err, NULL}
21300 +};
21301 +
21302 +/* Cut the first occurrence of opt (up to the next ',' or end) out of string. */
21303 +static void __dx_parse_remove(char *string, char *opt)
21304 +{
21305 +       char *p = strstr(string, opt);  /* plain substring match, not token-aware */
21306 +       char *q = p;
21307 +
21308 +       if (p) {
21309 +               while (*q != '\0' && *q != ',') /* q: end of the matched option */
21310 +                       q++;
21311 +               while (*q)                      /* slide the tail, comma included, down */
21312 +                       *p++ = *q++;
21313 +               while (*p)                      /* zero-fill the leftover bytes */
21314 +                       *p++ = '\0';
21315 +       }
21316 +}
21317 +
21318 +int dx_parse_tag(char *string, tag_t *tag, int remove, int *mnt_flags,
21319 +                unsigned long *flags)
21320 +{
21321 +       int set = 0;
21322 +       substring_t args[MAX_OPT_ARGS];
21323 +       int token, option = 0;
21324 +       char *s, *p, *opts;
21325 +
21326 +       if (!string)
21327 +               return 0;
21328 +       s = kstrdup(string, GFP_KERNEL | GFP_ATOMIC);
21329 +       if (!s)
21330 +               return 0;
21331 +
21332 +       opts = s;
21333 +       while ((p = strsep(&opts, ",")) != NULL) {
21334 +               token = match_token(p, tokens, args);
21335 +
21336 +               vxdprintk(VXD_CBIT(tag, 7),
21337 +                       "dx_parse_tag(»%s«): %d:#%d",
21338 +                       p, token, option);
21339 +
21340 +               switch (token) {
21341 +#ifdef CONFIG_PROPAGATE
21342 +               case Opt_tag:
21343 +                       if (tag)
21344 +                               *tag = 0;
21345 +                       if (remove)
21346 +                               __dx_parse_remove(s, "tag");
21347 +                       *mnt_flags |= MNT_TAGID;
21348 +                       set |= MNT_TAGID;
21349 +                       break;
21350 +               case Opt_notag:
21351 +                       if (remove)
21352 +                               __dx_parse_remove(s, "notag");
21353 +                       *mnt_flags |= MNT_NOTAG;
21354 +                       set |= MNT_NOTAG;
21355 +                       break;
21356 +               case Opt_tagid:
21357 +                       if (tag && !match_int(args, &option))
21358 +                               *tag = option;
21359 +                       if (remove)
21360 +                               __dx_parse_remove(s, "tagid");
21361 +                       *mnt_flags |= MNT_TAGID;
21362 +                       set |= MNT_TAGID;
21363 +                       break;
21364 +#endif
21365 +               case Opt_notagcheck:
21366 +                       if (remove)
21367 +                               __dx_parse_remove(s, "notagcheck");
21368 +                       *flags |= MS_NOTAGCHECK;
21369 +                       set |= MS_NOTAGCHECK;
21370 +                       break;
21371 +               }
21372 +       }
21373 +       if (set)
21374 +               strcpy(string, s);
21375 +       kfree(s);
21376 +       return set;
21377 +}
21378 +
21379 +#ifdef CONFIG_PROPAGATE
21380 +
21381 +void __dx_propagate_tag(struct nameidata *nd, struct inode *inode)
21382 +{
21383 +       tag_t new_tag = 0;
21384 +       struct vfsmount *mnt;
21385 +       int propagate;
21386 +
21387 +       if (!nd)
21388 +               return;
21389 +       mnt = nd->path.mnt;
21390 +       if (!mnt)
21391 +               return;
21392 +
21393 +       propagate = (mnt->mnt_flags & MNT_TAGID);
21394 +       if (propagate)
21395 +               new_tag = mnt->mnt_tag;
21396 +
21397 +       vxdprintk(VXD_CBIT(tag, 7),
21398 +               "dx_propagate_tag(%p[#%lu.%d]): %d,%d",
21399 +               inode, inode->i_ino, inode->i_tag,
21400 +               new_tag, (propagate) ? 1 : 0);
21401 +
21402 +       if (propagate)
21403 +               inode->i_tag = new_tag;
21404 +}
21405 +
21406 +#include <linux/module.h>
21407 +
21408 +EXPORT_SYMBOL_GPL(__dx_propagate_tag);
21409 +
21410 +#endif /* CONFIG_PROPAGATE */
21411 +
21412 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/Kconfig linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/Kconfig
21413 --- linux-2.6.31.6/kernel/vserver/Kconfig       1970-01-01 01:00:00.000000000 +0100
21414 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/Kconfig 2009-09-10 16:11:43.000000000 +0200
21415 @@ -0,0 +1,251 @@
21416 +#
21417 +# Linux VServer configuration
21418 +#
21419 +
21420 +menu "Linux VServer"
21421 +
21422 +config VSERVER_AUTO_LBACK
21423 +       bool    "Automatically Assign Loopback IP"
21424 +       default y
21425 +       help
21426 +         Automatically assign a guest specific loopback
21427 +         IP and add it to the kernel network stack on
21428 +         startup.
21429 +
21430 +config VSERVER_AUTO_SINGLE
21431 +       bool    "Automatic Single IP Special Casing"
21432 +       depends on EXPERIMENTAL
21433 +       default y
21434 +       help
21435 +         This allows network contexts with a single IP to
21436 +         automatically remap 0.0.0.0 bindings to that IP,
21437 +         avoiding further network checks and improving
21438 +         performance.
21439 +
21440 +         (note: such guests do not allow changing the IP
21441 +          on the fly and do not show loopback addresses)
21442 +
21443 +config VSERVER_COWBL
21444 +       bool    "Enable COW Immutable Link Breaking"
21445 +       default y
21446 +       help
21447 +         This enables the COW (Copy-On-Write) link break code.
21448 +         It allows you to treat unified files like normal files
21449 +         when writing to them (which will implicitly break the
21450 +         link and create a copy of the unified file)
21451 +
21452 +config VSERVER_VTIME
21453 +       bool    "Enable Virtualized Guest Time"
21454 +       depends on EXPERIMENTAL
21455 +       default n
21456 +       help
21457 +         This enables per guest time offsets to allow for
21458 +         adjusting the system clock individually per guest.
21459 +         This adds some overhead to the time functions and
21460 +         therefore should not be enabled without good reason.
21461 +
21462 +config VSERVER_DEVICE
21463 +       bool    "Enable Guest Device Mapping"
21464 +       depends on EXPERIMENTAL
21465 +       default n
21466 +       help
21467 +         This enables generic device remapping.
21468 +
21469 +config VSERVER_PROC_SECURE
21470 +       bool    "Enable Proc Security"
21471 +       depends on PROC_FS
21472 +       default y
21473 +       help
21474 +         This configures ProcFS security to initially hide
21475 +         non-process entries for all contexts except the main and
21476 +         spectator context (i.e. for all guests), which is a secure
21477 +         default.
21478 +
21479 +         (note: on 1.2x the entries were visible by default)
21480 +
21481 +config VSERVER_HARDCPU
21482 +       bool    "Enable Hard CPU Limits"
21483 +       default y
21484 +       help
21485 +         Activate the Hard CPU Limits
21486 +
21487 +         This will compile in code that allows the Token Bucket
21488 +         Scheduler to put processes on hold when a context's
21489 +         tokens are depleted (provided that its per-context
21490 +         sched_hard flag is set).
21491 +
21492 +         Processes belonging to that context will not be able
21493 +         to consume CPU resources again until a per-context
21494 +         configured minimum of tokens has been reached.
21495 +
21496 +config VSERVER_IDLETIME
21497 +       bool    "Avoid idle CPUs by skipping Time"
21498 +       depends on VSERVER_HARDCPU
21499 +       default y
21500 +       help
21501 +         This option allows the scheduler to artificially
21502 +         advance time (per cpu) when otherwise the idle
21503 +         task would be scheduled, thus keeping the cpu
21504 +         busy and sharing the available resources among
21505 +         certain contexts.
21506 +
21507 +config VSERVER_IDLELIMIT
21508 +       bool    "Limit the IDLE task"
21509 +       depends on VSERVER_HARDCPU
21510 +       default n
21511 +       help
21512 +         Limit the idle slices, so that the next context
21513 +         will be scheduled as soon as possible.
21514 +
21515 +         This might improve interactivity and latency, but
21516 +         will also marginally increase scheduling overhead.
21517 +
21518 +choice
21519 +       prompt  "Persistent Inode Tagging"
21520 +       default TAGGING_ID24
21521 +       help
21522 +         This adds persistent context information to filesystems
21523 +         mounted with the tagxid option. Tagging is a requirement
21524 +         for per-context disk limits and per-context quota.
21525 +
21526 +
21527 +config TAGGING_NONE
21528 +       bool    "Disabled"
21529 +       help
21530 +         do not store per-context information in inodes.
21531 +
21532 +config TAGGING_UID16
21533 +       bool    "UID16/GID32"
21534 +       help
21535 +         reduces UID to 16 bit, but leaves GID at 32 bit.
21536 +
21537 +config TAGGING_GID16
21538 +       bool    "UID32/GID16"
21539 +       help
21540 +         reduces GID to 16 bit, but leaves UID at 32 bit.
21541 +
21542 +config TAGGING_ID24
21543 +       bool    "UID24/GID24"
21544 +       help
21545 +         uses the upper 8bit from UID and GID for XID tagging
21546 +         which leaves 24bit for UID/GID each, which should be
21547 +         more than sufficient for normal use.
21548 +
21549 +config TAGGING_INTERN
21550 +       bool    "UID32/GID32"
21551 +       help
21552 +         this uses otherwise reserved inode fields in the on
21553 +         disk representation, which limits the use to a few
21554 +         filesystems (currently ext2 and ext3)
21555 +
21556 +endchoice
21557 +
21558 +config TAG_NFSD
21559 +       bool    "Tag NFSD User Auth and Files"
21560 +       default n
21561 +       help
21562 +         Enable this if you do want the in-kernel NFS
21563 +         Server to use the tagging specified above.
21564 +         (will require patched clients too)
21565 +
21566 +config VSERVER_PRIVACY
21567 +       bool    "Honor Privacy Aspects of Guests"
21568 +       default n
21569 +       help
21570 +         When enabled, most context checks will disallow
21571 +         access to structures assigned to a specific context,
21572 +         like ptys or loop devices.
21573 +
21574 +config VSERVER_CONTEXTS
21575 +       int     "Maximum number of Contexts (1-65533)"  if EMBEDDED
21576 +       range 1 65533
21577 +       default "768"   if 64BIT
21578 +       default "256"
21579 +       help
21580 +         This setting will optimize certain data structures
21581 +         and memory allocations according to the expected
21582 +         maximum.
21583 +
21584 +         note: this is not a strict upper limit.
21585 +
21586 +config VSERVER_WARN
21587 +       bool    "VServer Warnings"
21588 +       default y
21589 +       help
21590 +         This enables various runtime warnings, which will
21591 +         notify about potential manipulation attempts or
21592 +         resource shortage. It is generally considered to
21593 +         be a good idea to have that enabled.
21594 +
21595 +config VSERVER_DEBUG
21596 +       bool    "VServer Debugging Code"
21597 +       default n
21598 +       help
21599 +         Set this to yes if you want to be able to activate
21600 +         debugging output at runtime. It adds a very small
21601 +         overhead to all vserver related functions and
21602 +         increases the kernel size by about 20k.
21603 +
21604 +config VSERVER_HISTORY
21605 +       bool    "VServer History Tracing"
21606 +       depends on VSERVER_DEBUG
21607 +       default n
21608 +       help
21609 +         Set this to yes if you want to record the history of
21610 +         linux-vserver activities, so they can be replayed in
21611 +         the event of a kernel panic or oops.
21612 +
21613 +config VSERVER_HISTORY_SIZE
21614 +       int     "Per-CPU History Size (32-65536)"
21615 +       depends on VSERVER_HISTORY
21616 +       range 32 65536
21617 +       default 64
21618 +       help
21619 +         This allows you to specify the number of entries in
21620 +         the per-CPU history buffer.
21621 +
21622 +config VSERVER_MONITOR
21623 +       bool    "VServer Scheduling Monitor"
21624 +       depends on VSERVER_DISABLED
21625 +       default n
21626 +       help
21627 +         Set this to yes if you want to record the scheduling
21628 +         decisions, so that they can be relayed to userspace
21629 +         for detailed analysis.
21630 +
21631 +config VSERVER_MONITOR_SIZE
21632 +       int     "Per-CPU Monitor Queue Size (32-65536)"
21633 +       depends on VSERVER_MONITOR
21634 +       range 32 65536
21635 +       default 1024
21636 +       help
21637 +         This allows you to specify the number of entries in
21638 +         the per-CPU scheduling monitor buffer.
21639 +
21640 +config VSERVER_MONITOR_SYNC
21641 +       int     "Per-CPU Monitor Sync Interval (0-65536)"
21642 +       depends on VSERVER_MONITOR
21643 +       range 0 65536
21644 +       default 256
21645 +       help
21646 +         This allows you to specify the interval in ticks
21647 +         when a time sync entry is inserted.
21648 +
21649 +endmenu
21650 +
21651 +
21652 +config VSERVER
21653 +       bool
21654 +       default y
21655 +       select NAMESPACES
21656 +       select UTS_NS
21657 +       select IPC_NS
21658 +       select USER_NS
21659 +       select SYSVIPC
21660 +
21661 +config VSERVER_SECURITY
21662 +       bool
21663 +       depends on SECURITY
21664 +       default y
21665 +       select SECURITY_CAPABILITIES
21666 +
21667 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/limit.c linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/limit.c
21668 --- linux-2.6.31.6/kernel/vserver/limit.c       1970-01-01 01:00:00.000000000 +0100
21669 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/limit.c 2009-11-05 04:22:22.000000000 +0100
21670 @@ -0,0 +1,333 @@
21671 +/*
21672 + *  linux/kernel/vserver/limit.c
21673 + *
21674 + *  Virtual Server: Context Limits
21675 + *
21676 + *  Copyright (C) 2004-2007  Herbert Pötzl
21677 + *
21678 + *  V0.01  broken out from vcontext V0.05
21679 + *  V0.02  changed vcmds to vxi arg
21680 + *
21681 + */
21682 +
21683 +#include <linux/sched.h>
21684 +#include <linux/module.h>
21685 +#include <linux/vs_limit.h>
21686 +#include <linux/vserver/limit.h>
21687 +#include <linux/vserver/limit_cmd.h>
21688 +
21689 +#include <asm/uaccess.h>
21690 +
21691 +
21692 +const char *vlimit_name[NUM_LIMITS] = {
21693 +       [RLIMIT_CPU]            = "CPU",
21694 +       [RLIMIT_RSS]            = "RSS",
21695 +       [RLIMIT_NPROC]          = "NPROC",
21696 +       [RLIMIT_NOFILE]         = "NOFILE",
21697 +       [RLIMIT_MEMLOCK]        = "VML",
21698 +       [RLIMIT_AS]             = "VM",
21699 +       [RLIMIT_LOCKS]          = "LOCKS",
21700 +       [RLIMIT_SIGPENDING]     = "SIGP",
21701 +       [RLIMIT_MSGQUEUE]       = "MSGQ",
21702 +
21703 +       [VLIMIT_NSOCK]          = "NSOCK",
21704 +       [VLIMIT_OPENFD]         = "OPENFD",
21705 +       [VLIMIT_ANON]           = "ANON",
21706 +       [VLIMIT_SHMEM]          = "SHMEM",
21707 +       [VLIMIT_DENTRY]         = "DENTRY",
21708 +};
21709 +
21710 +EXPORT_SYMBOL_GPL(vlimit_name);
21711 +
21712 +#define MASK_ENTRY(x)  (1 << (x))
21713 +
21714 +const struct vcmd_ctx_rlimit_mask_v0 vlimit_mask = {
21715 +               /* minimum */
21716 +       0
21717 +       ,       /* softlimit */
21718 +       MASK_ENTRY( RLIMIT_RSS          ) |
21719 +       MASK_ENTRY( VLIMIT_ANON         ) |
21720 +       0
21721 +       ,       /* maximum */
21722 +       MASK_ENTRY( RLIMIT_RSS          ) |
21723 +       MASK_ENTRY( RLIMIT_NPROC        ) |
21724 +       MASK_ENTRY( RLIMIT_NOFILE       ) |
21725 +       MASK_ENTRY( RLIMIT_MEMLOCK      ) |
21726 +       MASK_ENTRY( RLIMIT_AS           ) |
21727 +       MASK_ENTRY( RLIMIT_LOCKS        ) |
21728 +       MASK_ENTRY( RLIMIT_MSGQUEUE     ) |
21729 +
21730 +       MASK_ENTRY( VLIMIT_NSOCK        ) |
21731 +       MASK_ENTRY( VLIMIT_OPENFD       ) |
21732 +       MASK_ENTRY( VLIMIT_ANON         ) |
21733 +       MASK_ENTRY( VLIMIT_SHMEM        ) |
21734 +       MASK_ENTRY( VLIMIT_DENTRY       ) |
21735 +       0
21736 +};
21737 +               /* accounting only */
21738 +uint32_t account_mask =
21739 +       MASK_ENTRY( VLIMIT_SEMARY       ) |
21740 +       MASK_ENTRY( VLIMIT_NSEMS        ) |
21741 +       MASK_ENTRY( VLIMIT_MAPPED       ) |
21742 +       0;
21743 +
21744 +/* True iff id fits the 32-bit mask and is set in one of the vlimit_mask sets. */
21745 +static int is_valid_vlimit(int id)
21746 +{
21747 +       uint32_t mask = vlimit_mask.minimum |
21748 +               vlimit_mask.softlimit | vlimit_mask.maximum;
21749 +       return (unsigned int)id < 8 * sizeof(mask) && (mask & (1U << id));
21750 +}
21751 +/* True iff id is settable or accounting-only; ids outside the mask are not. */
21752 +static int is_accounted_vlimit(int id)
21753 +{
21754 +       if (is_valid_vlimit(id))
21755 +               return 1;
21756 +       return (unsigned int)id < 8 * sizeof(account_mask) && (account_mask & (1U << id));
21757 +}
21758 +
21759 +
21760 +static inline uint64_t vc_get_soft(struct vx_info *vxi, int id)
21761 +{
21762 +       rlim_t limit = __rlim_soft(&vxi->limit, id);
21763 +       return VX_VLIM(limit);
21764 +}
21765 +
21766 +static inline uint64_t vc_get_hard(struct vx_info *vxi, int id)
21767 +{
21768 +       rlim_t limit = __rlim_hard(&vxi->limit, id);
21769 +       return VX_VLIM(limit);
21770 +}
21771 +
21772 +static int do_get_rlimit(struct vx_info *vxi, uint32_t id,
21773 +       uint64_t *minimum, uint64_t *softlimit, uint64_t *maximum)
21774 +{
21775 +       if (!is_valid_vlimit(id))
21776 +               return -EINVAL;
21777 +
21778 +       if (minimum)
21779 +               *minimum = CRLIM_UNSET;
21780 +       if (softlimit)
21781 +               *softlimit = vc_get_soft(vxi, id);
21782 +       if (maximum)
21783 +               *maximum = vc_get_hard(vxi, id);
21784 +       return 0;
21785 +}
21786 +
21787 +int vc_get_rlimit(struct vx_info *vxi, void __user *data)
21788 +{
21789 +       struct vcmd_ctx_rlimit_v0 vc_data;
21790 +       int ret;
21791 +
21792 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21793 +               return -EFAULT;
21794 +
21795 +       ret = do_get_rlimit(vxi, vc_data.id,
21796 +               &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum);
21797 +       if (ret)
21798 +               return ret;
21799 +
21800 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
21801 +               return -EFAULT;
21802 +       return 0;
21803 +}
21804 +
21805 +static int do_set_rlimit(struct vx_info *vxi, uint32_t id,
21806 +       uint64_t minimum, uint64_t softlimit, uint64_t maximum) /* minimum is unused */
21807 +{
21808 +       if (!is_valid_vlimit(id))
21809 +               return -EINVAL;
21810 +
21811 +       if (maximum != CRLIM_KEEP)      /* CRLIM_KEEP: leave the hard limit as is */
21812 +               __rlim_hard(&vxi->limit, id) = VX_RLIM(maximum);
21813 +       if (softlimit != CRLIM_KEEP)    /* CRLIM_KEEP: leave the soft limit as is */
21814 +               __rlim_soft(&vxi->limit, id) = VX_RLIM(softlimit);
21815 +
21816 +       /* clamp soft limit so it never exceeds the hard limit */
21817 +       if (__rlim_soft(&vxi->limit, id) > __rlim_hard(&vxi->limit, id))
21818 +               __rlim_soft(&vxi->limit, id) = __rlim_hard(&vxi->limit, id);
21819 +
21820 +       return 0;
21821 +}
21822 +
21823 +int vc_set_rlimit(struct vx_info *vxi, void __user *data)
21824 +{
21825 +       struct vcmd_ctx_rlimit_v0 vc_data;
21826 +
21827 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21828 +               return -EFAULT;
21829 +
21830 +       return do_set_rlimit(vxi, vc_data.id,
21831 +               vc_data.minimum, vc_data.softlimit, vc_data.maximum);
21832 +}
21833 +
21834 +#ifdef CONFIG_IA32_EMULATION
21835 +
21836 +int vc_set_rlimit_x32(struct vx_info *vxi, void __user *data)
21837 +{
21838 +       struct vcmd_ctx_rlimit_v0_x32 vc_data;
21839 +
21840 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21841 +               return -EFAULT;
21842 +
21843 +       return do_set_rlimit(vxi, vc_data.id,
21844 +               vc_data.minimum, vc_data.softlimit, vc_data.maximum);
21845 +}
21846 +
21847 +int vc_get_rlimit_x32(struct vx_info *vxi, void __user *data)
21848 +{
21849 +       struct vcmd_ctx_rlimit_v0_x32 vc_data;
21850 +       int ret;
21851 +
21852 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21853 +               return -EFAULT;
21854 +
21855 +       ret = do_get_rlimit(vxi, vc_data.id,
21856 +               &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum);
21857 +       if (ret)
21858 +               return ret;
21859 +
21860 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
21861 +               return -EFAULT;
21862 +       return 0;
21863 +}
21864 +
21865 +#endif /* CONFIG_IA32_EMULATION */
21866 +
21867 +
21868 +int vc_get_rlimit_mask(uint32_t id, void __user *data)
21869 +{
21870 +       if (copy_to_user(data, &vlimit_mask, sizeof(vlimit_mask)))
21871 +               return -EFAULT;
21872 +       return 0;
21873 +}
21874 +
21875 +
21876 +static inline void vx_reset_hits(struct _vx_limit *limit)
21877 +{
21878 +       int lim;
21879 +
21880 +       for (lim = 0; lim < NUM_LIMITS; lim++) {
21881 +               atomic_set(&__rlim_lhit(limit, lim), 0);
21882 +       }
21883 +}
21884 +
21885 +int vc_reset_hits(struct vx_info *vxi, void __user *data)
21886 +{
21887 +       vx_reset_hits(&vxi->limit);
21888 +       return 0;
21889 +}
21890 +
21891 +static inline void vx_reset_minmax(struct _vx_limit *limit)
21892 +{
21893 +       rlim_t value;
21894 +       int lim;
21895 +
21896 +       for (lim = 0; lim < NUM_LIMITS; lim++) {
21897 +               value = __rlim_get(limit, lim);
21898 +               __rlim_rmax(limit, lim) = value;
21899 +               __rlim_rmin(limit, lim) = value;
21900 +       }
21901 +}
21902 +
21903 +int vc_reset_minmax(struct vx_info *vxi, void __user *data)
21904 +{
21905 +       vx_reset_minmax(&vxi->limit);
21906 +       return 0;
21907 +}
21908 +
21909 +
21910 +int vc_rlimit_stat(struct vx_info *vxi, void __user *data)
21911 +{
21912 +       struct vcmd_rlimit_stat_v0 vc_data;
21913 +       struct _vx_limit *limit = &vxi->limit;
21914 +       int id;
21915 +
21916 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21917 +               return -EFAULT;
21918 +
21919 +       id = vc_data.id;
21920 +       if (!is_accounted_vlimit(id))
21921 +               return -EINVAL;
21922 +
21923 +       vx_limit_fixup(limit, id);
21924 +       vc_data.hits = atomic_read(&__rlim_lhit(limit, id));
21925 +       vc_data.value = __rlim_get(limit, id);
21926 +       vc_data.minimum = __rlim_rmin(limit, id);
21927 +       vc_data.maximum = __rlim_rmax(limit, id);
21928 +
21929 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
21930 +               return -EFAULT;
21931 +       return 0;
21932 +}
21933 +
21934 +/* Rewrite the RAM fields of *val from the current context's RSS limit/usage. */
21935 +void vx_vsi_meminfo(struct sysinfo *val)
21936 +{
21937 +       struct vx_info *vxi = current_vx_info();        /* NOTE(review): not NULL-checked */
21938 +       unsigned long totalram, freeram;
21939 +       rlim_t v;
21940 +
21941 +       /* take the soft RSS limit as total RAM, unchecked against the real total */
21942 +       v = __rlim_soft(&vxi->limit, RLIMIT_RSS);
21943 +       totalram = (v != RLIM_INFINITY) ? v : val->totalram;
21944 +
21945 +       /* total minus used equals free (0 when usage is at or above the total) */
21946 +       v = __vx_cres_array_fixup(&vxi->limit, VLA_RSS);
21947 +       freeram = (v < totalram) ? totalram - v : 0;
21948 +
21949 +       val->totalram = totalram;
21950 +       val->freeram = freeram;
21951 +       val->bufferram = 0;
21952 +       val->totalhigh = 0;
21953 +       val->freehigh = 0;
21954 +       return;
21955 +}
21956 +
21957 +void vx_vsi_swapinfo(struct sysinfo *val)
21958 +{
21959 +       struct vx_info *vxi = current_vx_info();
21960 +       unsigned long totalswap, freeswap;
21961 +       rlim_t v, w;
21962 +
21963 +       v = __rlim_soft(&vxi->limit, RLIMIT_RSS);
21964 +       if (v == RLIM_INFINITY) {
21965 +               val->freeswap = val->totalswap;
21966 +               return;
21967 +       }
21968 +
21969 +       /* we blindly accept the max */
21970 +       w = __rlim_hard(&vxi->limit, RLIMIT_RSS);
21971 +       totalswap = (w != RLIM_INFINITY) ? (w - v) : val->totalswap;
21972 +
21973 +       /* currently 'used' swap */
21974 +       w = __vx_cres_array_fixup(&vxi->limit, VLA_RSS);
21975 +       w -= (w > v) ? v : w;
21976 +
21977 +       /* total minus used equals free */
21978 +       freeswap = (w < totalswap) ? totalswap - w : 0;
21979 +
21980 +       val->totalswap = totalswap;
21981 +       val->freeswap = freeswap;
21982 +       return;
21983 +}
21984 +
21985 +
21986 +unsigned long vx_badness(struct task_struct *task, struct mm_struct *mm)
21987 +{
21988 +       struct vx_info *vxi = mm->mm_vx_info;
21989 +       unsigned long points;
21990 +       rlim_t v, w;
21991 +
21992 +       if (!vxi)
21993 +               return 0;
21994 +
21995 +       points = vxi->vx_badness_bias;
21996 +
21997 +       v = __vx_cres_array_fixup(&vxi->limit, VLA_RSS);
21998 +       w = __rlim_soft(&vxi->limit, RLIMIT_RSS);
21999 +       points += (v > w) ? (v - w) : 0;
22000 +
22001 +       return points;
22002 +}
22003 +
22004 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/limit_init.h linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/limit_init.h
22005 --- linux-2.6.31.6/kernel/vserver/limit_init.h  1970-01-01 01:00:00.000000000 +0100
22006 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/limit_init.h    2009-09-10 16:11:43.000000000 +0200
22007 @@ -0,0 +1,31 @@
22008 +
22009 +
22010 +static inline void vx_info_init_limit(struct _vx_limit *limit)
22011 +{
22012 +       int lim;
22013 +
22014 +       for (lim = 0; lim < NUM_LIMITS; lim++) {
22015 +               __rlim_soft(limit, lim) = RLIM_INFINITY;
22016 +               __rlim_hard(limit, lim) = RLIM_INFINITY;
22017 +               __rlim_set(limit, lim, 0);
22018 +               atomic_set(&__rlim_lhit(limit, lim), 0);
22019 +               __rlim_rmin(limit, lim) = 0;
22020 +               __rlim_rmax(limit, lim) = 0;
22021 +       }
22022 +}
22023 +
22024 +static inline void vx_info_exit_limit(struct _vx_limit *limit)
22025 +{
22026 +       rlim_t value;
22027 +       int lim;
22028 +
22029 +       for (lim = 0; lim < NUM_LIMITS; lim++) {
22030 +               if ((1 << lim) & VLIM_NOCHECK)
22031 +                       continue;
22032 +               value = __rlim_get(limit, lim);
22033 +               vxwprintk_xid(value,
22034 +                       "!!! limit: %p[%s,%d] = %ld on exit.",
22035 +                       limit, vlimit_name[lim], lim, (long)value);
22036 +       }
22037 +}
22038 +
22039 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/limit_proc.h linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/limit_proc.h
22040 --- linux-2.6.31.6/kernel/vserver/limit_proc.h  1970-01-01 01:00:00.000000000 +0100
22041 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/limit_proc.h    2009-09-10 16:11:43.000000000 +0200
22042 @@ -0,0 +1,57 @@
22043 +#ifndef _VX_LIMIT_PROC_H
22044 +#define _VX_LIMIT_PROC_H
22045 +
22046 +#include <linux/vserver/limit_int.h>
22047 +
22048 +
22049 +#define VX_LIMIT_FMT   ":\t%8ld\t%8ld/%8ld\t%8lld/%8lld\t%6d\n"
22050 +#define VX_LIMIT_TOP   \
22051 +       "Limit\t current\t     min/max\t\t    soft/hard\t\thits\n"
22052 +
22053 +#define VX_LIMIT_ARG(r)                                \
22054 +       (unsigned long)__rlim_get(limit, r),    \
22055 +       (unsigned long)__rlim_rmin(limit, r),   \
22056 +       (unsigned long)__rlim_rmax(limit, r),   \
22057 +       VX_VLIM(__rlim_soft(limit, r)),         \
22058 +       VX_VLIM(__rlim_hard(limit, r)),         \
22059 +       atomic_read(&__rlim_lhit(limit, r))
22060 +
22061 +static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer)
22062 +{
22063 +       vx_limit_fixup(limit, -1);
22064 +       return sprintf(buffer, VX_LIMIT_TOP
22065 +               "PROC"  VX_LIMIT_FMT
22066 +               "VM"    VX_LIMIT_FMT
22067 +               "VML"   VX_LIMIT_FMT
22068 +               "RSS"   VX_LIMIT_FMT
22069 +               "ANON"  VX_LIMIT_FMT
22070 +               "RMAP"  VX_LIMIT_FMT
22071 +               "FILES" VX_LIMIT_FMT
22072 +               "OFD"   VX_LIMIT_FMT
22073 +               "LOCKS" VX_LIMIT_FMT
22074 +               "SOCK"  VX_LIMIT_FMT
22075 +               "MSGQ"  VX_LIMIT_FMT
22076 +               "SHM"   VX_LIMIT_FMT
22077 +               "SEMA"  VX_LIMIT_FMT
22078 +               "SEMS"  VX_LIMIT_FMT
22079 +               "DENT"  VX_LIMIT_FMT,
22080 +               VX_LIMIT_ARG(RLIMIT_NPROC),
22081 +               VX_LIMIT_ARG(RLIMIT_AS),
22082 +               VX_LIMIT_ARG(RLIMIT_MEMLOCK),
22083 +               VX_LIMIT_ARG(RLIMIT_RSS),
22084 +               VX_LIMIT_ARG(VLIMIT_ANON),
22085 +               VX_LIMIT_ARG(VLIMIT_MAPPED),
22086 +               VX_LIMIT_ARG(RLIMIT_NOFILE),
22087 +               VX_LIMIT_ARG(VLIMIT_OPENFD),
22088 +               VX_LIMIT_ARG(RLIMIT_LOCKS),
22089 +               VX_LIMIT_ARG(VLIMIT_NSOCK),
22090 +               VX_LIMIT_ARG(RLIMIT_MSGQUEUE),
22091 +               VX_LIMIT_ARG(VLIMIT_SHMEM),
22092 +               VX_LIMIT_ARG(VLIMIT_SEMARY),
22093 +               VX_LIMIT_ARG(VLIMIT_NSEMS),
22094 +               VX_LIMIT_ARG(VLIMIT_DENTRY));
22095 +}
22096 +
22097 +#endif /* _VX_LIMIT_PROC_H */
22098 +
22099 +
22100 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/Makefile linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/Makefile
22101 --- linux-2.6.31.6/kernel/vserver/Makefile      1970-01-01 01:00:00.000000000 +0100
22102 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/Makefile        2009-09-10 16:11:43.000000000 +0200
22103 @@ -0,0 +1,18 @@
22104 +#
22105 +# Makefile for the Linux vserver routines.
22106 +#
22107 +
22108 +
22109 +obj-y          += vserver.o
22110 +
22111 +vserver-y      := switch.o context.o space.o sched.o network.o inode.o \
22112 +                  limit.o cvirt.o cacct.o signal.o helper.o init.o \
22113 +                  dlimit.o tag.o
22114 +
22115 +vserver-$(CONFIG_INET) += inet.o
22116 +vserver-$(CONFIG_PROC_FS) += proc.o
22117 +vserver-$(CONFIG_VSERVER_DEBUG) += sysctl.o debug.o
22118 +vserver-$(CONFIG_VSERVER_HISTORY) += history.o
22119 +vserver-$(CONFIG_VSERVER_MONITOR) += monitor.o
22120 +vserver-$(CONFIG_VSERVER_DEVICE) += device.o
22121 +
22122 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/monitor.c linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/monitor.c
22123 --- linux-2.6.31.6/kernel/vserver/monitor.c     1970-01-01 01:00:00.000000000 +0100
22124 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/monitor.c       2009-09-10 16:11:43.000000000 +0200
22125 @@ -0,0 +1,138 @@
22126 +/*
22127 + *  kernel/vserver/monitor.c
22128 + *
22129 + *  Virtual Context Scheduler Monitor
22130 + *
22131 + *  Copyright (C) 2006-2007 Herbert Pötzl
22132 + *
22133 + *  V0.01  basic design
22134 + *
22135 + */
22136 +
22137 +#include <linux/module.h>
22138 +#include <linux/jiffies.h>
22139 +#include <asm/uaccess.h>
22140 +#include <asm/atomic.h>
22141 +
22142 +#include <linux/vserver/monitor.h>
22143 +#include <linux/vserver/debug_cmd.h>
22144 +
22145 +
22146 +#ifdef CONFIG_VSERVER_MONITOR
22147 +#define VXM_SIZE       CONFIG_VSERVER_MONITOR_SIZE
22148 +#else
22149 +#define VXM_SIZE       64
22150 +#endif
22151 +
22152 +struct _vx_monitor {
22153 +       unsigned int counter;
22154 +
22155 +       struct _vx_mon_entry entry[VXM_SIZE+1];
22156 +};
22157 +
22158 +
22159 +DEFINE_PER_CPU(struct _vx_monitor, vx_monitor_buffer);
22160 +
22161 +unsigned volatile int vxm_active = 1;
22162 +
22163 +static atomic_t sequence = ATOMIC_INIT(0);
22164 +
22165 +
22166 +/*     vxm_advance()
22167 +
22168 +       * requires disabled preemption                          */
22169 +
22170 +struct _vx_mon_entry *vxm_advance(int cpu)
22171 +{
22172 +       struct _vx_monitor *mon = &per_cpu(vx_monitor_buffer, cpu);
22173 +       struct _vx_mon_entry *entry;
22174 +       unsigned int index;
22175 +
22176 +       index = vxm_active ? (mon->counter++ % VXM_SIZE) : VXM_SIZE;
22177 +       entry = &mon->entry[index];
22178 +
22179 +       entry->ev.seq = atomic_inc_return(&sequence);
22180 +       entry->ev.jif = jiffies;
22181 +       return entry;
22182 +}
22183 +
22184 +EXPORT_SYMBOL_GPL(vxm_advance);
22185 +
22186 +/* copy up to *count monitor entries of cpu, from *index on, to data */
22187 +int do_read_monitor(struct __user _vx_mon_entry *data,
22188 +       int cpu, uint32_t *index, uint32_t *count)
22189 +{
22190 +       int pos, ret = 0;
22191 +       struct _vx_monitor *mon = &per_cpu(vx_monitor_buffer, cpu);
22192 +       int end = mon->counter;
22193 +       int start = end - VXM_SIZE + 2;
22194 +       int idx = *index;
22195 +
22196 +       /* special case: a zero count only asks for the current pos */
22197 +       if (!*count) {
22198 +               *index = end;
22199 +               return 0;
22200 +       }
22201 +
22202 +       /* have we lost some data? then resume at the oldest entry */
22203 +       if (idx < start)
22204 +               idx = start;
22205 +       /* slots use unsigned modulo like vxm_advance(): idx may be < 0 */
22206 +       for (pos = 0; (pos < *count) && (idx < end); pos++, idx++) {
22207 +               unsigned int slot = (unsigned int)idx % VXM_SIZE;
22208 +
22209 +               /* send entry to userspace, uncopied bytes mean fault */
22210 +               ret = copy_to_user(&data[pos], &mon->entry[slot],
22211 +                       sizeof(mon->entry[slot])) ? -EFAULT : 0;
22212 +               if (ret)
22213 +                       break;
22214 +       }
22215 +       /* save new index and count, 1 is returned if more is pending */
22216 +       *index = idx;
22217 +       *count = pos;
22218 +       return ret ? ret : (*index < end);
22219 +}
22220 +
22221 +int vc_read_monitor(uint32_t id, void __user *data)
22222 +{
22223 +       struct vcmd_read_monitor_v0 vc_data;
22224 +       int ret;
22225 +
22226 +       /* id picks a per-cpu buffer, which only possible cpus have */
22227 +       if ((id >= NR_CPUS) || !cpu_possible(id))
22228 +               return -EINVAL;
22229 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
22230 +               return -EFAULT;
22231 +
22232 +       ret = do_read_monitor((struct __user _vx_mon_entry *)vc_data.data,
22233 +               id, &vc_data.index, &vc_data.count);
22234 +       /* hand the updated index and count back to the caller */
22235 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
22236 +               return -EFAULT;
22237 +       return ret;
22238 +}
22239 +
22240 +#ifdef CONFIG_COMPAT
22241 +
22242 +int vc_read_monitor_x32(uint32_t id, void __user *data)
22243 +{
22244 +       struct vcmd_read_monitor_v0_x32 vc_data;
22245 +       int ret;
22246 +
22247 +       /* id picks a per-cpu buffer, which only possible cpus have */
22248 +       if ((id >= NR_CPUS) || !cpu_possible(id))
22249 +               return -EINVAL;
22250 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
22251 +               return -EFAULT;
22252 +
22253 +       ret = do_read_monitor((struct __user _vx_mon_entry *)
22254 +               compat_ptr(vc_data.data_ptr),
22255 +               id, &vc_data.index, &vc_data.count);
22256 +       /* hand the updated index and count back to the caller */
22257 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
22258 +               return -EFAULT;
22259 +       return ret;
22260 +}
22261 +
22262 +#endif /* CONFIG_COMPAT */
22263 +
22264 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/network.c linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/network.c
22265 --- linux-2.6.31.6/kernel/vserver/network.c     1970-01-01 01:00:00.000000000 +0100
22266 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/network.c       2009-11-05 03:49:07.000000000 +0100
22267 @@ -0,0 +1,864 @@
22268 +/*
22269 + *  linux/kernel/vserver/network.c
22270 + *
22271 + *  Virtual Server: Network Support
22272 + *
22273 + *  Copyright (C) 2003-2007  Herbert Pötzl
22274 + *
22275 + *  V0.01  broken out from vcontext V0.05
22276 + *  V0.02  cleaned up implementation
22277 + *  V0.03  added equiv nx commands
22278 + *  V0.04  switch to RCU based hash
22279 + *  V0.05  and back to locking again
22280 + *  V0.06  changed vcmds to nxi arg
22281 + *  V0.07  have __create claim() the nxi
22282 + *
22283 + */
22284 +
22285 +#include <linux/err.h>
22286 +#include <linux/slab.h>
22287 +#include <linux/rcupdate.h>
22288 +
22289 +#include <linux/vs_network.h>
22290 +#include <linux/vs_pid.h>
22291 +#include <linux/vserver/network_cmd.h>
22292 +
22293 +
22294 +atomic_t nx_global_ctotal      = ATOMIC_INIT(0);
22295 +atomic_t nx_global_cactive     = ATOMIC_INIT(0);
22296 +
22297 +static struct kmem_cache *nx_addr_v4_cachep = NULL;
22298 +static struct kmem_cache *nx_addr_v6_cachep = NULL;
22299 +
22300 +
22301 +static int __init init_network(void)
22302 +{
22303 +       nx_addr_v4_cachep = kmem_cache_create("nx_v4_addr_cache",
22304 +               sizeof(struct nx_addr_v4), 0,
22305 +               SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
22306 +       nx_addr_v6_cachep = kmem_cache_create("nx_v6_addr_cache",
22307 +               sizeof(struct nx_addr_v6), 0,
22308 +               SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
22309 +       return 0;
22310 +}
22311 +
22312 +
22313 +/*     __alloc_nx_addr_v4()                                    */
22314 +
22315 +static inline struct nx_addr_v4 *__alloc_nx_addr_v4(void)
22316 +{
22317 +       /* the slab allocator reports failure as NULL and never as
22318 +          an ERR_PTR(), so IS_ERR() cannot guard a memset() here;
22319 +          kmem_cache_zalloc() returns the object already zeroed,
22320 +          or NULL when out of memory, which the caller must check */
22321 +
22322 +       return kmem_cache_zalloc(nx_addr_v4_cachep, GFP_KERNEL);
22323 +}
22324 +
22325 +/*     __dealloc_nx_addr_v4()                                  */
22326 +
22327 +static inline void __dealloc_nx_addr_v4(struct nx_addr_v4 *nxa)
22328 +{
22329 +       kmem_cache_free(nx_addr_v4_cachep, nxa);
22330 +}
22331 +
22332 +/*     __dealloc_nx_addr_v4_all()                              */
22333 +
22334 +static inline void __dealloc_nx_addr_v4_all(struct nx_addr_v4 *nxa)
22335 +{
22336 +       while (nxa) {
22337 +               struct nx_addr_v4 *next = nxa->next;
22338 +
22339 +               __dealloc_nx_addr_v4(nxa);
22340 +               nxa = next;
22341 +       }
22342 +}
22343 +
22344 +
22345 +#ifdef CONFIG_IPV6
22346 +
22347 +/*     __alloc_nx_addr_v6()                                    */
22348 +
22349 +static inline struct nx_addr_v6 *__alloc_nx_addr_v6(void)
22350 +{
22351 +       /* the slab allocator reports failure as NULL and never as
22352 +          an ERR_PTR(), so IS_ERR() cannot guard a memset() here;
22353 +          kmem_cache_zalloc() returns the object already zeroed,
22354 +          or NULL when out of memory, which the caller must check */
22355 +
22356 +       return kmem_cache_zalloc(nx_addr_v6_cachep, GFP_KERNEL);
22357 +}
22358 +
22359 +/*     __dealloc_nx_addr_v6()                                  */
22360 +
22361 +static inline void __dealloc_nx_addr_v6(struct nx_addr_v6 *nxa)
22362 +{
22363 +       kmem_cache_free(nx_addr_v6_cachep, nxa);
22364 +}
22365 +
22366 +/*     __dealloc_nx_addr_v6_all()                              */
22367 +
22368 +static inline void __dealloc_nx_addr_v6_all(struct nx_addr_v6 *nxa)
22369 +{
22370 +       while (nxa) {
22371 +               struct nx_addr_v6 *next = nxa->next;
22372 +
22373 +               __dealloc_nx_addr_v6(nxa);
22374 +               nxa = next;
22375 +       }
22376 +}
22377 +
22378 +#endif /* CONFIG_IPV6 */
22379 +
22380 +/*     __alloc_nx_info()
22381 +
22382 +       * allocate an initialized nx_info struct
22383 +       * doesn't make it visible (hash)                        */
22384 +
22385 +static struct nx_info *__alloc_nx_info(nid_t nid)
22386 +{
22387 +       struct nx_info *nxi;
22388 +
22389 +       vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid);
22390 +
22391 +       /* plain heap object, zero filled; no slab cache (yet) */
22392 +       nxi = kzalloc(sizeof(*nxi), GFP_KERNEL);
22393 +       if (nxi == NULL)
22394 +               return NULL;
22395 +
22396 +       /* identity first, then an unhashed node without users */
22397 +       nxi->nx_id = nid;
22398 +       INIT_HLIST_NODE(&nxi->nx_hlist);
22399 +       atomic_set(&nxi->nx_usecnt, 0);
22400 +       atomic_set(&nxi->nx_tasks, 0);
22401 +       nxi->nx_state = 0;
22402 +
22403 +       nxi->nx_flags = NXF_INIT_SET;
22404 +
22405 +       /* default addresses: standard broadcast and loopback */
22406 +
22407 +       nxi->v4_bcast.s_addr = htonl(INADDR_BROADCAST);
22408 +       nxi->v4_lback.s_addr = htonl(INADDR_LOOPBACK);
22409 +
22410 +       vxdprintk(VXD_CBIT(nid, 0),
22411 +               "alloc_nx_info(%d) = %p", nid, nxi);
22412 +       atomic_inc(&nx_global_ctotal);
22413 +       return nxi;
22414 +}
22415 +
22416 +/*     __dealloc_nx_info()
22417 +
22418 +       * final disposal of nx_info                             */
22419 +
22420 +static void __dealloc_nx_info(struct nx_info *nxi)
22421 +{
22422 +       vxdprintk(VXD_CBIT(nid, 0), "dealloc_nx_info(%p)", nxi);
22423 +
22424 +       nxi->nx_hlist.next = LIST_POISON1;
22425 +       nxi->nx_id = -1;
22426 +       BUG_ON(atomic_read(&nxi->nx_usecnt));
22427 +       BUG_ON(atomic_read(&nxi->nx_tasks));
22428 +       /* free the chained addresses, the v6 list would leak else */
22429 +       __dealloc_nx_addr_v4_all(nxi->v4.next);
22430 +#ifdef CONFIG_IPV6
22431 +       __dealloc_nx_addr_v6_all(nxi->v6.next);
22432 +#endif
22433 +       nxi->nx_state |= NXS_RELEASED;
22434 +       kfree(nxi);
22435 +       atomic_dec(&nx_global_ctotal);
22436 +}
22437 +
22438 +static void __shutdown_nx_info(struct nx_info *nxi)
22439 +{
22440 +       nxi->nx_state |= NXS_SHUTDOWN;
22441 +       vs_net_change(nxi, VSC_NETDOWN);
22442 +}
22443 +
22444 +/*     exported stuff                                          */
22445 +
22446 +void free_nx_info(struct nx_info *nxi)
22447 +{
22448 +       /* context shutdown is mandatory */
22449 +       BUG_ON(nxi->nx_state != NXS_SHUTDOWN);
22450 +
22451 +       /* context must not be hashed */
22452 +       BUG_ON(nxi->nx_state & NXS_HASHED);
22453 +
22454 +       BUG_ON(atomic_read(&nxi->nx_usecnt));
22455 +       BUG_ON(atomic_read(&nxi->nx_tasks));
22456 +
22457 +       __dealloc_nx_info(nxi);
22458 +}
22459 +
22460 +
22461 +void __nx_set_lback(struct nx_info *nxi)
22462 +{
22463 +       int nid = nxi->nx_id;
22464 +       __be32 lback = htonl(INADDR_LOOPBACK ^ ((nid & 0xFFFF) << 8));
22465 +
22466 +       nxi->v4_lback.s_addr = lback;
22467 +}
22468 +
22469 +extern int __nx_inet_add_lback(__be32 addr);
22470 +extern int __nx_inet_del_lback(__be32 addr);
22471 +
22472 +
22473 +/*     hash table for nx_info, guarded by nx_info_hash_lock */
22474 +
22475 +#define NX_HASH_SIZE   13
22476 +
22477 +struct hlist_head nx_info_hash[NX_HASH_SIZE];
22478 +
22479 +static DEFINE_SPINLOCK(nx_info_hash_lock);
22480 +
22481 +
22482 +static inline unsigned int __hashval(nid_t nid)
22483 +{
22484 +       return (nid % NX_HASH_SIZE);
22485 +}
22486 +
22487 +
22488 +
22489 +/*     __hash_nx_info()
22490 +
22491 +       * add the nxi to the global hash table
22492 +       * requires the hash_lock to be held                     */
22493 +
22494 +static inline void __hash_nx_info(struct nx_info *nxi)
22495 +{
22496 +       struct hlist_head *head;
22497 +
22498 +       vxd_assert_lock(&nx_info_hash_lock);
22499 +       vxdprintk(VXD_CBIT(nid, 4),
22500 +               "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id);
22501 +
22502 +       /* context must not be hashed */
22503 +       BUG_ON(nx_info_state(nxi, NXS_HASHED));
22504 +
22505 +       nxi->nx_state |= NXS_HASHED;
22506 +       head = &nx_info_hash[__hashval(nxi->nx_id)];
22507 +       hlist_add_head(&nxi->nx_hlist, head);
22508 +       atomic_inc(&nx_global_cactive);
22509 +}
22510 +
22511 +/*     __unhash_nx_info()
22512 +
22513 +       * remove the nxi from the global hash table
22514 +       * requires the hash_lock to be held                     */
22515 +
22516 +static inline void __unhash_nx_info(struct nx_info *nxi)
22517 +{
22518 +       vxd_assert_lock(&nx_info_hash_lock);
22519 +       vxdprintk(VXD_CBIT(nid, 4),
22520 +               "__unhash_nx_info: %p[#%d.%d.%d]", nxi, nxi->nx_id,
22521 +               atomic_read(&nxi->nx_usecnt), atomic_read(&nxi->nx_tasks));
22522 +
22523 +       /* context must be hashed */
22524 +       BUG_ON(!nx_info_state(nxi, NXS_HASHED));
22525 +       /* but without tasks */
22526 +       BUG_ON(atomic_read(&nxi->nx_tasks));
22527 +
22528 +       nxi->nx_state &= ~NXS_HASHED;
22529 +       hlist_del(&nxi->nx_hlist);
22530 +       atomic_dec(&nx_global_cactive);
22531 +}
22532 +
22533 +
22534 +/*     __lookup_nx_info()
22535 +
22536 +       * requires the hash_lock to be held
22537 +       * doesn't increment the nx_refcnt                       */
22538 +
22539 +static inline struct nx_info *__lookup_nx_info(nid_t nid)
22540 +{
22541 +       struct hlist_head *head = &nx_info_hash[__hashval(nid)];
22542 +       struct hlist_node *pos;
22543 +       struct nx_info *nxi;
22544 +
22545 +       vxd_assert_lock(&nx_info_hash_lock);
22546 +       hlist_for_each(pos, head) {
22547 +               nxi = hlist_entry(pos, struct nx_info, nx_hlist);
22548 +
22549 +               if (nxi->nx_id == nid)
22550 +                       goto found;
22551 +       }
22552 +       nxi = NULL;
22553 +found:
22554 +       vxdprintk(VXD_CBIT(nid, 0),
22555 +               "__lookup_nx_info(#%u): %p[#%u]",
22556 +               nid, nxi, nxi ? nxi->nx_id : 0);
22557 +       return nxi;
22558 +}
22559 +
22560 +
22561 +/*     __create_nx_info()
22562 +
22563 +       * create the requested context
22564 +       * get(), claim() and hash it                            */
22565 +
22566 +static struct nx_info *__create_nx_info(int id)
22567 +{
22568 +       struct nx_info *new, *nxi = NULL;
22569 +
22570 +       vxdprintk(VXD_CBIT(nid, 1), "create_nx_info(%d)*", id);
22571 +
22572 +       if (!(new = __alloc_nx_info(id)))
22573 +               return ERR_PTR(-ENOMEM);
22574 +
22575 +       /* lookup and hash under one lock to keep nids unique */
22576 +       spin_lock(&nx_info_hash_lock);
22577 +
22578 +       /* static context requested */
22579 +       if ((nxi = __lookup_nx_info(id))) {
22580 +               vxdprintk(VXD_CBIT(nid, 0),
22581 +                       "create_nx_info(%d) = %p (already there)", id, nxi);
22582 +               if (nx_info_flags(nxi, NXF_STATE_SETUP, 0))
22583 +                       nxi = ERR_PTR(-EBUSY);
22584 +               else
22585 +                       nxi = ERR_PTR(-EEXIST);
22586 +               goto out_unlock;
22587 +       }
22588 +       /* new context */
22589 +       vxdprintk(VXD_CBIT(nid, 0),
22590 +               "create_nx_info(%d) = %p (new)", id, new);
22591 +       claim_nx_info(new, NULL);
22592 +       __nx_set_lback(new);
22593 +       __hash_nx_info(get_nx_info(new));
22594 +       nxi = new, new = NULL;
22595 +
22596 +out_unlock:
22597 +       spin_unlock(&nx_info_hash_lock);
22598 +       if (new)
22599 +               __dealloc_nx_info(new);
22600 +       return nxi;
22601 +}
22602 +
22603 +
22604 +
22605 +/*     exported stuff                                          */
22606 +
22607 +
22608 +void unhash_nx_info(struct nx_info *nxi)
22609 +{
22610 +       __shutdown_nx_info(nxi);
22611 +       spin_lock(&nx_info_hash_lock);
22612 +       __unhash_nx_info(nxi);
22613 +       spin_unlock(&nx_info_hash_lock);
22614 +}
22615 +
22616 +/*     lookup_nx_info()
22617 +
22618 +       * search for a nx_info and get() it
22619 +       * negative id means current                             */
22620 +
22621 +struct nx_info *lookup_nx_info(int id)
22622 +{
22623 +       struct nx_info *nxi = NULL;
22624 +
22625 +       if (id < 0) {
22626 +               nxi = get_nx_info(current_nx_info());
22627 +       } else if (id > 1) {
22628 +               spin_lock(&nx_info_hash_lock);
22629 +               nxi = get_nx_info(__lookup_nx_info(id));
22630 +               spin_unlock(&nx_info_hash_lock);
22631 +       }
22632 +       return nxi;
22633 +}
22634 +
22635 +/*     nid_is_hashed()
22636 +
22637 +       * verify that nid is still hashed                       */
22638 +
22639 +int nid_is_hashed(nid_t nid)
22640 +{
22641 +       int hashed;
22642 +
22643 +       spin_lock(&nx_info_hash_lock);
22644 +       hashed = (__lookup_nx_info(nid) != NULL);
22645 +       spin_unlock(&nx_info_hash_lock);
22646 +       return hashed;
22647 +}
22648 +
22649 +
22650 +#ifdef CONFIG_PROC_FS
22651 +
22652 +/*     get_nid_list()
22653 +
22654 +       * get a subset of hashed nids for proc
22655 +       * assumes size is at least one                          */
22656 +
22657 +int get_nid_list(int index, unsigned int *nids, int size)
22658 +{
22659 +       int hindex, nr_nids = 0;
22660 +
22661 +       /* only show current and children */
22662 +       if (!nx_check(0, VS_ADMIN | VS_WATCH)) {
22663 +               if (index > 0)
22664 +                       return 0;
22665 +               nids[nr_nids] = nx_current_nid();
22666 +               return 1;
22667 +       }
22668 +
22669 +       for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
22670 +               struct hlist_head *head = &nx_info_hash[hindex];
22671 +               struct hlist_node *pos;
22672 +
22673 +               spin_lock(&nx_info_hash_lock);
22674 +               hlist_for_each(pos, head) {
22675 +                       struct nx_info *nxi;
22676 +
22677 +                       if (--index > 0)
22678 +                               continue;
22679 +
22680 +                       nxi = hlist_entry(pos, struct nx_info, nx_hlist);
22681 +                       nids[nr_nids] = nxi->nx_id;
22682 +                       if (++nr_nids >= size) {
22683 +                               spin_unlock(&nx_info_hash_lock);
22684 +                               goto out;
22685 +                       }
22686 +               }
22687 +               /* keep the lock time short */
22688 +               spin_unlock(&nx_info_hash_lock);
22689 +       }
22690 +out:
22691 +       return nr_nids;
22692 +}
22693 +#endif
22694 +
22695 +
22696 +/*
22697 + *     migrate task to new network
22698 + *     gets nxi, puts old_nxi on change
22699 + */
22700 +
22701 +int nx_migrate_task(struct task_struct *p, struct nx_info *nxi)
22702 +{
22703 +       struct nx_info *old_nxi;
22704 +       int ret = 0;
22705 +
22706 +       if (!p || !nxi)
22707 +               BUG();
22708 +
22709 +       vxdprintk(VXD_CBIT(nid, 5),
22710 +               "nx_migrate_task(%p,%p[#%d.%d.%d])",
22711 +               p, nxi, nxi->nx_id,
22712 +               atomic_read(&nxi->nx_usecnt),
22713 +               atomic_read(&nxi->nx_tasks));
22714 +
22715 +       if (nx_info_flags(nxi, NXF_INFO_PRIVATE, 0) &&
22716 +               !nx_info_flags(nxi, NXF_STATE_SETUP, 0))
22717 +               return -EACCES;
22718 +
22719 +       if (nx_info_state(nxi, NXS_SHUTDOWN))
22720 +               return -EFAULT;
22721 +
22722 +       /* maybe disallow this completely? */
22723 +       old_nxi = task_get_nx_info(p);
22724 +       if (old_nxi == nxi)
22725 +               goto out;
22726 +
22727 +       task_lock(p);
22728 +       if (old_nxi)
22729 +               clr_nx_info(&p->nx_info);
22730 +       claim_nx_info(nxi, p);
22731 +       set_nx_info(&p->nx_info, nxi);
22732 +       p->nid = nxi->nx_id;
22733 +       task_unlock(p);
22734 +
22735 +       vxdprintk(VXD_CBIT(nid, 5),
22736 +               "moved task %p into nxi:%p[#%d]",
22737 +               p, nxi, nxi->nx_id);
22738 +
22739 +       if (old_nxi)
22740 +               release_nx_info(old_nxi, p);
22741 +       ret = 0;
22742 +out:
22743 +       put_nx_info(old_nxi);
22744 +       return ret;
22745 +}
22746 +
22747 +
22748 +void nx_set_persistent(struct nx_info *nxi)
22749 +{
22750 +       vxdprintk(VXD_CBIT(nid, 6),
22751 +               "nx_set_persistent(%p[#%d])", nxi, nxi->nx_id);
22752 +
22753 +       get_nx_info(nxi);
22754 +       claim_nx_info(nxi, NULL);
22755 +}
22756 +
22757 +void nx_clear_persistent(struct nx_info *nxi)
22758 +{
22759 +       vxdprintk(VXD_CBIT(nid, 6),
22760 +               "nx_clear_persistent(%p[#%d])", nxi, nxi->nx_id);
22761 +
22762 +       release_nx_info(nxi, NULL);
22763 +       put_nx_info(nxi);
22764 +}
22765 +
22766 +void nx_update_persistent(struct nx_info *nxi)
22767 +{
22768 +       if (nx_info_flags(nxi, NXF_PERSISTENT, 0))
22769 +               nx_set_persistent(nxi);
22770 +       else
22771 +               nx_clear_persistent(nxi);
22772 +}
22773 +
22774 +/* vserver syscall commands below here */
22775 +
22776 +/* task nid and nx_info functions */
22777 +
22778 +#include <asm/uaccess.h>
22779 +
22780 +
22781 +int vc_task_nid(uint32_t id)
22782 +{
22783 +       nid_t nid;
22784 +
22785 +       if (id) {
22786 +               struct task_struct *tsk;
22787 +
22788 +               read_lock(&tasklist_lock);
22789 +               tsk = find_task_by_real_pid(id);
22790 +               nid = (tsk) ? tsk->nid : -ESRCH;
22791 +               read_unlock(&tasklist_lock);
22792 +       } else
22793 +               nid = nx_current_nid();
22794 +       return nid;
22795 +}
22796 +
22797 +
22798 +int vc_nx_info(struct nx_info *nxi, void __user *data)
22799 +{
22800 +       struct vcmd_nx_info_v0 vc_data;
22801 +
22802 +       vc_data.nid = nxi->nx_id;
22803 +
22804 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
22805 +               return -EFAULT;
22806 +       return 0;
22807 +}
22808 +
22809 +
22810 +/* network functions */
22811 +
22812 +int vc_net_create(uint32_t nid, void __user *data)
22813 +{
22814 +       struct vcmd_net_create vc_data = { .flagword = NXF_INIT_SET };
22815 +       struct nx_info *new_nxi;
22816 +       int ret;
22817 +
22818 +       if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
22819 +               return -EFAULT;
22820 +
22821 +       if ((nid > MAX_S_CONTEXT) || (nid < 2))
22822 +               return -EINVAL;
22823 +
22824 +       new_nxi = __create_nx_info(nid);
22825 +       if (IS_ERR(new_nxi))
22826 +               return PTR_ERR(new_nxi);
22827 +
22828 +       /* initial flags */
22829 +       new_nxi->nx_flags = vc_data.flagword;
22830 +
22831 +       ret = -ENOEXEC;
22832 +       if (vs_net_change(new_nxi, VSC_NETUP))
22833 +               goto out;
22834 +
22835 +       ret = nx_migrate_task(current, new_nxi);
22836 +       if (ret)
22837 +               goto out;
22838 +
22839 +       /* return context id on success */
22840 +       ret = new_nxi->nx_id;
22841 +
22842 +       /* get a reference for persistent contexts */
22843 +       if ((vc_data.flagword & NXF_PERSISTENT))
22844 +               nx_set_persistent(new_nxi);
22845 +out:
22846 +       release_nx_info(new_nxi, NULL);
22847 +       put_nx_info(new_nxi);
22848 +       return ret;
22849 +}
22850 +
22851 +
22852 +int vc_net_migrate(struct nx_info *nxi, void __user *data)
22853 +{
22854 +       return nx_migrate_task(current, nxi);
22855 +}
22856 +
22857 +
22858 +/* do_add_v4_addr(): use the embedded entry or, if NX_IPV4(nxi), append one */
22859 +int do_add_v4_addr(struct nx_info *nxi, __be32 ip, __be32 ip2, __be32 mask,
22860 +       uint16_t type, uint16_t flags)
22861 +{
22862 +       struct nx_addr_v4 *nxa = &nxi->v4;
22863 +
22864 +       if (NX_IPV4(nxi)) {
22865 +               struct nx_addr_v4 *new = __alloc_nx_addr_v4();
22866 +
22867 +               /* a failed allocation must never end up in the list */
22868 +               if (!new || IS_ERR(new))
22869 +                       return new ? PTR_ERR(new) : -ENOMEM;
22870 +               for (; nxa->next; nxa = nxa->next);
22871 +               nxa = nxa->next = new;
22872 +       }
22873 +
22874 +       if (nxi->v4.next)
22875 +               /* remove single ip for ip list */
22876 +               nxi->nx_flags &= ~NXF_SINGLE_IP;
22877 +
22878 +       nxa->ip[0].s_addr = ip;
22879 +       nxa->ip[1].s_addr = ip2;
22880 +       nxa->mask.s_addr = mask;
22881 +       nxa->type = type;
22882 +       nxa->flags = flags;
22883 +       return 0;
22884 +}
22885 +
22886 +/* vc_net_add(): v0 request adding v4 addresses or setting bcast/lback */
22887 +int vc_net_add(struct nx_info *nxi, void __user *data)
22888 +{
22889 +       struct vcmd_net_addr_v0 vc_data;
22890 +       int index, ret = 0;
22891 +
22892 +       /* data is mandatory: skipping the copy leaves vc_data unset */
22893 +       if (!data || copy_from_user(&vc_data, data, sizeof(vc_data)))
22894 +               return -EFAULT;
22895 +       switch (vc_data.type) {
22896 +       case NXA_TYPE_IPV4:
22897 +               if ((vc_data.count < 1) || (vc_data.count > 4))
22898 +                       return -EINVAL;
22899 +
22900 +               index = 0;
22901 +               while (index < vc_data.count) {
22902 +                       ret = do_add_v4_addr(nxi, vc_data.ip[index].s_addr, 0,
22903 +                               vc_data.mask[index].s_addr, NXA_TYPE_ADDR, 0);
22904 +                       if (ret)
22905 +                               return ret;
22906 +                       index++;
22907 +               }
22908 +               ret = index;
22909 +               break;
22910 +
22911 +       case NXA_TYPE_IPV4|NXA_MOD_BCAST:
22912 +               nxi->v4_bcast = vc_data.ip[0];
22913 +               ret = 1;
22914 +               break;
22915 +
22916 +       case NXA_TYPE_IPV4|NXA_MOD_LBACK:
22917 +               nxi->v4_lback = vc_data.ip[0];
22918 +               ret = 1;
22919 +               break;
22920 +
22921 +       default:
22922 +               ret = -EINVAL;
22923 +               break;
22924 +       }
22925 +       return ret;
22926 +}
22927 +
22928 +int vc_net_remove(struct nx_info *nxi, void __user *data)
22929 +{
22930 +       struct vcmd_net_addr_v0 vc_data;
22931 +
22932 +       /* data is mandatory: skipping the copy leaves vc_data unset */
22933 +       if (!data || copy_from_user(&vc_data, data, sizeof(vc_data)))
22934 +               return -EFAULT;
22935 +       switch (vc_data.type) {
22936 +       case NXA_TYPE_ANY:
22937 +               __dealloc_nx_addr_v4_all(xchg(&nxi->v4.next, NULL));
22938 +               memset(&nxi->v4, 0, sizeof(nxi->v4));
22939 +               break;
22940 +
22941 +       default:
22942 +               return -EINVAL;
22943 +       }
22944 +       return 0;
22945 +}
22946 +
22947 +/* vc_net_add_ipv4(): v1 request adding one v4 entry or setting bcast/lback */
22948 +int vc_net_add_ipv4(struct nx_info *nxi, void __user *data)
22949 +{
22950 +       struct vcmd_net_addr_ipv4_v1 vc_data;
22951 +
22952 +       /* data is mandatory: skipping the copy leaves vc_data unset */
22953 +       if (!data || copy_from_user(&vc_data, data, sizeof(vc_data)))
22954 +               return -EFAULT;
22955 +       switch (vc_data.type) {
22956 +       case NXA_TYPE_ADDR:
22957 +       case NXA_TYPE_RANGE:
22958 +       case NXA_TYPE_MASK:
22959 +               return do_add_v4_addr(nxi, vc_data.ip.s_addr, 0,
22960 +                       vc_data.mask.s_addr, vc_data.type, vc_data.flags);
22961 +
22962 +       case NXA_TYPE_ADDR | NXA_MOD_BCAST:
22963 +               nxi->v4_bcast = vc_data.ip;
22964 +               break;
22965 +
22966 +       case NXA_TYPE_ADDR | NXA_MOD_LBACK:
22967 +               nxi->v4_lback = vc_data.ip;
22968 +               break;
22969 +
22970 +       default:
22971 +               return -EINVAL;
22972 +       }
22973 +       return 0;
22974 +}
22975 +
22976 +int vc_net_remove_ipv4(struct nx_info *nxi, void __user *data)
22977 +{
22978 +       struct vcmd_net_addr_ipv4_v1 vc_data;
22979 +
22980 +       /* data is mandatory: skipping the copy leaves vc_data unset */
22981 +       if (!data || copy_from_user(&vc_data, data, sizeof(vc_data)))
22982 +               return -EFAULT;
22983 +       switch (vc_data.type) {
22984 +/*     case NXA_TYPE_ADDR:
22985 +               break;          */
22986 +
22987 +       case NXA_TYPE_ANY:
22988 +               __dealloc_nx_addr_v4_all(xchg(&nxi->v4.next, NULL));
22989 +               memset(&nxi->v4, 0, sizeof(nxi->v4));
22990 +               break;
22991 +
22992 +       default:
22993 +               return -EINVAL;
22994 +       }
22995 +       return 0;
22996 +}
22997 +
22998 +
22999 +#ifdef CONFIG_IPV6
23000 +
23001 +int do_add_v6_addr(struct nx_info *nxi,
23002 +       struct in6_addr *ip, struct in6_addr *mask,
23003 +       uint32_t prefix, uint16_t type, uint16_t flags)
23004 +{
23005 +       struct nx_addr_v6 *nxa = &nxi->v6;
23006 +
23007 +       if (NX_IPV6(nxi)) {
23008 +               struct nx_addr_v6 *new = __alloc_nx_addr_v6();
23009 +
23010 +               /* a failed allocation must never end up in the list */
23011 +               if (!new || IS_ERR(new))
23012 +                       return new ? PTR_ERR(new) : -ENOMEM;
23013 +               for (; nxa->next; nxa = nxa->next);
23014 +               nxa = nxa->next = new;
23015 +       }
23016 +       /* nxa is the embedded entry or the one just appended */
23017 +       nxa->ip = *ip;
23018 +       nxa->mask = *mask;
23019 +       nxa->prefix = prefix;
23020 +       nxa->type = type;
23021 +       nxa->flags = flags;
23022 +       return 0;
23023 +}
23024 +
23025 +/* vc_net_add_ipv6(): v1 request adding one v6 address or mask entry */
23026 +int vc_net_add_ipv6(struct nx_info *nxi, void __user *data)
23027 +{
23028 +       struct vcmd_net_addr_ipv6_v1 vc_data;
23029 +
23030 +       /* data is mandatory: skipping the copy leaves vc_data unset */
23031 +       if (!data || copy_from_user(&vc_data, data, sizeof(vc_data)))
23032 +               return -EFAULT;
23033 +       switch (vc_data.type) {
23034 +       case NXA_TYPE_ADDR:
23035 +       case NXA_TYPE_MASK:
23036 +               return do_add_v6_addr(nxi, &vc_data.ip, &vc_data.mask,
23037 +                       vc_data.prefix, vc_data.type, vc_data.flags);
23038 +       default:
23039 +               return -EINVAL;
23040 +       }
23041 +       return 0;
23042 +}
23043 +
23044 +int vc_net_remove_ipv6(struct nx_info *nxi, void __user *data)
23045 +{
23046 +       struct vcmd_net_addr_ipv6_v1 vc_data;
23047 +
23048 +       /* data is mandatory: skipping the copy leaves vc_data unset */
23049 +       if (!data || copy_from_user(&vc_data, data, sizeof(vc_data)))
23050 +               return -EFAULT;
23051 +       switch (vc_data.type) {
23052 +       case NXA_TYPE_ANY:
23053 +               __dealloc_nx_addr_v6_all(xchg(&nxi->v6.next, NULL));
23054 +               memset(&nxi->v6, 0, sizeof(nxi->v6));
23055 +               break;
23056 +
23057 +       default:
23058 +               return -EINVAL;
23059 +       }
23060 +       return 0;
23061 +}
23062 +
23063 +#endif /* CONFIG_IPV6 */
23064 +
23065 +
23066 +int vc_get_nflags(struct nx_info *nxi, void __user *data)
23067 +{
23068 +       struct vcmd_net_flags_v0 vc_data;
23069 +
23070 +       vc_data.flagword = nxi->nx_flags;
23071 +
23072 +       /* special STATE flag handling */
23073 +       vc_data.mask = vs_mask_flags(~0ULL, nxi->nx_flags, NXF_ONE_TIME);
23074 +
23075 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
23076 +               return -EFAULT;
23077 +       return 0;
23078 +}
23079 +
23080 +int vc_set_nflags(struct nx_info *nxi, void __user *data)
23081 +{
23082 +       struct vcmd_net_flags_v0 vc_data;       /* flagword + mask from user */
23083 +       uint64_t mask, trigger;
23084 +
23085 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
23086 +               return -EFAULT;
23087 +
23088 +       /* special STATE flag handling */
23089 +       mask = vs_mask_mask(vc_data.mask, nxi->nx_flags, NXF_ONE_TIME);
23090 +       trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword);
23091 +       /* trigger: bits inside mask whose requested value differs from now */
23092 +       nxi->nx_flags = vs_mask_flags(nxi->nx_flags,
23093 +               vc_data.flagword, mask);
23094 +       if (trigger & NXF_PERSISTENT)   /* persistent bit is being changed */
23095 +               nx_update_persistent(nxi);
23096 +
23097 +       return 0;
23098 +}
23099 +
23100 +int vc_get_ncaps(struct nx_info *nxi, void __user *data)
23101 +{
23102 +       struct vcmd_net_caps_v0 vc_data;
23103 +
23104 +       vc_data.ncaps = nxi->nx_ncaps;
23105 +       vc_data.cmask = ~0ULL;
23106 +
23107 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
23108 +               return -EFAULT;
23109 +       return 0;
23110 +}
23111 +
23112 +int vc_set_ncaps(struct nx_info *nxi, void __user *data)
23113 +{
23114 +       struct vcmd_net_caps_v0 vc_data;
23115 +
23116 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
23117 +               return -EFAULT;
23118 +
23119 +       nxi->nx_ncaps = vs_mask_flags(nxi->nx_ncaps,
23120 +               vc_data.ncaps, vc_data.cmask);
23121 +       return 0;
23122 +}
23123 +
23124 +
23125 +#include <linux/module.h>
23126 +
23127 +module_init(init_network);
23128 +
23129 +EXPORT_SYMBOL_GPL(free_nx_info);
23130 +EXPORT_SYMBOL_GPL(unhash_nx_info);
23131 +
23132 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/proc.c linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/proc.c
23133 --- linux-2.6.31.6/kernel/vserver/proc.c        1970-01-01 01:00:00.000000000 +0100
23134 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/proc.c  2009-09-10 17:45:54.000000000 +0200
23135 @@ -0,0 +1,1098 @@
23136 +/*
23137 + *  linux/kernel/vserver/proc.c
23138 + *
23139 + *  Virtual Context Support
23140 + *
23141 + *  Copyright (C) 2003-2007  Herbert Pötzl
23142 + *
23143 + *  V0.01  basic structure
23144 + *  V0.02  adaptation vs1.3.0
23145 + *  V0.03  proc permissions
23146 + *  V0.04  locking/generic
23147 + *  V0.05  next generation procfs
23148 + *  V0.06  inode validation
23149 + *  V0.07  generic rewrite vid
23150 + *  V0.08  remove inode type
23151 + *
23152 + */
23153 +
23154 +#include <linux/proc_fs.h>
23155 +#include <linux/fs_struct.h>
23156 +#include <linux/mount.h>
23157 +#include <asm/unistd.h>
23158 +
23159 +#include <linux/vs_context.h>
23160 +#include <linux/vs_network.h>
23161 +#include <linux/vs_cvirt.h>
23162 +
23163 +#include <linux/in.h>
23164 +#include <linux/inetdevice.h>
23165 +#include <linux/vs_inet.h>
23166 +#include <linux/vs_inet6.h>
23167 +
23168 +#include <linux/vserver/global.h>
23169 +
23170 +#include "cvirt_proc.h"
23171 +#include "cacct_proc.h"
23172 +#include "limit_proc.h"
23173 +#include "sched_proc.h"
23174 +#include "vci_config.h"
23175 +
23176 +
23177 +static inline char *print_cap_t(char *buffer, kernel_cap_t *c)
23178 +{
23179 +       unsigned __capi;
23180 +
23181 +       CAP_FOR_EACH_U32(__capi) {
23182 +               buffer += sprintf(buffer, "%08x",
23183 +                       c->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]);
23184 +       }
23185 +       return buffer;
23186 +}
23187 +
23188 +
23189 +static struct proc_dir_entry *proc_virtual;
23190 +
23191 +static struct proc_dir_entry *proc_virtnet;
23192 +
23193 +
23194 +/* first the actual feeds */
23195 +
23196 +
23197 +static int proc_vci(char *buffer)
23198 +{
23199 +       return sprintf(buffer,
23200 +               "VCIVersion:\t%04x:%04x\n"
23201 +               "VCISyscall:\t%d\n"
23202 +               "VCIKernel:\t%08x\n",
23203 +               VCI_VERSION >> 16,
23204 +               VCI_VERSION & 0xFFFF,
23205 +               __NR_vserver,
23206 +               vci_kernel_config());
23207 +}
23208 +
23209 +static int proc_virtual_info(char *buffer)
23210 +{
23211 +       return proc_vci(buffer);
23212 +}
23213 +
23214 +static int proc_virtual_status(char *buffer)
23215 +{
23216 +       return sprintf(buffer,
23217 +               "#CTotal:\t%d\n"
23218 +               "#CActive:\t%d\n"
23219 +               "#NSProxy:\t%d\t%d %d %d %d %d %d\n"
23220 +               "#InitTask:\t%d\t%d %d\n",
23221 +               atomic_read(&vx_global_ctotal),
23222 +               atomic_read(&vx_global_cactive),
23223 +               atomic_read(&vs_global_nsproxy),
23224 +               atomic_read(&vs_global_fs),
23225 +               atomic_read(&vs_global_mnt_ns),
23226 +               atomic_read(&vs_global_uts_ns),
23227 +               atomic_read(&nr_ipc_ns),
23228 +               atomic_read(&vs_global_user_ns),
23229 +               atomic_read(&vs_global_pid_ns),
23230 +               atomic_read(&init_task.usage),
23231 +               atomic_read(&init_task.nsproxy->count),
23232 +               init_task.fs->users);
23233 +}
23234 +
23235 +
23236 +int proc_vxi_info(struct vx_info *vxi, char *buffer)
23237 +{
23238 +       int length;
23239 +
23240 +       length = sprintf(buffer,
23241 +               "ID:\t%d\n"
23242 +               "Info:\t%p\n"
23243 +               "Init:\t%d\n"
23244 +               "OOM:\t%lld\n",
23245 +               vxi->vx_id,
23246 +               vxi,
23247 +               vxi->vx_initpid,
23248 +               vxi->vx_badness_bias);
23249 +       return length;
23250 +}
23251 +
23252 +int proc_vxi_status(struct vx_info *vxi, char *buffer)
23253 +{
23254 +       char *orig = buffer;
23255 +
23256 +       buffer += sprintf(buffer,
23257 +               "UseCnt:\t%d\n"
23258 +               "Tasks:\t%d\n"
23259 +               "Flags:\t%016llx\n",
23260 +               atomic_read(&vxi->vx_usecnt),
23261 +               atomic_read(&vxi->vx_tasks),
23262 +               (unsigned long long)vxi->vx_flags);
23263 +
23264 +       buffer += sprintf(buffer, "BCaps:\t");
23265 +       buffer = print_cap_t(buffer, &vxi->vx_bcaps);
23266 +       buffer += sprintf(buffer, "\n");
23267 +
23268 +       buffer += sprintf(buffer,
23269 +               "CCaps:\t%016llx\n"
23270 +               "Spaces:\t%08lx %08lx\n",
23271 +               (unsigned long long)vxi->vx_ccaps,
23272 +               vxi->vx_nsmask[0], vxi->vx_nsmask[1]);
23273 +       return buffer - orig;
23274 +}
23275 +
23276 +int proc_vxi_limit(struct vx_info *vxi, char *buffer)
23277 +{
23278 +       return vx_info_proc_limit(&vxi->limit, buffer);
23279 +}
23280 +
23281 +int proc_vxi_sched(struct vx_info *vxi, char *buffer)
23282 +{
23283 +       int cpu, length;
23284 +
23285 +       length = vx_info_proc_sched(&vxi->sched, buffer);
23286 +       for_each_online_cpu(cpu) {
23287 +               length += vx_info_proc_sched_pc(
23288 +                       &vx_per_cpu(vxi, sched_pc, cpu),
23289 +                       buffer + length, cpu);
23290 +       }
23291 +       return length;
23292 +}
23293 +
23294 +int proc_vxi_nsproxy0(struct vx_info *vxi, char *buffer)
23295 +{
23296 +       return vx_info_proc_nsproxy(vxi->vx_nsproxy[0], buffer);
23297 +}
23298 +
23299 +int proc_vxi_nsproxy1(struct vx_info *vxi, char *buffer)
23300 +{
23301 +       return vx_info_proc_nsproxy(vxi->vx_nsproxy[1], buffer);
23302 +}
23303 +
23304 +int proc_vxi_cvirt(struct vx_info *vxi, char *buffer)
23305 +{
23306 +       int cpu, length;
23307 +
23308 +       vx_update_load(vxi);
23309 +       length = vx_info_proc_cvirt(&vxi->cvirt, buffer);
23310 +       for_each_online_cpu(cpu) {
23311 +               length += vx_info_proc_cvirt_pc(
23312 +                       &vx_per_cpu(vxi, cvirt_pc, cpu),
23313 +                       buffer + length, cpu);
23314 +       }
23315 +       return length;
23316 +}
23317 +
23318 +int proc_vxi_cacct(struct vx_info *vxi, char *buffer)
23319 +{
23320 +       return vx_info_proc_cacct(&vxi->cacct, buffer);
23321 +}
23322 +
23323 +
23324 +static int proc_virtnet_info(char *buffer)
23325 +{
23326 +       return proc_vci(buffer);
23327 +}
23328 +
23329 +static int proc_virtnet_status(char *buffer)
23330 +{
23331 +       return sprintf(buffer,
23332 +               "#CTotal:\t%d\n"
23333 +               "#CActive:\t%d\n",
23334 +               atomic_read(&nx_global_ctotal),
23335 +               atomic_read(&nx_global_cactive));
23336 +}
23337 +
23338 +int proc_nxi_info(struct nx_info *nxi, char *buffer)
23339 +{
23340 +       struct nx_addr_v4 *v4a;
23341 +#ifdef CONFIG_IPV6
23342 +       struct nx_addr_v6 *v6a;
23343 +#endif
23344 +       int length, i;  /* length: bytes written so far, the return value */
23345 +
23346 +       length = sprintf(buffer,
23347 +               "ID:\t%d\n"
23348 +               "Info:\t%p\n"
23349 +               "Bcast:\t" NIPQUAD_FMT "\n"
23350 +               "Lback:\t" NIPQUAD_FMT "\n",
23351 +               nxi->nx_id,
23352 +               nxi,
23353 +               NIPQUAD(nxi->v4_bcast.s_addr),
23354 +               NIPQUAD(nxi->v4_lback.s_addr));
23355 +       /* address lists are unbounded; proc_nx_info_read() passes one page */
23356 +       if (!NX_IPV4(nxi))
23357 +               goto skip_v4;
23358 +       for (i = 0, v4a = &nxi->v4; v4a; i++, v4a = v4a->next)
23359 +               length += scnprintf(buffer + length, PAGE_SIZE - length,
23360 +                       "%d:\t" NXAV4_FMT "\n", i, NXAV4(v4a));
23361 +skip_v4:
23362 +#ifdef CONFIG_IPV6
23363 +       if (!NX_IPV6(nxi))
23364 +               goto skip_v6;
23365 +       for (i = 0, v6a = &nxi->v6; v6a; i++, v6a = v6a->next)
23366 +               length += scnprintf(buffer + length, PAGE_SIZE - length,
23367 +                       "%d:\t" NXAV6_FMT "\n", i, NXAV6(v6a));
23368 +skip_v6:
23369 +#endif
23370 +       return length;
23371 +}
23372 +
23373 +int proc_nxi_status(struct nx_info *nxi, char *buffer)
23374 +{
23375 +       int length;
23376 +
23377 +       length = sprintf(buffer,
23378 +               "UseCnt:\t%d\n"
23379 +               "Tasks:\t%d\n"
23380 +               "Flags:\t%016llx\n"
23381 +               "NCaps:\t%016llx\n",
23382 +               atomic_read(&nxi->nx_usecnt),
23383 +               atomic_read(&nxi->nx_tasks),
23384 +               (unsigned long long)nxi->nx_flags,
23385 +               (unsigned long long)nxi->nx_ncaps);
23386 +       return length;
23387 +}
23388 +
23389 +
23390 +
23391 +/* here the inode helpers */
23392 +
23393 +struct vs_entry {
23394 +       int len;
23395 +       char *name;
23396 +       mode_t mode;
23397 +       struct inode_operations *iop;
23398 +       struct file_operations *fop;
23399 +       union proc_op op;
23400 +};
23401 +
23402 +static struct inode *vs_proc_make_inode(struct super_block *sb, struct vs_entry *p)
23403 +{
23404 +       struct inode *inode = new_inode(sb);
23405 +
23406 +       if (!inode)
23407 +               goto out;
23408 +
23409 +       inode->i_mode = p->mode;
23410 +       if (p->iop)
23411 +               inode->i_op = p->iop;
23412 +       if (p->fop)
23413 +               inode->i_fop = p->fop;
23414 +
23415 +       inode->i_nlink = (p->mode & S_IFDIR) ? 2 : 1;
23416 +       inode->i_flags |= S_IMMUTABLE;
23417 +
23418 +       inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
23419 +
23420 +       inode->i_uid = 0;
23421 +       inode->i_gid = 0;
23422 +       inode->i_tag = 0;
23423 +out:
23424 +       return inode;
23425 +}
23426 +
23427 +static struct dentry *vs_proc_instantiate(struct inode *dir,
23428 +       struct dentry *dentry, int id, void *ptr)
23429 +{
23430 +       struct vs_entry *p = ptr;
23431 +       struct inode *inode = vs_proc_make_inode(dir->i_sb, p);
23432 +       struct dentry *error = ERR_PTR(-EINVAL);
23433 +
23434 +       if (!inode)
23435 +               goto out;
23436 +
23437 +       PROC_I(inode)->op = p->op;
23438 +       PROC_I(inode)->fd = id;
23439 +       d_add(dentry, inode);
23440 +       error = NULL;
23441 +out:
23442 +       return error;
23443 +}
23444 +
23445 +/* Lookups */
23446 +
23447 +typedef struct dentry *instantiate_t(struct inode *, struct dentry *, int, void *);
23448 +
23449 +/*
23450 + * Fill a directory entry.
23451 + *
23452 + * If possible create the dcache entry and derive our inode number and
23453 + * file type from dcache entry.
23454 + *
23455 + * Since all of the proc inode numbers are dynamically generated, the inode
23456 + * numbers do not exist until the inode is cached.  This means creating
23457 + * the dcache entry in readdir is necessary to keep the inode numbers
23458 + * reported by readdir in sync with the inode numbers reported
23459 + * by stat.
23460 + */
23461 +static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
23462 +       char *name, int len, instantiate_t instantiate, int id, void *ptr)
23463 +{
23464 +       struct dentry *child, *dir = filp->f_dentry;
23465 +       struct inode *inode;
23466 +       struct qstr qname;
23467 +       ino_t ino = 0;
23468 +       unsigned type = DT_UNKNOWN;
23469 +
23470 +       qname.name = name;
23471 +       qname.len  = len;
23472 +       qname.hash = full_name_hash(name, len);
23473 +
23474 +       child = d_lookup(dir, &qname);
23475 +       if (!child) {
23476 +               struct dentry *new;
23477 +               new = d_alloc(dir, &qname);
23478 +               if (new) {
23479 +                       child = instantiate(dir->d_inode, new, id, ptr);
23480 +                       if (child)
23481 +                               dput(new);
23482 +                       else
23483 +                               child = new;
23484 +               }
23485 +       }
23486 +       if (!child || IS_ERR(child) || !child->d_inode)
23487 +               goto end_instantiate;
23488 +       inode = child->d_inode;
23489 +       if (inode) {
23490 +               ino = inode->i_ino;
23491 +               type = inode->i_mode >> 12;
23492 +       }
23493 +       dput(child);
23494 +end_instantiate:
23495 +       if (!ino)
23496 +               ino = find_inode_number(dir, &qname);
23497 +       if (!ino)
23498 +               ino = 1;
23499 +       return filldir(dirent, name, len, filp->f_pos, ino, type);
23500 +}
23501 +
23502 +
23503 +
23504 +/* get and revalidate vx_info/xid */
23505 +
23506 +static inline
23507 +struct vx_info *get_proc_vx_info(struct inode *inode)
23508 +{
23509 +       return lookup_vx_info(PROC_I(inode)->fd);
23510 +}
23511 +
23512 +static int proc_xid_revalidate(struct dentry *dentry, struct nameidata *nd)
23513 +{
23514 +       xid_t ctx = PROC_I(dentry->d_inode)->fd;        /* id stored at instantiate */
23515 +
23516 +       if (ctx && !xid_is_hashed(ctx)) {
23517 +               d_drop(dentry);         /* xid no longer hashed: drop the dentry */
23518 +               return 0;
23519 +       }
23520 +       return 1;
23521 +}
23522 +
23523 +
23524 +/* get and revalidate nx_info/nid */
23525 +
23526 +static int proc_nid_revalidate(struct dentry *dentry, struct nameidata *nd)
23527 +{
23528 +       nid_t net = PROC_I(dentry->d_inode)->fd;        /* id stored at instantiate */
23529 +
23530 +       if (net && !nid_is_hashed(net)) {
23531 +               d_drop(dentry);         /* nid no longer hashed: drop the dentry */
23532 +               return 0;
23533 +       }
23534 +       return 1;
23535 +}
23536 +
23537 +
23538 +
23539 +#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024)
23540 +
23541 +static ssize_t proc_vs_info_read(struct file *file, char __user *buf,
23542 +                         size_t count, loff_t *ppos)
23543 +{
23544 +       struct inode *inode = file->f_dentry->d_inode;
23545 +       unsigned long page;
23546 +       ssize_t length = 0;
23547 +
23548 +       if (count > PROC_BLOCK_SIZE)
23549 +               count = PROC_BLOCK_SIZE;
23550 +
23551 +       /* fade that out as soon as stable */
23552 +       WARN_ON(PROC_I(inode)->fd);
23553 +
23554 +       if (!(page = __get_free_page(GFP_KERNEL)))
23555 +               return -ENOMEM;
23556 +
23557 +       BUG_ON(!PROC_I(inode)->op.proc_vs_read);
23558 +       length = PROC_I(inode)->op.proc_vs_read((char *)page);
23559 +
23560 +       if (length >= 0)
23561 +               length = simple_read_from_buffer(buf, count, ppos,
23562 +                       (char *)page, length);
23563 +
23564 +       free_page(page);
23565 +       return length;
23566 +}
23567 +
23568 +static ssize_t proc_vx_info_read(struct file *file, char __user *buf,
23569 +                         size_t count, loff_t *ppos)
23570 +{
23571 +       struct inode *inode = file->f_dentry->d_inode;
23572 +       struct vx_info *vxi = NULL;
23573 +       xid_t xid = PROC_I(inode)->fd;
23574 +       unsigned long page;
23575 +       ssize_t length = 0;
23576 +
23577 +       if (count > PROC_BLOCK_SIZE)
23578 +               count = PROC_BLOCK_SIZE;
23579 +
23580 +       /* fade that out as soon as stable */
23581 +       WARN_ON(!xid);
23582 +       vxi = lookup_vx_info(xid);
23583 +       if (!vxi)
23584 +               goto out;
23585 +
23586 +       length = -ENOMEM;
23587 +       if (!(page = __get_free_page(GFP_KERNEL)))
23588 +               goto out_put;
23589 +
23590 +       BUG_ON(!PROC_I(inode)->op.proc_vxi_read);
23591 +       length = PROC_I(inode)->op.proc_vxi_read(vxi, (char *)page);
23592 +
23593 +       if (length >= 0)
23594 +               length = simple_read_from_buffer(buf, count, ppos,
23595 +                       (char *)page, length);
23596 +
23597 +       free_page(page);
23598 +out_put:
23599 +       put_vx_info(vxi);
23600 +out:
23601 +       return length;
23602 +}
23603 +
23604 +static ssize_t proc_nx_info_read(struct file *file, char __user *buf,
23605 +                         size_t count, loff_t *ppos)
23606 +{
23607 +       struct inode *inode = file->f_dentry->d_inode;
23608 +       struct nx_info *nxi = NULL;
23609 +       nid_t nid = PROC_I(inode)->fd;
23610 +       unsigned long page;
23611 +       ssize_t length = 0;
23612 +
23613 +       if (count > PROC_BLOCK_SIZE)
23614 +               count = PROC_BLOCK_SIZE;
23615 +
23616 +       /* fade that out as soon as stable */
23617 +       WARN_ON(!nid);
23618 +       nxi = lookup_nx_info(nid);
23619 +       if (!nxi)
23620 +               goto out;
23621 +
23622 +       length = -ENOMEM;
23623 +       if (!(page = __get_free_page(GFP_KERNEL)))
23624 +               goto out_put;
23625 +
23626 +       BUG_ON(!PROC_I(inode)->op.proc_nxi_read);
23627 +       length = PROC_I(inode)->op.proc_nxi_read(nxi, (char *)page);
23628 +
23629 +       if (length >= 0)
23630 +               length = simple_read_from_buffer(buf, count, ppos,
23631 +                       (char *)page, length);
23632 +
23633 +       free_page(page);
23634 +out_put:
23635 +       put_nx_info(nxi);
23636 +out:
23637 +       return length;
23638 +}
23639 +
23640 +
23641 +
23642 +/* here comes the lower level */
23643 +
23644 +
23645 +#define NOD(NAME, MODE, IOP, FOP, OP) {        \
23646 +       .len  = sizeof(NAME) - 1,       \
23647 +       .name = (NAME),                 \
23648 +       .mode = MODE,                   \
23649 +       .iop  = IOP,                    \
23650 +       .fop  = FOP,                    \
23651 +       .op   = OP,                     \
23652 +}
23653 +
23654 +
23655 +#define DIR(NAME, MODE, OTYPE)                         \
23656 +       NOD(NAME, (S_IFDIR | (MODE)),                   \
23657 +               &proc_ ## OTYPE ## _inode_operations,   \
23658 +               &proc_ ## OTYPE ## _file_operations, { } )
23659 +
23660 +#define INF(NAME, MODE, OTYPE)                         \
23661 +       NOD(NAME, (S_IFREG | (MODE)), NULL,             \
23662 +               &proc_vs_info_file_operations,          \
23663 +               { .proc_vs_read = &proc_##OTYPE } )
23664 +
23665 +#define VINF(NAME, MODE, OTYPE)                                \
23666 +       NOD(NAME, (S_IFREG | (MODE)), NULL,             \
23667 +               &proc_vx_info_file_operations,          \
23668 +               { .proc_vxi_read = &proc_##OTYPE } )
23669 +
23670 +#define NINF(NAME, MODE, OTYPE)                                \
23671 +       NOD(NAME, (S_IFREG | (MODE)), NULL,             \
23672 +               &proc_nx_info_file_operations,          \
23673 +               { .proc_nxi_read = &proc_##OTYPE } )
23674 +
23675 +
23676 +static struct file_operations proc_vs_info_file_operations = {
23677 +       .read =         proc_vs_info_read,
23678 +};
23679 +
23680 +static struct file_operations proc_vx_info_file_operations = {
23681 +       .read =         proc_vx_info_read,
23682 +};
23683 +
23684 +static struct dentry_operations proc_xid_dentry_operations = {
23685 +       .d_revalidate = proc_xid_revalidate,
23686 +};
23687 +
23688 +static struct vs_entry vx_base_stuff[] = {
23689 +       VINF("info",    S_IRUGO, vxi_info),
23690 +       VINF("status",  S_IRUGO, vxi_status),
23691 +       VINF("limit",   S_IRUGO, vxi_limit),
23692 +       VINF("sched",   S_IRUGO, vxi_sched),
23693 +       VINF("nsproxy", S_IRUGO, vxi_nsproxy0),
23694 +       VINF("nsproxy1",S_IRUGO, vxi_nsproxy1),
23695 +       VINF("cvirt",   S_IRUGO, vxi_cvirt),
23696 +       VINF("cacct",   S_IRUGO, vxi_cacct),
23697 +       {}
23698 +};
23699 +
23700 +
23701 +
23702 +
23703 +static struct dentry *proc_xid_instantiate(struct inode *dir,
23704 +       struct dentry *dentry, int id, void *ptr)
23705 +{
23706 +       dentry->d_op = &proc_xid_dentry_operations;
23707 +       return vs_proc_instantiate(dir, dentry, id, ptr);
23708 +}
23709 +
23710 +static struct dentry *proc_xid_lookup(struct inode *dir,
23711 +       struct dentry *dentry, struct nameidata *nd)
23712 +{
23713 +       struct vs_entry *p = vx_base_stuff;
23714 +       struct dentry *error = ERR_PTR(-ENOENT);
23715 +
23716 +       for (; p->name; p++) {
23717 +               if (p->len != dentry->d_name.len)
23718 +                       continue;
23719 +               if (!memcmp(dentry->d_name.name, p->name, p->len))
23720 +                       break;
23721 +       }
23722 +       if (!p->name)
23723 +               goto out;
23724 +
23725 +       error = proc_xid_instantiate(dir, dentry, PROC_I(dir)->fd, p);
23726 +out:
23727 +       return error;
23728 +}
23729 +
23730 +static int proc_xid_readdir(struct file *filp,
23731 +       void *dirent, filldir_t filldir)
23732 +{
23733 +       struct dentry *dentry = filp->f_dentry;
23734 +       struct inode *inode = dentry->d_inode;
23735 +       struct vs_entry *p = vx_base_stuff;
23736 +       int size = sizeof(vx_base_stuff) / sizeof(struct vs_entry);
23737 +       int pos, index;
23738 +       u64 ino;
23739 +
23740 +       pos = filp->f_pos;
23741 +       switch (pos) {
23742 +       case 0:
23743 +               ino = inode->i_ino;
23744 +               if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
23745 +                       goto out;
23746 +               pos++;
23747 +               /* fall through */
23748 +       case 1:
23749 +               ino = parent_ino(dentry);
23750 +               if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
23751 +                       goto out;
23752 +               pos++;
23753 +               /* fall through */
23754 +       default:
23755 +               index = pos - 2;
23756 +               if (index >= size)
23757 +                       goto out;
23758 +               for (p += index; p->name; p++) {
23759 +                       if (proc_fill_cache(filp, dirent, filldir, p->name, p->len,
23760 +                               vs_proc_instantiate, PROC_I(inode)->fd, p))
23761 +                               goto out;
23762 +                       pos++;
23763 +               }
23764 +       }
23765 +out:
23766 +       filp->f_pos = pos;
23767 +       return 1;
23768 +}
23769 +
23770 +
23771 +
23772 +static struct file_operations proc_nx_info_file_operations = {
23773 +       .read =         proc_nx_info_read,
23774 +};
23775 +
23776 +static struct dentry_operations proc_nid_dentry_operations = {
23777 +       .d_revalidate = proc_nid_revalidate,
23778 +};
23779 +
23780 +static struct vs_entry nx_base_stuff[] = {
23781 +       NINF("info",    S_IRUGO, nxi_info),
23782 +       NINF("status",  S_IRUGO, nxi_status),
23783 +       {}
23784 +};
23785 +
23786 +
23787 +static struct dentry *proc_nid_instantiate(struct inode *dir,
23788 +       struct dentry *dentry, int id, void *ptr)
23789 +{
23790 +       dentry->d_op = &proc_nid_dentry_operations;
23791 +       return vs_proc_instantiate(dir, dentry, id, ptr);
23792 +}
23793 +
23794 +static struct dentry *proc_nid_lookup(struct inode *dir,
23795 +       struct dentry *dentry, struct nameidata *nd)
23796 +{
23797 +       struct vs_entry *p = nx_base_stuff;
23798 +       struct dentry *error = ERR_PTR(-ENOENT);
23799 +
23800 +       for (; p->name; p++) {
23801 +               if (p->len != dentry->d_name.len)
23802 +                       continue;
23803 +               if (!memcmp(dentry->d_name.name, p->name, p->len))
23804 +                       break;
23805 +       }
23806 +       if (!p->name)
23807 +               goto out;
23808 +
23809 +       error = proc_nid_instantiate(dir, dentry, PROC_I(dir)->fd, p);
23810 +out:
23811 +       return error;
23812 +}
23813 +
23814 +static int proc_nid_readdir(struct file *filp,
23815 +       void *dirent, filldir_t filldir)
23816 +{
23817 +       struct dentry *dentry = filp->f_dentry;
23818 +       struct inode *inode = dentry->d_inode;
23819 +       struct vs_entry *p = nx_base_stuff;
23820 +       int size = sizeof(nx_base_stuff) / sizeof(struct vs_entry);
23821 +       int pos, index;
23822 +       u64 ino;
23823 +
23824 +       pos = filp->f_pos;
23825 +       switch (pos) {
23826 +       case 0:
23827 +               ino = inode->i_ino;
23828 +               if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
23829 +                       goto out;
23830 +               pos++;
23831 +               /* fall through */
23832 +       case 1:
23833 +               ino = parent_ino(dentry);
23834 +               if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
23835 +                       goto out;
23836 +               pos++;
23837 +               /* fall through */
23838 +       default:
23839 +               index = pos - 2;
23840 +               if (index >= size)
23841 +                       goto out;
23842 +               for (p += index; p->name; p++) {
23843 +                       if (proc_fill_cache(filp, dirent, filldir, p->name, p->len,
23844 +                               vs_proc_instantiate, PROC_I(inode)->fd, p))
23845 +                               goto out;
23846 +                       pos++;
23847 +               }
23848 +       }
23849 +out:
23850 +       filp->f_pos = pos;
23851 +       return 1;
23852 +}
23853 +
23854 +
23855 +#define MAX_MULBY10    ((~0U - 9) / 10)
23856 +/* parse @len decimal digits at @str into an id; -1 if malformed/too large */
23857 +static inline int atovid(const char *str, int len)
23858 +{
23859 +       int vid, c;
23860 +
23861 +       vid = 0;
23862 +       while (len-- > 0) {
23863 +               c = *str - '0';
23864 +               str++;
23865 +               if (c < 0 || c > 9)     /* c is signed: also reject chars < '0' */
23866 +                       return -1;
23867 +               if (vid >= MAX_MULBY10) /* refuse before the multiply grows it */
23868 +                       return -1;
23869 +               vid *= 10;
23870 +               vid += c;
23871 +               if (!vid)               /* rejects "0" and any leading zero */
23872 +                       return -1;
23873 +       }
23874 +       return vid;
23875 +}
23876 +
23877 +/* now the upper level (virtual) */
23878 +
23879 +
23880 +static struct file_operations proc_xid_file_operations = {
23881 +       .read =         generic_read_dir,
23882 +       .readdir =      proc_xid_readdir,
23883 +};
23884 +
23885 +static struct inode_operations proc_xid_inode_operations = {
23886 +       .lookup =       proc_xid_lookup,
23887 +};
23888 +
23889 +static struct vs_entry vx_virtual_stuff[] = {
23890 +       INF("info",     S_IRUGO, virtual_info),
23891 +       INF("status",   S_IRUGO, virtual_status),
23892 +       DIR(NULL,       S_IRUGO | S_IXUGO, xid),
23893 +};
23894 +
23895 +
23896 +static struct dentry *proc_virtual_lookup(struct inode *dir,
23897 +       struct dentry *dentry, struct nameidata *nd)
23898 +{
23899 +       struct vs_entry *p = vx_virtual_stuff;
23900 +       struct dentry *error = ERR_PTR(-ENOENT);
23901 +       int id = 0;
23902 +
23903 +       for (; p->name; p++) {
23904 +               if (p->len != dentry->d_name.len)
23905 +                       continue;
23906 +               if (!memcmp(dentry->d_name.name, p->name, p->len))
23907 +                       break;
23908 +       }
23909 +       if (p->name)
23910 +               goto instantiate;
23911 +
23912 +       id = atovid(dentry->d_name.name, dentry->d_name.len);
23913 +       if ((id < 0) || !xid_is_hashed(id))
23914 +               goto out;
23915 +
23916 +instantiate:
23917 +       error = proc_xid_instantiate(dir, dentry, id, p);
23918 +out:
23919 +       return error;
23920 +}
23921 +
23922 +static struct file_operations proc_nid_file_operations = {
23923 +       .read =         generic_read_dir,
23924 +       .readdir =      proc_nid_readdir,
23925 +};
23926 +
23927 +static struct inode_operations proc_nid_inode_operations = {
23928 +       .lookup =       proc_nid_lookup,
23929 +};
23930 +
23931 +static struct vs_entry nx_virtnet_stuff[] = {
23932 +       INF("info",     S_IRUGO, virtnet_info),
23933 +       INF("status",   S_IRUGO, virtnet_status),
23934 +       DIR(NULL,       S_IRUGO | S_IXUGO, nid),
23935 +};
23936 +
23937 +
23938 +static struct dentry *proc_virtnet_lookup(struct inode *dir,
23939 +       struct dentry *dentry, struct nameidata *nd)
23940 +{
23941 +       struct vs_entry *p = nx_virtnet_stuff;
23942 +       struct dentry *error = ERR_PTR(-ENOENT);
23943 +       int id = 0;
23944 +
23945 +       for (; p->name; p++) {
23946 +               if (p->len != dentry->d_name.len)
23947 +                       continue;
23948 +               if (!memcmp(dentry->d_name.name, p->name, p->len))
23949 +                       break;
23950 +       }
23951 +       if (p->name)
23952 +               goto instantiate;
23953 +
23954 +       id = atovid(dentry->d_name.name, dentry->d_name.len);
23955 +       if ((id < 0) || !nid_is_hashed(id))
23956 +               goto out;
23957 +
23958 +instantiate:
23959 +       error = proc_nid_instantiate(dir, dentry, id, p);
23960 +out:
23961 +       return error;
23962 +}
23963 +
23964 +
23965 +#define PROC_MAXVIDS 32
23966 +
23967 +int proc_virtual_readdir(struct file *filp,
23968 +       void *dirent, filldir_t filldir)
23969 +{
23970 +       struct dentry *dentry = filp->f_dentry;
23971 +       struct inode *inode = dentry->d_inode;
23972 +       struct vs_entry *p = vx_virtual_stuff;
23973 +       int size = sizeof(vx_virtual_stuff) / sizeof(struct vs_entry);
23974 +       int pos, index;
23975 +       unsigned int xid_array[PROC_MAXVIDS];
23976 +       char buf[PROC_NUMBUF];
23977 +       unsigned int nr_xids, i;
23978 +       u64 ino;
23979 +
23980 +       pos = filp->f_pos;
23981 +       switch (pos) {
23982 +       case 0:
23983 +               ino = inode->i_ino;
23984 +               if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
23985 +                       goto out;
23986 +               pos++;
23987 +               /* fall through */
23988 +       case 1:
23989 +               ino = parent_ino(dentry);
23990 +               if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
23991 +                       goto out;
23992 +               pos++;
23993 +               /* fall through */
23994 +       default:
23995 +               index = pos - 2;
23996 +               if (index >= size)
23997 +                       goto entries;
23998 +               for (p += index; p->name; p++) {
23999 +                       if (proc_fill_cache(filp, dirent, filldir, p->name, p->len,
24000 +                               vs_proc_instantiate, 0, p))
24001 +                               goto out;
24002 +                       pos++;
24003 +               }
24004 +       entries:
24005 +               index = pos - size;
24006 +               p = &vx_virtual_stuff[size - 1];
24007 +               nr_xids = get_xid_list(index, xid_array, PROC_MAXVIDS);
24008 +               for (i = 0; i < nr_xids; i++) {
24009 +                       int n, xid = xid_array[i];
24010 +                       unsigned int j = PROC_NUMBUF;
24011 +
24012 +                       n = xid;
24013 +                       do
24014 +                               buf[--j] = '0' + (n % 10);
24015 +                       while (n /= 10);
24016 +
24017 +                       if (proc_fill_cache(filp, dirent, filldir,
24018 +                               buf + j, PROC_NUMBUF - j,
24019 +                               vs_proc_instantiate, xid, p))
24020 +                               goto out;
24021 +                       pos++;
24022 +               }
24023 +       }
24024 +out:
24025 +       filp->f_pos = pos;
24026 +       return 0;
24027 +}
24028 +
24029 +static int proc_virtual_getattr(struct vfsmount *mnt,
24030 +       struct dentry *dentry, struct kstat *stat)
24031 +{
24032 +       struct inode *inode = dentry->d_inode;
24033 +
24034 +       generic_fillattr(inode, stat);
24035 +       stat->nlink = 2 + atomic_read(&vx_global_cactive);
24036 +       return 0;
24037 +}
24038 +
24039 +static struct file_operations proc_virtual_dir_operations = {
24040 +       .read =         generic_read_dir,
24041 +       .readdir =      proc_virtual_readdir,
24042 +};
24043 +
24044 +static struct inode_operations proc_virtual_dir_inode_operations = {
24045 +       .getattr =      proc_virtual_getattr,
24046 +       .lookup =       proc_virtual_lookup,
24047 +};
24048 +
24049 +
24050 +
24051 +
24052 +
24053 +int proc_virtnet_readdir(struct file *filp,
24054 +       void *dirent, filldir_t filldir)
24055 +{
24056 +       struct dentry *dentry = filp->f_dentry;
24057 +       struct inode *inode = dentry->d_inode;
24058 +       struct vs_entry *p = nx_virtnet_stuff;
24059 +       int size = sizeof(nx_virtnet_stuff) / sizeof(struct vs_entry);
24060 +       int pos, index;
24061 +       unsigned int nid_array[PROC_MAXVIDS];
24062 +       char buf[PROC_NUMBUF];
24063 +       unsigned int nr_nids, i;
24064 +       u64 ino;
24065 +
24066 +       pos = filp->f_pos;
24067 +       switch (pos) {
24068 +       case 0:
24069 +               ino = inode->i_ino;
24070 +               if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
24071 +                       goto out;
24072 +               pos++;
24073 +               /* fall through */
24074 +       case 1:
24075 +               ino = parent_ino(dentry);
24076 +               if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
24077 +                       goto out;
24078 +               pos++;
24079 +               /* fall through */
24080 +       default:
24081 +               index = pos - 2;
24082 +               if (index >= size)
24083 +                       goto entries;
24084 +               for (p += index; p->name; p++) {
24085 +                       if (proc_fill_cache(filp, dirent, filldir, p->name, p->len,
24086 +                               vs_proc_instantiate, 0, p))
24087 +                               goto out;
24088 +                       pos++;
24089 +               }
24090 +       entries:
24091 +               index = pos - size;
24092 +               p = &nx_virtnet_stuff[size - 1];
24093 +               nr_nids = get_nid_list(index, nid_array, PROC_MAXVIDS);
24094 +               for (i = 0; i < nr_nids; i++) {
24095 +                       int n, nid = nid_array[i];
24096 +                       unsigned int j = PROC_NUMBUF;
24097 +
24098 +                       n = nid;
24099 +                       do
24100 +                               buf[--j] = '0' + (n % 10);
24101 +                       while (n /= 10);
24102 +
24103 +                       if (proc_fill_cache(filp, dirent, filldir,
24104 +                               buf + j, PROC_NUMBUF - j,
24105 +                               vs_proc_instantiate, nid, p))
24106 +                               goto out;
24107 +                       pos++;
24108 +               }
24109 +       }
24110 +out:
24111 +       filp->f_pos = pos;
24112 +       return 0;
24113 +}
24114 +
24115 +static int proc_virtnet_getattr(struct vfsmount *mnt,
24116 +       struct dentry *dentry, struct kstat *stat)
24117 +{
24118 +       struct inode *inode = dentry->d_inode;
24119 +
24120 +       generic_fillattr(inode, stat);
24121 +       stat->nlink = 2 + atomic_read(&nx_global_cactive);
24122 +       return 0;
24123 +}
24124 +
24125 +static struct file_operations proc_virtnet_dir_operations = {
24126 +       .read =         generic_read_dir,
24127 +       .readdir =      proc_virtnet_readdir,
24128 +};
24129 +
24130 +static struct inode_operations proc_virtnet_dir_inode_operations = {
24131 +       .getattr =      proc_virtnet_getattr,
24132 +       .lookup =       proc_virtnet_lookup,
24133 +};
24134 +
24135 +
24136 +
24137 +void proc_vx_init(void)
24138 +{
24139 +       struct proc_dir_entry *ent;
24140 +
24141 +       ent = proc_mkdir("virtual", 0);
24142 +       if (ent) {
24143 +               ent->proc_fops = &proc_virtual_dir_operations;
24144 +               ent->proc_iops = &proc_virtual_dir_inode_operations;
24145 +       }
24146 +       proc_virtual = ent;
24147 +
24148 +       ent = proc_mkdir("virtnet", 0);
24149 +       if (ent) {
24150 +               ent->proc_fops = &proc_virtnet_dir_operations;
24151 +               ent->proc_iops = &proc_virtnet_dir_inode_operations;
24152 +       }
24153 +       proc_virtnet = ent;
24154 +}
24155 +
24156 +
24157 +
24158 +
24159 +/* per pid info */
24160 +
24161 +
24162 +int proc_pid_vx_info(struct task_struct *p, char *buffer)
24163 +{
24164 +       struct vx_info *vxi;
24165 +       char *orig = buffer;
24166 +
24167 +       buffer += sprintf(buffer, "XID:\t%d\n", vx_task_xid(p));
24168 +
24169 +       vxi = task_get_vx_info(p);
24170 +       if (!vxi)
24171 +               goto out;
24172 +
24173 +       buffer += sprintf(buffer, "BCaps:\t");
24174 +       buffer = print_cap_t(buffer, &vxi->vx_bcaps);
24175 +       buffer += sprintf(buffer, "\n");
24176 +       buffer += sprintf(buffer, "CCaps:\t%016llx\n",
24177 +               (unsigned long long)vxi->vx_ccaps);
24178 +       buffer += sprintf(buffer, "CFlags:\t%016llx\n",
24179 +               (unsigned long long)vxi->vx_flags);
24180 +       buffer += sprintf(buffer, "CIPid:\t%d\n", vxi->vx_initpid);
24181 +
24182 +       put_vx_info(vxi);
24183 +out:
24184 +       return buffer - orig;
24185 +}
24186 +
24187 +
24188 +int proc_pid_nx_info(struct task_struct *p, char *buffer)
24189 +{
24190 +       struct nx_info *nxi;
24191 +       struct nx_addr_v4 *v4a;
24192 +#ifdef CONFIG_IPV6
24193 +       struct nx_addr_v6 *v6a;
24194 +#endif
24195 +       char *orig = buffer;
24196 +       int i;
24197 +
24198 +       buffer += sprintf(buffer, "NID:\t%d\n", nx_task_nid(p));
24199 +
24200 +       nxi = task_get_nx_info(p);
24201 +       if (!nxi)
24202 +               goto out;
24203 +
24204 +       buffer += sprintf(buffer, "NCaps:\t%016llx\n",
24205 +               (unsigned long long)nxi->nx_ncaps);
24206 +       buffer += sprintf(buffer, "NFlags:\t%016llx\n",
24207 +               (unsigned long long)nxi->nx_flags);
24208 +
24209 +       buffer += sprintf(buffer,
24210 +               "V4Root[bcast]:\t" NIPQUAD_FMT "\n",
24211 +               NIPQUAD(nxi->v4_bcast.s_addr));
24212 +       buffer += sprintf (buffer,
24213 +               "V4Root[lback]:\t" NIPQUAD_FMT "\n",
24214 +               NIPQUAD(nxi->v4_lback.s_addr));
24215 +       if (!NX_IPV4(nxi))
24216 +               goto skip_v4;
24217 +       for (i = 0, v4a = &nxi->v4; v4a; i++, v4a = v4a->next)
24218 +               buffer += sprintf(buffer, "V4Root[%d]:\t" NXAV4_FMT "\n",
24219 +                       i, NXAV4(v4a));
24220 +skip_v4:
24221 +#ifdef CONFIG_IPV6
24222 +       if (!NX_IPV6(nxi))
24223 +               goto skip_v6;
24224 +       for (i = 0, v6a = &nxi->v6; v6a; i++, v6a = v6a->next)
24225 +               buffer += sprintf(buffer, "V6Root[%d]:\t" NXAV6_FMT "\n",
24226 +                       i, NXAV6(v6a));
24227 +skip_v6:
24228 +#endif
24229 +       put_nx_info(nxi);
24230 +out:
24231 +       return buffer - orig;
24232 +}
24233 +
24234 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/sched.c linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/sched.c
24235 --- linux-2.6.31.6/kernel/vserver/sched.c       1970-01-01 01:00:00.000000000 +0100
24236 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/sched.c 2009-09-10 16:11:43.000000000 +0200
24237 @@ -0,0 +1,414 @@
24238 +/*
24239 + *  linux/kernel/vserver/sched.c
24240 + *
24241 + *  Virtual Server: Scheduler Support
24242 + *
24243 + *  Copyright (C) 2004-2007  Herbert Pötzl
24244 + *
24245 + *  V0.01  adapted Sam Vilains version to 2.6.3
24246 + *  V0.02  removed legacy interface
24247 + *  V0.03  changed vcmds to vxi arg
24248 + *  V0.04  removed older and legacy interfaces
24249 + *
24250 + */
24251 +
24252 +#include <linux/vs_context.h>
24253 +#include <linux/vs_sched.h>
24254 +#include <linux/vserver/sched_cmd.h>
24255 +
24256 +#include <asm/uaccess.h>
24257 +
24258 +
24259 +#define vxd_check_range(val, min, max) do {            \
24260 +       vxlprintk((val < min) || (val > max),           \
24261 +               "check_range(%ld,%ld,%ld)",             \
24262 +               (long)val, (long)min, (long)max,        \
24263 +               __FILE__, __LINE__);                    \
24264 +       } while (0)
24265 +
24266 +
24267 +void vx_update_sched_param(struct _vx_sched *sched,
24268 +       struct _vx_sched_pc *sched_pc)
24269 +{
24270 +       unsigned int set_mask = sched->update_mask;
24271 +
24272 +       if (set_mask & VXSM_FILL_RATE)
24273 +               sched_pc->fill_rate[0] = sched->fill_rate[0];
24274 +       if (set_mask & VXSM_INTERVAL)
24275 +               sched_pc->interval[0] = sched->interval[0];
24276 +       if (set_mask & VXSM_FILL_RATE2)
24277 +               sched_pc->fill_rate[1] = sched->fill_rate[1];
24278 +       if (set_mask & VXSM_INTERVAL2)
24279 +               sched_pc->interval[1] = sched->interval[1];
24280 +       if (set_mask & VXSM_TOKENS)
24281 +               sched_pc->tokens = sched->tokens;
24282 +       if (set_mask & VXSM_TOKENS_MIN)
24283 +               sched_pc->tokens_min = sched->tokens_min;
24284 +       if (set_mask & VXSM_TOKENS_MAX)
24285 +               sched_pc->tokens_max = sched->tokens_max;
24286 +       if (set_mask & VXSM_PRIO_BIAS)
24287 +               sched_pc->prio_bias = sched->prio_bias;
24288 +
24289 +       if (set_mask & VXSM_IDLE_TIME)
24290 +               sched_pc->flags |= VXSF_IDLE_TIME;
24291 +       else
24292 +               sched_pc->flags &= ~VXSF_IDLE_TIME;
24293 +
24294 +       /* reset time */
24295 +       sched_pc->norm_time = jiffies;
24296 +}
24297 +
24298 +
24299 +/*
24300 + * recalculate the context's scheduling tokens
24301 + *
24302 + * ret > 0 : number of tokens available
24303 + * ret < 0 : on hold, check delta_min[]
24304 + *          -1 only jiffies
24305 + *          -2 also idle time
24306 + *
24307 + */
24308 +int vx_tokens_recalc(struct _vx_sched_pc *sched_pc,
24309 +       unsigned long *norm_time, unsigned long *idle_time, int delta_min[2])
24310 +{
24311 +       long delta;
24312 +       long tokens = 0;
24313 +       int flags = sched_pc->flags;
24314 +
24315 +       /* how much time did pass? */
24316 +       delta = *norm_time - sched_pc->norm_time;
24317 +       // printk("@ %ld, %ld, %ld\n", *norm_time, sched_pc->norm_time, jiffies);
24318 +       vxd_check_range(delta, 0, INT_MAX);
24319 +
24320 +       if (delta >= sched_pc->interval[0]) {
24321 +               long tokens, integral;
24322 +
24323 +               /* calc integral token part */
24324 +               tokens = delta / sched_pc->interval[0];
24325 +               integral = tokens * sched_pc->interval[0];
24326 +               tokens *= sched_pc->fill_rate[0];
24327 +#ifdef CONFIG_VSERVER_HARDCPU
24328 +               delta_min[0] = delta - integral;
24329 +               vxd_check_range(delta_min[0], 0, sched_pc->interval[0]);
24330 +#endif
24331 +               /* advance time -- NOTE(review): by delta here, by integral in the idle branch; confirm */
24332 +               sched_pc->norm_time += delta;
24333 +
24334 +               /* add tokens */
24335 +               sched_pc->tokens += tokens;
24336 +               sched_pc->token_time += tokens;
24337 +       } else
24338 +               delta_min[0] = delta;
24339 +
24340 +#ifdef CONFIG_VSERVER_IDLETIME
24341 +       if (!(flags & VXSF_IDLE_TIME))
24342 +               goto skip_idle;
24343 +
24344 +       /* how much was the idle skip? */
24345 +       delta = *idle_time - sched_pc->idle_time;
24346 +       vxd_check_range(delta, 0, INT_MAX);
24347 +
24348 +       if (delta >= sched_pc->interval[1]) {
24349 +               long tokens, integral;
24350 +
24351 +               /* calc fair share token part */
24352 +               tokens = delta / sched_pc->interval[1];
24353 +               integral = tokens * sched_pc->interval[1];
24354 +               tokens *= sched_pc->fill_rate[1];
24355 +               delta_min[1] = delta - integral;
24356 +               vxd_check_range(delta_min[1], 0, sched_pc->interval[1]);
24357 +
24358 +               /* advance idle time */
24359 +               sched_pc->idle_time += integral;
24360 +
24361 +               /* add tokens */
24362 +               sched_pc->tokens += tokens;
24363 +               sched_pc->token_time += tokens;
24364 +       } else
24365 +               delta_min[1] = delta;
24366 +skip_idle:
24367 +#endif
24368 +
24369 +       /* clip at maximum */
24370 +       if (sched_pc->tokens > sched_pc->tokens_max)
24371 +               sched_pc->tokens = sched_pc->tokens_max;
24372 +       tokens = sched_pc->tokens;
24373 +
24374 +       if ((flags & VXSF_ONHOLD)) {
24375 +               /* can we unhold? */
24376 +               if (tokens >= sched_pc->tokens_min) {
24377 +                       flags &= ~VXSF_ONHOLD;
24378 +                       sched_pc->hold_ticks +=
24379 +                               *norm_time - sched_pc->onhold;
24380 +               } else
24381 +                       goto on_hold;
24382 +       } else {
24383 +               /* put on hold? */
24384 +               if (tokens <= 0) {
24385 +                       flags |= VXSF_ONHOLD;
24386 +                       sched_pc->onhold = *norm_time;
24387 +                       goto on_hold;
24388 +               }
24389 +       }
24390 +       sched_pc->flags = flags;
24391 +       return tokens;
24392 +
24393 +on_hold:
24394 +       tokens = sched_pc->tokens_min - tokens;
24395 +       sched_pc->flags = flags;
24396 +       // BUG_ON(tokens < 0); probably doesn't hold anymore
24397 +
24398 +#ifdef CONFIG_VSERVER_HARDCPU
24399 +       /* next interval? */
24400 +       if (!sched_pc->fill_rate[0])
24401 +               delta_min[0] = HZ;
24402 +       else if (tokens > sched_pc->fill_rate[0])
24403 +               delta_min[0] += sched_pc->interval[0] *
24404 +                       tokens / sched_pc->fill_rate[0];
24405 +       else
24406 +               delta_min[0] = sched_pc->interval[0] - delta_min[0];
24407 +       vxd_check_range(delta_min[0], 0, INT_MAX);
24408 +
24409 +#ifdef CONFIG_VSERVER_IDLETIME
24410 +       if (!(flags & VXSF_IDLE_TIME))
24411 +               return -1;
24412 +
24413 +       /* next interval? */
24414 +       if (!sched_pc->fill_rate[1])
24415 +               delta_min[1] = HZ;
24416 +       else if (tokens > sched_pc->fill_rate[1])
24417 +               delta_min[1] += sched_pc->interval[1] *
24418 +                       tokens / sched_pc->fill_rate[1];
24419 +       else
24420 +               delta_min[1] = sched_pc->interval[1] - delta_min[1];
24421 +       vxd_check_range(delta_min[1], 0, INT_MAX);
24422 +
24423 +       return -2;
24424 +#else
24425 +       return -1;
24426 +#endif /* CONFIG_VSERVER_IDLETIME */
24427 +#else
24428 +       return 0;
24429 +#endif /* CONFIG_VSERVER_HARDCPU */
24430 +}
24431 +
24432 +static inline unsigned long msec_to_ticks(unsigned long msec)
24433 +{
24434 +       return msecs_to_jiffies(msec);
24435 +}
24436 +
24437 +static inline unsigned long ticks_to_msec(unsigned long ticks)
24438 +{
24439 +       return jiffies_to_msecs(ticks);
24440 +}
24441 +
24442 +static inline unsigned long ticks_to_usec(unsigned long ticks)
24443 +{
24444 +       return jiffies_to_usecs(ticks);
24445 +}
24446 +
24447 +
24448 +static int do_set_sched(struct vx_info *vxi, struct vcmd_sched_v5 *data)
24449 +{
24450 +       unsigned int set_mask = data->mask;
24451 +       unsigned int update_mask;
24452 +       int i, cpu;
24453 +
24454 +       /* Sanity check data values */
24455 +       if (data->tokens_max <= 0)
24456 +               data->tokens_max = HZ;
24457 +       if (data->tokens_min < 0)
24458 +               data->tokens_min = HZ / 3;
24459 +       if (data->tokens_min >= data->tokens_max)
24460 +               data->tokens_min = data->tokens_max;
24461 +
24462 +       if (data->prio_bias > MAX_PRIO_BIAS)
24463 +               data->prio_bias = MAX_PRIO_BIAS;
24464 +       if (data->prio_bias < MIN_PRIO_BIAS)
24465 +               data->prio_bias = MIN_PRIO_BIAS;
24466 +
24467 +       spin_lock(&vxi->sched.tokens_lock);
24468 +
24469 +       /* sync up on delayed updates */
24470 +       for_each_cpu_mask(cpu, vxi->sched.update)
24471 +               vx_update_sched_param(&vxi->sched,
24472 +                       &vx_per_cpu(vxi, sched_pc, cpu));
24473 +
24474 +       if (set_mask & VXSM_FILL_RATE)
24475 +               vxi->sched.fill_rate[0] = data->fill_rate[0];
24476 +       if (set_mask & VXSM_FILL_RATE2)
24477 +               vxi->sched.fill_rate[1] = data->fill_rate[1];
24478 +       if (set_mask & VXSM_INTERVAL)
24479 +               vxi->sched.interval[0] = (set_mask & VXSM_MSEC) ?
24480 +                       msec_to_ticks(data->interval[0]) : data->interval[0];
24481 +       if (set_mask & VXSM_INTERVAL2)
24482 +               vxi->sched.interval[1] = (set_mask & VXSM_MSEC) ?
24483 +                       msec_to_ticks(data->interval[1]) : data->interval[1];
24484 +       if (set_mask & VXSM_TOKENS)
24485 +               vxi->sched.tokens = data->tokens;
24486 +       if (set_mask & VXSM_TOKENS_MIN)
24487 +               vxi->sched.tokens_min = data->tokens_min;
24488 +       if (set_mask & VXSM_TOKENS_MAX)
24489 +               vxi->sched.tokens_max = data->tokens_max;
24490 +       if (set_mask & VXSM_PRIO_BIAS)
24491 +               vxi->sched.prio_bias = data->prio_bias;
24492 +
24493 +       /* Sanity check rate/interval */
24494 +       for (i = 0; i < 2; i++) {
24495 +               if (data->fill_rate[i] < 0)
24496 +                       data->fill_rate[i] = 0;
24497 +               if (data->interval[i] <= 0)
24498 +                       data->interval[i] = HZ;
24499 +       }
24500 +
24501 +       update_mask = vxi->sched.update_mask & VXSM_SET_MASK;
24502 +       update_mask |= (set_mask & (VXSM_SET_MASK | VXSM_IDLE_TIME));
24503 +       vxi->sched.update_mask = update_mask;
24504 +
24505 +#ifdef CONFIG_SMP
24506 +       rmb();
24507 +       if (set_mask & VXSM_CPU_ID) {
24508 +               vxi->sched.update = cpumask_of_cpu(data->cpu_id);
24509 +               cpus_and(vxi->sched.update, cpu_online_map,
24510 +                       vxi->sched.update);
24511 +       } else
24512 +               vxi->sched.update = cpu_online_map;
24513 +
24514 +       /* forced reload? */
24515 +       if (set_mask & VXSM_FORCE) {
24516 +               for_each_cpu_mask(cpu, vxi->sched.update)
24517 +                       vx_update_sched_param(&vxi->sched,
24518 +                               &vx_per_cpu(vxi, sched_pc, cpu));
24519 +               vxi->sched.update = CPU_MASK_NONE;
24520 +       }
24521 +#else
24522 +       /* on UP we update immediately */
24523 +       vx_update_sched_param(&vxi->sched,
24524 +               &vx_per_cpu(vxi, sched_pc, 0));
24525 +#endif
24526 +
24527 +       spin_unlock(&vxi->sched.tokens_lock);
24528 +       return 0;
24529 +}
24530 +
24531 +
24532 +#define COPY_IDS(C) C(cpu_id); C(bucket_id)
24533 +#define COPY_PRI(C) C(prio_bias)
24534 +#define COPY_TOK(C) C(tokens); C(tokens_min); C(tokens_max)
24535 +#define COPY_FRI(C) C(fill_rate[0]); C(interval[0]);   \
24536 +                   C(fill_rate[1]); C(interval[1]);
24537 +
24538 +#define COPY_VALUE(name) vc_data.name = data->name
24539 +
24540 +static int do_set_sched_v4(struct vx_info *vxi, struct vcmd_set_sched_v4 *data)
24541 +{
24542 +       struct vcmd_sched_v5 vc_data;
24543 +
24544 +       vc_data.mask = data->set_mask;
24545 +       COPY_IDS(COPY_VALUE);
24546 +       COPY_PRI(COPY_VALUE);
24547 +       COPY_TOK(COPY_VALUE);
24548 +       vc_data.fill_rate[0] = vc_data.fill_rate[1] = data->fill_rate;
24549 +       vc_data.interval[0] = vc_data.interval[1] = data->interval;
24550 +       return do_set_sched(vxi, &vc_data);
24551 +}
24552 +
24553 +int vc_set_sched_v4(struct vx_info *vxi, void __user *data)
24554 +{
24555 +       struct vcmd_set_sched_v4 vc_data;
24556 +
24557 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
24558 +               return -EFAULT;
24559 +
24560 +       return do_set_sched_v4(vxi, &vc_data);
24561 +}
24562 +
24563 +       /* latest interface is v5 */
24564 +
24565 +int vc_set_sched(struct vx_info *vxi, void __user *data)
24566 +{
24567 +       struct vcmd_sched_v5 vc_data;
24568 +
24569 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
24570 +               return -EFAULT;
24571 +
24572 +       return do_set_sched(vxi, &vc_data);
24573 +}
24574 +
24575 +
24576 +#define COPY_PRI(C) C(prio_bias)
24577 +#define COPY_TOK(C) C(tokens); C(tokens_min); C(tokens_max)
24578 +#define COPY_FRI(C) C(fill_rate[0]); C(interval[0]);    \
24579 +                   C(fill_rate[1]); C(interval[1]);
24580 +
24581 +#define COPY_VALUE(name) vc_data.name = data->name
24582 +
24583 +/* copy per-cpu (VXSM_CPU_ID set) or context-wide scheduler values to userspace */
24584 +int vc_get_sched(struct vx_info *vxi, void __user *data)
24585 +{
24586 +       struct vcmd_sched_v5 vc_data;
24587 +
24588 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
24589 +               return -EFAULT;
24590 +
24591 +       if (vc_data.mask & VXSM_CPU_ID) {
24592 +               int cpu = vc_data.cpu_id;       /* user supplied */
24593 +               struct _vx_sched_pc *data;
24594 +
24595 +               if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_possible(cpu))
24596 +                       return -EINVAL;
24597 +
24598 +               data = &vx_per_cpu(vxi, sched_pc, cpu);
24599 +               COPY_TOK(COPY_VALUE);
24600 +               COPY_PRI(COPY_VALUE);
24601 +               COPY_FRI(COPY_VALUE);
24602 +
24603 +               if (data->flags & VXSF_IDLE_TIME)
24604 +                       vc_data.mask |= VXSM_IDLE_TIME;
24605 +       } else {
24606 +               struct _vx_sched *data = &vxi->sched;
24607 +
24608 +               COPY_TOK(COPY_VALUE);
24609 +               COPY_PRI(COPY_VALUE);
24610 +               COPY_FRI(COPY_VALUE);
24611 +       }
24612 +
24613 +       if (vc_data.mask & VXSM_MSEC) {
24614 +               vc_data.interval[0] = ticks_to_msec(vc_data.interval[0]);
24615 +               vc_data.interval[1] = ticks_to_msec(vc_data.interval[1]);
24616 +       }
24617 +
24618 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
24619 +               return -EFAULT;
24620 +       return 0;
24621 +}
24622 +
24623 +/* report per-cpu tick accounting of a context (if any) to userspace */
24624 +int vc_sched_info(struct vx_info *vxi, void __user *data)
24625 +{
24626 +       struct vcmd_sched_info vc_data;
24627 +       int cpu;
24628 +
24629 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
24630 +               return -EFAULT;
24631 +
24632 +       cpu = vc_data.cpu_id;   /* user supplied: bound before mask test */
24633 +       if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_possible(cpu))
24634 +               return -EINVAL;
24635 +
24636 +       if (vxi) {
24637 +               struct _vx_sched_pc *sched_pc =
24638 +                       &vx_per_cpu(vxi, sched_pc, cpu);
24639 +
24640 +               vc_data.user_msec = ticks_to_msec(sched_pc->user_ticks);
24641 +               vc_data.sys_msec = ticks_to_msec(sched_pc->sys_ticks);
24642 +               vc_data.hold_msec = ticks_to_msec(sched_pc->hold_ticks);
24643 +               vc_data.vavavoom = sched_pc->vavavoom;
24644 +       }
24645 +       vc_data.token_usec = ticks_to_usec(1);
24646 +
24647 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
24648 +               return -EFAULT;
24649 +       return 0;
24650 +}
24651 +
24652 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/sched_init.h linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/sched_init.h
24653 --- linux-2.6.31.6/kernel/vserver/sched_init.h  1970-01-01 01:00:00.000000000 +0100
24654 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/sched_init.h    2009-09-10 16:11:43.000000000 +0200
24655 @@ -0,0 +1,50 @@
24656 +
24657 +static inline void vx_info_init_sched(struct _vx_sched *sched)
24658 +{
24659 +       static struct lock_class_key tokens_lock_key;
24660 +
24661 +       /* scheduling; hard code starting values as constants */
24662 +       sched->fill_rate[0]     = 1;
24663 +       sched->interval[0]      = 4;
24664 +       sched->fill_rate[1]     = 1;
24665 +       sched->interval[1]      = 8;
24666 +       sched->tokens           = HZ >> 2;
24667 +       sched->tokens_min       = HZ >> 4;
24668 +       sched->tokens_max       = HZ >> 1;
24669 +       spin_lock_init(&sched->tokens_lock);    /* not SPIN_LOCK_UNLOCKED (deprecated) */
24670 +       sched->prio_bias        = 0;
24671 +
24672 +       lockdep_set_class(&sched->tokens_lock, &tokens_lock_key);
24673 +}
24674 +
24675 +static inline
24676 +void vx_info_init_sched_pc(struct _vx_sched_pc *sched_pc, int cpu)
24677 +{
24678 +       sched_pc->fill_rate[0]  = 1;
24679 +       sched_pc->interval[0]   = 4;
24680 +       sched_pc->fill_rate[1]  = 1;
24681 +       sched_pc->interval[1]   = 8;
24682 +       sched_pc->tokens        = HZ >> 2;
24683 +       sched_pc->tokens_min    = HZ >> 4;
24684 +       sched_pc->tokens_max    = HZ >> 1;
24685 +       sched_pc->prio_bias     = 0;
24686 +       sched_pc->vavavoom      = 0;
24687 +       sched_pc->token_time    = 0;
24688 +       sched_pc->idle_time     = 0;
24689 +       sched_pc->norm_time     = jiffies;
24690 +
24691 +       sched_pc->user_ticks = 0;
24692 +       sched_pc->sys_ticks = 0;
24693 +       sched_pc->hold_ticks = 0;
24694 +}
24695 +
24696 +static inline void vx_info_exit_sched(struct _vx_sched *sched)
24697 +{
24698 +       return;
24699 +}
24700 +
24701 +static inline
24702 +void vx_info_exit_sched_pc(struct _vx_sched_pc *sched_pc, int cpu)
24703 +{
24704 +       return;
24705 +}
24706 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/sched_proc.h linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/sched_proc.h
24707 --- linux-2.6.31.6/kernel/vserver/sched_proc.h  1970-01-01 01:00:00.000000000 +0100
24708 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/sched_proc.h    2009-09-10 16:11:43.000000000 +0200
24709 @@ -0,0 +1,57 @@
24710 +#ifndef _VX_SCHED_PROC_H
24711 +#define _VX_SCHED_PROC_H
24712 +
24713 +
24714 +static inline
24715 +int vx_info_proc_sched(struct _vx_sched *sched, char *buffer)
24716 +{
24717 +       int length = 0;
24718 +
24719 +       length += sprintf(buffer,
24720 +               "FillRate:\t%8d,%d\n"
24721 +               "Interval:\t%8d,%d\n"
24722 +               "TokensMin:\t%8d\n"
24723 +               "TokensMax:\t%8d\n"
24724 +               "PrioBias:\t%8d\n",
24725 +               sched->fill_rate[0],
24726 +               sched->fill_rate[1],
24727 +               sched->interval[0],
24728 +               sched->interval[1],
24729 +               sched->tokens_min,
24730 +               sched->tokens_max,
24731 +               sched->prio_bias);
24732 +       return length;
24733 +}
24734 +
24735 +static inline
24736 +int vx_info_proc_sched_pc(struct _vx_sched_pc *sched_pc,
24737 +       char *buffer, int cpu)
24738 +{
24739 +       int length = 0;
24740 +
24741 +       length += sprintf(buffer + length,
24742 +               "cpu %d: %llu %llu %llu %ld %ld", cpu,
24743 +               (unsigned long long)sched_pc->user_ticks,
24744 +               (unsigned long long)sched_pc->sys_ticks,
24745 +               (unsigned long long)sched_pc->hold_ticks,
24746 +               sched_pc->token_time,
24747 +               sched_pc->idle_time);
24748 +       length += sprintf(buffer + length,
24749 +               " %c%c %d %d %d %d/%d %d/%d",
24750 +               (sched_pc->flags & VXSF_ONHOLD) ? 'H' : 'R',
24751 +               (sched_pc->flags & VXSF_IDLE_TIME) ? 'I' : '-',
24752 +               sched_pc->tokens,
24753 +               sched_pc->tokens_min,
24754 +               sched_pc->tokens_max,
24755 +               sched_pc->fill_rate[0],
24756 +               sched_pc->interval[0],
24757 +               sched_pc->fill_rate[1],
24758 +               sched_pc->interval[1]);
24759 +       length += sprintf(buffer + length,
24760 +               " %d %d\n",
24761 +               sched_pc->prio_bias,
24762 +               sched_pc->vavavoom);
24763 +       return length;  /* bytes written to buffer */
24764 +}
24765 +
24766 +#endif /* _VX_SCHED_PROC_H */
24767 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/signal.c linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/signal.c
24768 --- linux-2.6.31.6/kernel/vserver/signal.c      1970-01-01 01:00:00.000000000 +0100
24769 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/signal.c        2009-09-10 16:11:43.000000000 +0200
24770 @@ -0,0 +1,132 @@
24771 +/*
24772 + *  linux/kernel/vserver/signal.c
24773 + *
24774 + *  Virtual Server: Signal Support
24775 + *
24776 + *  Copyright (C) 2003-2007  Herbert Pötzl
24777 + *
24778 + *  V0.01  broken out from vcontext V0.05
24779 + *  V0.02  changed vcmds to vxi arg
24780 + *  V0.03  adjusted siginfo for kill
24781 + *
24782 + */
24783 +
24784 +#include <asm/uaccess.h>
24785 +
24786 +#include <linux/vs_context.h>
24787 +#include <linux/vs_pid.h>
24788 +#include <linux/vserver/signal_cmd.h>
24789 +
24790 +
24791 +int vx_info_kill(struct vx_info *vxi, int pid, int sig)
24792 +{
24793 +       int retval, count = 0;
24794 +       struct task_struct *p;
24795 +       struct siginfo *sip = SEND_SIG_PRIV;
24796 +
24797 +       retval = -ESRCH;
24798 +       vxdprintk(VXD_CBIT(misc, 4),
24799 +               "vx_info_kill(%p[#%d],%d,%d)*",
24800 +               vxi, vxi->vx_id, pid, sig);
24801 +       read_lock(&tasklist_lock);
24802 +       switch (pid) {
24803 +       case  0:
24804 +       case -1:
24805 +               for_each_process(p) {
24806 +                       int err = 0;
24807 +
24808 +                       if (vx_task_xid(p) != vxi->vx_id || p->pid <= 1 ||
24809 +                               (pid && vxi->vx_initpid == p->pid))
24810 +                               continue;
24811 +
24812 +                       err = group_send_sig_info(sig, sip, p);
24813 +                       ++count;
24814 +                       if (err != -EPERM)
24815 +                               retval = err;
24816 +               }
24817 +               break;
24818 +
24819 +       case 1:
24820 +               if (vxi->vx_initpid) {
24821 +                       pid = vxi->vx_initpid;
24822 +                       /* for now, only SIGINT to private init ... */
24823 +                       if (!vx_info_flags(vxi, VXF_STATE_ADMIN, 0) &&
24824 +                               /* ... as long as there are tasks left */
24825 +                               (atomic_read(&vxi->vx_tasks) > 1))
24826 +                               sig = SIGINT;
24827 +               }
24828 +               /* fallthrough */
24829 +       default:
24830 +               p = find_task_by_real_pid(pid);
24831 +               if (p) {
24832 +                       if (vx_task_xid(p) == vxi->vx_id)
24833 +                               retval = group_send_sig_info(sig, sip, p);
24834 +               }
24835 +               break;
24836 +       }
24837 +       read_unlock(&tasklist_lock);
24838 +       vxdprintk(VXD_CBIT(misc, 4),
24839 +               "vx_info_kill(%p[#%d],%d,%d,%ld) = %d",
24840 +               vxi, vxi->vx_id, pid, sig, (long)sip, retval);
24841 +       return retval;
24842 +}
24843 +/* VCMD_ctx_kill: copy a vcmd_ctx_kill_v0 request from user space and hand its pid and sig to vx_info_kill(); -EFAULT on a failed copy, -EACCES when the check below refuses it */
24844 +int vc_ctx_kill(struct vx_info *vxi, void __user *data)
24845 +{
24846 +       struct vcmd_ctx_kill_v0 vc_data;
24847 +
24848 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
24849 +               return -EFAULT;
24850 +
24851 +       /* special check to allow guest shutdown: unless vx_info_flags() reports VXF_STATE_ADMIN, only pid 1 (or pid < 1 while vx_initpid is zero) gets through */
24852 +       if (!vx_info_flags(vxi, VXF_STATE_ADMIN, 0) &&
24853 +               /* forbid killall pid=0 when init is present */
24854 +               (((vc_data.pid < 1) && vxi->vx_initpid) ||
24855 +               (vc_data.pid > 1)))
24856 +               return -EACCES;
24857 +
24858 +       return vx_info_kill(vxi, vc_data.pid, vc_data.sig);
24859 +}
24860 +
24861 +/* sleep interruptibly on vxi->vx_wait until vx_info_state() over SHUTDOWN|HASHED|HELPER yields exactly VXS_SHUTDOWN; returns 0 then, or -ERESTARTSYS when a signal is pending */
24862 +static int __wait_exit(struct vx_info *vxi)
24863 +{
24864 +       DECLARE_WAITQUEUE(wait, current);
24865 +       int ret = 0;
24866 +
24867 +       add_wait_queue(&vxi->vx_wait, &wait);
24868 +
24869 +wait:
24870 +       set_current_state(TASK_INTERRUPTIBLE);  /* re-armed on every pass: a wakeup leaves us TASK_RUNNING, and schedule() would then no longer sleep */
24871 +       if (vx_info_state(vxi,
24872 +               VXS_SHUTDOWN | VXS_HASHED | VXS_HELPER) == VXS_SHUTDOWN)
24873 +               goto out;
24874 +       if (signal_pending(current)) {
24875 +               ret = -ERESTARTSYS;
24876 +               goto out;
24877 +       }
24878 +       schedule();
24879 +       goto wait;
24880 +
24881 +out:
24882 +       set_current_state(TASK_RUNNING);
24883 +       remove_wait_queue(&vxi->vx_wait, &wait);
24884 +       return ret;
24885 +}
24886 +
24887 +
24888 +/* VCMD_wait_exit: block in __wait_exit(), then copy the context's reboot_cmd and exit_code to user space */
24889 +int vc_wait_exit(struct vx_info *vxi, void __user *data)
24890 +{
24891 +       struct vcmd_wait_exit_v0 vc_data;
24892 +       int ret;
24893 +
24894 +       ret = __wait_exit(vxi);
24895 +       vc_data.reboot_cmd = vxi->reboot_cmd;   /* filled in and copied out whatever __wait_exit() returned */
24896 +       vc_data.exit_code = vxi->exit_code;
24897 +
24898 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
24899 +               ret = -EFAULT;  /* replaces the result of the wait */
24900 +       return ret;
24901 +}
24902 +
24903 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/space.c linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/space.c
24904 --- linux-2.6.31.6/kernel/vserver/space.c       1970-01-01 01:00:00.000000000 +0100
24905 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/space.c 2009-09-10 16:11:43.000000000 +0200
24906 @@ -0,0 +1,375 @@
24907 +/*
24908 + *  linux/kernel/vserver/space.c
24909 + *
24910 + *  Virtual Server: Context Space Support
24911 + *
24912 + *  Copyright (C) 2003-2007  Herbert Pötzl
24913 + *
24914 + *  V0.01  broken out from context.c 0.07
24915 + *  V0.02  added task locking for namespace
24916 + *  V0.03  broken out vx_enter_namespace
24917 + *  V0.04  added *space support and commands
24918 + *
24919 + */
24920 +
24921 +#include <linux/utsname.h>
24922 +#include <linux/nsproxy.h>
24923 +#include <linux/err.h>
24924 +#include <linux/fs_struct.h>
24925 +#include <asm/uaccess.h>
24926 +
24927 +#include <linux/vs_context.h>
24928 +#include <linux/vserver/space.h>
24929 +#include <linux/vserver/space_cmd.h>
24930 +/* global counters, all starting at zero; not referenced again in this file -- presumably per-type usage accounting updated elsewhere (TODO confirm) */
24931 +atomic_t vs_global_nsproxy     = ATOMIC_INIT(0);
24932 +atomic_t vs_global_fs          = ATOMIC_INIT(0);
24933 +atomic_t vs_global_mnt_ns      = ATOMIC_INIT(0);
24934 +atomic_t vs_global_uts_ns      = ATOMIC_INIT(0);
24935 +atomic_t vs_global_user_ns     = ATOMIC_INIT(0);
24936 +atomic_t vs_global_pid_ns      = ATOMIC_INIT(0);
24937 +
24938 +
24939 +/* namespace functions */
24940 +
24941 +#include <linux/mnt_namespace.h>
24942 +#include <linux/user_namespace.h>
24943 +#include <linux/pid_namespace.h>
24944 +#include <linux/ipc_namespace.h>
24945 +#include <net/net_namespace.h>
24946 +
24947 +/* mask handed out by vc_get_space_mask() for type 0 (VCMD_get_space_mask_v0) */
24948 +static const struct vcmd_space_mask_v1 space_mask_v0 = {
24949 +       .mask = CLONE_FS |
24950 +               CLONE_NEWNS |
24951 +               CLONE_NEWUTS |
24952 +               CLONE_NEWIPC |
24953 +               CLONE_NEWUSER |
24954 +               0
24955 +};
24956 +/* full mask: handed out for type 1 (VCMD_get_space_mask) and the limit vx_set_space() validates requests against; PID/NET bits depend on the kernel config */
24957 +static const struct vcmd_space_mask_v1 space_mask = {
24958 +       .mask = CLONE_FS |
24959 +               CLONE_NEWNS |
24960 +               CLONE_NEWUTS |
24961 +               CLONE_NEWIPC |
24962 +               CLONE_NEWUSER |
24963 +#ifdef CONFIG_PID_NS
24964 +               CLONE_NEWPID |
24965 +#endif
24966 +#ifdef CONFIG_NET_NS
24967 +               CLONE_NEWNET |
24968 +#endif
24969 +               0
24970 +};
24971 +/* mask handed out for any other type (VCMD_get_space_default passes -1); CLONE_NEWPID is currently commented out here */
24972 +static const struct vcmd_space_mask_v1 default_space_mask = {
24973 +       .mask = CLONE_FS |
24974 +               CLONE_NEWNS |
24975 +               CLONE_NEWUTS |
24976 +               CLONE_NEWIPC |
24977 +               CLONE_NEWUSER |
24978 +#ifdef CONFIG_PID_NS
24979 +//             CLONE_NEWPID |
24980 +#endif
24981 +               0
24982 +};
24983 +
24984 +/*
24985 + *     build a new nsproxy mix: a copy of old_nsproxy in which each namespace
24986 + *     selected by mask (NEWNS/NEWUTS/NEWIPC, NEWPID/NEWNET if configured) comes
24987 + *     from new_nsproxy. assumes both proxies are 'const', does not touch nsproxy
24988 + *     refcounts. will hold a reference on the result; NULL if the copy fails.
24989 + */
24990 +
24991 +struct nsproxy *vs_mix_nsproxy(struct nsproxy *old_nsproxy,
24992 +       struct nsproxy *new_nsproxy, unsigned long mask)
24993 +{
24994 +       struct mnt_namespace *old_ns;
24995 +       struct uts_namespace *old_uts;
24996 +       struct ipc_namespace *old_ipc;
24997 +#ifdef CONFIG_PID_NS
24998 +       struct pid_namespace *old_pid;
24999 +#endif
25000 +#ifdef CONFIG_NET_NS
25001 +       struct net *old_net;
25002 +#endif
25003 +       struct nsproxy *nsproxy;
25004 +
25005 +       nsproxy = copy_nsproxy(old_nsproxy);
25006 +       if (!nsproxy)
25007 +               goto out;       /* failure is reported as NULL, not as an ERR_PTR */
25008 +
25009 +       if (mask & CLONE_NEWNS) {
25010 +               old_ns = nsproxy->mnt_ns;       /* remembered so it can be put below */
25011 +               nsproxy->mnt_ns = new_nsproxy->mnt_ns;
25012 +               if (nsproxy->mnt_ns)
25013 +                       get_mnt_ns(nsproxy->mnt_ns);
25014 +       } else
25015 +               old_ns = NULL;
25016 +
25017 +       if (mask & CLONE_NEWUTS) {
25018 +               old_uts = nsproxy->uts_ns;
25019 +               nsproxy->uts_ns = new_nsproxy->uts_ns;
25020 +               if (nsproxy->uts_ns)
25021 +                       get_uts_ns(nsproxy->uts_ns);
25022 +       } else
25023 +               old_uts = NULL;
25024 +
25025 +       if (mask & CLONE_NEWIPC) {
25026 +               old_ipc = nsproxy->ipc_ns;
25027 +               nsproxy->ipc_ns = new_nsproxy->ipc_ns;
25028 +               if (nsproxy->ipc_ns)
25029 +                       get_ipc_ns(nsproxy->ipc_ns);
25030 +       } else
25031 +               old_ipc = NULL;
25032 +
25033 +#ifdef CONFIG_PID_NS
25034 +       if (mask & CLONE_NEWPID) {
25035 +               old_pid = nsproxy->pid_ns;
25036 +               nsproxy->pid_ns = new_nsproxy->pid_ns;
25037 +               if (nsproxy->pid_ns)
25038 +                       get_pid_ns(nsproxy->pid_ns);
25039 +       } else
25040 +               old_pid = NULL;
25041 +#endif
25042 +#ifdef CONFIG_NET_NS
25043 +       if (mask & CLONE_NEWNET) {
25044 +               old_net = nsproxy->net_ns;
25045 +               nsproxy->net_ns = new_nsproxy->net_ns;
25046 +               if (nsproxy->net_ns)
25047 +                       get_net(nsproxy->net_ns);
25048 +       } else
25049 +               old_net = NULL;
25050 +#endif
25051 +       if (old_ns)     /* from here on: put each namespace that was replaced -- presumably the reference copy_nsproxy() took on it (TODO confirm) */
25052 +               put_mnt_ns(old_ns);
25053 +       if (old_uts)
25054 +               put_uts_ns(old_uts);
25055 +       if (old_ipc)
25056 +               put_ipc_ns(old_ipc);
25057 +#ifdef CONFIG_PID_NS
25058 +       if (old_pid)
25059 +               put_pid_ns(old_pid);
25060 +#endif
25061 +#ifdef CONFIG_NET_NS
25062 +       if (old_net)
25063 +               put_net(old_net);
25064 +#endif
25065 +out:
25066 +       return nsproxy;
25067 +}
25068 +
25069 +
25070 +/*
25071 + *     merge two nsproxy structs into a new one.
25072 + *     will hold a reference on the result.
25073 + *     NULL proxy or a failed mix yields NULL; mask == 0 yields proxy itself.
25074 + */
25075 +static inline
25076 +struct nsproxy *__vs_merge_nsproxy(struct nsproxy *old,
25077 +       struct nsproxy *proxy, unsigned long mask)
25078 +{
25079 +       struct nsproxy null_proxy = { .mnt_ns = NULL }; /* stand-in base for the mix when old is NULL */
25080 +
25081 +       if (!proxy)
25082 +               return NULL;
25083 +
25084 +       if (mask) {
25085 +               /* vs_mix_nsproxy returns with reference */
25086 +               return vs_mix_nsproxy(old ? old : &null_proxy,
25087 +                       proxy, mask);
25088 +       }
25089 +       get_nsproxy(proxy);     /* no mask: share proxy as it is, with one more reference */
25090 +       return proxy;
25091 +}
25092 +
25093 +/* move the calling task into space 'index' of context vxi: with CLONE_FS a copy of vx_fs[index] becomes current->fs, and current->nsproxy is replaced by the merge of its own proxy with vx_nsproxy[index] for the namespaces in mask (mask 0 = all of vx_nsmask[index]). Returns 0 or a negative errno. */
25094 +int vx_enter_space(struct vx_info *vxi, unsigned long mask, unsigned index)
25095 +{
25096 +       struct nsproxy *proxy, *proxy_cur, *proxy_new;
25097 +       struct fs_struct *fs_cur, *fs = NULL;
25098 +       int ret, kill = 0;
25099 +
25100 +       vxdprintk(VXD_CBIT(space, 8), "vx_enter_space(%p[#%u],0x%08lx,%d)",
25101 +               vxi, vxi->vx_id, mask, index);
25102 +
25103 +       if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0))
25104 +               return -EACCES;
25105 +
25106 +       if (!mask)
25107 +               mask = vxi->vx_nsmask[index];
25108 +
25109 +       if ((mask & vxi->vx_nsmask[index]) != mask)     /* only spaces recorded for this slot may be entered */
25110 +               return -EINVAL;
25111 +
25112 +       if (mask & CLONE_FS) {
25113 +               fs = copy_fs_struct(vxi->vx_fs[index]);
25114 +               if (!fs)
25115 +                       return -ENOMEM;
25116 +       }
25117 +       proxy = vxi->vx_nsproxy[index];
25118 +
25119 +       vxdprintk(VXD_CBIT(space, 9),
25120 +               "vx_enter_space(%p[#%u],0x%08lx,%d) -> (%p,%p)",
25121 +               vxi, vxi->vx_id, mask, index, proxy, fs);
25122 +
25123 +       task_lock(current);
25124 +       fs_cur = current->fs;
25125 +
25126 +       if (mask & CLONE_FS) {
25127 +               write_lock(&fs_cur->lock);
25128 +               current->fs = fs;
25129 +               kill = !--fs_cur->users;        /* nobody left on the old fs_struct: free it once unlocked */
25130 +               write_unlock(&fs_cur->lock);
25131 +       }
25132 +
25133 +       proxy_cur = current->nsproxy;
25134 +       get_nsproxy(proxy_cur);
25135 +       task_unlock(current);
25136 +
25137 +       if (kill)
25138 +               free_fs_struct(fs_cur);
25139 +       /* the merge helpers in this file report failure as NULL, never as an ERR_PTR; a NULL must not be installed as the task's nsproxy */
25140 +       proxy_new = __vs_merge_nsproxy(proxy_cur, proxy, mask);
25141 +       if (!proxy_new || IS_ERR(proxy_new)) {
25142 +               ret = proxy_new ? PTR_ERR(proxy_new) : (proxy ? -ENOMEM : -EINVAL);
25143 +               goto out_put;   /* note: a CLONE_FS switch made above stays in effect */
25144 +       }
25145 +
25146 +       proxy_new = xchg(&current->nsproxy, proxy_new);
25147 +       ret = 0;
25148 +
25149 +       if (proxy_new)  /* now the previously installed proxy */
25150 +               put_nsproxy(proxy_new);
25151 +out_put:
25152 +       if (proxy_cur)
25153 +               put_nsproxy(proxy_cur);
25154 +       return ret;
25155 +}
25156 +
25157 +
25158 +int vx_set_space(struct vx_info *vxi, unsigned long mask, unsigned index)
25159 +{
25160 +       struct nsproxy *proxy_vxi, *proxy_cur, *proxy_new;
25161 +       struct fs_struct *fs_vxi, *fs;
25162 +       int ret, kill = 0;
25163 +
25164 +       vxdprintk(VXD_CBIT(space, 8), "vx_set_space(%p[#%u],0x%08lx,%d)",
25165 +               vxi, vxi->vx_id, mask, index);
25166 +#if 0
25167 +       if (!mask)
25168 +               mask = default_space_mask.mask;
25169 +#endif
25170 +       if ((mask & space_mask.mask) != mask)
25171 +               return -EINVAL;
25172 +
25173 +       proxy_vxi = vxi->vx_nsproxy[index];
25174 +       fs_vxi = vxi->vx_fs[index];
25175 +
25176 +       if (mask & CLONE_FS) {
25177 +               fs = copy_fs_struct(current->fs);
25178 +               if (!fs)
25179 +                       return -ENOMEM;
25180 +       }
25181 +
25182 +       task_lock(current);
25183 +
25184 +       if (mask & CLONE_FS) {
25185 +               write_lock(&fs_vxi->lock);
25186 +               vxi->vx_fs[index] = fs;
25187 +               kill = !--fs_vxi->users;
25188 +               write_unlock(&fs_vxi->lock);
25189 +       }
25190 +
25191 +       proxy_cur = current->nsproxy;
25192 +       get_nsproxy(proxy_cur);
25193 +       task_unlock(current);
25194 +
25195 +       if (kill)
25196 +               free_fs_struct(fs_vxi);
25197 +
25198 +       proxy_new = __vs_merge_nsproxy(proxy_vxi, proxy_cur, mask);
25199 +       if (IS_ERR(proxy_new)) {
25200 +               ret = PTR_ERR(proxy_new);
25201 +               goto out_put;
25202 +       }
25203 +
25204 +       proxy_new = xchg(&vxi->vx_nsproxy[index], proxy_new);
25205 +       vxi->vx_nsmask[index] |= mask;
25206 +       ret = 0;
25207 +
25208 +       if (proxy_new)
25209 +               put_nsproxy(proxy_new);
25210 +out_put:
25211 +       if (proxy_cur)
25212 +               put_nsproxy(proxy_cur);
25213 +       return ret;
25214 +}
25215 +
25216 +/* VCMD_enter_space_v0/_v1: read an optional vcmd_space_mask_v1 (NULL data keeps mask 0) and enter space index 0; -EFAULT on a failed copy */
25217 +int vc_enter_space_v1(struct vx_info *vxi, void __user *data)
25218 +{
25219 +       struct vcmd_space_mask_v1 vc_data = { .mask = 0 };
25220 +
25221 +       if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
25222 +               return -EFAULT;
25223 +
25224 +       return vx_enter_space(vxi, vc_data.mask, 0);
25225 +}
25226 +/* VCMD_enter_space (v2): read an optional vcmd_space_mask_v2 carrying mask and space index, then enter that space; -EFAULT on a failed copy, -EINVAL for index >= VX_SPACES */
25227 +int vc_enter_space(struct vx_info *vxi, void __user *data)
25228 +{
25229 +       struct vcmd_space_mask_v2 vc_data = { .mask = 0 };
25230 +
25231 +       if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
25232 +               return -EFAULT;
25233 +
25234 +       if (vc_data.index >= VX_SPACES)
25235 +               return -EINVAL;
25236 +
25237 +       return vx_enter_space(vxi, vc_data.mask, vc_data.index);
25238 +}
25239 +/* VCMD_set_space_v1: read an optional vcmd_space_mask_v1 (NULL data keeps mask 0) and call vx_set_space() for space index 0; -EFAULT on a failed copy */
25240 +int vc_set_space_v1(struct vx_info *vxi, void __user *data)
25241 +{
25242 +       struct vcmd_space_mask_v1 vc_data = { .mask = 0 };
25243 +
25244 +       if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
25245 +               return -EFAULT;
25246 +
25247 +       return vx_set_space(vxi, vc_data.mask, 0);
25248 +}
25249 +/* VCMD_set_space (v2): read an optional vcmd_space_mask_v2 carrying mask and space index, then call vx_set_space() for it; -EFAULT on a failed copy, -EINVAL for index >= VX_SPACES */
25250 +int vc_set_space(struct vx_info *vxi, void __user *data)
25251 +{
25252 +       struct vcmd_space_mask_v2 vc_data = { .mask = 0 };
25253 +
25254 +       if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
25255 +               return -EFAULT;
25256 +
25257 +       if (vc_data.index >= VX_SPACES)
25258 +               return -EINVAL;
25259 +
25260 +       return vx_set_space(vxi, vc_data.mask, vc_data.index);
25261 +}
25262 +/* copy one of the static space masks to user space: type 0 -> space_mask_v0, type 1 -> space_mask, anything else -> default_space_mask; returns 0 or -EFAULT */
25263 +int vc_get_space_mask(void __user *data, int type)
25264 +{
25265 +       const struct vcmd_space_mask_v1 *mask;
25266 +
25267 +       if (type == 0)
25268 +               mask = &space_mask_v0;
25269 +       else if (type == 1)
25270 +               mask = &space_mask;
25271 +       else
25272 +               mask = &default_space_mask;
25273 +
25274 +       vxdprintk(VXD_CBIT(space, 10),
25275 +               "vc_get_space_mask(%d) = %08llx", type, mask->mask);
25276 +
25277 +       if (copy_to_user(data, mask, sizeof(*mask)))
25278 +               return -EFAULT;
25279 +       return 0;
25280 +}
25281 +
25282 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/switch.c linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/switch.c
25283 --- linux-2.6.31.6/kernel/vserver/switch.c      1970-01-01 01:00:00.000000000 +0100
25284 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/switch.c        2009-09-10 16:11:43.000000000 +0200
25285 @@ -0,0 +1,546 @@
25286 +/*
25287 + *  linux/kernel/vserver/switch.c
25288 + *
25289 + *  Virtual Server: Syscall Switch
25290 + *
25291 + *  Copyright (C) 2003-2007  Herbert Pötzl
25292 + *
25293 + *  V0.01  syscall switch
25294 + *  V0.02  added signal to context
25295 + *  V0.03  added rlimit functions
25296 + *  V0.04  added iattr, task/xid functions
25297 + *  V0.05  added debug/history stuff
25298 + *  V0.06  added compat32 layer
25299 + *  V0.07  vcmd args and perms
25300 + *  V0.08  added status commands
25301 + *  V0.09  added tag commands
25302 + *  V0.10  added oom bias
25303 + *  V0.11  added device commands
25304 + *
25305 + */
25306 +
25307 +#include <linux/vs_context.h>
25308 +#include <linux/vs_network.h>
25309 +#include <linux/vserver/switch.h>
25310 +
25311 +#include "vci_config.h"
25312 +
25313 +/* VCMD_get_version: returns VCI_VERSION; the id argument is not used */
25314 +static inline
25315 +int vc_get_version(uint32_t id)
25316 +{
25317 +       return VCI_VERSION;
25318 +}
25319 +/* VCMD_get_vci: returns whatever vci_kernel_config() reports; the id argument is not used */
25320 +static inline
25321 +int vc_get_vci(uint32_t id)
25322 +{
25323 +       return vci_kernel_config();
25324 +}
25325 +
25326 +#include <linux/vserver/context_cmd.h>
25327 +#include <linux/vserver/cvirt_cmd.h>
25328 +#include <linux/vserver/cacct_cmd.h>
25329 +#include <linux/vserver/limit_cmd.h>
25330 +#include <linux/vserver/network_cmd.h>
25331 +#include <linux/vserver/sched_cmd.h>
25332 +#include <linux/vserver/debug_cmd.h>
25333 +#include <linux/vserver/inode_cmd.h>
25334 +#include <linux/vserver/dlimit_cmd.h>
25335 +#include <linux/vserver/signal_cmd.h>
25336 +#include <linux/vserver/space_cmd.h>
25337 +#include <linux/vserver/tag_cmd.h>
25338 +#include <linux/vserver/device_cmd.h>
25339 +
25340 +#include <linux/vserver/inode.h>
25341 +#include <linux/vserver/dlimit.h>
25342 +
25343 +/* call name ## _x32() instead of name() when 'compat' is non-zero (CONFIG_COMPAT builds only); the conditional is fully parenthesised so the expansion stays a single operand wherever it is used */
25344 +#ifdef CONFIG_COMPAT
25345 +#define __COMPAT(name, id, data, compat)       \
25346 +       ((compat) ? name ## _x32(id, data) : name(id, data))
25347 +#define __COMPAT_NO_ID(name, data, compat)     \
25348 +       ((compat) ? name ## _x32(data) : name(data))
25349 +#else
25350 +#define __COMPAT(name, id, data, compat)       \
25351 +       name(id, data)
25352 +#define __COMPAT_NO_ID(name, data, compat)     \
25353 +       name(data)
25354 +#endif
25355 +
25356 +/* route a vserver command to its vc_* handler and return that handler's result; the *_v0 variants call the newer handler with NULL data, and a command without a case is logged and yields -ENOSYS */
25357 +static inline
25358 +long do_vcmd(uint32_t cmd, uint32_t id,
25359 +       struct vx_info *vxi, struct nx_info *nxi,
25360 +       void __user *data, int compat)
25361 +{
25362 +       switch (cmd) {
25363 +
25364 +       case VCMD_get_version:
25365 +               return vc_get_version(id);
25366 +       case VCMD_get_vci:
25367 +               return vc_get_vci(id);
25368 +
25369 +       case VCMD_task_xid:
25370 +               return vc_task_xid(id);
25371 +       case VCMD_vx_info:
25372 +               return vc_vx_info(vxi, data);
25373 +
25374 +       case VCMD_task_nid:
25375 +               return vc_task_nid(id);
25376 +       case VCMD_nx_info:
25377 +               return vc_nx_info(nxi, data);
25378 +
25379 +       case VCMD_task_tag:
25380 +               return vc_task_tag(id);
25381 +
25382 +       case VCMD_set_space_v1:
25383 +               return vc_set_space_v1(vxi, data);
25384 +       /* this is version 2 */
25385 +       case VCMD_set_space:
25386 +               return vc_set_space(vxi, data);
25387 +
25388 +       case VCMD_get_space_mask_v0:
25389 +               return vc_get_space_mask(data, 0);
25390 +       /* this is version 1 */
25391 +       case VCMD_get_space_mask:
25392 +               return vc_get_space_mask(data, 1);
25393 +
25394 +       case VCMD_get_space_default:
25395 +               return vc_get_space_mask(data, -1);
25396 +
25397 +#ifdef CONFIG_IA32_EMULATION
25398 +       case VCMD_get_rlimit:
25399 +               return __COMPAT(vc_get_rlimit, vxi, data, compat);
25400 +       case VCMD_set_rlimit:
25401 +               return __COMPAT(vc_set_rlimit, vxi, data, compat);
25402 +#else
25403 +       case VCMD_get_rlimit:
25404 +               return vc_get_rlimit(vxi, data);
25405 +       case VCMD_set_rlimit:
25406 +               return vc_set_rlimit(vxi, data);
25407 +#endif
25408 +       case VCMD_get_rlimit_mask:
25409 +               return vc_get_rlimit_mask(id, data);
25410 +       case VCMD_reset_hits:
25411 +               return vc_reset_hits(vxi, data);
25412 +       case VCMD_reset_minmax:
25413 +               return vc_reset_minmax(vxi, data);
25414 +
25415 +       case VCMD_get_vhi_name:
25416 +               return vc_get_vhi_name(vxi, data);
25417 +       case VCMD_set_vhi_name:
25418 +               return vc_set_vhi_name(vxi, data);
25419 +
25420 +       case VCMD_ctx_stat:
25421 +               return vc_ctx_stat(vxi, data);
25422 +       case VCMD_virt_stat:
25423 +               return vc_virt_stat(vxi, data);
25424 +       case VCMD_sock_stat:
25425 +               return vc_sock_stat(vxi, data);
25426 +       case VCMD_rlimit_stat:
25427 +               return vc_rlimit_stat(vxi, data);
25428 +
25429 +       case VCMD_set_cflags:
25430 +               return vc_set_cflags(vxi, data);
25431 +       case VCMD_get_cflags:
25432 +               return vc_get_cflags(vxi, data);
25433 +
25434 +       /* this is version 1 */
25435 +       case VCMD_set_ccaps:
25436 +               return vc_set_ccaps(vxi, data);
25437 +       /* this is version 1 */
25438 +       case VCMD_get_ccaps:
25439 +               return vc_get_ccaps(vxi, data);
25440 +       case VCMD_set_bcaps:
25441 +               return vc_set_bcaps(vxi, data);
25442 +       case VCMD_get_bcaps:
25443 +               return vc_get_bcaps(vxi, data);
25444 +
25445 +       case VCMD_set_badness:
25446 +               return vc_set_badness(vxi, data);
25447 +       case VCMD_get_badness:
25448 +               return vc_get_badness(vxi, data);
25449 +
25450 +       case VCMD_set_nflags:
25451 +               return vc_set_nflags(nxi, data);
25452 +       case VCMD_get_nflags:
25453 +               return vc_get_nflags(nxi, data);
25454 +
25455 +       case VCMD_set_ncaps:
25456 +               return vc_set_ncaps(nxi, data);
25457 +       case VCMD_get_ncaps:
25458 +               return vc_get_ncaps(nxi, data);
25459 +
25460 +       case VCMD_set_sched_v4:
25461 +               return vc_set_sched_v4(vxi, data);
25462 +       /* this is version 5 */
25463 +       case VCMD_set_sched:
25464 +               return vc_set_sched(vxi, data);
25465 +       case VCMD_get_sched:
25466 +               return vc_get_sched(vxi, data);
25467 +       case VCMD_sched_info:
25468 +               return vc_sched_info(vxi, data);
25469 +
25470 +       case VCMD_add_dlimit:
25471 +               return __COMPAT(vc_add_dlimit, id, data, compat);
25472 +       case VCMD_rem_dlimit:
25473 +               return __COMPAT(vc_rem_dlimit, id, data, compat);
25474 +       case VCMD_set_dlimit:
25475 +               return __COMPAT(vc_set_dlimit, id, data, compat);
25476 +       case VCMD_get_dlimit:
25477 +               return __COMPAT(vc_get_dlimit, id, data, compat);
25478 +
25479 +       case VCMD_ctx_kill:
25480 +               return vc_ctx_kill(vxi, data);
25481 +
25482 +       case VCMD_wait_exit:
25483 +               return vc_wait_exit(vxi, data);
25484 +
25485 +       case VCMD_get_iattr:
25486 +               return __COMPAT_NO_ID(vc_get_iattr, data, compat);
25487 +       case VCMD_set_iattr:
25488 +               return __COMPAT_NO_ID(vc_set_iattr, data, compat);
25489 +
25490 +       case VCMD_fget_iattr:
25491 +               return vc_fget_iattr(id, data);
25492 +       case VCMD_fset_iattr:
25493 +               return vc_fset_iattr(id, data);
25494 +
25495 +       case VCMD_enter_space_v0:
25496 +               return vc_enter_space_v1(vxi, NULL);
25497 +       case VCMD_enter_space_v1:
25498 +               return vc_enter_space_v1(vxi, data);
25499 +       /* this is version 2 */
25500 +       case VCMD_enter_space:
25501 +               return vc_enter_space(vxi, data);
25502 +
25503 +       case VCMD_ctx_create_v0:
25504 +               return vc_ctx_create(id, NULL);
25505 +       case VCMD_ctx_create:
25506 +               return vc_ctx_create(id, data);
25507 +       case VCMD_ctx_migrate_v0:
25508 +               return vc_ctx_migrate(vxi, NULL);
25509 +       case VCMD_ctx_migrate:
25510 +               return vc_ctx_migrate(vxi, data);
25511 +
25512 +       case VCMD_net_create_v0:
25513 +               return vc_net_create(id, NULL);
25514 +       case VCMD_net_create:
25515 +               return vc_net_create(id, data);
25516 +       case VCMD_net_migrate:
25517 +               return vc_net_migrate(nxi, data);
25518 +
25519 +       case VCMD_tag_migrate:
25520 +               return vc_tag_migrate(id);
25521 +
25522 +       case VCMD_net_add:
25523 +               return vc_net_add(nxi, data);
25524 +       case VCMD_net_remove:
25525 +               return vc_net_remove(nxi, data);
25526 +
25527 +       case VCMD_net_add_ipv4:
25528 +               return vc_net_add_ipv4(nxi, data);
25529 +       case VCMD_net_remove_ipv4:
25530 +               return vc_net_remove_ipv4(nxi, data);
25531 +#ifdef CONFIG_IPV6
25532 +       case VCMD_net_add_ipv6:
25533 +               return vc_net_add_ipv6(nxi, data);
25534 +       case VCMD_net_remove_ipv6:
25535 +               return vc_net_remove_ipv6(nxi, data);
25536 +#endif
25537 +/*     case VCMD_add_match_ipv4:
25538 +               return vc_add_match_ipv4(nxi, data);
25539 +       case VCMD_get_match_ipv4:
25540 +               return vc_get_match_ipv4(nxi, data);
25541 +#ifdef CONFIG_IPV6
25542 +       case VCMD_add_match_ipv6:
25543 +               return vc_add_match_ipv6(nxi, data);
25544 +       case VCMD_get_match_ipv6:
25545 +               return vc_get_match_ipv6(nxi, data);
25546 +#endif */
25547 +
25548 +#ifdef CONFIG_VSERVER_DEVICE
25549 +       case VCMD_set_mapping:
25550 +               return __COMPAT(vc_set_mapping, vxi, data, compat);
25551 +       case VCMD_unset_mapping:
25552 +               return __COMPAT(vc_unset_mapping, vxi, data, compat);
25553 +#endif
25554 +#ifdef CONFIG_VSERVER_HISTORY
25555 +       case VCMD_dump_history:
25556 +               return vc_dump_history(id);
25557 +       case VCMD_read_history:
25558 +               return __COMPAT(vc_read_history, id, data, compat);
25559 +#endif
25560 +#ifdef CONFIG_VSERVER_MONITOR
25561 +       case VCMD_read_monitor:
25562 +               return __COMPAT(vc_read_monitor, id, data, compat);
25563 +#endif
25564 +       default:
25565 +               vxwprintk_task(1, "unimplemented VCMD_%02d_%d[%d]",
25566 +                       VC_CATEGORY(cmd), VC_COMMAND(cmd), VC_VERSION(cmd));
25567 +       }
25568 +       return -ENOSYS;
25569 +}
25570 +
25571 +/* one row of the command table in do_vserver(): a 'case VCMD_<vcmd>' that sets the locals perm, args and flags and then breaks */
25572 +#define        __VCMD(vcmd, _perm, _args, _flags)              \
25573 +       case VCMD_ ## vcmd: perm = _perm;               \
25574 +               args = _args; flags = _flags; break
25575 +
25576 +/* values for the 'args' column -- presumably which context object (vx_info / nx_info) the command needs looked up; TODO confirm against the rest of do_vserver() */
25577 +#define VCA_NONE       0x00
25578 +#define VCA_VXI                0x01
25579 +#define VCA_NXI                0x02
25580 +/* bits for the 'flags' column; do_vserver() tests a plain 'flags & VCF_ARES' before demanding CAP_SYS_RESOURCE, which the VCF_ADMIN bit alone also satisfies -- NOTE(review): confirm that is intended */
25581 +#define VCF_NONE       0x00
25582 +#define VCF_INFO       0x01
25583 +#define VCF_ADMIN      0x02
25584 +#define VCF_ARES       0x06    /* includes admin */
25585 +#define VCF_SETUP      0x08
25586 +
25587 +#define VCF_ZIDOK      0x10    /* zero id okay */
25588 +
25589 +
25590 +static inline
25591 +long do_vserver(uint32_t cmd, uint32_t id, void __user *data, int compat)
25592 +{
25593 +       long ret;
25594 +       int permit = -1, state = 0;
25595 +       int perm = -1, args = 0, flags = 0;
25596 +       struct vx_info *vxi = NULL;
25597 +       struct nx_info *nxi = NULL;
25598 +
25599 +       switch (cmd) {
25600 +       /* unpriviledged commands */
25601 +       __VCMD(get_version,      0, VCA_NONE,   0);
25602 +       __VCMD(get_vci,          0, VCA_NONE,   0);
25603 +       __VCMD(get_rlimit_mask,  0, VCA_NONE,   0);
25604 +       __VCMD(get_space_mask_v0,0, VCA_NONE,   0);
25605 +       __VCMD(get_space_mask,   0, VCA_NONE,   0);
25606 +       __VCMD(get_space_default,0, VCA_NONE,   0);
25607 +
25608 +       /* info commands */
25609 +       __VCMD(task_xid,         2, VCA_NONE,   0);
25610 +       __VCMD(reset_hits,       2, VCA_VXI,    0);
25611 +       __VCMD(reset_minmax,     2, VCA_VXI,    0);
25612 +       __VCMD(vx_info,          3, VCA_VXI,    VCF_INFO);
25613 +       __VCMD(get_bcaps,        3, VCA_VXI,    VCF_INFO);
25614 +       __VCMD(get_ccaps,        3, VCA_VXI,    VCF_INFO);
25615 +       __VCMD(get_cflags,       3, VCA_VXI,    VCF_INFO);
25616 +       __VCMD(get_badness,      3, VCA_VXI,    VCF_INFO);
25617 +       __VCMD(get_vhi_name,     3, VCA_VXI,    VCF_INFO);
25618 +       __VCMD(get_rlimit,       3, VCA_VXI,    VCF_INFO);
25619 +
25620 +       __VCMD(ctx_stat,         3, VCA_VXI,    VCF_INFO);
25621 +       __VCMD(virt_stat,        3, VCA_VXI,    VCF_INFO);
25622 +       __VCMD(sock_stat,        3, VCA_VXI,    VCF_INFO);
25623 +       __VCMD(rlimit_stat,      3, VCA_VXI,    VCF_INFO);
25624 +
25625 +       __VCMD(task_nid,         2, VCA_NONE,   0);
25626 +       __VCMD(nx_info,          3, VCA_NXI,    VCF_INFO);
25627 +       __VCMD(get_ncaps,        3, VCA_NXI,    VCF_INFO);
25628 +       __VCMD(get_nflags,       3, VCA_NXI,    VCF_INFO);
25629 +
25630 +       __VCMD(task_tag,         2, VCA_NONE,   0);
25631 +
25632 +       __VCMD(get_iattr,        2, VCA_NONE,   0);
25633 +       __VCMD(fget_iattr,       2, VCA_NONE,   0);
25634 +       __VCMD(get_dlimit,       3, VCA_NONE,   VCF_INFO);
25635 +       __VCMD(get_sched,        3, VCA_VXI,    VCF_INFO);
25636 +       __VCMD(sched_info,       3, VCA_VXI,    VCF_INFO | VCF_ZIDOK);
25637 +
25638 +       /* lower admin commands */
25639 +       __VCMD(wait_exit,        4, VCA_VXI,    VCF_INFO);
25640 +       __VCMD(ctx_create_v0,    5, VCA_NONE,   0);
25641 +       __VCMD(ctx_create,       5, VCA_NONE,   0);
25642 +       __VCMD(ctx_migrate_v0,   5, VCA_VXI,    VCF_ADMIN);
25643 +       __VCMD(ctx_migrate,      5, VCA_VXI,    VCF_ADMIN);
25644 +       __VCMD(enter_space_v0,   5, VCA_VXI,    VCF_ADMIN);
25645 +       __VCMD(enter_space_v1,   5, VCA_VXI,    VCF_ADMIN);
25646 +       __VCMD(enter_space,      5, VCA_VXI,    VCF_ADMIN);
25647 +
25648 +       __VCMD(net_create_v0,    5, VCA_NONE,   0);
25649 +       __VCMD(net_create,       5, VCA_NONE,   0);
25650 +       __VCMD(net_migrate,      5, VCA_NXI,    VCF_ADMIN);
25651 +
25652 +       __VCMD(tag_migrate,      5, VCA_NONE,   VCF_ADMIN);
25653 +
25654 +       /* higher admin commands */
25655 +       __VCMD(ctx_kill,         6, VCA_VXI,    VCF_ARES);
25656 +       __VCMD(set_space_v1,     7, VCA_VXI,    VCF_ARES | VCF_SETUP);
25657 +       __VCMD(set_space,        7, VCA_VXI,    VCF_ARES | VCF_SETUP);
25658 +
25659 +       __VCMD(set_ccaps,        7, VCA_VXI,    VCF_ARES | VCF_SETUP);
25660 +       __VCMD(set_bcaps,        7, VCA_VXI,    VCF_ARES | VCF_SETUP);
25661 +       __VCMD(set_cflags,       7, VCA_VXI,    VCF_ARES | VCF_SETUP);
25662 +       __VCMD(set_badness,      7, VCA_VXI,    VCF_ARES | VCF_SETUP);
25663 +
25664 +       __VCMD(set_vhi_name,     7, VCA_VXI,    VCF_ARES | VCF_SETUP);
25665 +       __VCMD(set_rlimit,       7, VCA_VXI,    VCF_ARES | VCF_SETUP);
25666 +       __VCMD(set_sched,        7, VCA_VXI,    VCF_ARES | VCF_SETUP);
25667 +       __VCMD(set_sched_v4,     7, VCA_VXI,    VCF_ARES | VCF_SETUP);
25668 +
25669 +       __VCMD(set_ncaps,        7, VCA_NXI,    VCF_ARES | VCF_SETUP);
25670 +       __VCMD(set_nflags,       7, VCA_NXI,    VCF_ARES | VCF_SETUP);
25671 +       __VCMD(net_add,          8, VCA_NXI,    VCF_ARES | VCF_SETUP);
25672 +       __VCMD(net_remove,       8, VCA_NXI,    VCF_ARES | VCF_SETUP);
25673 +       __VCMD(net_add_ipv4,     8, VCA_NXI,    VCF_ARES | VCF_SETUP);
25674 +       __VCMD(net_remove_ipv4,  8, VCA_NXI,    VCF_ARES | VCF_SETUP);
25675 +#ifdef CONFIG_IPV6
25676 +       __VCMD(net_add_ipv6,     8, VCA_NXI,    VCF_ARES | VCF_SETUP);
25677 +       __VCMD(net_remove_ipv6,  8, VCA_NXI,    VCF_ARES | VCF_SETUP);
25678 +#endif
25679 +       __VCMD(set_iattr,        7, VCA_NONE,   0);
25680 +       __VCMD(fset_iattr,       7, VCA_NONE,   0);
25681 +       __VCMD(set_dlimit,       7, VCA_NONE,   VCF_ARES);
25682 +       __VCMD(add_dlimit,       8, VCA_NONE,   VCF_ARES);
25683 +       __VCMD(rem_dlimit,       8, VCA_NONE,   VCF_ARES);
25684 +
25685 +#ifdef CONFIG_VSERVER_DEVICE
25686 +       __VCMD(set_mapping,      8, VCA_VXI,    VCF_ARES|VCF_ZIDOK);
25687 +       __VCMD(unset_mapping,    8, VCA_VXI,    VCF_ARES|VCF_ZIDOK);
25688 +#endif
25689 +       /* debug level admin commands */
25690 +#ifdef CONFIG_VSERVER_HISTORY
25691 +       __VCMD(dump_history,     9, VCA_NONE,   0);
25692 +       __VCMD(read_history,     9, VCA_NONE,   0);
25693 +#endif
25694 +#ifdef CONFIG_VSERVER_MONITOR
25695 +       __VCMD(read_monitor,     9, VCA_NONE,   0);
25696 +#endif
25697 +
25698 +       default:
25699 +               perm = -1;
25700 +       }
25701 +
25702 +       vxdprintk(VXD_CBIT(switch, 0),
25703 +               "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]",
25704 +               VC_CATEGORY(cmd), VC_COMMAND(cmd),
25705 +               VC_VERSION(cmd), id, data, compat,
25706 +               perm, args, flags);
25707 +
25708 +       ret = -ENOSYS;
25709 +       if (perm < 0)
25710 +               goto out;
25711 +
25712 +       state = 1;
25713 +       if (!capable(CAP_CONTEXT))
25714 +               goto out;
25715 +
25716 +       state = 2;
25717 +       /* moved here from the individual commands */
25718 +       ret = -EPERM;
25719 +       if ((perm > 1) && !capable(CAP_SYS_ADMIN))
25720 +               goto out;
25721 +
25722 +       state = 3;
25723 +       /* vcmd involves resource management  */
25724 +       ret = -EPERM;
25725 +       if ((flags & VCF_ARES) && !capable(CAP_SYS_RESOURCE))
25726 +               goto out;
25727 +
25728 +       state = 4;
25729 +       /* various legacy exceptions */
25730 +       switch (cmd) {
25731 +       /* will go away when spectator is a cap */
25732 +       case VCMD_ctx_migrate_v0:
25733 +       case VCMD_ctx_migrate:
25734 +               if (id == 1) {
25735 +                       current->xid = 1;
25736 +                       ret = 1;
25737 +                       goto out;
25738 +               }
25739 +               break;
25740 +
25741 +       /* will go away when spectator is a cap */
25742 +       case VCMD_net_migrate:
25743 +               if (id == 1) {
25744 +                       current->nid = 1;
25745 +                       ret = 1;
25746 +                       goto out;
25747 +               }
25748 +               break;
25749 +       }
25750 +
25751 +       /* vcmds are fine by default */
25752 +       permit = 1;
25753 +
25754 +       /* admin type vcmds require admin ... */
25755 +       if (flags & VCF_ADMIN)
25756 +               permit = vx_check(0, VS_ADMIN) ? 1 : 0;
25757 +
25758 +       /* ... but setup type vcmds override that */
25759 +       if (!permit && (flags & VCF_SETUP))
25760 +               permit = vx_flags(VXF_STATE_SETUP, 0) ? 2 : 0;
25761 +
25762 +       state = 5;
25763 +       ret = -EPERM;
25764 +       if (!permit)
25765 +               goto out;
25766 +
25767 +       state = 6;
25768 +       if (!id && (flags & VCF_ZIDOK))
25769 +               goto skip_id;
25770 +
25771 +       ret = -ESRCH;
25772 +       if (args & VCA_VXI) {
25773 +               vxi = lookup_vx_info(id);
25774 +               if (!vxi)
25775 +                       goto out;
25776 +
25777 +               if ((flags & VCF_ADMIN) &&
25778 +                       /* special case kill for shutdown */
25779 +                       (cmd != VCMD_ctx_kill) &&
25780 +                       /* can context be administrated? */
25781 +                       !vx_info_flags(vxi, VXF_STATE_ADMIN, 0)) {
25782 +                       ret = -EACCES;
25783 +                       goto out_vxi;
25784 +               }
25785 +       }
25786 +       state = 7;
25787 +       if (args & VCA_NXI) {
25788 +               nxi = lookup_nx_info(id);
25789 +               if (!nxi)
25790 +                       goto out_vxi;
25791 +
25792 +               if ((flags & VCF_ADMIN) &&
25793 +                       /* can context be administrated? */
25794 +                       !nx_info_flags(nxi, NXF_STATE_ADMIN, 0)) {
25795 +                       ret = -EACCES;
25796 +                       goto out_nxi;
25797 +               }
25798 +       }
25799 +skip_id:
25800 +       state = 8;
25801 +       ret = do_vcmd(cmd, id, vxi, nxi, data, compat);
25802 +
25803 +out_nxi:
25804 +       if ((args & VCA_NXI) && nxi)
25805 +               put_nx_info(nxi);
25806 +out_vxi:
25807 +       if ((args & VCA_VXI) && vxi)
25808 +               put_vx_info(vxi);
25809 +out:
25810 +       vxdprintk(VXD_CBIT(switch, 1),
25811 +               "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]",
25812 +               VC_CATEGORY(cmd), VC_COMMAND(cmd),
25813 +               VC_VERSION(cmd), ret, ret, state, permit);
25814 +       return ret;
25815 +}
25816 +
25817 +asmlinkage long
25818 +sys_vserver(uint32_t cmd, uint32_t id, void __user *data)
25819 +{
25820 +       return do_vserver(cmd, id, data, 0);    /* 0: not the compat entry point */
25821 +}
25822 +
25823 +#ifdef CONFIG_COMPAT
25824 +
25825 +asmlinkage long
25826 +sys32_vserver(uint32_t cmd, uint32_t id, void __user *data)
25827 +{
25828 +       return do_vserver(cmd, id, data, 1);    /* 1: entry point built under CONFIG_COMPAT */
25829 +}
25830 +
25831 +#endif /* CONFIG_COMPAT */
25832 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/sysctl.c linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/sysctl.c
25833 --- linux-2.6.31.6/kernel/vserver/sysctl.c      1970-01-01 01:00:00.000000000 +0100
25834 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/sysctl.c        2009-09-10 16:11:43.000000000 +0200
25835 @@ -0,0 +1,244 @@
25836 +/*
25837 + *  kernel/vserver/sysctl.c
25838 + *
25839 + *  Virtual Context Support
25840 + *
25841 + *  Copyright (C) 2004-2007  Herbert Pötzl
25842 + *
25843 + *  V0.01  basic structure
25844 + *
25845 + */
25846 +
25847 +#include <linux/module.h>
25848 +#include <linux/ctype.h>
25849 +#include <linux/sysctl.h>
25850 +#include <linux/parser.h>
25851 +#include <asm/uaccess.h>
25852 +
25853 +
25854 +enum {
25855 +       CTL_DEBUG_ERROR         = 0,
25856 +       CTL_DEBUG_SWITCH        = 1,
25857 +       CTL_DEBUG_XID,
25858 +       CTL_DEBUG_NID,
25859 +       CTL_DEBUG_TAG,
25860 +       CTL_DEBUG_NET,
25861 +       CTL_DEBUG_LIMIT,
25862 +       CTL_DEBUG_CRES,
25863 +       CTL_DEBUG_DLIM,
25864 +       CTL_DEBUG_QUOTA,
25865 +       CTL_DEBUG_CVIRT,
25866 +       CTL_DEBUG_SPACE,
25867 +       CTL_DEBUG_MISC,
25868 +};
25869 +
25870 +
25871 +unsigned int vx_debug_switch   = 0;
25872 +unsigned int vx_debug_xid      = 0;
25873 +unsigned int vx_debug_nid      = 0;
25874 +unsigned int vx_debug_tag      = 0;
25875 +unsigned int vx_debug_net      = 0;
25876 +unsigned int vx_debug_limit    = 0;
25877 +unsigned int vx_debug_cres     = 0;
25878 +unsigned int vx_debug_dlim     = 0;
25879 +unsigned int vx_debug_quota    = 0;
25880 +unsigned int vx_debug_cvirt    = 0;
25881 +unsigned int vx_debug_space    = 0;
25882 +unsigned int vx_debug_misc     = 0;
25883 +
25884 +
25885 +static struct ctl_table_header *vserver_table_header;
25886 +static ctl_table vserver_root_table[];
25887 +
25888 +/* Register vserver_root_table once; a no-op while vserver_table_header is already set. */
25889 +void vserver_register_sysctl(void)
25890 +{
25891 +       if (!vserver_table_header) {
25892 +               vserver_table_header = register_sysctl_table(vserver_root_table);
25893 +       }
25894 +
25895 +}
25896 +
25897 +void vserver_unregister_sysctl(void)
25898 +{
25899 +       if (vserver_table_header) {     /* only if a registration is outstanding */
25900 +               unregister_sysctl_table(vserver_table_header);
25901 +               vserver_table_header = NULL;    /* lets vserver_register_sysctl() register again */
25902 +       }
25903 +}
25904 +
25905 +/* proc handler for the vx_debug_* entries: *table->data is read/written as unsigned decimal text. */
25906 +static int proc_dodebug(ctl_table *table, int write,
25907 +       struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
25908 +{
25909 +       char            tmpbuf[20], *p, c;
25910 +       unsigned int    value;
25911 +       size_t          left, len;
25912 +
25913 +       if ((*ppos && !write) || !*lenp) {      /* read at a non-zero offset, or empty request */
25914 +               *lenp = 0;
25915 +               return 0;
25916 +       }
25917 +
25918 +       left = *lenp;
25919 +
25920 +       if (write) {
25921 +               if (!access_ok(VERIFY_READ, buffer, left))
25922 +                       return -EFAULT;
25923 +               p = (char *)buffer;
25924 +               while (left && __get_user(c, p) >= 0 && isspace(c))
25925 +                       left--, p++;
25926 +               if (!left)
25927 +                       goto done;
25928 +               /* what remains after leading whitespace must fit tmpbuf with its NUL */
25929 +               if (left > sizeof(tmpbuf) - 1)
25930 +                       return -EINVAL;
25931 +               if (copy_from_user(tmpbuf, p, left))
25932 +                       return -EFAULT;
25933 +               tmpbuf[left] = '\0';
25934 +               /* accumulate decimal digits; the number must end at NUL or whitespace */
25935 +               for (p = tmpbuf, value = 0; '0' <= *p && *p <= '9'; p++, left--)
25936 +                       value = 10 * value + (*p - '0');
25937 +               if (*p && !isspace(*p))
25938 +                       return -EINVAL;
25939 +               while (left && isspace(*p))
25940 +                       left--, p++;
25941 +               *(unsigned int *)table->data = value;
25942 +       } else {        /* read: print as unsigned so a set top bit never shows up as '-' */
25943 +               if (!access_ok(VERIFY_WRITE, buffer, left))
25944 +                       return -EFAULT;
25945 +               len = sprintf(tmpbuf, "%u", *(unsigned int *)table->data);
25946 +               if (len > left)
25947 +                       len = left;
25948 +               if (__copy_to_user(buffer, tmpbuf, len))
25949 +                       return -EFAULT;
25950 +               if ((left -= len) > 0) {
25951 +                       if (put_user('\n', (char *)buffer + len))
25952 +                               return -EFAULT;
25953 +                       left--;
25954 +               }
25955 +       }
25956 +
25957 +done:
25958 +       *lenp -= left;          /* bytes actually consumed (write) or produced (read) */
25959 +       *ppos += *lenp;
25960 +       return 0;
25961 +}
25962 +
25963 +static int zero;
25964 +
25965 +#define        CTL_ENTRY(ctl, name)                            \
25966 +       {                                               \
25967 +               .ctl_name       = ctl,                  \
25968 +               .procname       = #name,                \
25969 +               .data           = &vx_ ## name,         \
25970 +               .maxlen         = sizeof(int),          \
25971 +               .mode           = 0644,                 \
25972 +               .proc_handler   = &proc_dodebug,        \
25973 +               .strategy       = &sysctl_intvec,       \
25974 +               .extra1         = &zero,                \
25975 +       }
25976 +
25977 +static ctl_table vserver_debug_table[] = {
25978 +       CTL_ENTRY(CTL_DEBUG_SWITCH,     debug_switch),
25979 +       CTL_ENTRY(CTL_DEBUG_XID,        debug_xid),
25980 +       CTL_ENTRY(CTL_DEBUG_NID,        debug_nid),
25981 +       CTL_ENTRY(CTL_DEBUG_TAG,        debug_tag),
25982 +       CTL_ENTRY(CTL_DEBUG_NET,        debug_net),
25983 +       CTL_ENTRY(CTL_DEBUG_LIMIT,      debug_limit),
25984 +       CTL_ENTRY(CTL_DEBUG_CRES,       debug_cres),
25985 +       CTL_ENTRY(CTL_DEBUG_DLIM,       debug_dlim),
25986 +       CTL_ENTRY(CTL_DEBUG_QUOTA,      debug_quota),
25987 +       CTL_ENTRY(CTL_DEBUG_CVIRT,      debug_cvirt),
25988 +       CTL_ENTRY(CTL_DEBUG_SPACE,      debug_space),
25989 +       CTL_ENTRY(CTL_DEBUG_MISC,       debug_misc),
25990 +       { .ctl_name = 0 }
25991 +};
25992 +
25993 +static ctl_table vserver_root_table[] = {
25994 +       {
25995 +               .ctl_name       = CTL_VSERVER,
25996 +               .procname       = "vserver",
25997 +               .mode           = 0555,
25998 +               .child          = vserver_debug_table
25999 +       },
26000 +       { .ctl_name = 0 }
26001 +};
26002 +
26003 +
26004 +static match_table_t tokens = {
26005 +       { CTL_DEBUG_SWITCH,     "switch=%x"     },
26006 +       { CTL_DEBUG_XID,        "xid=%x"        },
26007 +       { CTL_DEBUG_NID,        "nid=%x"        },
26008 +       { CTL_DEBUG_TAG,        "tag=%x"        },
26009 +       { CTL_DEBUG_NET,        "net=%x"        },
26010 +       { CTL_DEBUG_LIMIT,      "limit=%x"      },
26011 +       { CTL_DEBUG_CRES,       "cres=%x"       },
26012 +       { CTL_DEBUG_DLIM,       "dlim=%x"       },
26013 +       { CTL_DEBUG_QUOTA,      "quota=%x"      },
26014 +       { CTL_DEBUG_CVIRT,      "cvirt=%x"      },
26015 +       { CTL_DEBUG_SPACE,      "space=%x"      },
26016 +       { CTL_DEBUG_MISC,       "misc=%x"       },
26017 +       { CTL_DEBUG_ERROR,      NULL            }
26018 +};
26019 +
26020 +#define        HANDLE_CASE(id, name, val)                              \
26021 +       case CTL_DEBUG_ ## id:                                  \
26022 +               vx_debug_ ## name = val;                        \
26023 +               printk("vs_debug_" #name "=0x%x\n", val);       \
26024 +               break
26025 +
26026 +/* Boot-time handler for "vsdebug=": each comma-separated <name>=<value> item sets one vx_debug_* word. */
26027 +static int __init vs_debug_setup(char *str)
26028 +{
26029 +       char *p;
26030 +       int token;
26031 +
26032 +       printk("vs_debug_setup(%s)\n", str);
26033 +       while ((p = strsep(&str, ",")) != NULL) {
26034 +               substring_t args[MAX_OPT_ARGS];
26035 +               unsigned int value;
26036 +
26037 +               if (!*p)        /* empty item, e.g. from ",," */
26038 +                       continue;
26039 +               /* NOTE(review): tokens use "%x" but the value is parsed with base 0 - confirm bare hex input behaves as intended */
26040 +               token = match_token(p, tokens, args);
26041 +               value = (token > 0) ? simple_strtoul(args[0].from, NULL, 0) : 0;
26042 +
26043 +               switch (token) {
26044 +               HANDLE_CASE(SWITCH, switch, value);
26045 +               HANDLE_CASE(XID,    xid,    value);
26046 +               HANDLE_CASE(NID,    nid,    value);
26047 +               HANDLE_CASE(TAG,    tag,    value);
26048 +               HANDLE_CASE(NET,    net,    value);
26049 +               HANDLE_CASE(LIMIT,  limit,  value);
26050 +               HANDLE_CASE(CRES,   cres,   value);
26051 +               HANDLE_CASE(DLIM,   dlim,   value);
26052 +               HANDLE_CASE(QUOTA,  quota,  value);
26053 +               HANDLE_CASE(CVIRT,  cvirt,  value);
26054 +               HANDLE_CASE(SPACE,  space,  value);
26055 +               HANDLE_CASE(MISC,   misc,   value);
26056 +               default:        /* CTL_DEBUG_ERROR (0) or any token without a case above */
26057 +                       return -EINVAL;
26058 +                       break;  /* not reached */
26059 +               }
26060 +       }
26061 +       return 1;
26062 +}
26063 +
26064 +__setup("vsdebug=", vs_debug_setup);
26065 +
26066 +
26067 +EXPORT_SYMBOL_GPL(vx_debug_switch);
26068 +EXPORT_SYMBOL_GPL(vx_debug_xid);
26069 +EXPORT_SYMBOL_GPL(vx_debug_nid);
26070 +EXPORT_SYMBOL_GPL(vx_debug_tag);       /* exported like every other vx_debug_* word */
26071 +EXPORT_SYMBOL_GPL(vx_debug_net);
26072 +EXPORT_SYMBOL_GPL(vx_debug_limit);
26073 +EXPORT_SYMBOL_GPL(vx_debug_cres);
26074 +EXPORT_SYMBOL_GPL(vx_debug_dlim);
26075 +EXPORT_SYMBOL_GPL(vx_debug_quota);
26076 +EXPORT_SYMBOL_GPL(vx_debug_cvirt);
26077 +EXPORT_SYMBOL_GPL(vx_debug_space);
26078 +EXPORT_SYMBOL_GPL(vx_debug_misc);
26079 +
26080 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/tag.c linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/tag.c
26081 --- linux-2.6.31.6/kernel/vserver/tag.c 1970-01-01 01:00:00.000000000 +0100
26082 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/tag.c   2009-09-10 16:11:43.000000000 +0200
26083 @@ -0,0 +1,63 @@
26084 +/*
26085 + *  linux/kernel/vserver/tag.c
26086 + *
26087 + *  Virtual Server: Shallow Tag Space
26088 + *
26089 + *  Copyright (C) 2007  Herbert Pötzl
26090 + *
26091 + *  V0.01  basic implementation
26092 + *
26093 + */
26094 +
26095 +#include <linux/sched.h>
26096 +#include <linux/vserver/debug.h>
26097 +#include <linux/vs_pid.h>
26098 +#include <linux/vs_tag.h>
26099 +
26100 +#include <linux/vserver/tag_cmd.h>
26101 +
26102 +/* Set p->tag to tag while holding task_lock(p); BUG()s on a NULL task and always returns 0. */
26103 +int dx_migrate_task(struct task_struct *p, tag_t tag)
26104 +{
26105 +       if (!p)
26106 +               BUG();
26107 +
26108 +       vxdprintk(VXD_CBIT(tag, 5),
26109 +               "dx_migrate_task(%p[#%d],#%d)", p, p->tag, tag);
26110 +
26111 +       task_lock(p);
26112 +       p->tag = tag;
26113 +       task_unlock(p);
26114 +
26115 +       vxdprintk(VXD_CBIT(tag, 5),
26116 +               "moved task %p into [#%d]", p, tag);
26117 +       return 0;
26118 +}
26119 +
26120 +/* vserver syscall commands below here */
26121 +
26122 +/* task tag functions */
26123 +
26124 +/* Return the tag of the task found by find_task_by_real_pid(id), -ESRCH if none; id 0 yields dx_current_tag(). */
26125 +int vc_task_tag(uint32_t id)
26126 +{
26127 +       tag_t tag;
26128 +
26129 +       if (id) {
26130 +               struct task_struct *tsk;
26131 +               read_lock(&tasklist_lock);
26132 +               tsk = find_task_by_real_pid(id);
26133 +               tag = (tsk) ? tsk->tag : -ESRCH;
26134 +               read_unlock(&tasklist_lock);
26135 +       } else
26136 +               tag = dx_current_tag();
26137 +       return tag;
26138 +}
26139 +
26140 +/* Migrate current via dx_migrate_task(); only the low 16 bits of tag are passed on. */
26141 +int vc_tag_migrate(uint32_t tag)
26142 +{
26143 +       return dx_migrate_task(current, tag & 0xFFFF);
26144 +}
26145 +
26146 +
26147 diff -NurpP --minimal linux-2.6.31.6/kernel/vserver/vci_config.h linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/vci_config.h
26148 --- linux-2.6.31.6/kernel/vserver/vci_config.h  1970-01-01 01:00:00.000000000 +0100
26149 +++ linux-2.6.31.6-vs2.3.0.36.24/kernel/vserver/vci_config.h    2009-09-10 16:11:43.000000000 +0200
26150 @@ -0,0 +1,81 @@
26151 +
26152 +/*  interface version */
26153 +
26154 +#define VCI_VERSION            0x00020304
26155 +
26156 +
26157 +enum {
26158 +       VCI_KCBIT_NO_DYNAMIC = 0,
26159 +
26160 +       VCI_KCBIT_PROC_SECURE = 4,
26161 +       VCI_KCBIT_HARDCPU = 5,
26162 +       VCI_KCBIT_IDLELIMIT = 6,
26163 +       VCI_KCBIT_IDLETIME = 7,
26164 +
26165 +       VCI_KCBIT_COWBL = 8,
26166 +       VCI_KCBIT_FULLCOWBL = 9,
26167 +       VCI_KCBIT_SPACES = 10,
26168 +       VCI_KCBIT_NETV2 = 11,
26169 +
26170 +       VCI_KCBIT_DEBUG = 16,
26171 +       VCI_KCBIT_HISTORY = 20,
26172 +       VCI_KCBIT_TAGGED = 24,
26173 +       VCI_KCBIT_PPTAG = 28,
26174 +
26175 +       VCI_KCBIT_MORE = 31,
26176 +};
26177 +
26178 +/* Compose the kernel config word: one VCI_KCBIT_* bit per compiled-in option plus the tagging mode number. */
26179 +static inline uint32_t vci_kernel_config(void)
26180 +{
26181 +       return
26182 +       (1 << VCI_KCBIT_NO_DYNAMIC) |
26183 +
26184 +       /* configured features */
26185 +#ifdef CONFIG_VSERVER_PROC_SECURE
26186 +       (1 << VCI_KCBIT_PROC_SECURE) |
26187 +#endif
26188 +#ifdef CONFIG_VSERVER_HARDCPU
26189 +       (1 << VCI_KCBIT_HARDCPU) |
26190 +#endif
26191 +#ifdef CONFIG_VSERVER_IDLELIMIT
26192 +       (1 << VCI_KCBIT_IDLELIMIT) |
26193 +#endif
26194 +#ifdef CONFIG_VSERVER_IDLETIME
26195 +       (1 << VCI_KCBIT_IDLETIME) |
26196 +#endif
26197 +#ifdef CONFIG_VSERVER_COWBL
26198 +       (1 << VCI_KCBIT_COWBL) |
26199 +       (1 << VCI_KCBIT_FULLCOWBL) |
26200 +#endif
26201 +       (1 << VCI_KCBIT_SPACES) |       /* set unconditionally */
26202 +       (1 << VCI_KCBIT_NETV2) |        /* set unconditionally */
26203 +
26204 +       /* debug options */
26205 +#ifdef CONFIG_VSERVER_DEBUG
26206 +       (1 << VCI_KCBIT_DEBUG) |
26207 +#endif
26208 +#ifdef CONFIG_VSERVER_HISTORY
26209 +       (1 << VCI_KCBIT_HISTORY) |
26210 +#endif
26211 +
26212 +       /* inode context tagging: mode number 0..5, or 7 when no known mode is configured */
26213 +#if    defined(CONFIG_TAGGING_NONE)
26214 +       (0 << VCI_KCBIT_TAGGED) |
26215 +#elif  defined(CONFIG_TAGGING_UID16)
26216 +       (1 << VCI_KCBIT_TAGGED) |
26217 +#elif  defined(CONFIG_TAGGING_GID16)
26218 +       (2 << VCI_KCBIT_TAGGED) |
26219 +#elif  defined(CONFIG_TAGGING_ID24)
26220 +       (3 << VCI_KCBIT_TAGGED) |
26221 +#elif  defined(CONFIG_TAGGING_INTERN)
26222 +       (4 << VCI_KCBIT_TAGGED) |
26223 +#elif  defined(CONFIG_TAGGING_RUNTIME)
26224 +       (5 << VCI_KCBIT_TAGGED) |
26225 +#else
26226 +       (7 << VCI_KCBIT_TAGGED) |
26227 +#endif
26228 +       (1 << VCI_KCBIT_PPTAG) |
26229 +       0;      /* terminates the chain of trailing '|' operators */
26230 +}
26231 +
26232 diff -NurpP --minimal linux-2.6.31.6/mm/filemap_xip.c linux-2.6.31.6-vs2.3.0.36.24/mm/filemap_xip.c
26233 --- linux-2.6.31.6/mm/filemap_xip.c     2009-06-11 17:13:27.000000000 +0200
26234 +++ linux-2.6.31.6-vs2.3.0.36.24/mm/filemap_xip.c       2009-09-10 16:11:43.000000000 +0200
26235 @@ -17,6 +17,7 @@
26236  #include <linux/sched.h>
26237  #include <linux/seqlock.h>
26238  #include <linux/mutex.h>
26239 +#include <linux/vs_memory.h>
26240  #include <asm/tlbflush.h>
26241  #include <asm/io.h>
26242  
26243 diff -NurpP --minimal linux-2.6.31.6/mm/fremap.c linux-2.6.31.6-vs2.3.0.36.24/mm/fremap.c
26244 --- linux-2.6.31.6/mm/fremap.c  2009-03-24 14:22:45.000000000 +0100
26245 +++ linux-2.6.31.6-vs2.3.0.36.24/mm/fremap.c    2009-09-10 16:11:43.000000000 +0200
26246 @@ -16,6 +16,7 @@
26247  #include <linux/module.h>
26248  #include <linux/syscalls.h>
26249  #include <linux/mmu_notifier.h>
26250 +#include <linux/vs_memory.h>
26251  
26252  #include <asm/mmu_context.h>
26253  #include <asm/cacheflush.h>
26254 diff -NurpP --minimal linux-2.6.31.6/mm/hugetlb.c linux-2.6.31.6-vs2.3.0.36.24/mm/hugetlb.c
26255 --- linux-2.6.31.6/mm/hugetlb.c 2009-11-12 12:10:12.000000000 +0100
26256 +++ linux-2.6.31.6-vs2.3.0.36.24/mm/hugetlb.c   2009-10-05 23:35:52.000000000 +0200
26257 @@ -24,6 +24,7 @@
26258  #include <asm/io.h>
26259  
26260  #include <linux/hugetlb.h>
26261 +#include <linux/vs_memory.h>
26262  #include "internal.h"
26263  
26264  const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
26265 diff -NurpP --minimal linux-2.6.31.6/mm/memory.c linux-2.6.31.6-vs2.3.0.36.24/mm/memory.c
26266 --- linux-2.6.31.6/mm/memory.c  2009-11-12 12:10:12.000000000 +0100
26267 +++ linux-2.6.31.6-vs2.3.0.36.24/mm/memory.c    2009-10-05 23:35:52.000000000 +0200
26268 @@ -55,6 +55,7 @@
26269  #include <linux/kallsyms.h>
26270  #include <linux/swapops.h>
26271  #include <linux/elf.h>
26272 +// #include <linux/vs_memory.h>
26273  
26274  #include <asm/pgalloc.h>
26275  #include <asm/uaccess.h>
26276 @@ -613,6 +614,9 @@ static int copy_pte_range(struct mm_stru
26277         int progress = 0;
26278         int rss[2];
26279  
26280 +       if (!vx_rss_avail(dst_mm, ((end - addr)/PAGE_SIZE + 1)))
26281 +               return -ENOMEM;
26282 +
26283  again:
26284         rss[1] = rss[0] = 0;
26285         dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
26286 @@ -2627,6 +2631,8 @@ static int do_anonymous_page(struct mm_s
26287         /* Allocate our own private page. */
26288         pte_unmap(page_table);
26289  
26290 +       if (!vx_rss_avail(mm, 1))
26291 +               goto oom;
26292         if (unlikely(anon_vma_prepare(vma)))
26293                 goto oom;
26294         page = alloc_zeroed_user_highpage_movable(vma, address);
26295 @@ -2910,6 +2916,7 @@ static inline int handle_pte_fault(struc
26296  {
26297         pte_t entry;
26298         spinlock_t *ptl;
26299 +       int ret = 0, type = VXPT_UNKNOWN;
26300  
26301         entry = *pte;
26302         if (!pte_present(entry)) {
26303 @@ -2934,9 +2941,12 @@ static inline int handle_pte_fault(struc
26304         if (unlikely(!pte_same(*pte, entry)))
26305                 goto unlock;
26306         if (flags & FAULT_FLAG_WRITE) {
26307 -               if (!pte_write(entry))
26308 -                       return do_wp_page(mm, vma, address,
26309 +               if (!pte_write(entry)) {
26310 +                       ret = do_wp_page(mm, vma, address,
26311                                         pte, pmd, ptl, entry);
26312 +                       type = VXPT_WRITE;
26313 +                       goto out;
26314 +               }
26315                 entry = pte_mkdirty(entry);
26316         }
26317         entry = pte_mkyoung(entry);
26318 @@ -2954,7 +2964,10 @@ static inline int handle_pte_fault(struc
26319         }
26320  unlock:
26321         pte_unmap_unlock(pte, ptl);
26322 -       return 0;
26323 +       ret = 0;
26324 +out:
26325 +       vx_page_fault(mm, vma, type, ret);
26326 +       return ret;
26327  }
26328  
26329  /*
26330 diff -NurpP --minimal linux-2.6.31.6/mm/mlock.c linux-2.6.31.6-vs2.3.0.36.24/mm/mlock.c
26331 --- linux-2.6.31.6/mm/mlock.c   2009-11-12 12:10:12.000000000 +0100
26332 +++ linux-2.6.31.6-vs2.3.0.36.24/mm/mlock.c     2009-10-05 23:35:52.000000000 +0200
26333 @@ -18,6 +18,7 @@
26334  #include <linux/rmap.h>
26335  #include <linux/mmzone.h>
26336  #include <linux/hugetlb.h>
26337 +#include <linux/vs_memory.h>
26338  
26339  #include "internal.h"
26340  
26341 @@ -378,7 +379,7 @@ success:
26342         nr_pages = (end - start) >> PAGE_SHIFT;
26343         if (!lock)
26344                 nr_pages = -nr_pages;
26345 -       mm->locked_vm += nr_pages;
26346 +       vx_vmlocked_add(mm, nr_pages);
26347  
26348         /*
26349          * vm_flags is protected by the mmap_sem held in write mode.
26350 @@ -451,7 +452,7 @@ static int do_mlock(unsigned long start,
26351  
26352  SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
26353  {
26354 -       unsigned long locked;
26355 +       unsigned long locked, grow;
26356         unsigned long lock_limit;
26357         int error = -ENOMEM;
26358  
26359 @@ -464,8 +465,10 @@ SYSCALL_DEFINE2(mlock, unsigned long, st
26360         len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
26361         start &= PAGE_MASK;
26362  
26363 -       locked = len >> PAGE_SHIFT;
26364 -       locked += current->mm->locked_vm;
26365 +       grow = len >> PAGE_SHIFT;
26366 +       if (!vx_vmlocked_avail(current->mm, grow))
26367 +               goto out;
26368 +       locked = current->mm->locked_vm + grow;
26369  
26370         lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
26371         lock_limit >>= PAGE_SHIFT;
26372 @@ -473,6 +476,7 @@ SYSCALL_DEFINE2(mlock, unsigned long, st
26373         /* check against resource limits */
26374         if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
26375                 error = do_mlock(start, len, 1);
26376 +out:
26377         up_write(&current->mm->mmap_sem);
26378         return error;
26379  }
26380 @@ -534,6 +538,8 @@ SYSCALL_DEFINE1(mlockall, int, flags)
26381         lock_limit >>= PAGE_SHIFT;
26382  
26383         ret = -ENOMEM;
26384 +       if (!vx_vmlocked_avail(current->mm, current->mm->total_vm))
26385 +               goto out;
26386         if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
26387             capable(CAP_IPC_LOCK))
26388                 ret = do_mlockall(flags);
26389 @@ -608,8 +614,10 @@ int account_locked_memory(struct mm_stru
26390         if (lim < vm)
26391                 goto out;
26392  
26393 -       mm->total_vm  += pgsz;
26394 -       mm->locked_vm += pgsz;
26395 +       // mm->total_vm  += pgsz;
26396 +       vx_vmpages_add(mm, pgsz);
26397 +       // mm->locked_vm += pgsz;
26398 +       vx_vmlocked_add(mm, pgsz);
26399  
26400         error = 0;
26401   out:
26402 @@ -623,8 +631,10 @@ void refund_locked_memory(struct mm_stru
26403  
26404         down_write(&mm->mmap_sem);
26405  
26406 -       mm->total_vm  -= pgsz;
26407 -       mm->locked_vm -= pgsz;
26408 +       // mm->total_vm  -= pgsz;
26409 +       vx_vmpages_sub(mm, pgsz);
26410 +       // mm->locked_vm -= pgsz;
26411 +       vx_vmlocked_sub(mm, pgsz);
26412  
26413         up_write(&mm->mmap_sem);
26414  }
26415 diff -NurpP --minimal linux-2.6.31.6/mm/mmap.c linux-2.6.31.6-vs2.3.0.36.24/mm/mmap.c
26416 --- linux-2.6.31.6/mm/mmap.c    2009-11-12 12:10:12.000000000 +0100
26417 +++ linux-2.6.31.6-vs2.3.0.36.24/mm/mmap.c      2009-10-05 23:35:52.000000000 +0200
26418 @@ -1222,7 +1222,8 @@ munmap_back:
26419  out:
26420         perf_counter_mmap(vma);
26421  
26422 -       mm->total_vm += len >> PAGE_SHIFT;
26423 +       // mm->total_vm += len >> PAGE_SHIFT;
26424 +       vx_vmpages_add(mm, len >> PAGE_SHIFT);
26425         vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
26426         if (vm_flags & VM_LOCKED) {
26427                 /*
26428 @@ -1231,7 +1232,8 @@ out:
26429                 long nr_pages = mlock_vma_pages_range(vma, addr, addr + len);
26430                 if (nr_pages < 0)
26431                         return nr_pages;        /* vma gone! */
26432 -               mm->locked_vm += (len >> PAGE_SHIFT) - nr_pages;
26433 +               // mm->locked_vm += (len >> PAGE_SHIFT) - nr_pages;
26434 +               vx_vmlocked_add(mm, (len >> PAGE_SHIFT) - nr_pages);
26435         } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
26436                 make_pages_present(addr, addr + len);
26437         return addr;
26438 @@ -1578,9 +1580,9 @@ static int acct_stack_growth(struct vm_a
26439                 return -ENOMEM;
26440  
26441         /* Ok, everything looks good - let it rip */
26442 -       mm->total_vm += grow;
26443 +       vx_vmpages_add(mm, grow);
26444         if (vma->vm_flags & VM_LOCKED)
26445 -               mm->locked_vm += grow;
26446 +               vx_vmlocked_add(mm, grow);
26447         vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
26448         return 0;
26449  }
26450 @@ -1755,7 +1757,8 @@ static void remove_vma_list(struct mm_st
26451         do {
26452                 long nrpages = vma_pages(vma);
26453  
26454 -               mm->total_vm -= nrpages;
26455 +               // mm->total_vm -= nrpages;
26456 +               vx_vmpages_sub(mm, nrpages);
26457                 vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
26458                 vma = remove_vma(vma);
26459         } while (vma);
26460 @@ -1927,7 +1930,8 @@ int do_munmap(struct mm_struct *mm, unsi
26461                 struct vm_area_struct *tmp = vma;
26462                 while (tmp && tmp->vm_start < end) {
26463                         if (tmp->vm_flags & VM_LOCKED) {
26464 -                               mm->locked_vm -= vma_pages(tmp);
26465 +                               // mm->locked_vm -= vma_pages(tmp);
26466 +                               vx_vmlocked_sub(mm, vma_pages(tmp));
26467                                 munlock_vma_pages_all(tmp);
26468                         }
26469                         tmp = tmp->vm_next;
26470 @@ -2016,6 +2020,8 @@ unsigned long do_brk(unsigned long addr,
26471                 lock_limit >>= PAGE_SHIFT;
26472                 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
26473                         return -EAGAIN;
26474 +               if (!vx_vmlocked_avail(mm, len >> PAGE_SHIFT))
26475 +                       return -ENOMEM;
26476         }
26477  
26478         /*
26479 @@ -2042,7 +2048,8 @@ unsigned long do_brk(unsigned long addr,
26480         if (mm->map_count > sysctl_max_map_count)
26481                 return -ENOMEM;
26482  
26483 -       if (security_vm_enough_memory(len >> PAGE_SHIFT))
26484 +       if (security_vm_enough_memory(len >> PAGE_SHIFT) ||
26485 +               !vx_vmpages_avail(mm, len >> PAGE_SHIFT))
26486                 return -ENOMEM;
26487  
26488         /* Can we just expand an old private anonymous mapping? */
26489 @@ -2068,10 +2075,13 @@ unsigned long do_brk(unsigned long addr,
26490         vma->vm_page_prot = vm_get_page_prot(flags);
26491         vma_link(mm, vma, prev, rb_link, rb_parent);
26492  out:
26493 -       mm->total_vm += len >> PAGE_SHIFT;
26494 +       // mm->total_vm += len >> PAGE_SHIFT;
26495 +       vx_vmpages_add(mm, len >> PAGE_SHIFT);
26496 +
26497         if (flags & VM_LOCKED) {
26498                 if (!mlock_vma_pages_range(vma, addr, addr + len))
26499 -                       mm->locked_vm += (len >> PAGE_SHIFT);
26500 +                       // mm->locked_vm += (len >> PAGE_SHIFT);
26501 +                       vx_vmlocked_add(mm, len >> PAGE_SHIFT);
26502         }
26503         return addr;
26504  }
26505 @@ -2114,6 +2124,11 @@ void exit_mmap(struct mm_struct *mm)
26506         free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
26507         tlb_finish_mmu(tlb, 0, end);
26508  
26509 +       set_mm_counter(mm, file_rss, 0);
26510 +       set_mm_counter(mm, anon_rss, 0);
26511 +       vx_vmpages_sub(mm, mm->total_vm);
26512 +       vx_vmlocked_sub(mm, mm->locked_vm);
26513 +
26514         /*
26515          * Walk the list again, actually closing and freeing it,
26516          * with preemption enabled, without holding any MM locks.
26517 @@ -2153,7 +2168,8 @@ int insert_vm_struct(struct mm_struct * 
26518         if (__vma && __vma->vm_start < vma->vm_end)
26519                 return -ENOMEM;
26520         if ((vma->vm_flags & VM_ACCOUNT) &&
26521 -            security_vm_enough_memory_mm(mm, vma_pages(vma)))
26522 +               (security_vm_enough_memory_mm(mm, vma_pages(vma)) ||
26523 +               !vx_vmpages_avail(mm, vma_pages(vma))))
26524                 return -ENOMEM;
26525         vma_link(mm, vma, prev, rb_link, rb_parent);
26526         return 0;
26527 @@ -2229,6 +2245,8 @@ int may_expand_vm(struct mm_struct *mm, 
26528  
26529         if (cur + npages > lim)
26530                 return 0;
26531 +       if (!vx_vmpages_avail(mm, npages))
26532 +               return 0;
26533         return 1;
26534  }
26535  
26536 @@ -2306,7 +2324,7 @@ int install_special_mapping(struct mm_st
26537                 return -ENOMEM;
26538         }
26539  
26540 -       mm->total_vm += len >> PAGE_SHIFT;
26541 +       vx_vmpages_add(mm, len >> PAGE_SHIFT);
26542  
26543         perf_counter_mmap(vma);
26544  
26545 diff -NurpP --minimal linux-2.6.31.6/mm/mremap.c linux-2.6.31.6-vs2.3.0.36.24/mm/mremap.c
26546 --- linux-2.6.31.6/mm/mremap.c  2009-03-24 14:22:45.000000000 +0100
26547 +++ linux-2.6.31.6-vs2.3.0.36.24/mm/mremap.c    2009-09-10 16:11:43.000000000 +0200
26548 @@ -19,6 +19,7 @@
26549  #include <linux/security.h>
26550  #include <linux/syscalls.h>
26551  #include <linux/mmu_notifier.h>
26552 +#include <linux/vs_memory.h>
26553  
26554  #include <asm/uaccess.h>
26555  #include <asm/cacheflush.h>
26556 @@ -220,7 +221,7 @@ static unsigned long move_vma(struct vm_
26557          * If this were a serious issue, we'd add a flag to do_munmap().
26558          */
26559         hiwater_vm = mm->hiwater_vm;
26560 -       mm->total_vm += new_len >> PAGE_SHIFT;
26561 +       vx_vmpages_add(mm, new_len >> PAGE_SHIFT);
26562         vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT);
26563  
26564         if (do_munmap(mm, old_addr, old_len) < 0) {
26565 @@ -238,7 +239,7 @@ static unsigned long move_vma(struct vm_
26566         }
26567  
26568         if (vm_flags & VM_LOCKED) {
26569 -               mm->locked_vm += new_len >> PAGE_SHIFT;
26570 +               vx_vmlocked_add(mm, new_len >> PAGE_SHIFT);
26571                 if (new_len > old_len)
26572                         mlock_vma_pages_range(new_vma, new_addr + old_len,
26573                                                        new_addr + new_len);
26574 @@ -349,6 +350,9 @@ unsigned long do_mremap(unsigned long ad
26575                 ret = -EAGAIN;
26576                 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
26577                         goto out;
26578 +               if (!vx_vmlocked_avail(current->mm,
26579 +                       (new_len - old_len) >> PAGE_SHIFT))
26580 +                       goto out;
26581         }
26582         if (!may_expand_vm(mm, (new_len - old_len) >> PAGE_SHIFT)) {
26583                 ret = -ENOMEM;
26584 @@ -377,10 +381,12 @@ unsigned long do_mremap(unsigned long ad
26585                         vma_adjust(vma, vma->vm_start,
26586                                 addr + new_len, vma->vm_pgoff, NULL);
26587  
26588 -                       mm->total_vm += pages;
26589 +                       // mm->total_vm += pages;
26590 +                       vx_vmpages_add(mm, pages);
26591                         vm_stat_account(mm, vma->vm_flags, vma->vm_file, pages);
26592                         if (vma->vm_flags & VM_LOCKED) {
26593 -                               mm->locked_vm += pages;
26594 +                               // mm->locked_vm += pages;
26595 +                               vx_vmlocked_add(mm, pages);
26596                                 mlock_vma_pages_range(vma, addr + old_len,
26597                                                    addr + new_len);
26598                         }
26599 diff -NurpP --minimal linux-2.6.31.6/mm/nommu.c linux-2.6.31.6-vs2.3.0.36.24/mm/nommu.c
26600 --- linux-2.6.31.6/mm/nommu.c   2009-11-12 12:10:12.000000000 +0100
26601 +++ linux-2.6.31.6-vs2.3.0.36.24/mm/nommu.c     2009-11-12 12:26:38.000000000 +0100
26602 @@ -1368,7 +1368,7 @@ unsigned long do_mmap_pgoff(struct file 
26603         /* okay... we have a mapping; now we have to register it */
26604         result = vma->vm_start;
26605  
26606 -       current->mm->total_vm += len >> PAGE_SHIFT;
26607 +       vx_vmpages_add(current->mm, len >> PAGE_SHIFT);
26608  
26609  share:
26610         add_vma_to_mm(current->mm, vma);
26611 @@ -1628,7 +1628,7 @@ void exit_mmap(struct mm_struct *mm)
26612  
26613         kenter("");
26614  
26615 -       mm->total_vm = 0;
26616 +       vx_vmpages_sub(mm, mm->total_vm);
26617  
26618         while ((vma = mm->mmap)) {
26619                 mm->mmap = vma->vm_next;
26620 diff -NurpP --minimal linux-2.6.31.6/mm/oom_kill.c linux-2.6.31.6-vs2.3.0.36.24/mm/oom_kill.c
26621 --- linux-2.6.31.6/mm/oom_kill.c        2009-06-11 17:13:27.000000000 +0200
26622 +++ linux-2.6.31.6-vs2.3.0.36.24/mm/oom_kill.c  2009-11-12 12:50:49.000000000 +0100
26623 @@ -27,6 +27,8 @@
26624  #include <linux/notifier.h>
26625  #include <linux/memcontrol.h>
26626  #include <linux/security.h>
26627 +#include <linux/vs_memory.h>
26628 +#include <linux/vs_context.h>
26629  
26630  int sysctl_panic_on_oom;
26631  int sysctl_oom_kill_allocating_task;
26632 @@ -159,9 +161,21 @@ unsigned long badness(struct task_struct
26633                         points >>= -(p->oomkilladj);
26634         }
26635  
26636 +       /*
26637 +        * add the context's badness score, then cut the
26638 +        * total to 1/16 when the current xid is above 1
26639 +        * and differs from the xid of the task being scored
26640 +        */
26641 +
26642 +       points += vx_badness(p, mm);
26643 +
26644 +       if ((vx_current_xid() > 1) &&
26645 +               vx_current_xid() != vx_task_xid(p))
26646 +               points /= 16;
26647 +
26648  #ifdef DEBUG
26649 -       printk(KERN_DEBUG "OOMkill: task %d (%s) got %lu points\n",
26650 -       p->pid, p->comm, points);
26651 +       printk(KERN_DEBUG "OOMkill: task %d:#%u (%s) got %lu points\n",
26652 +               task_pid_nr(p), p->xid, p->comm, points);
26653  #endif
26654         return points;
26655  }
26656 @@ -215,8 +229,8 @@ static struct task_struct *select_bad_pr
26657                  */
26658                 if (!p->mm)
26659                         continue;
26660 -               /* skip the init task */
26661 -               if (is_global_init(p))
26662 +               /* skip the init task, global and per guest */
26663 +               if (task_is_init(p))
26664                         continue;
26665                 if (mem && !task_in_mem_cgroup(p, mem))
26666                         continue;
26667 @@ -330,8 +344,8 @@ static void __oom_kill_task(struct task_
26668         }
26669  
26670         if (verbose)
26671 -               printk(KERN_ERR "Killed process %d (%s)\n",
26672 -                               task_pid_nr(p), p->comm);
26673 +               printk(KERN_ERR "Killed process %s(%d:#%u)\n",
26674 +                       p->comm, task_pid_nr(p), p->xid);
26675  
26676         /*
26677          * We give our sacrificial lamb high priority and access to
26678 @@ -415,8 +429,8 @@ static int oom_kill_process(struct task_
26679                 return 0;
26680         }
26681  
26682 -       printk(KERN_ERR "%s: kill process %d (%s) score %li or a child\n",
26683 -                                       message, task_pid_nr(p), p->comm, points);
26684 +       printk(KERN_ERR "%s: kill process %s(%d:#%u) score %li or a child\n",
26685 +               message, p->comm, task_pid_nr(p), p->xid, points);
26686  
26687         /* Try to kill a child first */
26688         list_for_each_entry(c, &p->children, sibling) {
26689 diff -NurpP --minimal linux-2.6.31.6/mm/page_alloc.c linux-2.6.31.6-vs2.3.0.36.24/mm/page_alloc.c
26690 --- linux-2.6.31.6/mm/page_alloc.c      2009-11-12 12:10:12.000000000 +0100
26691 +++ linux-2.6.31.6-vs2.3.0.36.24/mm/page_alloc.c        2009-10-05 23:35:52.000000000 +0200
26692 @@ -48,6 +48,8 @@
26693  #include <linux/page_cgroup.h>
26694  #include <linux/debugobjects.h>
26695  #include <linux/kmemleak.h>
26696 +#include <linux/vs_base.h>
26697 +#include <linux/vs_limit.h>
26698  
26699  #include <asm/tlbflush.h>
26700  #include <asm/div64.h>
26701 @@ -2078,6 +2080,9 @@ void si_meminfo(struct sysinfo *val)
26702         val->totalhigh = totalhigh_pages;
26703         val->freehigh = nr_free_highpages();
26704         val->mem_unit = PAGE_SIZE;
26705 +
26706 +       if (vx_flags(VXF_VIRT_MEM, 0))
26707 +               vx_vsi_meminfo(val);
26708  }
26709  
26710  EXPORT_SYMBOL(si_meminfo);
26711 @@ -2098,6 +2103,9 @@ void si_meminfo_node(struct sysinfo *val
26712         val->freehigh = 0;
26713  #endif
26714         val->mem_unit = PAGE_SIZE;
26715 +
26716 +       if (vx_flags(VXF_VIRT_MEM, 0))
26717 +               vx_vsi_meminfo(val);
26718  }
26719  #endif
26720  
26721 diff -NurpP --minimal linux-2.6.31.6/mm/rmap.c linux-2.6.31.6-vs2.3.0.36.24/mm/rmap.c
26722 --- linux-2.6.31.6/mm/rmap.c    2009-09-10 15:26:28.000000000 +0200
26723 +++ linux-2.6.31.6-vs2.3.0.36.24/mm/rmap.c      2009-09-10 16:11:43.000000000 +0200
26724 @@ -50,6 +50,7 @@
26725  #include <linux/memcontrol.h>
26726  #include <linux/mmu_notifier.h>
26727  #include <linux/migrate.h>
26728 +#include <linux/vs_memory.h>
26729  
26730  #include <asm/tlbflush.h>
26731  
26732 diff -NurpP --minimal linux-2.6.31.6/mm/shmem.c linux-2.6.31.6-vs2.3.0.36.24/mm/shmem.c
26733 --- linux-2.6.31.6/mm/shmem.c   2009-09-10 15:26:28.000000000 +0200
26734 +++ linux-2.6.31.6-vs2.3.0.36.24/mm/shmem.c     2009-09-10 16:11:43.000000000 +0200
26735 @@ -1777,7 +1777,7 @@ static int shmem_statfs(struct dentry *d
26736  {
26737         struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
26738  
26739 -       buf->f_type = TMPFS_MAGIC;
26740 +       buf->f_type = TMPFS_SUPER_MAGIC;
26741         buf->f_bsize = PAGE_CACHE_SIZE;
26742         buf->f_namelen = NAME_MAX;
26743         spin_lock(&sbinfo->stat_lock);
26744 @@ -2346,7 +2346,7 @@ static int shmem_fill_super(struct super
26745         sb->s_maxbytes = SHMEM_MAX_BYTES;
26746         sb->s_blocksize = PAGE_CACHE_SIZE;
26747         sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
26748 -       sb->s_magic = TMPFS_MAGIC;
26749 +       sb->s_magic = TMPFS_SUPER_MAGIC;
26750         sb->s_op = &shmem_ops;
26751         sb->s_time_gran = 1;
26752  #ifdef CONFIG_TMPFS_POSIX_ACL
26753 diff -NurpP --minimal linux-2.6.31.6/mm/slab.c linux-2.6.31.6-vs2.3.0.36.24/mm/slab.c
26754 --- linux-2.6.31.6/mm/slab.c    2009-09-10 15:26:28.000000000 +0200
26755 +++ linux-2.6.31.6-vs2.3.0.36.24/mm/slab.c      2009-09-10 16:11:43.000000000 +0200
26756 @@ -431,6 +431,8 @@ static void kmem_list3_init(struct kmem_
26757  #define STATS_INC_FREEMISS(x)  do { } while (0)
26758  #endif
26759  
26760 +#include "slab_vs.h"
26761 +
26762  #if DEBUG
26763  
26764  /*
26765 @@ -3253,6 +3255,7 @@ retry:
26766  
26767         obj = slab_get_obj(cachep, slabp, nodeid);
26768         check_slabp(cachep, slabp);
26769 +       vx_slab_alloc(cachep, flags);
26770         l3->free_objects--;
26771         /* move slabp to correct slabp list: */
26772         list_del(&slabp->list);
26773 @@ -3329,6 +3332,7 @@ __cache_alloc_node(struct kmem_cache *ca
26774         /* ___cache_alloc_node can fall back to other nodes */
26775         ptr = ____cache_alloc_node(cachep, flags, nodeid);
26776    out:
26777 +       vx_slab_alloc(cachep, flags);
26778         local_irq_restore(save_flags);
26779         ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
26780         kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags,
26781 @@ -3515,6 +3519,7 @@ static inline void __cache_free(struct k
26782         check_irq_off();
26783         kmemleak_free_recursive(objp, cachep->flags);
26784         objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
26785 +       vx_slab_free(cachep);
26786  
26787         kmemcheck_slab_free(cachep, objp, obj_size(cachep));
26788  
26789 diff -NurpP --minimal linux-2.6.31.6/mm/slab_vs.h linux-2.6.31.6-vs2.3.0.36.24/mm/slab_vs.h
26790 --- linux-2.6.31.6/mm/slab_vs.h 1970-01-01 01:00:00.000000000 +0100
26791 +++ linux-2.6.31.6-vs2.3.0.36.24/mm/slab_vs.h   2009-11-05 04:25:04.000000000 +0100
26792 @@ -0,0 +1,29 @@
26793 +
26794 +#include <linux/vserver/context.h>
26795 +
26796 +#include <linux/vs_context.h>
26797 +
26798 +/* add cachep's object size to the current context's slab counter (flags unused) */
26799 +static inline
26800 +void vx_slab_alloc(struct kmem_cache *cachep, gfp_t flags)
26801 +{
26802 +       struct vx_info *ctx = current_vx_info();
26803 +       int zone = gfp_zone(cachep->gfpflags);
26804 +
26805 +       /* no vx_info on the current task: nothing to account */
26806 +       if (ctx)
26807 +               atomic_add(cachep->buffer_size, &ctx->cacct.slab[zone]);
26808 +}
26809 +
26810 +/* subtract cachep's object size from the current context's slab counter */
26811 +static inline
26812 +void vx_slab_free(struct kmem_cache *cachep)
26813 +{
26814 +       struct vx_info *ctx = current_vx_info();
26815 +       int zone = gfp_zone(cachep->gfpflags);
26816 +
26817 +       /* no vx_info on the current task: nothing to account */
26818 +       if (ctx)
26819 +               atomic_sub(cachep->buffer_size, &ctx->cacct.slab[zone]);
26820 +}
26821 +
26822 diff -NurpP --minimal linux-2.6.31.6/mm/swapfile.c linux-2.6.31.6-vs2.3.0.36.24/mm/swapfile.c
26823 --- linux-2.6.31.6/mm/swapfile.c        2009-11-12 12:10:12.000000000 +0100
26824 +++ linux-2.6.31.6-vs2.3.0.36.24/mm/swapfile.c  2009-11-12 12:26:38.000000000 +0100
26825 @@ -34,6 +34,8 @@
26826  #include <asm/tlbflush.h>
26827  #include <linux/swapops.h>
26828  #include <linux/page_cgroup.h>
26829 +#include <linux/vs_base.h>
26830 +#include <linux/vs_memory.h>
26831  
26832  static DEFINE_SPINLOCK(swap_lock);
26833  static unsigned int nr_swapfiles;
26834 @@ -1678,6 +1680,8 @@ static void *swap_next(struct seq_file *
26835         if (v == SEQ_START_TOKEN)
26836                 ptr = swap_info;
26837         else {
26838 +               if (vx_flags(VXF_VIRT_MEM, 0))
26839 +                       return NULL;
26840                 ptr = v;
26841                 ptr++;
26842         }
26843 @@ -1705,6 +1709,16 @@ static int swap_show(struct seq_file *sw
26844  
26845         if (ptr == SEQ_START_TOKEN) {
26846                 seq_puts(swap,"Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
26847 +               if (vx_flags(VXF_VIRT_MEM, 0)) {
26848 +                       struct sysinfo si;
26849 +
26850 +                       vx_vsi_swapinfo(&si);
26851 +                       if (si.totalswap < (1 << 10))
26852 +                               return 0;
26853 +                       seq_printf(swap, "%s\t\t\t\t\t%s\t%lu\t%lu\t%d\n",
26854 +                               "hdv0", "partition", si.totalswap >> 10,
26855 +                               (si.totalswap - si.freeswap) >> 10, -1);
26856 +               }
26857                 return 0;
26858         }
26859  
26860 @@ -2060,6 +2074,8 @@ void si_swapinfo(struct sysinfo *val)
26861         val->freeswap = nr_swap_pages + nr_to_be_unused;
26862         val->totalswap = total_swap_pages + nr_to_be_unused;
26863         spin_unlock(&swap_lock);
26864 +       if (vx_flags(VXF_VIRT_MEM, 0))
26865 +               vx_vsi_swapinfo(val);
26866  }
26867  
26868  /*
26869 diff -NurpP --minimal linux-2.6.31.6/net/core/dev.c linux-2.6.31.6-vs2.3.0.36.24/net/core/dev.c
26870 --- linux-2.6.31.6/net/core/dev.c       2009-11-12 12:10:12.000000000 +0100
26871 +++ linux-2.6.31.6-vs2.3.0.36.24/net/core/dev.c 2009-11-08 16:59:44.000000000 +0100
26872 @@ -126,6 +126,7 @@
26873  #include <linux/in.h>
26874  #include <linux/jhash.h>
26875  #include <linux/random.h>
26876 +#include <linux/vs_inet.h>
26877  #include <trace/events/napi.h>
26878  
26879  #include "net-sysfs.h"
26880 @@ -586,7 +587,8 @@ struct net_device *__dev_get_by_name(str
26881         hlist_for_each(p, dev_name_hash(net, name)) {
26882                 struct net_device *dev
26883                         = hlist_entry(p, struct net_device, name_hlist);
26884 -               if (!strncmp(dev->name, name, IFNAMSIZ))
26885 +               if (!strncmp(dev->name, name, IFNAMSIZ) &&
26886 +                   nx_dev_visible(current_nx_info(), dev))
26887                         return dev;
26888         }
26889         return NULL;
26890 @@ -635,7 +637,8 @@ struct net_device *__dev_get_by_index(st
26891         hlist_for_each(p, dev_index_hash(net, ifindex)) {
26892                 struct net_device *dev
26893                         = hlist_entry(p, struct net_device, index_hlist);
26894 -               if (dev->ifindex == ifindex)
26895 +               if ((dev->ifindex == ifindex) &&
26896 +                   nx_dev_visible(current_nx_info(), dev))
26897                         return dev;
26898         }
26899         return NULL;
26900 @@ -686,10 +689,12 @@ struct net_device *dev_getbyhwaddr(struc
26901  
26902         ASSERT_RTNL();
26903  
26904 -       for_each_netdev(net, dev)
26905 +       for_each_netdev(net, dev) {
26906                 if (dev->type == type &&
26907 -                   !memcmp(dev->dev_addr, ha, dev->addr_len))
26908 +                   !memcmp(dev->dev_addr, ha, dev->addr_len) &&
26909 +                   nx_dev_visible(current_nx_info(), dev))
26910                         return dev;
26911 +       }
26912  
26913         return NULL;
26914  }
26915 @@ -701,9 +706,11 @@ struct net_device *__dev_getfirstbyhwtyp
26916         struct net_device *dev;
26917  
26918         ASSERT_RTNL();
26919 -       for_each_netdev(net, dev)
26920 -               if (dev->type == type)
26921 +       for_each_netdev(net, dev) {
26922 +               if ((dev->type == type) &&
26923 +                   nx_dev_visible(current_nx_info(), dev))
26924                         return dev;
26925 +       }
26926  
26927         return NULL;
26928  }
26929 @@ -821,6 +828,8 @@ static int __dev_alloc_name(struct net *
26930                                 continue;
26931                         if (i < 0 || i >= max_netdevices)
26932                                 continue;
26933 +                       if (!nx_dev_visible(current_nx_info(), d))
26934 +                               continue;
26935  
26936                         /*  avoid cases where sscanf is not exact inverse of printf */
26937                         snprintf(buf, IFNAMSIZ, name, i);
26938 @@ -2941,6 +2950,8 @@ static int dev_ifconf(struct net *net, c
26939  
26940         total = 0;
26941         for_each_netdev(net, dev) {
26942 +               if (!nx_dev_visible(current_nx_info(), dev))
26943 +                       continue;
26944                 for (i = 0; i < NPROTO; i++) {
26945                         if (gifconf_list[i]) {
26946                                 int done;
26947 @@ -3009,6 +3020,9 @@ static void dev_seq_printf_stats(struct 
26948  {
26949         const struct net_device_stats *stats = dev_get_stats(dev);
26950  
26951 +       if (!nx_dev_visible(current_nx_info(), dev))
26952 +               return;
26953 +
26954         seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
26955                    "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
26956                    dev->name, stats->rx_bytes, stats->rx_packets,
26957 @@ -5257,6 +5271,15 @@ int dev_change_net_namespace(struct net_
26958                 goto out;
26959  #endif
26960  
26961 +#ifdef CONFIG_SYSFS
26962 +       /* Don't allow real devices to be moved when sysfs
26963 +        * is enabled.
26964 +        */
26965 +       err = -EINVAL;
26966 +       if (dev->dev.parent)
26967 +               goto out;
26968 +#endif
26969 +
26970         /* Ensure the device has been registrered */
26971         err = -EINVAL;
26972         if (dev->reg_state != NETREG_REGISTERED)
26973 @@ -5317,6 +5340,8 @@ int dev_change_net_namespace(struct net_
26974  
26975         netdev_unregister_kobject(dev);
26976  
26977 +       /* the kobject is unregistered once, by the call above */
26978 +
26979         /* Actually switch the network namespace */
26980         dev_net_set(dev, net);
26981  
26982 diff -NurpP --minimal linux-2.6.31.6/net/core/net-sysfs.c linux-2.6.31.6-vs2.3.0.36.24/net/core/net-sysfs.c
26983 --- linux-2.6.31.6/net/core/net-sysfs.c 2009-09-10 15:26:29.000000000 +0200
26984 +++ linux-2.6.31.6-vs2.3.0.36.24/net/core/net-sysfs.c   2009-09-10 16:11:43.000000000 +0200
26985 @@ -513,6 +513,9 @@ int netdev_register_kobject(struct net_d
26986         if (dev_net(net) != &init_net)
26987                 return 0;
26988  
26989 +       if (dev_net(net) != &init_net)
26990 +               return 0;
26991 +
26992         return device_add(dev);
26993  }
26994  
26995 diff -NurpP --minimal linux-2.6.31.6/net/core/rtnetlink.c linux-2.6.31.6-vs2.3.0.36.24/net/core/rtnetlink.c
26996 --- linux-2.6.31.6/net/core/rtnetlink.c 2009-06-11 17:13:29.000000000 +0200
26997 +++ linux-2.6.31.6-vs2.3.0.36.24/net/core/rtnetlink.c   2009-11-05 03:50:06.000000000 +0100
26998 @@ -690,6 +690,8 @@ static int rtnl_dump_ifinfo(struct sk_bu
26999  
27000         idx = 0;
27001         for_each_netdev(net, dev) {
27002 +               if (!nx_dev_visible(skb->sk->sk_nx_info, dev))
27003 +                       continue;
27004                 if (idx < s_idx)
27005                         goto cont;
27006                 if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
27007 @@ -1235,6 +1237,9 @@ void rtmsg_ifinfo(int type, struct net_d
27008         struct sk_buff *skb;
27009         int err = -ENOBUFS;
27010  
27011 +       if (!nx_dev_visible(current_nx_info(), dev))
27012 +               return;
27013 +
27014         skb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL);
27015         if (skb == NULL)
27016                 goto errout;
27017 diff -NurpP --minimal linux-2.6.31.6/net/core/sock.c linux-2.6.31.6-vs2.3.0.36.24/net/core/sock.c
27018 --- linux-2.6.31.6/net/core/sock.c      2009-11-12 12:10:12.000000000 +0100
27019 +++ linux-2.6.31.6-vs2.3.0.36.24/net/core/sock.c        2009-11-05 04:25:28.000000000 +0100
27020 @@ -125,6 +125,10 @@
27021  #include <linux/ipsec.h>
27022  
27023  #include <linux/filter.h>
27024 +#include <linux/vs_socket.h>
27025 +#include <linux/vs_limit.h>
27026 +#include <linux/vs_context.h>
27027 +#include <linux/vs_network.h>
27028  
27029  #ifdef CONFIG_INET
27030  #include <net/tcp.h>
27031 @@ -974,6 +978,8 @@ static struct sock *sk_prot_alloc(struct
27032                 if (!try_module_get(prot->owner))
27033                         goto out_free_sec;
27034 +               sock_vx_init(sk);
27035 +               sock_nx_init(sk);
27036         }
27037  
27038         return sk;
27039  
27040 @@ -1053,6 +1059,11 @@ static void __sk_free(struct sock *sk)
27041                        __func__, atomic_read(&sk->sk_omem_alloc));
27042  
27043         put_net(sock_net(sk));
27044 +       vx_sock_dec(sk);
27045 +       clr_vx_info(&sk->sk_vx_info);
27046 +       sk->sk_xid = -1;
27047 +       clr_nx_info(&sk->sk_nx_info);
27048 +       sk->sk_nid = -1;
27049         sk_prot_free(sk->sk_prot_creator, sk);
27050  }
27051  
27052 @@ -1100,6 +1111,8 @@ struct sock *sk_clone(const struct sock 
27053  
27054                 /* SANITY */
27055                 get_net(sock_net(newsk));
27056 +               sock_vx_init(newsk);
27057 +               sock_nx_init(newsk);
27058                 sk_node_init(&newsk->sk_node);
27059                 sock_lock_init(newsk);
27060                 bh_lock_sock(newsk);
27061 @@ -1154,6 +1167,12 @@ struct sock *sk_clone(const struct sock 
27062                 smp_wmb();
27063                 atomic_set(&newsk->sk_refcnt, 2);
27064  
27065 +               set_vx_info(&newsk->sk_vx_info, sk->sk_vx_info);
27066 +               newsk->sk_xid = sk->sk_xid;
27067 +               vx_sock_inc(newsk);
27068 +               set_nx_info(&newsk->sk_nx_info, sk->sk_nx_info);
27069 +               newsk->sk_nid = sk->sk_nid;
27070 +
27071                 /*
27072                  * Increment the counter in the same struct proto as the master
27073                  * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
27074 @@ -1872,6 +1891,12 @@ void sock_init_data(struct socket *sock,
27075  
27076         sk->sk_stamp = ktime_set(-1L, 0);
27077  
27078 +       set_vx_info(&sk->sk_vx_info, current_vx_info());
27079 +       sk->sk_xid = vx_current_xid();
27080 +       vx_sock_inc(sk);
27081 +       set_nx_info(&sk->sk_nx_info, current_nx_info());
27082 +       sk->sk_nid = nx_current_nid();
27083 +
27084         /*
27085          * Before updating sk_refcnt, we must commit prior changes to memory
27086          * (Documentation/RCU/rculist_nulls.txt for details)
27087 diff -NurpP --minimal linux-2.6.31.6/net/ipv4/af_inet.c linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/af_inet.c
27088 --- linux-2.6.31.6/net/ipv4/af_inet.c   2009-09-10 15:26:29.000000000 +0200
27089 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/af_inet.c     2009-09-10 16:11:43.000000000 +0200
27090 @@ -115,6 +115,7 @@
27091  #ifdef CONFIG_IP_MROUTE
27092  #include <linux/mroute.h>
27093  #endif
27094 +#include <linux/vs_limit.h>
27095  
27096  
27097  /* The inetsw table contains everything that inet_create needs to
27098 @@ -324,9 +325,12 @@ lookup_protocol:
27099         }
27100  
27101         err = -EPERM;
27102 +       if ((protocol == IPPROTO_ICMP) &&
27103 +               nx_capable(answer->capability, NXC_RAW_ICMP))
27104 +               goto override;
27105         if (answer->capability > 0 && !capable(answer->capability))
27106                 goto out_rcu_unlock;
27107 -
27108 +override:
27109         err = -EAFNOSUPPORT;
27110         if (!inet_netns_ok(net, protocol))
27111                 goto out_rcu_unlock;
27112 @@ -444,6 +448,7 @@ int inet_bind(struct socket *sock, struc
27113         struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
27114         struct sock *sk = sock->sk;
27115         struct inet_sock *inet = inet_sk(sk);
27116 +       struct nx_v4_sock_addr nsa;
27117         unsigned short snum;
27118         int chk_addr_ret;
27119         int err;
27120 @@ -457,7 +462,11 @@ int inet_bind(struct socket *sock, struc
27121         if (addr_len < sizeof(struct sockaddr_in))
27122                 goto out;
27123  
27124 -       chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
27125 +       err = v4_map_sock_addr(inet, addr, &nsa);
27126 +       if (err)
27127 +               goto out;
27128 +
27129 +       chk_addr_ret = inet_addr_type(sock_net(sk), nsa.saddr);
27130  
27131         /* Not specified by any standard per-se, however it breaks too
27132          * many applications when removed.  It is unfortunate since
27133 @@ -469,7 +478,7 @@ int inet_bind(struct socket *sock, struc
27134         err = -EADDRNOTAVAIL;
27135         if (!sysctl_ip_nonlocal_bind &&
27136             !(inet->freebind || inet->transparent) &&
27137 -           addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
27138 +           nsa.saddr != htonl(INADDR_ANY) &&
27139             chk_addr_ret != RTN_LOCAL &&
27140             chk_addr_ret != RTN_MULTICAST &&
27141             chk_addr_ret != RTN_BROADCAST)
27142 @@ -494,7 +503,7 @@ int inet_bind(struct socket *sock, struc
27143         if (sk->sk_state != TCP_CLOSE || inet->num)
27144                 goto out_release_sock;
27145  
27146 -       inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr;
27147 +       v4_set_sock_addr(inet, &nsa);
27148         if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
27149                 inet->saddr = 0;  /* Use device */
27150  
27151 @@ -687,11 +696,13 @@ int inet_getname(struct socket *sock, st
27152                      peer == 1))
27153                         return -ENOTCONN;
27154                 sin->sin_port = inet->dport;
27155 -               sin->sin_addr.s_addr = inet->daddr;
27156 +               sin->sin_addr.s_addr =
27157 +                       nx_map_sock_lback(sk->sk_nx_info, inet->daddr);
27158         } else {
27159                 __be32 addr = inet->rcv_saddr;
27160                 if (!addr)
27161                         addr = inet->saddr;
27162 +               addr = nx_map_sock_lback(sk->sk_nx_info, addr);
27163                 sin->sin_port = inet->sport;
27164                 sin->sin_addr.s_addr = addr;
27165         }
27166 diff -NurpP --minimal linux-2.6.31.6/net/ipv4/devinet.c linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/devinet.c
27167 --- linux-2.6.31.6/net/ipv4/devinet.c   2009-09-10 15:26:29.000000000 +0200
27168 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/devinet.c     2009-11-05 03:50:40.000000000 +0100
27169 @@ -413,6 +413,7 @@ struct in_device *inetdev_by_index(struc
27170         return in_dev;
27171  }
27172  
27173 +
27174  /* Called only from RTNL semaphored context. No locks. */
27175  
27176  struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
27177 @@ -653,6 +654,8 @@ int devinet_ioctl(struct net *net, unsig
27178                 *colon = ':';
27179  
27180         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
27181 +               struct nx_info *nxi = current_nx_info();
27182 +
27183                 if (tryaddrmatch) {
27184                         /* Matthias Andree */
27185                         /* compare label and address (4.4BSD style) */
27186 @@ -661,6 +664,8 @@ int devinet_ioctl(struct net *net, unsig
27187                            This is checked above. */
27188                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
27189                              ifap = &ifa->ifa_next) {
27190 +                               if (!nx_v4_ifa_visible(nxi, ifa))
27191 +                                       continue;
27192                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
27193                                     sin_orig.sin_addr.s_addr ==
27194                                                         ifa->ifa_address) {
27195 @@ -673,9 +678,12 @@ int devinet_ioctl(struct net *net, unsig
27196                    comparing just the label */
27197                 if (!ifa) {
27198                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
27199 -                            ifap = &ifa->ifa_next)
27200 +                            ifap = &ifa->ifa_next) {
27201 +                               if (!nx_v4_ifa_visible(nxi, ifa))
27202 +                                       continue;
27203                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
27204                                         break;
27205 +                       }
27206                 }
27207         }
27208  
27209 @@ -826,6 +834,8 @@ static int inet_gifconf(struct net_devic
27210                 goto out;
27211  
27212         for (; ifa; ifa = ifa->ifa_next) {
27213 +               if (!nx_v4_ifa_visible(current_nx_info(), ifa))
27214 +                       continue;
27215                 if (!buf) {
27216                         done += sizeof(ifr);
27217                         continue;
27218 @@ -1164,6 +1174,7 @@ static int inet_dump_ifaddr(struct sk_bu
27219         struct net_device *dev;
27220         struct in_device *in_dev;
27221         struct in_ifaddr *ifa;
27222 +       struct sock *sk = skb->sk;
27223         int s_ip_idx, s_idx = cb->args[0];
27224  
27225         s_ip_idx = ip_idx = cb->args[1];
27226 @@ -1178,6 +1189,8 @@ static int inet_dump_ifaddr(struct sk_bu
27227  
27228                 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
27229                      ifa = ifa->ifa_next, ip_idx++) {
27230 +                       if (sk && !nx_v4_ifa_visible(sk->sk_nx_info, ifa))
27231 +                               continue;
27232                         if (ip_idx < s_ip_idx)
27233                                 continue;
27234                         if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
27235 diff -NurpP --minimal linux-2.6.31.6/net/ipv4/fib_hash.c linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/fib_hash.c
27236 --- linux-2.6.31.6/net/ipv4/fib_hash.c  2009-09-10 15:26:29.000000000 +0200
27237 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/fib_hash.c    2009-11-05 03:50:54.000000000 +0100
27238 @@ -1021,7 +1021,7 @@ static int fib_seq_show(struct seq_file 
27239         prefix  = f->fn_key;
27240         mask    = FZ_MASK(iter->zone);
27241         flags   = fib_flag_trans(fa->fa_type, mask, fi);
27242 -       if (fi)
27243 +       if (fi && nx_dev_visible(current_nx_info(), fi->fib_dev))
27244                 seq_printf(seq,
27245                          "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u%n",
27246                          fi->fib_dev ? fi->fib_dev->name : "*", prefix,
27247 diff -NurpP --minimal linux-2.6.31.6/net/ipv4/inet_connection_sock.c linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/inet_connection_sock.c
27248 --- linux-2.6.31.6/net/ipv4/inet_connection_sock.c      2009-06-11 17:13:29.000000000 +0200
27249 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/inet_connection_sock.c        2009-11-05 04:54:58.000000000 +0100
27250 @@ -49,10 +49,40 @@ void inet_get_local_port_range(int *low,
27251  }
27252  EXPORT_SYMBOL(inet_get_local_port_range);
27253  
27254 +/*
27255 + * Decide whether the receive addresses of sk1 and sk2 count as
27256 + * equal: returns 1 if they do and 0 if they do not.  An unset
27257 + * (zero) address is checked against the other socket's sk_nx_info.
27258 + */
27259 +int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
27260 +{
27261 +       const __be32 addr1 = inet_rcv_saddr(sk1);
27262 +       const __be32 addr2 = inet_rcv_saddr(sk2);
27263 +
27264 +       if (inet_v6_ipv6only(sk2))
27265 +               return 0;
27266 +
27267 +       /* both sockets have an address set: compare them directly */
27268 +       if (addr1 && addr2)
27269 +               return addr1 == addr2;
27270 +
27271 +       /* only sk1 has an address: look it up in sk2's nx_info */
27272 +       if (addr1)
27273 +               return v4_addr_in_nx_info(sk2->sk_nx_info,
27274 +                               addr1, NXA_MASK_BIND) ? 1 : 0;
27275 +
27276 +       /* only sk2 has an address: look it up in sk1's nx_info */
27277 +       if (addr2)
27278 +               return v4_addr_in_nx_info(sk1->sk_nx_info,
27279 +                               addr2, NXA_MASK_BIND) ? 1 : 0;
27280 +
27281 +       /* neither has an address: compare the two nx_infos instead */
27282 +       return nx_v4_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info) ? 1 : 0;
27283 +}
27284 +
27285  int inet_csk_bind_conflict(const struct sock *sk,
27286                            const struct inet_bind_bucket *tb)
27287  {
27288 -       const __be32 sk_rcv_saddr = inet_rcv_saddr(sk);
27289         struct sock *sk2;
27290         struct hlist_node *node;
27291         int reuse = sk->sk_reuse;
27292 @@ -72,9 +102,7 @@ int inet_csk_bind_conflict(const struct 
27293                      sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
27294                         if (!reuse || !sk2->sk_reuse ||
27295                             sk2->sk_state == TCP_LISTEN) {
27296 -                               const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
27297 -                               if (!sk2_rcv_saddr || !sk_rcv_saddr ||
27298 -                                   sk2_rcv_saddr == sk_rcv_saddr)
27299 +                               if (ipv4_rcv_saddr_equal(sk, sk2))
27300                                         break;
27301                         }
27302                 }
27303 diff -NurpP --minimal linux-2.6.31.6/net/ipv4/inet_diag.c linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/inet_diag.c
27304 --- linux-2.6.31.6/net/ipv4/inet_diag.c 2009-09-10 15:26:29.000000000 +0200
27305 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/inet_diag.c   2009-09-10 16:11:43.000000000 +0200
27306 @@ -32,6 +32,8 @@
27307  #include <linux/stddef.h>
27308  
27309  #include <linux/inet_diag.h>
27310 +#include <linux/vs_network.h>
27311 +#include <linux/vs_inet.h>
27312  
27313  static const struct inet_diag_handler **inet_diag_table;
27314  
27315 @@ -118,8 +120,8 @@ static int inet_csk_diag_fill(struct soc
27316  
27317         r->id.idiag_sport = inet->sport;
27318         r->id.idiag_dport = inet->dport;
27319 -       r->id.idiag_src[0] = inet->rcv_saddr;
27320 -       r->id.idiag_dst[0] = inet->daddr;
27321 +       r->id.idiag_src[0] = nx_map_sock_lback(sk->sk_nx_info, inet->rcv_saddr);
27322 +       r->id.idiag_dst[0] = nx_map_sock_lback(sk->sk_nx_info, inet->daddr);
27323  
27324  #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
27325         if (r->idiag_family == AF_INET6) {
27326 @@ -204,8 +206,8 @@ static int inet_twsk_diag_fill(struct in
27327         r->id.idiag_cookie[1] = (u32)(((unsigned long)tw >> 31) >> 1);
27328         r->id.idiag_sport     = tw->tw_sport;
27329         r->id.idiag_dport     = tw->tw_dport;
27330 -       r->id.idiag_src[0]    = tw->tw_rcv_saddr;
27331 -       r->id.idiag_dst[0]    = tw->tw_daddr;
27332 +       r->id.idiag_src[0]    = nx_map_sock_lback(tw->tw_nx_info, tw->tw_rcv_saddr);
27333 +       r->id.idiag_dst[0]    = nx_map_sock_lback(tw->tw_nx_info, tw->tw_daddr);
27334         r->idiag_state        = tw->tw_substate;
27335         r->idiag_timer        = 3;
27336         r->idiag_expires      = DIV_ROUND_UP(tmo * 1000, HZ);
27337 @@ -262,6 +264,7 @@ static int inet_diag_get_exact(struct sk
27338         err = -EINVAL;
27339  
27340         if (req->idiag_family == AF_INET) {
27341 +               /* TODO: lback */
27342                 sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0],
27343                                  req->id.idiag_dport, req->id.idiag_src[0],
27344                                  req->id.idiag_sport, req->id.idiag_if);
27345 @@ -504,6 +507,7 @@ static int inet_csk_diag_dump(struct soc
27346                 } else
27347  #endif
27348                 {
27349 +                       /* TODO: lback */
27350                         entry.saddr = &inet->rcv_saddr;
27351                         entry.daddr = &inet->daddr;
27352                 }
27353 @@ -540,6 +544,7 @@ static int inet_twsk_diag_dump(struct in
27354                 } else
27355  #endif
27356                 {
27357 +                       /* TODO: lback */
27358                         entry.saddr = &tw->tw_rcv_saddr;
27359                         entry.daddr = &tw->tw_daddr;
27360                 }
27361 @@ -586,8 +591,8 @@ static int inet_diag_fill_req(struct sk_
27362  
27363         r->id.idiag_sport = inet->sport;
27364         r->id.idiag_dport = ireq->rmt_port;
27365 -       r->id.idiag_src[0] = ireq->loc_addr;
27366 -       r->id.idiag_dst[0] = ireq->rmt_addr;
27367 +       r->id.idiag_src[0] = nx_map_sock_lback(sk->sk_nx_info, ireq->loc_addr);
27368 +       r->id.idiag_dst[0] = nx_map_sock_lback(sk->sk_nx_info, ireq->rmt_addr);
27369         r->idiag_expires = jiffies_to_msecs(tmo);
27370         r->idiag_rqueue = 0;
27371         r->idiag_wqueue = 0;
27372 @@ -657,6 +662,7 @@ static int inet_diag_dump_reqs(struct sk
27373                                 continue;
27374  
27375                         if (bc) {
27376 +                               /* TODO: lback */
27377                                 entry.saddr =
27378  #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
27379                                         (entry.family == AF_INET6) ?
27380 @@ -727,6 +733,8 @@ static int inet_diag_dump(struct sk_buff
27381                         sk_nulls_for_each(sk, node, &ilb->head) {
27382                                 struct inet_sock *inet = inet_sk(sk);
27383  
27384 +                               if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
27385 +                                       continue;
27386                                 if (num < s_num) {
27387                                         num++;
27388                                         continue;
27389 @@ -793,6 +801,8 @@ skip_listen_ht:
27390                 sk_nulls_for_each(sk, node, &head->chain) {
27391                         struct inet_sock *inet = inet_sk(sk);
27392  
27393 +                       if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
27394 +                               continue;
27395                         if (num < s_num)
27396                                 goto next_normal;
27397                         if (!(r->idiag_states & (1 << sk->sk_state)))
27398 @@ -817,6 +827,8 @@ next_normal:
27399                         inet_twsk_for_each(tw, node,
27400                                     &head->twchain) {
27401  
27402 +                               if (!nx_check(tw->tw_nid, VS_WATCH_P | VS_IDENT))
27403 +                                       continue;
27404                                 if (num < s_num)
27405                                         goto next_dying;
27406                                 if (r->id.idiag_sport != tw->tw_sport &&
27407 diff -NurpP --minimal linux-2.6.31.6/net/ipv4/inet_hashtables.c linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/inet_hashtables.c
27408 --- linux-2.6.31.6/net/ipv4/inet_hashtables.c   2009-06-11 17:13:29.000000000 +0200
27409 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/inet_hashtables.c     2009-09-10 16:11:43.000000000 +0200
27410 @@ -21,6 +21,7 @@
27411  
27412  #include <net/inet_connection_sock.h>
27413  #include <net/inet_hashtables.h>
27414 +#include <net/route.h>
27415  #include <net/ip.h>
27416  
27417  /*
27418 @@ -134,6 +135,11 @@ static inline int compute_score(struct s
27419                         if (rcv_saddr != daddr)
27420                                 return -1;
27421                         score += 2;
27422 +               } else {
27423 +                       /* block non nx_info ips */
27424 +                       if (!v4_addr_in_nx_info(sk->sk_nx_info,
27425 +                               daddr, NXA_MASK_BIND))
27426 +                               return -1;
27427                 }
27428                 if (sk->sk_bound_dev_if) {
27429                         if (sk->sk_bound_dev_if != dif)
27430 @@ -151,7 +157,6 @@ static inline int compute_score(struct s
27431   * wildcarded during the search since they can never be otherwise.
27432   */
27433  
27434 -
27435  struct sock *__inet_lookup_listener(struct net *net,
27436                                     struct inet_hashinfo *hashinfo,
27437                                     const __be32 daddr, const unsigned short hnum,
27438 @@ -174,6 +179,7 @@ begin:
27439                         hiscore = score;
27440                 }
27441         }
27442 +
27443         /*
27444          * if the nulls value we got at the end of this lookup is
27445          * not the expected one, we must restart lookup.
27446 diff -NurpP --minimal linux-2.6.31.6/net/ipv4/netfilter/nf_nat_helper.c linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/netfilter/nf_nat_helper.c
27447 --- linux-2.6.31.6/net/ipv4/netfilter/nf_nat_helper.c   2009-09-10 15:26:29.000000000 +0200
27448 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/netfilter/nf_nat_helper.c     2009-09-10 16:11:43.000000000 +0200
27449 @@ -19,6 +19,7 @@
27450  #include <net/route.h>
27451  
27452  #include <linux/netfilter_ipv4.h>
27453 +#include <net/route.h>
27454  #include <net/netfilter/nf_conntrack.h>
27455  #include <net/netfilter/nf_conntrack_helper.h>
27456  #include <net/netfilter/nf_conntrack_ecache.h>
27457 diff -NurpP --minimal linux-2.6.31.6/net/ipv4/netfilter.c linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/netfilter.c
27458 --- linux-2.6.31.6/net/ipv4/netfilter.c 2009-09-10 15:26:29.000000000 +0200
27459 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/netfilter.c   2009-09-10 16:11:43.000000000 +0200
27460 @@ -4,7 +4,7 @@
27461  #include <linux/netfilter_ipv4.h>
27462  #include <linux/ip.h>
27463  #include <linux/skbuff.h>
27464 -#include <net/route.h>
27465 +// #include <net/route.h>
27466  #include <net/xfrm.h>
27467  #include <net/ip.h>
27468  #include <net/netfilter/nf_queue.h>
27469 diff -NurpP --minimal linux-2.6.31.6/net/ipv4/raw.c linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/raw.c
27470 --- linux-2.6.31.6/net/ipv4/raw.c       2009-09-10 15:26:29.000000000 +0200
27471 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/raw.c 2009-09-10 17:17:12.000000000 +0200
27472 @@ -117,7 +117,7 @@ static struct sock *__raw_v4_lookup(stru
27473  
27474                 if (net_eq(sock_net(sk), net) && inet->num == num       &&
27475                     !(inet->daddr && inet->daddr != raddr)              &&
27476 -                   !(inet->rcv_saddr && inet->rcv_saddr != laddr)      &&
27477 +                   v4_sock_addr_match(sk->sk_nx_info, inet, laddr)     &&
27478                     !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
27479                         goto found; /* gotcha */
27480         }
27481 @@ -372,6 +372,12 @@ static int raw_send_hdrinc(struct sock *
27482                 icmp_out_count(net, ((struct icmphdr *)
27483                         skb_transport_header(skb))->type);
27484  
27485 +       err = -EPERM;
27486 +       if (!nx_check(0, VS_ADMIN) && !capable(CAP_NET_RAW) &&
27487 +               sk->sk_nx_info &&
27488 +               !v4_addr_in_nx_info(sk->sk_nx_info, iph->saddr, NXA_MASK_BIND))
27489 +               goto error_free;
27490 +
27491         err = NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
27492                       dst_output);
27493         if (err > 0)
27494 @@ -383,6 +389,7 @@ out:
27495  
27496  error_fault:
27497         err = -EFAULT;
27498 +error_free:
27499         kfree_skb(skb);
27500  error:
27501         IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
27502 @@ -551,6 +558,13 @@ static int raw_sendmsg(struct kiocb *ioc
27503                 }
27504  
27505                 security_sk_classify_flow(sk, &fl);
27506 +               if (sk->sk_nx_info) {
27507 +                       err = ip_v4_find_src(sock_net(sk),
27508 +                               sk->sk_nx_info, &rt, &fl);
27509 +
27510 +                       if (err)
27511 +                               goto done;
27512 +               }
27513                 err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1);
27514         }
27515         if (err)
27516 @@ -620,17 +634,19 @@ static int raw_bind(struct sock *sk, str
27517  {
27518         struct inet_sock *inet = inet_sk(sk);
27519         struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
27520 +       struct nx_v4_sock_addr nsa = { 0 };
27521         int ret = -EINVAL;
27522         int chk_addr_ret;
27523  
27524         if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
27525                 goto out;
27526 -       chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
27527 +       v4_map_sock_addr(inet, addr, &nsa);
27528 +       chk_addr_ret = inet_addr_type(sock_net(sk), nsa.saddr);
27529         ret = -EADDRNOTAVAIL;
27530 -       if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
27531 +       if (nsa.saddr && chk_addr_ret != RTN_LOCAL &&
27532             chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
27533                 goto out;
27534 -       inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr;
27535 +       v4_set_sock_addr(inet, &nsa);
27536         if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
27537                 inet->saddr = 0;  /* Use device */
27538         sk_dst_reset(sk);
27539 @@ -682,7 +698,8 @@ static int raw_recvmsg(struct kiocb *ioc
27540         /* Copy the address. */
27541         if (sin) {
27542                 sin->sin_family = AF_INET;
27543 -               sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
27544 +               sin->sin_addr.s_addr =
27545 +                       nx_map_sock_lback(sk->sk_nx_info, ip_hdr(skb)->saddr);
27546                 sin->sin_port = 0;
27547                 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
27548         }
27549 @@ -860,7 +877,8 @@ static struct sock *raw_get_first(struct
27550                 struct hlist_node *node;
27551  
27552                 sk_for_each(sk, node, &state->h->ht[state->bucket])
27553 -                       if (sock_net(sk) == seq_file_net(seq))
27554 +                       if ((sock_net(sk) == seq_file_net(seq)) &&
27555 +                               nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
27556                                 goto found;
27557         }
27558         sk = NULL;
27559 @@ -876,7 +894,8 @@ static struct sock *raw_get_next(struct 
27560                 sk = sk_next(sk);
27561  try_again:
27562                 ;
27563 -       } while (sk && sock_net(sk) != seq_file_net(seq));
27564 +       } while (sk && ((sock_net(sk) != seq_file_net(seq)) ||
27565 +               !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)));
27566  
27567         if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
27568                 sk = sk_head(&state->h->ht[state->bucket]);
27569 @@ -935,7 +954,10 @@ static void raw_sock_seq_show(struct seq
27570  
27571         seq_printf(seq, "%4d: %08X:%04X %08X:%04X"
27572                 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
27573 -               i, src, srcp, dest, destp, sp->sk_state,
27574 +               i,
27575 +               nx_map_sock_lback(current_nx_info(), src), srcp,
27576 +               nx_map_sock_lback(current_nx_info(), dest), destp,
27577 +               sp->sk_state,
27578                 sk_wmem_alloc_get(sp),
27579                 sk_rmem_alloc_get(sp),
27580                 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
27581 diff -NurpP --minimal linux-2.6.31.6/net/ipv4/tcp.c linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/tcp.c
27582 --- linux-2.6.31.6/net/ipv4/tcp.c       2009-09-10 15:26:29.000000000 +0200
27583 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/tcp.c 2009-09-10 16:11:43.000000000 +0200
27584 @@ -264,6 +264,7 @@
27585  #include <linux/cache.h>
27586  #include <linux/err.h>
27587  #include <linux/crypto.h>
27588 +#include <linux/in.h>
27589  
27590  #include <net/icmp.h>
27591  #include <net/tcp.h>
27592 diff -NurpP --minimal linux-2.6.31.6/net/ipv4/tcp_ipv4.c linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/tcp_ipv4.c
27593 --- linux-2.6.31.6/net/ipv4/tcp_ipv4.c  2009-09-10 15:26:29.000000000 +0200
27594 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/tcp_ipv4.c    2009-09-10 16:11:43.000000000 +0200
27595 @@ -1887,6 +1887,12 @@ static void *listening_get_next(struct s
27596                 req = req->dl_next;
27597                 while (1) {
27598                         while (req) {
27599 +                               vxdprintk(VXD_CBIT(net, 6), "sk,req: %p [#%d] (from %d)",
27600 +                                       req->sk, (req->sk)?req->sk->sk_nid:0, nx_current_nid());
27601 +                               if (req->sk && !nx_check(req->sk->sk_nid, VS_WATCH_P | VS_IDENT)) {
27602 +                                       req = req->dl_next;     /* advance first: a bare continue would spin on this req */
27603 +                                       continue;
27604 +                               }
27605                                 if (req->rsk_ops->family == st->family) {
27606                                         cur = req;
27607                                         goto out;
27608 @@ -1911,6 +1917,10 @@ get_req:
27609         }
27610  get_sk:
27611         sk_nulls_for_each_from(sk, node) {
27612 +               vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)",
27613 +                       sk, sk->sk_nid, nx_current_nid());
27614 +               if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
27615 +                       continue;
27616                 if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
27617                         cur = sk;
27618                         goto out;
27619 @@ -1974,6 +1984,11 @@ static void *established_get_first(struc
27620  
27621                 spin_lock_bh(lock);
27622                 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
27623 +                       vxdprintk(VXD_CBIT(net, 6),
27624 +                               "sk,egf: %p [#%d] (from %d)",
27625 +                               sk, sk->sk_nid, nx_current_nid());
27626 +                       if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
27627 +                               continue;
27628                         if (sk->sk_family != st->family ||
27629                             !net_eq(sock_net(sk), net)) {
27630                                 continue;
27631 @@ -1984,6 +1999,11 @@ static void *established_get_first(struc
27632                 st->state = TCP_SEQ_STATE_TIME_WAIT;
27633                 inet_twsk_for_each(tw, node,
27634                                    &tcp_hashinfo.ehash[st->bucket].twchain) {
27635 +                       vxdprintk(VXD_CBIT(net, 6),
27636 +                               "tw: %p [#%d] (from %d)",
27637 +                               tw, tw->tw_nid, nx_current_nid());
27638 +                       if (!nx_check(tw->tw_nid, VS_WATCH_P | VS_IDENT))
27639 +                               continue;
27640                         if (tw->tw_family != st->family ||
27641                             !net_eq(twsk_net(tw), net)) {
27642                                 continue;
27643 @@ -2012,7 +2032,9 @@ static void *established_get_next(struct
27644                 tw = cur;
27645                 tw = tw_next(tw);
27646  get_tw:
27647 -               while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
27648 +               while (tw && (tw->tw_family != st->family ||
27649 +                       !net_eq(twsk_net(tw), net) ||
27650 +                       !nx_check(tw->tw_nid, VS_WATCH_P | VS_IDENT))) {
27651                         tw = tw_next(tw);
27652                 }
27653                 if (tw) {
27654 @@ -2035,6 +2057,11 @@ get_tw:
27655                 sk = sk_nulls_next(sk);
27656  
27657         sk_nulls_for_each_from(sk, node) {
27658 +               vxdprintk(VXD_CBIT(net, 6),
27659 +                       "sk,egn: %p [#%d] (from %d)",
27660 +                       sk, sk->sk_nid, nx_current_nid());
27661 +               if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
27662 +                       continue;
27663                 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
27664                         goto found;
27665         }
27666 @@ -2186,9 +2213,9 @@ static void get_openreq4(struct sock *sk
27667         seq_printf(f, "%4d: %08X:%04X %08X:%04X"
27668                 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
27669                 i,
27670 -               ireq->loc_addr,
27671 +               nx_map_sock_lback(current_nx_info(), ireq->loc_addr),
27672                 ntohs(inet_sk(sk)->sport),
27673 -               ireq->rmt_addr,
27674 +               nx_map_sock_lback(current_nx_info(), ireq->rmt_addr),
27675                 ntohs(ireq->rmt_port),
27676                 TCP_SYN_RECV,
27677                 0, 0, /* could print option size, but that is af dependent. */
27678 @@ -2231,7 +2258,10 @@ static void get_tcp4_sock(struct sock *s
27679  
27680         seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
27681                         "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
27682 -               i, src, srcp, dest, destp, sk->sk_state,
27683 +               i,
27684 +               nx_map_sock_lback(current_nx_info(), src), srcp,
27685 +               nx_map_sock_lback(current_nx_info(), dest), destp,
27686 +               sk->sk_state,
27687                 tp->write_seq - tp->snd_una,
27688                 sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
27689                                              (tp->rcv_nxt - tp->copied_seq),
27690 @@ -2267,7 +2297,10 @@ static void get_timewait4_sock(struct in
27691  
27692         seq_printf(f, "%4d: %08X:%04X %08X:%04X"
27693                 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n",
27694 -               i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
27695 +               i,
27696 +               nx_map_sock_lback(current_nx_info(), src), srcp,
27697 +               nx_map_sock_lback(current_nx_info(), dest), destp,
27698 +               tw->tw_substate, 0, 0,
27699                 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
27700                 atomic_read(&tw->tw_refcnt), tw, len);
27701  }
27702 diff -NurpP --minimal linux-2.6.31.6/net/ipv4/tcp_minisocks.c linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/tcp_minisocks.c
27703 --- linux-2.6.31.6/net/ipv4/tcp_minisocks.c     2009-11-12 12:10:12.000000000 +0100
27704 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/tcp_minisocks.c       2009-10-15 03:49:19.000000000 +0200
27705 @@ -26,6 +26,10 @@
27706  #include <net/inet_common.h>
27707  #include <net/xfrm.h>
27708  
27709 +#include <linux/vs_limit.h>
27710 +#include <linux/vs_socket.h>
27711 +#include <linux/vs_context.h>
27712 +
27713  #ifdef CONFIG_SYSCTL
27714  #define SYNC_INIT 0 /* let the user enable it */
27715  #else
27716 @@ -294,6 +298,11 @@ void tcp_time_wait(struct sock *sk, int 
27717                 tcptw->tw_ts_recent     = tp->rx_opt.ts_recent;
27718                 tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
27719  
27720 +               tw->tw_xid              = sk->sk_xid;
27721 +               tw->tw_vx_info          = NULL;
27722 +               tw->tw_nid              = sk->sk_nid;
27723 +               tw->tw_nx_info          = NULL;
27724 +
27725  #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
27726                 if (tw->tw_family == PF_INET6) {
27727                         struct ipv6_pinfo *np = inet6_sk(sk);
27728 diff -NurpP --minimal linux-2.6.31.6/net/ipv4/udp.c linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/udp.c
27729 --- linux-2.6.31.6/net/ipv4/udp.c       2009-09-10 15:26:30.000000000 +0200
27730 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv4/udp.c 2009-11-05 04:54:58.000000000 +0100
27731 @@ -222,14 +222,7 @@ fail:
27732         return error;
27733  }
27734  
27735 -static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
27736 -{
27737 -       struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
27738 -
27739 -       return  ( !ipv6_only_sock(sk2)  &&
27740 -                 (!inet1->rcv_saddr || !inet2->rcv_saddr ||
27741 -                  inet1->rcv_saddr == inet2->rcv_saddr      ));
27742 -}
27743 +extern int ipv4_rcv_saddr_equal(const struct sock *, const struct sock *);
27744  
27745  int udp_v4_get_port(struct sock *sk, unsigned short snum)
27746  {
27747 @@ -251,6 +244,11 @@ static inline int compute_score(struct s
27748                         if (inet->rcv_saddr != daddr)
27749                                 return -1;
27750                         score += 2;
27751 +               } else {
27752 +                       /* block non nx_info ips */
27753 +                       if (!v4_addr_in_nx_info(sk->sk_nx_info,
27754 +                               daddr, NXA_MASK_BIND))
27755 +                               return -1;
27756                 }
27757                 if (inet->daddr) {
27758                         if (inet->daddr != saddr)
27759 @@ -271,6 +269,7 @@ static inline int compute_score(struct s
27760         return score;
27761  }
27762  
27763 +
27764  /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
27765   * harder than this. -DaveM
27766   */
27767 @@ -292,6 +291,11 @@ begin:
27768         sk_nulls_for_each_rcu(sk, node, &hslot->head) {
27769                 score = compute_score(sk, net, saddr, hnum, sport,
27770                                       daddr, dport, dif);
27771 +               /* FIXME: disabled?
27772 +               if (score == 9) {
27773 +                       result = sk;
27774 +                       break;
27775 +               } else */
27776                 if (score > badness) {
27777                         result = sk;
27778                         badness = score;
27779 @@ -305,6 +309,7 @@ begin:
27780         if (get_nulls_value(node) != hash)
27781                 goto begin;
27782  
27783 +
27784         if (result) {
27785                 if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
27786                         result = NULL;
27787 @@ -314,6 +319,7 @@ begin:
27788                         goto begin;
27789                 }
27790         }
27791 +
27792         rcu_read_unlock();
27793         return result;
27794  }
27795 @@ -356,7 +362,7 @@ static inline struct sock *udp_v4_mcast_
27796                     s->sk_hash != hnum                                  ||
27797                     (inet->daddr && inet->daddr != rmt_addr)            ||
27798                     (inet->dport != rmt_port && inet->dport)            ||
27799 -                   (inet->rcv_saddr && inet->rcv_saddr != loc_addr)    ||
27800 +                   !v4_sock_addr_match(s->sk_nx_info, inet, loc_addr)  ||      /* s: the socket under test, as in the other checks */
27801                     ipv6_only_sock(s)                                   ||
27802                     (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
27803                         continue;
27804 @@ -698,8 +704,13 @@ int udp_sendmsg(struct kiocb *iocb, stru
27805                                                { .sport = inet->sport,
27806                                                  .dport = dport } } };
27807                 struct net *net = sock_net(sk);
27808 +               struct nx_info *nxi = sk->sk_nx_info;
27809  
27810                 security_sk_classify_flow(sk, &fl);
27811 +               err = ip_v4_find_src(net, nxi, &rt, &fl);
27812 +               if (err)
27813 +                       goto out;
27814 +
27815                 err = ip_route_output_flow(net, &rt, &fl, sk, 1);
27816                 if (err) {
27817                         if (err == -ENETUNREACH)
27818 @@ -945,7 +956,8 @@ try_again:
27819         {
27820                 sin->sin_family = AF_INET;
27821                 sin->sin_port = udp_hdr(skb)->source;
27822 -               sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
27823 +               sin->sin_addr.s_addr = nx_map_sock_lback(
27824 +                       skb->sk->sk_nx_info, ip_hdr(skb)->saddr);
27825                 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
27826         }
27827         if (inet->cmsg_flags)
27828 @@ -1599,6 +1611,8 @@ static struct sock *udp_get_first(struct
27829                 sk_nulls_for_each(sk, node, &hslot->head) {
27830                         if (!net_eq(sock_net(sk), net))
27831                                 continue;
27832 +                       if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
27833 +                               continue;
27834                         if (sk->sk_family == state->family)
27835                                 goto found;
27836                 }
27837 @@ -1616,7 +1630,9 @@ static struct sock *udp_get_next(struct 
27838  
27839         do {
27840                 sk = sk_nulls_next(sk);
27841 -       } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
27842 +       } while (sk && (!net_eq(sock_net(sk), net) ||
27843 +               sk->sk_family != state->family ||
27844 +               !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)));
27845  
27846         if (!sk) {
27847                 if (state->bucket < UDP_HTABLE_SIZE)
27848 @@ -1721,7 +1737,10 @@ static void udp4_format_sock(struct sock
27849  
27850         seq_printf(f, "%4d: %08X:%04X %08X:%04X"
27851                 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n",
27852 -               bucket, src, srcp, dest, destp, sp->sk_state,
27853 +               bucket,
27854 +               nx_map_sock_lback(current_nx_info(), src), srcp,
27855 +               nx_map_sock_lback(current_nx_info(), dest), destp,
27856 +               sp->sk_state,
27857                 sk_wmem_alloc_get(sp),
27858                 sk_rmem_alloc_get(sp),
27859                 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
27860 diff -NurpP --minimal linux-2.6.31.6/net/ipv6/addrconf.c linux-2.6.31.6-vs2.3.0.36.24/net/ipv6/addrconf.c
27861 --- linux-2.6.31.6/net/ipv6/addrconf.c  2009-09-10 15:26:30.000000000 +0200
27862 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv6/addrconf.c    2009-09-10 16:11:43.000000000 +0200
27863 @@ -86,6 +86,8 @@
27864  
27865  #include <linux/proc_fs.h>
27866  #include <linux/seq_file.h>
27867 +#include <linux/vs_network.h>
27868 +#include <linux/vs_inet6.h>
27869  
27870  /* Set to 3 to get tracing... */
27871  #define ACONF_DEBUG 2
27872 @@ -1117,7 +1119,7 @@ out:
27873  
27874  int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev,
27875                        const struct in6_addr *daddr, unsigned int prefs,
27876 -                      struct in6_addr *saddr)
27877 +                      struct in6_addr *saddr, struct nx_info *nxi)
27878  {
27879         struct ipv6_saddr_score scores[2],
27880                                 *score = &scores[0], *hiscore = &scores[1];
27881 @@ -1190,6 +1192,8 @@ int ipv6_dev_get_saddr(struct net *net, 
27882                                                dev->name);
27883                                 continue;
27884                         }
27885 +                       if (!v6_addr_in_nx_info(nxi, &score->ifa->addr, -1))
27886 +                               continue;
27887  
27888                         score->rule = -1;
27889                         bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX);
27890 @@ -2979,7 +2983,10 @@ static void if6_seq_stop(struct seq_file
27891  static int if6_seq_show(struct seq_file *seq, void *v)
27892  {
27893         struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v;
27894 -       seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n",
27895 +
27896 +       if (nx_check(0, VS_ADMIN|VS_WATCH) ||
27897 +           v6_addr_in_nx_info(current_nx_info(), &ifp->addr, -1))
27898 +               seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n",
27899                    &ifp->addr,
27900                    ifp->idev->dev->ifindex,
27901                    ifp->prefix_len,
27902 @@ -3476,6 +3483,12 @@ static int inet6_dump_addr(struct sk_buf
27903         struct ifmcaddr6 *ifmca;
27904         struct ifacaddr6 *ifaca;
27905         struct net *net = sock_net(skb->sk);
27906 +       struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL;
27907 +
27908 +       /* disable ipv6 on non v6 guests */
27909 +       if (nxi && !nx_info_has_v6(nxi))
27910 +               return skb->len;
27911 +
27912  
27913         s_idx = cb->args[0];
27914         s_ip_idx = ip_idx = cb->args[1];
27915 @@ -3497,6 +3510,8 @@ static int inet6_dump_addr(struct sk_buf
27916                              ifa = ifa->if_next, ip_idx++) {
27917                                 if (ip_idx < s_ip_idx)
27918                                         continue;
27919 +                               if (!v6_addr_in_nx_info(nxi, &ifa->addr, -1))
27920 +                                       continue;
27921                                 err = inet6_fill_ifaddr(skb, ifa,
27922                                                         NETLINK_CB(cb->skb).pid,
27923                                                         cb->nlh->nlmsg_seq,
27924 @@ -3510,6 +3525,8 @@ static int inet6_dump_addr(struct sk_buf
27925                              ifmca = ifmca->next, ip_idx++) {
27926                                 if (ip_idx < s_ip_idx)
27927                                         continue;
27928 +                               if (!v6_addr_in_nx_info(nxi, &ifmca->mca_addr, -1))
27929 +                                       continue;
27930                                 err = inet6_fill_ifmcaddr(skb, ifmca,
27931                                                           NETLINK_CB(cb->skb).pid,
27932                                                           cb->nlh->nlmsg_seq,
27933 @@ -3523,6 +3540,8 @@ static int inet6_dump_addr(struct sk_buf
27934                              ifaca = ifaca->aca_next, ip_idx++) {
27935                                 if (ip_idx < s_ip_idx)
27936                                         continue;
27937 +                               if (!v6_addr_in_nx_info(nxi, &ifaca->aca_addr, -1))
27938 +                                       continue;
27939                                 err = inet6_fill_ifacaddr(skb, ifaca,
27940                                                           NETLINK_CB(cb->skb).pid,
27941                                                           cb->nlh->nlmsg_seq,
27942 @@ -3809,12 +3828,19 @@ static int inet6_dump_ifinfo(struct sk_b
27943         int s_idx = cb->args[0];
27944         struct net_device *dev;
27945         struct inet6_dev *idev;
27946 +       struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL;
27947 +
27948 +       /* FIXME: maybe disable ipv6 on non v6 guests?
27949 +       if (skb->sk && skb->sk->sk_vx_info)
27950 +               return skb->len; */
27951  
27952         read_lock(&dev_base_lock);
27953         idx = 0;
27954         for_each_netdev(net, dev) {
27955                 if (idx < s_idx)
27956                         goto cont;
27957 +               if (!v6_dev_in_nx_info(dev, nxi))
27958 +                       goto cont;
27959                 if ((idev = in6_dev_get(dev)) == NULL)
27960                         goto cont;
27961                 err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid,
27962 diff -NurpP --minimal linux-2.6.31.6/net/ipv6/af_inet6.c linux-2.6.31.6-vs2.3.0.36.24/net/ipv6/af_inet6.c
27963 --- linux-2.6.31.6/net/ipv6/af_inet6.c  2009-09-10 15:26:30.000000000 +0200
27964 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv6/af_inet6.c    2009-09-10 16:11:43.000000000 +0200
27965 @@ -41,6 +41,8 @@
27966  #include <linux/netdevice.h>
27967  #include <linux/icmpv6.h>
27968  #include <linux/netfilter_ipv6.h>
27969 +#include <linux/vs_inet.h>
27970 +#include <linux/vs_inet6.h>
27971  
27972  #include <net/ip.h>
27973  #include <net/ipv6.h>
27974 @@ -158,9 +160,12 @@ lookup_protocol:
27975         }
27976  
27977         err = -EPERM;
27978 +       if ((protocol == IPPROTO_ICMPV6) &&
27979 +               nx_capable(answer->capability, NXC_RAW_ICMP))
27980 +               goto override;
27981         if (answer->capability > 0 && !capable(answer->capability))
27982                 goto out_rcu_unlock;
27983 -
27984 +override:
27985         sock->ops = answer->ops;
27986         answer_prot = answer->prot;
27987         answer_no_check = answer->no_check;
27988 @@ -259,6 +264,7 @@ int inet6_bind(struct socket *sock, stru
27989         struct inet_sock *inet = inet_sk(sk);
27990         struct ipv6_pinfo *np = inet6_sk(sk);
27991         struct net *net = sock_net(sk);
27992 +       struct nx_v6_sock_addr nsa;
27993         __be32 v4addr = 0;
27994         unsigned short snum;
27995         int addr_type = 0;
27996 @@ -270,6 +276,11 @@ int inet6_bind(struct socket *sock, stru
27997  
27998         if (addr_len < SIN6_LEN_RFC2133)
27999                 return -EINVAL;
28000 +
28001 +       err = v6_map_sock_addr(inet, addr, &nsa);
28002 +       if (err)
28003 +               return err;
28004 +
28005         addr_type = ipv6_addr_type(&addr->sin6_addr);
28006         if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
28007                 return -EINVAL;
28008 @@ -301,6 +312,7 @@ int inet6_bind(struct socket *sock, stru
28009                 /* Reproduce AF_INET checks to make the bindings consitant */
28010                 v4addr = addr->sin6_addr.s6_addr32[3];
28011                 chk_addr_ret = inet_addr_type(net, v4addr);
28012 +
28013                 if (!sysctl_ip_nonlocal_bind &&
28014                     !(inet->freebind || inet->transparent) &&
28015                     v4addr != htonl(INADDR_ANY) &&
28016 @@ -310,6 +322,10 @@ int inet6_bind(struct socket *sock, stru
28017                         err = -EADDRNOTAVAIL;
28018                         goto out;
28019                 }
28020 +               if (!v4_addr_in_nx_info(sk->sk_nx_info, v4addr, NXA_MASK_BIND)) {
28021 +                       err = -EADDRNOTAVAIL;
28022 +                       goto out;
28023 +               }
28024         } else {
28025                 if (addr_type != IPV6_ADDR_ANY) {
28026                         struct net_device *dev = NULL;
28027 @@ -335,6 +351,11 @@ int inet6_bind(struct socket *sock, stru
28028                                 }
28029                         }
28030  
28031 +                       if (!v6_addr_in_nx_info(sk->sk_nx_info, &addr->sin6_addr, -1)) {
28032 +                               if (dev) dev_put(dev);
28033 +                               err = -EADDRNOTAVAIL;
28034 +                               goto out;
28035 +                       }
28036                         /* ipv4 addr of the socket is invalid.  Only the
28037                          * unspecified and mapped address have a v4 equivalent.
28038                          */
28039 @@ -353,6 +374,8 @@ int inet6_bind(struct socket *sock, stru
28040                 }
28041         }
28042  
28043 +       v6_set_sock_addr(inet, &nsa);
28044 +
28045         inet->rcv_saddr = v4addr;
28046         inet->saddr = v4addr;
28047  
28048 @@ -448,9 +471,11 @@ int inet6_getname(struct socket *sock, s
28049                         return -ENOTCONN;
28050                 sin->sin6_port = inet->dport;
28051                 ipv6_addr_copy(&sin->sin6_addr, &np->daddr);
28052 +               /* FIXME: remap lback? */
28053                 if (np->sndflow)
28054                         sin->sin6_flowinfo = np->flow_label;
28055         } else {
28056 +               /* FIXME: remap lback? */
28057                 if (ipv6_addr_any(&np->rcv_saddr))
28058                         ipv6_addr_copy(&sin->sin6_addr, &np->saddr);
28059                 else
28060 diff -NurpP --minimal linux-2.6.31.6/net/ipv6/fib6_rules.c linux-2.6.31.6-vs2.3.0.36.24/net/ipv6/fib6_rules.c
28061 --- linux-2.6.31.6/net/ipv6/fib6_rules.c        2009-09-10 15:26:30.000000000 +0200
28062 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv6/fib6_rules.c  2009-09-10 16:11:43.000000000 +0200
28063 @@ -96,7 +96,7 @@ static int fib6_rule_action(struct fib_r
28064                         if (ipv6_dev_get_saddr(net,
28065                                                ip6_dst_idev(&rt->u.dst)->dev,
28066                                                &flp->fl6_dst, srcprefs,
28067 -                                              &saddr))
28068 +                                              &saddr, NULL))
28069                                 goto again;
28070                         if (!ipv6_prefix_equal(&saddr, &r->src.addr,
28071                                                r->src.plen))
28072 diff -NurpP --minimal linux-2.6.31.6/net/ipv6/inet6_hashtables.c linux-2.6.31.6-vs2.3.0.36.24/net/ipv6/inet6_hashtables.c
28073 --- linux-2.6.31.6/net/ipv6/inet6_hashtables.c  2009-03-24 14:22:46.000000000 +0100
28074 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv6/inet6_hashtables.c    2009-09-10 16:11:43.000000000 +0200
28075 @@ -16,6 +16,7 @@
28076  
28077  #include <linux/module.h>
28078  #include <linux/random.h>
28079 +#include <linux/vs_inet6.h>
28080  
28081  #include <net/inet_connection_sock.h>
28082  #include <net/inet_hashtables.h>
28083 @@ -76,7 +77,6 @@ struct sock *__inet6_lookup_established(
28084         unsigned int slot = hash & (hashinfo->ehash_size - 1);
28085         struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
28086  
28087 -
28088         rcu_read_lock();
28089  begin:
28090         sk_nulls_for_each_rcu(sk, node, &head->chain) {
28091 @@ -88,7 +88,7 @@ begin:
28092                                 sock_put(sk);
28093                                 goto begin;
28094                         }
28095 -               goto out;
28096 +                       goto out;
28097                 }
28098         }
28099         if (get_nulls_value(node) != slot)
28100 @@ -134,6 +134,9 @@ static int inline compute_score(struct s
28101                         if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
28102                                 return -1;
28103                         score++;
28104 +               } else {
28105 +                       if (!v6_addr_in_nx_info(sk->sk_nx_info, daddr, -1))
28106 +                               return -1;
28107                 }
28108                 if (sk->sk_bound_dev_if) {
28109                         if (sk->sk_bound_dev_if != dif)
28110 diff -NurpP --minimal linux-2.6.31.6/net/ipv6/ip6_output.c linux-2.6.31.6-vs2.3.0.36.24/net/ipv6/ip6_output.c
28111 --- linux-2.6.31.6/net/ipv6/ip6_output.c        2009-09-10 15:26:30.000000000 +0200
28112 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv6/ip6_output.c  2009-09-10 16:11:43.000000000 +0200
28113 @@ -951,7 +951,7 @@ static int ip6_dst_lookup_tail(struct so
28114                 err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
28115                                          &fl->fl6_dst,
28116                                          sk ? inet6_sk(sk)->srcprefs : 0,
28117 -                                        &fl->fl6_src);
28118 +                                        &fl->fl6_src, sk ? sk->sk_nx_info : NULL);
28119                 if (err)
28120                         goto out_err_release;
28121         }
28122 diff -NurpP --minimal linux-2.6.31.6/net/ipv6/Kconfig linux-2.6.31.6-vs2.3.0.36.24/net/ipv6/Kconfig
28123 --- linux-2.6.31.6/net/ipv6/Kconfig     2009-09-10 15:26:30.000000000 +0200
28124 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv6/Kconfig       2009-09-10 16:11:43.000000000 +0200
28125 @@ -4,8 +4,8 @@
28126  
28127  #   IPv6 as module will cause a CRASH if you try to unload it
28128  menuconfig IPV6
28129 -       tristate "The IPv6 protocol"
28130 -       default m
28131 +       bool "The IPv6 protocol"
28132 +       default n
28133         ---help---
28134           This is complemental support for the IP version 6.
28135           You will still be able to do traditional IPv4 networking as well.
28136 diff -NurpP --minimal linux-2.6.31.6/net/ipv6/ndisc.c linux-2.6.31.6-vs2.3.0.36.24/net/ipv6/ndisc.c
28137 --- linux-2.6.31.6/net/ipv6/ndisc.c     2009-09-10 15:26:30.000000000 +0200
28138 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv6/ndisc.c       2009-09-10 16:11:43.000000000 +0200
28139 @@ -589,7 +589,7 @@ static void ndisc_send_na(struct net_dev
28140         } else {
28141                 if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
28142                                        inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
28143 -                                      &tmpaddr))
28144 +                                      &tmpaddr, NULL /* FIXME: ? */ ))
28145                         return;
28146                 src_addr = &tmpaddr;
28147         }
28148 diff -NurpP --minimal linux-2.6.31.6/net/ipv6/raw.c linux-2.6.31.6-vs2.3.0.36.24/net/ipv6/raw.c
28149 --- linux-2.6.31.6/net/ipv6/raw.c       2009-09-10 15:26:30.000000000 +0200
28150 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv6/raw.c 2009-09-10 16:11:43.000000000 +0200
28151 @@ -29,6 +29,7 @@
28152  #include <linux/icmpv6.h>
28153  #include <linux/netfilter.h>
28154  #include <linux/netfilter_ipv6.h>
28155 +#include <linux/vs_inet6.h>
28156  #include <linux/skbuff.h>
28157  #include <asm/uaccess.h>
28158  #include <asm/ioctls.h>
28159 @@ -281,6 +282,13 @@ static int rawv6_bind(struct sock *sk, s
28160                         }
28161                 }
28162  
28163 +               if (!v6_addr_in_nx_info(sk->sk_nx_info, &addr->sin6_addr, -1)) {
28164 +                       err = -EADDRNOTAVAIL;
28165 +                       if (dev)
28166 +                               dev_put(dev);
28167 +                       goto out;
28168 +               }
28169 +
28170                 /* ipv4 addr of the socket is invalid.  Only the
28171                  * unspecified and mapped address have a v4 equivalent.
28172                  */
28173 diff -NurpP --minimal linux-2.6.31.6/net/ipv6/route.c linux-2.6.31.6-vs2.3.0.36.24/net/ipv6/route.c
28174 --- linux-2.6.31.6/net/ipv6/route.c     2009-09-10 15:26:30.000000000 +0200
28175 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv6/route.c       2009-09-10 16:11:43.000000000 +0200
28176 @@ -2257,7 +2257,8 @@ static int rt6_fill_node(struct net *net
28177                 struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
28178                 struct in6_addr saddr_buf;
28179                 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
28180 -                                      dst, 0, &saddr_buf) == 0)
28181 +                       dst, 0, &saddr_buf,
28182 +                       (skb->sk ? skb->sk->sk_nx_info : NULL)) == 0)
28183                         NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
28184         }
28185  
28186 diff -NurpP --minimal linux-2.6.31.6/net/ipv6/tcp_ipv6.c linux-2.6.31.6-vs2.3.0.36.24/net/ipv6/tcp_ipv6.c
28187 --- linux-2.6.31.6/net/ipv6/tcp_ipv6.c  2009-09-10 15:26:30.000000000 +0200
28188 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv6/tcp_ipv6.c    2009-09-10 16:11:43.000000000 +0200
28189 @@ -68,6 +68,7 @@
28190  
28191  #include <linux/crypto.h>
28192  #include <linux/scatterlist.h>
28193 +#include <linux/vs_inet6.h>
28194  
28195  static void    tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
28196  static void    tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
28197 @@ -156,8 +157,15 @@ static int tcp_v6_connect(struct sock *s
28198          *      connect() to INADDR_ANY means loopback (BSD'ism).
28199          */
28200  
28201 -       if(ipv6_addr_any(&usin->sin6_addr))
28202 -               usin->sin6_addr.s6_addr[15] = 0x1;
28203 +       if(ipv6_addr_any(&usin->sin6_addr)) {
28204 +               struct nx_info *nxi =  sk->sk_nx_info;
28205 +
28206 +               if (nxi && nx_info_has_v6(nxi))
28207 +                       /* FIXME: remap lback? */
28208 +                       usin->sin6_addr = nxi->v6.ip;
28209 +               else
28210 +                       usin->sin6_addr.s6_addr[15] = 0x1;
28211 +       }
28212  
28213         addr_type = ipv6_addr_type(&usin->sin6_addr);
28214  
28215 diff -NurpP --minimal linux-2.6.31.6/net/ipv6/udp.c linux-2.6.31.6-vs2.3.0.36.24/net/ipv6/udp.c
28216 --- linux-2.6.31.6/net/ipv6/udp.c       2009-09-10 15:26:30.000000000 +0200
28217 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv6/udp.c 2009-11-05 05:04:49.000000000 +0100
28218 @@ -47,6 +47,7 @@
28219  
28220  #include <linux/proc_fs.h>
28221  #include <linux/seq_file.h>
28222 +#include <linux/vs_inet6.h>
28223  #include "udp_impl.h"
28224  
28225  int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
28226 @@ -61,24 +62,49 @@ int ipv6_rcv_saddr_equal(const struct so
28227         int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
28228  
28229         /* if both are mapped, treat as IPv4 */
28230 -       if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)
28231 -               return (!sk2_ipv6only &&
28232 +       if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
28233 +               if (!sk2_ipv6only &&
28234                         (!sk_rcv_saddr || !sk2_rcv_saddr ||
28235 -                         sk_rcv_saddr == sk2_rcv_saddr));
28236 +                         sk_rcv_saddr == sk2_rcv_saddr))
28237 +                       goto vs_v4;
28238 +               else
28239 +                       return 0;
28240 +       }
28241  
28242         if (addr_type2 == IPV6_ADDR_ANY &&
28243             !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
28244 -               return 1;
28245 +               goto vs;
28246  
28247         if (addr_type == IPV6_ADDR_ANY &&
28248             !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
28249 -               return 1;
28250 +               goto vs;
28251  
28252         if (sk2_rcv_saddr6 &&
28253             ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6))
28254 -               return 1;
28255 +               goto vs;
28256  
28257         return 0;
28258 +
28259 +vs_v4:
28260 +       if (!sk_rcv_saddr && !sk2_rcv_saddr)
28261 +               return nx_v4_addr_conflict(sk->sk_nx_info, sk2->sk_nx_info);
28262 +       if (!sk2_rcv_saddr)
28263 +               return v4_addr_in_nx_info(sk2->sk_nx_info, sk_rcv_saddr, -1);
28264 +       if (!sk_rcv_saddr)
28265 +               return v4_addr_in_nx_info(sk->sk_nx_info, sk2_rcv_saddr, -1);
28266 +       return 1;
28267 +vs:
28268 +       if (addr_type2 == IPV6_ADDR_ANY && addr_type == IPV6_ADDR_ANY)
28269 +               return nx_v6_addr_conflict(sk->sk_nx_info, sk2->sk_nx_info);
28270 +       else if (addr_type2 == IPV6_ADDR_ANY)
28271 +               return v6_addr_in_nx_info(sk2->sk_nx_info, sk_rcv_saddr6, -1);
28272 +       else if (addr_type == IPV6_ADDR_ANY) {
28273 +               if (addr_type2 == IPV6_ADDR_MAPPED)
28274 +                       return nx_v4_addr_conflict(sk->sk_nx_info, sk2->sk_nx_info);
28275 +               else
28276 +                       return v6_addr_in_nx_info(sk->sk_nx_info, sk2_rcv_saddr6, -1);
28277 +       }
28278 +       return 1;
28279  }
28280  
28281  int udp_v6_get_port(struct sock *sk, unsigned short snum)
28282 @@ -109,6 +135,10 @@ static inline int compute_score(struct s
28283                         if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
28284                                 return -1;
28285                         score++;
28286 +               } else {
28287 +                       /* block non nx_info ips */
28288 +                       if (!v6_addr_in_nx_info(sk->sk_nx_info, daddr, -1))
28289 +                               return -1;
28290                 }
28291                 if (!ipv6_addr_any(&np->daddr)) {
28292                         if (!ipv6_addr_equal(&np->daddr, saddr))
28293 diff -NurpP --minimal linux-2.6.31.6/net/ipv6/xfrm6_policy.c linux-2.6.31.6-vs2.3.0.36.24/net/ipv6/xfrm6_policy.c
28294 --- linux-2.6.31.6/net/ipv6/xfrm6_policy.c      2009-09-10 15:26:30.000000000 +0200
28295 +++ linux-2.6.31.6-vs2.3.0.36.24/net/ipv6/xfrm6_policy.c        2009-09-10 16:11:43.000000000 +0200
28296 @@ -63,7 +63,7 @@ static int xfrm6_get_saddr(struct net *n
28297         dev = ip6_dst_idev(dst)->dev;
28298         ipv6_dev_get_saddr(dev_net(dev), dev,
28299                            (struct in6_addr *)&daddr->a6, 0,
28300 -                          (struct in6_addr *)&saddr->a6);
28301 +                          (struct in6_addr *)&saddr->a6, NULL);
28302         dst_release(dst);
28303         return 0;
28304  }
28305 diff -NurpP --minimal linux-2.6.31.6/net/netlink/af_netlink.c linux-2.6.31.6-vs2.3.0.36.24/net/netlink/af_netlink.c
28306 --- linux-2.6.31.6/net/netlink/af_netlink.c     2009-09-10 15:26:30.000000000 +0200
28307 +++ linux-2.6.31.6-vs2.3.0.36.24/net/netlink/af_netlink.c       2009-09-10 16:11:43.000000000 +0200
28308 @@ -55,6 +55,9 @@
28309  #include <linux/types.h>
28310  #include <linux/audit.h>
28311  #include <linux/mutex.h>
28312 +#include <linux/vs_context.h>
28313 +#include <linux/vs_network.h>
28314 +#include <linux/vs_limit.h>
28315  
28316  #include <net/net_namespace.h>
28317  #include <net/sock.h>
28318 @@ -1831,6 +1834,8 @@ static struct sock *netlink_seq_socket_i
28319                         sk_for_each(s, node, &hash->table[j]) {
28320                                 if (sock_net(s) != seq_file_net(seq))
28321                                         continue;
28322 +                               if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT))
28323 +                                       continue;
28324                                 if (off == pos) {
28325                                         iter->link = i;
28326                                         iter->hash_idx = j;
28327 @@ -1865,7 +1870,8 @@ static void *netlink_seq_next(struct seq
28328         s = v;
28329         do {
28330                 s = sk_next(s);
28331 -       } while (s && sock_net(s) != seq_file_net(seq));
28332 +       } while (s && (sock_net(s) != seq_file_net(seq) ||
28333 +               !nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT)));
28334         if (s)
28335                 return s;
28336  
28337 @@ -1877,7 +1883,8 @@ static void *netlink_seq_next(struct seq
28338  
28339                 for (; j <= hash->mask; j++) {
28340                         s = sk_head(&hash->table[j]);
28341 -                       while (s && sock_net(s) != seq_file_net(seq))
28342 +                       while (s && (sock_net(s) != seq_file_net(seq) ||
28343 +                               !nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT)))
28344                                 s = sk_next(s);
28345                         if (s) {
28346                                 iter->link = i;
28347 diff -NurpP --minimal linux-2.6.31.6/net/sctp/ipv6.c linux-2.6.31.6-vs2.3.0.36.24/net/sctp/ipv6.c
28348 --- linux-2.6.31.6/net/sctp/ipv6.c      2009-09-10 15:26:31.000000000 +0200
28349 +++ linux-2.6.31.6-vs2.3.0.36.24/net/sctp/ipv6.c        2009-09-10 16:11:43.000000000 +0200
28350 @@ -316,7 +316,8 @@ static void sctp_v6_get_saddr(struct sct
28351                                    dst ? ip6_dst_idev(dst)->dev : NULL,
28352                                    &daddr->v6.sin6_addr,
28353                                    inet6_sk(&sk->inet.sk)->srcprefs,
28354 -                                  &saddr->v6.sin6_addr);
28355 +                                  &saddr->v6.sin6_addr,
28356 +                                  asoc->base.sk->sk_nx_info);
28357                 SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: %pI6\n",
28358                                   &saddr->v6.sin6_addr);
28359                 return;
28360 diff -NurpP --minimal linux-2.6.31.6/net/socket.c linux-2.6.31.6-vs2.3.0.36.24/net/socket.c
28361 --- linux-2.6.31.6/net/socket.c 2009-09-10 15:26:31.000000000 +0200
28362 +++ linux-2.6.31.6-vs2.3.0.36.24/net/socket.c   2009-09-10 16:11:43.000000000 +0200
28363 @@ -95,6 +95,10 @@
28364  
28365  #include <net/sock.h>
28366  #include <linux/netfilter.h>
28367 +#include <linux/vs_base.h>
28368 +#include <linux/vs_socket.h>
28369 +#include <linux/vs_inet.h>
28370 +#include <linux/vs_inet6.h>
28371  
28372  static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
28373  static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
28374 @@ -559,7 +563,7 @@ static inline int __sock_sendmsg(struct 
28375                                  struct msghdr *msg, size_t size)
28376  {
28377         struct sock_iocb *si = kiocb_to_siocb(iocb);
28378 -       int err;
28379 +       int err, len;
28380  
28381         si->sock = sock;
28382         si->scm = NULL;
28383 @@ -570,7 +574,22 @@ static inline int __sock_sendmsg(struct 
28384         if (err)
28385                 return err;
28386  
28387 -       return sock->ops->sendmsg(iocb, sock, msg, size);
28388 +       len = sock->ops->sendmsg(iocb, sock, msg, size);
28389 +       if (sock->sk) {
28390 +               if (len == size)
28391 +                       vx_sock_send(sock->sk, size);
28392 +               else
28393 +                       vx_sock_fail(sock->sk, size);
28394 +       }
28395 +       vxdprintk(VXD_CBIT(net, 7),
28396 +               "__sock_sendmsg: %p[%p,%p,%p;%d/%d]:%d/%d",
28397 +               sock, sock->sk,
28398 +               (sock->sk)?sock->sk->sk_nx_info:0,
28399 +               (sock->sk)?sock->sk->sk_vx_info:0,
28400 +               (sock->sk)?sock->sk->sk_xid:0,
28401 +               (sock->sk)?sock->sk->sk_nid:0,
28402 +               (unsigned int)size, len);
28403 +       return len;
28404  }
28405  
28406  int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
28407 @@ -671,7 +690,7 @@ EXPORT_SYMBOL_GPL(__sock_recv_timestamp)
28408  static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
28409                                  struct msghdr *msg, size_t size, int flags)
28410  {
28411 -       int err;
28412 +       int err, len;
28413         struct sock_iocb *si = kiocb_to_siocb(iocb);
28414  
28415         si->sock = sock;
28416 @@ -684,7 +703,18 @@ static inline int __sock_recvmsg(struct 
28417         if (err)
28418                 return err;
28419  
28420 -       return sock->ops->recvmsg(iocb, sock, msg, size, flags);
28421 +       len = sock->ops->recvmsg(iocb, sock, msg, size, flags);
28422 +       if ((len >= 0) && sock->sk)
28423 +               vx_sock_recv(sock->sk, len);
28424 +       vxdprintk(VXD_CBIT(net, 7),
28425 +               "__sock_recvmsg: %p[%p,%p,%p;%d/%d]:%d/%d",
28426 +               sock, sock->sk,
28427 +               (sock->sk)?sock->sk->sk_nx_info:0,
28428 +               (sock->sk)?sock->sk->sk_vx_info:0,
28429 +               (sock->sk)?sock->sk->sk_xid:0,
28430 +               (sock->sk)?sock->sk->sk_nid:0,
28431 +               (unsigned int)size, len);
28432 +       return len;
28433  }
28434  
28435  int sock_recvmsg(struct socket *sock, struct msghdr *msg,
28436 @@ -1155,6 +1185,13 @@ static int __sock_create(struct net *net
28437         if (type < 0 || type >= SOCK_MAX)
28438                 return -EINVAL;
28439  
28440 +       if (!nx_check(0, VS_ADMIN)) {
28441 +               if (family == PF_INET && !current_nx_info_has_v4())
28442 +                       return -EAFNOSUPPORT;
28443 +               if (family == PF_INET6 && !current_nx_info_has_v6())
28444 +                       return -EAFNOSUPPORT;
28445 +       }
28446 +
28447         /* Compatibility.
28448  
28449            This uglymoron is moved from INET layer to here to avoid
28450 @@ -1287,6 +1324,7 @@ SYSCALL_DEFINE3(socket, int, family, int
28451         if (retval < 0)
28452                 goto out;
28453  
28454 +       set_bit(SOCK_USER_SOCKET, &sock->flags);
28455         retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
28456         if (retval < 0)
28457                 goto out_release;
28458 @@ -1328,10 +1366,12 @@ SYSCALL_DEFINE4(socketpair, int, family,
28459         err = sock_create(family, type, protocol, &sock1);
28460         if (err < 0)
28461                 goto out;
28462 +       set_bit(SOCK_USER_SOCKET, &sock1->flags);
28463  
28464         err = sock_create(family, type, protocol, &sock2);
28465         if (err < 0)
28466                 goto out_release_1;
28467 +       set_bit(SOCK_USER_SOCKET, &sock2->flags);
28468  
28469         err = sock1->ops->socketpair(sock1, sock2);
28470         if (err < 0)
28471 diff -NurpP --minimal linux-2.6.31.6/net/sunrpc/auth.c linux-2.6.31.6-vs2.3.0.36.24/net/sunrpc/auth.c
28472 --- linux-2.6.31.6/net/sunrpc/auth.c    2009-03-24 14:22:48.000000000 +0100
28473 +++ linux-2.6.31.6-vs2.3.0.36.24/net/sunrpc/auth.c      2009-09-10 16:11:43.000000000 +0200
28474 @@ -14,6 +14,7 @@
28475  #include <linux/hash.h>
28476  #include <linux/sunrpc/clnt.h>
28477  #include <linux/spinlock.h>
28478 +#include <linux/vs_tag.h>
28479  
28480  #ifdef RPC_DEBUG
28481  # define RPCDBG_FACILITY       RPCDBG_AUTH
28482 @@ -360,6 +361,7 @@ rpcauth_lookupcred(struct rpc_auth *auth
28483         memset(&acred, 0, sizeof(acred));
28484         acred.uid = cred->fsuid;
28485         acred.gid = cred->fsgid;
28486 +       acred.tag = dx_current_tag();
28487         acred.group_info = get_group_info(((struct cred *)cred)->group_info);
28488  
28489         ret = auth->au_ops->lookup_cred(auth, &acred, flags);
28490 @@ -400,6 +402,7 @@ rpcauth_bind_root_cred(struct rpc_task *
28491         struct auth_cred acred = {
28492                 .uid = 0,
28493                 .gid = 0,
28494 +               .tag = dx_current_tag(),
28495         };
28496         struct rpc_cred *ret;
28497  
28498 diff -NurpP --minimal linux-2.6.31.6/net/sunrpc/auth_unix.c linux-2.6.31.6-vs2.3.0.36.24/net/sunrpc/auth_unix.c
28499 --- linux-2.6.31.6/net/sunrpc/auth_unix.c       2008-12-25 00:26:37.000000000 +0100
28500 +++ linux-2.6.31.6-vs2.3.0.36.24/net/sunrpc/auth_unix.c 2009-09-10 16:11:43.000000000 +0200
28501 @@ -11,12 +11,14 @@
28502  #include <linux/module.h>
28503  #include <linux/sunrpc/clnt.h>
28504  #include <linux/sunrpc/auth.h>
28505 +#include <linux/vs_tag.h>
28506  
28507  #define NFS_NGROUPS    16
28508  
28509  struct unx_cred {
28510         struct rpc_cred         uc_base;
28511         gid_t                   uc_gid;
28512 +       tag_t                   uc_tag;
28513         gid_t                   uc_gids[NFS_NGROUPS];
28514  };
28515  #define uc_uid                 uc_base.cr_uid
28516 @@ -78,6 +80,7 @@ unx_create_cred(struct rpc_auth *auth, s
28517                 groups = NFS_NGROUPS;
28518  
28519         cred->uc_gid = acred->gid;
28520 +       cred->uc_tag = acred->tag;
28521         for (i = 0; i < groups; i++)
28522                 cred->uc_gids[i] = GROUP_AT(acred->group_info, i);
28523         if (i < NFS_NGROUPS)
28524 @@ -119,7 +122,9 @@ unx_match(struct auth_cred *acred, struc
28525         unsigned int i;
28526  
28527  
28528 -       if (cred->uc_uid != acred->uid || cred->uc_gid != acred->gid)
28529 +       if (cred->uc_uid != acred->uid ||
28530 +               cred->uc_gid != acred->gid ||
28531 +               cred->uc_tag != acred->tag)
28532                 return 0;
28533  
28534         if (acred->group_info != NULL)
28535 @@ -142,7 +147,7 @@ unx_marshal(struct rpc_task *task, __be3
28536         struct rpc_clnt *clnt = task->tk_client;
28537         struct unx_cred *cred = container_of(task->tk_msg.rpc_cred, struct unx_cred, uc_base);
28538         __be32          *base, *hold;
28539 -       int             i;
28540 +       int             i, tag;
28541  
28542         *p++ = htonl(RPC_AUTH_UNIX);
28543         base = p++;
28544 @@ -152,9 +157,12 @@ unx_marshal(struct rpc_task *task, __be3
28545          * Copy the UTS nodename captured when the client was created.
28546          */
28547         p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
28548 +       tag = task->tk_client->cl_tag;
28549  
28550 -       *p++ = htonl((u32) cred->uc_uid);
28551 -       *p++ = htonl((u32) cred->uc_gid);
28552 +       *p++ = htonl((u32) TAGINO_UID(tag,
28553 +               cred->uc_uid, cred->uc_tag));
28554 +       *p++ = htonl((u32) TAGINO_GID(tag,
28555 +               cred->uc_gid, cred->uc_tag));
28556         hold = p++;
28557         for (i = 0; i < 16 && cred->uc_gids[i] != (gid_t) NOGROUP; i++)
28558                 *p++ = htonl((u32) cred->uc_gids[i]);
28559 diff -NurpP --minimal linux-2.6.31.6/net/sunrpc/clnt.c linux-2.6.31.6-vs2.3.0.36.24/net/sunrpc/clnt.c
28560 --- linux-2.6.31.6/net/sunrpc/clnt.c    2009-09-10 15:26:31.000000000 +0200
28561 +++ linux-2.6.31.6-vs2.3.0.36.24/net/sunrpc/clnt.c      2009-09-10 16:11:43.000000000 +0200
28562 @@ -31,6 +31,7 @@
28563  #include <linux/utsname.h>
28564  #include <linux/workqueue.h>
28565  #include <linux/in6.h>
28566 +#include <linux/vs_cvirt.h>
28567  
28568  #include <linux/sunrpc/clnt.h>
28569  #include <linux/sunrpc/rpc_pipe_fs.h>
28570 @@ -339,6 +340,9 @@ struct rpc_clnt *rpc_create(struct rpc_c
28571         if (!(args->flags & RPC_CLNT_CREATE_QUIET))
28572                 clnt->cl_chatty = 1;
28573  
28574 +       /* TODO: handle RPC_CLNT_CREATE_TAGGED (disabled, so cl_tag is not set here)
28575 +       if (args->flags & RPC_CLNT_CREATE_TAGGED)
28576 +               clnt->cl_tag = 1; */
28577         return clnt;
28578  }
28579  EXPORT_SYMBOL_GPL(rpc_create);
28580 diff -NurpP --minimal linux-2.6.31.6/net/unix/af_unix.c linux-2.6.31.6-vs2.3.0.36.24/net/unix/af_unix.c
28581 --- linux-2.6.31.6/net/unix/af_unix.c   2009-11-12 12:10:12.000000000 +0100
28582 +++ linux-2.6.31.6-vs2.3.0.36.24/net/unix/af_unix.c     2009-11-12 12:26:38.000000000 +0100
28583 @@ -114,6 +114,8 @@
28584  #include <linux/mount.h>
28585  #include <net/checksum.h>
28586  #include <linux/security.h>
28587 +#include <linux/vs_context.h>
28588 +#include <linux/vs_limit.h>
28589  
28590  static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
28591  static DEFINE_SPINLOCK(unix_table_lock);
28592 @@ -258,6 +260,8 @@ static struct sock *__unix_find_socket_b
28593                 if (!net_eq(sock_net(s), net))
28594                         continue;
28595  
28596 +               if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT))
28597 +                       continue;
28598                 if (u->addr->len == len &&
28599                     !memcmp(u->addr->name, sunname, len))
28600                         goto found;
28601 @@ -2114,6 +2118,8 @@ static struct sock *unix_seq_idx(struct 
28602         for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
28603                 if (sock_net(s) != seq_file_net(seq))
28604                         continue;
28605 +               if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT))
28606 +                       continue;
28607                 if (off == pos)
28608                         return s;
28609                 ++off;
28610 @@ -2138,7 +2144,8 @@ static void *unix_seq_next(struct seq_fi
28611                 sk = first_unix_socket(&iter->i);
28612         else
28613                 sk = next_unix_socket(&iter->i, sk);
28614 -       while (sk && (sock_net(sk) != seq_file_net(seq)))
28615 +       while (sk && (sock_net(sk) != seq_file_net(seq) ||
28616 +               !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)))
28617                 sk = next_unix_socket(&iter->i, sk);
28618         return sk;
28619  }
28620 diff -NurpP --minimal linux-2.6.31.6/net/x25/af_x25.c linux-2.6.31.6-vs2.3.0.36.24/net/x25/af_x25.c
28621 --- linux-2.6.31.6/net/x25/af_x25.c     2009-09-10 15:26:31.000000000 +0200
28622 +++ linux-2.6.31.6-vs2.3.0.36.24/net/x25/af_x25.c       2009-09-10 16:11:43.000000000 +0200
28623 @@ -519,7 +519,10 @@ static int x25_create(struct net *net, s
28624  
28625         x25 = x25_sk(sk);
28626  
28627 -       sock_init_data(sock, sk);
28628 +       sk->sk_socket = sock;
28629 +       sk->sk_type = sock->type;
28630 +       sk->sk_sleep = &sock->wait;
28631 +       sock->sk = sk;
28632  
28633         x25_init_timers(sk);
28634  
28635 diff -NurpP --minimal linux-2.6.31.6/scripts/checksyscalls.sh linux-2.6.31.6-vs2.3.0.36.24/scripts/checksyscalls.sh
28636 --- linux-2.6.31.6/scripts/checksyscalls.sh     2009-09-10 15:26:31.000000000 +0200
28637 +++ linux-2.6.31.6-vs2.3.0.36.24/scripts/checksyscalls.sh       2009-09-10 16:11:43.000000000 +0200
28638 @@ -194,7 +194,6 @@ cat << EOF
28639  #define __IGNORE_afs_syscall
28640  #define __IGNORE_getpmsg
28641  #define __IGNORE_putpmsg
28642 -#define __IGNORE_vserver
28643  EOF
28644  }
28645  
28646 diff -NurpP --minimal linux-2.6.31.6/security/commoncap.c linux-2.6.31.6-vs2.3.0.36.24/security/commoncap.c
28647 --- linux-2.6.31.6/security/commoncap.c 2009-09-10 15:26:32.000000000 +0200
28648 +++ linux-2.6.31.6-vs2.3.0.36.24/security/commoncap.c   2009-09-10 16:32:54.000000000 +0200
28649 @@ -27,6 +27,7 @@
28650  #include <linux/sched.h>
28651  #include <linux/prctl.h>
28652  #include <linux/securebits.h>
28653 +#include <linux/vs_context.h>
28654  
28655  /*
28656   * If a non-root user executes a setuid-root binary in
28657 @@ -52,7 +53,7 @@ static void warn_setuid_and_fcaps_mixed(
28658  
28659  int cap_netlink_send(struct sock *sk, struct sk_buff *skb)
28660  {
28661 -       NETLINK_CB(skb).eff_cap = current_cap();
28662 +       NETLINK_CB(skb).eff_cap = vx_mbcaps(current_cap());
28663         return 0;
28664  }
28665  
28666 @@ -62,6 +63,7 @@ int cap_netlink_recv(struct sk_buff *skb
28667                 return -EPERM;
28668         return 0;
28669  }
28670 +
28671  EXPORT_SYMBOL(cap_netlink_recv);
28672  
28673  /**
28674 @@ -82,7 +84,22 @@ EXPORT_SYMBOL(cap_netlink_recv);
28675  int cap_capable(struct task_struct *tsk, const struct cred *cred, int cap,
28676                 int audit)
28677  {
28678 -       return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
28679 +       struct vx_info *vxi = tsk->vx_info;
28680 +
28681 +#if 0 /* debug only; NOTE(review): reads tsk->cap_effective, unlike the tsk->cred check below */
28682 +       printk("cap_capable() VXF_STATE_SETUP = %llx, raised = %x, eff = %08x:%08x\n",
28683 +               vx_info_flags(vxi, VXF_STATE_SETUP, 0),
28684 +               cap_raised(tsk->cap_effective, cap),
28685 +               tsk->cap_effective.cap[1], tsk->cap_effective.cap[0]);
28686 +#endif
28687 +
28688 +       /* special case SETUP: the task's own effective caps bypass vx_cap_raised() */
28689 +       if (vx_info_flags(vxi, VXF_STATE_SETUP, 0) &&
28690 +               /* FIXME: maybe use cred instead? (this tests tsk->cred, not the cred argument) */
28691 +               cap_raised(tsk->cred->cap_effective, cap))
28692 +               return 0;
28693 +
28694 +       return vx_cap_raised(vxi, cred->cap_effective, cap) ? 0 : -EPERM;
28695  }
28696  
28697  /**
28698 @@ -618,7 +635,7 @@ int cap_inode_setxattr(struct dentry *de
28699  
28700         if (!strncmp(name, XATTR_SECURITY_PREFIX,
28701                      sizeof(XATTR_SECURITY_PREFIX) - 1)  &&
28702 -           !capable(CAP_SYS_ADMIN))
28703 +               !vx_capable(CAP_SYS_ADMIN, VXC_FS_SECURITY))
28704                 return -EPERM;
28705         return 0;
28706  }
28707 @@ -962,7 +979,8 @@ error:
28708   */
28709  int cap_syslog(int type)
28710  {
28711 -       if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN))
28712 +       if ((type != 3 && type != 10) &&
28713 +               !vx_capable(CAP_SYS_ADMIN, VXC_SYSLOG))
28714                 return -EPERM;
28715         return 0;
28716  }
28717 @@ -1014,3 +1032,4 @@ int cap_file_mmap(struct file *file, uns
28718         }
28719         return ret;
28720  }
28721 +
28722 diff -NurpP --minimal linux-2.6.31.6/security/selinux/hooks.c linux-2.6.31.6-vs2.3.0.36.24/security/selinux/hooks.c
28723 --- linux-2.6.31.6/security/selinux/hooks.c     2009-09-10 15:26:32.000000000 +0200
28724 +++ linux-2.6.31.6-vs2.3.0.36.24/security/selinux/hooks.c       2009-09-10 16:11:43.000000000 +0200
28725 @@ -64,7 +64,6 @@
28726  #include <linux/dccp.h>
28727  #include <linux/quota.h>
28728  #include <linux/un.h>          /* for Unix socket types */
28729 -#include <net/af_unix.h>       /* for Unix socket types */
28730  #include <linux/parser.h>
28731  #include <linux/nfs_mount.h>
28732  #include <net/ipv6.h>
28733 diff -NurpP --minimal linux-2.6.31.6/security/selinux/include/av_permissions.h linux-2.6.31.6-vs2.3.0.36.24/security/selinux/include/av_permissions.h
28734 --- linux-2.6.31.6/security/selinux/include/av_permissions.h    2009-06-11 17:13:33.000000000 +0200
28735 +++ linux-2.6.31.6-vs2.3.0.36.24/security/selinux/include/av_permissions.h      2009-09-29 17:20:23.000000000 +0200
28736 @@ -542,6 +542,7 @@
28737  #define CAPABILITY__SETFCAP                       0x80000000UL
28738  #define CAPABILITY2__MAC_OVERRIDE                 0x00000001UL
28739  #define CAPABILITY2__MAC_ADMIN                    0x00000002UL
28740 +#define CAPABILITY2__CONTEXT                      0x00000004UL
28741  #define NETLINK_ROUTE_SOCKET__IOCTL               0x00000001UL
28742  #define NETLINK_ROUTE_SOCKET__READ                0x00000002UL
28743  #define NETLINK_ROUTE_SOCKET__WRITE               0x00000004UL
28744 diff -NurpP --minimal linux-2.6.31.6/security/selinux/include/av_perm_to_string.h linux-2.6.31.6-vs2.3.0.36.24/security/selinux/include/av_perm_to_string.h
28745 --- linux-2.6.31.6/security/selinux/include/av_perm_to_string.h 2009-06-11 17:13:33.000000000 +0200
28746 +++ linux-2.6.31.6-vs2.3.0.36.24/security/selinux/include/av_perm_to_string.h   2009-09-29 17:20:23.000000000 +0200
28747 @@ -141,6 +141,7 @@
28748     S_(SECCLASS_CAPABILITY, CAPABILITY__SETFCAP, "setfcap")
28749     S_(SECCLASS_CAPABILITY2, CAPABILITY2__MAC_OVERRIDE, "mac_override")
28750     S_(SECCLASS_CAPABILITY2, CAPABILITY2__MAC_ADMIN, "mac_admin")
28751 +   S_(SECCLASS_CAPABILITY2, CAPABILITY2__CONTEXT, "context")
28752     S_(SECCLASS_NETLINK_ROUTE_SOCKET, NETLINK_ROUTE_SOCKET__NLMSG_READ, "nlmsg_read")
28753     S_(SECCLASS_NETLINK_ROUTE_SOCKET, NETLINK_ROUTE_SOCKET__NLMSG_WRITE, "nlmsg_write")
28754     S_(SECCLASS_NETLINK_FIREWALL_SOCKET, NETLINK_FIREWALL_SOCKET__NLMSG_READ, "nlmsg_read")
This page took 2.660184 seconds and 4 git commands to generate.