git.pld-linux.org Git - packages/kernel.git/blob - kernel-vserver-2.3.patch
- remove from HEAD
[packages/kernel.git] / kernel-vserver-2.3.patch
1 diff -NurpP --minimal linux-2.6.30.2/arch/alpha/Kconfig linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/alpha/Kconfig
2 --- linux-2.6.30.2/arch/alpha/Kconfig   2009-03-24 14:18:07.000000000 +0100
3 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/alpha/Kconfig        2009-07-04 01:11:38.000000000 +0200
4 @@ -666,6 +666,8 @@ config DUMMY_CONSOLE
5         depends on VGA_HOSE
6         default y
7  
8 +source "kernel/vserver/Kconfig"
9 +
10  source "security/Kconfig"
11  
12  source "crypto/Kconfig"
13 diff -NurpP --minimal linux-2.6.30.2/arch/alpha/kernel/entry.S linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/alpha/kernel/entry.S
14 --- linux-2.6.30.2/arch/alpha/kernel/entry.S    2009-06-11 17:11:46.000000000 +0200
15 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/alpha/kernel/entry.S 2009-07-04 01:11:38.000000000 +0200
16 @@ -874,24 +874,15 @@ sys_getxgid:
17         .globl  sys_getxpid
18         .ent    sys_getxpid
19  sys_getxpid:
20 +       lda     $sp, -16($sp)
21 +       stq     $26, 0($sp)
22         .prologue 0
23 -       ldq     $2, TI_TASK($8)
24  
25 -       /* See linux/kernel/timer.c sys_getppid for discussion
26 -          about this loop.  */
27 -       ldq     $3, TASK_GROUP_LEADER($2)
28 -       ldq     $4, TASK_REAL_PARENT($3)
29 -       ldl     $0, TASK_TGID($2)
30 -1:     ldl     $1, TASK_TGID($4)
31 -#ifdef CONFIG_SMP
32 -       mov     $4, $5
33 -       mb
34 -       ldq     $3, TASK_GROUP_LEADER($2)
35 -       ldq     $4, TASK_REAL_PARENT($3)
36 -       cmpeq   $4, $5, $5
37 -       beq     $5, 1b
38 -#endif
39 -       stq     $1, 80($sp)
40 +       lda     $16, 96($sp)
41 +       jsr     $26, do_getxpid
42 +       ldq     $26, 0($sp)
43 +
44 +       lda     $sp, 16($sp)
45         ret
46  .end sys_getxpid
47  
48 diff -NurpP --minimal linux-2.6.30.2/arch/alpha/kernel/osf_sys.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/alpha/kernel/osf_sys.c
49 --- linux-2.6.30.2/arch/alpha/kernel/osf_sys.c  2009-06-11 17:11:46.000000000 +0200
50 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/alpha/kernel/osf_sys.c       2009-07-04 01:11:38.000000000 +0200
51 @@ -875,7 +875,7 @@ SYSCALL_DEFINE2(osf_gettimeofday, struct
52  {
53         if (tv) {
54                 struct timeval ktv;
55 -               do_gettimeofday(&ktv);
56 +               vx_gettimeofday(&ktv);
57                 if (put_tv32(tv, &ktv))
58                         return -EFAULT;
59         }
60 diff -NurpP --minimal linux-2.6.30.2/arch/alpha/kernel/ptrace.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/alpha/kernel/ptrace.c
61 --- linux-2.6.30.2/arch/alpha/kernel/ptrace.c   2008-12-25 00:26:37.000000000 +0100
62 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/alpha/kernel/ptrace.c        2009-07-04 01:11:38.000000000 +0200
63 @@ -15,6 +15,7 @@
64  #include <linux/slab.h>
65  #include <linux/security.h>
66  #include <linux/signal.h>
67 +#include <linux/vs_base.h>
68  
69  #include <asm/uaccess.h>
70  #include <asm/pgtable.h>
71 diff -NurpP --minimal linux-2.6.30.2/arch/alpha/kernel/systbls.S linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/alpha/kernel/systbls.S
72 --- linux-2.6.30.2/arch/alpha/kernel/systbls.S  2009-03-24 14:18:08.000000000 +0100
73 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/alpha/kernel/systbls.S       2009-07-04 01:11:38.000000000 +0200
74 @@ -446,7 +446,7 @@ sys_call_table:
75         .quad sys_stat64                        /* 425 */
76         .quad sys_lstat64
77         .quad sys_fstat64
78 -       .quad sys_ni_syscall                    /* sys_vserver */
79 +       .quad sys_vserver                       /* sys_vserver */
80         .quad sys_ni_syscall                    /* sys_mbind */
81         .quad sys_ni_syscall                    /* sys_get_mempolicy */
82         .quad sys_ni_syscall                    /* sys_set_mempolicy */
83 diff -NurpP --minimal linux-2.6.30.2/arch/alpha/kernel/traps.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/alpha/kernel/traps.c
84 --- linux-2.6.30.2/arch/alpha/kernel/traps.c    2009-06-11 17:11:46.000000000 +0200
85 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/alpha/kernel/traps.c 2009-07-04 01:11:38.000000000 +0200
86 @@ -183,7 +183,8 @@ die_if_kernel(char * str, struct pt_regs
87  #ifdef CONFIG_SMP
88         printk("CPU %d ", hard_smp_processor_id());
89  #endif
90 -       printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err);
91 +       printk("%s(%d[#%u]): %s %ld\n", current->comm,
92 +               task_pid_nr(current), current->xid, str, err);
93         dik_show_regs(regs, r9_15);
94         add_taint(TAINT_DIE);
95         dik_show_trace((unsigned long *)(regs+1));
96 diff -NurpP --minimal linux-2.6.30.2/arch/alpha/mm/fault.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/alpha/mm/fault.c
97 --- linux-2.6.30.2/arch/alpha/mm/fault.c        2008-12-25 00:26:37.000000000 +0100
98 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/alpha/mm/fault.c     2009-07-04 01:11:38.000000000 +0200
99 @@ -193,8 +193,8 @@ do_page_fault(unsigned long address, uns
100                 down_read(&mm->mmap_sem);
101                 goto survive;
102         }
103 -       printk(KERN_ALERT "VM: killing process %s(%d)\n",
104 -              current->comm, task_pid_nr(current));
105 +       printk(KERN_ALERT "VM: killing process %s(%d:#%u)\n",
106 +              current->comm, task_pid_nr(current), current->xid);
107         if (!user_mode(regs))
108                 goto no_context;
109         do_group_exit(SIGKILL);
110 diff -NurpP --minimal linux-2.6.30.2/arch/arm/Kconfig linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/arm/Kconfig
111 --- linux-2.6.30.2/arch/arm/Kconfig     2009-06-11 17:11:46.000000000 +0200
112 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/arm/Kconfig  2009-07-04 01:11:38.000000000 +0200
113 @@ -1429,6 +1429,8 @@ source "fs/Kconfig"
114  
115  source "arch/arm/Kconfig.debug"
116  
117 +source "kernel/vserver/Kconfig"
118 +
119  source "security/Kconfig"
120  
121  source "crypto/Kconfig"
122 diff -NurpP --minimal linux-2.6.30.2/arch/arm/kernel/calls.S linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/arm/kernel/calls.S
123 --- linux-2.6.30.2/arch/arm/kernel/calls.S      2009-06-11 17:11:48.000000000 +0200
124 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/arm/kernel/calls.S   2009-07-04 01:11:38.000000000 +0200
125 @@ -322,7 +322,7 @@
126  /* 310 */      CALL(sys_request_key)
127                 CALL(sys_keyctl)
128                 CALL(ABI(sys_semtimedop, sys_oabi_semtimedop))
129 -/* vserver */  CALL(sys_ni_syscall)
130 +               CALL(sys_vserver)
131                 CALL(sys_ioprio_set)
132  /* 315 */      CALL(sys_ioprio_get)
133                 CALL(sys_inotify_init)
134 diff -NurpP --minimal linux-2.6.30.2/arch/arm/kernel/process.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/arm/kernel/process.c
135 --- linux-2.6.30.2/arch/arm/kernel/process.c    2009-06-11 17:11:49.000000000 +0200
136 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/arm/kernel/process.c 2009-07-04 01:11:38.000000000 +0200
137 @@ -263,7 +263,8 @@ void __show_regs(struct pt_regs *regs)
138  void show_regs(struct pt_regs * regs)
139  {
140         printk("\n");
141 -       printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm);
142 +       printk("Pid: %d[#%u], comm: %20s\n",
143 +               task_pid_nr(current), current->xid, current->comm);
144         __show_regs(regs);
145         __backtrace();
146  }
147 diff -NurpP --minimal linux-2.6.30.2/arch/arm/kernel/traps.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/arm/kernel/traps.c
148 --- linux-2.6.30.2/arch/arm/kernel/traps.c      2009-06-11 17:11:49.000000000 +0200
149 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/arm/kernel/traps.c   2009-07-04 01:11:38.000000000 +0200
150 @@ -228,8 +228,8 @@ static void __die(const char *str, int e
151                str, err, ++die_counter);
152         print_modules();
153         __show_regs(regs);
154 -       printk("Process %s (pid: %d, stack limit = 0x%p)\n",
155 -               tsk->comm, task_pid_nr(tsk), thread + 1);
156 +       printk("Process %s (pid: %d:#%u, stack limit = 0x%p)\n",
157 +               tsk->comm, task_pid_nr(tsk), tsk->xid, thread + 1);
158  
159         if (!user_mode(regs) || in_interrupt()) {
160                 dump_mem("Stack: ", regs->ARM_sp,
161 diff -NurpP --minimal linux-2.6.30.2/arch/arm/mm/fault.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/arm/mm/fault.c
162 --- linux-2.6.30.2/arch/arm/mm/fault.c  2009-03-24 14:18:17.000000000 +0100
163 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/arm/mm/fault.c       2009-07-04 01:11:38.000000000 +0200
164 @@ -294,7 +294,8 @@ do_page_fault(unsigned long addr, unsign
165                  * happened to us that made us unable to handle
166                  * the page fault gracefully.
167                  */
168 -               printk("VM: killing process %s\n", tsk->comm);
169 +               printk("VM: killing process %s(%d:#%u)\n",
170 +                       tsk->comm, task_pid_nr(tsk), tsk->xid);
171                 do_group_exit(SIGKILL);
172                 return 0;
173         }
174 diff -NurpP --minimal linux-2.6.30.2/arch/cris/Kconfig linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/cris/Kconfig
175 --- linux-2.6.30.2/arch/cris/Kconfig    2009-06-11 17:11:56.000000000 +0200
176 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/cris/Kconfig 2009-07-04 01:11:38.000000000 +0200
177 @@ -685,6 +685,8 @@ source "drivers/staging/Kconfig"
178  
179  source "arch/cris/Kconfig.debug"
180  
181 +source "kernel/vserver/Kconfig"
182 +
183  source "security/Kconfig"
184  
185  source "crypto/Kconfig"
186 diff -NurpP --minimal linux-2.6.30.2/arch/frv/kernel/kernel_thread.S linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/frv/kernel/kernel_thread.S
187 --- linux-2.6.30.2/arch/frv/kernel/kernel_thread.S      2008-12-25 00:26:37.000000000 +0100
188 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/frv/kernel/kernel_thread.S   2009-07-04 01:11:38.000000000 +0200
189 @@ -37,7 +37,7 @@ kernel_thread:
190  
191         # start by forking the current process, but with shared VM
192         setlos.p        #__NR_clone,gr7         ; syscall number
193 -       ori             gr10,#CLONE_VM,gr8      ; first syscall arg     [clone_flags]
194 +       ori             gr10,#CLONE_KT,gr8      ; first syscall arg     [clone_flags]
195         sethi.p         #0xe4e4,gr9             ; second syscall arg    [newsp]
196         setlo           #0xe4e4,gr9
197         setlos.p        #0,gr10                 ; third syscall arg     [parent_tidptr]
198 diff -NurpP --minimal linux-2.6.30.2/arch/h8300/Kconfig linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/h8300/Kconfig
199 --- linux-2.6.30.2/arch/h8300/Kconfig   2009-03-24 14:18:24.000000000 +0100
200 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/h8300/Kconfig        2009-07-04 01:11:38.000000000 +0200
201 @@ -226,6 +226,8 @@ source "fs/Kconfig"
202  
203  source "arch/h8300/Kconfig.debug"
204  
205 +source "kernel/vserver/Kconfig"
206 +
207  source "security/Kconfig"
208  
209  source "crypto/Kconfig"
210 diff -NurpP --minimal linux-2.6.30.2/arch/ia64/ia32/ia32_entry.S linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/ia64/ia32/ia32_entry.S
211 --- linux-2.6.30.2/arch/ia64/ia32/ia32_entry.S  2009-06-11 17:11:57.000000000 +0200
212 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/ia64/ia32/ia32_entry.S       2009-07-04 01:11:38.000000000 +0200
213 @@ -451,7 +451,7 @@ ia32_syscall_table:
214         data8 sys_tgkill        /* 270 */
215         data8 compat_sys_utimes
216         data8 sys32_fadvise64_64
217 -       data8 sys_ni_syscall
218 +       data8 sys32_vserver
219         data8 sys_ni_syscall
220         data8 sys_ni_syscall    /* 275 */
221         data8 sys_ni_syscall
222 diff -NurpP --minimal linux-2.6.30.2/arch/ia64/Kconfig linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/ia64/Kconfig
223 --- linux-2.6.30.2/arch/ia64/Kconfig    2009-06-11 17:11:57.000000000 +0200
224 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/ia64/Kconfig 2009-07-04 01:11:38.000000000 +0200
225 @@ -675,6 +675,8 @@ source "fs/Kconfig"
226  
227  source "arch/ia64/Kconfig.debug"
228  
229 +source "kernel/vserver/Kconfig"
230 +
231  source "security/Kconfig"
232  
233  source "crypto/Kconfig"
234 diff -NurpP --minimal linux-2.6.30.2/arch/ia64/kernel/entry.S linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/ia64/kernel/entry.S
235 --- linux-2.6.30.2/arch/ia64/kernel/entry.S     2009-06-11 17:11:57.000000000 +0200
236 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/ia64/kernel/entry.S  2009-07-04 01:11:38.000000000 +0200
237 @@ -1753,7 +1753,7 @@ sys_call_table:
238         data8 sys_mq_notify
239         data8 sys_mq_getsetattr
240         data8 sys_kexec_load
241 -       data8 sys_ni_syscall                    // reserved for vserver
242 +       data8 sys_vserver
243         data8 sys_waitid                        // 1270
244         data8 sys_add_key
245         data8 sys_request_key
246 diff -NurpP --minimal linux-2.6.30.2/arch/ia64/kernel/perfmon.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/ia64/kernel/perfmon.c
247 --- linux-2.6.30.2/arch/ia64/kernel/perfmon.c   2009-06-11 17:11:57.000000000 +0200
248 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/ia64/kernel/perfmon.c        2009-07-04 01:11:38.000000000 +0200
249 @@ -41,6 +41,7 @@
250  #include <linux/rcupdate.h>
251  #include <linux/completion.h>
252  #include <linux/tracehook.h>
253 +#include <linux/vs_memory.h>
254  
255  #include <asm/errno.h>
256  #include <asm/intrinsics.h>
257 @@ -2372,7 +2373,7 @@ pfm_smpl_buffer_alloc(struct task_struct
258          */
259         insert_vm_struct(mm, vma);
260  
261 -       mm->total_vm  += size >> PAGE_SHIFT;
262 +       vx_vmpages_add(mm, size >> PAGE_SHIFT);
263         vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
264                                                         vma_pages(vma));
265         up_write(&task->mm->mmap_sem);
266 diff -NurpP --minimal linux-2.6.30.2/arch/ia64/kernel/process.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/ia64/kernel/process.c
267 --- linux-2.6.30.2/arch/ia64/kernel/process.c   2009-06-11 17:11:57.000000000 +0200
268 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/ia64/kernel/process.c        2009-07-04 01:11:38.000000000 +0200
269 @@ -110,8 +110,8 @@ show_regs (struct pt_regs *regs)
270         unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
271  
272         print_modules();
273 -       printk("\nPid: %d, CPU %d, comm: %20s\n", task_pid_nr(current),
274 -                       smp_processor_id(), current->comm);
275 +       printk("\nPid: %d[#%u], CPU %d, comm: %20s\n", task_pid_nr(current),
276 +                       current->xid, smp_processor_id(), current->comm);
277         printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]    %s (%s)\n",
278                regs->cr_ipsr, regs->cr_ifs, ip, print_tainted(),
279                init_utsname()->release);
280 diff -NurpP --minimal linux-2.6.30.2/arch/ia64/kernel/ptrace.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/ia64/kernel/ptrace.c
281 --- linux-2.6.30.2/arch/ia64/kernel/ptrace.c    2008-12-25 00:26:37.000000000 +0100
282 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/ia64/kernel/ptrace.c 2009-07-04 01:11:38.000000000 +0200
283 @@ -23,6 +23,7 @@
284  #include <linux/regset.h>
285  #include <linux/elf.h>
286  #include <linux/tracehook.h>
287 +#include <linux/vs_base.h>
288  
289  #include <asm/pgtable.h>
290  #include <asm/processor.h>
291 diff -NurpP --minimal linux-2.6.30.2/arch/ia64/kernel/traps.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/ia64/kernel/traps.c
292 --- linux-2.6.30.2/arch/ia64/kernel/traps.c     2008-12-25 00:26:37.000000000 +0100
293 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/ia64/kernel/traps.c  2009-07-04 01:11:38.000000000 +0200
294 @@ -60,8 +60,9 @@ die (const char *str, struct pt_regs *re
295         put_cpu();
296  
297         if (++die.lock_owner_depth < 3) {
298 -               printk("%s[%d]: %s %ld [%d]\n",
299 -               current->comm, task_pid_nr(current), str, err, ++die_counter);
300 +               printk("%s[%d[#%u]]: %s %ld [%d]\n",
301 +                       current->comm, task_pid_nr(current), current->xid,
302 +                       str, err, ++die_counter);
303                 if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV)
304                     != NOTIFY_STOP)
305                         show_regs(regs);
306 @@ -324,8 +325,9 @@ handle_fpu_swa (int fp_fault, struct pt_
307                         if ((last.count & 15) < 5 && (ia64_fetchadd(1, &last.count, acq) & 15) < 5) {
308                                 last.time = current_jiffies + 5 * HZ;
309                                 printk(KERN_WARNING
310 -                                       "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n",
311 -                                       current->comm, task_pid_nr(current), regs->cr_iip + ia64_psr(regs)->ri, isr);
312 +                                       "%s(%d[#%u]): floating-point assist fault at ip %016lx, isr %016lx\n",
313 +                                       current->comm, task_pid_nr(current), current->xid,
314 +                                       regs->cr_iip + ia64_psr(regs)->ri, isr);
315                         }
316                 }
317         }
318 diff -NurpP --minimal linux-2.6.30.2/arch/ia64/mm/fault.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/ia64/mm/fault.c
319 --- linux-2.6.30.2/arch/ia64/mm/fault.c 2008-12-25 00:26:37.000000000 +0100
320 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/ia64/mm/fault.c      2009-07-04 01:11:38.000000000 +0200
321 @@ -10,6 +10,7 @@
322  #include <linux/interrupt.h>
323  #include <linux/kprobes.h>
324  #include <linux/kdebug.h>
325 +#include <linux/vs_memory.h>
326  
327  #include <asm/pgtable.h>
328  #include <asm/processor.h>
329 diff -NurpP --minimal linux-2.6.30.2/arch/m32r/kernel/traps.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/m32r/kernel/traps.c
330 --- linux-2.6.30.2/arch/m32r/kernel/traps.c     2008-12-25 00:26:37.000000000 +0100
331 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/m32r/kernel/traps.c  2009-07-04 01:11:38.000000000 +0200
332 @@ -196,8 +196,9 @@ static void show_registers(struct pt_reg
333         } else {
334                 printk("SPI: %08lx\n", sp);
335         }
336 -       printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)",
337 -               current->comm, task_pid_nr(current), 0xffff & i, 4096+(unsigned long)current);
338 +       printk("Process %s (pid: %d[#%u], process nr: %d, stackpage=%08lx)",
339 +               current->comm, task_pid_nr(current), current->xid,
340 +               0xffff & i, 4096+(unsigned long)current);
341  
342         /*
343          * When in-kernel, we also print out the stack and code at the
344 diff -NurpP --minimal linux-2.6.30.2/arch/m68k/Kconfig linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/m68k/Kconfig
345 --- linux-2.6.30.2/arch/m68k/Kconfig    2009-03-24 14:18:26.000000000 +0100
346 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/m68k/Kconfig 2009-07-04 01:11:38.000000000 +0200
347 @@ -616,6 +616,8 @@ source "fs/Kconfig"
348  
349  source "arch/m68k/Kconfig.debug"
350  
351 +source "kernel/vserver/Kconfig"
352 +
353  source "security/Kconfig"
354  
355  source "crypto/Kconfig"
356 diff -NurpP --minimal linux-2.6.30.2/arch/m68k/kernel/ptrace.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/m68k/kernel/ptrace.c
357 --- linux-2.6.30.2/arch/m68k/kernel/ptrace.c    2008-12-25 00:26:37.000000000 +0100
358 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/m68k/kernel/ptrace.c 2009-07-04 01:11:38.000000000 +0200
359 @@ -18,6 +18,7 @@
360  #include <linux/ptrace.h>
361  #include <linux/user.h>
362  #include <linux/signal.h>
363 +#include <linux/vs_base.h>
364  
365  #include <asm/uaccess.h>
366  #include <asm/page.h>
367 @@ -269,6 +270,8 @@ long arch_ptrace(struct task_struct *chi
368                 ret = ptrace_request(child, request, addr, data);
369                 break;
370         }
371 +       if (!vx_check(vx_task_xid(child), VS_WATCH_P | VS_IDENT))
372 +               goto out_tsk;
373  
374         return ret;
375  out_eio:
376 diff -NurpP --minimal linux-2.6.30.2/arch/m68k/kernel/traps.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/m68k/kernel/traps.c
377 --- linux-2.6.30.2/arch/m68k/kernel/traps.c     2009-03-24 14:18:26.000000000 +0100
378 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/m68k/kernel/traps.c  2009-07-04 01:11:38.000000000 +0200
379 @@ -906,8 +906,8 @@ void show_registers(struct pt_regs *regs
380         printk("d4: %08lx    d5: %08lx    a0: %08lx    a1: %08lx\n",
381                regs->d4, regs->d5, regs->a0, regs->a1);
382  
383 -       printk("Process %s (pid: %d, task=%p)\n",
384 -               current->comm, task_pid_nr(current), current);
385 +       printk("Process %s (pid: %d[#%u], task=%p)\n",
386 +               current->comm, task_pid_nr(current), current->xid, current);
387         addr = (unsigned long)&fp->un;
388         printk("Frame format=%X ", regs->format);
389         switch (regs->format) {
390 diff -NurpP --minimal linux-2.6.30.2/arch/m68knommu/Kconfig linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/m68knommu/Kconfig
391 --- linux-2.6.30.2/arch/m68knommu/Kconfig       2009-06-11 17:11:59.000000000 +0200
392 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/m68knommu/Kconfig    2009-07-04 01:11:38.000000000 +0200
393 @@ -721,6 +721,8 @@ source "fs/Kconfig"
394  
395  source "arch/m68knommu/Kconfig.debug"
396  
397 +source "kernel/vserver/Kconfig"
398 +
399  source "security/Kconfig"
400  
401  source "crypto/Kconfig"
402 diff -NurpP --minimal linux-2.6.30.2/arch/m68knommu/kernel/traps.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/m68knommu/kernel/traps.c
403 --- linux-2.6.30.2/arch/m68knommu/kernel/traps.c        2008-12-25 00:26:37.000000000 +0100
404 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/m68knommu/kernel/traps.c     2009-07-04 01:11:38.000000000 +0200
405 @@ -78,8 +78,9 @@ void die_if_kernel(char *str, struct pt_
406         printk(KERN_EMERG "d4: %08lx    d5: %08lx    a0: %08lx    a1: %08lx\n",
407                fp->d4, fp->d5, fp->a0, fp->a1);
408  
409 -       printk(KERN_EMERG "Process %s (pid: %d, stackpage=%08lx)\n",
410 -               current->comm, current->pid, PAGE_SIZE+(unsigned long)current);
411 +       printk(KERN_EMERG "Process %s (pid: %d[#%u], stackpage=%08lx)\n",
412 +               current->comm, task_pid_nr(current), current->xid,
413 +               PAGE_SIZE+(unsigned long)current);
414         show_stack(NULL, (unsigned long *)(fp + 1));
415         add_taint(TAINT_DIE);
416         do_exit(SIGSEGV);
417 diff -NurpP --minimal linux-2.6.30.2/arch/mips/Kconfig linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/mips/Kconfig
418 --- linux-2.6.30.2/arch/mips/Kconfig    2009-06-11 17:12:00.000000000 +0200
419 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/mips/Kconfig 2009-07-04 01:11:38.000000000 +0200
420 @@ -2137,6 +2137,8 @@ source "fs/Kconfig"
421  
422  source "arch/mips/Kconfig.debug"
423  
424 +source "kernel/vserver/Kconfig"
425 +
426  source "security/Kconfig"
427  
428  source "crypto/Kconfig"
429 diff -NurpP --minimal linux-2.6.30.2/arch/mips/kernel/ptrace.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/mips/kernel/ptrace.c
430 --- linux-2.6.30.2/arch/mips/kernel/ptrace.c    2008-12-25 00:26:37.000000000 +0100
431 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/mips/kernel/ptrace.c 2009-07-04 01:11:38.000000000 +0200
432 @@ -25,6 +25,7 @@
433  #include <linux/security.h>
434  #include <linux/audit.h>
435  #include <linux/seccomp.h>
436 +#include <linux/vs_base.h>
437  
438  #include <asm/byteorder.h>
439  #include <asm/cpu.h>
440 @@ -259,6 +260,9 @@ long arch_ptrace(struct task_struct *chi
441  {
442         int ret;
443  
444 +       if (!vx_check(vx_task_xid(child), VS_WATCH_P | VS_IDENT))
445 +               goto out;
446 +
447         switch (request) {
448         /* when I and D space are separate, these will need to be fixed. */
449         case PTRACE_PEEKTEXT: /* read word at location addr. */
450 diff -NurpP --minimal linux-2.6.30.2/arch/mips/kernel/scall32-o32.S linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/mips/kernel/scall32-o32.S
451 --- linux-2.6.30.2/arch/mips/kernel/scall32-o32.S       2009-06-11 17:12:01.000000000 +0200
452 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/mips/kernel/scall32-o32.S    2009-07-04 01:11:38.000000000 +0200
453 @@ -597,7 +597,7 @@ einval:     li      v0, -ENOSYS
454         sys     sys_mq_timedreceive     5
455         sys     sys_mq_notify           2       /* 4275 */
456         sys     sys_mq_getsetattr       3
457 -       sys     sys_ni_syscall          0       /* sys_vserver */
458 +       sys     sys_vserver             3
459         sys     sys_waitid              5
460         sys     sys_ni_syscall          0       /* available, was setaltroot */
461         sys     sys_add_key             5       /* 4280 */
462 diff -NurpP --minimal linux-2.6.30.2/arch/mips/kernel/scall64-64.S linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/mips/kernel/scall64-64.S
463 --- linux-2.6.30.2/arch/mips/kernel/scall64-64.S        2009-06-11 17:12:01.000000000 +0200
464 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/mips/kernel/scall64-64.S     2009-07-04 01:11:38.000000000 +0200
465 @@ -434,7 +434,7 @@ sys_call_table:
466         PTR     sys_mq_timedreceive
467         PTR     sys_mq_notify
468         PTR     sys_mq_getsetattr               /* 5235 */
469 -       PTR     sys_ni_syscall                  /* sys_vserver */
470 +       PTR     sys_vserver
471         PTR     sys_waitid
472         PTR     sys_ni_syscall                  /* available, was setaltroot */
473         PTR     sys_add_key
474 diff -NurpP --minimal linux-2.6.30.2/arch/mips/kernel/scall64-n32.S linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/mips/kernel/scall64-n32.S
475 --- linux-2.6.30.2/arch/mips/kernel/scall64-n32.S       2009-06-11 17:12:01.000000000 +0200
476 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/mips/kernel/scall64-n32.S    2009-07-04 01:11:38.000000000 +0200
477 @@ -360,7 +360,7 @@ EXPORT(sysn32_call_table)
478         PTR     compat_sys_mq_timedreceive
479         PTR     compat_sys_mq_notify
480         PTR     compat_sys_mq_getsetattr
481 -       PTR     sys_ni_syscall                  /* 6240, sys_vserver */
482 +       PTR     sys32_vserver                   /* 6240 */
483         PTR     compat_sys_waitid
484         PTR     sys_ni_syscall                  /* available, was setaltroot */
485         PTR     sys_add_key
486 diff -NurpP --minimal linux-2.6.30.2/arch/mips/kernel/scall64-o32.S linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/mips/kernel/scall64-o32.S
487 --- linux-2.6.30.2/arch/mips/kernel/scall64-o32.S       2009-06-11 17:12:01.000000000 +0200
488 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/mips/kernel/scall64-o32.S    2009-07-04 01:11:38.000000000 +0200
489 @@ -480,7 +480,7 @@ sys_call_table:
490         PTR     compat_sys_mq_timedreceive
491         PTR     compat_sys_mq_notify            /* 4275 */
492         PTR     compat_sys_mq_getsetattr
493 -       PTR     sys_ni_syscall                  /* sys_vserver */
494 +       PTR     sys32_vserver
495         PTR     sys_32_waitid
496         PTR     sys_ni_syscall                  /* available, was setaltroot */
497         PTR     sys_add_key                     /* 4280 */
498 diff -NurpP --minimal linux-2.6.30.2/arch/mips/kernel/traps.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/mips/kernel/traps.c
499 --- linux-2.6.30.2/arch/mips/kernel/traps.c     2009-06-11 17:12:01.000000000 +0200
500 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/mips/kernel/traps.c  2009-07-04 01:11:38.000000000 +0200
501 @@ -335,9 +335,10 @@ void show_registers(const struct pt_regs
502  
503         __show_regs(regs);
504         print_modules();
505 -       printk("Process %s (pid: %d, threadinfo=%p, task=%p, tls=%0*lx)\n",
506 -              current->comm, current->pid, current_thread_info(), current,
507 -             field, current_thread_info()->tp_value);
508 +       printk("Process %s (pid: %d:#%u, threadinfo=%p, task=%p, tls=%0*lx)\n",
509 +               current->comm, task_pid_nr(current), current->xid,
510 +               current_thread_info(), current,
511 +               field, current_thread_info()->tp_value);
512         if (cpu_has_userlocal) {
513                 unsigned long tls;
514  
515 diff -NurpP --minimal linux-2.6.30.2/arch/parisc/Kconfig linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/parisc/Kconfig
516 --- linux-2.6.30.2/arch/parisc/Kconfig  2009-06-11 17:12:02.000000000 +0200
517 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/parisc/Kconfig       2009-07-04 01:11:38.000000000 +0200
518 @@ -291,6 +291,8 @@ source "fs/Kconfig"
519  
520  source "arch/parisc/Kconfig.debug"
521  
522 +source "kernel/vserver/Kconfig"
523 +
524  source "security/Kconfig"
525  
526  source "crypto/Kconfig"
527 diff -NurpP --minimal linux-2.6.30.2/arch/parisc/kernel/syscall_table.S linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/parisc/kernel/syscall_table.S
528 --- linux-2.6.30.2/arch/parisc/kernel/syscall_table.S   2009-06-11 17:12:03.000000000 +0200
529 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/parisc/kernel/syscall_table.S        2009-07-04 01:11:38.000000000 +0200
530 @@ -361,7 +361,7 @@
531         ENTRY_COMP(mbind)               /* 260 */
532         ENTRY_COMP(get_mempolicy)
533         ENTRY_COMP(set_mempolicy)
534 -       ENTRY_SAME(ni_syscall)  /* 263: reserved for vserver */
535 +       ENTRY_DIFF(vserver)
536         ENTRY_SAME(add_key)
537         ENTRY_SAME(request_key)         /* 265 */
538         ENTRY_SAME(keyctl)
539 diff -NurpP --minimal linux-2.6.30.2/arch/parisc/kernel/traps.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/parisc/kernel/traps.c
540 --- linux-2.6.30.2/arch/parisc/kernel/traps.c   2009-06-11 17:12:03.000000000 +0200
541 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/parisc/kernel/traps.c        2009-07-04 01:11:38.000000000 +0200
542 @@ -236,8 +236,9 @@ void die_if_kernel(char *str, struct pt_
543                 if (err == 0)
544                         return; /* STFU */
545  
546 -               printk(KERN_CRIT "%s (pid %d): %s (code %ld) at " RFMT "\n",
547 -                       current->comm, task_pid_nr(current), str, err, regs->iaoq[0]);
548 +               printk(KERN_CRIT "%s (pid %d:#%u): %s (code %ld) at " RFMT "\n",
549 +                       current->comm, task_pid_nr(current), current->xid,
550 +                       str, err, regs->iaoq[0]);
551  #ifdef PRINT_USER_FAULTS
552                 /* XXX for debugging only */
553                 show_regs(regs);
554 @@ -271,8 +272,8 @@ KERN_CRIT "                     ||     |
555                 pdc_console_restart();
556         
557         if (err)
558 -               printk(KERN_CRIT "%s (pid %d): %s (code %ld)\n",
559 -                       current->comm, task_pid_nr(current), str, err);
560 +               printk(KERN_CRIT "%s (pid %d:#%u): %s (code %ld)\n",
561 +                       current->comm, task_pid_nr(current), current->xid, str, err);
562  
563         /* Wot's wrong wif bein' racy? */
564         if (current->thread.flags & PARISC_KERNEL_DEATH) {
565 diff -NurpP --minimal linux-2.6.30.2/arch/parisc/mm/fault.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/parisc/mm/fault.c
566 --- linux-2.6.30.2/arch/parisc/mm/fault.c       2009-03-24 14:18:33.000000000 +0100
567 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/parisc/mm/fault.c    2009-07-04 01:11:38.000000000 +0200
568 @@ -238,8 +238,9 @@ bad_area:
569  
570  #ifdef PRINT_USER_FAULTS
571                 printk(KERN_DEBUG "\n");
572 -               printk(KERN_DEBUG "do_page_fault() pid=%d command='%s' type=%lu address=0x%08lx\n",
573 -                   task_pid_nr(tsk), tsk->comm, code, address);
574 +               printk(KERN_DEBUG "do_page_fault() pid=%d:#%u "
575 +                   "command='%s' type=%lu address=0x%08lx\n",
576 +                   task_pid_nr(tsk), tsk->xid, tsk->comm, code, address);
577                 if (vma) {
578                         printk(KERN_DEBUG "vm_start = 0x%08lx, vm_end = 0x%08lx\n",
579                                         vma->vm_start, vma->vm_end);
580 @@ -265,7 +266,8 @@ no_context:
581  
582    out_of_memory:
583         up_read(&mm->mmap_sem);
584 -       printk(KERN_CRIT "VM: killing process %s\n", current->comm);
585 +       printk(KERN_CRIT "VM: killing process %s(%d:#%u)\n",
586 +               current->comm, current->pid, current->xid);
587         if (user_mode(regs))
588                 do_group_exit(SIGKILL);
589         goto no_context;
590 diff -NurpP --minimal linux-2.6.30.2/arch/powerpc/Kconfig linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/powerpc/Kconfig
591 --- linux-2.6.30.2/arch/powerpc/Kconfig 2009-06-11 17:12:03.000000000 +0200
592 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/powerpc/Kconfig      2009-07-04 01:11:38.000000000 +0200
593 @@ -922,6 +922,8 @@ source "lib/Kconfig"
594  
595  source "arch/powerpc/Kconfig.debug"
596  
597 +source "kernel/vserver/Kconfig"
598 +
599  source "security/Kconfig"
600  
601  config KEYS_COMPAT
602 diff -NurpP --minimal linux-2.6.30.2/arch/powerpc/kernel/irq.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/powerpc/kernel/irq.c
603 --- linux-2.6.30.2/arch/powerpc/kernel/irq.c    2009-06-11 17:12:14.000000000 +0200
604 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/powerpc/kernel/irq.c 2009-07-04 01:11:38.000000000 +0200
605 @@ -53,6 +53,7 @@
606  #include <linux/bootmem.h>
607  #include <linux/pci.h>
608  #include <linux/debugfs.h>
609 +#include <linux/vs_context.h>
610  
611  #include <asm/uaccess.h>
612  #include <asm/system.h>
613 diff -NurpP --minimal linux-2.6.30.2/arch/powerpc/kernel/process.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/powerpc/kernel/process.c
614 --- linux-2.6.30.2/arch/powerpc/kernel/process.c        2009-06-11 17:12:14.000000000 +0200
615 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/powerpc/kernel/process.c     2009-07-04 01:11:38.000000000 +0200
616 @@ -519,8 +519,9 @@ void show_regs(struct pt_regs * regs)
617  #else
618                 printk("DAR: "REG", DSISR: "REG"\n", regs->dar, regs->dsisr);
619  #endif
620 -       printk("TASK = %p[%d] '%s' THREAD: %p",
621 -              current, task_pid_nr(current), current->comm, task_thread_info(current));
622 +       printk("TASK = %p[%d,#%u] '%s' THREAD: %p",
623 +              current, task_pid_nr(current), current->xid,
624 +              current->comm, task_thread_info(current));
625  
626  #ifdef CONFIG_SMP
627         printk(" CPU: %d", raw_smp_processor_id());
628 diff -NurpP --minimal linux-2.6.30.2/arch/powerpc/kernel/traps.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/powerpc/kernel/traps.c
629 --- linux-2.6.30.2/arch/powerpc/kernel/traps.c  2009-06-11 17:12:14.000000000 +0200
630 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/powerpc/kernel/traps.c       2009-07-04 01:11:38.000000000 +0200
631 @@ -921,8 +921,9 @@ void nonrecoverable_exception(struct pt_
632  
633  void trace_syscall(struct pt_regs *regs)
634  {
635 -       printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld    %s\n",
636 -              current, task_pid_nr(current), regs->nip, regs->link, regs->gpr[0],
637 +       printk("Task: %p(%d[#%u]), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld    %s\n",
638 +              current, task_pid_nr(current), current->xid,
639 +              regs->nip, regs->link, regs->gpr[0],
640                regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted());
641  }
642  
643 diff -NurpP --minimal linux-2.6.30.2/arch/powerpc/kernel/vdso.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/powerpc/kernel/vdso.c
644 --- linux-2.6.30.2/arch/powerpc/kernel/vdso.c   2009-03-24 14:18:35.000000000 +0100
645 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/powerpc/kernel/vdso.c        2009-07-04 01:11:38.000000000 +0200
646 @@ -22,6 +22,7 @@
647  #include <linux/security.h>
648  #include <linux/bootmem.h>
649  #include <linux/lmb.h>
650 +#include <linux/vs_memory.h>
651  
652  #include <asm/pgtable.h>
653  #include <asm/system.h>
654 diff -NurpP --minimal linux-2.6.30.2/arch/powerpc/mm/fault.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/powerpc/mm/fault.c
655 --- linux-2.6.30.2/arch/powerpc/mm/fault.c      2009-06-11 17:12:14.000000000 +0200
656 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/powerpc/mm/fault.c   2009-07-04 01:11:38.000000000 +0200
657 @@ -350,7 +350,8 @@ out_of_memory:
658                 down_read(&mm->mmap_sem);
659                 goto survive;
660         }
661 -       printk("VM: killing process %s\n", current->comm);
662 +       printk("VM: killing process %s(%d:#%u)\n",
663 +               current->comm, current->pid, current->xid);
664         if (user_mode(regs))
665                 do_group_exit(SIGKILL);
666         return SIGKILL;
667 diff -NurpP --minimal linux-2.6.30.2/arch/s390/Kconfig linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/s390/Kconfig
668 --- linux-2.6.30.2/arch/s390/Kconfig    2009-06-11 17:12:16.000000000 +0200
669 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/s390/Kconfig 2009-07-04 01:11:38.000000000 +0200
670 @@ -583,6 +583,8 @@ source "fs/Kconfig"
671  
672  source "arch/s390/Kconfig.debug"
673  
674 +source "kernel/vserver/Kconfig"
675 +
676  source "security/Kconfig"
677  
678  source "crypto/Kconfig"
679 diff -NurpP --minimal linux-2.6.30.2/arch/s390/kernel/ptrace.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/s390/kernel/ptrace.c
680 --- linux-2.6.30.2/arch/s390/kernel/ptrace.c    2009-03-24 14:18:40.000000000 +0100
681 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/s390/kernel/ptrace.c 2009-07-04 01:11:38.000000000 +0200
682 @@ -36,6 +36,7 @@
683  #include <linux/elf.h>
684  #include <linux/regset.h>
685  #include <linux/tracehook.h>
686 +#include <linux/vs_base.h>
687  
688  #include <asm/segment.h>
689  #include <asm/page.h>
690 diff -NurpP --minimal linux-2.6.30.2/arch/s390/kernel/syscalls.S linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/s390/kernel/syscalls.S
691 --- linux-2.6.30.2/arch/s390/kernel/syscalls.S  2009-06-11 17:12:16.000000000 +0200
692 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/s390/kernel/syscalls.S       2009-07-04 01:11:38.000000000 +0200
693 @@ -271,7 +271,7 @@ SYSCALL(sys_clock_settime,sys_clock_sett
694  SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper)       /* 260 */
695  SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper)
696  SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper)
697 -NI_SYSCALL                                                     /* reserved for vserver */
698 +SYSCALL(sys_vserver,sys_vserver,sys32_vserver)
699  SYSCALL(sys_s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper)
700  SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper)
701  SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper)
702 diff -NurpP --minimal linux-2.6.30.2/arch/sh/Kconfig linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/sh/Kconfig
703 --- linux-2.6.30.2/arch/sh/Kconfig      2009-06-11 17:12:16.000000000 +0200
704 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/sh/Kconfig   2009-07-04 01:11:38.000000000 +0200
705 @@ -757,6 +757,8 @@ source "fs/Kconfig"
706  
707  source "arch/sh/Kconfig.debug"
708  
709 +source "kernel/vserver/Kconfig"
710 +
711  source "security/Kconfig"
712  
713  source "crypto/Kconfig"
714 diff -NurpP --minimal linux-2.6.30.2/arch/sh/kernel/irq.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/sh/kernel/irq.c
715 --- linux-2.6.30.2/arch/sh/kernel/irq.c 2009-06-11 17:12:18.000000000 +0200
716 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/sh/kernel/irq.c      2009-07-04 01:11:38.000000000 +0200
717 @@ -11,6 +11,7 @@
718  #include <linux/module.h>
719  #include <linux/kernel_stat.h>
720  #include <linux/seq_file.h>
721 +#include <linux/vs_context.h>
722  #include <asm/processor.h>
723  #include <asm/machvec.h>
724  #include <asm/uaccess.h>
725 diff -NurpP --minimal linux-2.6.30.2/arch/sh/kernel/vsyscall/vsyscall.c linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/sh/kernel/vsyscall/vsyscall.c
726 --- linux-2.6.30.2/arch/sh/kernel/vsyscall/vsyscall.c   2009-03-24 14:18:42.000000000 +0100
727 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/sh/kernel/vsyscall/vsyscall.c        2009-07-04 01:11:38.000000000 +0200
728 @@ -19,6 +19,7 @@
729  #include <linux/elf.h>
730  #include <linux/sched.h>
731  #include <linux/err.h>
732 +#include <linux/vs_memory.h>
733  
734  /*
735   * Should the kernel map a VDSO page into processes and pass its
736 diff -NurpP --minimal linux-2.6.30.2/arch/sparc/include/asm/tlb_64.h linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/sparc/include/asm/tlb_64.h
737 --- linux-2.6.30.2/arch/sparc/include/asm/tlb_64.h      2009-06-11 17:12:18.000000000 +0200
738 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/sparc/include/asm/tlb_64.h   2009-07-04 01:11:38.000000000 +0200
739 @@ -3,6 +3,7 @@
740  
741  #include <linux/swap.h>
742  #include <linux/pagemap.h>
743 +#include <linux/vs_memory.h>
744  #include <asm/pgalloc.h>
745  #include <asm/tlbflush.h>
746  #include <asm/mmu_context.h>
747 diff -NurpP --minimal linux-2.6.30.2/arch/sparc/include/asm/unistd.h linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/sparc/include/asm/unistd.h
748 --- linux-2.6.30.2/arch/sparc/include/asm/unistd.h      2009-06-11 17:12:18.000000000 +0200
749 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/sparc/include/asm/unistd.h   2009-07-04 01:11:38.000000000 +0200
750 @@ -335,7 +335,7 @@
751  #define __NR_timer_getoverrun  264
752  #define __NR_timer_delete      265
753  #define __NR_timer_create      266
754 -/* #define __NR_vserver                267 Reserved for VSERVER */
755 +#define __NR_vserver           267
756  #define __NR_io_setup          268
757  #define __NR_io_destroy                269
758  #define __NR_io_submit         270
759 diff -NurpP --minimal linux-2.6.30.2/arch/sparc/Kconfig linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/sparc/Kconfig
760 --- linux-2.6.30.2/arch/sparc/Kconfig   2009-06-11 17:12:18.000000000 +0200
761 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/sparc/Kconfig        2009-07-04 01:11:38.000000000 +0200
762 @@ -525,6 +525,8 @@ source "fs/Kconfig"
763  
764  source "arch/sparc/Kconfig.debug"
765  
766 +source "kernel/vserver/Kconfig"
767 +
768  source "security/Kconfig"
769  
770  source "crypto/Kconfig"
771 diff -NurpP --minimal linux-2.6.30.2/arch/sparc/kernel/systbls_32.S linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/sparc/kernel/systbls_32.S
772 --- linux-2.6.30.2/arch/sparc/kernel/systbls_32.S       2009-06-11 17:12:19.000000000 +0200
773 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/sparc/kernel/systbls_32.S    2009-07-04 01:11:38.000000000 +0200
774 @@ -70,7 +70,7 @@ sys_call_table:
775  /*250*/        .long sparc_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl
776  /*255*/        .long sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
777  /*260*/        .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
778 -/*265*/        .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy
779 +/*265*/        .long sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy
780  /*270*/        .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
781  /*275*/        .long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
782  /*280*/        .long sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat
783 diff -NurpP --minimal linux-2.6.30.2/arch/sparc/kernel/systbls_64.S linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/sparc/kernel/systbls_64.S
784 --- linux-2.6.30.2/arch/sparc/kernel/systbls_64.S       2009-06-11 17:12:19.000000000 +0200
785 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/sparc/kernel/systbls_64.S    2009-07-04 01:11:38.000000000 +0200
786 @@ -71,7 +71,7 @@ sys_call_table32:
787  /*250*/        .word sys32_mremap, sys32_sysctl, sys32_getsid, sys_fdatasync, sys32_nfsservctl
788         .word sys32_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep
789  /*260*/        .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun
790 -       .word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy
791 +       .word sys_timer_delete, compat_sys_timer_create, sys32_vserver, compat_sys_io_setup, sys_io_destroy
792  /*270*/        .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink
793         .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid
794  /*280*/        .word sys32_tee, sys_add_key, sys_request_key, sys_keyctl, compat_sys_openat
795 @@ -145,7 +145,7 @@ sys_call_table:
796  /*250*/        .word sys_64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl
797         .word sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
798  /*260*/        .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
799 -       .word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy
800 +       .word sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy
801  /*270*/        .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
802         .word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
803  /*280*/        .word sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat
804 diff -NurpP --minimal linux-2.6.30.2/arch/x86/ia32/ia32entry.S linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/x86/ia32/ia32entry.S
805 --- linux-2.6.30.2/arch/x86/ia32/ia32entry.S    2009-06-11 17:12:20.000000000 +0200
806 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/x86/ia32/ia32entry.S 2009-07-04 01:11:38.000000000 +0200
807 @@ -768,7 +768,7 @@ ia32_sys_call_table:
808         .quad sys_tgkill                /* 270 */
809         .quad compat_sys_utimes
810         .quad sys32_fadvise64_64
811 -       .quad quiet_ni_syscall  /* sys_vserver */
812 +       .quad sys32_vserver
813         .quad sys_mbind
814         .quad compat_sys_get_mempolicy  /* 275 */
815         .quad sys_set_mempolicy
816 diff -NurpP --minimal linux-2.6.30.2/arch/x86/include/asm/unistd_64.h linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/x86/include/asm/unistd_64.h
817 --- linux-2.6.30.2/arch/x86/include/asm/unistd_64.h     2009-06-11 17:12:21.000000000 +0200
818 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/x86/include/asm/unistd_64.h  2009-07-04 01:11:38.000000000 +0200
819 @@ -535,7 +535,7 @@ __SYSCALL(__NR_tgkill, sys_tgkill)
820  #define __NR_utimes                            235
821  __SYSCALL(__NR_utimes, sys_utimes)
822  #define __NR_vserver                           236
823 -__SYSCALL(__NR_vserver, sys_ni_syscall)
824 +__SYSCALL(__NR_vserver, sys_vserver)
825  #define __NR_mbind                             237
826  __SYSCALL(__NR_mbind, sys_mbind)
827  #define __NR_set_mempolicy                     238
828 diff -NurpP --minimal linux-2.6.30.2/arch/x86/Kconfig linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/x86/Kconfig
829 --- linux-2.6.30.2/arch/x86/Kconfig     2009-06-11 17:12:19.000000000 +0200
830 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/x86/Kconfig  2009-07-04 01:11:38.000000000 +0200
831 @@ -2045,6 +2045,8 @@ source "fs/Kconfig"
832  
833  source "arch/x86/Kconfig.debug"
834  
835 +source "kernel/vserver/Kconfig"
836 +
837  source "security/Kconfig"
838  
839  source "crypto/Kconfig"
840 diff -NurpP --minimal linux-2.6.30.2/arch/x86/kernel/syscall_table_32.S linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/x86/kernel/syscall_table_32.S
841 --- linux-2.6.30.2/arch/x86/kernel/syscall_table_32.S   2009-06-11 17:12:23.000000000 +0200
842 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/arch/x86/kernel/syscall_table_32.S        2009-07-04 01:11:38.000000000 +0200
843 @@ -272,7 +272,7 @@ ENTRY(sys_call_table)
844         .long sys_tgkill        /* 270 */
845         .long sys_utimes
846         .long sys_fadvise64_64
847 -       .long sys_ni_syscall    /* sys_vserver */
848 +       .long sys_vserver
849         .long sys_mbind
850         .long sys_get_mempolicy
851         .long sys_set_mempolicy
852 diff -NurpP --minimal linux-2.6.30.2/Documentation/vserver/debug.txt linux-2.6.30.2-vs2.3.0.36.14-pre4/Documentation/vserver/debug.txt
853 --- linux-2.6.30.2/Documentation/vserver/debug.txt      1970-01-01 01:00:00.000000000 +0100
854 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/Documentation/vserver/debug.txt   2009-07-04 01:11:38.000000000 +0200
855 @@ -0,0 +1,154 @@
856 +
857 +debug_cvirt:
858 +
859 + 2   4 "vx_map_tgid: %p/%llx: %d -> %d"
860 +       "vx_rmap_tgid: %p/%llx: %d -> %d"
861 +
862 +debug_dlim:
863 +
864 + 0   1 "ALLOC (%p,#%d)%c inode (%d)"
865 +       "FREE  (%p,#%d)%c inode"
866 + 1   2 "ALLOC (%p,#%d)%c %lld bytes (%d)"
867 +       "FREE  (%p,#%d)%c %lld bytes"
868 + 2   4 "ADJUST: %lld,%lld on %ld,%ld [mult=%d]"
869 + 3   8 "ext3_has_free_blocks(%p): %lu<%lu+1, %c, %u!=%u r=%d"
870 +       "ext3_has_free_blocks(%p): free=%lu, root=%lu"
871 +       "rcu_free_dl_info(%p)"
872 + 4  10 "alloc_dl_info(%p,%d) = %p"
873 +       "dealloc_dl_info(%p)"
874 +       "get_dl_info(%p[#%d.%d])"
875 +       "put_dl_info(%p[#%d.%d])"
876 + 5  20 "alloc_dl_info(%p,%d)*"
877 + 6  40 "__hash_dl_info: %p[#%d]"
878 +       "__unhash_dl_info: %p[#%d]"
879 + 7  80 "locate_dl_info(%p,#%d) = %p"
880 +
881 +debug_misc:
882 +
883 + 0   1 "destroy_dqhash: %p [#0x%08x] c=%d"
884 +       "new_dqhash: %p [#0x%08x]"
885 +       "vroot[%d]_clr_dev: dev=%p[%lu,%d:%d]"
886 +       "vroot[%d]_get_real_bdev: dev=%p[%lu,%d:%d]"
887 +       "vroot[%d]_set_dev: dev=%p[%lu,%d:%d]"
888 +       "vroot_get_real_bdev not set"
889 + 1   2 "cow_break_link(»%s«)"
890 +       "temp copy »%s«"
891 + 2   4 "dentry_open(new): %p"
892 +       "dentry_open(old): %p"
893 +       "lookup_create(new): %p"
894 +       "old path »%s«"
895 +       "path_lookup(old): %d"
896 +       "vfs_create(new): %d"
897 +       "vfs_rename: %d"
898 +       "vfs_sendfile: %d"
899 + 3   8 "fput(new_file=%p[#%d])"
900 +       "fput(old_file=%p[#%d])"
901 + 4  10 "vx_info_kill(%p[#%d],%d,%d) = %d"
902 +       "vx_info_kill(%p[#%d],%d,%d)*"
903 + 5  20 "vs_reboot(%p[#%d],%d)"
904 + 6  40 "dropping task %p[#%u,%u] for %p[#%u,%u]"
905 +
906 +debug_net:
907 +
908 + 2   4 "nx_addr_conflict(%p,%p) %d.%d,%d.%d"
909 + 3   8 "inet_bind(%p) %d.%d.%d.%d, %d.%d.%d.%d, %d.%d.%d.%d"
910 +       "inet_bind(%p)* %p,%p;%lx %d.%d.%d.%d"
911 + 4  10 "ip_route_connect(%p) %p,%p;%lx"
912 + 5  20 "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx"
913 + 6  40 "sk,egf: %p [#%d] (from %d)"
914 +       "sk,egn: %p [#%d] (from %d)"
915 +       "sk,req: %p [#%d] (from %d)"
916 +       "sk: %p [#%d] (from %d)"
917 +       "tw: %p [#%d] (from %d)"
918 + 7  80 "__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d"
919 +       "__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d"
920 +
921 +debug_nid:
922 +
923 + 0   1 "__lookup_nx_info(#%u): %p[#%u]"
924 +       "alloc_nx_info(%d) = %p"
925 +       "create_nx_info(%d) (dynamic rejected)"
926 +       "create_nx_info(%d) = %p (already there)"
927 +       "create_nx_info(%d) = %p (new)"
928 +       "dealloc_nx_info(%p)"
929 + 1   2 "alloc_nx_info(%d)*"
930 +       "create_nx_info(%d)*"
931 + 2   4 "get_nx_info(%p[#%d.%d])"
932 +       "put_nx_info(%p[#%d.%d])"
933 + 3   8 "claim_nx_info(%p[#%d.%d.%d]) %p"
934 +       "clr_nx_info(%p[#%d.%d])"
935 +       "init_nx_info(%p[#%d.%d])"
936 +       "release_nx_info(%p[#%d.%d.%d]) %p"
937 +       "set_nx_info(%p[#%d.%d])"
938 + 4  10 "__hash_nx_info: %p[#%d]"
939 +       "__nx_dynamic_id: [#%d]"
940 +       "__unhash_nx_info: %p[#%d.%d.%d]"
941 + 5  20 "moved task %p into nxi:%p[#%d]"
942 +       "nx_migrate_task(%p,%p[#%d.%d.%d])"
943 +       "task_get_nx_info(%p)"
944 + 6  40 "nx_clear_persistent(%p[#%d])"
945 +
946 +debug_quota:
947 +
948 + 0   1 "quota_sync_dqh(%p,%d) discard inode %p"
949 + 1   2 "quota_sync_dqh(%p,%d)"
950 +       "sync_dquots(%p,%d)"
951 +       "sync_dquots_dqh(%p,%d)"
952 + 3   8 "do_quotactl(%p,%d,cmd=%d,id=%d,%p)"
953 +
954 +debug_switch:
955 +
956 + 0   1 "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]"
957 + 1   2 "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]"
958 + 4  10 "%s: (%s %s) returned %s with %d"
959 +
960 +debug_tag:
961 +
962 + 7  80 "dx_parse_tag(»%s«): %d:#%d"
963 +       "dx_propagate_tag(%p[#%lu.%d]): %d,%d"
964 +
965 +debug_xid:
966 +
967 + 0   1 "__lookup_vx_info(#%u): %p[#%u]"
968 +       "alloc_vx_info(%d) = %p"
969 +       "alloc_vx_info(%d)*"
970 +       "create_vx_info(%d) (dynamic rejected)"
971 +       "create_vx_info(%d) = %p (already there)"
972 +       "create_vx_info(%d) = %p (new)"
973 +       "dealloc_vx_info(%p)"
974 +       "loc_vx_info(%d) = %p (found)"
975 +       "loc_vx_info(%d) = %p (new)"
976 +       "loc_vx_info(%d) = %p (not available)"
977 + 1   2 "create_vx_info(%d)*"
978 +       "loc_vx_info(%d)*"
979 + 2   4 "get_vx_info(%p[#%d.%d])"
980 +       "put_vx_info(%p[#%d.%d])"
981 + 3   8 "claim_vx_info(%p[#%d.%d.%d]) %p"
982 +       "clr_vx_info(%p[#%d.%d])"
983 +       "init_vx_info(%p[#%d.%d])"
984 +       "release_vx_info(%p[#%d.%d.%d]) %p"
985 +       "set_vx_info(%p[#%d.%d])"
986 + 4  10 "__hash_vx_info: %p[#%d]"
987 +       "__unhash_vx_info: %p[#%d.%d.%d]"
988 +       "__vx_dynamic_id: [#%d]"
989 + 5  20 "enter_vx_info(%p[#%d],%p) %p[#%d,%p]"
990 +       "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]"
991 +       "moved task %p into vxi:%p[#%d]"
992 +       "task_get_vx_info(%p)"
993 +       "vx_migrate_task(%p,%p[#%d.%d])"
994 + 6  40 "vx_clear_persistent(%p[#%d])"
995 +       "vx_exit_init(%p[#%d],%p[#%d,%d,%d])"
996 +       "vx_set_init(%p[#%d],%p[#%d,%d,%d])"
997 +       "vx_set_persistent(%p[#%d])"
998 +       "vx_set_reaper(%p[#%d],%p[#%d,%d])"
999 + 7  80 "vx_child_reaper(%p[#%u,%u]) = %p[#%u,%u]"
1000 +
1001 +
1002 +debug_limit:
1003 +
1004 + n 2^n "vx_acc_cres[%5d,%s,%2d]: %5d%s"
1005 +       "vx_cres_avail[%5d,%s,%2d]: %5ld > %5d + %5d"
1006 +
1007 + m 2^m "vx_acc_page[%5d,%s,%2d]: %5d%s"
1008 +       "vx_acc_pages[%5d,%s,%2d]: %5d += %5d"
1009 +       "vx_pages_avail[%5d,%s,%2d]: %5ld > %5d + %5d"
1010 diff -NurpP --minimal linux-2.6.30.2/drivers/block/Kconfig linux-2.6.30.2-vs2.3.0.36.14-pre4/drivers/block/Kconfig
1011 --- linux-2.6.30.2/drivers/block/Kconfig        2009-06-11 17:12:26.000000000 +0200
1012 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/drivers/block/Kconfig     2009-07-04 01:11:39.000000000 +0200
1013 @@ -271,6 +271,13 @@ config BLK_DEV_CRYPTOLOOP
1014           instead, which can be configured to be on-disk compatible with the
1015           cryptoloop device.
1016  
1017 +config BLK_DEV_VROOT
1018 +       tristate "Virtual Root device support"
1019 +       depends on QUOTACTL
1020 +       ---help---
1021 +         Saying Y here will allow you to use quota/fs ioctls on a shared
1022 +         partition within a virtual server without compromising security.
1023 +
1024  config BLK_DEV_NBD
1025         tristate "Network block device support"
1026         depends on NET
1027 diff -NurpP --minimal linux-2.6.30.2/drivers/block/loop.c linux-2.6.30.2-vs2.3.0.36.14-pre4/drivers/block/loop.c
1028 --- linux-2.6.30.2/drivers/block/loop.c 2009-06-11 17:12:26.000000000 +0200
1029 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/drivers/block/loop.c      2009-07-04 01:11:39.000000000 +0200
1030 @@ -75,6 +75,7 @@
1031  #include <linux/gfp.h>
1032  #include <linux/kthread.h>
1033  #include <linux/splice.h>
1034 +#include <linux/vs_context.h>
1035  
1036  #include <asm/uaccess.h>
1037  
1038 @@ -834,6 +835,7 @@ static int loop_set_fd(struct loop_devic
1039         lo->lo_blocksize = lo_blocksize;
1040         lo->lo_device = bdev;
1041         lo->lo_flags = lo_flags;
1042 +       lo->lo_xid = vx_current_xid();
1043         lo->lo_backing_file = file;
1044         lo->transfer = transfer_none;
1045         lo->ioctl = NULL;
1046 @@ -959,6 +961,7 @@ static int loop_clr_fd(struct loop_devic
1047         lo->lo_encrypt_key_size = 0;
1048         lo->lo_flags = 0;
1049         lo->lo_thread = NULL;
1050 +       lo->lo_xid = 0;
1051         memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
1052         memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
1053         memset(lo->lo_file_name, 0, LO_NAME_SIZE);
1054 @@ -993,7 +996,7 @@ loop_set_status(struct loop_device *lo, 
1055  
1056         if (lo->lo_encrypt_key_size &&
1057             lo->lo_key_owner != uid &&
1058 -           !capable(CAP_SYS_ADMIN))
1059 +           !vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_CLOOP))
1060                 return -EPERM;
1061         if (lo->lo_state != Lo_bound)
1062                 return -ENXIO;
1063 @@ -1077,7 +1080,8 @@ loop_get_status(struct loop_device *lo, 
1064         memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
1065         info->lo_encrypt_type =
1066                 lo->lo_encryption ? lo->lo_encryption->number : 0;
1067 -       if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
1068 +       if (lo->lo_encrypt_key_size &&
1069 +               vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_CLOOP)) {
1070                 info->lo_encrypt_key_size = lo->lo_encrypt_key_size;
1071                 memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
1072                        lo->lo_encrypt_key_size);
1073 @@ -1421,6 +1425,9 @@ static int lo_open(struct block_device *
1074  {
1075         struct loop_device *lo = bdev->bd_disk->private_data;
1076  
1077 +       if (!vx_check(lo->lo_xid, VS_IDENT|VS_HOSTID|VS_ADMIN_P))
1078 +               return -EACCES;
1079 +
1080         mutex_lock(&lo->lo_ctl_mutex);
1081         lo->lo_refcnt++;
1082         mutex_unlock(&lo->lo_ctl_mutex);
1083 diff -NurpP --minimal linux-2.6.30.2/drivers/block/Makefile linux-2.6.30.2-vs2.3.0.36.14-pre4/drivers/block/Makefile
1084 --- linux-2.6.30.2/drivers/block/Makefile       2009-06-11 17:12:26.000000000 +0200
1085 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/drivers/block/Makefile    2009-07-04 01:11:39.000000000 +0200
1086 @@ -33,6 +33,7 @@ obj-$(CONFIG_VIODASD)         += viodasd.o
1087  obj-$(CONFIG_BLK_DEV_SX8)      += sx8.o
1088  obj-$(CONFIG_BLK_DEV_UB)       += ub.o
1089  obj-$(CONFIG_BLK_DEV_HD)       += hd.o
1090 +obj-$(CONFIG_BLK_DEV_VROOT)    += vroot.o
1091  
1092  obj-$(CONFIG_XEN_BLKDEV_FRONTEND)      += xen-blkfront.o
1093  
1094 diff -NurpP --minimal linux-2.6.30.2/drivers/block/vroot.c linux-2.6.30.2-vs2.3.0.36.14-pre4/drivers/block/vroot.c
1095 --- linux-2.6.30.2/drivers/block/vroot.c        1970-01-01 01:00:00.000000000 +0100
1096 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/drivers/block/vroot.c     2009-07-04 01:11:39.000000000 +0200
1097 @@ -0,0 +1,281 @@
1098 +/*
1099 + *  linux/drivers/block/vroot.c
1100 + *
1101 + *  written by Herbert Pötzl, 9/11/2002
1102 + *  ported to 2.6.10 by Herbert Pötzl, 30/12/2004
1103 + *
1104 + *  based on the loop.c code by Theodore Ts'o.
1105 + *
1106 + * Copyright (C) 2002-2007 by Herbert Pötzl.
1107 + * Redistribution of this file is permitted under the
1108 + * GNU General Public License.
1109 + *
1110 + */
1111 +
1112 +#include <linux/module.h>
1113 +#include <linux/moduleparam.h>
1114 +#include <linux/file.h>
1115 +#include <linux/major.h>
1116 +#include <linux/blkdev.h>
1117 +
1118 +#include <linux/vroot.h>
1119 +#include <linux/vs_context.h>
1120 +
1121 +
1122 +static int max_vroot = 8;
1123 +
1124 +static struct vroot_device *vroot_dev;
1125 +static struct gendisk **disks;
1126 +
1127 +
1128 +static int vroot_set_dev(
1129 +       struct vroot_device *vr,
1130 +       struct block_device *bdev,
1131 +       unsigned int arg)
1132 +{
1133 +       struct block_device *real_bdev;
1134 +       struct file *file;
1135 +       struct inode *inode;
1136 +       int error;
1137 +
1138 +       error = -EBUSY;
1139 +       if (vr->vr_state != Vr_unbound)
1140 +               goto out;
1141 +
1142 +       error = -EBADF;
1143 +       file = fget(arg);
1144 +       if (!file)
1145 +               goto out;
1146 +
1147 +       error = -EINVAL;
1148 +       inode = file->f_dentry->d_inode;
1149 +
1150 +
1151 +       if (S_ISBLK(inode->i_mode)) {
1152 +               real_bdev = inode->i_bdev;
1153 +               vr->vr_device = real_bdev;
1154 +               __iget(real_bdev->bd_inode);
1155 +       } else
1156 +               goto out_fput;
1157 +
1158 +       vxdprintk(VXD_CBIT(misc, 0),
1159 +               "vroot[%d]_set_dev: dev=" VXF_DEV,
1160 +               vr->vr_number, VXD_DEV(real_bdev));
1161 +
1162 +       vr->vr_state = Vr_bound;
1163 +       error = 0;
1164 +
1165 + out_fput:
1166 +       fput(file);
1167 + out:
1168 +       return error;
1169 +}
1170 +
1171 +static int vroot_clr_dev(
1172 +       struct vroot_device *vr,
1173 +       struct block_device *bdev)
1174 +{
1175 +       struct block_device *real_bdev;
1176 +
1177 +       if (vr->vr_state != Vr_bound)
1178 +               return -ENXIO;
1179 +       if (vr->vr_refcnt > 1)  /* we needed one fd for the ioctl */
1180 +               return -EBUSY;
1181 +
1182 +       real_bdev = vr->vr_device;
1183 +
1184 +       vxdprintk(VXD_CBIT(misc, 0),
1185 +               "vroot[%d]_clr_dev: dev=" VXF_DEV,
1186 +               vr->vr_number, VXD_DEV(real_bdev));
1187 +
1188 +       bdput(real_bdev);
1189 +       vr->vr_state = Vr_unbound;
1190 +       vr->vr_device = NULL;
1191 +       return 0;
1192 +}
1193 +
1194 +
1195 +static int vr_ioctl(struct block_device *bdev, fmode_t mode,
1196 +       unsigned int cmd, unsigned long arg)
1197 +{
1198 +       struct vroot_device *vr = bdev->bd_disk->private_data;
1199 +       int err;
1200 +
1201 +       down(&vr->vr_ctl_mutex);
1202 +       switch (cmd) {
1203 +       case VROOT_SET_DEV:
1204 +               err = vroot_set_dev(vr, bdev, arg);
1205 +               break;
1206 +       case VROOT_CLR_DEV:
1207 +               err = vroot_clr_dev(vr, bdev);
1208 +               break;
1209 +       default:
1210 +               err = -EINVAL;
1211 +               break;
1212 +       }
1213 +       up(&vr->vr_ctl_mutex);
1214 +       return err;
1215 +}
1216 +
1217 +static int vr_open(struct block_device *bdev, fmode_t mode)
1218 +{
1219 +       struct vroot_device *vr = bdev->bd_disk->private_data;
1220 +
1221 +       down(&vr->vr_ctl_mutex);
1222 +       vr->vr_refcnt++;
1223 +       up(&vr->vr_ctl_mutex);
1224 +       return 0;
1225 +}
1226 +
1227 +static int vr_release(struct gendisk *disk, fmode_t mode)
1228 +{
1229 +       struct vroot_device *vr = disk->private_data;
1230 +
1231 +       down(&vr->vr_ctl_mutex);
1232 +       --vr->vr_refcnt;
1233 +       up(&vr->vr_ctl_mutex);
1234 +       return 0;
1235 +}
1236 +
1237 +static struct block_device_operations vr_fops = {
1238 +       .owner =        THIS_MODULE,
1239 +       .open =         vr_open,
1240 +       .release =      vr_release,
1241 +       .ioctl =        vr_ioctl,
1242 +};
1243 +
1244 +struct block_device *__vroot_get_real_bdev(struct block_device *bdev)
1245 +{
1246 +       struct inode *inode = bdev->bd_inode;
1247 +       struct vroot_device *vr;
1248 +       struct block_device *real_bdev;
1249 +       int minor = iminor(inode);
1250 +
1251 +       vr = &vroot_dev[minor];
1252 +       real_bdev = vr->vr_device;
1253 +
1254 +       vxdprintk(VXD_CBIT(misc, 0),
1255 +               "vroot[%d]_get_real_bdev: dev=" VXF_DEV,
1256 +               vr->vr_number, VXD_DEV(real_bdev));
1257 +
1258 +       if (vr->vr_state != Vr_bound)
1259 +               return ERR_PTR(-ENXIO);
1260 +
1261 +       __iget(real_bdev->bd_inode);
1262 +       return real_bdev;
1263 +}
1264 +
1265 +/*
1266 + * And now the modules code and kernel interface.
1267 + */
1268 +
1269 +module_param(max_vroot, int, 0);
1270 +
1271 +MODULE_PARM_DESC(max_vroot, "Maximum number of vroot devices (1-256)");
1272 +MODULE_LICENSE("GPL");
1273 +MODULE_ALIAS_BLOCKDEV_MAJOR(VROOT_MAJOR);
1274 +
1275 +MODULE_AUTHOR ("Herbert Pötzl");
1276 +MODULE_DESCRIPTION ("Virtual Root Device Mapper");
1277 +
1278 +/* Set up the vroot tables and gendisks; returns 0 or a negative errno. */
1279 +int __init vroot_init(void)
1280 +{
1281 +       int err, i;
1282 +
1283 +       if (max_vroot < 1 || max_vroot > 256) {
1284 +               max_vroot = MAX_VROOT_DEFAULT;
1285 +               printk(KERN_WARNING "vroot: invalid max_vroot "
1286 +                       "(must be between 1 and 256), "
1287 +                       "using default (%d)\n", max_vroot);
1288 +       }
1289 +
1290 +       if (register_blkdev(VROOT_MAJOR, "vroot"))
1291 +               return -EIO;
1292 +
1293 +       err = -ENOMEM;          /* reported if any allocation below fails */
1294 +       vroot_dev = kcalloc(max_vroot, sizeof(struct vroot_device), GFP_KERNEL);
1295 +       if (!vroot_dev)
1296 +               goto out_mem1;
1297 +       disks = kcalloc(max_vroot, sizeof(struct gendisk *), GFP_KERNEL);
1298 +       if (!disks)
1299 +               goto out_mem2;
1300 +
1301 +       for (i = 0; i < max_vroot; i++) {
1302 +               disks[i] = alloc_disk(1);
1303 +               if (!disks[i])
1304 +                       goto out_mem3;
1305 +               disks[i]->queue = blk_alloc_queue(GFP_KERNEL);
1306 +               if (!disks[i]->queue) {
1307 +                       put_disk(disks[i]);     /* out_mem3 only unwinds below i */
1308 +                       goto out_mem3;
1309 +               }
1310 +       }
1311 +
1312 +       for (i = 0; i < max_vroot; i++) {
1313 +               struct vroot_device *vr = &vroot_dev[i];
1314 +               struct gendisk *disk = disks[i];
1315 +
1316 +               init_MUTEX(&vr->vr_ctl_mutex);  /* rest of *vr is zero (kcalloc) */
1317 +               vr->vr_number = i;
1318 +               disk->major = VROOT_MAJOR;
1319 +               disk->first_minor = i;
1320 +               disk->fops = &vr_fops;
1321 +               sprintf(disk->disk_name, "vroot%d", i);
1322 +               disk->private_data = vr;
1323 +       }
1324 +
1325 +       err = register_vroot_grb(&__vroot_get_real_bdev);
1326 +       if (err)
1327 +               goto out_mem3;  /* i == max_vroot: every disk is unwound */
1328 +       for (i = 0; i < max_vroot; i++)
1329 +               add_disk(disks[i]);
1330 +       printk(KERN_INFO "vroot: loaded (max %d devices)\n", max_vroot);
1331 +       return 0;
1332 +
1333 +out_mem3:
1334 +       while (i--) {
1335 +               blk_cleanup_queue(disks[i]->queue);
1336 +               put_disk(disks[i]);
1337 +       }
1338 +       kfree(disks);
1339 +out_mem2:
1340 +       kfree(vroot_dev);
1341 +out_mem1:
1342 +       unregister_blkdev(VROOT_MAJOR, "vroot");
1343 +       printk(KERN_ERR "vroot: initialization failed (%d)\n", err);
1344 +       return err;
1345 +}
1346 +
1347 +void vroot_exit(void)
1348 +{
1349 +       int i;
1350 +
1351 +       if (unregister_vroot_grb(&__vroot_get_real_bdev))
1352 +               printk(KERN_WARNING "vroot: cannot unregister grb\n");
1353 +
1354 +       for (i = 0; i < max_vroot; i++) {
1355 +               del_gendisk(disks[i]);
1356 +               blk_cleanup_queue(disks[i]->queue);     /* from blk_alloc_queue() */
1357 +               put_disk(disks[i]);
1358 +       }
1359 +       unregister_blkdev(VROOT_MAJOR, "vroot");
1360 +       kfree(disks);           /* tables allocated in vroot_init() */
1361 +       kfree(vroot_dev);
1362 +}
1363 +
1364 +module_init(vroot_init);
1365 +module_exit(vroot_exit);
1366 +
1367 +#ifndef MODULE
1368 +/* Non-modular build only: handler registered for the "max_vroot=" option. */
1369 +static int __init max_vroot_setup(char *str)
1370 +{
1371 +       long requested = simple_strtol(str, NULL, 0);
1372 +
1373 +       max_vroot = requested;  /* range is checked in vroot_init() */
1374 +       return 1;
1375 +}
1376 +__setup("max_vroot=", max_vroot_setup);
1377 +#endif /* !MODULE */
1378 +
1379 diff -NurpP --minimal linux-2.6.30.2/drivers/char/sysrq.c linux-2.6.30.2-vs2.3.0.36.14-pre4/drivers/char/sysrq.c
1380 --- linux-2.6.30.2/drivers/char/sysrq.c 2009-06-11 17:12:26.000000000 +0200
1381 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/drivers/char/sysrq.c      2009-07-04 01:11:39.000000000 +0200
1382 @@ -37,6 +37,7 @@
1383  #include <linux/kexec.h>
1384  #include <linux/hrtimer.h>
1385  #include <linux/oom.h>
1386 +#include <linux/vserver/debug.h>
1387  
1388  #include <asm/ptrace.h>
1389  #include <asm/irq_regs.h>
1390 @@ -381,6 +382,21 @@ static struct sysrq_key_op sysrq_unrt_op
1391         .enable_mask    = SYSRQ_ENABLE_RTNICE,
1392  };
1393  
1394 +
1395 +#ifdef CONFIG_VSERVER_DEBUG
1396 +static void sysrq_handle_vxinfo(int key, struct tty_struct *tty)
1397 +{
1398 +       dump_vx_info_inactive(key != 'x');      /* 0 for 'x', 1 for any other key */
1399 +}
1400 +
1401 +static struct sysrq_key_op sysrq_showvxinfo_op = {
1402 +       .help_msg       = "conteXt",
1403 +       .action_msg     = "Show Context Info",
1404 +       .enable_mask    = SYSRQ_ENABLE_DUMP,
1405 +       .handler        = sysrq_handle_vxinfo,  /* calls dump_vx_info_inactive() */
1406 +};
1407 +#endif
1408 +
1409  /* Key Operations table and lock */
1410  static DEFINE_SPINLOCK(sysrq_key_table_lock);
1411  
1412 @@ -435,7 +451,11 @@ static struct sysrq_key_op *sysrq_key_ta
1413         NULL,                           /* v */
1414         &sysrq_showstate_blocked_op,    /* w */
1415         /* x: May be registered on ppc/powerpc for xmon */
1416 +#ifdef CONFIG_VSERVER_DEBUG
1417 +       &sysrq_showvxinfo_op,           /* x */
1418 +#else
1419         NULL,                           /* x */
1420 +#endif
1421         /* y: May be registered on sparc64 for global register dump */
1422         NULL,                           /* y */
1423         &sysrq_ftrace_dump_op,          /* z */
1424 @@ -450,6 +470,8 @@ static int sysrq_key_table_key2index(int
1425                 retval = key - '0';
1426         else if ((key >= 'a') && (key <= 'z'))
1427                 retval = key + 10 - 'a';
1428 +       else if ((key >= 'A') && (key <= 'Z'))
1429 +               retval = key + 10 - 'A';
1430         else
1431                 retval = -1;
1432         return retval;
1433 diff -NurpP --minimal linux-2.6.30.2/drivers/char/tty_io.c linux-2.6.30.2-vs2.3.0.36.14-pre4/drivers/char/tty_io.c
1434 --- linux-2.6.30.2/drivers/char/tty_io.c        2009-06-11 17:12:26.000000000 +0200
1435 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/drivers/char/tty_io.c     2009-07-04 01:11:39.000000000 +0200
1436 @@ -106,6 +106,7 @@
1437  
1438  #include <linux/kmod.h>
1439  #include <linux/nsproxy.h>
1440 +#include <linux/vs_pid.h>
1441  
1442  #undef TTY_DEBUG_HANGUP
1443  
1444 @@ -2302,6 +2303,7 @@ static int tiocspgrp(struct tty_struct *
1445                 return -ENOTTY;
1446         if (get_user(pgrp_nr, p))
1447                 return -EFAULT;
1448 +       pgrp_nr = vx_rmap_pid(pgrp_nr);
1449         if (pgrp_nr < 0)
1450                 return -EINVAL;
1451         rcu_read_lock();
1452 diff -NurpP --minimal linux-2.6.30.2/drivers/infiniband/hw/ipath/ipath_user_pages.c linux-2.6.30.2-vs2.3.0.36.14-pre4/drivers/infiniband/hw/ipath/ipath_user_pages.c
1453 --- linux-2.6.30.2/drivers/infiniband/hw/ipath/ipath_user_pages.c       2009-06-11 17:12:30.000000000 +0200
1454 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/drivers/infiniband/hw/ipath/ipath_user_pages.c    2009-07-04 01:11:39.000000000 +0200
1455 @@ -33,6 +33,7 @@
1456  
1457  #include <linux/mm.h>
1458  #include <linux/device.h>
1459 +#include <linux/vs_memory.h>
1460  
1461  #include "ipath_kernel.h"
1462  
1463 @@ -61,7 +62,8 @@ static int __get_user_pages(unsigned lon
1464         lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >>
1465                 PAGE_SHIFT;
1466  
1467 -       if (num_pages > lock_limit) {
1468 +       if (num_pages > lock_limit ||
1469 +               !vx_vmlocked_avail(current->mm, num_pages)) {
1470                 ret = -ENOMEM;
1471                 goto bail;
1472         }
1473 @@ -78,7 +80,7 @@ static int __get_user_pages(unsigned lon
1474                         goto bail_release;
1475         }
1476  
1477 -       current->mm->locked_vm += num_pages;
1478 +       vx_vmlocked_add(current->mm, num_pages);
1479  
1480         ret = 0;
1481         goto bail;
1482 @@ -177,7 +179,7 @@ void ipath_release_user_pages(struct pag
1483  
1484         __ipath_release_user_pages(p, num_pages, 1);
1485  
1486 -       current->mm->locked_vm -= num_pages;
1487 +       vx_vmlocked_sub(current->mm, num_pages);
1488  
1489         up_write(&current->mm->mmap_sem);
1490  }
1491 @@ -194,7 +196,7 @@ static void user_pages_account(struct wo
1492                 container_of(_work, struct ipath_user_pages_work, work);
1493  
1494         down_write(&work->mm->mmap_sem);
1495 -       work->mm->locked_vm -= work->num_pages;
1496 +       vx_vmlocked_sub(work->mm, work->num_pages);
1497         up_write(&work->mm->mmap_sem);
1498         mmput(work->mm);
1499         kfree(work);
1500 diff -NurpP --minimal linux-2.6.30.2/drivers/md/dm.c linux-2.6.30.2-vs2.3.0.36.14-pre4/drivers/md/dm.c
1501 --- linux-2.6.30.2/drivers/md/dm.c      2009-07-23 13:28:46.000000000 +0200
1502 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/drivers/md/dm.c   2009-07-04 01:11:39.000000000 +0200
1503 @@ -21,6 +21,7 @@
1504  #include <linux/hdreg.h>
1505  #include <linux/blktrace_api.h>
1506  #include <trace/block.h>
1507 +#include <linux/vs_base.h>
1508  
1509  #define DM_MSG_PREFIX "core"
1510  
1511 @@ -105,6 +106,7 @@ struct mapped_device {
1512         rwlock_t map_lock;
1513         atomic_t holders;
1514         atomic_t open_count;
1515 +       xid_t xid;
1516  
1517         unsigned long flags;
1518  
1519 @@ -291,6 +293,7 @@ static void __exit dm_exit(void)
1520  static int dm_blk_open(struct block_device *bdev, fmode_t mode)
1521  {
1522         struct mapped_device *md;
1523 +       int ret = -ENXIO;
1524  
1525         spin_lock(&_minor_lock);
1526  
1527 @@ -299,18 +302,19 @@ static int dm_blk_open(struct block_devi
1528                 goto out;
1529  
1530         if (test_bit(DMF_FREEING, &md->flags) ||
1531 -           test_bit(DMF_DELETING, &md->flags)) {
1532 -               md = NULL;
1533 +           test_bit(DMF_DELETING, &md->flags))
1534 +               goto out;
1535 +
1536 +       ret = -EACCES;
1537 +       if (!vx_check(md->xid, VS_IDENT|VS_HOSTID))
1538                 goto out;
1539 -       }
1540  
1541         dm_get(md);
1542         atomic_inc(&md->open_count);
1543 -
1544 +       ret = 0;
1545  out:
1546         spin_unlock(&_minor_lock);
1547 -
1548 -       return md ? 0 : -ENXIO;
1549 +       return ret;
1550  }
1551  
1552  static int dm_blk_close(struct gendisk *disk, fmode_t mode)
1553 @@ -503,6 +507,14 @@ int dm_set_geometry(struct mapped_device
1554         return 0;
1555  }
1556  
1557 +/*
1558 + * Return the xid stored in md->xid (alloc_dev() sets it from vx_current_xid()).
1559 + */
1560 +xid_t dm_get_xid(struct mapped_device *md)
1561 +{
1562 +       return md->xid;
1563 +}
1564 +
1565  /*-----------------------------------------------------------------
1566   * CRUD START:
1567   *   A more elegant soln is in the works that uses the queue
1568 @@ -1126,6 +1138,7 @@ static struct mapped_device *alloc_dev(i
1569         INIT_LIST_HEAD(&md->uevent_list);
1570         spin_lock_init(&md->uevent_lock);
1571  
1572 +       md->xid = vx_current_xid();
1573         md->queue = blk_alloc_queue(GFP_KERNEL);
1574         if (!md->queue)
1575                 goto bad_queue;
1576 diff -NurpP --minimal linux-2.6.30.2/drivers/md/dm.h linux-2.6.30.2-vs2.3.0.36.14-pre4/drivers/md/dm.h
1577 --- linux-2.6.30.2/drivers/md/dm.h      2009-06-11 17:12:31.000000000 +0200
1578 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/drivers/md/dm.h   2009-07-04 01:11:39.000000000 +0200
1579 @@ -53,6 +53,8 @@ int dm_table_any_congested(struct dm_tab
1580   */
1581  #define dm_target_is_valid(t) ((t)->table)
1582  
1583 +xid_t dm_get_xid(struct mapped_device *md);
1584 +
1585  /*-----------------------------------------------------------------
1586   * A registry of target types.
1587   *---------------------------------------------------------------*/
1588 diff -NurpP --minimal linux-2.6.30.2/drivers/md/dm-ioctl.c linux-2.6.30.2-vs2.3.0.36.14-pre4/drivers/md/dm-ioctl.c
1589 --- linux-2.6.30.2/drivers/md/dm-ioctl.c        2009-06-11 17:12:31.000000000 +0200
1590 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/drivers/md/dm-ioctl.c     2009-07-04 01:11:39.000000000 +0200
1591 @@ -16,6 +16,7 @@
1592  #include <linux/dm-ioctl.h>
1593  #include <linux/hdreg.h>
1594  #include <linux/compat.h>
1595 +#include <linux/vs_context.h>
1596  
1597  #include <asm/uaccess.h>
1598  
1599 @@ -101,7 +102,8 @@ static struct hash_cell *__get_name_cell
1600         unsigned int h = hash_str(str);
1601  
1602         list_for_each_entry (hc, _name_buckets + h, name_list)
1603 -               if (!strcmp(hc->name, str)) {
1604 +               if (vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT) &&
1605 +                       !strcmp(hc->name, str)) {
1606                         dm_get(hc->md);
1607                         return hc;
1608                 }
1609 @@ -115,7 +117,8 @@ static struct hash_cell *__get_uuid_cell
1610         unsigned int h = hash_str(str);
1611  
1612         list_for_each_entry (hc, _uuid_buckets + h, uuid_list)
1613 -               if (!strcmp(hc->uuid, str)) {
1614 +               if (vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT) &&
1615 +                       !strcmp(hc->uuid, str)) {
1616                         dm_get(hc->md);
1617                         return hc;
1618                 }
1619 @@ -352,6 +355,9 @@ typedef int (*ioctl_fn)(struct dm_ioctl 
1620  
1621  static int remove_all(struct dm_ioctl *param, size_t param_size)
1622  {
1623 +       if (!vx_check(0, VS_ADMIN))
1624 +               return -EPERM;
1625 +
1626         dm_hash_remove_all(1);
1627         param->data_size = 0;
1628         return 0;
1629 @@ -399,6 +405,8 @@ static int list_devices(struct dm_ioctl 
1630          */
1631         for (i = 0; i < NUM_BUCKETS; i++) {
1632                 list_for_each_entry (hc, _name_buckets + i, name_list) {
1633 +                       if (!vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT))
1634 +                               continue;
1635                         needed += sizeof(struct dm_name_list);
1636                         needed += strlen(hc->name) + 1;
1637                         needed += ALIGN_MASK;
1638 @@ -422,6 +430,8 @@ static int list_devices(struct dm_ioctl 
1639          */
1640         for (i = 0; i < NUM_BUCKETS; i++) {
1641                 list_for_each_entry (hc, _name_buckets + i, name_list) {
1642 +                       if (!vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT))
1643 +                               continue;
1644                         if (old_nl)
1645                                 old_nl->next = (uint32_t) ((void *) nl -
1646                                                            (void *) old_nl);
1647 @@ -612,10 +622,11 @@ static struct hash_cell *__find_device_h
1648         if (!md)
1649                 goto out;
1650  
1651 -       mdptr = dm_get_mdptr(md);
1652 +       if (vx_check(dm_get_xid(md), VS_WATCH_P | VS_IDENT))
1653 +               mdptr = dm_get_mdptr(md);
1654 +
1655         if (!mdptr)
1656                 dm_put(md);
1657 -
1658  out:
1659         return mdptr;
1660  }
1661 @@ -1426,8 +1437,8 @@ static int ctl_ioctl(uint command, struc
1662         ioctl_fn fn = NULL;
1663         size_t param_size;
1664  
1665 -       /* only root can play with this */
1666 -       if (!capable(CAP_SYS_ADMIN))
1667 +       /* only root and certain contexts can play with this */
1668 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_MAPPER))
1669                 return -EACCES;
1670  
1671         if (_IOC_TYPE(command) != DM_IOCTL)
1672 diff -NurpP --minimal linux-2.6.30.2/drivers/net/tun.c linux-2.6.30.2-vs2.3.0.36.14-pre4/drivers/net/tun.c
1673 --- linux-2.6.30.2/drivers/net/tun.c    2009-07-23 13:28:47.000000000 +0200
1674 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/drivers/net/tun.c 2009-07-23 13:50:49.000000000 +0200
1675 @@ -61,6 +61,7 @@
1676  #include <linux/crc32.h>
1677  #include <linux/nsproxy.h>
1678  #include <linux/virtio_net.h>
1679 +#include <linux/vs_network.h>
1680  #include <net/net_namespace.h>
1681  #include <net/netns/generic.h>
1682  #include <net/rtnetlink.h>
1683 @@ -102,6 +103,7 @@ struct tun_struct {
1684         unsigned int            flags;
1685         uid_t                   owner;
1686         gid_t                   group;
1687 +       nid_t                   nid;
1688  
1689         struct sk_buff_head     readq;
1690  
1691 @@ -138,7 +140,7 @@ static int tun_attach(struct tun_struct 
1692         /* Check permissions */
1693         if (((tun->owner != -1 && cred->euid != tun->owner) ||
1694              (tun->group != -1 && !in_egroup_p(tun->group))) &&
1695 -               !capable(CAP_NET_ADMIN))
1696 +               !cap_raised(current_cap(), CAP_NET_ADMIN))
1697                 return -EPERM;
1698  
1699         netif_tx_lock_bh(tun->dev);
1700 @@ -815,6 +817,7 @@ static void tun_setup(struct net_device 
1701  
1702         tun->owner = -1;
1703         tun->group = -1;
1704 +       tun->nid = current->nid;
1705  
1706         dev->ethtool_ops = &tun_ethtool_ops;
1707         dev->destructor = tun_free_netdev;
1708 @@ -879,6 +882,9 @@ static int tun_set_iff(struct net *net, 
1709                 else
1710                         return -EINVAL;
1711  
1712 +               if (!nx_check(tun->nid, VS_IDENT | VS_HOSTID | VS_ADMIN_P))
1713 +                       return -EPERM;
1714 +
1715                 err = tun_attach(tun, file);
1716                 if (err < 0)
1717                         return err;
1718 @@ -889,7 +895,7 @@ static int tun_set_iff(struct net *net, 
1719  
1720                 err = -EINVAL;
1721  
1722 -               if (!capable(CAP_NET_ADMIN))
1723 +               if (!nx_capable(CAP_NET_ADMIN, NXC_TUN_CREATE))
1724                         return -EPERM;
1725  
1726                 /* Set dev type */
1727 @@ -1152,6 +1158,16 @@ static int tun_chr_ioctl(struct inode *i
1728                 DBG(KERN_INFO "%s: group set to %d\n", tun->dev->name, tun->group);
1729                 break;
1730  
1731 +       case TUNSETNID:
1732 +               if (!capable(CAP_CONTEXT))
1733 +                       return -EPERM;
1734 +
1735 +               /* Set nid owner of the device */
1736 +               tun->nid = (nid_t) arg;
1737 +
1738 +               DBG(KERN_INFO "%s: nid owner set to %u\n", tun->dev->name, tun->nid);
1739 +               break;
1740 +
1741         case TUNSETLINK:
1742                 /* Only allow setting the type when the interface is down */
1743                 rtnl_lock();
1744 diff -NurpP --minimal linux-2.6.30.2/fs/attr.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/attr.c
1745 --- linux-2.6.30.2/fs/attr.c    2009-06-11 17:13:01.000000000 +0200
1746 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/attr.c 2009-07-04 02:19:40.000000000 +0200
1747 @@ -14,6 +14,9 @@
1748  #include <linux/fcntl.h>
1749  #include <linux/quotaops.h>
1750  #include <linux/security.h>
1751 +#include <linux/proc_fs.h>
1752 +#include <linux/devpts_fs.h>
1753 +#include <linux/vs_tag.h>
1754  
1755  /* Taken over from the old code... */
1756  
1757 @@ -55,6 +58,10 @@ int inode_change_ok(struct inode *inode,
1758                 if (!is_owner_or_cap(inode))
1759                         goto error;
1760         }
1761 +
1762 +       if (dx_permission(inode, MAY_WRITE))
1763 +               goto error;
1764 +
1765  fine:
1766         retval = 0;
1767  error:
1768 @@ -78,6 +85,8 @@ int inode_setattr(struct inode * inode, 
1769                 inode->i_uid = attr->ia_uid;
1770         if (ia_valid & ATTR_GID)
1771                 inode->i_gid = attr->ia_gid;
1772 +       if ((ia_valid & ATTR_TAG) && IS_TAGGED(inode))
1773 +               inode->i_tag = attr->ia_tag;
1774         if (ia_valid & ATTR_ATIME)
1775                 inode->i_atime = timespec_trunc(attr->ia_atime,
1776                                                 inode->i_sb->s_time_gran);
1777 @@ -172,7 +181,8 @@ int notify_change(struct dentry * dentry
1778                 error = inode_change_ok(inode, attr);
1779                 if (!error) {
1780                         if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
1781 -                           (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid))
1782 +                           (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) ||
1783 +                           (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag))
1784                                 error = vfs_dq_transfer(inode, attr) ?
1785                                         -EDQUOT : 0;
1786                         if (!error)
1787 diff -NurpP --minimal linux-2.6.30.2/fs/binfmt_aout.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/binfmt_aout.c
1788 --- linux-2.6.30.2/fs/binfmt_aout.c     2009-03-24 14:22:24.000000000 +0100
1789 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/binfmt_aout.c  2009-07-04 01:11:39.000000000 +0200
1790 @@ -24,6 +24,7 @@
1791  #include <linux/binfmts.h>
1792  #include <linux/personality.h>
1793  #include <linux/init.h>
1794 +#include <linux/vs_memory.h>
1795  
1796  #include <asm/system.h>
1797  #include <asm/uaccess.h>
1798 diff -NurpP --minimal linux-2.6.30.2/fs/binfmt_elf.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/binfmt_elf.c
1799 --- linux-2.6.30.2/fs/binfmt_elf.c      2009-06-11 17:13:02.000000000 +0200
1800 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/binfmt_elf.c   2009-07-04 01:11:39.000000000 +0200
1801 @@ -31,6 +31,7 @@
1802  #include <linux/random.h>
1803  #include <linux/elf.h>
1804  #include <linux/utsname.h>
1805 +#include <linux/vs_memory.h>
1806  #include <asm/uaccess.h>
1807  #include <asm/param.h>
1808  #include <asm/page.h>
1809 diff -NurpP --minimal linux-2.6.30.2/fs/binfmt_flat.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/binfmt_flat.c
1810 --- linux-2.6.30.2/fs/binfmt_flat.c     2009-06-11 17:13:02.000000000 +0200
1811 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/binfmt_flat.c  2009-07-04 01:11:39.000000000 +0200
1812 @@ -35,6 +35,7 @@
1813  #include <linux/init.h>
1814  #include <linux/flat.h>
1815  #include <linux/syscalls.h>
1816 +#include <linux/vs_memory.h>
1817  
1818  #include <asm/byteorder.h>
1819  #include <asm/system.h>
1820 diff -NurpP --minimal linux-2.6.30.2/fs/binfmt_som.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/binfmt_som.c
1821 --- linux-2.6.30.2/fs/binfmt_som.c      2009-06-11 17:13:02.000000000 +0200
1822 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/binfmt_som.c   2009-07-04 01:11:39.000000000 +0200
1823 @@ -28,6 +28,7 @@
1824  #include <linux/shm.h>
1825  #include <linux/personality.h>
1826  #include <linux/init.h>
1827 +#include <linux/vs_memory.h>
1828  
1829  #include <asm/uaccess.h>
1830  #include <asm/pgtable.h>
1831 diff -NurpP --minimal linux-2.6.30.2/fs/block_dev.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/block_dev.c
1832 --- linux-2.6.30.2/fs/block_dev.c       2009-06-11 17:13:02.000000000 +0200
1833 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/block_dev.c    2009-07-04 01:11:39.000000000 +0200
1834 @@ -25,6 +25,7 @@
1835  #include <linux/uio.h>
1836  #include <linux/namei.h>
1837  #include <linux/log2.h>
1838 +#include <linux/vs_device.h>
1839  #include <asm/uaccess.h>
1840  #include "internal.h"
1841  
1842 @@ -539,6 +540,7 @@ struct block_device *bdget(dev_t dev)
1843                 bdev->bd_invalidated = 0;
1844                 inode->i_mode = S_IFBLK;
1845                 inode->i_rdev = dev;
1846 +               inode->i_mdev = dev;
1847                 inode->i_bdev = bdev;
1848                 inode->i_data.a_ops = &def_blk_aops;
1849                 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
1850 @@ -575,6 +577,11 @@ EXPORT_SYMBOL(bdput);
1851  static struct block_device *bd_acquire(struct inode *inode)
1852  {
1853         struct block_device *bdev;
1854 +       dev_t mdev;
1855 +
1856 +       if (!vs_map_blkdev(inode->i_rdev, &mdev, DATTR_OPEN))
1857 +               return NULL;
1858 +       inode->i_mdev = mdev;
1859  
1860         spin_lock(&bdev_lock);
1861         bdev = inode->i_bdev;
1862 @@ -585,7 +592,7 @@ static struct block_device *bd_acquire(s
1863         }
1864         spin_unlock(&bdev_lock);
1865  
1866 -       bdev = bdget(inode->i_rdev);
1867 +       bdev = bdget(mdev);
1868         if (bdev) {
1869                 spin_lock(&bdev_lock);
1870                 if (!inode->i_bdev) {
1871 diff -NurpP --minimal linux-2.6.30.2/fs/char_dev.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/char_dev.c
1872 --- linux-2.6.30.2/fs/char_dev.c        2009-03-24 14:22:25.000000000 +0100
1873 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/char_dev.c     2009-07-04 01:11:39.000000000 +0200
1874 @@ -21,6 +21,8 @@
1875  #include <linux/cdev.h>
1876  #include <linux/mutex.h>
1877  #include <linux/backing-dev.h>
1878 +#include <linux/vs_context.h>
1879 +#include <linux/vs_device.h>
1880  
1881  #include "internal.h"
1882  
1883 @@ -358,14 +360,21 @@ static int chrdev_open(struct inode *ino
1884         struct cdev *p;
1885         struct cdev *new = NULL;
1886         int ret = 0;
1887 +       dev_t mdev;
1888 +
1889 +       if (!vs_map_chrdev(inode->i_rdev, &mdev, DATTR_OPEN))
1890 +               return -EPERM;
1891 +       inode->i_mdev = mdev;
1892  
1893         spin_lock(&cdev_lock);
1894         p = inode->i_cdev;
1895         if (!p) {
1896                 struct kobject *kobj;
1897                 int idx;
1898 +
1899                 spin_unlock(&cdev_lock);
1900 -               kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
1901 +
1902 +               kobj = kobj_lookup(cdev_map, mdev, &idx);
1903                 if (!kobj)
1904                         return -ENXIO;
1905                 new = container_of(kobj, struct cdev, kobj);
1906 diff -NurpP --minimal linux-2.6.30.2/fs/dcache.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/dcache.c
1907 --- linux-2.6.30.2/fs/dcache.c  2009-06-11 17:13:02.000000000 +0200
1908 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/dcache.c       2009-07-04 02:18:36.000000000 +0200
1909 @@ -32,6 +32,7 @@
1910  #include <linux/swap.h>
1911  #include <linux/bootmem.h>
1912  #include <linux/fs_struct.h>
1913 +#include <linux/vs_limit.h>
1914  #include "internal.h"
1915  
1916  int sysctl_vfs_cache_pressure __read_mostly = 100;
1917 @@ -229,6 +230,8 @@ repeat:
1918                 return;
1919         }
1920  
1921 +       vx_dentry_dec(dentry);
1922 +
1923         /*
1924          * AV: ->d_delete() is _NOT_ allowed to block now.
1925          */
1926 @@ -320,6 +323,7 @@ static inline struct dentry * __dget_loc
1927  {
1928         atomic_inc(&dentry->d_count);
1929         dentry_lru_del_init(dentry);
1930 +       vx_dentry_inc(dentry);
1931         return dentry;
1932  }
1933  
1934 @@ -918,6 +922,9 @@ struct dentry *d_alloc(struct dentry * p
1935         struct dentry *dentry;
1936         char *dname;
1937  
1938 +       if (!vx_dentry_avail(1))
1939 +               return NULL;
1940 +
1941         dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
1942         if (!dentry)
1943                 return NULL;
1944 @@ -963,6 +970,7 @@ struct dentry *d_alloc(struct dentry * p
1945         if (parent)
1946                 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
1947         dentry_stat.nr_dentry++;
1948 +       vx_dentry_inc(dentry);
1949         spin_unlock(&dcache_lock);
1950  
1951         return dentry;
1952 @@ -1406,6 +1414,7 @@ struct dentry * __d_lookup(struct dentry
1953                 }
1954  
1955                 atomic_inc(&dentry->d_count);
1956 +               vx_dentry_inc(dentry);
1957                 found = dentry;
1958                 spin_unlock(&dentry->d_lock);
1959                 break;
1960 diff -NurpP --minimal linux-2.6.30.2/fs/devpts/inode.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/devpts/inode.c
1961 --- linux-2.6.30.2/fs/devpts/inode.c    2009-06-11 17:13:02.000000000 +0200
1962 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/devpts/inode.c 2009-07-04 01:11:39.000000000 +0200
1963 @@ -19,12 +19,12 @@
1964  #include <linux/tty.h>
1965  #include <linux/mutex.h>
1966  #include <linux/idr.h>
1967 +#include <linux/magic.h>
1968  #include <linux/devpts_fs.h>
1969  #include <linux/parser.h>
1970  #include <linux/fsnotify.h>
1971  #include <linux/seq_file.h>
1972 -
1973 -#define DEVPTS_SUPER_MAGIC 0x1cd1
1974 +#include <linux/vs_base.h>
1975  
1976  #define DEVPTS_DEFAULT_MODE 0600
1977  /*
1978 @@ -36,6 +36,20 @@
1979  #define DEVPTS_DEFAULT_PTMX_MODE 0000
1980  #define PTMX_MINOR     2
1981  
1982 +static int devpts_permission(struct inode *inode, int mask)
1983 +{
1984 +       xid_t owner = (xid_t)inode->i_tag;      /* devpts is xid tagged */
1985 +
1986 +       /* Deny with -EACCES unless vx_check() accepts the inode's xid. */
1987 +       if (!vx_check(owner, VS_WATCH_P | VS_IDENT))
1988 +               return -EACCES;
1989 +       return generic_permission(inode, mask, NULL);
1990 +}
1991 +
1992 +static struct inode_operations devpts_file_inode_operations = {
1993 +       .permission     = devpts_permission,    /* vx_check() on i_tag, then generic_permission() */
1994 +};
1995 +
1996  extern int pty_limit;                  /* Config limit on Unix98 ptys */
1997  static DEFINE_MUTEX(allocated_ptys_lock);
1998  
1999 @@ -263,6 +277,25 @@ static int devpts_show_options(struct se
2000         return 0;
2001  }
2002  
2003 +static int devpts_filter(struct dentry *de)
2004 +{
2005 +       xid_t owner = (xid_t)de->d_inode->i_tag;        /* devpts is xid tagged */
2006 +       return vx_check(owner, VS_WATCH_P | VS_IDENT);
2007 +}
2008 +
2009 +static int devpts_readdir(struct file * filp, void * dirent, filldir_t filldir)
2010 +{      /* delegates to dcache_readdir_filter(), passing devpts_filter as the filter */
2011 +       return dcache_readdir_filter(filp, dirent, filldir, devpts_filter);
2012 +}
2013 +
2014 +static struct file_operations devpts_dir_operations = {
2015 +       .llseek         = dcache_dir_lseek,
2016 +       .read           = generic_read_dir,
2017 +       .readdir        = devpts_readdir,       /* the one devpts-specific hook here */
2018 +       .open           = dcache_dir_open,
2019 +       .release        = dcache_dir_close,
2020 +};
2021 +
2022  static const struct super_operations devpts_sops = {
2023         .statfs         = simple_statfs,
2024         .remount_fs     = devpts_remount,
2025 @@ -302,12 +335,15 @@ devpts_fill_super(struct super_block *s,
2026         inode = new_inode(s);
2027         if (!inode)
2028                 goto free_fsi;
2029 +
2030         inode->i_ino = 1;
2031         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
2032         inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
2033         inode->i_op = &simple_dir_inode_operations;
2034 -       inode->i_fop = &simple_dir_operations;
2035 +       inode->i_fop = &devpts_dir_operations;
2036         inode->i_nlink = 2;
2037 +       /* devpts is xid tagged */
2038 +       inode->i_tag = (tag_t)vx_current_xid();
2039  
2040         s->s_root = d_alloc_root(inode);
2041         if (s->s_root)
2042 @@ -499,6 +535,9 @@ int devpts_pty_new(struct inode *ptmx_in
2043         inode->i_gid = opts->setgid ? opts->gid : current_fsgid();
2044         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
2045         init_special_inode(inode, S_IFCHR|opts->mode, device);
2046 +       /* devpts is xid tagged */
2047 +       inode->i_tag = (tag_t)vx_current_xid();
2048 +       inode->i_op = &devpts_file_inode_operations;
2049         inode->i_private = tty;
2050         tty->driver_data = inode;
2051  
2052 diff -NurpP --minimal linux-2.6.30.2/fs/exec.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/exec.c
2053 --- linux-2.6.30.2/fs/exec.c    2009-06-11 17:13:03.000000000 +0200
2054 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/exec.c 2009-07-04 01:11:39.000000000 +0200
2055 @@ -248,7 +248,9 @@ static int __bprm_mm_init(struct linux_b
2056         if (err)
2057                 goto err;
2058  
2059 -       mm->stack_vm = mm->total_vm = 1;
2060 +       mm->total_vm = 0;
2061 +       vx_vmpages_inc(mm);
2062 +       mm->stack_vm = 1;
2063         up_write(&mm->mmap_sem);
2064         bprm->p = vma->vm_end - sizeof(void *);
2065         return 0;
2066 @@ -1449,7 +1451,7 @@ static int format_corename(char *corenam
2067                         /* UNIX time of coredump */
2068                         case 't': {
2069                                 struct timeval tv;
2070 -                               do_gettimeofday(&tv);
2071 +                               vx_gettimeofday(&tv);
2072                                 rc = snprintf(out_ptr, out_end - out_ptr,
2073                                               "%lu", tv.tv_sec);
2074                                 if (rc > out_end - out_ptr)
2075 diff -NurpP --minimal linux-2.6.30.2/fs/ext2/balloc.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext2/balloc.c
2076 --- linux-2.6.30.2/fs/ext2/balloc.c     2009-06-11 17:13:03.000000000 +0200
2077 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext2/balloc.c  2009-07-04 01:11:38.000000000 +0200
2078 @@ -701,7 +701,6 @@ ext2_try_to_allocate(struct super_block 
2079                         start = 0;
2080                 end = EXT2_BLOCKS_PER_GROUP(sb);
2081         }
2082 -
2083         BUG_ON(start > EXT2_BLOCKS_PER_GROUP(sb));
2084  
2085  repeat:
2086 diff -NurpP --minimal linux-2.6.30.2/fs/ext2/ext2.h linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext2/ext2.h
2087 --- linux-2.6.30.2/fs/ext2/ext2.h       2008-12-25 00:26:37.000000000 +0100
2088 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext2/ext2.h    2009-07-04 01:11:39.000000000 +0200
2089 @@ -170,6 +170,7 @@ extern const struct file_operations ext2
2090  extern const struct address_space_operations ext2_aops;
2091  extern const struct address_space_operations ext2_aops_xip;
2092  extern const struct address_space_operations ext2_nobh_aops;
2093 +extern int ext2_sync_flags(struct inode *inode);
2094  
2095  /* namei.c */
2096  extern const struct inode_operations ext2_dir_inode_operations;
2097 diff -NurpP --minimal linux-2.6.30.2/fs/ext2/file.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext2/file.c
2098 --- linux-2.6.30.2/fs/ext2/file.c       2008-12-25 00:26:37.000000000 +0100
2099 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext2/file.c    2009-07-04 01:11:39.000000000 +0200
2100 @@ -87,4 +87,5 @@ const struct inode_operations ext2_file_
2101         .setattr        = ext2_setattr,
2102         .permission     = ext2_permission,
2103         .fiemap         = ext2_fiemap,
2104 +       .sync_flags     = ext2_sync_flags,
2105  };
2106 diff -NurpP --minimal linux-2.6.30.2/fs/ext2/inode.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext2/inode.c
2107 --- linux-2.6.30.2/fs/ext2/inode.c      2009-06-11 17:13:03.000000000 +0200
2108 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext2/inode.c   2009-07-04 02:26:09.000000000 +0200
2109 @@ -33,6 +33,7 @@
2110  #include <linux/mpage.h>
2111  #include <linux/fiemap.h>
2112  #include <linux/namei.h>
2113 +#include <linux/vs_tag.h>
2114  #include "ext2.h"
2115  #include "acl.h"
2116  #include "xip.h"
2117 @@ -1040,7 +1041,7 @@ void ext2_truncate(struct inode *inode)
2118                 return;
2119         if (ext2_inode_is_fast_symlink(inode))
2120                 return;
2121 -       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2122 +       if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
2123                 return;
2124  
2125         blocksize = inode->i_sb->s_blocksize;
2126 @@ -1178,38 +1179,72 @@ void ext2_set_inode_flags(struct inode *
2127  {
2128         unsigned int flags = EXT2_I(inode)->i_flags;
2129  
2130 -       inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
2131 +       inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK |
2132 +               S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
2133 +
2134 +
2135 +       if (flags & EXT2_IMMUTABLE_FL)
2136 +               inode->i_flags |= S_IMMUTABLE;
2137 +       if (flags & EXT2_IXUNLINK_FL)
2138 +               inode->i_flags |= S_IXUNLINK;
2139 +
2140         if (flags & EXT2_SYNC_FL)
2141                 inode->i_flags |= S_SYNC;
2142         if (flags & EXT2_APPEND_FL)
2143                 inode->i_flags |= S_APPEND;
2144 -       if (flags & EXT2_IMMUTABLE_FL)
2145 -               inode->i_flags |= S_IMMUTABLE;
2146         if (flags & EXT2_NOATIME_FL)
2147                 inode->i_flags |= S_NOATIME;
2148         if (flags & EXT2_DIRSYNC_FL)
2149                 inode->i_flags |= S_DIRSYNC;
2150 +
2151 +       inode->i_vflags &= ~(V_BARRIER | V_COW);
2152 +
2153 +       if (flags & EXT2_BARRIER_FL)
2154 +               inode->i_vflags |= V_BARRIER;
2155 +       if (flags & EXT2_COW_FL)
2156 +               inode->i_vflags |= V_COW;
2157  }
2158  
2159  /* Propagate flags from i_flags to EXT2_I(inode)->i_flags */
2160  void ext2_get_inode_flags(struct ext2_inode_info *ei)
2161  {
2162         unsigned int flags = ei->vfs_inode.i_flags;
2163 +       unsigned int vflags = ei->vfs_inode.i_vflags;
2164 +
2165 +       ei->i_flags &= ~(EXT2_SYNC_FL | EXT2_APPEND_FL |
2166 +                       EXT2_IMMUTABLE_FL | EXT2_IXUNLINK_FL |
2167 +                       EXT2_NOATIME_FL | EXT2_DIRSYNC_FL |
2168 +                       EXT2_BARRIER_FL | EXT2_COW_FL);
2169 +
2170 +       if (flags & S_IMMUTABLE)
2171 +               ei->i_flags |= EXT2_IMMUTABLE_FL;
2172 +       if (flags & S_IXUNLINK)
2173 +               ei->i_flags |= EXT2_IXUNLINK_FL;
2174  
2175 -       ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL|
2176 -                       EXT2_IMMUTABLE_FL|EXT2_NOATIME_FL|EXT2_DIRSYNC_FL);
2177         if (flags & S_SYNC)
2178                 ei->i_flags |= EXT2_SYNC_FL;
2179         if (flags & S_APPEND)
2180                 ei->i_flags |= EXT2_APPEND_FL;
2181 -       if (flags & S_IMMUTABLE)
2182 -               ei->i_flags |= EXT2_IMMUTABLE_FL;
2183         if (flags & S_NOATIME)
2184                 ei->i_flags |= EXT2_NOATIME_FL;
2185         if (flags & S_DIRSYNC)
2186                 ei->i_flags |= EXT2_DIRSYNC_FL;
2187 +
2188 +       if (vflags & V_BARRIER)
2189 +               ei->i_flags |= EXT2_BARRIER_FL;
2190 +       if (vflags & V_COW)
2191 +               ei->i_flags |= EXT2_COW_FL;
2192  }
2193  
2194 +int ext2_sync_flags(struct inode *inode)
2195 +{
2196 +       ext2_get_inode_flags(EXT2_I(inode));    /* i_flags/i_vflags -> ei->i_flags */
2197 +       inode->i_ctime = CURRENT_TIME;
2198 +       mark_inode_dirty(inode);
2199 +       return 0;       /* no failure path here */
2200 +}
2201 +
2202 +
2203  struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
2204  {
2205         struct ext2_inode_info *ei;
2206 @@ -1217,6 +1252,8 @@ struct inode *ext2_iget (struct super_bl
2207         struct ext2_inode *raw_inode;
2208         struct inode *inode;
2209         long ret = -EIO;
2210 +       uid_t uid;
2211 +       gid_t gid;
2212         int n;
2213  
2214         inode = iget_locked(sb, ino);
2215 @@ -1239,12 +1276,17 @@ struct inode *ext2_iget (struct super_bl
2216         }
2217  
2218         inode->i_mode = le16_to_cpu(raw_inode->i_mode);
2219 -       inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
2220 -       inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
2221 +       uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
2222 +       gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
2223         if (!(test_opt (inode->i_sb, NO_UID32))) {
2224 -               inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2225 -               inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
2226 +               uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2227 +               gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
2228         }
2229 +       inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
2230 +       inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
2231 +       inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid,
2232 +               le16_to_cpu(raw_inode->i_raw_tag));
2233 +
2234         inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
2235         inode->i_size = le32_to_cpu(raw_inode->i_size);
2236         inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
2237 @@ -1342,8 +1384,8 @@ static int ext2_update_inode(struct inod
2238         struct ext2_inode_info *ei = EXT2_I(inode);
2239         struct super_block *sb = inode->i_sb;
2240         ino_t ino = inode->i_ino;
2241 -       uid_t uid = inode->i_uid;
2242 -       gid_t gid = inode->i_gid;
2243 +       uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag);
2244 +       gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag);
2245         struct buffer_head * bh;
2246         struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh);
2247         int n;
2248 @@ -1379,6 +1421,9 @@ static int ext2_update_inode(struct inod
2249                 raw_inode->i_uid_high = 0;
2250                 raw_inode->i_gid_high = 0;
2251         }
2252 +#ifdef CONFIG_TAGGING_INTERN
2253 +       raw_inode->i_raw_tag = cpu_to_le16(inode->i_tag);
2254 +#endif
2255         raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
2256         raw_inode->i_size = cpu_to_le32(inode->i_size);
2257         raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
2258 @@ -1465,7 +1510,8 @@ int ext2_setattr(struct dentry *dentry, 
2259         if (error)
2260                 return error;
2261         if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
2262 -           (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
2263 +           (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) ||
2264 +           (iattr->ia_valid & ATTR_TAG && iattr->ia_tag != inode->i_tag)) {
2265                 error = vfs_dq_transfer(inode, iattr) ? -EDQUOT : 0;
2266                 if (error)
2267                         return error;
2268 diff -NurpP --minimal linux-2.6.30.2/fs/ext2/ioctl.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext2/ioctl.c
2269 --- linux-2.6.30.2/fs/ext2/ioctl.c      2009-03-24 14:22:25.000000000 +0100
2270 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext2/ioctl.c   2009-07-04 01:11:39.000000000 +0200
2271 @@ -14,6 +14,7 @@
2272  #include <linux/compat.h>
2273  #include <linux/mount.h>
2274  #include <linux/smp_lock.h>
2275 +#include <linux/mount.h>
2276  #include <asm/current.h>
2277  #include <asm/uaccess.h>
2278  
2279 @@ -52,6 +53,11 @@ long ext2_ioctl(struct file *filp, unsig
2280  
2281                 flags = ext2_mask_flags(inode->i_mode, flags);
2282  
2283 +               if (IS_BARRIER(inode)) {
2284 +                       vxwprintk_task(1, "messing with the barrier.");
2285 +                       ret = -EACCES;  /* run setflags_out cleanup */
2286 +                       goto setflags_out;
2287 +               }
2288                 mutex_lock(&inode->i_mutex);
2289                 /* Is it quota file? Do not allow user to mess with it */
2290                 if (IS_NOQUOTA(inode)) {
2291 @@ -67,7 +73,9 @@ long ext2_ioctl(struct file *filp, unsig
2292                  *
2293                  * This test looks nicer. Thanks to Pauline Middelink
2294                  */
2295 -               if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
2296 +               if ((oldflags & EXT2_IMMUTABLE_FL) ||
2297 +                       ((flags ^ oldflags) & (EXT2_APPEND_FL |
2298 +                       EXT2_IMMUTABLE_FL | EXT2_IXUNLINK_FL))) {
2299                         if (!capable(CAP_LINUX_IMMUTABLE)) {
2300                                 mutex_unlock(&inode->i_mutex);
2301                                 ret = -EPERM;
2302 diff -NurpP --minimal linux-2.6.30.2/fs/ext2/namei.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext2/namei.c
2303 --- linux-2.6.30.2/fs/ext2/namei.c      2009-03-24 14:22:25.000000000 +0100
2304 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext2/namei.c   2009-07-04 01:11:39.000000000 +0200
2305 @@ -31,6 +31,7 @@
2306   */
2307  
2308  #include <linux/pagemap.h>
2309 +#include <linux/vs_tag.h>
2310  #include "ext2.h"
2311  #include "xattr.h"
2312  #include "acl.h"
2313 @@ -68,6 +69,7 @@ static struct dentry *ext2_lookup(struct
2314                 inode = ext2_iget(dir->i_sb, ino);
2315                 if (IS_ERR(inode))
2316                         return ERR_CAST(inode);
2317 +               dx_propagate_tag(nd, inode);
2318         }
2319         return d_splice_alias(inode, dentry);
2320  }
2321 @@ -388,6 +390,7 @@ const struct inode_operations ext2_dir_i
2322  #endif
2323         .setattr        = ext2_setattr,
2324         .permission     = ext2_permission,
2325 +       .sync_flags     = ext2_sync_flags,
2326  };
2327  
2328  const struct inode_operations ext2_special_inode_operations = {
2329 @@ -399,4 +402,5 @@ const struct inode_operations ext2_speci
2330  #endif
2331         .setattr        = ext2_setattr,
2332         .permission     = ext2_permission,
2333 +       .sync_flags     = ext2_sync_flags,
2334  };
2335 diff -NurpP --minimal linux-2.6.30.2/fs/ext2/super.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext2/super.c
2336 --- linux-2.6.30.2/fs/ext2/super.c      2009-06-11 17:13:03.000000000 +0200
2337 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext2/super.c   2009-07-04 01:11:39.000000000 +0200
2338 @@ -391,7 +391,8 @@ enum {
2339         Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug,
2340         Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr,
2341         Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota,
2342 -       Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation
2343 +       Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation,
2344 +       Opt_tag, Opt_notag, Opt_tagid
2345  };
2346  
2347  static const match_table_t tokens = {
2348 @@ -419,6 +420,9 @@ static const match_table_t tokens = {
2349         {Opt_acl, "acl"},
2350         {Opt_noacl, "noacl"},
2351         {Opt_xip, "xip"},
2352 +       {Opt_tag, "tag"},
2353 +       {Opt_notag, "notag"},
2354 +       {Opt_tagid, "tagid=%u"},
2355         {Opt_grpquota, "grpquota"},
2356         {Opt_ignore, "noquota"},
2357         {Opt_quota, "quota"},
2358 @@ -489,6 +493,20 @@ static int parse_options (char * options
2359                 case Opt_nouid32:
2360                         set_opt (sbi->s_mount_opt, NO_UID32);
2361                         break;
2362 +#ifndef CONFIG_TAGGING_NONE
2363 +               case Opt_tag:
2364 +                       set_opt (sbi->s_mount_opt, TAGGED);
2365 +                       break;
2366 +               case Opt_notag:
2367 +                       clear_opt (sbi->s_mount_opt, TAGGED);
2368 +                       break;
2369 +#endif
2370 +#ifdef CONFIG_PROPAGATE
2371 +               case Opt_tagid:
2372 +                       /* TODO: use args[0]; tagid value is ignored */
2373 +                       set_opt (sbi->s_mount_opt, TAGGED);
2374 +                       break;
2375 +#endif
2376                 case Opt_nocheck:
2377                         clear_opt (sbi->s_mount_opt, CHECK);
2378                         break;
2379 @@ -838,6 +856,8 @@ static int ext2_fill_super(struct super_
2380         if (!parse_options ((char *) data, sbi))
2381                 goto failed_mount;
2382  
2383 +       if (EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_TAGGED)
2384 +               sb->s_flags |= MS_TAGGED;
2385         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2386                 ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
2387                  MS_POSIXACL : 0);
2388 @@ -1170,6 +1190,13 @@ static int ext2_remount (struct super_bl
2389                 goto restore_opts;
2390         }
2391  
2392 +       if ((sbi->s_mount_opt & EXT2_MOUNT_TAGGED) &&
2393 +               !(sb->s_flags & MS_TAGGED)) {
2394 +               printk("EXT2-fs: %s: tagging not permitted on remount.\n",
2395 +                      sb->s_id);
2396 +               err = -EINVAL;  /* restore old options */
2397 +               goto restore_opts;
2398 +       }
2399         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2400                 ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
2401  
2402 diff -NurpP --minimal linux-2.6.30.2/fs/ext2/symlink.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext2/symlink.c
2403 --- linux-2.6.30.2/fs/ext2/symlink.c    2008-12-25 00:26:37.000000000 +0100
2404 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext2/symlink.c 2009-07-04 01:11:39.000000000 +0200
2405 @@ -38,6 +38,7 @@ const struct inode_operations ext2_symli
2406         .listxattr      = ext2_listxattr,
2407         .removexattr    = generic_removexattr,
2408  #endif
2409 +       .sync_flags     = ext2_sync_flags,
2410  };
2411   
2412  const struct inode_operations ext2_fast_symlink_inode_operations = {
2413 @@ -49,4 +50,5 @@ const struct inode_operations ext2_fast_
2414         .listxattr      = ext2_listxattr,
2415         .removexattr    = generic_removexattr,
2416  #endif
2417 +       .sync_flags     = ext2_sync_flags,
2418  };
2419 diff -NurpP --minimal linux-2.6.30.2/fs/ext3/file.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext3/file.c
2420 --- linux-2.6.30.2/fs/ext3/file.c       2009-06-11 17:13:03.000000000 +0200
2421 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext3/file.c    2009-07-04 01:11:39.000000000 +0200
2422 @@ -139,5 +139,6 @@ const struct inode_operations ext3_file_
2423  #endif
2424         .permission     = ext3_permission,
2425         .fiemap         = ext3_fiemap,
2426 +       .sync_flags     = ext3_sync_flags,
2427  };
2428  
2429 diff -NurpP --minimal linux-2.6.30.2/fs/ext3/inode.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext3/inode.c
2430 --- linux-2.6.30.2/fs/ext3/inode.c      2009-06-11 17:13:03.000000000 +0200
2431 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext3/inode.c   2009-07-04 01:11:39.000000000 +0200
2432 @@ -38,6 +38,7 @@
2433  #include <linux/bio.h>
2434  #include <linux/fiemap.h>
2435  #include <linux/namei.h>
2436 +#include <linux/vs_tag.h>
2437  #include "xattr.h"
2438  #include "acl.h"
2439  
2440 @@ -2318,7 +2319,7 @@ static void ext3_free_branches(handle_t 
2441  
2442  int ext3_can_truncate(struct inode *inode)
2443  {
2444 -       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2445 +       if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
2446                 return 0;
2447         if (S_ISREG(inode->i_mode))
2448                 return 1;
2449 @@ -2695,36 +2696,84 @@ void ext3_set_inode_flags(struct inode *
2450  {
2451         unsigned int flags = EXT3_I(inode)->i_flags;
2452  
2453 -       inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
2454 +       inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK |
2455 +               S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
2456 +
2457 +       if (flags & EXT3_IMMUTABLE_FL)
2458 +               inode->i_flags |= S_IMMUTABLE;
2459 +       if (flags & EXT3_IXUNLINK_FL)
2460 +               inode->i_flags |= S_IXUNLINK;
2461 +
2462         if (flags & EXT3_SYNC_FL)
2463                 inode->i_flags |= S_SYNC;
2464         if (flags & EXT3_APPEND_FL)
2465                 inode->i_flags |= S_APPEND;
2466 -       if (flags & EXT3_IMMUTABLE_FL)
2467 -               inode->i_flags |= S_IMMUTABLE;
2468         if (flags & EXT3_NOATIME_FL)
2469                 inode->i_flags |= S_NOATIME;
2470         if (flags & EXT3_DIRSYNC_FL)
2471                 inode->i_flags |= S_DIRSYNC;
2472 +
2473 +       inode->i_vflags &= ~(V_BARRIER | V_COW);
2474 +
2475 +       if (flags & EXT3_BARRIER_FL)
2476 +               inode->i_vflags |= V_BARRIER;
2477 +       if (flags & EXT3_COW_FL)
2478 +               inode->i_vflags |= V_COW;
2479  }
2480  
2481  /* Propagate flags from i_flags to EXT3_I(inode)->i_flags */
2482  void ext3_get_inode_flags(struct ext3_inode_info *ei)
2483  {
2484         unsigned int flags = ei->vfs_inode.i_flags;
2485 +       unsigned int vflags = ei->vfs_inode.i_vflags;
2486 +
2487 +       ei->i_flags &= ~(EXT3_SYNC_FL | EXT3_APPEND_FL |
2488 +                       EXT3_IMMUTABLE_FL | EXT3_IXUNLINK_FL |
2489 +                       EXT3_NOATIME_FL | EXT3_DIRSYNC_FL |
2490 +                       EXT3_BARRIER_FL | EXT3_COW_FL);
2491 +
2492 +       if (flags & S_IMMUTABLE)
2493 +               ei->i_flags |= EXT3_IMMUTABLE_FL;
2494 +       if (flags & S_IXUNLINK)
2495 +               ei->i_flags |= EXT3_IXUNLINK_FL;
2496  
2497 -       ei->i_flags &= ~(EXT3_SYNC_FL|EXT3_APPEND_FL|
2498 -                       EXT3_IMMUTABLE_FL|EXT3_NOATIME_FL|EXT3_DIRSYNC_FL);
2499         if (flags & S_SYNC)
2500                 ei->i_flags |= EXT3_SYNC_FL;
2501         if (flags & S_APPEND)
2502                 ei->i_flags |= EXT3_APPEND_FL;
2503 -       if (flags & S_IMMUTABLE)
2504 -               ei->i_flags |= EXT3_IMMUTABLE_FL;
2505         if (flags & S_NOATIME)
2506                 ei->i_flags |= EXT3_NOATIME_FL;
2507         if (flags & S_DIRSYNC)
2508                 ei->i_flags |= EXT3_DIRSYNC_FL;
2509 +
2510 +       if (vflags & V_BARRIER)
2511 +               ei->i_flags |= EXT3_BARRIER_FL;
2512 +       if (vflags & V_COW)
2513 +               ei->i_flags |= EXT3_COW_FL;
2514 +}
2515 +
2516 +int ext3_sync_flags(struct inode *inode)
2517 +{
2518 +       struct ext3_iloc iloc;
2519 +       handle_t *handle;
2520 +       int err;
2521 +
2522 +       handle = ext3_journal_start(inode, 1);
2523 +       if (IS_ERR(handle))
2524 +               return PTR_ERR(handle);
2525 +       if (IS_SYNC(inode))
2526 +               handle->h_sync = 1;
2527 +       err = ext3_reserve_inode_write(handle, inode, &iloc);
2528 +       if (err)
2529 +               goto flags_err; /* handle is still stopped below */
2530 +
2531 +       ext3_get_inode_flags(EXT3_I(inode));    /* i_flags/i_vflags -> ei->i_flags */
2532 +       inode->i_ctime = CURRENT_TIME;
2533 +
2534 +       err = ext3_mark_iloc_dirty(handle, inode, &iloc);
2535 +flags_err:
2536 +       ext3_journal_stop(handle);
2537 +       return err;     /* 0 or the first error seen */
2538  }
2539  
2540  struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
2541 @@ -2736,6 +2785,8 @@ struct inode *ext3_iget(struct super_blo
2542         struct inode *inode;
2543         long ret;
2544         int block;
2545 +       uid_t uid;
2546 +       gid_t gid;
2547  
2548         inode = iget_locked(sb, ino);
2549         if (!inode)
2550 @@ -2756,12 +2807,17 @@ struct inode *ext3_iget(struct super_blo
2551         bh = iloc.bh;
2552         raw_inode = ext3_raw_inode(&iloc);
2553         inode->i_mode = le16_to_cpu(raw_inode->i_mode);
2554 -       inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
2555 -       inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
2556 +       uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
2557 +       gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
2558         if(!(test_opt (inode->i_sb, NO_UID32))) {
2559 -               inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2560 -               inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
2561 +               uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2562 +               gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
2563         }
2564 +       inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
2565 +       inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
2566 +       inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid,
2567 +               le16_to_cpu(raw_inode->i_raw_tag));
2568 +
2569         inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
2570         inode->i_size = le32_to_cpu(raw_inode->i_size);
2571         inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
2572 @@ -2892,6 +2948,8 @@ static int ext3_do_update_inode(handle_t
2573         struct ext3_inode *raw_inode = ext3_raw_inode(iloc);
2574         struct ext3_inode_info *ei = EXT3_I(inode);
2575         struct buffer_head *bh = iloc->bh;
2576 +       uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag);
2577 +       gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag);
2578         int err = 0, rc, block;
2579  
2580         /* For fields not not tracking in the in-memory inode,
2581 @@ -2902,29 +2960,32 @@ static int ext3_do_update_inode(handle_t
2582         ext3_get_inode_flags(ei);
2583         raw_inode->i_mode = cpu_to_le16(inode->i_mode);
2584         if(!(test_opt(inode->i_sb, NO_UID32))) {
2585 -               raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
2586 -               raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
2587 +               raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid));
2588 +               raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid));
2589  /*
2590   * Fix up interoperability with old kernels. Otherwise, old inodes get
2591   * re-used with the upper 16 bits of the uid/gid intact
2592   */
2593                 if(!ei->i_dtime) {
2594                         raw_inode->i_uid_high =
2595 -                               cpu_to_le16(high_16_bits(inode->i_uid));
2596 +                               cpu_to_le16(high_16_bits(uid));
2597                         raw_inode->i_gid_high =
2598 -                               cpu_to_le16(high_16_bits(inode->i_gid));
2599 +                               cpu_to_le16(high_16_bits(gid));
2600                 } else {
2601                         raw_inode->i_uid_high = 0;
2602                         raw_inode->i_gid_high = 0;
2603                 }
2604         } else {
2605                 raw_inode->i_uid_low =
2606 -                       cpu_to_le16(fs_high2lowuid(inode->i_uid));
2607 +                       cpu_to_le16(fs_high2lowuid(uid));
2608                 raw_inode->i_gid_low =
2609 -                       cpu_to_le16(fs_high2lowgid(inode->i_gid));
2610 +                       cpu_to_le16(fs_high2lowgid(gid));
2611                 raw_inode->i_uid_high = 0;
2612                 raw_inode->i_gid_high = 0;
2613         }
2614 +#ifdef CONFIG_TAGGING_INTERN
2615 +       raw_inode->i_raw_tag = cpu_to_le16(inode->i_tag);
2616 +#endif
2617         raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
2618         raw_inode->i_size = cpu_to_le32(ei->i_disksize);
2619         raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
2620 @@ -3077,7 +3138,8 @@ int ext3_setattr(struct dentry *dentry, 
2621                 return error;
2622  
2623         if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
2624 -               (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
2625 +               (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) ||
2626 +               (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) {
2627                 handle_t *handle;
2628  
2629                 /* (user+group)*(old+new) structure, inode write (sb,
2630 @@ -3099,6 +3161,8 @@ int ext3_setattr(struct dentry *dentry, 
2631                         inode->i_uid = attr->ia_uid;
2632                 if (attr->ia_valid & ATTR_GID)
2633                         inode->i_gid = attr->ia_gid;
2634 +               if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode))
2635 +                       inode->i_tag = attr->ia_tag;
2636                 error = ext3_mark_inode_dirty(handle, inode);
2637                 ext3_journal_stop(handle);
2638         }
2639 diff -NurpP --minimal linux-2.6.30.2/fs/ext3/ioctl.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext3/ioctl.c
2640 --- linux-2.6.30.2/fs/ext3/ioctl.c      2009-06-11 17:13:03.000000000 +0200
2641 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext3/ioctl.c   2009-07-04 02:23:27.000000000 +0200
2642 @@ -8,6 +8,7 @@
2643   */
2644  
2645  #include <linux/fs.h>
2646 +#include <linux/mount.h>
2647  #include <linux/jbd.h>
2648  #include <linux/capability.h>
2649  #include <linux/ext3_fs.h>
2650 @@ -15,6 +16,7 @@
2651  #include <linux/mount.h>
2652  #include <linux/time.h>
2653  #include <linux/compat.h>
2654 +#include <linux/vs_tag.h>
2655  #include <asm/uaccess.h>
2656  
2657  long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
2658 @@ -50,6 +52,11 @@ long ext3_ioctl(struct file *filp, unsig
2659  
2660                 flags = ext3_mask_flags(inode->i_mode, flags);
2661  
2662 +               if (IS_BARRIER(inode)) {
2663 +                       vxwprintk_task(1, "messing with the barrier.");
2664 +                       return -EACCES;
2665 +               }
2666 +
2667                 mutex_lock(&inode->i_mutex);
2668  
2669                 /* Is it quota file? Do not allow user to mess with it */
2670 @@ -68,7 +75,9 @@ long ext3_ioctl(struct file *filp, unsig
2671                  *
2672                  * This test looks nicer. Thanks to Pauline Middelink
2673                  */
2674 -               if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) {
2675 +               if ((oldflags & EXT3_IMMUTABLE_FL) ||
2676 +                       ((flags ^ oldflags) & (EXT3_APPEND_FL |
2677 +                       EXT3_IMMUTABLE_FL | EXT3_IXUNLINK_FL))) {
2678                         if (!capable(CAP_LINUX_IMMUTABLE))
2679                                 goto flags_out;
2680                 }
2681 diff -NurpP --minimal linux-2.6.30.2/fs/ext3/namei.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext3/namei.c
2682 --- linux-2.6.30.2/fs/ext3/namei.c      2009-06-11 17:13:03.000000000 +0200
2683 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext3/namei.c   2009-07-04 01:11:39.000000000 +0200
2684 @@ -36,6 +36,7 @@
2685  #include <linux/quotaops.h>
2686  #include <linux/buffer_head.h>
2687  #include <linux/bio.h>
2688 +#include <linux/vs_tag.h>
2689  
2690  #include "namei.h"
2691  #include "xattr.h"
2692 @@ -912,6 +913,7 @@ restart:
2693                                 if (bh)
2694                                         ll_rw_block(READ_META, 1, &bh);
2695                         }
2696 +               dx_propagate_tag(nd, inode);
2697                 }
2698                 if ((bh = bh_use[ra_ptr++]) == NULL)
2699                         goto next;
2700 @@ -2446,6 +2448,7 @@ const struct inode_operations ext3_dir_i
2701         .removexattr    = generic_removexattr,
2702  #endif
2703         .permission     = ext3_permission,
2704 +       .sync_flags     = ext3_sync_flags,
2705  };
2706  
2707  const struct inode_operations ext3_special_inode_operations = {
2708 @@ -2457,4 +2460,5 @@ const struct inode_operations ext3_speci
2709         .removexattr    = generic_removexattr,
2710  #endif
2711         .permission     = ext3_permission,
2712 +       .sync_flags     = ext3_sync_flags,
2713  };
2714 diff -NurpP --minimal linux-2.6.30.2/fs/ext3/super.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext3/super.c
2715 --- linux-2.6.30.2/fs/ext3/super.c      2009-06-11 17:13:03.000000000 +0200
2716 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext3/super.c   2009-07-04 01:11:39.000000000 +0200
2717 @@ -794,7 +794,7 @@ enum {
2718         Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
2719         Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
2720         Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
2721 -       Opt_grpquota
2722 +       Opt_grpquota, Opt_tag, Opt_notag, Opt_tagid
2723  };
2724  
2725  static const match_table_t tokens = {
2726 @@ -847,6 +847,9 @@ static const match_table_t tokens = {
2727         {Opt_usrquota, "usrquota"},
2728         {Opt_barrier, "barrier=%u"},
2729         {Opt_resize, "resize"},
2730 +       {Opt_tag, "tag"},
2731 +       {Opt_notag, "notag"},
2732 +       {Opt_tagid, "tagid=%u"},
2733         {Opt_err, NULL},
2734  };
2735  
2736 @@ -939,6 +942,20 @@ static int parse_options (char *options,
2737                 case Opt_nouid32:
2738                         set_opt (sbi->s_mount_opt, NO_UID32);
2739                         break;
2740 +#ifndef CONFIG_TAGGING_NONE
2741 +               case Opt_tag:
2742 +                       set_opt (sbi->s_mount_opt, TAGGED);
2743 +                       break;
2744 +               case Opt_notag:
2745 +                       clear_opt (sbi->s_mount_opt, TAGGED);
2746 +                       break;
2747 +#endif
2748 +#ifdef CONFIG_PROPAGATE
2749 +               case Opt_tagid:
2750 +                       /* TODO: use args[0]; tagid value is ignored */
2751 +                       set_opt (sbi->s_mount_opt, TAGGED);
2752 +                       break;
2753 +#endif
2754                 case Opt_nocheck:
2755                         clear_opt (sbi->s_mount_opt, CHECK);
2756                         break;
2757 @@ -1657,6 +1674,9 @@ static int ext3_fill_super (struct super
2758                             NULL, 0))
2759                 goto failed_mount;
2760  
2761 +       if (EXT3_SB(sb)->s_mount_opt & EXT3_MOUNT_TAGGED)
2762 +               sb->s_flags |= MS_TAGGED;
2763 +
2764         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2765                 ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
2766  
2767 @@ -2531,6 +2551,13 @@ static int ext3_remount (struct super_bl
2768         if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
2769                 ext3_abort(sb, __func__, "Abort forced by user");
2770  
2771 +       if ((sbi->s_mount_opt & EXT3_MOUNT_TAGGED) &&
2772 +               !(sb->s_flags & MS_TAGGED)) {
2773 +               printk("EXT3-fs: %s: tagging not permitted on remount.\n",
2774 +                       sb->s_id);
2775 +               err = -EINVAL;  /* restore old options */
2776 +               goto restore_opts;
2777 +       }
2778         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2779                 ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
2780  
2781 diff -NurpP --minimal linux-2.6.30.2/fs/ext3/symlink.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext3/symlink.c
2782 --- linux-2.6.30.2/fs/ext3/symlink.c    2008-12-25 00:26:37.000000000 +0100
2783 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext3/symlink.c 2009-07-04 01:11:39.000000000 +0200
2784 @@ -40,6 +40,7 @@ const struct inode_operations ext3_symli
2785         .listxattr      = ext3_listxattr,
2786         .removexattr    = generic_removexattr,
2787  #endif
2788 +       .sync_flags     = ext3_sync_flags,
2789  };
2790  
2791  const struct inode_operations ext3_fast_symlink_inode_operations = {
2792 @@ -51,4 +52,5 @@ const struct inode_operations ext3_fast_
2793         .listxattr      = ext3_listxattr,
2794         .removexattr    = generic_removexattr,
2795  #endif
2796 +       .sync_flags     = ext3_sync_flags,
2797  };
2798 diff -NurpP --minimal linux-2.6.30.2/fs/ext4/ext4.h linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext4/ext4.h
2799 --- linux-2.6.30.2/fs/ext4/ext4.h       2009-06-11 17:13:04.000000000 +0200
2800 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext4/ext4.h    2009-07-04 15:54:50.000000000 +0200
2801 @@ -235,8 +235,12 @@ struct flex_groups {
2802  #define EXT4_HUGE_FILE_FL               0x00040000 /* Set to each huge file */
2803  #define EXT4_EXTENTS_FL                        0x00080000 /* Inode uses extents */
2804  #define EXT4_EXT_MIGRATE               0x00100000 /* Inode is migrating */
2805 +#define EXT4_IXUNLINK_FL               0x08000000 /* Immutable invert on unlink */
2806  #define EXT4_RESERVED_FL               0x80000000 /* reserved for ext4 lib */
2807  
2808 +#define EXT4_BARRIER_FL                        0x04000000 /* Barrier for chroot() */
2809 +#define EXT4_COW_FL                    0x20000000 /* Copy on Write marker */
2810 +
2811  #define EXT4_FL_USER_VISIBLE           0x000BDFFF /* User visible flags */
2812  #define EXT4_FL_USER_MODIFIABLE                0x000B80FF /* User modifiable flags */
2813  
2814 @@ -560,6 +564,7 @@ do {                                                                               \
2815  #define EXT4_MOUNT_I_VERSION            0x2000000 /* i_version support */
2816  #define EXT4_MOUNT_DELALLOC            0x8000000 /* Delalloc support */
2817  #define EXT4_MOUNT_DATA_ERR_ABORT      0x10000000 /* Abort on file data write */
2818 +#define EXT4_MOUNT_TAGGED              (1<<30) /* Enable Context Tags */
2819  
2820  /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
2821  #ifndef _LINUX_EXT2_FS_H
2822 @@ -1068,6 +1073,7 @@ struct buffer_head *ext4_bread(handle_t 
2823                                                 ext4_lblk_t, int, int *);
2824  int ext4_get_block(struct inode *inode, sector_t iblock,
2825                                 struct buffer_head *bh_result, int create);
2826 +extern int ext4_sync_flags(struct inode *inode);
2827  
2828  extern struct inode *ext4_iget(struct super_block *, unsigned long);
2829  extern int  ext4_write_inode(struct inode *, int);
2830 diff -NurpP --minimal linux-2.6.30.2/fs/ext4/file.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext4/file.c
2831 --- linux-2.6.30.2/fs/ext4/file.c       2009-06-11 17:13:04.000000000 +0200
2832 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext4/file.c    2009-07-04 01:11:39.000000000 +0200
2833 @@ -176,5 +176,6 @@ const struct inode_operations ext4_file_
2834         .permission     = ext4_permission,
2835         .fallocate      = ext4_fallocate,
2836         .fiemap         = ext4_fiemap,
2837 +       .sync_flags     = ext4_sync_flags,
2838  };
2839  
2840 diff -NurpP --minimal linux-2.6.30.2/fs/ext4/inode.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext4/inode.c
2841 --- linux-2.6.30.2/fs/ext4/inode.c      2009-06-11 17:13:04.000000000 +0200
2842 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext4/inode.c   2009-07-04 01:11:39.000000000 +0200
2843 @@ -37,6 +37,7 @@
2844  #include <linux/namei.h>
2845  #include <linux/uio.h>
2846  #include <linux/bio.h>
2847 +#include <linux/vs_tag.h>
2848  #include "ext4_jbd2.h"
2849  #include "xattr.h"
2850  #include "acl.h"
2851 @@ -3916,7 +3917,7 @@ static void ext4_free_branches(handle_t 
2852  
2853  int ext4_can_truncate(struct inode *inode)
2854  {
2855 -       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2856 +       if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
2857                 return 0;
2858         if (S_ISREG(inode->i_mode))
2859                 return 1;
2860 @@ -4267,37 +4268,86 @@ void ext4_set_inode_flags(struct inode *
2861  {
2862         unsigned int flags = EXT4_I(inode)->i_flags;
2863  
2864 -       inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
2865 +       inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK |
2866 +               S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
2867 +
2868 +       if (flags & EXT4_IMMUTABLE_FL)
2869 +               inode->i_flags |= S_IMMUTABLE;
2870 +       if (flags & EXT4_IXUNLINK_FL)
2871 +               inode->i_flags |= S_IXUNLINK;
2872 +
2873         if (flags & EXT4_SYNC_FL)
2874                 inode->i_flags |= S_SYNC;
2875         if (flags & EXT4_APPEND_FL)
2876                 inode->i_flags |= S_APPEND;
2877 -       if (flags & EXT4_IMMUTABLE_FL)
2878 -               inode->i_flags |= S_IMMUTABLE;
2879         if (flags & EXT4_NOATIME_FL)
2880                 inode->i_flags |= S_NOATIME;
2881         if (flags & EXT4_DIRSYNC_FL)
2882                 inode->i_flags |= S_DIRSYNC;
2883 +
2884 +       inode->i_vflags &= ~(V_BARRIER | V_COW);
2885 +
2886 +       if (flags & EXT4_BARRIER_FL)
2887 +               inode->i_vflags |= V_BARRIER;
2888 +       if (flags & EXT4_COW_FL)
2889 +               inode->i_vflags |= V_COW;
2890  }
2891  
2892  /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
2893  void ext4_get_inode_flags(struct ext4_inode_info *ei)
2894  {
2895         unsigned int flags = ei->vfs_inode.i_flags;
2896 +       unsigned int vflags = ei->vfs_inode.i_vflags;
2897 +
2898 +       ei->i_flags &= ~(EXT4_SYNC_FL | EXT4_APPEND_FL |
2899 +                       EXT4_IMMUTABLE_FL | EXT4_IXUNLINK_FL |
2900 +                       EXT4_NOATIME_FL | EXT4_DIRSYNC_FL |
2901 +                       EXT4_BARRIER_FL | EXT4_COW_FL);
2902 +
2903 +       if (flags & S_IMMUTABLE)
2904 +               ei->i_flags |= EXT4_IMMUTABLE_FL;
2905 +       if (flags & S_IXUNLINK)
2906 +               ei->i_flags |= EXT4_IXUNLINK_FL;
2907  
2908 -       ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
2909 -                       EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|EXT4_DIRSYNC_FL);
2910         if (flags & S_SYNC)
2911                 ei->i_flags |= EXT4_SYNC_FL;
2912         if (flags & S_APPEND)
2913                 ei->i_flags |= EXT4_APPEND_FL;
2914 -       if (flags & S_IMMUTABLE)
2915 -               ei->i_flags |= EXT4_IMMUTABLE_FL;
2916         if (flags & S_NOATIME)
2917                 ei->i_flags |= EXT4_NOATIME_FL;
2918         if (flags & S_DIRSYNC)
2919                 ei->i_flags |= EXT4_DIRSYNC_FL;
2920 +
2921 +       if (vflags & V_BARRIER)
2922 +               ei->i_flags |= EXT4_BARRIER_FL;
2923 +       if (vflags & V_COW)
2924 +               ei->i_flags |= EXT4_COW_FL;
2925 +}
2926 +
2927 +int ext4_sync_flags(struct inode *inode)
2928 +{
2929 +       struct ext4_iloc iloc;
2930 +       handle_t *handle;
2931 +       int err;
2932 +
2933 +       handle = ext4_journal_start(inode, 1);
2934 +       if (IS_ERR(handle))
2935 +               return PTR_ERR(handle);
2936 +       if (IS_SYNC(inode))
2937 +               handle->h_sync = 1;
2938 +       err = ext4_reserve_inode_write(handle, inode, &iloc);
2939 +       if (err)
2940 +               goto flags_err;
2941 +
2942 +       ext4_get_inode_flags(EXT4_I(inode));
2943 +       inode->i_ctime = CURRENT_TIME;
2944 +
2945 +       err = ext4_mark_iloc_dirty(handle, inode, &iloc);
2946 +flags_err:
2947 +       ext4_journal_stop(handle);
2948 +       return err;
2949  }
2950 +
2951  static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
2952                                         struct ext4_inode_info *ei)
2953  {
2954 @@ -4330,6 +4380,8 @@ struct inode *ext4_iget(struct super_blo
2955         struct inode *inode;
2956         long ret;
2957         int block;
2958 +       uid_t uid;
2959 +       gid_t gid;
2960  
2961         inode = iget_locked(sb, ino);
2962         if (!inode)
2963 @@ -4349,12 +4401,16 @@ struct inode *ext4_iget(struct super_blo
2964         bh = iloc.bh;
2965         raw_inode = ext4_raw_inode(&iloc);
2966         inode->i_mode = le16_to_cpu(raw_inode->i_mode);
2967 -       inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
2968 -       inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
2969 +       uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
2970 +       gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
2971         if (!(test_opt(inode->i_sb, NO_UID32))) {
2972 -               inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2973 -               inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
2974 +               uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2975 +               gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
2976         }
2977 +       inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
2978 +       inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
2979 +       inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid,
2980 +               le16_to_cpu(raw_inode->i_raw_tag));
2981         inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
2982  
2983         ei->i_state = 0;
2984 @@ -4555,6 +4611,8 @@ static int ext4_do_update_inode(handle_t
2985         struct ext4_inode *raw_inode = ext4_raw_inode(iloc);
2986         struct ext4_inode_info *ei = EXT4_I(inode);
2987         struct buffer_head *bh = iloc->bh;
2988 +       uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag);
2989 +       gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag);
2990         int err = 0, rc, block;
2991  
2992         /* For fields not not tracking in the in-memory inode,
2993 @@ -4565,29 +4623,32 @@ static int ext4_do_update_inode(handle_t
2994         ext4_get_inode_flags(ei);
2995         raw_inode->i_mode = cpu_to_le16(inode->i_mode);
2996         if (!(test_opt(inode->i_sb, NO_UID32))) {
2997 -               raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
2998 -               raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
2999 +               raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid));
3000 +               raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid));
3001  /*
3002   * Fix up interoperability with old kernels. Otherwise, old inodes get
3003   * re-used with the upper 16 bits of the uid/gid intact
3004   */
3005                 if (!ei->i_dtime) {
3006                         raw_inode->i_uid_high =
3007 -                               cpu_to_le16(high_16_bits(inode->i_uid));
3008 +                               cpu_to_le16(high_16_bits(uid));
3009                         raw_inode->i_gid_high =
3010 -                               cpu_to_le16(high_16_bits(inode->i_gid));
3011 +                               cpu_to_le16(high_16_bits(gid));
3012                 } else {
3013                         raw_inode->i_uid_high = 0;
3014                         raw_inode->i_gid_high = 0;
3015                 }
3016         } else {
3017                 raw_inode->i_uid_low =
3018 -                       cpu_to_le16(fs_high2lowuid(inode->i_uid));
3019 +                       cpu_to_le16(fs_high2lowuid(uid));
3020                 raw_inode->i_gid_low =
3021 -                       cpu_to_le16(fs_high2lowgid(inode->i_gid));
3022 +                       cpu_to_le16(fs_high2lowgid(gid));
3023                 raw_inode->i_uid_high = 0;
3024                 raw_inode->i_gid_high = 0;
3025         }
3026 +#ifdef CONFIG_TAGGING_INTERN
3027 +       raw_inode->i_raw_tag = cpu_to_le16(inode->i_tag);
3028 +#endif
3029         raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
3030  
3031         EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
3032 @@ -4769,7 +4830,8 @@ int ext4_setattr(struct dentry *dentry, 
3033                 return error;
3034  
3035         if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
3036 -               (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
3037 +               (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) ||
3038 +               (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) {
3039                 handle_t *handle;
3040  
3041                 /* (user+group)*(old+new) structure, inode write (sb,
3042 @@ -4791,6 +4853,8 @@ int ext4_setattr(struct dentry *dentry, 
3043                         inode->i_uid = attr->ia_uid;
3044                 if (attr->ia_valid & ATTR_GID)
3045                         inode->i_gid = attr->ia_gid;
3046 +               if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode))
3047 +                       inode->i_tag = attr->ia_tag;
3048                 error = ext4_mark_inode_dirty(handle, inode);
3049                 ext4_journal_stop(handle);
3050         }
3051 diff -NurpP --minimal linux-2.6.30.2/fs/ext4/ioctl.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext4/ioctl.c
3052 --- linux-2.6.30.2/fs/ext4/ioctl.c      2009-06-11 17:13:04.000000000 +0200
3053 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext4/ioctl.c   2009-07-04 01:11:39.000000000 +0200
3054 @@ -8,12 +8,14 @@
3055   */
3056  
3057  #include <linux/fs.h>
3058 +#include <linux/mount.h>
3059  #include <linux/jbd2.h>
3060  #include <linux/capability.h>
3061  #include <linux/time.h>
3062  #include <linux/compat.h>
3063  #include <linux/smp_lock.h>
3064  #include <linux/mount.h>
3065 +#include <linux/vs_tag.h>
3066  #include <asm/uaccess.h>
3067  #include "ext4_jbd2.h"
3068  #include "ext4.h"
3069 @@ -50,6 +52,11 @@ long ext4_ioctl(struct file *filp, unsig
3070  
3071                 flags = ext4_mask_flags(inode->i_mode, flags);
3072  
3073 +               if (IS_BARRIER(inode)) {
3074 +                       vxwprintk_task(1, "messing with the barrier.");
3075 +                       return -EACCES;
3076 +               }
3077 +
3078                 err = -EPERM;
3079                 mutex_lock(&inode->i_mutex);
3080                 /* Is it quota file? Do not allow user to mess with it */
3081 @@ -67,7 +74,9 @@ long ext4_ioctl(struct file *filp, unsig
3082                  *
3083                  * This test looks nicer. Thanks to Pauline Middelink
3084                  */
3085 -               if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
3086 +               if ((oldflags & EXT4_IMMUTABLE_FL) ||
3087 +                       ((flags ^ oldflags) & (EXT4_APPEND_FL |
3088 +                       EXT4_IMMUTABLE_FL | EXT4_IXUNLINK_FL))) {
3089                         if (!capable(CAP_LINUX_IMMUTABLE))
3090                                 goto flags_out;
3091                 }
3092 diff -NurpP --minimal linux-2.6.30.2/fs/ext4/namei.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext4/namei.c
3093 --- linux-2.6.30.2/fs/ext4/namei.c      2009-06-11 17:13:04.000000000 +0200
3094 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext4/namei.c   2009-07-04 01:11:39.000000000 +0200
3095 @@ -34,6 +34,7 @@
3096  #include <linux/quotaops.h>
3097  #include <linux/buffer_head.h>
3098  #include <linux/bio.h>
3099 +#include <linux/vs_tag.h>
3100  #include "ext4.h"
3101  #include "ext4_jbd2.h"
3102  
3103 @@ -942,6 +943,7 @@ restart:
3104                                 if (bh)
3105                                         ll_rw_block(READ_META, 1, &bh);
3106                         }
3107 +               dx_propagate_tag(nd, inode);
3108                 }
3109                 if ((bh = bh_use[ra_ptr++]) == NULL)
3110                         goto next;
3111 @@ -2533,6 +2535,7 @@ const struct inode_operations ext4_dir_i
3112         .removexattr    = generic_removexattr,
3113  #endif
3114         .permission     = ext4_permission,
3115 +       .sync_flags     = ext4_sync_flags,
3116  };
3117  
3118  const struct inode_operations ext4_special_inode_operations = {
3119 @@ -2544,4 +2547,5 @@ const struct inode_operations ext4_speci
3120         .removexattr    = generic_removexattr,
3121  #endif
3122         .permission     = ext4_permission,
3123 +       .sync_flags     = ext4_sync_flags,
3124  };
3125 diff -NurpP --minimal linux-2.6.30.2/fs/ext4/super.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext4/super.c
3126 --- linux-2.6.30.2/fs/ext4/super.c      2009-06-11 17:13:04.000000000 +0200
3127 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext4/super.c   2009-07-04 02:21:38.000000000 +0200
3128 @@ -1029,7 +1029,8 @@ enum {
3129         Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize,
3130         Opt_usrquota, Opt_grpquota, Opt_i_version,
3131         Opt_stripe, Opt_delalloc, Opt_nodelalloc,
3132 -       Opt_inode_readahead_blks, Opt_journal_ioprio
3133 +       Opt_inode_readahead_blks, Opt_journal_ioprio,
3134 +       Opt_tag, Opt_notag, Opt_tagid
3135  };
3136  
3137  static const match_table_t tokens = {
3138 @@ -1092,6 +1093,9 @@ static const match_table_t tokens = {
3139         {Opt_auto_da_alloc, "auto_da_alloc=%u"},
3140         {Opt_auto_da_alloc, "auto_da_alloc"},
3141         {Opt_noauto_da_alloc, "noauto_da_alloc"},
3142 +       {Opt_tag, "tag"},
3143 +       {Opt_notag, "notag"},
3144 +       {Opt_tagid, "tagid=%u"},
3145         {Opt_err, NULL},
3146  };
3147  
3148 @@ -1187,6 +1191,20 @@ static int parse_options(char *options, 
3149                 case Opt_nouid32:
3150                         set_opt(sbi->s_mount_opt, NO_UID32);
3151                         break;
3152 +#ifndef CONFIG_TAGGING_NONE
3153 +               case Opt_tag:
3154 +                       set_opt (sbi->s_mount_opt, TAGGED);
3155 +                       break;
3156 +               case Opt_notag:
3157 +                       clear_opt (sbi->s_mount_opt, TAGGED);
3158 +                       break;
3159 +#endif
3160 +#ifdef CONFIG_PROPAGATE
3161 +               case Opt_tagid:
3162 +                       /* use args[0] */
3163 +                       set_opt (sbi->s_mount_opt, TAGGED);
3164 +                       break;
3165 +#endif
3166                 case Opt_debug:
3167                         set_opt(sbi->s_mount_opt, DEBUG);
3168                         break;
3169 @@ -2335,6 +2353,9 @@ static int ext4_fill_super(struct super_
3170                            &journal_ioprio, NULL, 0))
3171                 goto failed_mount;
3172  
3173 +       if (EXT4_SB(sb)->s_mount_opt & EXT4_MOUNT_TAGGED)
3174 +               sb->s_flags |= MS_TAGGED;
3175 +
3176         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3177                 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
3178  
3179 @@ -3399,6 +3420,13 @@ static int ext4_remount(struct super_blo
3180         if (sbi->s_mount_opt & EXT4_MOUNT_ABORT)
3181                 ext4_abort(sb, __func__, "Abort forced by user");
3182  
3183 +       if ((sbi->s_mount_opt & EXT4_MOUNT_TAGGED) &&
3184 +               !(sb->s_flags & MS_TAGGED)) {
3185 +               printk("EXT4-fs: %s: tagging not permitted on remount.\n",
3186 +                       sb->s_id);
3187 +               return -EINVAL;
3188 +       }
3189 +
3190         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3191                 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
3192  
3193 diff -NurpP --minimal linux-2.6.30.2/fs/ext4/symlink.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext4/symlink.c
3194 --- linux-2.6.30.2/fs/ext4/symlink.c    2008-12-25 00:26:37.000000000 +0100
3195 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ext4/symlink.c 2009-07-04 01:11:39.000000000 +0200
3196 @@ -40,6 +40,7 @@ const struct inode_operations ext4_symli
3197         .listxattr      = ext4_listxattr,
3198         .removexattr    = generic_removexattr,
3199  #endif
3200 +       .sync_flags     = ext4_sync_flags,
3201  };
3202  
3203  const struct inode_operations ext4_fast_symlink_inode_operations = {
3204 @@ -51,4 +52,5 @@ const struct inode_operations ext4_fast_
3205         .listxattr      = ext4_listxattr,
3206         .removexattr    = generic_removexattr,
3207  #endif
3208 +       .sync_flags     = ext4_sync_flags,
3209  };
3210 diff -NurpP --minimal linux-2.6.30.2/fs/fcntl.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/fcntl.c
3211 --- linux-2.6.30.2/fs/fcntl.c   2009-06-11 17:13:04.000000000 +0200
3212 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/fcntl.c        2009-07-04 01:11:39.000000000 +0200
3213 @@ -20,6 +20,7 @@
3214  #include <linux/rcupdate.h>
3215  #include <linux/pid_namespace.h>
3216  #include <linux/smp_lock.h>
3217 +#include <linux/vs_limit.h>
3218  
3219  #include <asm/poll.h>
3220  #include <asm/siginfo.h>
3221 @@ -103,6 +104,8 @@ SYSCALL_DEFINE3(dup3, unsigned int, oldf
3222  
3223         if (tofree)
3224                 filp_close(tofree, files);
3225 +       else
3226 +               vx_openfd_inc(newfd);   /* fd was unused */
3227  
3228         return newfd;
3229  
3230 @@ -345,6 +348,11 @@ SYSCALL_DEFINE3(fcntl, unsigned int, fd,
3231         filp = fget(fd);
3232         if (!filp)
3233                 goto out;
3234 +       if (!vx_files_avail(1)) {
3235 +               /* drop the reference taken by fget() above */
3236 +               fput(filp);
3237 +               goto out;
3238 +       }
3236  
3237         err = security_file_fcntl(filp, cmd, arg);
3238         if (err) {
3239 diff -NurpP --minimal linux-2.6.30.2/fs/file.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/file.c
3240 --- linux-2.6.30.2/fs/file.c    2008-12-25 00:26:37.000000000 +0100
3241 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/file.c 2009-07-04 01:11:39.000000000 +0200
3242 @@ -19,6 +19,7 @@
3243  #include <linux/spinlock.h>
3244  #include <linux/rcupdate.h>
3245  #include <linux/workqueue.h>
3246 +#include <linux/vs_limit.h>
3247  
3248  struct fdtable_defer {
3249         spinlock_t lock;
3250 @@ -367,6 +368,8 @@ struct files_struct *dup_fd(struct files
3251                 struct file *f = *old_fds++;
3252                 if (f) {
3253                         get_file(f);
3254 +                       /* TODO: sum it first for check and performance */
3255 +                       vx_openfd_inc(open_files - i);
3256                 } else {
3257                         /*
3258                          * The fd may be claimed in the fd bitmap but not yet
3259 @@ -475,6 +478,7 @@ repeat:
3260         else
3261                 FD_CLR(fd, fdt->close_on_exec);
3262         error = fd;
3263 +       vx_openfd_inc(fd);
3264  #if 1
3265         /* Sanity check */
3266         if (rcu_dereference(fdt->fd[fd]) != NULL) {
3267 diff -NurpP --minimal linux-2.6.30.2/fs/file_table.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/file_table.c
3268 --- linux-2.6.30.2/fs/file_table.c      2009-06-11 17:13:04.000000000 +0200
3269 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/file_table.c   2009-07-04 01:11:39.000000000 +0200
3270 @@ -22,6 +22,8 @@
3271  #include <linux/fsnotify.h>
3272  #include <linux/sysctl.h>
3273  #include <linux/percpu_counter.h>
3274 +#include <linux/vs_limit.h>
3275 +#include <linux/vs_context.h>
3276  
3277  #include <asm/atomic.h>
3278  
3279 @@ -131,6 +133,8 @@ struct file *get_empty_filp(void)
3280         spin_lock_init(&f->f_lock);
3281         eventpoll_init_file(f);
3282         /* f->f_version: 0 */
3283 +       f->f_xid = vx_current_xid();
3284 +       vx_files_inc(f);
3285         return f;
3286  
3287  over:
3288 @@ -285,6 +289,8 @@ void __fput(struct file *file)
3289                 cdev_put(inode->i_cdev);
3290         fops_put(file->f_op);
3291         put_pid(file->f_owner.pid);
3292 +       vx_files_dec(file);
3293 +       file->f_xid = 0;
3294         file_kill(file);
3295         if (file->f_mode & FMODE_WRITE)
3296                 drop_file_write_access(file);
3297 @@ -352,6 +358,8 @@ void put_filp(struct file *file)
3298  {
3299         if (atomic_long_dec_and_test(&file->f_count)) {
3300                 security_file_free(file);
3301 +               vx_files_dec(file);
3302 +               file->f_xid = 0;
3303                 file_kill(file);
3304                 file_free(file);
3305         }
3306 diff -NurpP --minimal linux-2.6.30.2/fs/fs_struct.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/fs_struct.c
3307 --- linux-2.6.30.2/fs/fs_struct.c       2009-06-11 17:13:04.000000000 +0200
3308 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/fs_struct.c    2009-07-04 02:03:16.000000000 +0200
3309 @@ -4,6 +4,7 @@
3310  #include <linux/path.h>
3311  #include <linux/slab.h>
3312  #include <linux/fs_struct.h>
3313 +#include <linux/vserver/global.h>
3314  
3315  /*
3316   * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
3317 @@ -77,6 +78,7 @@ void free_fs_struct(struct fs_struct *fs
3318  {
3319         path_put(&fs->root);
3320         path_put(&fs->pwd);
3321 +       atomic_dec(&vs_global_fs);
3322         kmem_cache_free(fs_cachep, fs);
3323  }
3324  
3325 @@ -112,6 +114,7 @@ struct fs_struct *copy_fs_struct(struct 
3326                 fs->pwd = old->pwd;
3327                 path_get(&old->pwd);
3328                 read_unlock(&old->lock);
3329 +               atomic_inc(&vs_global_fs);
3330         }
3331         return fs;
3332  }
3333 diff -NurpP --minimal linux-2.6.30.2/fs/hfsplus/ioctl.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/hfsplus/ioctl.c
3334 --- linux-2.6.30.2/fs/hfsplus/ioctl.c   2008-12-25 00:26:37.000000000 +0100
3335 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/hfsplus/ioctl.c        2009-07-04 01:11:39.000000000 +0200
3336 @@ -17,6 +17,7 @@
3337  #include <linux/mount.h>
3338  #include <linux/sched.h>
3339  #include <linux/xattr.h>
3340 +#include <linux/mount.h>
3341  #include <asm/uaccess.h>
3342  #include "hfsplus_fs.h"
3343  
3344 diff -NurpP --minimal linux-2.6.30.2/fs/inode.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/inode.c
3345 --- linux-2.6.30.2/fs/inode.c   2009-06-11 17:13:05.000000000 +0200
3346 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/inode.c        2009-07-04 01:11:39.000000000 +0200
3347 @@ -127,6 +127,9 @@ struct inode *inode_init_always(struct s
3348         struct address_space *const mapping = &inode->i_data;
3349  
3350         inode->i_sb = sb;
3351 +
3352 +       /* essential because of inode slab reuse */
3353 +       inode->i_tag = 0;
3354         inode->i_blkbits = sb->s_blocksize_bits;
3355         inode->i_flags = 0;
3356         atomic_set(&inode->i_count, 1);
3357 @@ -147,6 +150,7 @@ struct inode *inode_init_always(struct s
3358         inode->i_bdev = NULL;
3359         inode->i_cdev = NULL;
3360         inode->i_rdev = 0;
3361 +       inode->i_mdev = 0;
3362         inode->dirtied_when = 0;
3363  
3364         if (security_inode_alloc(inode))
3365 @@ -277,6 +281,8 @@ void __iget(struct inode *inode)
3366         inodes_stat.nr_unused--;
3367  }
3368  
3369 +EXPORT_SYMBOL_GPL(__iget);
3370 +
3371  /**
3372   * clear_inode - clear an inode
3373   * @inode: inode to clear
3374 @@ -1553,9 +1559,11 @@ void init_special_inode(struct inode *in
3375         if (S_ISCHR(mode)) {
3376                 inode->i_fop = &def_chr_fops;
3377                 inode->i_rdev = rdev;
3378 +               inode->i_mdev = rdev;
3379         } else if (S_ISBLK(mode)) {
3380                 inode->i_fop = &def_blk_fops;
3381                 inode->i_rdev = rdev;
3382 +               inode->i_mdev = rdev;
3383         } else if (S_ISFIFO(mode))
3384                 inode->i_fop = &def_fifo_fops;
3385         else if (S_ISSOCK(mode))
3386 diff -NurpP --minimal linux-2.6.30.2/fs/ioctl.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ioctl.c
3387 --- linux-2.6.30.2/fs/ioctl.c   2009-06-11 17:13:05.000000000 +0200
3388 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ioctl.c        2009-07-04 01:11:39.000000000 +0200
3389 @@ -15,6 +15,9 @@
3390  #include <linux/uaccess.h>
3391  #include <linux/writeback.h>
3392  #include <linux/buffer_head.h>
3393 +#include <linux/proc_fs.h>
3394 +#include <linux/vserver/inode.h>
3395 +#include <linux/vs_tag.h>
3396  
3397  #include <asm/ioctls.h>
3398  
3399 diff -NurpP --minimal linux-2.6.30.2/fs/ioprio.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ioprio.c
3400 --- linux-2.6.30.2/fs/ioprio.c  2009-03-24 14:22:26.000000000 +0100
3401 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ioprio.c       2009-07-04 01:11:39.000000000 +0200
3402 @@ -26,6 +26,7 @@
3403  #include <linux/syscalls.h>
3404  #include <linux/security.h>
3405  #include <linux/pid_namespace.h>
3406 +#include <linux/vs_base.h>
3407  
3408  int set_task_ioprio(struct task_struct *task, int ioprio)
3409  {
3410 @@ -123,6 +124,8 @@ SYSCALL_DEFINE3(ioprio_set, int, which, 
3411                         else
3412                                 pgrp = find_vpid(who);
3413                         do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
3414 +                               if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
3415 +                                       continue;
3416                                 ret = set_task_ioprio(p, ioprio);
3417                                 if (ret)
3418                                         break;
3419 @@ -212,6 +215,8 @@ SYSCALL_DEFINE2(ioprio_get, int, which, 
3420                         else
3421                                 pgrp = find_vpid(who);
3422                         do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
3423 +                               if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
3424 +                                       continue;
3425                                 tmpio = get_task_ioprio(p);
3426                                 if (tmpio < 0)
3427                                         continue;
3428 diff -NurpP --minimal linux-2.6.30.2/fs/jfs/acl.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/jfs/acl.c
3429 --- linux-2.6.30.2/fs/jfs/acl.c 2009-06-11 17:13:05.000000000 +0200
3430 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/jfs/acl.c      2009-07-04 02:22:31.000000000 +0200
3431 @@ -232,7 +232,8 @@ int jfs_setattr(struct dentry *dentry, s
3432                 return rc;
3433  
3434         if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
3435 -           (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
3436 +           (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) ||
3437 +           (iattr->ia_valid & ATTR_TAG && iattr->ia_tag != inode->i_tag)) {
3438                 if (vfs_dq_transfer(inode, iattr))
3439                         return -EDQUOT;
3440         }
3441 diff -NurpP --minimal linux-2.6.30.2/fs/jfs/file.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/jfs/file.c
3442 --- linux-2.6.30.2/fs/jfs/file.c        2008-12-25 00:26:37.000000000 +0100
3443 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/jfs/file.c     2009-07-04 01:11:39.000000000 +0200
3444 @@ -98,6 +98,7 @@ const struct inode_operations jfs_file_i
3445         .setattr        = jfs_setattr,
3446         .permission     = jfs_permission,
3447  #endif
3448 +       .sync_flags     = jfs_sync_flags,
3449  };
3450  
3451  const struct file_operations jfs_file_operations = {
3452 diff -NurpP --minimal linux-2.6.30.2/fs/jfs/ioctl.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/jfs/ioctl.c
3453 --- linux-2.6.30.2/fs/jfs/ioctl.c       2008-12-25 00:26:37.000000000 +0100
3454 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/jfs/ioctl.c    2009-07-04 01:11:39.000000000 +0200
3455 @@ -11,6 +11,7 @@
3456  #include <linux/mount.h>
3457  #include <linux/time.h>
3458  #include <linux/sched.h>
3459 +#include <linux/mount.h>
3460  #include <asm/current.h>
3461  #include <asm/uaccess.h>
3462  
3463 @@ -85,6 +86,11 @@ long jfs_ioctl(struct file *filp, unsign
3464                 if (!S_ISDIR(inode->i_mode))
3465                         flags &= ~JFS_DIRSYNC_FL;
3466  
3467 +               if (IS_BARRIER(inode)) {
3468 +                       vxwprintk_task(1, "messing with the barrier.");
3469 +                       return -EACCES;
3470 +               }
3471 +
3472                 /* Is it quota file? Do not allow user to mess with it */
3473                 if (IS_NOQUOTA(inode)) {
3474                         err = -EPERM;
3475 @@ -102,8 +108,8 @@ long jfs_ioctl(struct file *filp, unsign
3476                  * the relevant capability.
3477                  */
3478                 if ((oldflags & JFS_IMMUTABLE_FL) ||
3479 -                       ((flags ^ oldflags) &
3480 -                       (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) {
3481 +                       ((flags ^ oldflags) & (JFS_APPEND_FL |
3482 +                       JFS_IMMUTABLE_FL | JFS_IXUNLINK_FL))) {
3483                         if (!capable(CAP_LINUX_IMMUTABLE)) {
3484                                 mutex_unlock(&inode->i_mutex);
3485                                 err = -EPERM;
3486 diff -NurpP --minimal linux-2.6.30.2/fs/jfs/jfs_dinode.h linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/jfs/jfs_dinode.h
3487 --- linux-2.6.30.2/fs/jfs/jfs_dinode.h  2008-12-25 00:26:37.000000000 +0100
3488 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/jfs/jfs_dinode.h       2009-07-04 01:11:39.000000000 +0200
3489 @@ -161,9 +161,13 @@ struct dinode {
3490  
3491  #define JFS_APPEND_FL          0x01000000 /* writes to file may only append */
3492  #define JFS_IMMUTABLE_FL       0x02000000 /* Immutable file */
3493 +#define JFS_IXUNLINK_FL                0x08000000 /* Immutable invert on unlink */
3494  
3495 -#define JFS_FL_USER_VISIBLE    0x03F80000
3496 -#define JFS_FL_USER_MODIFIABLE 0x03F80000
3497 +#define JFS_BARRIER_FL         0x04000000 /* Barrier for chroot() */
3498 +#define JFS_COW_FL             0x20000000 /* Copy on Write marker */
3499 +
3500 +#define JFS_FL_USER_VISIBLE    0x07F80000
3501 +#define JFS_FL_USER_MODIFIABLE 0x07F80000
3502  #define JFS_FL_INHERIT         0x03C80000
3503  
3504  /* These are identical to EXT[23]_IOC_GETFLAGS/SETFLAGS */
3505 diff -NurpP --minimal linux-2.6.30.2/fs/jfs/jfs_filsys.h linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/jfs/jfs_filsys.h
3506 --- linux-2.6.30.2/fs/jfs/jfs_filsys.h  2008-12-25 00:26:37.000000000 +0100
3507 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/jfs/jfs_filsys.h       2009-07-04 01:11:39.000000000 +0200
3508 @@ -263,6 +263,7 @@
3509  #define JFS_NAME_MAX   255
3510  #define JFS_PATH_MAX   BPSIZE
3511  
3512 +#define JFS_TAGGED             0x00800000      /* Context Tagging */
3513  
3514  /*
3515   *     file system state (superblock state)
3516 diff -NurpP --minimal linux-2.6.30.2/fs/jfs/jfs_imap.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/jfs/jfs_imap.c
3517 --- linux-2.6.30.2/fs/jfs/jfs_imap.c    2009-06-11 17:13:05.000000000 +0200
3518 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/jfs/jfs_imap.c 2009-07-04 01:11:39.000000000 +0200
3519 @@ -45,6 +45,7 @@
3520  #include <linux/buffer_head.h>
3521  #include <linux/pagemap.h>
3522  #include <linux/quotaops.h>
3523 +#include <linux/vs_tag.h>
3524  
3525  #include "jfs_incore.h"
3526  #include "jfs_inode.h"
3527 @@ -3058,6 +3059,8 @@ static int copy_from_dinode(struct dinod
3528  {
3529         struct jfs_inode_info *jfs_ip = JFS_IP(ip);
3530         struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
3531 +       uid_t uid;
3532 +       gid_t gid;
3533  
3534         jfs_ip->fileset = le32_to_cpu(dip->di_fileset);
3535         jfs_ip->mode2 = le32_to_cpu(dip->di_mode);
3536 @@ -3078,14 +3081,18 @@ static int copy_from_dinode(struct dinod
3537         }
3538         ip->i_nlink = le32_to_cpu(dip->di_nlink);
3539  
3540 -       jfs_ip->saved_uid = le32_to_cpu(dip->di_uid);
3541 +       uid = le32_to_cpu(dip->di_uid);
3542 +       gid = le32_to_cpu(dip->di_gid);
3543 +       ip->i_tag = INOTAG_TAG(DX_TAG(ip), uid, gid, 0);
3544 +
3545 +       jfs_ip->saved_uid = INOTAG_UID(DX_TAG(ip), uid, gid);
3546         if (sbi->uid == -1)
3547                 ip->i_uid = jfs_ip->saved_uid;
3548         else {
3549                 ip->i_uid = sbi->uid;
3550         }
3551  
3552 -       jfs_ip->saved_gid = le32_to_cpu(dip->di_gid);
3553 +       jfs_ip->saved_gid = INOTAG_GID(DX_TAG(ip), uid, gid);
3554         if (sbi->gid == -1)
3555                 ip->i_gid = jfs_ip->saved_gid;
3556         else {
3557 @@ -3150,14 +3157,12 @@ static void copy_to_dinode(struct dinode
3558         dip->di_size = cpu_to_le64(ip->i_size);
3559         dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks));
3560         dip->di_nlink = cpu_to_le32(ip->i_nlink);
3561 -       if (sbi->uid == -1)
3562 -               dip->di_uid = cpu_to_le32(ip->i_uid);
3563 -       else
3564 -               dip->di_uid = cpu_to_le32(jfs_ip->saved_uid);
3565 -       if (sbi->gid == -1)
3566 -               dip->di_gid = cpu_to_le32(ip->i_gid);
3567 -       else
3568 -               dip->di_gid = cpu_to_le32(jfs_ip->saved_gid);
3569 +
3570 +       dip->di_uid = cpu_to_le32(TAGINO_UID(DX_TAG(ip),
3571 +               (sbi->uid == -1) ? ip->i_uid : jfs_ip->saved_uid, ip->i_tag));
3572 +       dip->di_gid = cpu_to_le32(TAGINO_GID(DX_TAG(ip),
3573 +               (sbi->gid == -1) ? ip->i_gid : jfs_ip->saved_gid, ip->i_tag));
3574 +
3575         jfs_get_inode_flags(jfs_ip);
3576         /*
3577          * mode2 is only needed for storing the higher order bits.
3578 diff -NurpP --minimal linux-2.6.30.2/fs/jfs/jfs_inode.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/jfs/jfs_inode.c
3579 --- linux-2.6.30.2/fs/jfs/jfs_inode.c   2009-06-11 17:13:05.000000000 +0200
3580 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/jfs/jfs_inode.c        2009-07-04 01:11:38.000000000 +0200
3581 @@ -30,29 +30,46 @@ void jfs_set_inode_flags(struct inode *i
3582  {
3583         unsigned int flags = JFS_IP(inode)->mode2;
3584  
3585 -       inode->i_flags &= ~(S_IMMUTABLE | S_APPEND |
3586 -               S_NOATIME | S_DIRSYNC | S_SYNC);
3587 +       inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK |
3588 +               S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
3589  
3590         if (flags & JFS_IMMUTABLE_FL)
3591                 inode->i_flags |= S_IMMUTABLE;
3592 +       if (flags & JFS_IXUNLINK_FL)
3593 +               inode->i_flags |= S_IXUNLINK;
3594 +
3595 +       if (flags & JFS_SYNC_FL)
3596 +               inode->i_flags |= S_SYNC;
3597         if (flags & JFS_APPEND_FL)
3598                 inode->i_flags |= S_APPEND;
3599         if (flags & JFS_NOATIME_FL)
3600                 inode->i_flags |= S_NOATIME;
3601         if (flags & JFS_DIRSYNC_FL)
3602                 inode->i_flags |= S_DIRSYNC;
3603 -       if (flags & JFS_SYNC_FL)
3604 -               inode->i_flags |= S_SYNC;
3605 +
3606 +       inode->i_vflags &= ~(V_BARRIER | V_COW);
3607 +
3608 +       if (flags & JFS_BARRIER_FL)
3609 +               inode->i_vflags |= V_BARRIER;
3610 +       if (flags & JFS_COW_FL)
3611 +               inode->i_vflags |= V_COW;
3612  }
3613  
3614  void jfs_get_inode_flags(struct jfs_inode_info *jfs_ip)
3615  {
3616         unsigned int flags = jfs_ip->vfs_inode.i_flags;
3617 +       unsigned int vflags = jfs_ip->vfs_inode.i_vflags;
3618 +
3619 +       jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_IXUNLINK_FL |
3620 +                          JFS_APPEND_FL | JFS_NOATIME_FL |
3621 +                          JFS_DIRSYNC_FL | JFS_SYNC_FL |
3622 +                          JFS_BARRIER_FL | JFS_COW_FL);
3623  
3624 -       jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_APPEND_FL | JFS_NOATIME_FL |
3625 -                          JFS_DIRSYNC_FL | JFS_SYNC_FL);
3626         if (flags & S_IMMUTABLE)
3627                 jfs_ip->mode2 |= JFS_IMMUTABLE_FL;
3628 +       if (flags & S_IXUNLINK)
3629 +               jfs_ip->mode2 |= JFS_IXUNLINK_FL;
3630 +
3631         if (flags & S_APPEND)
3632                 jfs_ip->mode2 |= JFS_APPEND_FL;
3633         if (flags & S_NOATIME)
3634 @@ -61,6 +78,19 @@ void jfs_get_inode_flags(struct jfs_inod
3635                 jfs_ip->mode2 |= JFS_DIRSYNC_FL;
3636         if (flags & S_SYNC)
3637                 jfs_ip->mode2 |= JFS_SYNC_FL;
3638 +
3639 +       if (vflags & V_BARRIER)
3640 +               jfs_ip->mode2 |= JFS_BARRIER_FL;
3641 +       if (vflags & V_COW)
3642 +               jfs_ip->mode2 |= JFS_COW_FL;
3643 +}
3644 +
3645 +int jfs_sync_flags(struct inode *inode)
3646 +{
3647 +       jfs_get_inode_flags(JFS_IP(inode));
3648 +       inode->i_ctime = CURRENT_TIME;
3649 +       mark_inode_dirty(inode);
3650 +       return 0;
3651  }
3652  
3653  /*
3654 diff -NurpP --minimal linux-2.6.30.2/fs/jfs/jfs_inode.h linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/jfs/jfs_inode.h
3655 --- linux-2.6.30.2/fs/jfs/jfs_inode.h   2009-06-11 17:13:05.000000000 +0200
3656 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/jfs/jfs_inode.h        2009-07-04 01:11:39.000000000 +0200
3657 @@ -39,6 +39,7 @@ extern struct dentry *jfs_fh_to_dentry(s
3658  extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid,
3659         int fh_len, int fh_type);
3660  extern void jfs_set_inode_flags(struct inode *);
3661 +extern int jfs_sync_flags(struct inode *);
3662  extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
3663  
3664  extern const struct address_space_operations jfs_aops;
3665 diff -NurpP --minimal linux-2.6.30.2/fs/jfs/namei.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/jfs/namei.c
3666 --- linux-2.6.30.2/fs/jfs/namei.c       2009-06-11 17:13:05.000000000 +0200
3667 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/jfs/namei.c    2009-07-04 01:11:39.000000000 +0200
3668 @@ -21,6 +21,7 @@
3669  #include <linux/ctype.h>
3670  #include <linux/quotaops.h>
3671  #include <linux/exportfs.h>
3672 +#include <linux/vs_tag.h>
3673  #include "jfs_incore.h"
3674  #include "jfs_superblock.h"
3675  #include "jfs_inode.h"
3676 @@ -1476,6 +1477,7 @@ static struct dentry *jfs_lookup(struct 
3677                 return ERR_CAST(ip);
3678         }
3679  
3680 +       dx_propagate_tag(nd, ip);
3681         dentry = d_splice_alias(ip, dentry);
3682  
3683         if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2))
3684 @@ -1545,6 +1547,7 @@ const struct inode_operations jfs_dir_in
3685         .setattr        = jfs_setattr,
3686         .permission     = jfs_permission,
3687  #endif
3688 +       .sync_flags     = jfs_sync_flags,
3689  };
3690  
3691  const struct file_operations jfs_dir_operations = {
3692 diff -NurpP --minimal linux-2.6.30.2/fs/jfs/super.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/jfs/super.c
3693 --- linux-2.6.30.2/fs/jfs/super.c       2009-06-11 17:13:05.000000000 +0200
3694 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/jfs/super.c    2009-07-04 01:11:39.000000000 +0200
3695 @@ -200,7 +200,8 @@ static void jfs_put_super(struct super_b
3696  enum {
3697         Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize,
3698         Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota,
3699 -       Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask
3700 +       Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask,
3701 +       Opt_tag, Opt_notag, Opt_tagid
3702  };
3703  
3704  static const match_table_t tokens = {
3705 @@ -210,6 +211,10 @@ static const match_table_t tokens = {
3706         {Opt_resize, "resize=%u"},
3707         {Opt_resize_nosize, "resize"},
3708         {Opt_errors, "errors=%s"},
3709 +       {Opt_tag, "tag"},
3710 +       {Opt_notag, "notag"},
3711 +       {Opt_tagid, "tagid=%u"},
3712 +       {Opt_tag, "tagxid"},
3713         {Opt_ignore, "noquota"},
3714         {Opt_ignore, "quota"},
3715         {Opt_usrquota, "usrquota"},
3716 @@ -344,6 +349,20 @@ static int parse_options(char *options, 
3717                         }
3718                         break;
3719                 }
3720 +#ifndef CONFIG_TAGGING_NONE
3721 +               case Opt_tag:
3722 +                       *flag |= JFS_TAGGED;
3723 +                       break;
3724 +               case Opt_notag:
3725 +                       *flag &= ~JFS_TAGGED; /* clear only this bit */
3726 +                       break;
3727 +#endif
3728 +#ifdef CONFIG_PROPAGATE
3729 +               case Opt_tagid:
3730 +                       /* use args[0] */
3731 +                       *flag |= JFS_TAGGED;
3732 +                       break;
3733 +#endif
3734                 default:
3735                         printk("jfs: Unrecognized mount option \"%s\" "
3736                                         " or missing value\n", p);
3737 @@ -374,6 +393,13 @@ static int jfs_remount(struct super_bloc
3738         if (!parse_options(data, sb, &newLVSize, &flag)) {
3739                 return -EINVAL;
3740         }
3741 +
3742 +       if ((flag & JFS_TAGGED) && !(sb->s_flags & MS_TAGGED)) {
3743 +               printk(KERN_ERR "JFS: %s: tagging not permitted on remount.\n",
3744 +                       sb->s_id);
3745 +               return -EINVAL;
3746 +       }
3747 +
3748         if (newLVSize) {
3749                 if (sb->s_flags & MS_RDONLY) {
3750                         printk(KERN_ERR
3751 @@ -445,6 +471,9 @@ static int jfs_fill_super(struct super_b
3752  #ifdef CONFIG_JFS_POSIX_ACL
3753         sb->s_flags |= MS_POSIXACL;
3754  #endif
3755 +       /* map mount option tagxid */
3756 +       if (sbi->flag & JFS_TAGGED)
3757 +               sb->s_flags |= MS_TAGGED;
3758  
3759         if (newLVSize) {
3760                 printk(KERN_ERR "resize option for remount only\n");
3761 diff -NurpP --minimal linux-2.6.30.2/fs/libfs.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/libfs.c
3762 --- linux-2.6.30.2/fs/libfs.c   2009-06-11 17:13:05.000000000 +0200
3763 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/libfs.c        2009-07-04 01:11:39.000000000 +0200
3764 @@ -125,7 +125,8 @@ static inline unsigned char dt_type(stru
3765   * both impossible due to the lock on directory.
3766   */
3767  
3768 -int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
3769 +static inline int do_dcache_readdir_filter(struct file *filp,
3770 +       void *dirent, filldir_t filldir, int (*filter)(struct dentry *dentry))
3771  {
3772         struct dentry *dentry = filp->f_path.dentry;
3773         struct dentry *cursor = filp->private_data;
3774 @@ -158,6 +159,8 @@ int dcache_readdir(struct file * filp, v
3775                                 next = list_entry(p, struct dentry, d_u.d_child);
3776                                 if (d_unhashed(next) || !next->d_inode)
3777                                         continue;
3778 +                               if (filter && !filter(next))
3779 +                                       continue;
3780  
3781                                 spin_unlock(&dcache_lock);
3782                                 if (filldir(dirent, next->d_name.name, 
3783 @@ -176,6 +179,18 @@ int dcache_readdir(struct file * filp, v
3784         return 0;
3785  }
3786  
3787 +int dcache_readdir(struct file *filp, void *dirent, filldir_t filldir)
3788 +{
3789 +       return do_dcache_readdir_filter(filp, dirent, filldir, NULL);
3790 +}
3791 +
3792 +int dcache_readdir_filter(struct file *filp, void *dirent, filldir_t filldir,
3793 +       int (*filter)(struct dentry *))
3794 +{
3795 +       return do_dcache_readdir_filter(filp, dirent, filldir, filter);
3796 +}
3797 +
3798 +
3799  ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
3800  {
3801         return -EISDIR;
3802 @@ -811,6 +826,7 @@ EXPORT_SYMBOL(dcache_dir_close);
3803  EXPORT_SYMBOL(dcache_dir_lseek);
3804  EXPORT_SYMBOL(dcache_dir_open);
3805  EXPORT_SYMBOL(dcache_readdir);
3806 +EXPORT_SYMBOL(dcache_readdir_filter);
3807  EXPORT_SYMBOL(generic_read_dir);
3808  EXPORT_SYMBOL(get_sb_pseudo);
3809  EXPORT_SYMBOL(simple_write_begin);
3810 diff -NurpP --minimal linux-2.6.30.2/fs/locks.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/locks.c
3811 --- linux-2.6.30.2/fs/locks.c   2009-03-24 14:22:26.000000000 +0100
3812 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/locks.c        2009-07-04 01:11:39.000000000 +0200
3813 @@ -127,6 +127,8 @@
3814  #include <linux/time.h>
3815  #include <linux/rcupdate.h>
3816  #include <linux/pid_namespace.h>
3817 +#include <linux/vs_base.h>
3818 +#include <linux/vs_limit.h>
3819  
3820  #include <asm/uaccess.h>
3821  
3822 @@ -148,6 +150,8 @@ static struct kmem_cache *filelock_cache
3823  /* Allocate an empty lock structure. */
3824  static struct file_lock *locks_alloc_lock(void)
3825  {
3826 +       if (!vx_locks_avail(1))
3827 +               return NULL;
3828         return kmem_cache_alloc(filelock_cache, GFP_KERNEL);
3829  }
3830  
3831 @@ -173,6 +177,7 @@ static void locks_free_lock(struct file_
3832         BUG_ON(!list_empty(&fl->fl_block));
3833         BUG_ON(!list_empty(&fl->fl_link));
3834  
3835 +       vx_locks_dec(fl);
3836         locks_release_private(fl);
3837         kmem_cache_free(filelock_cache, fl);
3838  }
3839 @@ -193,6 +198,7 @@ void locks_init_lock(struct file_lock *f
3840         fl->fl_start = fl->fl_end = 0;
3841         fl->fl_ops = NULL;
3842         fl->fl_lmops = NULL;
3843 +       fl->fl_xid = -1;
3844  }
3845  
3846  EXPORT_SYMBOL(locks_init_lock);
3847 @@ -247,6 +253,7 @@ void locks_copy_lock(struct file_lock *n
3848         new->fl_file = fl->fl_file;
3849         new->fl_ops = fl->fl_ops;
3850         new->fl_lmops = fl->fl_lmops;
3851 +       new->fl_xid = fl->fl_xid;
3852  
3853         locks_copy_private(new, fl);
3854  }
3855 @@ -285,6 +292,11 @@ static int flock_make_lock(struct file *
3856         fl->fl_flags = FL_FLOCK;
3857         fl->fl_type = type;
3858         fl->fl_end = OFFSET_MAX;
3859 +
3860 +       vxd_assert(filp->f_xid == vx_current_xid(),
3861 +               "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
3862 +       fl->fl_xid = filp->f_xid;
3863 +       vx_locks_inc(fl);
3864         
3865         *lock = fl;
3866         return 0;
3867 @@ -450,6 +462,7 @@ static int lease_init(struct file *filp,
3868  
3869         fl->fl_owner = current->files;
3870         fl->fl_pid = current->tgid;
3871 +       fl->fl_xid = vx_current_xid();
3872  
3873         fl->fl_file = filp;
3874         fl->fl_flags = FL_LEASE;
3875 @@ -469,6 +482,11 @@ static struct file_lock *lease_alloc(str
3876         if (fl == NULL)
3877                 return ERR_PTR(error);
3878  
3879 +       fl->fl_xid = vx_current_xid();
3880 +       if (filp)
3881 +               vxd_assert(filp->f_xid == fl->fl_xid,
3882 +                       "f_xid(%d) == fl_xid(%d)", filp->f_xid, fl->fl_xid);
3883 +       vx_locks_inc(fl);
3884         error = lease_init(filp, type, fl);
3885         if (error) {
3886                 locks_free_lock(fl);
3887 @@ -769,6 +787,7 @@ static int flock_lock_file(struct file *
3888         if (found)
3889                 cond_resched_bkl();
3890  
3891 +       new_fl->fl_xid = -1;
3892  find_conflict:
3893         for_each_lock(inode, before) {
3894                 struct file_lock *fl = *before;
3895 @@ -789,6 +808,7 @@ find_conflict:
3896                 goto out;
3897         locks_copy_lock(new_fl, request);
3898         locks_insert_lock(before, new_fl);
3899 +       vx_locks_inc(new_fl);
3900         new_fl = NULL;
3901         error = 0;
3902  
3903 @@ -799,7 +819,8 @@ out:
3904         return error;
3905  }
3906  
3907 -static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
3908 +static int __posix_lock_file(struct inode *inode, struct file_lock *request,
3909 +       struct file_lock *conflock, xid_t xid)
3910  {
3911         struct file_lock *fl;
3912         struct file_lock *new_fl = NULL;
3913 @@ -809,6 +830,8 @@ static int __posix_lock_file(struct inod
3914         struct file_lock **before;
3915         int error, added = 0;
3916  
3917 +       vxd_assert(xid == vx_current_xid(),
3918 +               "xid(%d) == current(%d)", xid, vx_current_xid());
3919         /*
3920          * We may need two file_lock structures for this operation,
3921          * so we get them in advance to avoid races.
3922 @@ -819,7 +842,11 @@ static int __posix_lock_file(struct inod
3923             (request->fl_type != F_UNLCK ||
3924              request->fl_start != 0 || request->fl_end != OFFSET_MAX)) {
3925                 new_fl = locks_alloc_lock();
3926 +               /* locks_alloc_lock() can return NULL; do not deref it */
3927 +               if (new_fl) { new_fl->fl_xid = xid; vx_locks_inc(new_fl); }
3928                 new_fl2 = locks_alloc_lock();
3929 +               /* same guard for the second preallocated lock */
3930 +               if (new_fl2) { new_fl2->fl_xid = xid; vx_locks_inc(new_fl2); }
3931         }
3932  
3933         lock_kernel();
3934 @@ -1018,7 +1045,8 @@ static int __posix_lock_file(struct inod
3935  int posix_lock_file(struct file *filp, struct file_lock *fl,
3936                         struct file_lock *conflock)
3937  {
3938 -       return __posix_lock_file(filp->f_path.dentry->d_inode, fl, conflock);
3939 +       return __posix_lock_file(filp->f_path.dentry->d_inode,
3940 +               fl, conflock, filp->f_xid);
3941  }
3942  EXPORT_SYMBOL(posix_lock_file);
3943  
3944 @@ -1108,7 +1136,7 @@ int locks_mandatory_area(int read_write,
3945         fl.fl_end = offset + count - 1;
3946  
3947         for (;;) {
3948 -               error = __posix_lock_file(inode, &fl, NULL);
3949 +               error = __posix_lock_file(inode, &fl, NULL, filp->f_xid);
3950                 if (error != FILE_LOCK_DEFERRED)
3951                         break;
3952                 error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
3953 @@ -1423,6 +1451,7 @@ int generic_setlease(struct file *filp, 
3954  
3955         locks_copy_lock(new_fl, lease);
3956         locks_insert_lock(before, new_fl);
3957 +       vx_locks_inc(new_fl);
3958  
3959         *flp = new_fl;
3960         return 0;
3961 @@ -1778,6 +1807,11 @@ int fcntl_setlk(unsigned int fd, struct 
3962         if (file_lock == NULL)
3963                 return -ENOLCK;
3964  
3965 +       vxd_assert(filp->f_xid == vx_current_xid(),
3966 +               "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
3967 +       file_lock->fl_xid = filp->f_xid;
3968 +       vx_locks_inc(file_lock);
3969 +
3970         /*
3971          * This might block, so we do it before checking the inode.
3972          */
3973 @@ -1896,6 +1930,11 @@ int fcntl_setlk64(unsigned int fd, struc
3974         if (file_lock == NULL)
3975                 return -ENOLCK;
3976  
3977 +       vxd_assert(filp->f_xid == vx_current_xid(),
3978 +               "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
3979 +       file_lock->fl_xid = filp->f_xid;
3980 +       vx_locks_inc(file_lock);
3981 +
3982         /*
3983          * This might block, so we do it before checking the inode.
3984          */
3985 @@ -2161,8 +2200,11 @@ static int locks_show(struct seq_file *f
3986  
3987         lock_get_status(f, fl, (long)f->private, "");
3988  
3989 -       list_for_each_entry(bfl, &fl->fl_block, fl_block)
3990 +       list_for_each_entry(bfl, &fl->fl_block, fl_block) {
3991 +               if (!vx_check(bfl->fl_xid, VS_WATCH_P | VS_IDENT))
3992 +                       continue;
3993                 lock_get_status(f, bfl, (long)f->private, " ->");
3994 +       }
3995  
3996         f->private++;
3997         return 0;
3998 diff -NurpP --minimal linux-2.6.30.2/fs/namei.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/namei.c
3999 --- linux-2.6.30.2/fs/namei.c   2009-06-11 17:13:05.000000000 +0200
4000 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/namei.c        2009-07-04 02:25:19.000000000 +0200
4001 @@ -33,6 +33,14 @@
4002  #include <linux/fcntl.h>
4003  #include <linux/device_cgroup.h>
4004  #include <linux/fs_struct.h>
4005 +#include <linux/proc_fs.h>
4006 +#include <linux/vserver/inode.h>
4007 +#include <linux/vs_base.h>
4008 +#include <linux/vs_tag.h>
4009 +#include <linux/vs_cowbl.h>
4010 +#include <linux/vs_device.h>
4011 +#include <linux/vs_context.h>
4012 +#include <linux/pid_namespace.h>
4013  #include <asm/uaccess.h>
4014  
4015  #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
4016 @@ -169,6 +177,77 @@ void putname(const char *name)
4017  EXPORT_SYMBOL(putname);
4018  #endif
4019  
4020 +static inline int dx_barrier(struct inode *inode)
4021 +{
4022 +       if (IS_BARRIER(inode) && !vx_check(0, VS_ADMIN | VS_WATCH)) {
4023 +               vxwprintk_task(1, "did hit the barrier.");
4024 +               return 1;
4025 +       }
4026 +       return 0;
4027 +}
4028 +
4029 +static int __dx_permission(struct inode *inode, int mask)
4030 +{
4031 +       if (dx_barrier(inode))
4032 +               return -EACCES;
4033 +
4034 +       if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) {
4035 +               /* devpts is xid tagged */
4036 +               if (S_ISDIR(inode->i_mode) ||
4037 +                   vx_check((xid_t)inode->i_tag, VS_IDENT | VS_WATCH_P))
4038 +                       return 0;
4039 +       }
4040 +       else if (inode->i_sb->s_magic == PROC_SUPER_MAGIC) {
4041 +               struct proc_dir_entry *de = PDE(inode);
4042 +
4043 +               if (de && !vx_hide_check(0, de->vx_flags))
4044 +                       goto out;
4045 +
4046 +               if ((mask & (MAY_WRITE | MAY_APPEND))) {
4047 +                       struct pid *pid;
4048 +                       struct task_struct *tsk;
4049 +
4050 +                       if (vx_check(0, VS_ADMIN | VS_WATCH_P) ||
4051 +                           vx_flags(VXF_STATE_SETUP, 0))
4052 +                               return 0;
4053 +
4054 +                       pid = PROC_I(inode)->pid;
4055 +                       if (!pid)
4056 +                               goto out;
4057 +
4058 +                       tsk = pid_task(pid, PIDTYPE_PID);
4059 +                       vxdprintk(VXD_CBIT(tag, 0), "accessing %p[#%u]",
4060 +                                 tsk, (tsk ? vx_task_xid(tsk) : 0));
4061 +                       if (tsk && vx_check(vx_task_xid(tsk), VS_IDENT | VS_WATCH_P))
4062 +                               return 0;
4063 +               }
4064 +               else {
4065 +                       /* FIXME: Should we block some entries here? */
4066 +                       return 0;
4067 +               }
4068 +       }
4069 +       else {
4070 +               if (dx_notagcheck(inode->i_sb) ||
4071 +                   dx_check(inode->i_tag, DX_HOSTID | DX_ADMIN | DX_WATCH |
4072 +                            DX_IDENT))
4073 +                       return 0;
4074 +       }
4075 +
4076 +out:
4077 +       return -EACCES;
4078 +}
4079 +
4080 +int dx_permission(struct inode *inode, int mask)
4081 +{
4082 +       int ret = __dx_permission(inode, mask);
4083 +       if (unlikely(ret)) {
4084 +               vxwprintk_task(1, "denied %x access to %s:%p[#%d,%lu]",
4085 +                       mask, inode->i_sb->s_id, inode, inode->i_tag,
4086 +                       inode->i_ino);
4087 +       }
4088 +       return ret;
4089 +}
4090 +
4091  
4092  /**
4093   * generic_permission  -  check for access rights on a Posix-like filesystem
4094 @@ -255,10 +334,14 @@ int inode_permission(struct inode *inode
4095                 /*
4096                  * Nobody gets write access to an immutable file.
4097                  */
4098 -               if (IS_IMMUTABLE(inode))
4099 +               if (IS_IMMUTABLE(inode) && !IS_COW(inode))
4100                         return -EACCES;
4101         }
4102  
4103 +       retval = dx_permission(inode, mask);
4104 +       if (retval)
4105 +               return retval;
4106 +
4107         if (inode->i_op->permission)
4108                 retval = inode->i_op->permission(inode, mask);
4109         else
4110 @@ -434,6 +517,8 @@ static int exec_permission_lite(struct i
4111  {
4112         umode_t mode = inode->i_mode;
4113  
4114 +       if (dx_barrier(inode))
4115 +               return -EACCES;
4116         if (inode->i_op->permission)
4117                 return -EAGAIN;
4118  
4119 @@ -751,7 +836,8 @@ static __always_inline void follow_dotdo
4120                 if (nd->path.dentry == fs->root.dentry &&
4121                     nd->path.mnt == fs->root.mnt) {
4122                          read_unlock(&fs->lock);
4123 -                       break;
4124 +                       /* for sane '/' avoid follow_mount() */
4125 +                       return;
4126                 }
4127                  read_unlock(&fs->lock);
4128                 spin_lock(&dcache_lock);
4129 @@ -788,16 +874,30 @@ static int do_lookup(struct nameidata *n
4130  {
4131         struct vfsmount *mnt = nd->path.mnt;
4132         struct dentry *dentry = __d_lookup(nd->path.dentry, name);
4133 +       struct inode *inode;
4134  
4135         if (!dentry)
4136                 goto need_lookup;
4137         if (dentry->d_op && dentry->d_op->d_revalidate)
4138                 goto need_revalidate;
4139 +       inode = dentry->d_inode;
4140 +       if (!inode)
4141 +               goto done;
4142 +
4143 +       if (__dx_permission(inode, MAY_ACCESS))
4144 +               goto hidden;
4145 +
4146  done:
4147         path->mnt = mnt;
4148         path->dentry = dentry;
4149         __follow_mount(path);
4150         return 0;
4151 +hidden:
4152 +       vxwprintk_task(1, "did lookup hidden %s:%p[#%d,%lu] »%s/%.*s«.",
4153 +               inode->i_sb->s_id, inode, inode->i_tag, inode->i_ino,
4154 +               vxd_path(&nd->path), name->len, name->name);
4155 +       dput(dentry);
4156 +       return -ENOENT;
4157  
4158  need_lookup:
4159         dentry = real_lookup(nd->path.dentry, name, nd);
4160 @@ -1370,7 +1470,7 @@ static int may_delete(struct inode *dir,
4161         if (IS_APPEND(dir))
4162                 return -EPERM;
4163         if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
4164 -           IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
4165 +               IS_IXORUNLINK(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
4166                 return -EPERM;
4167         if (isdir) {
4168                 if (!S_ISDIR(victim->d_inode->i_mode))
4169 @@ -1510,6 +1610,14 @@ int may_open(struct path *path, int acc_
4170                 break;
4171         }
4172  
4173 +#ifdef CONFIG_VSERVER_COWBL
4174 +       if (IS_COW(inode) && (flag & FMODE_WRITE)) {
4175 +               if (IS_COW_LINK(inode))
4176 +                       return -EMLINK;
4177 +               inode->i_flags &= ~(S_IXUNLINK|S_IMMUTABLE);
4178 +               mark_inode_dirty(inode);
4179 +       }
4180 +#endif
4181         error = inode_permission(inode, acc_mode);
4182         if (error)
4183                 return error;
4184 @@ -1647,7 +1755,11 @@ struct file *do_filp_open(int dfd, const
4185         int count = 0;
4186         int will_write;
4187         int flag = open_to_namei_flags(open_flag);
4188 -
4189 +#ifdef CONFIG_VSERVER_COWBL
4190 +       int rflag = flag;
4191 +       int rmode = mode;
4192 +restart:
4193 +#endif
4194         if (!acc_mode)
4195                 acc_mode = MAY_OPEN | ACC_MODE(flag);
4196  
4197 @@ -1781,6 +1893,25 @@ ok:
4198                         goto exit;
4199         }
4200         error = may_open(&nd.path, acc_mode, flag);
4201 +#ifdef CONFIG_VSERVER_COWBL
4202 +       if (error == -EMLINK) {
4203 +               struct dentry *dentry;
4204 +               dentry = cow_break_link(pathname);
4205 +               if (IS_ERR(dentry)) {
4206 +                       error = PTR_ERR(dentry);
4207 +                       goto exit_cow;
4208 +               }
4209 +               dput(dentry);
4210 +               if (will_write)
4211 +                       mnt_drop_write(nd.path.mnt);
4212 +               release_open_intent(&nd);
4213 +               path_put(&nd.path);
4214 +               flag = rflag;
4215 +               mode = rmode;
4216 +               goto restart;
4217 +       }
4218 +exit_cow:
4219 +#endif
4220         if (error) {
4221                 if (will_write)
4222                         mnt_drop_write(nd.path.mnt);
4223 @@ -1934,9 +2065,17 @@ int vfs_mknod(struct inode *dir, struct 
4224         if (error)
4225                 return error;
4226  
4227 -       if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
4228 +       if (!(S_ISCHR(mode) || S_ISBLK(mode)))
4229 +               goto okay;
4230 +
4231 +       if (!capable(CAP_MKNOD))
4232                 return -EPERM;
4233  
4234 +       if (S_ISCHR(mode) && !vs_chrdev_perm(dev, DATTR_CREATE))
4235 +               return -EPERM;
4236 +       if (S_ISBLK(mode) && !vs_blkdev_perm(dev, DATTR_CREATE))
4237 +               return -EPERM;
4238 +okay:
4239         if (!dir->i_op->mknod)
4240                 return -EPERM;
4241  
4242 @@ -2403,7 +2542,7 @@ int vfs_link(struct dentry *old_dentry, 
4243         /*
4244          * A link to an append-only or immutable file cannot be created.
4245          */
4246 -       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
4247 +       if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
4248                 return -EPERM;
4249         if (!dir->i_op->link)
4250                 return -EPERM;
4251 @@ -2776,6 +2915,219 @@ int vfs_follow_link(struct nameidata *nd
4252         return __vfs_follow_link(nd, link);
4253  }
4254  
4255 +
4256 +#ifdef CONFIG_VSERVER_COWBL
4257 +
4258 +#include <linux/file.h>
4259 +
4260 +static inline
4261 +long do_cow_splice(struct file *in, struct file *out, size_t len)
4262 +{
4263 +       loff_t start = 0;       /* offset handed to do_splice_direct() for 'in' */
4264 +
4265 +       return do_splice_direct(in, &start, out, len, 0);
4266 +}
4267 +
4268 +/*
4269 + * cow_break_link - give @pathname a private copy of the file it names
4270 + *
4271 + * Creates a temporary sibling (the path plus one pad byte), splices the
4272 + * old contents into it, copies uid/gid and renames it over the original.
4273 + * Returns a dentry the caller must dput(), or ERR_PTR(-errno) on failure.
4274 + */
4275 +struct dentry *cow_break_link(const char *pathname)
4276 +{
4277 +       int ret, mode, pathlen, redo = 0;
4278 +       struct nameidata old_nd, dir_nd;
4279 +       struct path old_path, new_path;
4280 +       struct dentry *dir, *res = NULL;
4281 +       struct file *old_file, *new_file;
4282 +       char *to, *path, pad='\251';
4283 +       loff_t size;
4284 +       long copied;
4285 +
4286 +       vxdprintk(VXD_CBIT(misc, 1), "cow_break_link(»%s«)", pathname);
4287 +       path = kmalloc(PATH_MAX, GFP_KERNEL);
4288 +       ret = -ENOMEM;
4289 +       if (!path)
4290 +               goto out;
4291 +
4292 +       /* old_nd will have refs to dentry and mnt */
4293 +       ret = path_lookup(pathname, LOOKUP_FOLLOW, &old_nd);
4294 +       vxdprintk(VXD_CBIT(misc, 2), "path_lookup(old): %d", ret);
4295 +       if (ret < 0)
4296 +               goto out_free_path;
4297 +
4298 +       old_path = old_nd.path;
4299 +       mode = old_path.dentry->d_inode->i_mode;
4300 +       /* two bytes are held back for the pad byte and NUL written below */
4301 +       to = d_path(&old_path, path, PATH_MAX-2);
4302 +       ret = PTR_ERR(to);
4303 +       if (IS_ERR(to))
4304 +               goto out_rel_old;
4305 +       pathlen = strlen(to);
4306 +       vxdprintk(VXD_CBIT(misc, 2), "old path »%s« [»%.*s«:%d]", to,
4307 +               old_path.dentry->d_name.len, old_path.dentry->d_name.name,
4308 +               old_path.dentry->d_name.len);
4309 +
4310 +       to[pathlen + 1] = 0;
4311 +retry:
4312 +       to[pathlen] = pad--;
4313 +       ret = -EMLINK;
4314 +       if (pad <= '\240')
4315 +               goto out_rel_old;
4316 +
4317 +       vxdprintk(VXD_CBIT(misc, 1), "temp copy »%s«", to);
4318 +       /* dir_nd will have refs to dentry and mnt */
4319 +       ret = path_lookup(to,
4320 +               LOOKUP_PARENT | LOOKUP_OPEN | LOOKUP_CREATE, &dir_nd);
4321 +       vxdprintk(VXD_CBIT(misc, 2), "path_lookup(new): %d", ret);
4322 +       if (ret < 0)
4323 +               goto retry;
4324 +
4325 +       /* this puppy downs the inode mutex */
4326 +       new_path.dentry = lookup_create(&dir_nd, 0);
4327 +       if (!new_path.dentry || IS_ERR(new_path.dentry)) {
4328 +               vxdprintk(VXD_CBIT(misc, 2),
4329 +                       "lookup_create(new): %p", new_path.dentry);
4330 +               mutex_unlock(&dir_nd.path.dentry->d_inode->i_mutex);
4331 +               path_put(&dir_nd.path);
4332 +               goto retry;
4333 +       }
4334 +       vxdprintk(VXD_CBIT(misc, 2),
4335 +               "lookup_create(new): %p [»%.*s«:%d]", new_path.dentry,
4336 +               new_path.dentry->d_name.len, new_path.dentry->d_name.name,
4337 +               new_path.dentry->d_name.len);
4338 +       dir = dir_nd.path.dentry;
4339 +
4340 +       ret = vfs_create(dir_nd.path.dentry->d_inode, new_path.dentry, mode, &dir_nd);
4341 +       vxdprintk(VXD_CBIT(misc, 2), "vfs_create(new): %d", ret);
4342 +       if (ret == -EEXIST) {
4343 +               mutex_unlock(&dir->d_inode->i_mutex);
4344 +               dput(new_path.dentry);
4345 +               path_put(&dir_nd.path);
4346 +               goto retry;
4347 +       }
4348 +       else if (ret < 0)
4349 +               goto out_unlock_new;
4350 +
4351 +       /* drop out early, ret passes ENOENT */
4352 +       ret = -ENOENT;
4353 +       if ((redo = d_unhashed(old_path.dentry)))
4354 +               goto out_unlock_new;
4355 +
4356 +       new_path.mnt = dir_nd.path.mnt;
4357 +       dget(old_path.dentry);
4358 +       mntget(old_path.mnt);
4359 +       /* this one cleans up the dentry/mnt in case of failure */
4360 +       old_file = dentry_open(old_path.dentry, old_path.mnt,
4361 +               O_RDONLY, current_cred());
4362 +       vxdprintk(VXD_CBIT(misc, 2), "dentry_open(old): %p", old_file);
4363 +       if (!old_file || IS_ERR(old_file)) {
4364 +               /* ret carries the error; 'out' rebuilds res from it */
4365 +               ret = IS_ERR(old_file) ? PTR_ERR(old_file) : -ENOENT;
4366 +               goto out_unlock_new;
4367 +       }
4368 +
4369 +       dget(new_path.dentry);
4370 +       mntget(new_path.mnt);
4371 +       /* this one cleans up the dentry/mnt in case of failure */
4372 +       new_file = dentry_open(new_path.dentry, new_path.mnt,
4373 +               O_WRONLY, current_cred());
4374 +       vxdprintk(VXD_CBIT(misc, 2), "dentry_open(new): %p", new_file);
4375 +
4376 +       ret = IS_ERR(new_file) ? PTR_ERR(new_file) : -ENOENT;
4377 +       if (!new_file || IS_ERR(new_file))
4378 +               goto out_fput_old;
4379 +
4380 +       size = i_size_read(old_file->f_dentry->d_inode);
4381 +       copied = do_cow_splice(old_file, new_file, size);
4382 +       vxdprintk(VXD_CBIT(misc, 2), "do_splice_direct: %ld", copied);
4383 +       if (copied < size) {
4384 +               /* negative: the error itself; short: treat as out of space */
4385 +               ret = (copied < 0) ? copied : -ENOSPC;
4386 +               goto out_fput_both;
4387 +       } else {
4388 +               struct inode *old_inode = old_path.dentry->d_inode;
4389 +               struct inode *new_inode = new_path.dentry->d_inode;
4390 +               struct iattr attr = {
4391 +                       .ia_uid = old_inode->i_uid,
4392 +                       .ia_gid = old_inode->i_gid,
4393 +                       .ia_valid = ATTR_UID | ATTR_GID
4394 +                       };
4395 +
4396 +               ret = inode_setattr(new_inode, &attr);
4397 +               if (ret)
4398 +                       goto out_fput_both;
4399 +       }
4400 +
4401 +       mutex_lock(&old_path.dentry->d_inode->i_sb->s_vfs_rename_mutex);
4402 +       /* drop out late */
4403 +       ret = -ENOENT;
4404 +       if ((redo = d_unhashed(old_path.dentry)))
4405 +               goto out_unlock;
4406 +
4407 +       vxdprintk(VXD_CBIT(misc, 2),
4408 +               "vfs_rename: [»%.*s«:%d] -> [»%.*s«:%d]",
4409 +               new_path.dentry->d_name.len, new_path.dentry->d_name.name,
4410 +               new_path.dentry->d_name.len,
4411 +               old_path.dentry->d_name.len, old_path.dentry->d_name.name,
4412 +               old_path.dentry->d_name.len);
4413 +       ret = vfs_rename(dir_nd.path.dentry->d_inode, new_path.dentry,
4414 +               old_nd.path.dentry->d_parent->d_inode, old_path.dentry);
4415 +       vxdprintk(VXD_CBIT(misc, 2), "vfs_rename: %d", ret);
4416 +       res = new_path.dentry;
4417 +
4418 +out_unlock:
4419 +       mutex_unlock(&old_path.dentry->d_inode->i_sb->s_vfs_rename_mutex);
4420 +out_fput_both:
4421 +       vxdprintk(VXD_CBIT(misc, 3), "fput(new_file=%p[#%ld])", new_file,
4422 +               atomic_read(&new_file->f_count));
4423 +       fput(new_file);
4424 +out_fput_old:
4425 +       vxdprintk(VXD_CBIT(misc, 3), "fput(old_file=%p[#%ld])", old_file,
4426 +               atomic_read(&old_file->f_count));
4427 +       fput(old_file);
4428 +out_unlock_new:
4429 +       /* error path cleanup: drop the temp copy while dir is still locked */
4430 +       if (ret)
4431 +               vfs_unlink(dir->d_inode, new_path.dentry);
4432 +       mutex_unlock(&dir->d_inode->i_mutex);
4433 +       if (!ret)
4434 +               goto out_redo;
4435 +       dput(new_path.dentry);
4436 +
4437 +out_redo:
4438 +       if (!redo)
4439 +               goto out_rel_both;
4440 +       /* lookup dentry once again */
4441 +       path_put(&old_nd.path);
4442 +       ret = path_lookup(pathname, LOOKUP_FOLLOW, &old_nd);
4443 +       if (ret)
4444 +               goto out_rel_both;
4445 +
4446 +       new_path.dentry = old_nd.path.dentry;
4447 +       vxdprintk(VXD_CBIT(misc, 2),
4448 +               "path_lookup(redo): %p [»%.*s«:%d]", new_path.dentry,
4449 +               new_path.dentry->d_name.len, new_path.dentry->d_name.name,
4450 +               new_path.dentry->d_name.len);
4451 +       dget(new_path.dentry);
4452 +       res = new_path.dentry;
4453 +
4454 +out_rel_both:
4455 +       path_put(&dir_nd.path);
4456 +out_rel_old:
4457 +       path_put(&old_nd.path);
4458 +out_free_path:
4459 +       kfree(path);
4460 +out:
4461 +       if (ret)
4462 +               res = ERR_PTR(ret);
4463 +       return res;
4464 +}
4465 +
4466 +#endif
4467 +
4468  /* get the link contents into pagecache */
4469  static char *page_getlink(struct dentry * dentry, struct page **ppage)
4470  {
4471 diff -NurpP --minimal linux-2.6.30.2/fs/namespace.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/namespace.c
4472 --- linux-2.6.30.2/fs/namespace.c       2009-06-11 17:13:05.000000000 +0200
4473 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/namespace.c    2009-07-04 02:20:45.000000000 +0200
4474 @@ -28,6 +28,11 @@
4475  #include <linux/log2.h>
4476  #include <linux/idr.h>
4477  #include <linux/fs_struct.h>
4478 +#include <linux/vs_base.h>
4479 +#include <linux/vs_context.h>
4480 +#include <linux/vs_tag.h>
4481 +#include <linux/vserver/space.h>
4482 +#include <linux/vserver/global.h>
4483  #include <asm/uaccess.h>
4484  #include <asm/unistd.h>
4485  #include "pnode.h"
4486 @@ -573,6 +578,7 @@ static struct vfsmount *clone_mnt(struct
4487                 mnt->mnt_root = dget(root);
4488                 mnt->mnt_mountpoint = mnt->mnt_root;
4489                 mnt->mnt_parent = mnt;
4490 +               mnt->mnt_tag = old->mnt_tag;
4491  
4492                 if (flag & CL_SLAVE) {
4493                         list_add(&mnt->mnt_slave, &old->mnt_slave_list);
4494 @@ -687,6 +693,31 @@ static inline void mangle(struct seq_fil
4495         seq_escape(m, s, " \t\n\\");
4496  }
4497  
4498 +static int mnt_is_reachable(struct vfsmount *mnt)
4499 +{
4500 +       struct path fs_root;
4501 +       struct dentry *anchor;
4502 +       int reachable;
4503 +
4504 +       /* the root mount of mnt's own namespace is always reported */
4505 +       if (mnt->mnt_ns->root == mnt)
4506 +               return 1;
4507 +
4508 +       spin_lock(&vfsmount_lock);
4509 +       fs_root = current->fs->root;
4510 +       anchor = fs_root.dentry;
4511 +       /* climb the parent chain, remembering the last mountpoint left */
4512 +       while (!((mnt == mnt->mnt_parent) || (mnt == fs_root.mnt))) {
4513 +               anchor = mnt->mnt_mountpoint;
4514 +               mnt = mnt->mnt_parent;
4515 +       }
4516 +       /* must end on current's root mount, with anchor passing is_subdir() */
4517 +       reachable = (mnt == fs_root.mnt) && is_subdir(anchor, fs_root.dentry);
4518 +       spin_unlock(&vfsmount_lock);
4519 +
4520 +       return reachable;
4521 +}
4522 +
4523  /*
4524   * Simple .show_options callback for filesystems which don't want to
4525   * implement more complex mount option showing.
4526 @@ -774,6 +805,8 @@ static int show_sb_opts(struct seq_file 
4527                 { MS_SYNCHRONOUS, ",sync" },
4528                 { MS_DIRSYNC, ",dirsync" },
4529                 { MS_MANDLOCK, ",mand" },
4530 +               { MS_TAGGED, ",tag" },
4531 +               { MS_NOTAGCHECK, ",notagcheck" },
4532                 { 0, NULL }
4533         };
4534         const struct proc_fs_info *fs_infop;
4535 @@ -821,10 +854,20 @@ static int show_vfsmnt(struct seq_file *
4536         int err = 0;
4537         struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
4538  
4539 -       mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
4540 -       seq_putc(m, ' ');
4541 -       seq_path(m, &mnt_path, " \t\n\\");
4542 -       seq_putc(m, ' ');
4543 +       if (vx_flags(VXF_HIDE_MOUNT, 0))
4544 +               return SEQ_SKIP;
4545 +       if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P))
4546 +               return SEQ_SKIP;
4547 +
4548 +       if (!vx_check(0, VS_ADMIN|VS_WATCH) &&
4549 +               mnt == current->fs->root.mnt) {
4550 +               seq_puts(m, "/dev/root / ");
4551 +       } else {
4552 +               mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
4553 +               seq_putc(m, ' ');
4554 +               seq_path(m, &mnt_path, " \t\n\\");
4555 +               seq_putc(m, ' ');
4556 +       }
4557         show_type(m, mnt->mnt_sb);
4558         seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
4559         err = show_sb_opts(m, mnt->mnt_sb);
4560 @@ -854,6 +897,11 @@ static int show_mountinfo(struct seq_fil
4561         struct path root = p->root;
4562         int err = 0;
4563  
4564 +       if (vx_flags(VXF_HIDE_MOUNT, 0))
4565 +               return SEQ_SKIP;
4566 +       if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P))
4567 +               return SEQ_SKIP;
4568 +
4569         seq_printf(m, "%i %i %u:%u ", mnt->mnt_id, mnt->mnt_parent->mnt_id,
4570                    MAJOR(sb->s_dev), MINOR(sb->s_dev));
4571         seq_dentry(m, mnt->mnt_root, " \t\n\\");
4572 @@ -912,17 +960,27 @@ static int show_vfsstat(struct seq_file 
4573         struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
4574         int err = 0;
4575  
4576 -       /* device */
4577 -       if (mnt->mnt_devname) {
4578 -               seq_puts(m, "device ");
4579 -               mangle(m, mnt->mnt_devname);
4580 -       } else
4581 -               seq_puts(m, "no device");
4582 +       if (vx_flags(VXF_HIDE_MOUNT, 0))
4583 +               return SEQ_SKIP;
4584 +       if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P))
4585 +               return SEQ_SKIP;
4586  
4587 -       /* mount point */
4588 -       seq_puts(m, " mounted on ");
4589 -       seq_path(m, &mnt_path, " \t\n\\");
4590 -       seq_putc(m, ' ');
4591 +       if (!vx_check(0, VS_ADMIN|VS_WATCH) &&
4592 +               mnt == current->fs->root.mnt) {
4593 +               seq_puts(m, "device /dev/root mounted on / ");
4594 +       } else {
4595 +               /* device */
4596 +               if (mnt->mnt_devname) {
4597 +                       seq_puts(m, "device ");
4598 +                       mangle(m, mnt->mnt_devname);
4599 +               } else
4600 +                       seq_puts(m, "no device");
4601 +
4602 +               /* mount point */
4603 +               seq_puts(m, " mounted on ");
4604 +               seq_path(m, &mnt_path, " \t\n\\");
4605 +               seq_putc(m, ' ');
4606 +       }
4607  
4608         /* file system type */
4609         seq_puts(m, "with fstype ");
4610 @@ -1159,7 +1217,7 @@ SYSCALL_DEFINE2(umount, char __user *, n
4611                 goto dput_and_out;
4612  
4613         retval = -EPERM;
4614 -       if (!capable(CAP_SYS_ADMIN))
4615 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
4616                 goto dput_and_out;
4617  
4618         retval = do_umount(path.mnt, flags);
4619 @@ -1185,7 +1243,7 @@ SYSCALL_DEFINE1(oldumount, char __user *
4620  
4621  static int mount_is_safe(struct path *path)
4622  {
4623 -       if (capable(CAP_SYS_ADMIN))
4624 +       if (vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
4625                 return 0;
4626         return -EPERM;
4627  #ifdef notyet
4628 @@ -1476,11 +1534,13 @@ static int do_change_type(struct path *p
4629   * do loopback mount.
4630   */
4631  static int do_loopback(struct path *path, char *old_name,
4632 -                               int recurse)
4633 +       tag_t tag, unsigned long flags, int mnt_flags)
4634  {
4635         struct path old_path;
4636         struct vfsmount *mnt = NULL;
4637         int err = mount_is_safe(path);
4638 +       int recurse = flags & MS_REC;
4639 +
4640         if (err)
4641                 return err;
4642         if (!old_name || !*old_name)
4643 @@ -1514,6 +1574,7 @@ static int do_loopback(struct path *path
4644                 spin_unlock(&vfsmount_lock);
4645                 release_mounts(&umount_list);
4646         }
4647 +       mnt->mnt_flags = mnt_flags;
4648  
4649  out:
4650         up_write(&namespace_sem);
4651 @@ -1544,12 +1605,12 @@ static int change_mount_flags(struct vfs
4652   * on it - tough luck.
4653   */
4654  static int do_remount(struct path *path, int flags, int mnt_flags,
4655 -                     void *data)
4656 +       void *data, xid_t xid)
4657  {
4658         int err;
4659         struct super_block *sb = path->mnt->mnt_sb;
4660  
4661 -       if (!capable(CAP_SYS_ADMIN))
4662 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_REMOUNT))
4663                 return -EPERM;
4664  
4665         if (!check_mnt(path->mnt))
4666 @@ -1591,7 +1652,7 @@ static int do_move_mount(struct path *pa
4667         struct path old_path, parent_path;
4668         struct vfsmount *p;
4669         int err = 0;
4670 -       if (!capable(CAP_SYS_ADMIN))
4671 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
4672                 return -EPERM;
4673         if (!old_name || !*old_name)
4674                 return -EINVAL;
4675 @@ -1673,7 +1734,7 @@ static int do_new_mount(struct path *pat
4676                 return -EINVAL;
4677  
4678         /* we need capabilities... */
4679 -       if (!capable(CAP_SYS_ADMIN))
4680 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
4681                 return -EPERM;
4682  
4683         mnt = do_kern_mount(type, flags, name, data);
4684 @@ -1918,6 +1979,7 @@ long do_mount(char *dev_name, char *dir_
4685         struct path path;
4686         int retval = 0;
4687         int mnt_flags = 0;
4688 +       tag_t tag = 0;
4689  
4690         /* Discard magic */
4691         if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
4692 @@ -1937,6 +1999,12 @@ long do_mount(char *dev_name, char *dir_
4693         if (!(flags & MS_NOATIME))
4694                 mnt_flags |= MNT_RELATIME;
4695  
4696 +       if (dx_parse_tag(data_page, &tag, 1, &mnt_flags, &flags)) {
4697 +               /* FIXME: bind and re-mounts get the tag flag? */
4698 +               if (flags & (MS_BIND|MS_REMOUNT))
4699 +                       flags |= MS_TAGID;
4700 +       }
4701 +
4702         /* Separate the per-mountpoint flags */
4703         if (flags & MS_NOSUID)
4704                 mnt_flags |= MNT_NOSUID;
4705 @@ -1953,6 +2021,8 @@ long do_mount(char *dev_name, char *dir_
4706         if (flags & MS_RDONLY)
4707                 mnt_flags |= MNT_READONLY;
4708  
4709 +       if (!capable(CAP_SYS_ADMIN))
4710 +               mnt_flags |= MNT_NODEV;
4711         flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
4712                    MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
4713                    MS_STRICTATIME);
4714 @@ -1969,9 +2039,9 @@ long do_mount(char *dev_name, char *dir_
4715  
4716         if (flags & MS_REMOUNT)
4717                 retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
4718 -                                   data_page);
4719 +                                   data_page, tag);
4720         else if (flags & MS_BIND)
4721 -               retval = do_loopback(&path, dev_name, flags & MS_REC);
4722 +               retval = do_loopback(&path, dev_name, tag, flags, mnt_flags);
4723         else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
4724                 retval = do_change_type(&path, flags);
4725         else if (flags & MS_MOVE)
4726 @@ -2040,6 +2110,7 @@ static struct mnt_namespace *dup_mnt_ns(
4727                 q = next_mnt(q, new_ns->root);
4728         }
4729         up_write(&namespace_sem);
4730 +       atomic_inc(&vs_global_mnt_ns);
4731  
4732         if (rootmnt)
4733                 mntput(rootmnt);
4734 @@ -2166,9 +2237,10 @@ SYSCALL_DEFINE2(pivot_root, const char _
4735         down_write(&namespace_sem);
4736         mutex_lock(&old.dentry->d_inode->i_mutex);
4737         error = -EINVAL;
4738 -       if (IS_MNT_SHARED(old.mnt) ||
4739 +       if ((IS_MNT_SHARED(old.mnt) ||
4740                 IS_MNT_SHARED(new.mnt->mnt_parent) ||
4741 -               IS_MNT_SHARED(root.mnt->mnt_parent))
4742 +               IS_MNT_SHARED(root.mnt->mnt_parent)) &&
4743 +               !vx_flags(VXF_STATE_SETUP, 0))
4744                 goto out2;
4745         if (!check_mnt(root.mnt))
4746                 goto out2;
4747 @@ -2307,5 +2379,6 @@ void __put_mnt_ns(struct mnt_namespace *
4748         spin_unlock(&vfsmount_lock);
4749         up_write(&namespace_sem);
4750         release_mounts(&umount_list);
4751 +       atomic_dec(&vs_global_mnt_ns);
4752         kfree(ns);
4753  }
4754 diff -NurpP --minimal linux-2.6.30.2/fs/nfs/client.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/nfs/client.c
4755 --- linux-2.6.30.2/fs/nfs/client.c      2009-06-11 17:13:05.000000000 +0200
4756 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/nfs/client.c   2009-07-04 01:11:39.000000000 +0200
4757 @@ -689,6 +689,9 @@ static int nfs_init_server_rpcclient(str
4758         if (server->flags & NFS_MOUNT_SOFT)
4759                 server->client->cl_softrtry = 1;
4760  
4761 +       server->client->cl_tag = 0;
4762 +       if (server->flags & NFS_MOUNT_TAGGED)
4763 +               server->client->cl_tag = 1;
4764         return 0;
4765  }
4766  
4767 @@ -856,6 +859,10 @@ static void nfs_server_set_fsinfo(struct
4768                 server->acdirmin = server->acdirmax = 0;
4769         }
4770  
4771 +       /* FIXME: needs fsinfo
4772 +       if (server->flags & NFS_MOUNT_TAGGED)
4773 +               sb->s_flags |= MS_TAGGED;       */
4774 +
4775         server->maxfilesize = fsinfo->maxfilesize;
4776  
4777         /* We're airborne Set socket buffersize */
4778 diff -NurpP --minimal linux-2.6.30.2/fs/nfs/dir.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/nfs/dir.c
4779 --- linux-2.6.30.2/fs/nfs/dir.c 2009-06-11 17:13:05.000000000 +0200
4780 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/nfs/dir.c      2009-07-04 01:11:39.000000000 +0200
4781 @@ -34,6 +34,7 @@
4782  #include <linux/namei.h>
4783  #include <linux/mount.h>
4784  #include <linux/sched.h>
4785 +#include <linux/vs_tag.h>
4786  
4787  #include "nfs4_fs.h"
4788  #include "delegation.h"
4789 @@ -950,6 +951,7 @@ static struct dentry *nfs_lookup(struct 
4790         if (IS_ERR(res))
4791                 goto out_unblock_sillyrename;
4792  
4793 +       dx_propagate_tag(nd, inode);
4794  no_entry:
4795         res = d_materialise_unique(dentry, inode);
4796         if (res != NULL) {
4797 diff -NurpP --minimal linux-2.6.30.2/fs/nfs/inode.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/nfs/inode.c
4798 --- linux-2.6.30.2/fs/nfs/inode.c       2009-06-11 17:13:06.000000000 +0200
4799 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/nfs/inode.c    2009-07-04 02:07:17.000000000 +0200
4800 @@ -37,6 +37,7 @@
4801  #include <linux/vfs.h>
4802  #include <linux/inet.h>
4803  #include <linux/nfs_xdr.h>
4804 +#include <linux/vs_tag.h>
4805  
4806  #include <asm/system.h>
4807  #include <asm/uaccess.h>
4808 @@ -279,6 +280,8 @@ nfs_fhget(struct super_block *sb, struct
4809         if (inode->i_state & I_NEW) {
4810                 struct nfs_inode *nfsi = NFS_I(inode);
4811                 unsigned long now = jiffies;
4812 +               uid_t uid;
4813 +               gid_t gid;
4814  
4815                 /* We set i_ino for the few things that still rely on it,
4816                  * such as stat(2) */
4817 @@ -322,8 +325,8 @@ nfs_fhget(struct super_block *sb, struct
4818                 nfsi->change_attr = 0;
4819                 inode->i_size = 0;
4820                 inode->i_nlink = 0;
4821 -               inode->i_uid = -2;
4822 -               inode->i_gid = -2;
4823 +               uid = -2;
4824 +               gid = -2;
4825                 inode->i_blocks = 0;
4826                 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
4827  
4828 @@ -342,9 +345,9 @@ nfs_fhget(struct super_block *sb, struct
4829                 if (fattr->valid & NFS_ATTR_FATTR_NLINK)
4830                         inode->i_nlink = fattr->nlink;
4831                 if (fattr->valid & NFS_ATTR_FATTR_OWNER)
4832 -                       inode->i_uid = fattr->uid;
4833 +                       uid = fattr->uid;
4834                 if (fattr->valid & NFS_ATTR_FATTR_GROUP)
4835 -                       inode->i_gid = fattr->gid;
4836 +                       gid = fattr->gid;
4837                 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
4838                         inode->i_blocks = fattr->du.nfs2.blocks;
4839                 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
4840 @@ -353,6 +356,11 @@ nfs_fhget(struct super_block *sb, struct
4841                          */
4842                         inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
4843                 }
4844 +               inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
4845 +               inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
4846 +               inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, 0);
4847 +                               /* maybe fattr->xid someday */
4848 +
4849                 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
4850                 nfsi->attrtimeo_timestamp = now;
4851                 nfsi->access_cache = RB_ROOT;
4852 @@ -493,6 +501,8 @@ void nfs_setattr_update_inode(struct ino
4853                         inode->i_uid = attr->ia_uid;
4854                 if ((attr->ia_valid & ATTR_GID) != 0)
4855                         inode->i_gid = attr->ia_gid;
4856 +               if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode))
4857 +                       inode->i_tag = attr->ia_tag;
4858                 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
4859                 spin_unlock(&inode->i_lock);
4860         }
4861 @@ -903,6 +913,9 @@ static int nfs_check_inode_attributes(st
4862         struct nfs_inode *nfsi = NFS_I(inode);
4863         loff_t cur_size, new_isize;
4864         unsigned long invalid = 0;
4865 +       uid_t uid;
4866 +       gid_t gid;
4867 +       tag_t tag;
4868  
4869  
4870         /* Has the inode gone and changed behind our back? */
4871 @@ -926,13 +939,18 @@ static int nfs_check_inode_attributes(st
4872                         invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
4873         }
4874  
4875 +       uid = INOTAG_UID(DX_TAG(inode), fattr->uid, fattr->gid);
4876 +       gid = INOTAG_GID(DX_TAG(inode), fattr->uid, fattr->gid);
4877 +       tag = INOTAG_TAG(DX_TAG(inode), fattr->uid, fattr->gid, 0);
4878 +
4879         /* Have any file permissions changed? */
4880         if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
4881                 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
4882 -       if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && inode->i_uid != fattr->uid)
4883 +       if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && uid != fattr->uid)
4884                 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
4885 -       if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && inode->i_gid != fattr->gid)
4886 +       if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && gid != fattr->gid)
4887                 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
4888 +               /* maybe check for tag too? */
4889  
4890         /* Has the link count changed? */
4891         if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink)
4892 @@ -1146,6 +1164,9 @@ static int nfs_update_inode(struct inode
4893         loff_t cur_isize, new_isize;
4894         unsigned long invalid = 0;
4895         unsigned long now = jiffies;
4896 +       uid_t uid;
4897 +       gid_t gid;
4898 +       tag_t tag;
4899  
4900         dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
4901                         __func__, inode->i_sb->s_id, inode->i_ino,
4902 @@ -1234,6 +1255,9 @@ static int nfs_update_inode(struct inode
4903                 }
4904         }
4905  
4906 +       uid = INOTAG_UID(DX_TAG(inode), fattr->uid, fattr->gid);
4907 +       gid = INOTAG_GID(DX_TAG(inode), fattr->uid, fattr->gid);
4908 +       tag = INOTAG_TAG(DX_TAG(inode), fattr->uid, fattr->gid, 0);
4909  
4910         if (fattr->valid & NFS_ATTR_FATTR_ATIME)
4911                 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
4912 @@ -1245,18 +1269,22 @@ static int nfs_update_inode(struct inode
4913                 }
4914         }
4915         if (fattr->valid & NFS_ATTR_FATTR_OWNER) {
4916 -               if (inode->i_uid != fattr->uid) {
4917 +               if (uid != fattr->uid) {
4918                         invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
4919 -                       inode->i_uid = fattr->uid;
4920 +                       uid = fattr->uid;
4921                 }
4922         }
4923         if (fattr->valid & NFS_ATTR_FATTR_GROUP) {
4924 -               if (inode->i_gid != fattr->gid) {
4925 +               if (gid != fattr->gid) {
4926                         invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
4927 -                       inode->i_gid = fattr->gid;
4928 +                       gid = fattr->gid;
4929                 }
4930         }
4931  
4932 +       inode->i_uid = uid;
4933 +       inode->i_gid = gid;
4934 +       inode->i_tag = tag;
4935 +
4936         if (fattr->valid & NFS_ATTR_FATTR_NLINK) {
4937                 if (inode->i_nlink != fattr->nlink) {
4938                         invalid |= NFS_INO_INVALID_ATTR;
4939 diff -NurpP --minimal linux-2.6.30.2/fs/nfs/nfs3xdr.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/nfs/nfs3xdr.c
4940 --- linux-2.6.30.2/fs/nfs/nfs3xdr.c     2009-06-11 17:13:06.000000000 +0200
4941 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/nfs/nfs3xdr.c  2009-07-04 01:11:39.000000000 +0200
4942 @@ -22,6 +22,7 @@
4943  #include <linux/nfs3.h>
4944  #include <linux/nfs_fs.h>
4945  #include <linux/nfsacl.h>
4946 +#include <linux/vs_tag.h>
4947  #include "internal.h"
4948  
4949  #define NFSDBG_FACILITY                NFSDBG_XDR
4950 @@ -177,7 +178,7 @@ xdr_decode_fattr(__be32 *p, struct nfs_f
4951  }
4952  
4953  static inline __be32 *
4954 -xdr_encode_sattr(__be32 *p, struct iattr *attr)
4955 +xdr_encode_sattr(__be32 *p, struct iattr *attr, int tag)
4956  {
4957         if (attr->ia_valid & ATTR_MODE) {
4958                 *p++ = xdr_one;
4959 @@ -185,15 +186,17 @@ xdr_encode_sattr(__be32 *p, struct iattr
4960         } else {
4961                 *p++ = xdr_zero;
4962         }
4963 -       if (attr->ia_valid & ATTR_UID) {
4964 +       if (attr->ia_valid & ATTR_UID ||
4965 +               (tag && (attr->ia_valid & ATTR_TAG))) {
4966                 *p++ = xdr_one;
4967 -               *p++ = htonl(attr->ia_uid);
4968 +               *p++ = htonl(TAGINO_UID(tag, attr->ia_uid, attr->ia_tag));
4969         } else {
4970                 *p++ = xdr_zero;
4971         }
4972 -       if (attr->ia_valid & ATTR_GID) {
4973 +       if (attr->ia_valid & ATTR_GID ||
4974 +               (tag && (attr->ia_valid & ATTR_TAG))) {
4975                 *p++ = xdr_one;
4976 -               *p++ = htonl(attr->ia_gid);
4977 +               *p++ = htonl(TAGINO_GID(tag, attr->ia_gid, attr->ia_tag));
4978         } else {
4979                 *p++ = xdr_zero;
4980         }
4981 @@ -280,7 +283,8 @@ static int
4982  nfs3_xdr_sattrargs(struct rpc_rqst *req, __be32 *p, struct nfs3_sattrargs *args)
4983  {
4984         p = xdr_encode_fhandle(p, args->fh);
4985 -       p = xdr_encode_sattr(p, args->sattr);
4986 +       p = xdr_encode_sattr(p, args->sattr,
4987 +               req->rq_task->tk_client->cl_tag);
4988         *p++ = htonl(args->guard);
4989         if (args->guard)
4990                 p = xdr_encode_time3(p, &args->guardtime);
4991 @@ -385,7 +389,8 @@ nfs3_xdr_createargs(struct rpc_rqst *req
4992                 *p++ = args->verifier[0];
4993                 *p++ = args->verifier[1];
4994         } else
4995 -               p = xdr_encode_sattr(p, args->sattr);
4996 +               p = xdr_encode_sattr(p, args->sattr,
4997 +                       req->rq_task->tk_client->cl_tag);
4998  
4999         req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
5000         return 0;
5001 @@ -399,7 +404,8 @@ nfs3_xdr_mkdirargs(struct rpc_rqst *req,
5002  {
5003         p = xdr_encode_fhandle(p, args->fh);
5004         p = xdr_encode_array(p, args->name, args->len);
5005 -       p = xdr_encode_sattr(p, args->sattr);
5006 +       p = xdr_encode_sattr(p, args->sattr,
5007 +               req->rq_task->tk_client->cl_tag);
5008         req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
5009         return 0;
5010  }
5011 @@ -412,7 +418,8 @@ nfs3_xdr_symlinkargs(struct rpc_rqst *re
5012  {
5013         p = xdr_encode_fhandle(p, args->fromfh);
5014         p = xdr_encode_array(p, args->fromname, args->fromlen);
5015 -       p = xdr_encode_sattr(p, args->sattr);
5016 +       p = xdr_encode_sattr(p, args->sattr,
5017 +               req->rq_task->tk_client->cl_tag);
5018         *p++ = htonl(args->pathlen);
5019         req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
5020  
5021 @@ -430,7 +437,8 @@ nfs3_xdr_mknodargs(struct rpc_rqst *req,
5022         p = xdr_encode_fhandle(p, args->fh);
5023         p = xdr_encode_array(p, args->name, args->len);
5024         *p++ = htonl(args->type);
5025 -       p = xdr_encode_sattr(p, args->sattr);
5026 +       p = xdr_encode_sattr(p, args->sattr,
5027 +               req->rq_task->tk_client->cl_tag);
5028         if (args->type == NF3CHR || args->type == NF3BLK) {
5029                 *p++ = htonl(MAJOR(args->rdev));
5030                 *p++ = htonl(MINOR(args->rdev));
5031 diff -NurpP --minimal linux-2.6.30.2/fs/nfs/nfsroot.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/nfs/nfsroot.c
5032 --- linux-2.6.30.2/fs/nfs/nfsroot.c     2009-06-11 17:13:06.000000000 +0200
5033 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/nfs/nfsroot.c  2009-07-04 01:11:39.000000000 +0200
5034 @@ -119,12 +119,12 @@ static int mount_port __initdata = 0;             /
5035  enum {
5036         /* Options that take integer arguments */
5037         Opt_port, Opt_rsize, Opt_wsize, Opt_timeo, Opt_retrans, Opt_acregmin,
5038 -       Opt_acregmax, Opt_acdirmin, Opt_acdirmax,
5039 +       Opt_acregmax, Opt_acdirmin, Opt_acdirmax, Opt_tagid,
5040         /* Options that take no arguments */
5041         Opt_soft, Opt_hard, Opt_intr,
5042         Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, 
5043         Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp,
5044 -       Opt_acl, Opt_noacl,
5045 +       Opt_acl, Opt_noacl, Opt_tag, Opt_notag,
5046         /* Error token */
5047         Opt_err
5048  };
5049 @@ -161,6 +161,9 @@ static const match_table_t tokens __init
5050         {Opt_tcp, "tcp"},
5051         {Opt_acl, "acl"},
5052         {Opt_noacl, "noacl"},
5053 +       {Opt_tag, "tag"},
5054 +       {Opt_notag, "notag"},
5055 +       {Opt_tagid, "tagid=%u"},
5056         {Opt_err, NULL}
5057         
5058  };
5059 @@ -272,6 +275,20 @@ static int __init root_nfs_parse(char *n
5060                         case Opt_noacl:
5061                                 nfs_data.flags |= NFS_MOUNT_NOACL;
5062                                 break;
5063 +#ifndef CONFIG_TAGGING_NONE
5064 +                       case Opt_tag:
5065 +                               nfs_data.flags |= NFS_MOUNT_TAGGED;
5066 +                               break;
5067 +                       case Opt_notag:
5068 +                               nfs_data.flags &= ~NFS_MOUNT_TAGGED;
5069 +                               break;
5070 +#endif
5071 +#ifdef CONFIG_PROPAGATE
5072 +                       case Opt_tagid:
5073 +                               /* use args[0] */
5074 +                               nfs_data.flags |= NFS_MOUNT_TAGGED;
5075 +                               break;
5076 +#endif
5077                         default:
5078                                 printk(KERN_WARNING "Root-NFS: unknown "
5079                                         "option: %s\n", p);
5080 diff -NurpP --minimal linux-2.6.30.2/fs/nfs/super.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/nfs/super.c
5081 --- linux-2.6.30.2/fs/nfs/super.c       2009-06-11 17:13:06.000000000 +0200
5082 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/nfs/super.c    2009-07-04 01:11:39.000000000 +0200
5083 @@ -51,6 +51,7 @@
5084  #include <linux/nfs_xdr.h>
5085  #include <linux/magic.h>
5086  #include <linux/parser.h>
5087 +#include <linux/vs_tag.h>
5088  
5089  #include <asm/system.h>
5090  #include <asm/uaccess.h>
5091 @@ -523,6 +524,7 @@ static void nfs_show_mount_options(struc
5092                 { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
5093                 { NFS_MOUNT_UNSHARED, ",nosharecache", "" },
5094                 { NFS_MOUNT_NORESVPORT, ",noresvport", "" },
5095 +               { NFS_MOUNT_TAGGED, ",tag", "" },
5096                 { 0, NULL, NULL }
5097         };
5098         const struct proc_nfs_info *nfs_infop;
5099 diff -NurpP --minimal linux-2.6.30.2/fs/nfsd/auth.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/nfsd/auth.c
5100 --- linux-2.6.30.2/fs/nfsd/auth.c       2009-03-24 14:22:26.000000000 +0100
5101 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/nfsd/auth.c    2009-07-04 01:11:39.000000000 +0200
5102 @@ -10,6 +10,7 @@
5103  #include <linux/sunrpc/svcauth.h>
5104  #include <linux/nfsd/nfsd.h>
5105  #include <linux/nfsd/export.h>
5106 +#include <linux/vs_tag.h>
5107  #include "auth.h"
5108  
5109  int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
5110 @@ -42,6 +43,9 @@ int nfsd_setuser(struct svc_rqst *rqstp,
5111  
5112         new->fsuid = rqstp->rq_cred.cr_uid;
5113         new->fsgid = rqstp->rq_cred.cr_gid;
5114 +       /* FIXME: this desperately needs a tag :)
5115 +       new->xid = (xid_t)INOTAG_TAG(DX_TAG_NFSD, cred.cr_uid, cred.cr_gid, 0);
5116 +                       */
5117  
5118         rqgi = rqstp->rq_cred.cr_group_info;
5119  
5120 diff -NurpP --minimal linux-2.6.30.2/fs/nfsd/nfs3xdr.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/nfsd/nfs3xdr.c
5121 --- linux-2.6.30.2/fs/nfsd/nfs3xdr.c    2008-12-25 00:26:37.000000000 +0100
5122 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/nfsd/nfs3xdr.c 2009-07-04 01:11:39.000000000 +0200
5123 @@ -21,6 +21,7 @@
5124  #include <linux/sunrpc/svc.h>
5125  #include <linux/nfsd/nfsd.h>
5126  #include <linux/nfsd/xdr3.h>
5127 +#include <linux/vs_tag.h>
5128  #include "auth.h"
5129  
5130  #define NFSDDBG_FACILITY               NFSDDBG_XDR
5131 @@ -108,6 +109,8 @@ static __be32 *
5132  decode_sattr3(__be32 *p, struct iattr *iap)
5133  {
5134         u32     tmp;
5135 +       uid_t   uid = 0;
5136 +       gid_t   gid = 0;
5137  
5138         iap->ia_valid = 0;
5139  
5140 @@ -117,12 +120,15 @@ decode_sattr3(__be32 *p, struct iattr *i
5141         }
5142         if (*p++) {
5143                 iap->ia_valid |= ATTR_UID;
5144 -               iap->ia_uid = ntohl(*p++);
5145 +               uid = ntohl(*p++);
5146         }
5147         if (*p++) {
5148                 iap->ia_valid |= ATTR_GID;
5149 -               iap->ia_gid = ntohl(*p++);
5150 +               gid = ntohl(*p++);
5151         }
5152 +       iap->ia_uid = INOTAG_UID(DX_TAG_NFSD, uid, gid);
5153 +       iap->ia_gid = INOTAG_GID(DX_TAG_NFSD, uid, gid);
5154 +       iap->ia_tag = INOTAG_TAG(DX_TAG_NFSD, uid, gid, 0);
5155         if (*p++) {
5156                 u64     newsize;
5157  
5158 @@ -178,8 +184,12 @@ encode_fattr3(struct svc_rqst *rqstp, __
5159         *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
5160         *p++ = htonl((u32) stat->mode);
5161         *p++ = htonl((u32) stat->nlink);
5162 -       *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid));
5163 -       *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid));
5164 +       *p++ = htonl((u32) nfsd_ruid(rqstp,
5165 +               TAGINO_UID(0 /* FIXME: DX_TAG(dentry->d_inode) */,
5166 +               stat->uid, stat->tag)));
5167 +       *p++ = htonl((u32) nfsd_rgid(rqstp,
5168 +               TAGINO_GID(0 /* FIXME: DX_TAG(dentry->d_inode) */,
5169 +               stat->gid, stat->tag)));
5170         if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) {
5171                 p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
5172         } else {
5173 diff -NurpP --minimal linux-2.6.30.2/fs/nfsd/nfs4xdr.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/nfsd/nfs4xdr.c
5174 --- linux-2.6.30.2/fs/nfsd/nfs4xdr.c    2009-06-11 17:13:06.000000000 +0200
5175 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/nfsd/nfs4xdr.c 2009-07-04 01:11:39.000000000 +0200
5176 @@ -57,6 +57,7 @@
5177  #include <linux/nfs4_acl.h>
5178  #include <linux/sunrpc/gss_api.h>
5179  #include <linux/sunrpc/svcauth_gss.h>
5180 +#include <linux/vs_tag.h>
5181  
5182  #define NFSDDBG_FACILITY               NFSDDBG_XDR
5183  
5184 @@ -2078,14 +2079,18 @@ out_acl:
5185                 WRITE32(stat.nlink);
5186         }
5187         if (bmval1 & FATTR4_WORD1_OWNER) {
5188 -               status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen);
5189 +               status = nfsd4_encode_user(rqstp,
5190 +                       TAGINO_UID(DX_TAG(dentry->d_inode),
5191 +                       stat.uid, stat.tag), &p, &buflen);
5192                 if (status == nfserr_resource)
5193                         goto out_resource;
5194                 if (status)
5195                         goto out;
5196         }
5197         if (bmval1 & FATTR4_WORD1_OWNER_GROUP) {
5198 -               status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen);
5199 +               status = nfsd4_encode_group(rqstp,
5200 +                       TAGINO_GID(DX_TAG(dentry->d_inode),
5201 +                       stat.gid, stat.tag), &p, &buflen);
5202                 if (status == nfserr_resource)
5203                         goto out_resource;
5204                 if (status)
5205 diff -NurpP --minimal linux-2.6.30.2/fs/nfsd/nfsxdr.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/nfsd/nfsxdr.c
5206 --- linux-2.6.30.2/fs/nfsd/nfsxdr.c     2008-12-25 00:26:37.000000000 +0100
5207 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/nfsd/nfsxdr.c  2009-07-04 01:11:39.000000000 +0200
5208 @@ -15,6 +15,7 @@
5209  #include <linux/nfsd/nfsd.h>
5210  #include <linux/nfsd/xdr.h>
5211  #include <linux/mm.h>
5212 +#include <linux/vs_tag.h>
5213  #include "auth.h"
5214  
5215  #define NFSDDBG_FACILITY               NFSDDBG_XDR
5216 @@ -98,6 +99,8 @@ static __be32 *
5217  decode_sattr(__be32 *p, struct iattr *iap)
5218  {
5219         u32     tmp, tmp1;
5220 +       uid_t   uid = 0;
5221 +       gid_t   gid = 0;
5222  
5223         iap->ia_valid = 0;
5224  
5225 @@ -111,12 +114,15 @@ decode_sattr(__be32 *p, struct iattr *ia
5226         }
5227         if ((tmp = ntohl(*p++)) != (u32)-1) {
5228                 iap->ia_valid |= ATTR_UID;
5229 -               iap->ia_uid = tmp;
5230 +               uid = tmp;
5231         }
5232         if ((tmp = ntohl(*p++)) != (u32)-1) {
5233                 iap->ia_valid |= ATTR_GID;
5234 -               iap->ia_gid = tmp;
5235 +               gid = tmp;
5236         }
5237 +       iap->ia_uid = INOTAG_UID(DX_TAG_NFSD, uid, gid);
5238 +       iap->ia_gid = INOTAG_GID(DX_TAG_NFSD, uid, gid);
5239 +       iap->ia_tag = INOTAG_TAG(DX_TAG_NFSD, uid, gid, 0);
5240         if ((tmp = ntohl(*p++)) != (u32)-1) {
5241                 iap->ia_valid |= ATTR_SIZE;
5242                 iap->ia_size = tmp;
5243 @@ -161,8 +167,10 @@ encode_fattr(struct svc_rqst *rqstp, __b
5244         *p++ = htonl(nfs_ftypes[type >> 12]);
5245         *p++ = htonl((u32) stat->mode);
5246         *p++ = htonl((u32) stat->nlink);
5247 -       *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid));
5248 -       *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid));
5249 +       *p++ = htonl((u32) nfsd_ruid(rqstp,
5250 +               TAGINO_UID(DX_TAG(dentry->d_inode), stat->uid, stat->tag)));
5251 +       *p++ = htonl((u32) nfsd_rgid(rqstp,
5252 +               TAGINO_GID(DX_TAG(dentry->d_inode), stat->gid, stat->tag)));
5253  
5254         if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) {
5255                 *p++ = htonl(NFS_MAXPATHLEN);
5256 diff -NurpP --minimal linux-2.6.30.2/fs/ocfs2/dlm/dlmfs.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ocfs2/dlm/dlmfs.c
5257 --- linux-2.6.30.2/fs/ocfs2/dlm/dlmfs.c 2009-03-24 14:22:27.000000000 +0100
5258 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ocfs2/dlm/dlmfs.c      2009-07-04 01:11:39.000000000 +0200
5259 @@ -43,6 +43,7 @@
5260  #include <linux/init.h>
5261  #include <linux/string.h>
5262  #include <linux/backing-dev.h>
5263 +#include <linux/vs_tag.h>
5264  
5265  #include <asm/uaccess.h>
5266  
5267 @@ -341,6 +342,7 @@ static struct inode *dlmfs_get_root_inod
5268                 inode->i_mode = mode;
5269                 inode->i_uid = current_fsuid();
5270                 inode->i_gid = current_fsgid();
5271 +               inode->i_tag = dx_current_fstag(sb);
5272                 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
5273                 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
5274                 inc_nlink(inode);
5275 @@ -366,6 +368,7 @@ static struct inode *dlmfs_get_inode(str
5276         inode->i_mode = mode;
5277         inode->i_uid = current_fsuid();
5278         inode->i_gid = current_fsgid();
5279 +       inode->i_tag = dx_current_fstag(sb);
5280         inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
5281         inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
5282  
5283 diff -NurpP --minimal linux-2.6.30.2/fs/ocfs2/dlmglue.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ocfs2/dlmglue.c
5284 --- linux-2.6.30.2/fs/ocfs2/dlmglue.c   2009-06-11 17:13:07.000000000 +0200
5285 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ocfs2/dlmglue.c        2009-07-04 01:11:39.000000000 +0200
5286 @@ -1900,6 +1900,7 @@ static void __ocfs2_stuff_meta_lvb(struc
5287         lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
5288         lvb->lvb_iuid      = cpu_to_be32(inode->i_uid);
5289         lvb->lvb_igid      = cpu_to_be32(inode->i_gid);
5290 +       lvb->lvb_itag      = cpu_to_be16(inode->i_tag);
5291         lvb->lvb_imode     = cpu_to_be16(inode->i_mode);
5292         lvb->lvb_inlink    = cpu_to_be16(inode->i_nlink);
5293         lvb->lvb_iatime_packed  =
5294 @@ -1954,6 +1955,7 @@ static void ocfs2_refresh_inode_from_lvb
5295  
5296         inode->i_uid     = be32_to_cpu(lvb->lvb_iuid);
5297         inode->i_gid     = be32_to_cpu(lvb->lvb_igid);
5298 +       inode->i_tag     = be16_to_cpu(lvb->lvb_itag);
5299         inode->i_mode    = be16_to_cpu(lvb->lvb_imode);
5300         inode->i_nlink   = be16_to_cpu(lvb->lvb_inlink);
5301         ocfs2_unpack_timespec(&inode->i_atime,
5302 diff -NurpP --minimal linux-2.6.30.2/fs/ocfs2/dlmglue.h linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ocfs2/dlmglue.h
5303 --- linux-2.6.30.2/fs/ocfs2/dlmglue.h   2009-06-11 17:13:07.000000000 +0200
5304 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ocfs2/dlmglue.h        2009-07-04 01:11:39.000000000 +0200
5305 @@ -46,7 +46,8 @@ struct ocfs2_meta_lvb {
5306         __be16       lvb_inlink;
5307         __be32       lvb_iattr;
5308         __be32       lvb_igeneration;
5309 -       __be32       lvb_reserved2;
5310 +       __be16       lvb_itag;
5311 +       __be16       lvb_reserved2;
5312  };
5313  
5314  #define OCFS2_QINFO_LVB_VERSION 1
5315 diff -NurpP --minimal linux-2.6.30.2/fs/ocfs2/file.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ocfs2/file.c
5316 --- linux-2.6.30.2/fs/ocfs2/file.c      2009-06-11 17:13:07.000000000 +0200
5317 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ocfs2/file.c   2009-07-04 01:11:39.000000000 +0200
5318 @@ -911,13 +911,15 @@ int ocfs2_setattr(struct dentry *dentry,
5319                 mlog(0, "uid change: %d\n", attr->ia_uid);
5320         if (attr->ia_valid & ATTR_GID)
5321                 mlog(0, "gid change: %d\n", attr->ia_gid);
5322 +       if (attr->ia_valid & ATTR_TAG)
5323 +               mlog(0, "tag change: %d\n", attr->ia_tag);
5324         if (attr->ia_valid & ATTR_SIZE)
5325                 mlog(0, "size change...\n");
5326         if (attr->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME))
5327                 mlog(0, "time change...\n");
5328  
5329  #define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \
5330 -                          | ATTR_GID | ATTR_UID | ATTR_MODE)
5331 +                          | ATTR_GID | ATTR_UID | ATTR_TAG | ATTR_MODE)
5332         if (!(attr->ia_valid & OCFS2_VALID_ATTRS)) {
5333                 mlog(0, "can't handle attrs: 0x%x\n", attr->ia_valid);
5334                 return 0;
5335 @@ -2128,6 +2130,7 @@ const struct inode_operations ocfs2_file
5336         .removexattr    = generic_removexattr,
5337         .fallocate      = ocfs2_fallocate,
5338         .fiemap         = ocfs2_fiemap,
5339 +       .sync_flags     = ocfs2_sync_flags,
5340  };
5341  
5342  const struct inode_operations ocfs2_special_file_iops = {
5343 diff -NurpP --minimal linux-2.6.30.2/fs/ocfs2/inode.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ocfs2/inode.c
5344 --- linux-2.6.30.2/fs/ocfs2/inode.c     2009-06-11 17:13:07.000000000 +0200
5345 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ocfs2/inode.c  2009-07-04 01:11:39.000000000 +0200
5346 @@ -29,6 +29,7 @@
5347  #include <linux/highmem.h>
5348  #include <linux/pagemap.h>
5349  #include <linux/quotaops.h>
5350 +#include <linux/vs_tag.h>
5351  
5352  #include <asm/byteorder.h>
5353  
5354 @@ -45,6 +46,7 @@
5355  #include "file.h"
5356  #include "heartbeat.h"
5357  #include "inode.h"
5358 +#include "ioctl.h"
5359  #include "journal.h"
5360  #include "namei.h"
5361  #include "suballoc.h"
5362 @@ -78,11 +80,13 @@ void ocfs2_set_inode_flags(struct inode 
5363  {
5364         unsigned int flags = OCFS2_I(inode)->ip_attr;
5365  
5366 -       inode->i_flags &= ~(S_IMMUTABLE |
5367 +       inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK |
5368                 S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
5369  
5370         if (flags & OCFS2_IMMUTABLE_FL)
5371                 inode->i_flags |= S_IMMUTABLE;
5372 +       if (flags & OCFS2_IXUNLINK_FL)
5373 +               inode->i_flags |= S_IXUNLINK;
5374  
5375         if (flags & OCFS2_SYNC_FL)
5376                 inode->i_flags |= S_SYNC;
5377 @@ -92,25 +96,85 @@ void ocfs2_set_inode_flags(struct inode 
5378                 inode->i_flags |= S_NOATIME;
5379         if (flags & OCFS2_DIRSYNC_FL)
5380                 inode->i_flags |= S_DIRSYNC;
5381 +
5382 +       inode->i_vflags &= ~(V_BARRIER | V_COW);
5383 +
5384 +       if (flags & OCFS2_BARRIER_FL)
5385 +               inode->i_vflags |= V_BARRIER;
5386 +       if (flags & OCFS2_COW_FL)
5387 +               inode->i_vflags |= V_COW;
5388  }
5389  
5390  /* Propagate flags from i_flags to OCFS2_I(inode)->ip_attr */
5391  void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi)
5392  {
5393         unsigned int flags = oi->vfs_inode.i_flags;
5394 +       unsigned int vflags = oi->vfs_inode.i_vflags;
5395 +
5396 +       oi->ip_attr &= ~(OCFS2_SYNC_FL | OCFS2_APPEND_FL |
5397 +                       OCFS2_IMMUTABLE_FL | OCFS2_IXUNLINK_FL |
5398 +                       OCFS2_NOATIME_FL | OCFS2_DIRSYNC_FL |
5399 +                       OCFS2_BARRIER_FL | OCFS2_COW_FL);
5400 +
5401 +       if (flags & S_IMMUTABLE)
5402 +               oi->ip_attr |= OCFS2_IMMUTABLE_FL;
5403 +       if (flags & S_IXUNLINK)
5404 +               oi->ip_attr |= OCFS2_IXUNLINK_FL;
5405  
5406 -       oi->ip_attr &= ~(OCFS2_SYNC_FL|OCFS2_APPEND_FL|
5407 -                       OCFS2_IMMUTABLE_FL|OCFS2_NOATIME_FL|OCFS2_DIRSYNC_FL);
5408         if (flags & S_SYNC)
5409                 oi->ip_attr |= OCFS2_SYNC_FL;
5410         if (flags & S_APPEND)
5411                 oi->ip_attr |= OCFS2_APPEND_FL;
5412 -       if (flags & S_IMMUTABLE)
5413 -               oi->ip_attr |= OCFS2_IMMUTABLE_FL;
5414         if (flags & S_NOATIME)
5415                 oi->ip_attr |= OCFS2_NOATIME_FL;
5416         if (flags & S_DIRSYNC)
5417                 oi->ip_attr |= OCFS2_DIRSYNC_FL;
5418 +
5419 +       if (vflags & V_BARRIER)
5420 +               oi->ip_attr |= OCFS2_BARRIER_FL;
5421 +       if (vflags & V_COW)
5422 +               oi->ip_attr |= OCFS2_COW_FL;
5423 +}
5424 +
5425 +int ocfs2_sync_flags(struct inode *inode) /* fold VFS flag bits into ip_attr, then mark the inode dirty */
5426 +{
5427 +       struct ocfs2_inode_info *ocfs2_inode = OCFS2_I(inode);
5428 +       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5429 +       handle_t *handle = NULL;
5430 +       struct buffer_head *bh = NULL;
5431 +       int status;
5432 +
5433 +       status = ocfs2_inode_lock(inode, &bh, 1); /* third arg 1: presumably an exclusive lock -- confirm */
5434 +       if (status < 0) {
5435 +               mlog_errno(status);
5436 +               goto bail;
5437 +       }
5438 +
5439 +       status = -EROFS; /* result returned if the IS_RDONLY() check below fires */
5440 +       if (IS_RDONLY(inode))
5441 +               goto bail_unlock;
5442 +
5443 +       handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
5444 +       if (IS_ERR(handle)) {
5445 +               status = PTR_ERR(handle);
5446 +               mlog_errno(status);
5447 +               goto bail_unlock;
5448 +       }
5449 +
5450 +       ocfs2_get_inode_flags(ocfs2_inode); /* i_flags/i_vflags -> ip_attr */
5451 +       status = ocfs2_mark_inode_dirty(handle, inode, bh);
5452 +       if (status < 0)
5453 +               mlog_errno(status);
5454 +
5455 +       ocfs2_commit_trans(osb, handle); /* runs even if ocfs2_mark_inode_dirty() failed */
5456 +bail_unlock:
5457 +       ocfs2_inode_unlock(inode, 1);
5458 +bail:
5459 +       if (bh) /* bh is still NULL when the lock attempt failed */
5460 +               brelse(bh);
5461 +
5462 +       mlog_exit(status); /* NOTE(review): no matching mlog_entry() in this function */
5463 +       return status; /* lock/transaction/mark-dirty status, or -EROFS */
5464 +}
5465  
5466  struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno)
5467 @@ -234,6 +298,8 @@ void ocfs2_populate_inode(struct inode *
5468         struct super_block *sb;
5469         struct ocfs2_super *osb;
5470         int use_plocks = 1;
5471 +       uid_t uid;
5472 +       gid_t gid;
5473  
5474         mlog_entry("(0x%p, size:%llu)\n", inode,
5475                    (unsigned long long)le64_to_cpu(fe->i_size));
5476 @@ -265,8 +331,12 @@ void ocfs2_populate_inode(struct inode *
5477         inode->i_generation = le32_to_cpu(fe->i_generation);
5478         inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
5479         inode->i_mode = le16_to_cpu(fe->i_mode);
5480 -       inode->i_uid = le32_to_cpu(fe->i_uid);
5481 -       inode->i_gid = le32_to_cpu(fe->i_gid);
5482 +       uid = le32_to_cpu(fe->i_uid);
5483 +       gid = le32_to_cpu(fe->i_gid);
5484 +       inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
5485 +       inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
5486 +       inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid,
5487 +               /* le16_to_cpu(raw_inode->i_raw_tag) */ 0);
5488  
5489         /* Fast symlinks will have i_size but no allocated clusters. */
5490         if (S_ISLNK(inode->i_mode) && !fe->i_clusters)
5491 diff -NurpP --minimal linux-2.6.30.2/fs/ocfs2/inode.h linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ocfs2/inode.h
5492 --- linux-2.6.30.2/fs/ocfs2/inode.h     2009-06-11 17:13:07.000000000 +0200
5493 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ocfs2/inode.h  2009-07-04 01:11:39.000000000 +0200
5494 @@ -152,6 +152,7 @@ struct buffer_head *ocfs2_bread(struct i
5495  
5496  void ocfs2_set_inode_flags(struct inode *inode);
5497  void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi);
5498 +int ocfs2_sync_flags(struct inode *inode);
5499  
5500  static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode)
5501  {
5502 diff -NurpP --minimal linux-2.6.30.2/fs/ocfs2/ioctl.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ocfs2/ioctl.c
5503 --- linux-2.6.30.2/fs/ocfs2/ioctl.c     2008-12-25 00:26:37.000000000 +0100
5504 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ocfs2/ioctl.c  2009-07-04 01:11:39.000000000 +0200
5505 @@ -42,7 +42,7 @@ static int ocfs2_get_inode_attr(struct i
5506         return status;
5507  }
5508  
5509 -static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
5510 +int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
5511                                 unsigned mask)
5512  {
5513         struct ocfs2_inode_info *ocfs2_inode = OCFS2_I(inode);
5514 @@ -67,6 +67,11 @@ static int ocfs2_set_inode_attr(struct i
5515         if (!S_ISDIR(inode->i_mode))
5516                 flags &= ~OCFS2_DIRSYNC_FL;
5517  
5518 +       if (IS_BARRIER(inode)) {
5519 +               vxwprintk_task(1, "messing with the barrier.");
5520 +               goto bail_unlock;
5521 +       }
5522 +
5523         handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
5524         if (IS_ERR(handle)) {
5525                 status = PTR_ERR(handle);
5526 diff -NurpP --minimal linux-2.6.30.2/fs/ocfs2/ioctl.h linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ocfs2/ioctl.h
5527 --- linux-2.6.30.2/fs/ocfs2/ioctl.h     2008-12-25 00:26:37.000000000 +0100
5528 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ocfs2/ioctl.h  2009-07-04 01:11:39.000000000 +0200
5529 @@ -10,6 +10,9 @@
5530  #ifndef OCFS2_IOCTL_H
5531  #define OCFS2_IOCTL_H
5532  
5533 +int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
5534 +                               unsigned mask);
5535 +
5536  long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
5537  long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg);
5538  
5539 diff -NurpP --minimal linux-2.6.30.2/fs/ocfs2/namei.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ocfs2/namei.c
5540 --- linux-2.6.30.2/fs/ocfs2/namei.c     2009-06-11 17:13:07.000000000 +0200
5541 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ocfs2/namei.c  2009-07-06 17:33:52.000000000 +0200
5542 @@ -41,6 +41,7 @@
5543  #include <linux/slab.h>
5544  #include <linux/highmem.h>
5545  #include <linux/quotaops.h>
5546 +#include <linux/vs_tag.h>
5547  
5548  #define MLOG_MASK_PREFIX ML_NAMEI
5549  #include <cluster/masklog.h>
5550 @@ -478,6 +479,7 @@ static int ocfs2_mknod_locked(struct ocf
5551         u64 fe_blkno = 0;
5552         u16 suballoc_bit;
5553         u16 feat;
5554 +       tag_t tag;
5555  
5556         mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry,
5557                    inode->i_mode, (unsigned long)dev, dentry->d_name.len,
5558 @@ -524,8 +526,11 @@ static int ocfs2_mknod_locked(struct ocf
5559         fe->i_blkno = cpu_to_le64(fe_blkno);
5560         fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
5561         fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
5562 -       fe->i_uid = cpu_to_le32(inode->i_uid);
5563 -       fe->i_gid = cpu_to_le32(inode->i_gid);
5564 +
5565 +       tag = dx_current_fstag(osb->sb);
5566 +       fe->i_uid = cpu_to_le32(TAGINO_UID(DX_TAG(inode), inode->i_uid, tag));
5567 +       fe->i_gid = cpu_to_le32(TAGINO_GID(DX_TAG(inode), inode->i_gid, tag));
5568 +       inode->i_tag = tag;
5569         fe->i_mode = cpu_to_le16(inode->i_mode);
5570         if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
5571                 fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
5572 @@ -2037,6 +2042,7 @@ const struct inode_operations ocfs2_dir_
5573         .rename         = ocfs2_rename,
5574         .setattr        = ocfs2_setattr,
5575         .getattr        = ocfs2_getattr,
5576 +       .sync_flags     = ocfs2_sync_flags,
5577         .permission     = ocfs2_permission,
5578         .setxattr       = generic_setxattr,
5579         .getxattr       = generic_getxattr,
5580 diff -NurpP --minimal linux-2.6.30.2/fs/ocfs2/ocfs2_fs.h linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ocfs2/ocfs2_fs.h
5581 --- linux-2.6.30.2/fs/ocfs2/ocfs2_fs.h  2009-06-11 17:13:07.000000000 +0200
5582 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ocfs2/ocfs2_fs.h       2009-07-04 01:11:39.000000000 +0200
5583 @@ -225,18 +225,23 @@
5584  #define OCFS2_INDEXED_DIR_FL   (0x0008)
5585  
5586  /* Inode attributes, keep in sync with EXT2 */
5587 -#define OCFS2_SECRM_FL         (0x00000001)    /* Secure deletion */
5588 -#define OCFS2_UNRM_FL          (0x00000002)    /* Undelete */
5589 -#define OCFS2_COMPR_FL         (0x00000004)    /* Compress file */
5590 -#define OCFS2_SYNC_FL          (0x00000008)    /* Synchronous updates */
5591 -#define OCFS2_IMMUTABLE_FL     (0x00000010)    /* Immutable file */
5592 -#define OCFS2_APPEND_FL                (0x00000020)    /* writes to file may only append */
5593 -#define OCFS2_NODUMP_FL                (0x00000040)    /* do not dump file */
5594 -#define OCFS2_NOATIME_FL       (0x00000080)    /* do not update atime */
5595 -#define OCFS2_DIRSYNC_FL       (0x00010000)    /* dirsync behaviour (directories only) */
5596 +#define OCFS2_SECRM_FL         FS_SECRM_FL     /* Secure deletion */
5597 +#define OCFS2_UNRM_FL          FS_UNRM_FL      /* Undelete */
5598 +#define OCFS2_COMPR_FL         FS_COMPR_FL     /* Compress file */
5599 +#define OCFS2_SYNC_FL          FS_SYNC_FL      /* Synchronous updates */
5600 +#define OCFS2_IMMUTABLE_FL     FS_IMMUTABLE_FL /* Immutable file */
5601 +#define OCFS2_APPEND_FL                FS_APPEND_FL    /* writes to file may only append */
5602 +#define OCFS2_NODUMP_FL                FS_NODUMP_FL    /* do not dump file */
5603 +#define OCFS2_NOATIME_FL       FS_NOATIME_FL   /* do not update atime */
5604  
5605 -#define OCFS2_FL_VISIBLE       (0x000100FF)    /* User visible flags */
5606 -#define OCFS2_FL_MODIFIABLE    (0x000100FF)    /* User modifiable flags */
5607 +#define OCFS2_DIRSYNC_FL       FS_DIRSYNC_FL   /* dirsync behaviour (directories only) */
5608 +#define OCFS2_IXUNLINK_FL      FS_IXUNLINK_FL  /* Immutable invert on unlink */
5609 +
5610 +#define OCFS2_BARRIER_FL       FS_BARRIER_FL   /* Barrier for chroot() */
5611 +#define OCFS2_COW_FL           FS_COW_FL       /* Copy on Write marker */
5612 +
5613 +#define OCFS2_FL_VISIBLE       (0x010300FF)    /* User visible flags */
5614 +#define OCFS2_FL_MODIFIABLE    (0x010300FF)    /* User modifiable flags */
5615  
5616  /*
5617   * Extent record flags (e_node.leaf.flags)
5618 diff -NurpP --minimal linux-2.6.30.2/fs/ocfs2/ocfs2.h linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ocfs2/ocfs2.h
5619 --- linux-2.6.30.2/fs/ocfs2/ocfs2.h     2009-06-11 17:13:07.000000000 +0200
5620 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ocfs2/ocfs2.h  2009-07-04 01:11:39.000000000 +0200
5621 @@ -199,6 +199,7 @@ enum ocfs2_mount_options
5622         OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* POSIX access control lists */
5623         OCFS2_MOUNT_USRQUOTA = 1 << 9, /* We support user quotas */
5624         OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */
5625 +       OCFS2_MOUNT_TAGGED = 1 << 11, /* use tagging */
5626  };
5627  
5628  #define OCFS2_OSB_SOFT_RO      0x0001
5629 diff -NurpP --minimal linux-2.6.30.2/fs/ocfs2/super.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ocfs2/super.c
5630 --- linux-2.6.30.2/fs/ocfs2/super.c     2009-07-23 13:28:48.000000000 +0200
5631 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/ocfs2/super.c  2009-07-04 01:11:39.000000000 +0200
5632 @@ -172,6 +172,7 @@ enum {
5633         Opt_noacl,
5634         Opt_usrquota,
5635         Opt_grpquota,
5636 +       Opt_tag, Opt_notag, Opt_tagid,
5637         Opt_err,
5638  };
5639  
5640 @@ -198,6 +199,9 @@ static const match_table_t tokens = {
5641         {Opt_noacl, "noacl"},
5642         {Opt_usrquota, "usrquota"},
5643         {Opt_grpquota, "grpquota"},
5644 +       {Opt_tag, "tag"},
5645 +       {Opt_notag, "notag"},
5646 +       {Opt_tagid, "tagid=%u"},
5647         {Opt_err, NULL}
5648  };
5649  
5650 @@ -604,6 +608,13 @@ static int ocfs2_remount(struct super_bl
5651                 goto out;
5652         }
5653  
5654 +       if ((parsed_options.mount_opt & OCFS2_MOUNT_TAGGED) &&
5655 +               !(sb->s_flags & MS_TAGGED)) {
5656 +               ret = -EINVAL;
5657 +               mlog(ML_ERROR, "Cannot change tagging on remount\n");
5658 +               goto out;
5659 +       }
5660 +
5661         if ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) !=
5662             (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL)) {
5663                 ret = -EINVAL;
5664 @@ -1128,6 +1139,9 @@ static int ocfs2_fill_super(struct super
5665  
5666         ocfs2_complete_mount_recovery(osb);
5667  
5668 +       if (osb->s_mount_opt & OCFS2_MOUNT_TAGGED)
5669 +               sb->s_flags |= MS_TAGGED;
5670 +
5671         if (ocfs2_mount_local(osb))
5672                 snprintf(nodestr, sizeof(nodestr), "local");
5673         else
5674 @@ -1386,6 +1400,20 @@ static int ocfs2_parse_options(struct su
5675                         printk(KERN_INFO "ocfs2 (no)acl options not supported\n");
5676                         break;
5677  #endif
5678 +#ifndef CONFIG_TAGGING_NONE
5679 +               case Opt_tag:
5680 +                       mopt->mount_opt |= OCFS2_MOUNT_TAGGED;
5681 +                       break;
5682 +               case Opt_notag:
5683 +                       mopt->mount_opt &= ~OCFS2_MOUNT_TAGGED;
5684 +                       break;
5685 +#endif
5686 +#ifdef CONFIG_PROPAGATE
5687 +               case Opt_tagid:
5688 +                       /* use args[0] */
5689 +                       mopt->mount_opt |= OCFS2_MOUNT_TAGGED;
5690 +                       break;
5691 +#endif
5692                 default:
5693                         mlog(ML_ERROR,
5694                              "Unrecognized mount option \"%s\" "
5695 diff -NurpP --minimal linux-2.6.30.2/fs/open.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/open.c
5696 --- linux-2.6.30.2/fs/open.c    2009-06-11 17:13:07.000000000 +0200
5697 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/open.c 2009-07-04 02:02:28.000000000 +0200
5698 @@ -30,22 +30,31 @@
5699  #include <linux/audit.h>
5700  #include <linux/falloc.h>
5701  #include <linux/fs_struct.h>
5702 +#include <linux/vs_base.h>
5703 +#include <linux/vs_limit.h>
5704 +#include <linux/vs_dlimit.h>
5705 +#include <linux/vs_tag.h>
5706 +#include <linux/vs_cowbl.h>
5707  
5708  int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
5709  {
5710         int retval = -ENODEV;
5711  
5712         if (dentry) {
5713 +               struct super_block *sb = dentry->d_sb;
5714 +
5715                 retval = -ENOSYS;
5716 -               if (dentry->d_sb->s_op->statfs) {
5717 +               if (sb->s_op->statfs) {
5718                         memset(buf, 0, sizeof(*buf));
5719                         retval = security_sb_statfs(dentry);
5720                         if (retval)
5721                                 return retval;
5722 -                       retval = dentry->d_sb->s_op->statfs(dentry, buf);
5723 +                       retval = sb->s_op->statfs(dentry, buf);
5724                         if (retval == 0 && buf->f_frsize == 0)
5725                                 buf->f_frsize = buf->f_bsize;
5726                 }
5727 +               if (!retval && !vx_check(0, VS_ADMIN|VS_WATCH))
5728 +                       vx_vsi_statfs(sb, buf); /* buf is only filled in on success */
5729         }
5730         return retval;
5731  }
5732 @@ -639,6 +648,10 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, cons
5733         error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
5734         if (error)
5735                 goto out;
5736 +
5737 +       error = cow_check_and_break(&path);
5738 +       if (error)
5739 +               goto dput_and_out;
5740         inode = path.dentry->d_inode;
5741  
5742         error = mnt_want_write(path.mnt);
5743 @@ -672,11 +685,11 @@ static int chown_common(struct dentry * 
5744         newattrs.ia_valid =  ATTR_CTIME;
5745         if (user != (uid_t) -1) {
5746                 newattrs.ia_valid |= ATTR_UID;
5747 -               newattrs.ia_uid = user;
5748 +               newattrs.ia_uid = dx_map_uid(user);
5749         }
5750         if (group != (gid_t) -1) {
5751                 newattrs.ia_valid |= ATTR_GID;
5752 -               newattrs.ia_gid = group;
5753 +               newattrs.ia_gid = dx_map_gid(group);
5754         }
5755         if (!S_ISDIR(inode->i_mode))
5756                 newattrs.ia_valid |=
5757 @@ -699,7 +712,11 @@ SYSCALL_DEFINE3(chown, const char __user
5758         error = mnt_want_write(path.mnt);
5759         if (error)
5760                 goto out_release;
5761 -       error = chown_common(path.dentry, user, group);
5762 +#ifdef CONFIG_VSERVER_COWBL
5763 +       error = cow_check_and_break(&path);
5764 +       if (!error)
5765 +#endif
5766 +               error = chown_common(path.dentry, user, group);
5767         mnt_drop_write(path.mnt);
5768  out_release:
5769         path_put(&path);
5770 @@ -724,7 +741,11 @@ SYSCALL_DEFINE5(fchownat, int, dfd, cons
5771         error = mnt_want_write(path.mnt);
5772         if (error)
5773                 goto out_release;
5774 -       error = chown_common(path.dentry, user, group);
5775 +#ifdef CONFIG_VSERVER_COWBL
5776 +       error = cow_check_and_break(&path);
5777 +       if (!error)
5778 +#endif
5779 +               error = chown_common(path.dentry, user, group);
5780         mnt_drop_write(path.mnt);
5781  out_release:
5782         path_put(&path);
5783 @@ -743,7 +764,11 @@ SYSCALL_DEFINE3(lchown, const char __use
5784         error = mnt_want_write(path.mnt);
5785         if (error)
5786                 goto out_release;
5787 -       error = chown_common(path.dentry, user, group);
5788 +#ifdef CONFIG_VSERVER_COWBL
5789 +       error = cow_check_and_break(&path);
5790 +       if (!error)
5791 +#endif
5792 +               error = chown_common(path.dentry, user, group);
5793         mnt_drop_write(path.mnt);
5794  out_release:
5795         path_put(&path);
5796 @@ -987,6 +1012,7 @@ static void __put_unused_fd(struct files
5797         __FD_CLR(fd, fdt->open_fds);
5798         if (fd < files->next_fd)
5799                 files->next_fd = fd;
5800 +       vx_openfd_dec(fd);
5801  }
5802  
5803  void put_unused_fd(unsigned int fd)
5804 diff -NurpP --minimal linux-2.6.30.2/fs/proc/array.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/proc/array.c
5805 --- linux-2.6.30.2/fs/proc/array.c      2009-06-11 17:13:07.000000000 +0200
5806 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/proc/array.c   2009-07-04 01:11:39.000000000 +0200
5807 @@ -82,6 +82,8 @@
5808  #include <linux/pid_namespace.h>
5809  #include <linux/ptrace.h>
5810  #include <linux/tracehook.h>
5811 +#include <linux/vs_context.h>
5812 +#include <linux/vs_network.h>
5813  
5814  #include <asm/pgtable.h>
5815  #include <asm/processor.h>
5816 @@ -138,8 +140,9 @@ static const char *task_state_array[] = 
5817         "D (disk sleep)",       /*  2 */
5818         "T (stopped)",          /*  4 */
5819         "T (tracing stop)",     /*  8 */
5820 -       "Z (zombie)",           /* 16 */
5821 -       "X (dead)"              /* 32 */
5822 +       "H (on hold)",          /* 16 */
5823 +       "Z (zombie)",           /* 32 */
5824 +       "X (dead)",             /* 64 */
5825  };
5826  
5827  static inline const char *get_task_state(struct task_struct *tsk)
5828 @@ -166,6 +169,9 @@ static inline void task_state(struct seq
5829         rcu_read_lock();
5830         ppid = pid_alive(p) ?
5831                 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
5832 +       if (unlikely(vx_current_initpid(p->pid)))
5833 +               ppid = 0;
5834 +
5835         tpid = 0;
5836         if (pid_alive(p)) {
5837                 struct task_struct *tracer = tracehook_tracer_task(p);
5838 @@ -281,7 +287,7 @@ static inline void task_sig(struct seq_f
5839  }
5840  
5841  static void render_cap_t(struct seq_file *m, const char *header,
5842 -                       kernel_cap_t *a)
5843 +                       struct vx_info *vxi, kernel_cap_t *a)
5844  {
5845         unsigned __capi;
5846  
5847 @@ -306,10 +312,11 @@ static inline void task_cap(struct seq_f
5848         cap_bset        = cred->cap_bset;
5849         rcu_read_unlock();
5850  
5851 -       render_cap_t(m, "CapInh:\t", &cap_inheritable);
5852 -       render_cap_t(m, "CapPrm:\t", &cap_permitted);
5853 -       render_cap_t(m, "CapEff:\t", &cap_effective);
5854 -       render_cap_t(m, "CapBnd:\t", &cap_bset);
5855 +       /* FIXME: maybe move the p->vx_info masking to __task_cred() ? */
5856 +       render_cap_t(m, "CapInh:\t", p->vx_info, &cap_inheritable);
5857 +       render_cap_t(m, "CapPrm:\t", p->vx_info, &cap_permitted);
5858 +       render_cap_t(m, "CapEff:\t", p->vx_info, &cap_effective);
5859 +       render_cap_t(m, "CapBnd:\t", p->vx_info, &cap_bset);
5860  }
5861  
5862  static inline void task_context_switch_counts(struct seq_file *m,
5863 @@ -321,6 +328,42 @@ static inline void task_context_switch_c
5864                         p->nivcsw);
5865  }
5866  
5867 +/* /proc/<pid>/nsproxy: dump namespace pointers, 'I' = same as init_task */
5868 +int proc_pid_nsproxy(struct seq_file *m, struct pid_namespace *ns,
5869 +                       struct pid *pid, struct task_struct *task)
5870 +{
5871 +       struct nsproxy *nsp, *host = init_task.nsproxy;
5872 +
5873 +       /* ->nsproxy is cleared when a task exits, so fetch it under RCU */
5874 +       rcu_read_lock();
5875 +       nsp = task_nsproxy(task);
5876 +       if (nsp)
5877 +               seq_printf(m,   "Proxy:\t%p(%c)\n" "Count:\t%u\n"
5878 +                               "uts:\t%p(%c)\n"
5879 +                               "ipc:\t%p(%c)\n"
5880 +                               "mnt:\t%p(%c)\n"
5881 +                               "pid:\t%p(%c)\n"
5882 +                               "net:\t%p(%c)\n",
5883 +                       nsp, (nsp == host ? 'I' : '-'), atomic_read(&nsp->count),
5884 +                       nsp->uts_ns, (nsp->uts_ns == host->uts_ns ? 'I' : '-'),
5885 +                       nsp->ipc_ns, (nsp->ipc_ns == host->ipc_ns ? 'I' : '-'),
5886 +                       nsp->mnt_ns, (nsp->mnt_ns == host->mnt_ns ? 'I' : '-'),
5887 +                       nsp->pid_ns, (nsp->pid_ns == host->pid_ns ? 'I' : '-'),
5888 +                       nsp->net_ns, (nsp->net_ns == host->net_ns ? 'I' : '-'));
5889 +       rcu_read_unlock();
5890 +       return 0;
5891 +}
5892 +
5893 +void task_vs_id(struct seq_file *m, struct task_struct *task)
5894 +{
5895 +       /* tasks flagged VXF_HIDE_VINFO get no VxID/NxID lines at all */
5896 +       if (!task_vx_flags(task, VXF_HIDE_VINFO, 0)) {
5897 +               seq_printf(m, "VxID: %d\n", vx_task_xid(task));
5898 +               seq_printf(m, "NxID: %d\n", nx_task_nid(task));
5899 +       }
5900 +}
5901 +
5902 +
5903  int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
5904                         struct pid *pid, struct task_struct *task)
5905  {
5906 @@ -336,6 +379,7 @@ int proc_pid_status(struct seq_file *m, 
5907         task_sig(m, task);
5908         task_cap(m, task);
5909         cpuset_task_status_allowed(m, task);
5910 +       task_vs_id(m, task);
5911  #if defined(CONFIG_S390)
5912         task_show_regs(m, task);
5913  #endif
5914 @@ -452,6 +496,17 @@ static int do_task_stat(struct seq_file 
5915         /* convert nsec -> ticks */
5916         start_time = nsec_to_clock_t(start_time);
5917  
5918 +       /* fixup start time for virt uptime */
5919 +       if (vx_flags(VXF_VIRT_UPTIME, 0)) {
5920 +               unsigned long long bias =
5921 +                       current->vx_info->cvirt.bias_clock;
5922 +
5923 +               if (start_time > bias)
5924 +                       start_time -= bias;
5925 +               else
5926 +                       start_time = 0;
5927 +       }
5928 +
5929         seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \
5930  %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
5931  %lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n",
5932 diff -NurpP --minimal linux-2.6.30.2/fs/proc/base.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/proc/base.c
5933 --- linux-2.6.30.2/fs/proc/base.c       2009-06-11 17:13:07.000000000 +0200
5934 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/proc/base.c    2009-07-04 02:07:58.000000000 +0200
5935 @@ -81,6 +81,8 @@
5936  #include <linux/elf.h>
5937  #include <linux/pid_namespace.h>
5938  #include <linux/fs_struct.h>
5939 +#include <linux/vs_context.h>
5940 +#include <linux/vs_network.h>
5941  #include "internal.h"
5942  
5943  /* NOTE:
5944 @@ -1444,6 +1446,8 @@ static struct inode *proc_pid_make_inode
5945                 inode->i_gid = cred->egid;
5946                 rcu_read_unlock();
5947         }
5948 +       /* procfs is xid tagged */
5949 +       inode->i_tag = (tag_t)vx_task_xid(task);
5950         security_task_to_inode(task, inode);
5951  
5952  out:
5953 @@ -1994,6 +1998,13 @@ static struct dentry *proc_pident_lookup
5954         if (!task)
5955                 goto out_no_task;
5956  
5957 +       /* TODO: maybe we can come up with a generic approach? */
5958 +       if (task_vx_flags(task, VXF_HIDE_VINFO, 0) &&
5959 +               (dentry->d_name.len == 5) &&
5960 +               (!memcmp(dentry->d_name.name, "vinfo", 5) ||
5961 +               !memcmp(dentry->d_name.name, "ninfo", 5)))
5962 +               goto out;
5963 +
5964         /*
5965          * Yes, it does not scale. And it should not. Don't add
5966          * new entries into /proc/<tgid>/ without very good reasons.
5967 @@ -2379,7 +2390,7 @@ out_iput:
5968  static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
5969  {
5970         struct dentry *error;
5971 -       struct task_struct *task = get_proc_task(dir);
5972 +       struct task_struct *task = get_proc_task_real(dir);
5973         const struct pid_entry *p, *last;
5974  
5975         error = ERR_PTR(-ENOENT);
5976 @@ -2469,6 +2480,9 @@ static int proc_pid_personality(struct s
5977  static const struct file_operations proc_task_operations;
5978  static const struct inode_operations proc_task_inode_operations;
5979  
5980 +extern int proc_pid_vx_info(struct task_struct *, char *);
5981 +extern int proc_pid_nx_info(struct task_struct *, char *);
5982 +
5983  static const struct pid_entry tgid_base_stuff[] = {
5984         DIR("task",       S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
5985         DIR("fd",         S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
5986 @@ -2527,6 +2541,8 @@ static const struct pid_entry tgid_base_
5987  #ifdef CONFIG_CGROUPS
5988         REG("cgroup",  S_IRUGO, proc_cgroup_operations),
5989  #endif
5990 +       INF("vinfo",      S_IRUGO, proc_pid_vx_info),
5991 +       INF("ninfo",      S_IRUGO, proc_pid_nx_info),
5992         INF("oom_score",  S_IRUGO, proc_oom_score),
5993         REG("oom_adj",    S_IRUGO|S_IWUSR, proc_oom_adjust_operations),
5994  #ifdef CONFIG_AUDITSYSCALL
5995 @@ -2542,6 +2558,7 @@ static const struct pid_entry tgid_base_
5996  #ifdef CONFIG_TASK_IO_ACCOUNTING
5997         INF("io",       S_IRUGO, proc_tgid_io_accounting),
5998  #endif
5999 +       ONE("nsproxy",  S_IRUGO, proc_pid_nsproxy),
6000  };
6001  
6002  static int proc_tgid_base_readdir(struct file * filp,
6003 @@ -2738,7 +2755,7 @@ retry:
6004         iter.task = NULL;
6005         pid = find_ge_pid(iter.tgid, ns);
6006         if (pid) {
6007 -               iter.tgid = pid_nr_ns(pid, ns);
6008 +               iter.tgid = pid_unmapped_nr_ns(pid, ns);
6009                 iter.task = pid_task(pid, PIDTYPE_PID);
6010                 /* What we to know is if the pid we have find is the
6011                  * pid of a thread_group_leader.  Testing for task
6012 @@ -2768,7 +2785,7 @@ static int proc_pid_fill_cache(struct fi
6013         struct tgid_iter iter)
6014  {
6015         char name[PROC_NUMBUF];
6016 -       int len = snprintf(name, sizeof(name), "%d", iter.tgid);
6017 +       int len = snprintf(name, sizeof(name), "%d", vx_map_tgid(iter.tgid));
6018         return proc_fill_cache(filp, dirent, filldir, name, len,
6019                                 proc_pid_instantiate, iter.task, NULL);
6020  }
6021 @@ -2777,7 +2794,7 @@ static int proc_pid_fill_cache(struct fi
6022  int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
6023  {
6024         unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
6025 -       struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode);
6026 +       struct task_struct *reaper = get_proc_task_real(filp->f_path.dentry->d_inode);
6027         struct tgid_iter iter;
6028         struct pid_namespace *ns;
6029  
6030 @@ -2797,6 +2814,8 @@ int proc_pid_readdir(struct file * filp,
6031              iter.task;
6032              iter.tgid += 1, iter = next_tgid(ns, iter)) {
6033                 filp->f_pos = iter.tgid + TGID_OFFSET;
6034 +               if (!vx_proc_task_visible(iter.task))
6035 +                       continue;
6036                 if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) {
6037                         put_task_struct(iter.task);
6038                         goto out;
6039 @@ -2943,6 +2962,8 @@ static struct dentry *proc_task_lookup(s
6040         tid = name_to_int(dentry);
6041         if (tid == ~0U)
6042                 goto out;
6043 +       if (vx_current_initpid(tid))
6044 +               goto out;
6045  
6046         ns = dentry->d_sb->s_fs_info;
6047         rcu_read_lock();
6048 diff -NurpP --minimal linux-2.6.30.2/fs/proc/generic.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/proc/generic.c
6049 --- linux-2.6.30.2/fs/proc/generic.c    2009-06-11 17:13:07.000000000 +0200
6050 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/proc/generic.c 2009-07-04 01:11:39.000000000 +0200
6051 @@ -20,6 +20,7 @@
6052  #include <linux/bitops.h>
6053  #include <linux/spinlock.h>
6054  #include <linux/completion.h>
6055 +#include <linux/vserver/inode.h>
6056  #include <asm/uaccess.h>
6057  
6058  #include "internal.h"
6059 @@ -425,6 +426,8 @@ struct dentry *proc_lookup_de(struct pro
6060         for (de = de->subdir; de ; de = de->next) {
6061                 if (de->namelen != dentry->d_name.len)
6062                         continue;
6063 +               if (!vx_hide_check(0, de->vx_flags))
6064 +                       continue;
6065                 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
6066                         unsigned int ino;
6067  
6068 @@ -433,6 +436,8 @@ struct dentry *proc_lookup_de(struct pro
6069                         spin_unlock(&proc_subdir_lock);
6070                         error = -EINVAL;
6071                         inode = proc_get_inode(dir->i_sb, ino, de);
6072 +                       if (inode)      /* generic proc entries belong to the host */
6073 +                               inode->i_tag = 0;
6074                         goto out_unlock;
6075                 }
6076         }
6077 @@ -510,6 +515,8 @@ int proc_readdir_de(struct proc_dir_entr
6078  
6079                                 /* filldir passes info to user space */
6080                                 de_get(de);
6081 +                               if (!vx_hide_check(0, de->vx_flags))
6082 +                                       goto skip;
6083                                 spin_unlock(&proc_subdir_lock);
6084                                 if (filldir(dirent, de->name, de->namelen, filp->f_pos,
6085                                             de->low_ino, de->mode >> 12) < 0) {
6086 @@ -517,6 +524,7 @@ int proc_readdir_de(struct proc_dir_entr
6087                                         goto out;
6088                                 }
6089                                 spin_lock(&proc_subdir_lock);
6090 +                       skip:
6091                                 filp->f_pos++;
6092                                 next = de->next;
6093                                 de_put(de);
6094 @@ -631,6 +639,7 @@ static struct proc_dir_entry *__proc_cre
6095         ent->nlink = nlink;
6096         atomic_set(&ent->count, 1);
6097         ent->pde_users = 0;
6098 +       ent->vx_flags = IATTR_PROC_DEFAULT;
6099         spin_lock_init(&ent->pde_unload_lock);
6100         ent->pde_unload_completion = NULL;
6101         INIT_LIST_HEAD(&ent->pde_openers);
6102 @@ -654,7 +663,8 @@ struct proc_dir_entry *proc_symlink(cons
6103                                 kfree(ent->data);
6104                                 kfree(ent);
6105                                 ent = NULL;
6106 -                       }
6107 +                       } else
6108 +                               ent->vx_flags = IATTR_PROC_SYMLINK;
6109                 } else {
6110                         kfree(ent);
6111                         ent = NULL;
6112 diff -NurpP --minimal linux-2.6.30.2/fs/proc/inode.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/proc/inode.c
6113 --- linux-2.6.30.2/fs/proc/inode.c      2009-06-11 17:13:07.000000000 +0200
6114 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/proc/inode.c   2009-07-04 01:11:39.000000000 +0200
6115 @@ -459,6 +459,8 @@ struct inode *proc_get_inode(struct supe
6116                         inode->i_uid = de->uid;
6117                         inode->i_gid = de->gid;
6118                 }
6119 +               if (de->vx_flags)
6120 +                       PROC_I(inode)->vx_flags = de->vx_flags;
6121                 if (de->size)
6122                         inode->i_size = de->size;
6123                 if (de->nlink)
6124 diff -NurpP --minimal linux-2.6.30.2/fs/proc/internal.h linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/proc/internal.h
6125 --- linux-2.6.30.2/fs/proc/internal.h   2009-06-11 17:13:07.000000000 +0200
6126 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/proc/internal.h        2009-07-04 01:11:39.000000000 +0200
6127 @@ -10,6 +10,7 @@
6128   */
6129  
6130  #include <linux/proc_fs.h>
6131 +#include <linux/vs_pid.h>
6132  
6133  extern struct proc_dir_entry proc_root;
6134  #ifdef CONFIG_PROC_SYSCTL
6135 @@ -51,6 +52,9 @@ extern int proc_pid_status(struct seq_fi
6136                                 struct pid *pid, struct task_struct *task);
6137  extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
6138                                 struct pid *pid, struct task_struct *task);
6139 +extern int proc_pid_nsproxy(struct seq_file *m, struct pid_namespace *ns,
6140 +                               struct pid *pid, struct task_struct *task);
6141 +
6142  extern loff_t mem_lseek(struct file *file, loff_t offset, int orig);
6143  
6144  extern const struct file_operations proc_maps_operations;
6145 @@ -70,11 +74,16 @@ static inline struct pid *proc_pid(struc
6146         return PROC_I(inode)->pid;
6147  }
6148  
6149 -static inline struct task_struct *get_proc_task(struct inode *inode)
6150 +static inline struct task_struct *get_proc_task_real(struct inode *inode)
6151  {
6152         return get_pid_task(proc_pid(inode), PIDTYPE_PID);
6153  }
6154  
6155 +static inline struct task_struct *get_proc_task(struct inode *inode)
6156 +{
6157 +       return vx_get_proc_task(inode, proc_pid(inode));
6158 +}
6159 +
6160  static inline int proc_fd(struct inode *inode)
6161  {
6162         return PROC_I(inode)->fd;
6163 diff -NurpP --minimal linux-2.6.30.2/fs/proc/loadavg.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/proc/loadavg.c
6164 --- linux-2.6.30.2/fs/proc/loadavg.c    2008-12-25 00:26:37.000000000 +0100
6165 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/proc/loadavg.c 2009-07-04 01:11:39.000000000 +0200
6166 @@ -12,21 +12,37 @@
6167  
6168  static int loadavg_proc_show(struct seq_file *m, void *v)
6169  {
6170 +       unsigned long running;
6171 +       unsigned int threads;
6172         int a, b, c;
6173         unsigned long seq;
6174  
6175         do {
6176                 seq = read_seqbegin(&xtime_lock);
6177 -               a = avenrun[0] + (FIXED_1/200);
6178 -               b = avenrun[1] + (FIXED_1/200);
6179 -               c = avenrun[2] + (FIXED_1/200);
6180 +               if (vx_flags(VXF_VIRT_LOAD, 0)) {
6181 +                       struct vx_info *vxi = current->vx_info;
6182 +
6183 +                       a = vxi->cvirt.load[0] + (FIXED_1/200);
6184 +                       b = vxi->cvirt.load[1] + (FIXED_1/200);
6185 +                       c = vxi->cvirt.load[2] + (FIXED_1/200);
6186 +
6187 +                       running = atomic_read(&vxi->cvirt.nr_running);
6188 +                       threads = atomic_read(&vxi->cvirt.nr_threads);
6189 +               } else {
6190 +                       a = avenrun[0] + (FIXED_1/200);
6191 +                       b = avenrun[1] + (FIXED_1/200);
6192 +                       c = avenrun[2] + (FIXED_1/200);
6193 +
6194 +                       running = nr_running();
6195 +                       threads = nr_threads;
6196 +               }
6197         } while (read_seqretry(&xtime_lock, seq));
6198  
6199         seq_printf(m, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
6200                 LOAD_INT(a), LOAD_FRAC(a),
6201                 LOAD_INT(b), LOAD_FRAC(b),
6202                 LOAD_INT(c), LOAD_FRAC(c),
6203 -               nr_running(), nr_threads,
6204 +               running, threads,
6205                 task_active_pid_ns(current)->last_pid);
6206         return 0;
6207  }
6208 diff -NurpP --minimal linux-2.6.30.2/fs/proc/meminfo.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/proc/meminfo.c
6209 --- linux-2.6.30.2/fs/proc/meminfo.c    2009-06-11 17:13:07.000000000 +0200
6210 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/proc/meminfo.c 2009-07-04 01:11:39.000000000 +0200
6211 @@ -41,7 +41,7 @@ static int meminfo_proc_show(struct seq_
6212  
6213         cached = global_page_state(NR_FILE_PAGES) -
6214                         total_swapcache_pages - i.bufferram;
6215 -       if (cached < 0)
6216 +       if (cached < 0 || vx_flags(VXF_VIRT_MEM, 0))
6217                 cached = 0;
6218  
6219         get_vmalloc_info(&vmi);
6220 diff -NurpP --minimal linux-2.6.30.2/fs/proc/root.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/proc/root.c
6221 --- linux-2.6.30.2/fs/proc/root.c       2009-06-11 17:13:07.000000000 +0200
6222 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/proc/root.c    2009-07-04 01:11:39.000000000 +0200
6223 @@ -18,9 +18,14 @@
6224  #include <linux/bitops.h>
6225  #include <linux/mount.h>
6226  #include <linux/pid_namespace.h>
6227 +#include <linux/vserver/inode.h>
6228  
6229  #include "internal.h"
6230  
6231 +struct proc_dir_entry *proc_virtual;
6232 +
6233 +extern void proc_vx_init(void);
6234 +
6235  static int proc_test_super(struct super_block *sb, void *data)
6236  {
6237         return sb->s_fs_info == data;
6238 @@ -136,6 +141,7 @@ void __init proc_root_init(void)
6239  #endif
6240         proc_mkdir("bus", NULL);
6241         proc_sys_init();
6242 +       proc_vx_init();
6243  }
6244  
6245  static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
6246 @@ -203,6 +209,7 @@ struct proc_dir_entry proc_root = {
6247         .proc_iops      = &proc_root_inode_operations, 
6248         .proc_fops      = &proc_root_operations,
6249         .parent         = &proc_root,
6250 +       .vx_flags       = IATTR_ADMIN | IATTR_WATCH,
6251  };
6252  
6253  int pid_ns_prepare_proc(struct pid_namespace *ns)
6254 diff -NurpP --minimal linux-2.6.30.2/fs/proc/uptime.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/proc/uptime.c
6255 --- linux-2.6.30.2/fs/proc/uptime.c     2009-06-11 17:13:07.000000000 +0200
6256 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/proc/uptime.c  2009-07-04 02:10:28.000000000 +0200
6257 @@ -4,6 +4,7 @@
6258  #include <linux/sched.h>
6259  #include <linux/seq_file.h>
6260  #include <linux/time.h>
6261 +#include <linux/vserver/cvirt.h>
6262  #include <asm/cputime.h>
6263  
6264  static int uptime_proc_show(struct seq_file *m, void *v)
6265 @@ -15,6 +16,10 @@ static int uptime_proc_show(struct seq_f
6266         do_posix_clock_monotonic_gettime(&uptime);
6267         monotonic_to_bootbased(&uptime);
6268         cputime_to_timespec(idletime, &idle);
6269 +
6270 +       if (vx_flags(VXF_VIRT_UPTIME, 0))
6271 +               vx_vsi_uptime(&uptime, &idle);
6272 +
6273         seq_printf(m, "%lu.%02lu %lu.%02lu\n",
6274                         (unsigned long) uptime.tv_sec,
6275                         (uptime.tv_nsec / (NSEC_PER_SEC / 100)),
6276 diff -NurpP --minimal linux-2.6.30.2/fs/quota/quota.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/quota/quota.c
6277 --- linux-2.6.30.2/fs/quota/quota.c     2009-06-11 17:13:07.000000000 +0200
6278 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/quota/quota.c  2009-07-14 15:05:36.000000000 +0200
6279 @@ -18,6 +18,7 @@
6280  #include <linux/capability.h>
6281  #include <linux/quotaops.h>
6282  #include <linux/types.h>
6283 +#include <linux/vs_context.h>
6284  
6285  /* Check validity of generic quotactl commands */
6286  static int generic_quotactl_valid(struct super_block *sb, int type, int cmd,
6287 @@ -83,11 +84,11 @@ static int generic_quotactl_valid(struct
6288         if (cmd == Q_GETQUOTA) {
6289                 if (((type == USRQUOTA && current_euid() != id) ||
6290                      (type == GRPQUOTA && !in_egroup_p(id))) &&
6291 -                   !capable(CAP_SYS_ADMIN))
6292 +                   !vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
6293                         return -EPERM;
6294         }
6295         else if (cmd != Q_GETFMT && cmd != Q_SYNC && cmd != Q_GETINFO)
6296 -               if (!capable(CAP_SYS_ADMIN))
6297 +               if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
6298                         return -EPERM;
6299  
6300         return 0;
6301 @@ -135,10 +136,10 @@ static int xqm_quotactl_valid(struct sup
6302         if (cmd == Q_XGETQUOTA) {
6303                 if (((type == XQM_USRQUOTA && current_euid() != id) ||
6304                      (type == XQM_GRPQUOTA && !in_egroup_p(id))) &&
6305 -                    !capable(CAP_SYS_ADMIN))
6306 +                    !vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
6307                         return -EPERM;
6308         } else if (cmd != Q_XGETQSTAT && cmd != Q_XQUOTASYNC) {
6309 -               if (!capable(CAP_SYS_ADMIN))
6310 +               if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
6311                         return -EPERM;
6312         }
6313  
6314 @@ -348,6 +349,46 @@ static int do_quotactl(struct super_bloc
6315         return 0;
6316  }
6317  
6318 +#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE)
6319 +
6320 +#include <linux/vroot.h>
6321 +#include <linux/major.h>
6322 +#include <linux/module.h>
6323 +#include <linux/kallsyms.h>
6324 +#include <linux/vserver/debug.h>
6325 +
6326 +/* resolver hook; written only under vroot_grb_lock by the functions below */
6327 +static vroot_grb_func *vroot_get_real_bdev;
6328 +static DEFINE_SPINLOCK(vroot_grb_lock);
6329 +
6330 +int register_vroot_grb(vroot_grb_func *func) {
6331 +       int busy;
6332 +
6333 +       /* claim the hook unless a resolver is already installed */
6334 +       spin_lock(&vroot_grb_lock);
6335 +       busy = (vroot_get_real_bdev != NULL);
6336 +       if (!busy)
6337 +               vroot_get_real_bdev = func;
6338 +       spin_unlock(&vroot_grb_lock);
6339 +       return busy ? -EBUSY : 0;
6340 +}
6341 +EXPORT_SYMBOL(register_vroot_grb);
6342 +
6343 +int unregister_vroot_grb(vroot_grb_func *func) {
6344 +       int ret = -EINVAL;
6345 +       /* clear the hook only if func is the resolver currently registered */
6346 +       spin_lock(&vroot_grb_lock);
6347 +       if (func && vroot_get_real_bdev == func) {
6348 +               vroot_get_real_bdev = NULL;
6349 +               ret = 0;
6350 +       }
6351 +       spin_unlock(&vroot_grb_lock);
6352 +       return ret;
6353 +}
6354 +EXPORT_SYMBOL(unregister_vroot_grb);
6355 +
6356 +#endif
6357 +
6358  /*
6359   * look up a superblock on which quota ops will be performed
6360   * - use the name of a block device to find the superblock thereon
6361 @@ -365,6 +406,22 @@ static struct super_block *quotactl_bloc
6362         putname(tmp);
6363         if (IS_ERR(bdev))
6364                 return ERR_CAST(bdev);
6365 +#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE)
6366 +       if (bdev && bdev->bd_inode &&
6367 +                       imajor(bdev->bd_inode) == VROOT_MAJOR) {
6368 +               struct block_device *bdnew = ERR_PTR(-EINVAL); /* no resolver */
6369 +
6370 +               if (vroot_get_real_bdev)
6371 +                       bdnew = vroot_get_real_bdev(bdev);
6372 +               else
6373 +                       vxdprintk(VXD_CBIT(misc, 0),
6374 +                                       "vroot_get_real_bdev not set");
6375 +               bdput(bdev);
6376 +               if (IS_ERR(bdnew))
6377 +                       return ERR_CAST(bdnew);
6378 +               bdev = bdnew;
6379 +       }
6380 +#endif
6381         sb = get_super(bdev);
6382         bdput(bdev);
6383         if (!sb)
6384 diff -NurpP --minimal linux-2.6.30.2/fs/reiserfs/file.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/reiserfs/file.c
6385 --- linux-2.6.30.2/fs/reiserfs/file.c   2009-06-11 17:13:08.000000000 +0200
6386 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/reiserfs/file.c        2009-07-04 01:11:39.000000000 +0200
6387 @@ -307,4 +307,5 @@ const struct inode_operations reiserfs_f
6388         .listxattr = reiserfs_listxattr,
6389         .removexattr = reiserfs_removexattr,
6390         .permission = reiserfs_permission,
6391 +       .sync_flags = reiserfs_sync_flags,
6392  };
6393 diff -NurpP --minimal linux-2.6.30.2/fs/reiserfs/inode.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/reiserfs/inode.c
6394 --- linux-2.6.30.2/fs/reiserfs/inode.c  2009-06-11 17:13:08.000000000 +0200
6395 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/reiserfs/inode.c       2009-07-04 02:52:58.000000000 +0200
6396 @@ -18,6 +18,7 @@
6397  #include <linux/writeback.h>
6398  #include <linux/quotaops.h>
6399  #include <linux/swap.h>
6400 +#include <linux/vs_tag.h>
6401  
6402  int reiserfs_commit_write(struct file *f, struct page *page,
6403                           unsigned from, unsigned to);
6404 @@ -1117,6 +1118,8 @@ static void init_inode(struct inode *ino
6405         struct buffer_head *bh;
6406         struct item_head *ih;
6407         __u32 rdev;
6408 +       uid_t uid;
6409 +       gid_t gid;
6410         //int version = ITEM_VERSION_1;
6411  
6412         bh = PATH_PLAST_BUFFER(path);
6413 @@ -1140,12 +1143,13 @@ static void init_inode(struct inode *ino
6414                     (struct stat_data_v1 *)B_I_PITEM(bh, ih);
6415                 unsigned long blocks;
6416  
6417 +               uid = sd_v1_uid(sd);
6418 +               gid = sd_v1_gid(sd);
6419 +
6420                 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
6421                 set_inode_sd_version(inode, STAT_DATA_V1);
6422                 inode->i_mode = sd_v1_mode(sd);
6423                 inode->i_nlink = sd_v1_nlink(sd);
6424 -               inode->i_uid = sd_v1_uid(sd);
6425 -               inode->i_gid = sd_v1_gid(sd);
6426                 inode->i_size = sd_v1_size(sd);
6427                 inode->i_atime.tv_sec = sd_v1_atime(sd);
6428                 inode->i_mtime.tv_sec = sd_v1_mtime(sd);
6429 @@ -1187,11 +1191,12 @@ static void init_inode(struct inode *ino
6430                 // (directories and symlinks)
6431                 struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih);
6432  
6433 +               uid    = sd_v2_uid(sd);
6434 +               gid    = sd_v2_gid(sd);
6435 +
6436                 inode->i_mode = sd_v2_mode(sd);
6437                 inode->i_nlink = sd_v2_nlink(sd);
6438 -               inode->i_uid = sd_v2_uid(sd);
6439                 inode->i_size = sd_v2_size(sd);
6440 -               inode->i_gid = sd_v2_gid(sd);
6441                 inode->i_mtime.tv_sec = sd_v2_mtime(sd);
6442                 inode->i_atime.tv_sec = sd_v2_atime(sd);
6443                 inode->i_ctime.tv_sec = sd_v2_ctime(sd);
6444 @@ -1221,6 +1226,10 @@ static void init_inode(struct inode *ino
6445                 sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode);
6446         }
6447  
6448 +       inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
6449 +       inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
6450 +       inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, 0);
6451 +
6452         pathrelse(path);
6453         if (S_ISREG(inode->i_mode)) {
6454                 inode->i_op = &reiserfs_file_inode_operations;
6455 @@ -1243,13 +1252,15 @@ static void init_inode(struct inode *ino
6456  static void inode2sd(void *sd, struct inode *inode, loff_t size)
6457  {
6458         struct stat_data *sd_v2 = (struct stat_data *)sd;
6459 +       uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag);
6460 +       gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag);
6461         __u16 flags;
6462  
6463 +       set_sd_v2_uid(sd_v2, uid);
6464 +       set_sd_v2_gid(sd_v2, gid);
6465         set_sd_v2_mode(sd_v2, inode->i_mode);
6466         set_sd_v2_nlink(sd_v2, inode->i_nlink);
6467 -       set_sd_v2_uid(sd_v2, inode->i_uid);
6468         set_sd_v2_size(sd_v2, size);
6469 -       set_sd_v2_gid(sd_v2, inode->i_gid);
6470         set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec);
6471         set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec);
6472         set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec);
6473 @@ -2832,14 +2843,19 @@ int reiserfs_commit_write(struct file *f
6474  void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode)
6475  {
6476         if (reiserfs_attrs(inode->i_sb)) {
6477 -               if (sd_attrs & REISERFS_SYNC_FL)
6478 -                       inode->i_flags |= S_SYNC;
6479 -               else
6480 -                       inode->i_flags &= ~S_SYNC;
6481                 if (sd_attrs & REISERFS_IMMUTABLE_FL)
6482                         inode->i_flags |= S_IMMUTABLE;
6483                 else
6484                         inode->i_flags &= ~S_IMMUTABLE;
6485 +               if (sd_attrs & REISERFS_IXUNLINK_FL)
6486 +                       inode->i_flags |= S_IXUNLINK;
6487 +               else
6488 +                       inode->i_flags &= ~S_IXUNLINK;
6489 +
6490 +               if (sd_attrs & REISERFS_SYNC_FL)
6491 +                       inode->i_flags |= S_SYNC;
6492 +               else
6493 +                       inode->i_flags &= ~S_SYNC;
6494                 if (sd_attrs & REISERFS_APPEND_FL)
6495                         inode->i_flags |= S_APPEND;
6496                 else
6497 @@ -2852,6 +2868,15 @@ void sd_attrs_to_i_attrs(__u16 sd_attrs,
6498                         REISERFS_I(inode)->i_flags |= i_nopack_mask;
6499                 else
6500                         REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
6501 +
6502 +               if (sd_attrs & REISERFS_BARRIER_FL)
6503 +                       inode->i_vflags |= V_BARRIER;
6504 +               else
6505 +                       inode->i_vflags &= ~V_BARRIER;
6506 +               if (sd_attrs & REISERFS_COW_FL)
6507 +                       inode->i_vflags |= V_COW;
6508 +               else
6509 +                       inode->i_vflags &= ~V_COW;
6510         }
6511  }
6512  
6513 @@ -2862,6 +2887,11 @@ void i_attrs_to_sd_attrs(struct inode *i
6514                         *sd_attrs |= REISERFS_IMMUTABLE_FL;
6515                 else
6516                         *sd_attrs &= ~REISERFS_IMMUTABLE_FL;
6517 +               if (inode->i_flags & S_IXUNLINK)
6518 +                       *sd_attrs |= REISERFS_IXUNLINK_FL;
6519 +               else
6520 +                       *sd_attrs &= ~REISERFS_IXUNLINK_FL;
6521 +
6522                 if (inode->i_flags & S_SYNC)
6523                         *sd_attrs |= REISERFS_SYNC_FL;
6524                 else
6525 @@ -2874,6 +2904,15 @@ void i_attrs_to_sd_attrs(struct inode *i
6526                         *sd_attrs |= REISERFS_NOTAIL_FL;
6527                 else
6528                         *sd_attrs &= ~REISERFS_NOTAIL_FL;
6529 +
6530 +               if (inode->i_vflags & V_BARRIER)
6531 +                       *sd_attrs |= REISERFS_BARRIER_FL;
6532 +               else
6533 +                       *sd_attrs &= ~REISERFS_BARRIER_FL;
6534 +               if (inode->i_vflags & V_COW)
6535 +                       *sd_attrs |= REISERFS_COW_FL;
6536 +               else
6537 +                       *sd_attrs &= ~REISERFS_COW_FL;
6538         }
6539  }
6540  
6541 @@ -3041,6 +3080,22 @@ static ssize_t reiserfs_direct_IO(int rw
6542                                   reiserfs_get_blocks_direct_io, NULL);
6543  }
6544  
6545 +int reiserfs_sync_flags(struct inode *inode)
6546 +{
6547 +       /* Update REISERFS_I(inode)->i_attrs from the inode's flags; always returns 0. */
6548 +       u16 before = REISERFS_I(inode)->i_attrs;
6549 +       u16 after = before;
6550 +
6551 +       i_attrs_to_sd_attrs(inode, &after);
6552 +       if (after == before)
6553 +               return 0;
6554 +       /* attributes changed: store them, bump ctime and mark the inode dirty */
6555 +       REISERFS_I(inode)->i_attrs = after;
6556 +       inode->i_ctime = CURRENT_TIME_SEC;
6557 +       mark_inode_dirty(inode);
6558 +       return 0;
6559 +}
6560 +
6561  int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
6562  {
6563         struct inode *inode = dentry->d_inode;
6564 @@ -3094,9 +3149,11 @@ int reiserfs_setattr(struct dentry *dent
6565         }
6566  
6567         error = inode_change_ok(inode, attr);
6568 +
6569         if (!error) {
6570                 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
6571 -                   (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
6572 +                   (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) ||
6573 +                   (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) {
6574                         error = reiserfs_chown_xattrs(inode, attr);
6575  
6576                         if (!error) {
6577 @@ -3126,6 +3183,9 @@ int reiserfs_setattr(struct dentry *dent
6578                                         inode->i_uid = attr->ia_uid;
6579                                 if (attr->ia_valid & ATTR_GID)
6580                                         inode->i_gid = attr->ia_gid;
6581 +                               if ((attr->ia_valid & ATTR_TAG) &&
6582 +                                       IS_TAGGED(inode))
6583 +                                       inode->i_tag = attr->ia_tag;
6584                                 mark_inode_dirty(inode);
6585                                 error =
6586                                     journal_end(&th, inode->i_sb, jbegin_count);
6587 diff -NurpP --minimal linux-2.6.30.2/fs/reiserfs/ioctl.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/reiserfs/ioctl.c
6588 --- linux-2.6.30.2/fs/reiserfs/ioctl.c  2009-06-11 17:13:08.000000000 +0200
6589 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/reiserfs/ioctl.c       2009-07-04 01:11:39.000000000 +0200
6590 @@ -7,6 +7,7 @@
6591  #include <linux/mount.h>
6592  #include <linux/reiserfs_fs.h>
6593  #include <linux/time.h>
6594 +#include <linux/mount.h>
6595  #include <asm/uaccess.h>
6596  #include <linux/pagemap.h>
6597  #include <linux/smp_lock.h>
6598 @@ -23,7 +24,7 @@
6599  int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
6600                    unsigned long arg)
6601  {
6602 -       unsigned int flags;
6603 +       unsigned int flags, oldflags;
6604         int err = 0;
6605  
6606         switch (cmd) {
6607 @@ -43,6 +44,7 @@ int reiserfs_ioctl(struct inode *inode, 
6608  
6609                 flags = REISERFS_I(inode)->i_attrs;
6610                 i_attrs_to_sd_attrs(inode, (__u16 *) & flags);
6611 +               flags &= REISERFS_FL_USER_VISIBLE;
6612                 return put_user(flags, (int __user *)arg);
6613         case REISERFS_IOC_SETFLAGS:{
6614                         if (!reiserfs_attrs(inode->i_sb))
6615 @@ -60,6 +62,10 @@ int reiserfs_ioctl(struct inode *inode, 
6616                                 err = -EFAULT;
6617                                 goto setflags_out;
6618                         }
6619 +                       if (IS_BARRIER(inode)) {
6620 +                               vxwprintk_task(1, "messing with the barrier.");
6621 +                               return -EACCES;
6622 +                       }
6623                         /*
6624                          * Is it quota file? Do not allow user to mess with it
6625                          */
6626 @@ -84,6 +90,10 @@ int reiserfs_ioctl(struct inode *inode, 
6627                                         goto setflags_out;
6628                                 }
6629                         }
6630 +
6631 +                       oldflags = REISERFS_I(inode)->i_attrs;
6632 +                       flags = flags & REISERFS_FL_USER_MODIFIABLE;
6633 +                       flags |= oldflags & ~REISERFS_FL_USER_MODIFIABLE;
6634                         sd_attrs_to_i_attrs(flags, inode);
6635                         REISERFS_I(inode)->i_attrs = flags;
6636                         inode->i_ctime = CURRENT_TIME_SEC;
6637 diff -NurpP --minimal linux-2.6.30.2/fs/reiserfs/namei.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/reiserfs/namei.c
6638 --- linux-2.6.30.2/fs/reiserfs/namei.c  2009-06-11 17:13:08.000000000 +0200
6639 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/reiserfs/namei.c       2009-07-04 02:11:56.000000000 +0200
6640 @@ -17,6 +17,7 @@
6641  #include <linux/reiserfs_acl.h>
6642  #include <linux/reiserfs_xattr.h>
6643  #include <linux/quotaops.h>
6644 +#include <linux/vs_tag.h>
6645  
6646  #define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; }
6647  #define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i);
6648 @@ -354,6 +355,7 @@ static struct dentry *reiserfs_lookup(st
6649         if (retval == IO_ERROR) {
6650                 return ERR_PTR(-EIO);
6651         }
6652 +               dx_propagate_tag(nd, inode);
6653  
6654         return d_splice_alias(inode, dentry);
6655  }
6656 @@ -570,6 +572,7 @@ static int new_inode_init(struct inode *
6657         } else {
6658                 inode->i_gid = current_fsgid();
6659         }
6660 +       inode->i_tag = dx_current_fstag(inode->i_sb);
6661         vfs_dq_init(inode);
6662         return 0;
6663  }
6664 @@ -1515,6 +1518,7 @@ const struct inode_operations reiserfs_d
6665         .listxattr = reiserfs_listxattr,
6666         .removexattr = reiserfs_removexattr,
6667         .permission = reiserfs_permission,
6668 +       .sync_flags = reiserfs_sync_flags,
6669  };
6670  
6671  /*
6672 @@ -1531,6 +1535,7 @@ const struct inode_operations reiserfs_s
6673         .listxattr = reiserfs_listxattr,
6674         .removexattr = reiserfs_removexattr,
6675         .permission = reiserfs_permission,
6676 +       .sync_flags = reiserfs_sync_flags,
6677  
6678  };
6679  
6680 @@ -1544,5 +1549,6 @@ const struct inode_operations reiserfs_s
6681         .listxattr = reiserfs_listxattr,
6682         .removexattr = reiserfs_removexattr,
6683         .permission = reiserfs_permission,
6684 +       .sync_flags = reiserfs_sync_flags,
6685  
6686  };
6687 diff -NurpP --minimal linux-2.6.30.2/fs/reiserfs/super.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/reiserfs/super.c
6688 --- linux-2.6.30.2/fs/reiserfs/super.c  2009-06-11 17:13:08.000000000 +0200
6689 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/reiserfs/super.c       2009-07-06 17:43:06.000000000 +0200
6690 @@ -905,6 +905,14 @@ static int reiserfs_parse_options(struct
6691                 {"user_xattr",.setmask = 1 << REISERFS_UNSUPPORTED_OPT},
6692                 {"nouser_xattr",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT},
6693  #endif
6694 +#ifndef CONFIG_TAGGING_NONE
6695 +               {"tagxid",.setmask = 1 << REISERFS_TAGGED},
6696 +               {"tag",.setmask = 1 << REISERFS_TAGGED},
6697 +               {"notag",.clrmask = 1 << REISERFS_TAGGED},
6698 +#endif
6699 +#ifdef CONFIG_PROPAGATE
6700 +               {"tag",.arg_required = 'T',.values = NULL},
6701 +#endif
6702  #ifdef CONFIG_REISERFS_FS_POSIX_ACL
6703                 {"acl",.setmask = 1 << REISERFS_POSIXACL},
6704                 {"noacl",.clrmask = 1 << REISERFS_POSIXACL},
6705 @@ -1210,6 +1218,13 @@ static int reiserfs_remount(struct super
6706         handle_quota_files(s, qf_names, &qfmt);
6707  #endif
6708  
6709 +       if ((mount_options & (1 << REISERFS_TAGGED)) &&
6710 +               !(s->s_flags & MS_TAGGED)) {
6711 +               reiserfs_warning(s, "super-vs01",
6712 +                       "reiserfs: tagging not permitted on remount.");
6713 +               return -EINVAL;
6714 +       }
6715 +
6716         handle_attrs(s);
6717  
6718         /* Add options that are safe here */
6719 @@ -1670,6 +1685,10 @@ static int reiserfs_fill_super(struct su
6720                 goto error;
6721         }
6722  
6723 +       /* map mount option tagxid */
6724 +       if (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TAGGED))
6725 +               s->s_flags |= MS_TAGGED;
6726 +
6727         rs = SB_DISK_SUPER_BLOCK(s);
6728         /* Let's do basic sanity check to verify that underlying device is not
6729            smaller than the filesystem. If the check fails then abort and scream,
6730 diff -NurpP --minimal linux-2.6.30.2/fs/reiserfs/xattr.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/reiserfs/xattr.c
6731 --- linux-2.6.30.2/fs/reiserfs/xattr.c  2009-06-11 17:13:08.000000000 +0200
6732 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/reiserfs/xattr.c       2009-07-04 01:11:39.000000000 +0200
6733 @@ -39,6 +39,7 @@
6734  #include <linux/namei.h>
6735  #include <linux/errno.h>
6736  #include <linux/fs.h>
6737 +#include <linux/mount.h>
6738  #include <linux/file.h>
6739  #include <linux/pagemap.h>
6740  #include <linux/xattr.h>
6741 diff -NurpP --minimal linux-2.6.30.2/fs/stat.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/stat.c
6742 --- linux-2.6.30.2/fs/stat.c    2009-06-11 17:13:08.000000000 +0200
6743 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/stat.c 2009-07-04 01:11:39.000000000 +0200
6744 @@ -26,6 +26,7 @@ void generic_fillattr(struct inode *inod
6745         stat->nlink = inode->i_nlink;
6746         stat->uid = inode->i_uid;
6747         stat->gid = inode->i_gid;
6748 +       stat->tag = inode->i_tag;
6749         stat->rdev = inode->i_rdev;
6750         stat->atime = inode->i_atime;
6751         stat->mtime = inode->i_mtime;
6752 diff -NurpP --minimal linux-2.6.30.2/fs/super.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/super.c
6753 --- linux-2.6.30.2/fs/super.c   2009-06-11 17:13:08.000000000 +0200
6754 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/super.c        2009-07-04 01:11:39.000000000 +0200
6755 @@ -39,6 +39,9 @@
6756  #include <linux/mutex.h>
6757  #include <linux/file.h>
6758  #include <linux/async.h>
6759 +#include <linux/devpts_fs.h>
6760 +#include <linux/proc_fs.h>
6761 +#include <linux/vs_context.h>
6762  #include <asm/uaccess.h>
6763  #include "internal.h"
6764  
6765 @@ -992,12 +995,18 @@ struct vfsmount *
6766  vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
6767  {
6768         struct vfsmount *mnt;
6769 +       struct super_block *sb;
6770         char *secdata = NULL;
6771         int error;
6772  
6773         if (!type)
6774                 return ERR_PTR(-ENODEV);
6775  
6776 +       error = -EPERM;
6777 +       if ((type->fs_flags & FS_BINARY_MOUNTDATA) &&
6778 +               !vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT))
6779 +               goto out;
6780 +
6781         error = -ENOMEM;
6782         mnt = alloc_vfsmnt(name);
6783         if (!mnt)
6784 @@ -1016,9 +1025,17 @@ vfs_kern_mount(struct file_system_type *
6785         error = type->get_sb(type, flags, name, data, mnt);
6786         if (error < 0)
6787                 goto out_free_secdata;
6788 -       BUG_ON(!mnt->mnt_sb);
6789  
6790 -       error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
6791 +       sb = mnt->mnt_sb;
6792 +       BUG_ON(!sb);
6793 +
6794 +       error = -EPERM;
6795 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT) && !sb->s_bdev &&
6796 +               (sb->s_magic != PROC_SUPER_MAGIC) &&
6797 +               (sb->s_magic != DEVPTS_SUPER_MAGIC))
6798 +               goto out_sb;
6799 +
6800 +       error = security_sb_kern_mount(sb, flags, secdata);
6801         if (error)
6802                 goto out_sb;
6803  
6804 diff -NurpP --minimal linux-2.6.30.2/fs/sysfs/mount.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/sysfs/mount.c
6805 --- linux-2.6.30.2/fs/sysfs/mount.c     2009-06-11 17:13:08.000000000 +0200
6806 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/sysfs/mount.c  2009-07-04 01:11:39.000000000 +0200
6807 @@ -47,7 +47,7 @@ static int sysfs_fill_super(struct super
6808  
6809         sb->s_blocksize = PAGE_CACHE_SIZE;
6810         sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
6811 -       sb->s_magic = SYSFS_MAGIC;
6812 +       sb->s_magic = SYSFS_SUPER_MAGIC;
6813         sb->s_op = &sysfs_ops;
6814         sb->s_time_gran = 1;
6815         sysfs_sb = sb;
6816 diff -NurpP --minimal linux-2.6.30.2/fs/utimes.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/utimes.c
6817 --- linux-2.6.30.2/fs/utimes.c  2009-03-24 14:22:37.000000000 +0100
6818 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/utimes.c       2009-07-04 01:11:39.000000000 +0200
6819 @@ -8,6 +8,8 @@
6820  #include <linux/stat.h>
6821  #include <linux/utime.h>
6822  #include <linux/syscalls.h>
6823 +#include <linux/mount.h>
6824 +#include <linux/vs_cowbl.h>
6825  #include <asm/uaccess.h>
6826  #include <asm/unistd.h>
6827  
6828 diff -NurpP --minimal linux-2.6.30.2/fs/xattr.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xattr.c
6829 --- linux-2.6.30.2/fs/xattr.c   2009-06-11 17:13:09.000000000 +0200
6830 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xattr.c        2009-07-04 01:11:39.000000000 +0200
6831 @@ -18,6 +18,7 @@
6832  #include <linux/module.h>
6833  #include <linux/fsnotify.h>
6834  #include <linux/audit.h>
6835 +#include <linux/mount.h>
6836  #include <asm/uaccess.h>
6837  
6838  
6839 diff -NurpP --minimal linux-2.6.30.2/fs/xfs/linux-2.6/xfs_ioctl.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/linux-2.6/xfs_ioctl.c
6840 --- linux-2.6.30.2/fs/xfs/linux-2.6/xfs_ioctl.c 2009-06-11 17:13:09.000000000 +0200
6841 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/linux-2.6/xfs_ioctl.c      2009-07-04 01:11:39.000000000 +0200
6842 @@ -743,6 +743,10 @@ xfs_merge_ioc_xflags(
6843                 xflags |= XFS_XFLAG_IMMUTABLE;
6844         else
6845                 xflags &= ~XFS_XFLAG_IMMUTABLE;
6846 +       if (flags & FS_IXUNLINK_FL)
6847 +               xflags |= XFS_XFLAG_IXUNLINK;
6848 +       else
6849 +               xflags &= ~XFS_XFLAG_IXUNLINK;
6850         if (flags & FS_APPEND_FL)
6851                 xflags |= XFS_XFLAG_APPEND;
6852         else
6853 @@ -771,6 +775,8 @@ xfs_di2lxflags(
6854  
6855         if (di_flags & XFS_DIFLAG_IMMUTABLE)
6856                 flags |= FS_IMMUTABLE_FL;
6857 +       if (di_flags & XFS_DIFLAG_IXUNLINK)
6858 +               flags |= FS_IXUNLINK_FL;
6859         if (di_flags & XFS_DIFLAG_APPEND)
6860                 flags |= FS_APPEND_FL;
6861         if (di_flags & XFS_DIFLAG_SYNC)
6862 @@ -829,6 +835,8 @@ xfs_set_diflags(
6863         di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
6864         if (xflags & XFS_XFLAG_IMMUTABLE)
6865                 di_flags |= XFS_DIFLAG_IMMUTABLE;
6866 +       if (xflags & XFS_XFLAG_IXUNLINK)
6867 +               di_flags |= XFS_DIFLAG_IXUNLINK;
6868         if (xflags & XFS_XFLAG_APPEND)
6869                 di_flags |= XFS_DIFLAG_APPEND;
6870         if (xflags & XFS_XFLAG_SYNC)
6871 @@ -871,6 +879,10 @@ xfs_diflags_to_linux(
6872                 inode->i_flags |= S_IMMUTABLE;
6873         else
6874                 inode->i_flags &= ~S_IMMUTABLE;
6875 +       if (xflags & XFS_XFLAG_IXUNLINK)
6876 +               inode->i_flags |= S_IXUNLINK;
6877 +       else
6878 +               inode->i_flags &= ~S_IXUNLINK;
6879         if (xflags & XFS_XFLAG_APPEND)
6880                 inode->i_flags |= S_APPEND;
6881         else
6882 @@ -1345,10 +1357,18 @@ xfs_file_ioctl(
6883         case XFS_IOC_FSGETXATTRA:
6884                 return xfs_ioc_fsgetxattr(ip, 1, arg);
6885         case XFS_IOC_FSSETXATTR:
6886 +               if (IS_BARRIER(inode)) {
6887 +                       vxwprintk_task(1, "messing with the barrier.");
6888 +                       return -XFS_ERROR(EACCES);
6889 +               }
6890                 return xfs_ioc_fssetxattr(ip, filp, arg);
6891         case XFS_IOC_GETXFLAGS:
6892                 return xfs_ioc_getxflags(ip, arg);
6893         case XFS_IOC_SETXFLAGS:
6894 +               if (IS_BARRIER(inode)) {
6895 +                       vxwprintk_task(1, "messing with the barrier.");
6896 +                       return -XFS_ERROR(EACCES);
6897 +               }
6898                 return xfs_ioc_setxflags(ip, filp, arg);
6899  
6900         case XFS_IOC_FSSETDM: {
6901 diff -NurpP --minimal linux-2.6.30.2/fs/xfs/linux-2.6/xfs_iops.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/linux-2.6/xfs_iops.c
6902 --- linux-2.6.30.2/fs/xfs/linux-2.6/xfs_iops.c  2009-06-11 17:13:09.000000000 +0200
6903 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/linux-2.6/xfs_iops.c       2009-07-04 01:11:39.000000000 +0200
6904 @@ -54,6 +54,7 @@
6905  #include <linux/security.h>
6906  #include <linux/falloc.h>
6907  #include <linux/fiemap.h>
6908 +#include <linux/vs_tag.h>
6909  
6910  /*
6911   * Bring the atime in the XFS inode uptodate.
6912 @@ -540,6 +541,7 @@ xfs_vn_getattr(
6913         stat->nlink = ip->i_d.di_nlink;
6914         stat->uid = ip->i_d.di_uid;
6915         stat->gid = ip->i_d.di_gid;
6916 +       stat->tag = ip->i_d.di_tag;
6917         stat->ino = ip->i_ino;
6918         stat->atime = inode->i_atime;
6919         stat->mtime.tv_sec = ip->i_d.di_mtime.t_sec;
6920 @@ -576,6 +578,12 @@ xfs_vn_getattr(
6921  }
6922  
6923  STATIC int
6924 +xfs_vn_sync_xflags(struct inode *inode)    /* wired up as the .sync_flags inode operation */
6925 +{
6926 +       return -xfs_sync_xflags(XFS_I(inode));  /* sign flipped; presumably xfs_sync_xflags() yields a positive errno -- confirm */
6927 +}
6928 +
6929 +STATIC int
6930  xfs_vn_setattr(
6931         struct dentry   *dentry,
6932         struct iattr    *iattr)
6933 @@ -733,6 +741,7 @@ static const struct inode_operations xfs
6934         .listxattr              = xfs_vn_listxattr,
6935         .fallocate              = xfs_vn_fallocate,
6936         .fiemap                 = xfs_vn_fiemap,
6937 +       .sync_flags             = xfs_vn_sync_xflags,
6938  };
6939  
6940  static const struct inode_operations xfs_dir_inode_operations = {
6941 @@ -758,6 +767,7 @@ static const struct inode_operations xfs
6942         .getxattr               = generic_getxattr,
6943         .removexattr            = generic_removexattr,
6944         .listxattr              = xfs_vn_listxattr,
6945 +       .sync_flags             = xfs_vn_sync_xflags,
6946  };
6947  
6948  static const struct inode_operations xfs_dir_ci_inode_operations = {
6949 @@ -807,6 +817,10 @@ xfs_diflags_to_iflags(
6950                 inode->i_flags |= S_IMMUTABLE;
6951         else
6952                 inode->i_flags &= ~S_IMMUTABLE;
6953 +       if (ip->i_d.di_flags & XFS_DIFLAG_IXUNLINK)
6954 +               inode->i_flags |= S_IXUNLINK;
6955 +       else
6956 +               inode->i_flags &= ~S_IXUNLINK;
6957         if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
6958                 inode->i_flags |= S_APPEND;
6959         else
6960 @@ -819,6 +833,15 @@ xfs_diflags_to_iflags(
6961                 inode->i_flags |= S_NOATIME;
6962         else
6963                 inode->i_flags &= ~S_NOATIME;
6964 +
6965 +       if (ip->i_d.di_vflags & XFS_DIVFLAG_BARRIER)
6966 +               inode->i_vflags |= V_BARRIER;
6967 +       else
6968 +               inode->i_vflags &= ~V_BARRIER;
6969 +       if (ip->i_d.di_vflags & XFS_DIVFLAG_COW)
6970 +               inode->i_vflags |= V_COW;
6971 +       else
6972 +               inode->i_vflags &= ~V_COW;
6973  }
6974  
6975  /*
6976 @@ -847,6 +870,7 @@ xfs_setup_inode(
6977         inode->i_nlink  = ip->i_d.di_nlink;
6978         inode->i_uid    = ip->i_d.di_uid;
6979         inode->i_gid    = ip->i_d.di_gid;
6980 +       inode->i_tag    = ip->i_d.di_tag;
6981  
6982         switch (inode->i_mode & S_IFMT) {
6983         case S_IFBLK:
6984 diff -NurpP --minimal linux-2.6.30.2/fs/xfs/linux-2.6/xfs_linux.h linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/linux-2.6/xfs_linux.h
6985 --- linux-2.6.30.2/fs/xfs/linux-2.6/xfs_linux.h 2009-06-11 17:13:09.000000000 +0200
6986 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/linux-2.6/xfs_linux.h      2009-07-04 01:11:39.000000000 +0200
6987 @@ -119,6 +119,7 @@
6988  
6989  #define current_cpu()          (raw_smp_processor_id())
6990  #define current_pid()          (current->pid)
6991 +#define current_fstag(cred,vp) (dx_current_fstag((vp)->i_sb))
6992  #define current_test_flags(f)  (current->flags & (f))
6993  #define current_set_flags_nested(sp, f)                \
6994                 (*(sp) = current->flags, current->flags |= (f))
6995 diff -NurpP --minimal linux-2.6.30.2/fs/xfs/linux-2.6/xfs_super.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/linux-2.6/xfs_super.c
6996 --- linux-2.6.30.2/fs/xfs/linux-2.6/xfs_super.c 2009-06-11 17:13:09.000000000 +0200
6997 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/linux-2.6/xfs_super.c      2009-07-04 01:11:39.000000000 +0200
6998 @@ -118,6 +118,9 @@ mempool_t *xfs_ioend_pool;
6999  #define MNTOPT_DMAPI   "dmapi"         /* DMI enabled (DMAPI / XDSM) */
7000  #define MNTOPT_XDSM    "xdsm"          /* DMI enabled (DMAPI / XDSM) */
7001  #define MNTOPT_DMI     "dmi"           /* DMI enabled (DMAPI / XDSM) */
7002 +#define MNTOPT_TAGXID  "tagxid"        /* context tagging for inodes */
7003 +#define MNTOPT_TAGGED  "tag"           /* context tagging for inodes */
7004 +#define MNTOPT_NOTAGTAG        "notag"         /* do not use context tagging */
7005  
7006  /*
7007   * Table driven mount option parser.
7008 @@ -126,10 +129,14 @@ mempool_t *xfs_ioend_pool;
7009   * in the future, too.
7010   */
7011  enum {
7012 +       Opt_tag, Opt_notag,
7013         Opt_barrier, Opt_nobarrier, Opt_err
7014  };
7015  
7016  static const match_table_t tokens = {
7017 +       {Opt_tag, "tagxid"},
7018 +       {Opt_tag, "tag"},
7019 +       {Opt_notag, "notag"},
7020         {Opt_barrier, "barrier"},
7021         {Opt_nobarrier, "nobarrier"},
7022         {Opt_err, NULL}
7023 @@ -383,6 +390,19 @@ xfs_parseargs(
7024                 } else if (!strcmp(this_char, "irixsgid")) {
7025                         cmn_err(CE_WARN,
7026         "XFS: irixsgid is now a sysctl(2) variable, option is deprecated.");
7027 +#ifndef CONFIG_TAGGING_NONE
7028 +               } else if (!strcmp(this_char, MNTOPT_TAGGED)) {
7029 +                       mp->m_flags |= XFS_MOUNT_TAGGED;
7030 +               } else if (!strcmp(this_char, MNTOPT_NOTAGTAG)) {
7031 +                       mp->m_flags &= ~XFS_MOUNT_TAGGED;
7032 +               } else if (!strcmp(this_char, MNTOPT_TAGXID)) {
7033 +                       mp->m_flags |= XFS_MOUNT_TAGGED;
7034 +#endif
7035 +#ifdef CONFIG_PROPAGATE
7036 +               } else if (!strcmp(this_char, MNTOPT_TAGGED)) {
7037 +                       /* use value */
7038 +                       mp->m_flags |= XFS_MOUNT_TAGGED;
7039 +#endif
7040                 } else {
7041                         cmn_err(CE_WARN,
7042                                 "XFS: unknown mount option [%s].", this_char);
7043 @@ -1233,6 +1253,16 @@ xfs_fs_remount(
7044                 case Opt_nobarrier:
7045                         mp->m_flags &= ~XFS_MOUNT_BARRIER;
7046                         break;
7047 +               case Opt_tag:
7048 +                       if (!(sb->s_flags & MS_TAGGED)) {
7049 +                               printk(KERN_INFO
7050 +                                       "XFS: %s: tagging not permitted on remount.\n",
7051 +                                       sb->s_id);
7052 +                               return -EINVAL;
7053 +                       }
7054 +                       break;
7055 +               case Opt_notag:
7056 +                       break;
7057                 default:
7058                         /*
7059                          * Logically we would return an error here to prevent
7060 @@ -1443,6 +1473,9 @@ xfs_fs_fill_super(
7061  
7062         XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname);
7063  
7064 +       if (mp->m_flags & XFS_MOUNT_TAGGED)
7065 +               sb->s_flags |= MS_TAGGED;
7066 +
7067         sb->s_dirt = 1;
7068         sb->s_magic = XFS_SB_MAGIC;
7069         sb->s_blocksize = mp->m_sb.sb_blocksize;
7070 diff -NurpP --minimal linux-2.6.30.2/fs/xfs/xfs_dinode.h linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/xfs_dinode.h
7071 --- linux-2.6.30.2/fs/xfs/xfs_dinode.h  2009-06-11 17:13:09.000000000 +0200
7072 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/xfs_dinode.h       2009-07-04 01:11:39.000000000 +0200
7073 @@ -50,7 +50,9 @@ typedef struct xfs_dinode {
7074         __be32          di_gid;         /* owner's group id */
7075         __be32          di_nlink;       /* number of links to file */
7076         __be16          di_projid;      /* owner's project id */
7077 -       __u8            di_pad[8];      /* unused, zeroed space */
7078 +       __be16          di_tag;         /* context tagging */
7079 +       __be16          di_vflags;      /* vserver specific flags */
7080 +       __u8            di_pad[4];      /* unused, zeroed space */
7081         __be16          di_flushiter;   /* incremented on flush */
7082         xfs_timestamp_t di_atime;       /* time last accessed */
7083         xfs_timestamp_t di_mtime;       /* time last modified */
7084 @@ -183,6 +185,8 @@ static inline void xfs_dinode_put_rdev(s
7085  #define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */
7086  #define XFS_DIFLAG_NODEFRAG_BIT     13 /* do not reorganize/defragment */
7087  #define XFS_DIFLAG_FILESTREAM_BIT   14  /* use filestream allocator */
7088 +#define XFS_DIFLAG_IXUNLINK_BIT     15 /* Immutable invert on unlink */
7089 +
7090  #define XFS_DIFLAG_REALTIME      (1 << XFS_DIFLAG_REALTIME_BIT)
7091  #define XFS_DIFLAG_PREALLOC      (1 << XFS_DIFLAG_PREALLOC_BIT)
7092  #define XFS_DIFLAG_NEWRTBM       (1 << XFS_DIFLAG_NEWRTBM_BIT)
7093 @@ -198,6 +202,7 @@ static inline void xfs_dinode_put_rdev(s
7094  #define XFS_DIFLAG_EXTSZINHERIT  (1 << XFS_DIFLAG_EXTSZINHERIT_BIT)
7095  #define XFS_DIFLAG_NODEFRAG      (1 << XFS_DIFLAG_NODEFRAG_BIT)
7096  #define XFS_DIFLAG_FILESTREAM    (1 << XFS_DIFLAG_FILESTREAM_BIT)
7097 +#define XFS_DIFLAG_IXUNLINK      (1 << XFS_DIFLAG_IXUNLINK_BIT)
7098  
7099  #ifdef CONFIG_XFS_RT
7100  #define XFS_IS_REALTIME_INODE(ip) ((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME)
7101 @@ -210,6 +215,10 @@ static inline void xfs_dinode_put_rdev(s
7102          XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
7103          XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \
7104          XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
7105 -        XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM)
7106 +        XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM | \
7107 +        XFS_DIFLAG_IXUNLINK)
7108 +
7109 +#define XFS_DIVFLAG_BARRIER    0x01    /* di_vflags: chroot() barrier */
7110 +#define XFS_DIVFLAG_COW                0x02    /* di_vflags: copy on write mark */
7111  
7112  #endif /* __XFS_DINODE_H__ */
7113 diff -NurpP --minimal linux-2.6.30.2/fs/xfs/xfs_fs.h linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/xfs_fs.h
7114 --- linux-2.6.30.2/fs/xfs/xfs_fs.h      2009-03-24 14:22:37.000000000 +0100
7115 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/xfs_fs.h   2009-07-04 01:11:39.000000000 +0200
7116 @@ -67,6 +67,9 @@ struct fsxattr {
7117  #define XFS_XFLAG_EXTSZINHERIT 0x00001000      /* inherit inode extent size */
7118  #define XFS_XFLAG_NODEFRAG     0x00002000      /* do not defragment */
7119  #define XFS_XFLAG_FILESTREAM   0x00004000      /* use filestream allocator */
7120 +#define XFS_XFLAG_IXUNLINK     0x00008000      /* immutable invert on unlink */
7121 +#define XFS_XFLAG_BARRIER      0x10000000      /* chroot() barrier */
7122 +#define XFS_XFLAG_COW          0x20000000      /* copy on write mark */
7123  #define XFS_XFLAG_HASATTR      0x80000000      /* no DIFLAG for this   */
7124  
7125  /*
7126 @@ -289,7 +292,8 @@ typedef struct xfs_bstat {
7127         __s32           bs_extents;     /* number of extents            */
7128         __u32           bs_gen;         /* generation count             */
7129         __u16           bs_projid;      /* project id                   */
7130 -       unsigned char   bs_pad[14];     /* pad space, unused            */
7131 +       __u16           bs_tag;         /* context tagging              */
7132 +       unsigned char   bs_pad[12];     /* pad space, unused            */
7133         __u32           bs_dmevmask;    /* DMIG event mask              */
7134         __u16           bs_dmstate;     /* DMIG state info              */
7135         __u16           bs_aextents;    /* attribute number of extents  */
7136 diff -NurpP --minimal linux-2.6.30.2/fs/xfs/xfs_ialloc.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/xfs_ialloc.c
7137 --- linux-2.6.30.2/fs/xfs/xfs_ialloc.c  2009-06-11 17:13:09.000000000 +0200
7138 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/xfs_ialloc.c       2009-07-04 01:11:39.000000000 +0200
7139 @@ -41,7 +41,6 @@
7140  #include "xfs_error.h"
7141  #include "xfs_bmap.h"
7142  
7143 -
7144  /*
7145   * Allocation group level functions.
7146   */
7147 diff -NurpP --minimal linux-2.6.30.2/fs/xfs/xfs_inode.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/xfs_inode.c
7148 --- linux-2.6.30.2/fs/xfs/xfs_inode.c   2009-06-11 17:13:09.000000000 +0200
7149 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/xfs_inode.c        2009-07-04 01:11:39.000000000 +0200
7150 @@ -250,6 +250,7 @@ xfs_inotobp(
7151         return 0;
7152  }
7153  
7154 +#include <linux/vs_tag.h>
7155  
7156  /*
7157   * This routine is called to map an inode to the buffer containing
7158 @@ -645,15 +646,25 @@ xfs_iformat_btree(
7159  void
7160  xfs_dinode_from_disk(
7161         xfs_icdinode_t          *to,
7162 -       xfs_dinode_t            *from)
7163 +       xfs_dinode_t            *from,
7164 +       int tagged)
7165  {
7166 +       uint32_t uid, gid, tag;
7167 +
7168         to->di_magic = be16_to_cpu(from->di_magic);
7169         to->di_mode = be16_to_cpu(from->di_mode);
7170         to->di_version = from ->di_version;
7171         to->di_format = from->di_format;
7172         to->di_onlink = be16_to_cpu(from->di_onlink);
7173 -       to->di_uid = be32_to_cpu(from->di_uid);
7174 -       to->di_gid = be32_to_cpu(from->di_gid);
7175 +
7176 +       uid = be32_to_cpu(from->di_uid);
7177 +       gid = be32_to_cpu(from->di_gid);
7178 +       tag = be16_to_cpu(from->di_tag);
7179 +
7180 +       to->di_uid = INOTAG_UID(tagged, uid, gid);
7181 +       to->di_gid = INOTAG_GID(tagged, uid, gid);
7182 +       to->di_tag = INOTAG_TAG(tagged, uid, gid, tag);
7183 +
7184         to->di_nlink = be32_to_cpu(from->di_nlink);
7185         to->di_projid = be16_to_cpu(from->di_projid);
7186         memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
7187 @@ -674,21 +685,26 @@ xfs_dinode_from_disk(
7188         to->di_dmevmask = be32_to_cpu(from->di_dmevmask);
7189         to->di_dmstate  = be16_to_cpu(from->di_dmstate);
7190         to->di_flags    = be16_to_cpu(from->di_flags);
7191 +       to->di_vflags   = be16_to_cpu(from->di_vflags);
7192         to->di_gen      = be32_to_cpu(from->di_gen);
7193  }
7194  
7195  void
7196  xfs_dinode_to_disk(
7197         xfs_dinode_t            *to,
7198 -       xfs_icdinode_t          *from)
7199 +       xfs_icdinode_t          *from,
7200 +       int tagged)
7201  {
7202         to->di_magic = cpu_to_be16(from->di_magic);
7203         to->di_mode = cpu_to_be16(from->di_mode);
7204         to->di_version = from ->di_version;
7205         to->di_format = from->di_format;
7206         to->di_onlink = cpu_to_be16(from->di_onlink);
7207 -       to->di_uid = cpu_to_be32(from->di_uid);
7208 -       to->di_gid = cpu_to_be32(from->di_gid);
7209 +
7210 +       to->di_uid = cpu_to_be32(TAGINO_UID(tagged, from->di_uid, from->di_tag));
7211 +       to->di_gid = cpu_to_be32(TAGINO_GID(tagged, from->di_gid, from->di_tag));
7212 +       to->di_tag = cpu_to_be16(TAGINO_TAG(tagged, from->di_tag));
7213 +
7214         to->di_nlink = cpu_to_be32(from->di_nlink);
7215         to->di_projid = cpu_to_be16(from->di_projid);
7216         memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
7217 @@ -709,12 +725,14 @@ xfs_dinode_to_disk(
7218         to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
7219         to->di_dmstate = cpu_to_be16(from->di_dmstate);
7220         to->di_flags = cpu_to_be16(from->di_flags);
7221 +       to->di_vflags = cpu_to_be16(from->di_vflags);
7222         to->di_gen = cpu_to_be32(from->di_gen);
7223  }
7224  
7225  STATIC uint
7226  _xfs_dic2xflags(
7227 -       __uint16_t              di_flags)
7228 +       __uint16_t              di_flags,
7229 +       __uint16_t              di_vflags)
7230  {
7231         uint                    flags = 0;
7232  
7233 @@ -725,6 +743,8 @@ _xfs_dic2xflags(
7234                         flags |= XFS_XFLAG_PREALLOC;
7235                 if (di_flags & XFS_DIFLAG_IMMUTABLE)
7236                         flags |= XFS_XFLAG_IMMUTABLE;
7237 +               if (di_flags & XFS_DIFLAG_IXUNLINK)
7238 +                       flags |= XFS_XFLAG_IXUNLINK;
7239                 if (di_flags & XFS_DIFLAG_APPEND)
7240                         flags |= XFS_XFLAG_APPEND;
7241                 if (di_flags & XFS_DIFLAG_SYNC)
7242 @@ -749,6 +769,10 @@ _xfs_dic2xflags(
7243                         flags |= XFS_XFLAG_FILESTREAM;
7244         }
7245  
7246 +       if (di_vflags & XFS_DIVFLAG_BARRIER)
7247 +               flags |= FS_BARRIER_FL;
7248 +       if (di_vflags & XFS_DIVFLAG_COW)
7249 +               flags |= FS_COW_FL;
7250         return flags;
7251  }
7252  
7253 @@ -758,7 +782,7 @@ xfs_ip2xflags(
7254  {
7255         xfs_icdinode_t          *dic = &ip->i_d;
7256  
7257 -       return _xfs_dic2xflags(dic->di_flags) |
7258 +       return _xfs_dic2xflags(dic->di_flags, dic->di_vflags) |
7259                                 (XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0);
7260  }
7261  
7262 @@ -766,7 +790,8 @@ uint
7263  xfs_dic2xflags(
7264         xfs_dinode_t            *dip)
7265  {
7266 -       return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) |
7267 +       return _xfs_dic2xflags(be16_to_cpu(dip->di_flags),
7268 +                               be16_to_cpu(dip->di_vflags)) |
7269                                 (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
7270  }
7271  
7272 @@ -802,7 +827,6 @@ xfs_iread(
7273         if (error)
7274                 return error;
7275         dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
7276 -
7277         /*
7278          * If we got something that isn't an inode it means someone
7279          * (nfs or dmi) has a stale handle.
7280 @@ -827,7 +851,8 @@ xfs_iread(
7281          * Otherwise, just get the truly permanent information.
7282          */
7283         if (dip->di_mode) {
7284 -               xfs_dinode_from_disk(&ip->i_d, dip);
7285 +               xfs_dinode_from_disk(&ip->i_d, dip,
7286 +                       mp->m_flags & XFS_MOUNT_TAGGED);
7287                 error = xfs_iformat(ip, dip);
7288                 if (error)  {
7289  #ifdef DEBUG
7290 @@ -1027,6 +1052,7 @@ xfs_ialloc(
7291         ASSERT(ip->i_d.di_nlink == nlink);
7292         ip->i_d.di_uid = current_fsuid();
7293         ip->i_d.di_gid = current_fsgid();
7294 +       ip->i_d.di_tag = current_fstag(cr, &ip->i_vnode);
7295         ip->i_d.di_projid = prid;
7296         memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
7297  
7298 @@ -1087,6 +1113,7 @@ xfs_ialloc(
7299         ip->i_d.di_dmevmask = 0;
7300         ip->i_d.di_dmstate = 0;
7301         ip->i_d.di_flags = 0;
7302 +       ip->i_d.di_vflags = 0;
7303         flags = XFS_ILOG_CORE;
7304         switch (mode & S_IFMT) {
7305         case S_IFIFO:
7306 @@ -2163,6 +2190,7 @@ xfs_ifree(
7307         }
7308         ip->i_d.di_mode = 0;            /* mark incore inode as free */
7309         ip->i_d.di_flags = 0;
7310 +       ip->i_d.di_vflags = 0;
7311         ip->i_d.di_dmevmask = 0;
7312         ip->i_d.di_forkoff = 0;         /* mark the attr fork not in use */
7313         ip->i_df.if_ext_max =
7314 @@ -3130,7 +3158,8 @@ xfs_iflush_int(
7315          * because if the inode is dirty at all the core must
7316          * be.
7317          */
7318 -       xfs_dinode_to_disk(dip, &ip->i_d);
7319 +       xfs_dinode_to_disk(dip, &ip->i_d,
7320 +               mp->m_flags & XFS_MOUNT_TAGGED);
7321  
7322         /* Wrap, we never let the log put out DI_MAX_FLUSH */
7323         if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
7324 diff -NurpP --minimal linux-2.6.30.2/fs/xfs/xfs_inode.h linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/xfs_inode.h
7325 --- linux-2.6.30.2/fs/xfs/xfs_inode.h   2009-06-11 17:13:09.000000000 +0200
7326 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/xfs_inode.h        2009-07-04 01:11:39.000000000 +0200
7327 @@ -134,7 +134,9 @@ typedef struct xfs_icdinode {
7328         __uint32_t      di_gid;         /* owner's group id */
7329         __uint32_t      di_nlink;       /* number of links to file */
7330         __uint16_t      di_projid;      /* owner's project id */
7331 -       __uint8_t       di_pad[8];      /* unused, zeroed space */
7332 +       __uint16_t      di_tag;         /* context tagging */
7333 +       __uint16_t      di_vflags;      /* vserver specific flags */
7334 +       __uint8_t       di_pad[4];      /* unused, zeroed space */
7335         __uint16_t      di_flushiter;   /* incremented on flush */
7336         xfs_ictimestamp_t di_atime;     /* time last accessed */
7337         xfs_ictimestamp_t di_mtime;     /* time last modified */
7338 @@ -589,9 +591,9 @@ int         xfs_itobp(struct xfs_mount *, struc
7339  int            xfs_iread(struct xfs_mount *, struct xfs_trans *,
7340                           struct xfs_inode *, xfs_daddr_t, uint);
7341  void           xfs_dinode_from_disk(struct xfs_icdinode *,
7342 -                                    struct xfs_dinode *);
7343 +                                    struct xfs_dinode *, int);
7344  void           xfs_dinode_to_disk(struct xfs_dinode *,
7345 -                                  struct xfs_icdinode *);
7346 +                                  struct xfs_icdinode *, int);
7347  void           xfs_idestroy_fork(struct xfs_inode *, int);
7348  void           xfs_idata_realloc(struct xfs_inode *, int, int);
7349  void           xfs_iroot_realloc(struct xfs_inode *, int, int);
7350 diff -NurpP --minimal linux-2.6.30.2/fs/xfs/xfs_itable.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/xfs_itable.c
7351 --- linux-2.6.30.2/fs/xfs/xfs_itable.c  2009-06-11 17:13:09.000000000 +0200
7352 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/xfs_itable.c       2009-07-04 01:11:39.000000000 +0200
7353 @@ -82,6 +82,7 @@ xfs_bulkstat_one_iget(
7354         buf->bs_mode = dic->di_mode;
7355         buf->bs_uid = dic->di_uid;
7356         buf->bs_gid = dic->di_gid;
7357 +       buf->bs_tag = dic->di_tag;
7358         buf->bs_size = dic->di_size;
7359         /*
7360          * We are reading the atime from the Linux inode because the
7361 diff -NurpP --minimal linux-2.6.30.2/fs/xfs/xfs_log_recover.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/xfs_log_recover.c
7362 --- linux-2.6.30.2/fs/xfs/xfs_log_recover.c     2009-06-11 17:13:09.000000000 +0200
7363 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/xfs_log_recover.c  2009-07-04 01:11:39.000000000 +0200
7364 @@ -2453,7 +2453,8 @@ xlog_recover_do_inode_trans(
7365         }
7366  
7367         /* The core is in in-core format */
7368 -       xfs_dinode_to_disk(dip, (xfs_icdinode_t *)item->ri_buf[1].i_addr);
7369 +       xfs_dinode_to_disk(dip, (xfs_icdinode_t *)item->ri_buf[1].i_addr,
7370 +               mp->m_flags & XFS_MOUNT_TAGGED);
7371  
7372         /* the rest is in on-disk format */
7373         if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) {
7374 diff -NurpP --minimal linux-2.6.30.2/fs/xfs/xfs_mount.h linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/xfs_mount.h
7375 --- linux-2.6.30.2/fs/xfs/xfs_mount.h   2009-06-11 17:13:09.000000000 +0200
7376 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/xfs_mount.h        2009-07-04 01:11:39.000000000 +0200
7377 @@ -361,6 +361,7 @@ typedef struct xfs_mount {
7378                                                    allocator */
7379  #define XFS_MOUNT_NOATTR2      (1ULL << 25)    /* disable use of attr2 format */
7380  
7381 +#define XFS_MOUNT_TAGGED       (1ULL << 31)    /* context tagging */
7382  
7383  /*
7384   * Default minimum read and write sizes.
7385 diff -NurpP --minimal linux-2.6.30.2/fs/xfs/xfs_vnodeops.c linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/xfs_vnodeops.c
7386 --- linux-2.6.30.2/fs/xfs/xfs_vnodeops.c        2009-06-11 17:13:09.000000000 +0200
7387 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/xfs_vnodeops.c     2009-07-04 01:11:39.000000000 +0200
7388 @@ -53,6 +53,90 @@
7389  #include "xfs_filestream.h"
7390  #include "xfs_vnodeops.h"
7391  
7392 +/* Mirror the VFS inode's i_flags/i_vflags bits into the in-core ip->i_d. */
7393 +STATIC void
7394 +xfs_get_inode_flags(
7395 +       xfs_inode_t     *ip)
7396 +{
7397 +       struct inode    *inode = VFS_I(ip);
7398 +       unsigned int    flags = inode->i_flags;
7399 +       unsigned int    vflags = inode->i_vflags;
7400 +
7401 +       if (flags & S_IMMUTABLE)
7402 +               ip->i_d.di_flags |= XFS_DIFLAG_IMMUTABLE;
7403 +       else
7404 +               ip->i_d.di_flags &= ~XFS_DIFLAG_IMMUTABLE;
7405 +       if (flags & S_IXUNLINK)
7406 +               ip->i_d.di_flags |= XFS_DIFLAG_IXUNLINK;
7407 +       else
7408 +               ip->i_d.di_flags &= ~XFS_DIFLAG_IXUNLINK;
7409 +       /* the V_* bits go to di_vflags, not di_flags */
7410 +       if (vflags & V_BARRIER)
7411 +               ip->i_d.di_vflags |= XFS_DIVFLAG_BARRIER;
7412 +       else
7413 +               ip->i_d.di_vflags &= ~XFS_DIVFLAG_BARRIER;
7414 +       if (vflags & V_COW)
7415 +               ip->i_d.di_vflags |= XFS_DIVFLAG_COW;
7416 +       else
7417 +               ip->i_d.di_vflags &= ~XFS_DIVFLAG_COW;
7418 +}
7419 +
7420 +int
7421 +xfs_sync_xflags(
7422 +       xfs_inode_t             *ip)
7423 +{
7424 +       struct xfs_mount        *mp = ip->i_mount;
7425 +       struct xfs_trans        *tp;
7426 +       unsigned int            lock_flags = 0;
7427 +       int                     code;
7428 +
7429 +       xfs_itrace_entry(ip);
7430 +
7431 +       if (mp->m_flags & XFS_MOUNT_RDONLY)
7432 +               return XFS_ERROR(EROFS);
7433 +
7434 +       /*
7435 +        * Set up a transaction with log space reserved for the inode change.
7436 +        */
7437 +       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
7438 +       code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
7439 +       if (code)
7440 +               goto error_return;
7441 +
7442 +       lock_flags = XFS_ILOCK_EXCL;
7443 +       xfs_ilock(ip, lock_flags);
7444 +
7445 +       xfs_trans_ijoin(tp, ip, lock_flags);
7446 +       xfs_trans_ihold(tp, ip);
7447 +
7448 +       xfs_get_inode_flags(ip);
7449 +       // xfs_diflags_to_linux(ip);
7450 +
7451 +       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
7452 +       xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
7453 +
7454 +       XFS_STATS_INC(xs_ig_attrchg);
7455 +
7456 +       /*
7457 +        * If this is a synchronous mount, make sure that the
7458 +        * transaction goes to disk before returning to the user.
7459 +        */
7460 +       if (mp->m_flags & XFS_MOUNT_WSYNC)
7461 +               xfs_trans_set_sync(tp);
7462 +       code = xfs_trans_commit(tp, 0);
7463 +       xfs_iunlock(ip, lock_flags);
7464 +
7465 +       if (code)
7466 +               return code;
7467 +       return 0;
7468 +
7469 + error_return:
7470 +       xfs_trans_cancel(tp, 0);
7471 +       if (lock_flags)         /* still 0 for the one goto above */
7472 +               xfs_iunlock(ip, lock_flags);
7473 +       return code;
7474 +}
7475 +
7476  int
7477  xfs_setattr(
7478         struct xfs_inode        *ip,
7479 @@ -68,6 +152,7 @@ xfs_setattr(
7480         uint                    commit_flags=0;
7481         uid_t                   uid=0, iuid=0;
7482         gid_t                   gid=0, igid=0;
7483 +       tag_t                   tag=0, itag=0;
7484         int                     timeflags = 0;
7485         struct xfs_dquot        *udqp, *gdqp, *olddquot1, *olddquot2;
7486         int                     need_iolock = 1;
7487 @@ -164,7 +249,7 @@ xfs_setattr(
7488         /*
7489          * Change file ownership.  Must be the owner or privileged.
7490          */
7491 -       if (mask & (ATTR_UID|ATTR_GID)) {
7492 +       if (mask & (ATTR_UID|ATTR_GID|ATTR_TAG)) {
7493                 /*
7494                  * These IDs could have changed since we last looked at them.
7495                  * But, we're assured that if the ownership did change
7496 @@ -173,15 +258,19 @@ xfs_setattr(
7497                  */
7498                 iuid = ip->i_d.di_uid;
7499                 igid = ip->i_d.di_gid;
7500 +               itag = ip->i_d.di_tag;
7501                 gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
7502                 uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
7503 +               tag = (mask & ATTR_TAG) ? iattr->ia_tag : itag;
7504  
7505                 /*
7506                  * Do a quota reservation only if uid/gid is actually
7507                  * going to change.
7508                  */
7509                 if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
7510 -                   (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
7511 +                   (XFS_IS_GQUOTA_ON(mp) && igid != gid) ||
7512 +                   (XFS_IS_GQUOTA_ON(mp) && itag != tag)) {
7513 +                       /* TODO: handle tagging? */
7514                         ASSERT(tp);
7515                         code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
7516                                                 capable(CAP_FOWNER) ?
7517 @@ -334,7 +423,7 @@ xfs_setattr(
7518         /*
7519          * Change file ownership.  Must be the owner or privileged.
7520          */
7521 -       if (mask & (ATTR_UID|ATTR_GID)) {
7522 +       if (mask & (ATTR_UID|ATTR_GID|ATTR_TAG)) {
7523                 /*
7524                  * CAP_FSETID overrides the following restrictions:
7525                  *
7526 @@ -350,6 +439,10 @@ xfs_setattr(
7527                  * Change the ownerships and register quota modifications
7528                  * in the transaction.
7529                  */
7530 +               if (itag != tag) {
7531 +                       ip->i_d.di_tag = tag;
7532 +                       inode->i_tag = tag;
7533 +               }
7534                 if (iuid != uid) {
7535                         if (XFS_IS_UQUOTA_ON(mp)) {
7536                                 ASSERT(mask & ATTR_UID);
7537 diff -NurpP --minimal linux-2.6.30.2/fs/xfs/xfs_vnodeops.h linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/xfs_vnodeops.h
7538 --- linux-2.6.30.2/fs/xfs/xfs_vnodeops.h        2009-06-11 17:13:09.000000000 +0200
7539 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/fs/xfs/xfs_vnodeops.h     2009-07-04 01:11:39.000000000 +0200
7540 @@ -14,6 +14,7 @@ struct xfs_inode;
7541  struct xfs_iomap;
7542  
7543  
7544 +int xfs_sync_xflags(struct xfs_inode *ip);
7545  int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
7546  #define        XFS_ATTR_DMI            0x01    /* invocation from a DMI function */
7547  #define        XFS_ATTR_NONBLOCK       0x02    /* return EAGAIN if operation would block */
7548 diff -NurpP --minimal linux-2.6.30.2/include/asm-generic/tlb.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/asm-generic/tlb.h
7549 --- linux-2.6.30.2/include/asm-generic/tlb.h    2008-12-25 00:26:37.000000000 +0100
7550 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/asm-generic/tlb.h 2009-07-04 01:11:39.000000000 +0200
7551 @@ -14,6 +14,7 @@
7552  #define _ASM_GENERIC__TLB_H
7553  
7554  #include <linux/swap.h>
7555 +#include <linux/vs_memory.h>
7556  #include <asm/pgalloc.h>
7557  #include <asm/tlbflush.h>
7558  
7559 diff -NurpP --minimal linux-2.6.30.2/include/linux/capability.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/capability.h
7560 --- linux-2.6.30.2/include/linux/capability.h   2009-06-11 17:13:13.000000000 +0200
7561 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/capability.h        2009-07-04 01:11:39.000000000 +0200
7562 @@ -285,6 +285,7 @@ struct cpu_vfs_cap_data {
7563     arbitrary SCSI commands */
7564  /* Allow setting encryption key on loopback filesystem */
7565  /* Allow setting zone reclaim policy */
7566 +/* Allow the selection of a security context */
7567  
7568  #define CAP_SYS_ADMIN        21
7569  
7570 @@ -357,7 +358,13 @@ struct cpu_vfs_cap_data {
7571  
7572  #define CAP_MAC_ADMIN        33
7573  
7574 -#define CAP_LAST_CAP         CAP_MAC_ADMIN
7575 +/* Allow context manipulations */
7576 +/* Allow changing context info on files */
7577 +
7578 +#define CAP_CONTEXT         34
7579 +
7580 +
7581 +#define CAP_LAST_CAP         CAP_CONTEXT
7582  
7583  #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
7584  
7585 diff -NurpP --minimal linux-2.6.30.2/include/linux/devpts_fs.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/devpts_fs.h
7586 --- linux-2.6.30.2/include/linux/devpts_fs.h    2008-12-25 00:26:37.000000000 +0100
7587 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/devpts_fs.h 2009-07-04 01:11:39.000000000 +0200
7588 @@ -45,5 +45,4 @@ static inline void devpts_pty_kill(struc
7589  
7590  #endif
7591  
7592 -
7593  #endif /* _LINUX_DEVPTS_FS_H */
7594 diff -NurpP --minimal linux-2.6.30.2/include/linux/ext2_fs.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/ext2_fs.h
7595 --- linux-2.6.30.2/include/linux/ext2_fs.h      2009-03-24 14:22:41.000000000 +0100
7596 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/ext2_fs.h   2009-07-04 01:11:39.000000000 +0200
7597 @@ -189,8 +189,12 @@ struct ext2_group_desc
7598  #define EXT2_NOTAIL_FL                 FS_NOTAIL_FL    /* file tail should not be merged */
7599  #define EXT2_DIRSYNC_FL                        FS_DIRSYNC_FL   /* dirsync behaviour (directories only) */
7600  #define EXT2_TOPDIR_FL                 FS_TOPDIR_FL    /* Top of directory hierarchies*/
7601 +#define EXT2_IXUNLINK_FL               FS_IXUNLINK_FL  /* Immutable invert on unlink */
7602  #define EXT2_RESERVED_FL               FS_RESERVED_FL  /* reserved for ext2 lib */
7603  
7604 +#define EXT2_BARRIER_FL                        FS_BARRIER_FL   /* Barrier for chroot() */
7605 +#define EXT2_COW_FL                    FS_COW_FL       /* Copy on Write marker */
7606 +
7607  #define EXT2_FL_USER_VISIBLE           FS_FL_USER_VISIBLE      /* User visible flags */
7608  #define EXT2_FL_USER_MODIFIABLE                FS_FL_USER_MODIFIABLE   /* User modifiable flags */
7609  
7610 @@ -271,7 +275,7 @@ struct ext2_inode {
7611                 struct {
7612                         __u8    l_i_frag;       /* Fragment number */
7613                         __u8    l_i_fsize;      /* Fragment size */
7614 -                       __u16   i_pad1;
7615 +                       __u16   l_i_tag;        /* Context Tag */
7616                         __le16  l_i_uid_high;   /* these 2 fields    */
7617                         __le16  l_i_gid_high;   /* were reserved2[0] */
7618                         __u32   l_i_reserved2;
7619 @@ -303,6 +307,7 @@ struct ext2_inode {
7620  #define i_gid_low      i_gid
7621  #define i_uid_high     osd2.linux2.l_i_uid_high
7622  #define i_gid_high     osd2.linux2.l_i_gid_high
7623 +#define i_raw_tag      osd2.linux2.l_i_tag
7624  #define i_reserved2    osd2.linux2.l_i_reserved2
7625  #endif
7626  
7627 @@ -347,6 +352,7 @@ struct ext2_inode {
7628  #define EXT2_MOUNT_USRQUOTA            0x020000  /* user quota */
7629  #define EXT2_MOUNT_GRPQUOTA            0x040000  /* group quota */
7630  #define EXT2_MOUNT_RESERVATION         0x080000  /* Preallocation */
7631 +#define EXT2_MOUNT_TAGGED              (1<<24)   /* Enable Context Tags */
7632  
7633  
7634  #define clear_opt(o, opt)              o &= ~EXT2_MOUNT_##opt
7635 diff -NurpP --minimal linux-2.6.30.2/include/linux/ext3_fs.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/ext3_fs.h
7636 --- linux-2.6.30.2/include/linux/ext3_fs.h      2009-06-11 17:13:14.000000000 +0200
7637 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/ext3_fs.h   2009-07-04 01:11:39.000000000 +0200
7638 @@ -173,10 +173,14 @@ struct ext3_group_desc
7639  #define EXT3_NOTAIL_FL                 0x00008000 /* file tail should not be merged */
7640  #define EXT3_DIRSYNC_FL                        0x00010000 /* dirsync behaviour (directories only) */
7641  #define EXT3_TOPDIR_FL                 0x00020000 /* Top of directory hierarchies*/
7642 +#define EXT3_IXUNLINK_FL               0x08000000 /* Immutable invert on unlink */
7643  #define EXT3_RESERVED_FL               0x80000000 /* reserved for ext3 lib */
7644  
7645 -#define EXT3_FL_USER_VISIBLE           0x0003DFFF /* User visible flags */
7646 -#define EXT3_FL_USER_MODIFIABLE                0x000380FF /* User modifiable flags */
7647 +#define EXT3_BARRIER_FL                        0x04000000 /* Barrier for chroot() */
7648 +#define EXT3_COW_FL                    0x20000000 /* Copy on Write marker */
7649 +
7650 +#define EXT3_FL_USER_VISIBLE           0x0103DFFF /* User visible flags */
7651 +#define EXT3_FL_USER_MODIFIABLE                0x010380FF /* User modifiable flags */
7652  
7653  /* Flags that should be inherited by new inodes from their parent. */
7654  #define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\
7655 @@ -317,7 +321,7 @@ struct ext3_inode {
7656                 struct {
7657                         __u8    l_i_frag;       /* Fragment number */
7658                         __u8    l_i_fsize;      /* Fragment size */
7659 -                       __u16   i_pad1;
7660 +                       __u16   l_i_tag;        /* Context Tag */
7661                         __le16  l_i_uid_high;   /* these 2 fields    */
7662                         __le16  l_i_gid_high;   /* were reserved2[0] */
7663                         __u32   l_i_reserved2;
7664 @@ -351,6 +355,7 @@ struct ext3_inode {
7665  #define i_gid_low      i_gid
7666  #define i_uid_high     osd2.linux2.l_i_uid_high
7667  #define i_gid_high     osd2.linux2.l_i_gid_high
7668 +#define i_raw_tag      osd2.linux2.l_i_tag
7669  #define i_reserved2    osd2.linux2.l_i_reserved2
7670  
7671  #elif defined(__GNU__)
7672 @@ -414,6 +419,7 @@ struct ext3_inode {
7673  #define EXT3_MOUNT_GRPQUOTA            0x200000 /* "old" group quota */
7674  #define EXT3_MOUNT_DATA_ERR_ABORT      0x400000 /* Abort on file data write
7675                                                   * error in ordered mode */
7676 +#define EXT3_MOUNT_TAGGED              (1<<24) /* Enable Context Tags */
7677  
7678  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
7679  #ifndef _LINUX_EXT2_FS_H
7680 @@ -875,6 +881,7 @@ struct buffer_head * ext3_bread (handle_
7681  int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
7682         sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result,
7683         int create, int extend_disksize);
7684 +extern int ext3_sync_flags(struct inode *inode);
7685  
7686  extern struct inode *ext3_iget(struct super_block *, unsigned long);
7687  extern int  ext3_write_inode (struct inode *, int);
7688 diff -NurpP --minimal linux-2.6.30.2/include/linux/fs.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/fs.h
7689 --- linux-2.6.30.2/include/linux/fs.h   2009-06-11 17:13:14.000000000 +0200
7690 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/fs.h        2009-07-04 02:28:05.000000000 +0200
7691 @@ -205,6 +205,9 @@ struct inodes_stat_t {
7692  #define MS_KERNMOUNT   (1<<22) /* this is a kern_mount call */
7693  #define MS_I_VERSION   (1<<23) /* Update inode I_version field */
7694  #define MS_STRICTATIME (1<<24) /* Always perform atime updates */
7695 +#define MS_TAGGED      (1<<25) /* use generic inode tagging */
7696 +#define MS_TAGID       (1<<26) /* use specific tag for this mount */
7697 +#define MS_NOTAGCHECK  (1<<27) /* don't check tags */
7698  #define MS_ACTIVE      (1<<30)
7699  #define MS_NOUSER      (1<<31)
7700  
7701 @@ -231,6 +234,14 @@ struct inodes_stat_t {
7702  #define S_NOCMTIME     128     /* Do not update file c/mtime */
7703  #define S_SWAPFILE     256     /* Do not truncate: swapon got its bmaps */
7704  #define S_PRIVATE      512     /* Inode is fs-internal */
7705 +#define S_IXUNLINK     1024    /* Immutable Invert on unlink */
7706 +
7707 +/* Linux-VServer related Inode flags */
7708 +
7709 +#define V_VALID                1
7710 +#define V_XATTR                2
7711 +#define V_BARRIER      4       /* Barrier for chroot() */
7712 +#define V_COW          8       /* Copy on Write */
7713  
7714  /*
7715   * Note that nosuid etc flags are inode-specific: setting some file-system
7716 @@ -253,12 +264,15 @@ struct inodes_stat_t {
7717  #define IS_DIRSYNC(inode)      (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \
7718                                         ((inode)->i_flags & (S_SYNC|S_DIRSYNC)))
7719  #define IS_MANDLOCK(inode)     __IS_FLG(inode, MS_MANDLOCK)
7720 -#define IS_NOATIME(inode)   __IS_FLG(inode, MS_RDONLY|MS_NOATIME)
7721 -#define IS_I_VERSION(inode)   __IS_FLG(inode, MS_I_VERSION)
7722 +#define IS_NOATIME(inode)      __IS_FLG(inode, MS_RDONLY|MS_NOATIME)
7723 +#define IS_I_VERSION(inode)    __IS_FLG(inode, MS_I_VERSION)
7724 +#define IS_TAGGED(inode)       __IS_FLG(inode, MS_TAGGED)
7725  
7726  #define IS_NOQUOTA(inode)      ((inode)->i_flags & S_NOQUOTA)
7727  #define IS_APPEND(inode)       ((inode)->i_flags & S_APPEND)
7728  #define IS_IMMUTABLE(inode)    ((inode)->i_flags & S_IMMUTABLE)
7729 +#define IS_IXUNLINK(inode)     ((inode)->i_flags & S_IXUNLINK)
7730 +#define IS_IXORUNLINK(inode)   ((IS_IXUNLINK(inode) ? S_IMMUTABLE : 0) ^ IS_IMMUTABLE(inode))
7731  #define IS_POSIXACL(inode)     __IS_FLG(inode, MS_POSIXACL)
7732  
7733  #define IS_DEADDIR(inode)      ((inode)->i_flags & S_DEAD)
7734 @@ -266,6 +280,16 @@ struct inodes_stat_t {
7735  #define IS_SWAPFILE(inode)     ((inode)->i_flags & S_SWAPFILE)
7736  #define IS_PRIVATE(inode)      ((inode)->i_flags & S_PRIVATE)
7737  
7738 +#define IS_BARRIER(inode)      (S_ISDIR((inode)->i_mode) && ((inode)->i_vflags & V_BARRIER))
7739 +
7740 +#ifdef CONFIG_VSERVER_COWBL
7741 +#  define IS_COW(inode)                (IS_IXUNLINK(inode) && IS_IMMUTABLE(inode))
7742 +#  define IS_COW_LINK(inode)   (S_ISREG((inode)->i_mode) && ((inode)->i_nlink > 1))
7743 +#else
7744 +#  define IS_COW(inode)                (0)
7745 +#  define IS_COW_LINK(inode)   (0)
7746 +#endif
7747 +
7748  /* the read-only stuff doesn't really belong here, but any other place is
7749     probably as bad and I don't want to create yet another include file. */
7750  
7751 @@ -343,11 +367,14 @@ struct inodes_stat_t {
7752  #define FS_TOPDIR_FL                   0x00020000 /* Top of directory hierarchies*/
7753  #define FS_EXTENT_FL                   0x00080000 /* Extents */
7754  #define FS_DIRECTIO_FL                 0x00100000 /* Use direct i/o */
7755 +#define FS_IXUNLINK_FL                 0x08000000 /* Immutable invert on unlink */
7756  #define FS_RESERVED_FL                 0x80000000 /* reserved for ext2 lib */
7757  
7758 -#define FS_FL_USER_VISIBLE             0x0003DFFF /* User visible flags */
7759 -#define FS_FL_USER_MODIFIABLE          0x000380FF /* User modifiable flags */
7760 +#define FS_BARRIER_FL                  0x04000000 /* Barrier for chroot() */
7761 +#define FS_COW_FL                      0x20000000 /* Copy on Write marker */
7762  
7763 +#define FS_FL_USER_VISIBLE             0x0103DFFF /* User visible flags */
7764 +#define FS_FL_USER_MODIFIABLE          0x010380FF /* User modifiable flags */
7765  
7766  #define SYNC_FILE_RANGE_WAIT_BEFORE    1
7767  #define SYNC_FILE_RANGE_WRITE          2
7768 @@ -429,6 +456,7 @@ typedef void (dio_iodone_t)(struct kiocb
7769  #define ATTR_KILL_PRIV (1 << 14)
7770  #define ATTR_OPEN      (1 << 15) /* Truncating from open(O_TRUNC) */
7771  #define ATTR_TIMES_SET (1 << 16)
7772 +#define ATTR_TAG       (1 << 17)
7773  
7774  /*
7775   * This is the Inode Attributes structure, used for notify_change().  It
7776 @@ -444,6 +472,7 @@ struct iattr {
7777         umode_t         ia_mode;
7778         uid_t           ia_uid;
7779         gid_t           ia_gid;
7780 +       tag_t           ia_tag;
7781         loff_t          ia_size;
7782         struct timespec ia_atime;
7783         struct timespec ia_mtime;
7784 @@ -457,6 +486,9 @@ struct iattr {
7785         struct file     *ia_file;
7786  };
7787  
7788 +#define ATTR_FLAG_BARRIER      512     /* Barrier for chroot() */
7789 +#define ATTR_FLAG_IXUNLINK     1024    /* Immutable invert on unlink */
7790 +
7791  /*
7792   * Includes for diskquotas.
7793   */
7794 @@ -720,7 +752,9 @@ struct inode {
7795         unsigned int            i_nlink;
7796         uid_t                   i_uid;
7797         gid_t                   i_gid;
7798 +       tag_t                   i_tag;
7799         dev_t                   i_rdev;
7800 +       dev_t                   i_mdev;
7801         u64                     i_version;
7802         loff_t                  i_size;
7803  #ifdef __NEED_I_SIZE_ORDERED
7804 @@ -768,7 +802,8 @@ struct inode {
7805         unsigned long           i_state;
7806         unsigned long           dirtied_when;   /* jiffies of first dirtying */
7807  
7808 -       unsigned int            i_flags;
7809 +       unsigned short          i_flags;
7810 +       unsigned short          i_vflags;
7811  
7812         atomic_t                i_writecount;
7813  #ifdef CONFIG_SECURITY
7814 @@ -852,12 +887,12 @@ static inline void i_size_write(struct i
7815  
7816  static inline unsigned iminor(const struct inode *inode)
7817  {
7818 -       return MINOR(inode->i_rdev);
7819 +       return MINOR(inode->i_mdev);
7820  }
7821  
7822  static inline unsigned imajor(const struct inode *inode)
7823  {
7824 -       return MAJOR(inode->i_rdev);
7825 +       return MAJOR(inode->i_mdev);
7826  }
7827  
7828  extern struct block_device *I_BDEV(struct inode *inode);
7829 @@ -916,6 +951,7 @@ struct file {
7830         loff_t                  f_pos;
7831         struct fown_struct      f_owner;
7832         const struct cred       *f_cred;
7833 +       xid_t                   f_xid;
7834         struct file_ra_state    f_ra;
7835  
7836         u64                     f_version;
7837 @@ -1057,6 +1093,7 @@ struct file_lock {
7838         struct file *fl_file;
7839         loff_t fl_start;
7840         loff_t fl_end;
7841 +       xid_t fl_xid;
7842  
7843         struct fasync_struct *  fl_fasync; /* for lease break notifications */
7844         unsigned long fl_break_time;    /* for nonblocking lease breaks */
7845 @@ -1537,6 +1574,7 @@ struct inode_operations {
7846                           loff_t len);
7847         int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
7848                       u64 len);
7849 +       int (*sync_flags) (struct inode *);
7850  };
7851  
7852  struct seq_file;
7853 @@ -1552,6 +1590,7 @@ extern ssize_t vfs_readv(struct file *, 
7854                 unsigned long, loff_t *);
7855  extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
7856                 unsigned long, loff_t *);
7857 +ssize_t vfs_sendfile(struct file *, struct file *, loff_t *, size_t, loff_t);
7858  
7859  struct super_operations {
7860         struct inode *(*alloc_inode)(struct super_block *sb);
7861 @@ -2323,6 +2362,7 @@ extern int dcache_dir_open(struct inode 
7862  extern int dcache_dir_close(struct inode *, struct file *);
7863  extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
7864  extern int dcache_readdir(struct file *, void *, filldir_t);
7865 +extern int dcache_readdir_filter(struct file *, void *, filldir_t, int (*)(struct dentry *));
7866  extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *);
7867  extern int simple_statfs(struct dentry *, struct kstatfs *);
7868  extern int simple_link(struct dentry *, struct inode *, struct dentry *);
7869 diff -NurpP --minimal linux-2.6.30.2/include/linux/if_tun.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/if_tun.h
7870 --- linux-2.6.30.2/include/linux/if_tun.h       2009-06-11 17:13:14.000000000 +0200
7871 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/if_tun.h    2009-07-04 02:28:56.000000000 +0200
7872 @@ -48,6 +48,7 @@
7873  #define TUNGETIFF      _IOR('T', 210, unsigned int)
7874  #define TUNGETSNDBUF   _IOR('T', 211, int)
7875  #define TUNSETSNDBUF   _IOW('T', 212, int)
7876 +#define TUNSETNID     _IOW('T', 215, int)
7877  
7878  /* TUNSETIFF ifr flags */
7879  #define IFF_TUN                0x0001
7880 diff -NurpP --minimal linux-2.6.30.2/include/linux/init_task.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/init_task.h
7881 --- linux-2.6.30.2/include/linux/init_task.h    2009-06-11 17:13:14.000000000 +0200
7882 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/init_task.h 2009-07-04 02:26:54.000000000 +0200
7883 @@ -174,6 +174,10 @@ extern struct cred init_cred;
7884         INIT_TRACE_IRQFLAGS                                             \
7885         INIT_LOCKDEP                                                    \
7886         INIT_FTRACE_GRAPH                                               \
7887 +       .xid            = 0,                                            \
7888 +       .vx_info        = NULL,                                         \
7889 +       .nid            = 0,                                            \
7890 +       .nx_info        = NULL,                                         \
7891  }
7892  
7893  
7894 diff -NurpP --minimal linux-2.6.30.2/include/linux/interrupt.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/interrupt.h
7895 --- linux-2.6.30.2/include/linux/interrupt.h    2009-06-11 17:13:14.000000000 +0200
7896 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/interrupt.h 2009-07-04 01:11:39.000000000 +0200
7897 @@ -9,8 +9,8 @@
7898  #include <linux/cpumask.h>
7899  #include <linux/irqreturn.h>
7900  #include <linux/irqnr.h>
7901 -#include <linux/hardirq.h>
7902  #include <linux/sched.h>
7903 +#include <linux/hardirq.h>
7904  #include <linux/irqflags.h>
7905  #include <linux/smp.h>
7906  #include <linux/percpu.h>
7907 diff -NurpP --minimal linux-2.6.30.2/include/linux/ipc.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/ipc.h
7908 --- linux-2.6.30.2/include/linux/ipc.h  2008-12-25 00:26:37.000000000 +0100
7909 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/ipc.h       2009-07-04 01:11:39.000000000 +0200
7910 @@ -93,6 +93,7 @@ struct kern_ipc_perm
7911         key_t           key;
7912         uid_t           uid;
7913         gid_t           gid;
7914 +       xid_t           xid;
7915         uid_t           cuid;
7916         gid_t           cgid;
7917         mode_t          mode; 
7918 diff -NurpP --minimal linux-2.6.30.2/include/linux/Kbuild linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/Kbuild
7919 --- linux-2.6.30.2/include/linux/Kbuild 2009-06-11 17:13:13.000000000 +0200
7920 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/Kbuild      2009-07-04 01:11:39.000000000 +0200
7921 @@ -373,5 +373,8 @@ unifdef-y += xattr.h
7922  unifdef-y += xfrm.h
7923  
7924  objhdr-y += version.h
7925 +
7926 +header-y += vserver/
7927  header-y += wimax.h
7928  header-y += wimax/
7929 +
7930 diff -NurpP --minimal linux-2.6.30.2/include/linux/loop.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/loop.h
7931 --- linux-2.6.30.2/include/linux/loop.h 2009-06-11 17:13:15.000000000 +0200
7932 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/loop.h      2009-07-04 01:11:39.000000000 +0200
7933 @@ -45,6 +45,7 @@ struct loop_device {
7934         struct loop_func_table *lo_encryption;
7935         __u32           lo_init[2];
7936         uid_t           lo_key_owner;   /* Who set the key */
7937 +       xid_t           lo_xid;
7938         int             (*ioctl)(struct loop_device *, int cmd, 
7939                                  unsigned long arg); 
7940  
7941 diff -NurpP --minimal linux-2.6.30.2/include/linux/magic.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/magic.h
7942 --- linux-2.6.30.2/include/linux/magic.h        2009-06-11 17:13:15.000000000 +0200
7943 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/magic.h     2009-07-04 01:11:39.000000000 +0200
7944 @@ -3,7 +3,7 @@
7945  
7946  #define ADFS_SUPER_MAGIC       0xadf5
7947  #define AFFS_SUPER_MAGIC       0xadff
7948 -#define AFS_SUPER_MAGIC                0x5346414F
7949 +#define AFS_SUPER_MAGIC                0x5346414F
7950  #define AUTOFS_SUPER_MAGIC     0x0187
7951  #define CODA_SUPER_MAGIC       0x73757245
7952  #define DEBUGFS_MAGIC          0x64626720
7953 @@ -33,6 +33,7 @@
7954  #define NFS_SUPER_MAGIC                0x6969
7955  #define OPENPROM_SUPER_MAGIC   0x9fa1
7956  #define PROC_SUPER_MAGIC       0x9fa0
7957 +#define DEVPTS_SUPER_MAGIC     0x1cd1
7958  #define QNX4_SUPER_MAGIC       0x002f          /* qnx4 fs detection */
7959  
7960  #define REISERFS_SUPER_MAGIC   0x52654973      /* used by gcc */
7961 diff -NurpP --minimal linux-2.6.30.2/include/linux/major.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/major.h
7962 --- linux-2.6.30.2/include/linux/major.h        2009-06-11 17:13:15.000000000 +0200
7963 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/major.h     2009-07-04 01:11:39.000000000 +0200
7964 @@ -15,6 +15,7 @@
7965  #define HD_MAJOR               IDE0_MAJOR
7966  #define PTY_SLAVE_MAJOR                3
7967  #define TTY_MAJOR              4
7968 +#define VROOT_MAJOR            4
7969  #define TTYAUX_MAJOR           5
7970  #define LP_MAJOR               6
7971  #define VCS_MAJOR              7
7972 diff -NurpP --minimal linux-2.6.30.2/include/linux/mm_types.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/mm_types.h
7973 --- linux-2.6.30.2/include/linux/mm_types.h     2009-06-11 17:13:15.000000000 +0200
7974 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/mm_types.h  2009-07-04 01:11:39.000000000 +0200
7975 @@ -236,6 +236,7 @@ struct mm_struct {
7976  
7977         /* Architecture-specific MM context */
7978         mm_context_t context;
7979 +       struct vx_info *mm_vx_info;
7980  
7981         /* Swap token stuff */
7982         /*
7983 diff -NurpP --minimal linux-2.6.30.2/include/linux/mount.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/mount.h
7984 --- linux-2.6.30.2/include/linux/mount.h        2009-06-11 17:13:15.000000000 +0200
7985 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/mount.h     2009-07-04 01:11:39.000000000 +0200
7986 @@ -36,6 +36,9 @@ struct mnt_namespace;
7987  #define MNT_UNBINDABLE 0x2000  /* if the vfsmount is a unbindable mount */
7988  #define MNT_PNODE_MASK 0x3000  /* propagation flag mask */
7989  
7990 +#define MNT_TAGID      0x10000
7991 +#define MNT_NOTAG      0x20000
7992 +
7993  struct vfsmount {
7994         struct list_head mnt_hash;
7995         struct vfsmount *mnt_parent;    /* fs we are mounted on */
7996 @@ -70,6 +73,7 @@ struct vfsmount {
7997          * are held, and all mnt_writer[]s on this mount have 0 as their ->count
7998          */
7999         atomic_t __mnt_writers;
8000 +       tag_t mnt_tag;                  /* tagging used for vfsmount */
8001  };
8002  
8003  static inline struct vfsmount *mntget(struct vfsmount *mnt)
8004 diff -NurpP --minimal linux-2.6.30.2/include/linux/net.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/net.h
8005 --- linux-2.6.30.2/include/linux/net.h  2009-06-11 17:13:15.000000000 +0200
8006 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/net.h       2009-07-04 01:11:39.000000000 +0200
8007 @@ -68,6 +68,7 @@ struct net;
8008  #define SOCK_NOSPACE           2
8009  #define SOCK_PASSCRED          3
8010  #define SOCK_PASSSEC           4
8011 +#define SOCK_USER_SOCKET       5
8012  
8013  #ifndef ARCH_HAS_SOCKET_TYPES
8014  /**
8015 diff -NurpP --minimal linux-2.6.30.2/include/linux/nfs_mount.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/nfs_mount.h
8016 --- linux-2.6.30.2/include/linux/nfs_mount.h    2009-03-24 14:22:43.000000000 +0100
8017 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/nfs_mount.h 2009-07-04 01:11:39.000000000 +0200
8018 @@ -63,7 +63,8 @@ struct nfs_mount_data {
8019  #define NFS_MOUNT_SECFLAVOUR   0x2000  /* 5 */
8020  #define NFS_MOUNT_NORDIRPLUS   0x4000  /* 5 */
8021  #define NFS_MOUNT_UNSHARED     0x8000  /* 5 */
8022 -#define NFS_MOUNT_FLAGMASK     0xFFFF
8023 +#define NFS_MOUNT_TAGGED       0x10000 /* context tagging */
8024 +#define NFS_MOUNT_FLAGMASK     0x1FFFF
8025  
8026  /* The following are for internal use only */
8027  #define NFS_MOUNT_LOOKUP_CACHE_NONEG   0x10000
8028 diff -NurpP --minimal linux-2.6.30.2/include/linux/nsproxy.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/nsproxy.h
8029 --- linux-2.6.30.2/include/linux/nsproxy.h      2009-06-11 17:13:17.000000000 +0200
8030 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/nsproxy.h   2009-07-04 01:11:39.000000000 +0200
8031 @@ -3,6 +3,7 @@
8032  
8033  #include <linux/spinlock.h>
8034  #include <linux/sched.h>
8035 +#include <linux/vserver/debug.h>
8036  
8037  struct mnt_namespace;
8038  struct uts_namespace;
8039 @@ -63,22 +64,33 @@ static inline struct nsproxy *task_nspro
8040  }
8041  
8042  int copy_namespaces(unsigned long flags, struct task_struct *tsk);
8043 +struct nsproxy *copy_nsproxy(struct nsproxy *orig);
8044  void exit_task_namespaces(struct task_struct *tsk);
8045  void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
8046  void free_nsproxy(struct nsproxy *ns);
8047  int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
8048         struct fs_struct *);
8049  
8050 -static inline void put_nsproxy(struct nsproxy *ns)
8051 +#define        get_nsproxy(n)  __get_nsproxy(n, __FILE__, __LINE__)
8052 +
8053 +static inline void __get_nsproxy(struct nsproxy *ns,
8054 +       const char *_file, int _line)
8055  {
8056 -       if (atomic_dec_and_test(&ns->count)) {
8057 -               free_nsproxy(ns);
8058 -       }
8059 +       vxlprintk(VXD_CBIT(space, 0), "get_nsproxy(%p[%u])",
8060 +               ns, atomic_read(&ns->count), _file, _line);
8061 +       atomic_inc(&ns->count);
8062  }
8063  
8064 -static inline void get_nsproxy(struct nsproxy *ns)
8065 +#define        put_nsproxy(n)  __put_nsproxy(n, __FILE__, __LINE__)
8066 +
8067 +static inline void __put_nsproxy(struct nsproxy *ns,
8068 +       const char *_file, int _line)
8069  {
8070 -       atomic_inc(&ns->count);
8071 +       vxlprintk(VXD_CBIT(space, 0), "put_nsproxy(%p[%u])",
8072 +               ns, atomic_read(&ns->count), _file, _line);
8073 +       if (atomic_dec_and_test(&ns->count)) {
8074 +               free_nsproxy(ns);
8075 +       }
8076  }
8077  
8078  #ifdef CONFIG_CGROUP_NS
8079 diff -NurpP --minimal linux-2.6.30.2/include/linux/pid.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/pid.h
8080 --- linux-2.6.30.2/include/linux/pid.h  2009-03-24 14:22:43.000000000 +0100
8081 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/pid.h       2009-07-04 01:11:39.000000000 +0200
8082 @@ -8,7 +8,8 @@ enum pid_type
8083         PIDTYPE_PID,
8084         PIDTYPE_PGID,
8085         PIDTYPE_SID,
8086 -       PIDTYPE_MAX
8087 +       PIDTYPE_MAX,
8088 +       PIDTYPE_REALPID
8089  };
8090  
8091  /*
8092 @@ -160,6 +161,7 @@ static inline pid_t pid_nr(struct pid *p
8093  }
8094  
8095  pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns);
8096 +pid_t pid_unmapped_nr_ns(struct pid *pid, struct pid_namespace *ns);
8097  pid_t pid_vnr(struct pid *pid);
8098  
8099  #define do_each_pid_task(pid, type, task)                              \
8100 diff -NurpP --minimal linux-2.6.30.2/include/linux/proc_fs.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/proc_fs.h
8101 --- linux-2.6.30.2/include/linux/proc_fs.h      2009-06-11 17:13:17.000000000 +0200
8102 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/proc_fs.h   2009-07-04 01:11:39.000000000 +0200
8103 @@ -56,6 +56,7 @@ struct proc_dir_entry {
8104         nlink_t nlink;
8105         uid_t uid;
8106         gid_t gid;
8107 +       int vx_flags;
8108         loff_t size;
8109         const struct inode_operations *proc_iops;
8110         /*
8111 @@ -264,12 +265,18 @@ static inline void kclist_add(struct kco
8112  extern void kclist_add(struct kcore_list *, void *, size_t);
8113  #endif
8114  
8115 +struct vx_info;
8116 +struct nx_info;
8117 +
8118  union proc_op {
8119         int (*proc_get_link)(struct inode *, struct path *);
8120         int (*proc_read)(struct task_struct *task, char *page);
8121         int (*proc_show)(struct seq_file *m,
8122                 struct pid_namespace *ns, struct pid *pid,
8123                 struct task_struct *task);
8124 +       int (*proc_vs_read)(char *page);
8125 +       int (*proc_vxi_read)(struct vx_info *vxi, char *page);
8126 +       int (*proc_nxi_read)(struct nx_info *nxi, char *page);
8127  };
8128  
8129  struct ctl_table_header;
8130 @@ -277,6 +284,7 @@ struct ctl_table;
8131  
8132  struct proc_inode {
8133         struct pid *pid;
8134 +       int vx_flags;
8135         int fd;
8136         union proc_op op;
8137         struct proc_dir_entry *pde;
8138 diff -NurpP --minimal linux-2.6.30.2/include/linux/reiserfs_fs.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/reiserfs_fs.h
8139 --- linux-2.6.30.2/include/linux/reiserfs_fs.h  2009-06-11 17:13:17.000000000 +0200
8140 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/reiserfs_fs.h       2009-07-04 01:11:39.000000000 +0200
8141 @@ -868,6 +868,11 @@ struct stat_data_v1 {
8142  #define REISERFS_COMPR_FL     FS_COMPR_FL
8143  #define REISERFS_NOTAIL_FL    FS_NOTAIL_FL
8144  
8145 +/* unfortunately reiserfs sdattr is only 16 bit */
8146 +#define REISERFS_IXUNLINK_FL  (FS_IXUNLINK_FL >> 16)
8147 +#define REISERFS_BARRIER_FL   (FS_BARRIER_FL >> 16)
8148 +#define REISERFS_COW_FL       (FS_COW_FL >> 16)
8149 +
8150  /* persistent flags that file inherits from the parent directory */
8151  #define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL |        \
8152                                 REISERFS_SYNC_FL |      \
8153 @@ -877,6 +882,9 @@ struct stat_data_v1 {
8154                                 REISERFS_COMPR_FL |     \
8155                                 REISERFS_NOTAIL_FL )
8156  
8157 +#define REISERFS_FL_USER_VISIBLE       0x80FF
8158 +#define REISERFS_FL_USER_MODIFIABLE    0x80FF
8159 +
8160  /* Stat Data on disk (reiserfs version of UFS disk inode minus the
8161     address blocks) */
8162  struct stat_data {
8163 @@ -1958,6 +1966,7 @@ static inline void reiserfs_update_sd(st
8164  void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode);
8165  void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs);
8166  int reiserfs_setattr(struct dentry *dentry, struct iattr *attr);
8167 +int reiserfs_sync_flags(struct inode *inode);
8168  
8169  /* namei.c */
8170  void set_de_name_and_namelen(struct reiserfs_dir_entry *de);
8171 diff -NurpP --minimal linux-2.6.30.2/include/linux/reiserfs_fs_sb.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/reiserfs_fs_sb.h
8172 --- linux-2.6.30.2/include/linux/reiserfs_fs_sb.h       2009-06-11 17:13:17.000000000 +0200
8173 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/reiserfs_fs_sb.h    2009-07-04 01:11:39.000000000 +0200
8174 @@ -455,6 +455,7 @@ enum reiserfs_mount_options {
8175         REISERFS_POSIXACL,
8176         REISERFS_BARRIER_NONE,
8177         REISERFS_BARRIER_FLUSH,
8178 +       REISERFS_TAGGED,
8179  
8180         /* Actions on error */
8181         REISERFS_ERROR_PANIC,
8182 diff -NurpP --minimal linux-2.6.30.2/include/linux/sched.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/sched.h
8183 --- linux-2.6.30.2/include/linux/sched.h        2009-06-11 17:13:17.000000000 +0200
8184 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/sched.h     2009-07-04 02:31:28.000000000 +0200
8185 @@ -71,7 +71,6 @@ struct sched_param {
8186  #include <linux/path.h>
8187  #include <linux/compiler.h>
8188  #include <linux/completion.h>
8189 -#include <linux/pid.h>
8190  #include <linux/percpu.h>
8191  #include <linux/topology.h>
8192  #include <linux/proportions.h>
8193 @@ -88,6 +87,7 @@ struct sched_param {
8194  #include <linux/kobject.h>
8195  #include <linux/latencytop.h>
8196  #include <linux/cred.h>
8197 +#include <linux/pid.h>
8198  
8199  #include <asm/processor.h>
8200  
8201 @@ -179,12 +179,13 @@ extern unsigned long long time_sync_thre
8202  #define TASK_UNINTERRUPTIBLE   2
8203  #define __TASK_STOPPED         4
8204  #define __TASK_TRACED          8
8205 +#define TASK_ONHOLD            16
8206  /* in tsk->exit_state */
8207 -#define EXIT_ZOMBIE            16
8208 -#define EXIT_DEAD              32
8209 +#define EXIT_ZOMBIE            32
8210 +#define EXIT_DEAD              64
8211  /* in tsk->state again */
8212 -#define TASK_DEAD              64
8213 -#define TASK_WAKEKILL          128
8214 +#define TASK_DEAD              128
8215 +#define TASK_WAKEKILL          256
8216  
8217  /* Convenience macros for the sake of set_task_state */
8218  #define TASK_KILLABLE          (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
8219 @@ -367,25 +368,28 @@ extern void arch_unmap_area_topdown(stru
8220   * The mm counters are not protected by its page_table_lock,
8221   * so must be incremented atomically.
8222   */
8223 -#define set_mm_counter(mm, member, value) atomic_long_set(&(mm)->_##member, value)
8224 -#define get_mm_counter(mm, member) ((unsigned long)atomic_long_read(&(mm)->_##member))
8225 -#define add_mm_counter(mm, member, value) atomic_long_add(value, &(mm)->_##member)
8226 -#define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member)
8227 -#define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member)
8228 +#define __set_mm_counter(mm, member, value) \
8229 +       atomic_long_set(&(mm)->_##member, value)
8230 +#define get_mm_counter(mm, member) \
8231 +       ((unsigned long)atomic_long_read(&(mm)->_##member))
8232  
8233  #else  /* !USE_SPLIT_PTLOCKS */
8234  /*
8235   * The mm counters are protected by its page_table_lock,
8236   * so can be incremented directly.
8237   */
8238 -#define set_mm_counter(mm, member, value) (mm)->_##member = (value)
8239 +#define __set_mm_counter(mm, member, value) (mm)->_##member = (value)
8240  #define get_mm_counter(mm, member) ((mm)->_##member)
8241 -#define add_mm_counter(mm, member, value) (mm)->_##member += (value)
8242 -#define inc_mm_counter(mm, member) (mm)->_##member++
8243 -#define dec_mm_counter(mm, member) (mm)->_##member--
8244  
8245  #endif /* !USE_SPLIT_PTLOCKS */
8246  
8247 +#define set_mm_counter(mm, member, value) \
8248 +       vx_ ## member ## pages_sub((mm), (get_mm_counter(mm, member) - (value)))
8249 +#define add_mm_counter(mm, member, value) \
8250 +       vx_ ## member ## pages_add((mm), (value))
8251 +#define inc_mm_counter(mm, member) vx_ ## member ## pages_inc((mm))
8252 +#define dec_mm_counter(mm, member) vx_ ## member ## pages_dec((mm))
8253 +
8254  #define get_mm_rss(mm)                                 \
8255         (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
8256  #define update_hiwater_rss(mm) do {                    \
8257 @@ -1134,7 +1138,9 @@ struct task_struct {
8258         const struct sched_class *sched_class;
8259         struct sched_entity se;
8260         struct sched_rt_entity rt;
8261 -
8262 +#ifdef CONFIG_VSERVER_HARDCPU
8263 +       struct list_head hq;
8264 +#endif
8265  #ifdef CONFIG_PREEMPT_NOTIFIERS
8266         /* list of struct preempt_notifier: */
8267         struct hlist_head preempt_notifiers;
8268 @@ -1291,6 +1297,14 @@ struct task_struct {
8269  #endif
8270         seccomp_t seccomp;
8271  
8272 +/* vserver context data */
8273 +       struct vx_info *vx_info;
8274 +       struct nx_info *nx_info;
8275 +
8276 +       xid_t xid;
8277 +       nid_t nid;
8278 +       tag_t tag;
8279 +
8280  /* Thread group tracking */
8281         u32 parent_exec_id;
8282         u32 self_exec_id;
8283 @@ -1508,6 +1522,11 @@ struct pid_namespace;
8284  pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
8285                         struct pid_namespace *ns);
8286  
8287 +#include <linux/vserver/base.h>
8288 +#include <linux/vserver/context.h>
8289 +#include <linux/vserver/debug.h>
8290 +#include <linux/vserver/pid.h>
8291 +
8292  static inline pid_t task_pid_nr(struct task_struct *tsk)
8293  {
8294         return tsk->pid;
8295 @@ -1521,7 +1540,8 @@ static inline pid_t task_pid_nr_ns(struc
8296  
8297  static inline pid_t task_pid_vnr(struct task_struct *tsk)
8298  {
8299 -       return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
8300 +       // return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
8301 +       return vx_map_pid(__task_pid_nr_ns(tsk, PIDTYPE_PID, NULL));
8302  }
8303  
8304  
8305 @@ -1534,7 +1554,7 @@ pid_t task_tgid_nr_ns(struct task_struct
8306  
8307  static inline pid_t task_tgid_vnr(struct task_struct *tsk)
8308  {
8309 -       return pid_vnr(task_tgid(tsk));
8310 +       return vx_map_tgid(pid_vnr(task_tgid(tsk)));
8311  }
8312  
8313  
8314 diff -NurpP --minimal linux-2.6.30.2/include/linux/shmem_fs.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/shmem_fs.h
8315 --- linux-2.6.30.2/include/linux/shmem_fs.h     2008-12-25 00:26:37.000000000 +0100
8316 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/shmem_fs.h  2009-07-04 01:11:39.000000000 +0200
8317 @@ -8,6 +8,9 @@
8318  
8319  #define SHMEM_NR_DIRECT 16
8320  
8321 +#define TMPFS_SUPER_MAGIC      0x01021994
8322 +
8323 +
8324  struct shmem_inode_info {
8325         spinlock_t              lock;
8326         unsigned long           flags;
8327 diff -NurpP --minimal linux-2.6.30.2/include/linux/stat.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/stat.h
8328 --- linux-2.6.30.2/include/linux/stat.h 2008-12-25 00:26:37.000000000 +0100
8329 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/stat.h      2009-07-04 01:11:39.000000000 +0200
8330 @@ -66,6 +66,7 @@ struct kstat {
8331         unsigned int    nlink;
8332         uid_t           uid;
8333         gid_t           gid;
8334 +       tag_t           tag;
8335         dev_t           rdev;
8336         loff_t          size;
8337         struct timespec  atime;
8338 diff -NurpP --minimal linux-2.6.30.2/include/linux/sunrpc/auth.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/sunrpc/auth.h
8339 --- linux-2.6.30.2/include/linux/sunrpc/auth.h  2008-12-25 00:26:37.000000000 +0100
8340 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/sunrpc/auth.h       2009-07-04 01:11:39.000000000 +0200
8341 @@ -25,6 +25,7 @@
8342  struct auth_cred {
8343         uid_t   uid;
8344         gid_t   gid;
8345 +       tag_t   tag;
8346         struct group_info *group_info;
8347         unsigned char machine_cred : 1;
8348  };
8349 diff -NurpP --minimal linux-2.6.30.2/include/linux/sunrpc/clnt.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/sunrpc/clnt.h
8350 --- linux-2.6.30.2/include/linux/sunrpc/clnt.h  2009-03-24 14:22:43.000000000 +0100
8351 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/sunrpc/clnt.h       2009-07-04 01:11:39.000000000 +0200
8352 @@ -43,7 +43,8 @@ struct rpc_clnt {
8353         unsigned int            cl_softrtry : 1,/* soft timeouts */
8354                                 cl_discrtry : 1,/* disconnect before retry */
8355                                 cl_autobind : 1,/* use getport() */
8356 -                               cl_chatty   : 1;/* be verbose */
8357 +                               cl_chatty   : 1,/* be verbose */
8358 +                               cl_tag      : 1;/* context tagging */
8359  
8360         struct rpc_rtt *        cl_rtt;         /* RTO estimator data */
8361         const struct rpc_timeout *cl_timeout;   /* Timeout strategy */
8362 diff -NurpP --minimal linux-2.6.30.2/include/linux/syscalls.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/syscalls.h
8363 --- linux-2.6.30.2/include/linux/syscalls.h     2009-06-11 17:13:18.000000000 +0200
8364 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/syscalls.h  2009-07-04 01:11:39.000000000 +0200
8365 @@ -425,6 +425,8 @@ asmlinkage long sys_symlink(const char _
8366  asmlinkage long sys_unlink(const char __user *pathname);
8367  asmlinkage long sys_rename(const char __user *oldname,
8368                                 const char __user *newname);
8369 +asmlinkage long sys_copyfile(const char __user *from, const char __user *to,
8370 +                               umode_t mode);
8371  asmlinkage long sys_chmod(const char __user *filename, mode_t mode);
8372  asmlinkage long sys_fchmod(unsigned int fd, mode_t mode);
8373  
8374 diff -NurpP --minimal linux-2.6.30.2/include/linux/sysctl.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/sysctl.h
8375 --- linux-2.6.30.2/include/linux/sysctl.h       2009-06-11 17:13:18.000000000 +0200
8376 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/sysctl.h    2009-07-04 01:11:39.000000000 +0200
8377 @@ -70,6 +70,7 @@ enum
8378         CTL_ABI=9,              /* Binary emulation */
8379         CTL_CPU=10,             /* CPU stuff (speed scaling, etc) */
8380         CTL_ARLAN=254,          /* arlan wireless driver */
8381 +       CTL_VSERVER=4242,       /* Linux-VServer debug */
8382         CTL_S390DBF=5677,       /* s390 debug */
8383         CTL_SUNRPC=7249,        /* sunrpc debug */
8384         CTL_PM=9899,            /* frv power management */
8385 @@ -104,6 +105,7 @@ enum
8386  
8387         KERN_PANIC=15,          /* int: panic timeout */
8388         KERN_REALROOTDEV=16,    /* real root device to mount after initrd */
8389 +       KERN_VSHELPER=17,       /* string: path to vshelper policy agent */
8390  
8391         KERN_SPARC_REBOOT=21,   /* reboot command on Sparc */
8392         KERN_CTLALTDEL=22,      /* int: allow ctl-alt-del to reboot */
8393 diff -NurpP --minimal linux-2.6.30.2/include/linux/sysfs.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/sysfs.h
8394 --- linux-2.6.30.2/include/linux/sysfs.h        2008-12-25 00:26:37.000000000 +0100
8395 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/sysfs.h     2009-07-04 01:11:39.000000000 +0200
8396 @@ -17,6 +17,8 @@
8397  #include <linux/list.h>
8398  #include <asm/atomic.h>
8399  
8400 +#define SYSFS_SUPER_MAGIC      0x62656572
8401 +
8402  struct kobject;
8403  struct module;
8404  
8405 diff -NurpP --minimal linux-2.6.30.2/include/linux/time.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/time.h
8406 --- linux-2.6.30.2/include/linux/time.h 2009-06-11 17:13:18.000000000 +0200
8407 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/time.h      2009-07-04 01:11:39.000000000 +0200
8408 @@ -190,6 +190,9 @@ static __always_inline void timespec_add
8409         a->tv_sec += __iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns);
8410         a->tv_nsec = ns;
8411  }
8412 +
8413 +#include <linux/vs_time.h>
8414 +
8415  #endif /* __KERNEL__ */
8416  
8417  #define NFDBITS                        __NFDBITS
8418 diff -NurpP --minimal linux-2.6.30.2/include/linux/types.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/types.h
8419 --- linux-2.6.30.2/include/linux/types.h        2009-06-11 17:13:18.000000000 +0200
8420 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/types.h     2009-07-04 01:11:39.000000000 +0200
8421 @@ -37,6 +37,9 @@ typedef __kernel_uid32_t      uid_t;
8422  typedef __kernel_gid32_t       gid_t;
8423  typedef __kernel_uid16_t        uid16_t;
8424  typedef __kernel_gid16_t        gid16_t;
8425 +typedef unsigned int           xid_t;
8426 +typedef unsigned int           nid_t;
8427 +typedef unsigned int           tag_t;
8428  
8429  typedef unsigned long          uintptr_t;
8430  
8431 diff -NurpP --minimal linux-2.6.30.2/include/linux/vroot.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vroot.h
8432 --- linux-2.6.30.2/include/linux/vroot.h        1970-01-01 01:00:00.000000000 +0100
8433 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vroot.h     2009-07-04 01:11:39.000000000 +0200
8434 @@ -0,0 +1,51 @@
8435 +
8436 +/*
8437 + * include/linux/vroot.h
8438 + *
8439 + * written by Herbert Pötzl, 9/11/2002
8440 + * ported to 2.6 by Herbert Pötzl, 30/12/2004
8441 + *
8442 + * Copyright (C) 2002-2007 by Herbert Pötzl.
8443 + * Redistribution of this file is permitted under the
8444 + * GNU General Public License.
8445 + */
8446 +
8447 +#ifndef _LINUX_VROOT_H
8448 +#define _LINUX_VROOT_H
8449 +
8450 +
8451 +#ifdef __KERNEL__
8452 +
8453 +/* Possible states of device */
8454 +enum {
8455 +       Vr_unbound,
8456 +       Vr_bound,
8457 +};
8458 +
8459 +struct vroot_device {
8460 +       int             vr_number;
8461 +       int             vr_refcnt;
8462 +
8463 +       struct semaphore        vr_ctl_mutex;
8464 +       struct block_device    *vr_device;
8465 +       int                     vr_state;
8466 +};
8467 +
8468 +
8469 +typedef struct block_device *(vroot_grb_func)(struct block_device *);
8470 +
8471 +extern int register_vroot_grb(vroot_grb_func *);
8472 +extern int unregister_vroot_grb(vroot_grb_func *);
8473 +
8474 +#endif /* __KERNEL__ */
8475 +
8476 +#define MAX_VROOT_DEFAULT      8
8477 +
8478 +/*
8479 + * IOCTL commands --- we will commandeer 0x56 ('V')
8480 + */
8481 +
8482 +#define VROOT_SET_DEV          0x5600
8483 +#define VROOT_CLR_DEV          0x5601
8484 +
8485 +#endif /* _LINUX_VROOT_H */
8486 diff -NurpP --minimal linux-2.6.30.2/include/linux/vs_base.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_base.h
8487 --- linux-2.6.30.2/include/linux/vs_base.h      1970-01-01 01:00:00.000000000 +0100
8488 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_base.h   2009-07-04 01:11:39.000000000 +0200
8489 @@ -0,0 +1,10 @@
8490 +#ifndef _VS_BASE_H
8491 +#define _VS_BASE_H
8492 +
8493 +#include "vserver/base.h"
8494 +#include "vserver/check.h"
8495 +#include "vserver/debug.h"
8496 +
8497 +#else
8498 +#warning duplicate inclusion
8499 +#endif
8500 diff -NurpP --minimal linux-2.6.30.2/include/linux/vs_context.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_context.h
8501 --- linux-2.6.30.2/include/linux/vs_context.h   1970-01-01 01:00:00.000000000 +0100
8502 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_context.h        2009-07-04 01:11:39.000000000 +0200
8503 @@ -0,0 +1,227 @@
8504 +#ifndef _VS_CONTEXT_H
8505 +#define _VS_CONTEXT_H
8506 +
8507 +#include "vserver/base.h"
8508 +#include "vserver/check.h"
8509 +#include "vserver/context.h"
8510 +#include "vserver/history.h"
8511 +#include "vserver/debug.h"
8512 +
8513 +#include <linux/sched.h>
8514 +
8515 +/* Take a usage reference on a context; a NULL context is passed through. */
8516 +#define get_vx_info(i) __get_vx_info(i, __FILE__, __LINE__, __HERE__)
8517 +
8518 +static inline struct vx_info *__get_vx_info(struct vx_info *vxi,
8519 +       const char *_file, int _line, void *_here)
8520 +{
8521 +       if (vxi) {
8522 +               /* the trace prints vx_usecnt as it is before the increment */
8523 +               vxlprintk(VXD_CBIT(xid, 2), "get_vx_info(%p[#%d.%d])",
8524 +                       vxi, vxi->vx_id,
8525 +                       atomic_read(&vxi->vx_usecnt),
8526 +                       _file, _line);
8527 +               __vxh_get_vx_info(vxi, _here);
8528 +
8529 +               atomic_inc(&vxi->vx_usecnt);
8530 +       }
8531 +       return vxi;
8532 +}
8533 +
8534 +
8535 +extern void free_vx_info(struct vx_info *);
8536 +/* Drop a usage reference; the final put calls free_vx_info(). NULL is a no-op. */
8537 +#define put_vx_info(i) __put_vx_info(i, __FILE__, __LINE__, __HERE__)
8538 +
8539 +static inline void __put_vx_info(struct vx_info *vxi,
8540 +       const char *_file, int _line, void *_here)
8541 +{
8542 +       if (vxi) {
8543 +               vxlprintk(VXD_CBIT(xid, 2), "put_vx_info(%p[#%d.%d])",
8544 +                       vxi, vxi->vx_id,
8545 +                       atomic_read(&vxi->vx_usecnt),
8546 +                       _file, _line);
8547 +               __vxh_put_vx_info(vxi, _here);
8548 +
8549 +               /* vx_usecnt dropped to zero: release the context */
8550 +               if (atomic_dec_and_test(&vxi->vx_usecnt))
8551 +                       free_vx_info(vxi);
8552 +       }
8553 +}
8554 +
8555 +/* Store vxi into *vxp, taking a usage reference when vxi is non-NULL. */
8556 +#define init_vx_info(p, i) \
8557 +       __init_vx_info(p, i, __FILE__, __LINE__, __HERE__)
8558 +
8559 +static inline void __init_vx_info(struct vx_info **vxp, struct vx_info *vxi,
8560 +       const char *_file, int _line, void *_here)
8561 +{
8562 +       if (vxi) {
8563 +               vxlprintk(VXD_CBIT(xid, 3),
8564 +                       "init_vx_info(%p[#%d.%d])",
8565 +                       vxi, vxi ? vxi->vx_id : 0,
8566 +                       vxi ? atomic_read(&vxi->vx_usecnt) : 0,
8567 +                       _file, _line);
8568 +               __vxh_init_vx_info(vxi, vxp, _here);
8569 +
8570 +               atomic_inc(&vxi->vx_usecnt);
8571 +       }
8572 +       *vxp = vxi;     /* plain store, also for NULL; the old *vxp is not released */
8573 +}
8574 +
8575 +/* Install vxi in *vxp with a usage reference; does nothing for a NULL vxi. */
8576 +#define set_vx_info(p, i) \
8577 +       __set_vx_info(p, i, __FILE__, __LINE__, __HERE__)
8578 +
8579 +static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi,
8580 +       const char *_file, int _line, void *_here)
8581 +{
8582 +       struct vx_info *vxo;
8583 +
8584 +       if (!vxi)
8585 +               return;
8586 +
8587 +       vxlprintk(VXD_CBIT(xid, 3), "set_vx_info(%p[#%d.%d])",
8588 +               vxi, vxi ? vxi->vx_id : 0,
8589 +               vxi ? atomic_read(&vxi->vx_usecnt) : 0,
8590 +               _file, _line);
8591 +       __vxh_set_vx_info(vxi, vxp, _here);
8592 +
8593 +       atomic_inc(&vxi->vx_usecnt);    /* reference is taken before publishing */
8594 +       vxo = xchg(vxp, vxi);
8595 +       BUG_ON(vxo);    /* *vxp must have been NULL before the exchange */
8596 +}
8597 +
8598 +/* Exchange NULL into *vxp and drop one usage reference on the old value, if any. */
8599 +#define clr_vx_info(p) __clr_vx_info(p, __FILE__, __LINE__, __HERE__)
8600 +
8601 +static inline void __clr_vx_info(struct vx_info **vxp,
8602 +       const char *_file, int _line, void *_here)
8603 +{
8604 +       struct vx_info *vxo;
8605 +
8606 +       vxo = xchg(vxp, NULL);
8607 +       if (!vxo)       /* slot was already empty */
8608 +               return;
8609 +
8610 +       vxlprintk(VXD_CBIT(xid, 3), "clr_vx_info(%p[#%d.%d])",
8611 +               vxo, vxo ? vxo->vx_id : 0,
8612 +               vxo ? atomic_read(&vxo->vx_usecnt) : 0,
8613 +               _file, _line);
8614 +       __vxh_clr_vx_info(vxo, vxp, _here);
8615 +
8616 +       if (atomic_dec_and_test(&vxo->vx_usecnt))
8617 +               free_vx_info(vxo);
8618 +}
8619 +
8620 +/* Increment vxi->vx_tasks; task is only handed to the trace and __vxh hooks. */
8621 +#define claim_vx_info(v, p) \
8622 +       __claim_vx_info(v, p, __FILE__, __LINE__, __HERE__)
8623 +
8624 +static inline void __claim_vx_info(struct vx_info *vxi,
8625 +       struct task_struct *task,
8626 +       const char *_file, int _line, void *_here)
8627 +{
8628 +       vxlprintk(VXD_CBIT(xid, 3), "claim_vx_info(%p[#%d.%d.%d]) %p",
8629 +               vxi, vxi ? vxi->vx_id : 0,
8630 +               vxi ? atomic_read(&vxi->vx_usecnt) : 0,
8631 +               vxi ? atomic_read(&vxi->vx_tasks) : 0,
8632 +               task, _file, _line);
8633 +       __vxh_claim_vx_info(vxi, task, _here);
8634 +       /* NOTE(review): vxi is dereferenced unconditionally below - callers must pass non-NULL */
8635 +       atomic_inc(&vxi->vx_tasks);
8636 +}
8637 +
8638 +
8639 +extern void unhash_vx_info(struct vx_info *);
8640 +/* Decrement vxi->vx_tasks; when it reaches zero, unhash_vx_info() is called. */
8641 +#define release_vx_info(v, p) \
8642 +       __release_vx_info(v, p, __FILE__, __LINE__, __HERE__)
8643 +
8644 +static inline void __release_vx_info(struct vx_info *vxi,
8645 +       struct task_struct *task,
8646 +       const char *_file, int _line, void *_here)
8647 +{
8648 +       vxlprintk(VXD_CBIT(xid, 3), "release_vx_info(%p[#%d.%d.%d]) %p",
8649 +               vxi, vxi ? vxi->vx_id : 0,
8650 +               vxi ? atomic_read(&vxi->vx_usecnt) : 0,
8651 +               vxi ? atomic_read(&vxi->vx_tasks) : 0,
8652 +               task, _file, _line);
8653 +       __vxh_release_vx_info(vxi, task, _here);
8654 +
8655 +       might_sleep();  /* flags callers that run where sleeping is not allowed */
8656 +
8657 +       if (atomic_dec_and_test(&vxi->vx_tasks))
8658 +               unhash_vx_info(vxi);
8659 +}
8660 +
8661 +/* Return p->vx_info with a usage reference taken (NULL if p has none). */
8662 +#define task_get_vx_info(p) \
8663 +       __task_get_vx_info(p, __FILE__, __LINE__, __HERE__)
8664 +
8665 +static inline struct vx_info *__task_get_vx_info(struct task_struct *p,
8666 +       const char *_file, int _line, void *_here)
8667 +{
8668 +       struct vx_info *vxi;
8669 +
8670 +       task_lock(p);   /* p->vx_info is read and referenced under the task lock */
8671 +       vxlprintk(VXD_CBIT(xid, 5), "task_get_vx_info(%p)",
8672 +               p, _file, _line);
8673 +       vxi = __get_vx_info(p->vx_info, _file, _line, _here);
8674 +       task_unlock(p);
8675 +       return vxi;
8676 +}
8677 +
8678 +/* Wake interruptible sleepers on vxi->vx_wait, but only if the queue is active. */
8679 +static inline void __wakeup_vx_info(struct vx_info *vxi)
8680 +{
8681 +       if (waitqueue_active(&vxi->vx_wait))
8682 +               wake_up_interruptible(&vxi->vx_wait);
8683 +}
8684 +
8685 +/* Make vxi the context of current, saving the previous vx_info/xid in *vxis. */
8686 +#define enter_vx_info(v, s) __enter_vx_info(v, s, __FILE__, __LINE__)
8687 +/* Note: no usage reference is taken on vxi here. */
8688 +static inline void __enter_vx_info(struct vx_info *vxi,
8689 +       struct vx_info_save *vxis, const char *_file, int _line)
8690 +{
8691 +       vxlprintk(VXD_CBIT(xid, 5), "enter_vx_info(%p[#%d],%p) %p[#%d,%p]",
8692 +               vxi, vxi ? vxi->vx_id : 0, vxis, current,
8693 +               current->xid, current->vx_info, _file, _line);
8694 +       vxis->vxi = xchg(&current->vx_info, vxi);
8695 +       vxis->xid = current->xid;
8696 +       current->xid = vxi ? vxi->vx_id : 0;    /* a NULL vxi means xid 0 */
8697 +}
8698 +
8699 +#define leave_vx_info(s) __leave_vx_info(s, __FILE__, __LINE__)
8700 +/* Write the vx_info/xid pair saved in *vxis back into current. */
8701 +static inline void __leave_vx_info(struct vx_info_save *vxis,
8702 +       const char *_file, int _line)
8703 +{
8704 +       vxlprintk(VXD_CBIT(xid, 5), "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]",
8705 +               vxis, vxis->xid, vxis->vxi, current,
8706 +               current->xid, current->vx_info, _file, _line);
8707 +       (void)xchg(&current->vx_info, vxis->vxi);       /* old value is discarded */
8708 +       current->xid = vxis->xid;
8709 +}
8710 +
8711 +/* Switch current to xid 0 with a NULL vx_info, saving the old pair in *vxis. */
8712 +static inline void __enter_vx_admin(struct vx_info_save *vxis)
8713 +{
8714 +       vxis->vxi = xchg(&current->vx_info, NULL);
8715 +       vxis->xid = xchg(&current->xid, (xid_t)0);
8716 +}
8717 +
8718 +static inline void __leave_vx_admin(struct vx_info_save *vxis)
8719 +{
8720 +       (void)xchg(&current->xid, vxis->xid);   /* write back the pair saved in *vxis */
8721 +       (void)xchg(&current->vx_info, vxis->vxi);
8722 +}
8723 +
8724 +extern void exit_vx_info(struct task_struct *, int);
8725 +extern void exit_vx_info_early(struct task_struct *, int);
8726 +
8727 +
8728 +#else
8729 +#warning duplicate inclusion
8730 +#endif
8731 diff -NurpP --minimal linux-2.6.30.2/include/linux/vs_cowbl.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_cowbl.h
8732 --- linux-2.6.30.2/include/linux/vs_cowbl.h     1970-01-01 01:00:00.000000000 +0100
8733 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_cowbl.h  2009-07-04 01:11:39.000000000 +0200
8734 @@ -0,0 +1,47 @@
8735 +#ifndef _VS_COWBL_H
8736 +#define _VS_COWBL_H
8737 +
8738 +#include <linux/fs.h>
8739 +#include <linux/dcache.h>
8740 +#include <linux/namei.h>
8741 +
8742 +extern struct dentry *cow_break_link(const char *pathname);
8743 +/* If path's inode is COW, break the link or clear its flags; returns 0 or -errno. */
8744 +static inline int cow_check_and_break(struct path *path)
8745 +{
8746 +       struct inode *inode = path->dentry->d_inode;
8747 +       int error = 0;
8748 +
8749 +       /* do we need this check? */
8750 +       if (IS_RDONLY(inode))
8751 +               return -EROFS;
8752 +
8753 +       if (IS_COW(inode)) {
8754 +               if (IS_COW_LINK(inode)) {
8755 +                       struct dentry *new_dentry, *old_dentry = path->dentry;
8756 +                       char *pp, *buf;
8757 +
8758 +                       buf = kmalloc(PATH_MAX, GFP_KERNEL);
8759 +                       if (!buf)
8760 +                               return -ENOMEM;
8761 +                       pp = d_path(path, buf, PATH_MAX);       /* may be an ERR_PTR */
8762 +                       new_dentry = IS_ERR(pp) ? ERR_CAST(pp)
8763 +                                               : cow_break_link(pp);
8764 +                       kfree(buf);
8765 +                       if (!IS_ERR(new_dentry)) {
8766 +                               path->dentry = new_dentry;
8767 +                               dput(old_dentry);
8768 +                       } else
8769 +                               error = PTR_ERR(new_dentry);
8770 +               } else {        /* not a COW link: clear the flags in place */
8771 +                       inode->i_flags &= ~(S_IXUNLINK | S_IMMUTABLE);
8772 +                       inode->i_ctime = CURRENT_TIME;
8773 +                       mark_inode_dirty(inode);
8774 +               }
8775 +       }
8776 +       return error;
8777 +}
8778 +
8779 +#else
8780 +#warning duplicate inclusion
8781 +#endif
8782 diff -NurpP --minimal linux-2.6.30.2/include/linux/vs_cvirt.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_cvirt.h
8783 --- linux-2.6.30.2/include/linux/vs_cvirt.h     1970-01-01 01:00:00.000000000 +0100
8784 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_cvirt.h  2009-07-04 01:11:39.000000000 +0200
8785 @@ -0,0 +1,50 @@
8786 +#ifndef _VS_CVIRT_H
8787 +#define _VS_CVIRT_H
8788 +
8789 +#include "vserver/cvirt.h"
8790 +#include "vserver/context.h"
8791 +#include "vserver/base.h"
8792 +#include "vserver/check.h"
8793 +#include "vserver/debug.h"
8794 +
8795 +/* Call vx_update_load() on p's context, then increment its cvirt.nr_running. */
8796 +static inline void vx_activate_task(struct task_struct *p)
8797 +{
8798 +       struct vx_info *ctx = p->vx_info;
8799 +
8800 +       if (!ctx)
8801 +               return;
8802 +       vx_update_load(ctx);
8803 +       atomic_inc(&ctx->cvirt.nr_running);
8804 +}
8805 +
8806 +static inline void vx_deactivate_task(struct task_struct *p)
8807 +{
8808 +       struct vx_info *ctx = p->vx_info;
8809 +
8810 +       if (!ctx)       /* task has no context: nothing to account */
8811 +               return;
8812 +       vx_update_load(ctx);
8813 +       atomic_dec(&ctx->cvirt.nr_running);     /* mirrors vx_activate_task() */
8814 +}
8815 +
8816 +static inline void vx_uninterruptible_inc(struct task_struct *p)
8817 +{
8818 +       struct vx_info *ctx = p->vx_info;
8819 +
8820 +       if (ctx)        /* tasks without a context are not counted */
8821 +               atomic_inc(&ctx->cvirt.nr_uninterruptible);
8822 +}
8823 +
8824 +static inline void vx_uninterruptible_dec(struct task_struct *p)
8825 +{
8826 +       struct vx_info *ctx = p->vx_info;
8827 +
8828 +       if (ctx)        /* tasks without a context are not counted */
8829 +               atomic_dec(&ctx->cvirt.nr_uninterruptible);
8830 +}
8831 +
8832 +
8833 +#else
8834 +#warning duplicate inclusion
8835 +#endif
8836 diff -NurpP --minimal linux-2.6.30.2/include/linux/vs_device.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_device.h
8837 --- linux-2.6.30.2/include/linux/vs_device.h    1970-01-01 01:00:00.000000000 +0100
8838 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_device.h 2009-07-04 01:11:39.000000000 +0200
8839 @@ -0,0 +1,45 @@
8840 +#ifndef _VS_DEVICE_H
8841 +#define _VS_DEVICE_H
8842 +
8843 +#include "vserver/base.h"
8844 +#include "vserver/device.h"
8845 +#include "vserver/debug.h"
8846 +
8847 +
8848 +#ifdef CONFIG_VSERVER_DEVICE
8849 +
8850 +int vs_map_device(struct vx_info *, dev_t, dev_t *, umode_t);
8851 +
8852 +#define vs_device_perm(v, d, m, p) \
8853 +       ((vs_map_device(current_vx_info(), d, NULL, m) & (p)) == (p))
8854 +
8855 +#else
8856 +/* Stub without CONFIG_VSERVER_DEVICE: identity mapping, all result bits set. */
8857 +static inline
8858 +int vs_map_device(struct vx_info *vxi,
8859 +       dev_t device, dev_t *target, umode_t mode)
8860 +{
8861 +       if (target)     /* target is optional */
8862 +               *target = device;
8863 +       return ~0;
8864 +}
8865 +
8866 +#define vs_device_perm(v, d, m, p) ((p) == (p))
8867 +
8868 +#endif
8869 +
8870 +
8871 +#define vs_map_chrdev(d, t, p) \
8872 +       ((vs_map_device(current_vx_info(), d, t, S_IFCHR) & (p)) == (p))
8873 +#define vs_map_blkdev(d, t, p) \
8874 +       ((vs_map_device(current_vx_info(), d, t, S_IFBLK) & (p)) == (p))
8875 +
8876 +#define vs_chrdev_perm(d, p) \
8877 +       vs_device_perm(current_vx_info(), d, S_IFCHR, p)
8878 +#define vs_blkdev_perm(d, p) \
8879 +       vs_device_perm(current_vx_info(), d, S_IFBLK, p)
8880 +
8881 +
8882 +#else
8883 +#warning duplicate inclusion
8884 +#endif
8885 diff -NurpP --minimal linux-2.6.30.2/include/linux/vs_dlimit.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_dlimit.h
8886 --- linux-2.6.30.2/include/linux/vs_dlimit.h    1970-01-01 01:00:00.000000000 +0100
8887 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_dlimit.h 2009-07-04 01:11:39.000000000 +0200
8888 @@ -0,0 +1,211 @@
8889 +#ifndef _VS_DLIMIT_H
8890 +#define _VS_DLIMIT_H
8891 +
8892 +#include <linux/fs.h>
8893 +
8894 +#include "vserver/dlimit.h"
8895 +#include "vserver/base.h"
8896 +#include "vserver/debug.h"
8897 +
8898 +/* Take a usage reference on a dl_info; a NULL pointer is passed through. */
8899 +#define get_dl_info(i) __get_dl_info(i, __FILE__, __LINE__)
8900 +
8901 +static inline struct dl_info *__get_dl_info(struct dl_info *dli,
8902 +       const char *_file, int _line)
8903 +{
8904 +       if (dli) {
8905 +               vxlprintk(VXD_CBIT(dlim, 4), "get_dl_info(%p[#%d.%d])",
8906 +                       dli, dli->dl_tag,
8907 +                       atomic_read(&dli->dl_usecnt),
8908 +                       _file, _line);
8909 +               atomic_inc(&dli->dl_usecnt);
8910 +       }
8911 +       return dli;
8912 +}
8913 +
8914 +
8915 +#define free_dl_info(i) \
8916 +       call_rcu(&(i)->dl_rcu, rcu_free_dl_info)
8917 +/* Drop a dl_info usage reference; the final put goes through free_dl_info(). */
8918 +#define put_dl_info(i) __put_dl_info(i, __FILE__, __LINE__)
8919 +
8920 +static inline void __put_dl_info(struct dl_info *dli,
8921 +       const char *_file, int _line)
8922 +{
8923 +       if (dli) {
8924 +               vxlprintk(VXD_CBIT(dlim, 4), "put_dl_info(%p[#%d.%d])",
8925 +                       dli, dli->dl_tag,
8926 +                       atomic_read(&dli->dl_usecnt),
8927 +                       _file, _line);
8928 +               if (atomic_dec_and_test(&dli->dl_usecnt))
8929 +                       free_dl_info(dli);
8930 +       }
8931 +}
8932 +
8933 +
8934 +#define __dlimit_char(d)       ((d) ? '*' : ' ')
8935 +
8936 +/*
8937 + * Charge nr bytes to the dl_info of (sb, tag), if any.  Returns 1 (and
8938 + * charges nothing) when that would exceed dl_space_total, else 0.
8939 + */
8940 +static inline int __dl_alloc_space(struct super_block *sb,
8941 +       tag_t tag, dlsize_t nr, const char *file, int line)
8942 +{
8943 +       struct dl_info *dli = nr ? locate_dl_info(sb, tag) : NULL;
8944 +       int ret = 0;
8945 +
8946 +       if (dli) {
8947 +               spin_lock(&dli->dl_lock);
8948 +               if (dli->dl_space_used + nr > dli->dl_space_total)
8949 +                       ret = 1;
8950 +               else
8951 +                       dli->dl_space_used += nr;
8952 +               spin_unlock(&dli->dl_lock);
8953 +               put_dl_info(dli);
8954 +       }
8955 +       vxlprintk(VXD_CBIT(dlim, 1),
8956 +               "ALLOC (%p,#%d)%c %lld bytes (%d)",
8957 +               sb, tag, __dlimit_char(dli), (long long)nr,
8958 +               ret, file, line);
8959 +       return ret;
8960 +}
8961 +
8962 +/*
8963 + * Give nr bytes back to the dl_info of (sb, tag), if any; the usage
8964 + * counter is set to zero instead of going below it.
8965 + */
8966 +static inline void __dl_free_space(struct super_block *sb,
8967 +       tag_t tag, dlsize_t nr, const char *_file, int _line)
8968 +{
8969 +       struct dl_info *dli = nr ? locate_dl_info(sb, tag) : NULL;
8970 +
8971 +       if (dli) {
8972 +               spin_lock(&dli->dl_lock);
8973 +               if (dli->dl_space_used > nr)
8974 +                       dli->dl_space_used -= nr;
8975 +               else
8976 +                       dli->dl_space_used = 0;
8977 +               spin_unlock(&dli->dl_lock);
8978 +               put_dl_info(dli);
8979 +       }
8980 +       /* dli is only tested for NULL below, never dereferenced */
8981 +       vxlprintk(VXD_CBIT(dlim, 1),
8982 +               "FREE  (%p,#%d)%c %lld bytes",
8983 +               sb, tag, __dlimit_char(dli), (long long)nr,
8984 +               _file, _line);
8985 +}
8986 +
8987 +/*
8988 + * Count one inode for (sb, tag); returns 1 at dl_inodes_total, else 0.
8989 + */
8990 +static inline int __dl_alloc_inode(struct super_block *sb,
8991 +       tag_t tag, const char *_file, int _line)
8992 +{
8993 +       struct dl_info *dli = locate_dl_info(sb, tag);
8994 +       int ret = 0;
8995 +
8996 +       if (dli) {
8997 +               spin_lock(&dli->dl_lock);
8998 +               ret = (dli->dl_inodes_used >= dli->dl_inodes_total);
8999 +               if (!ret)
9000 +                       dli->dl_inodes_used++;
9001 +               spin_unlock(&dli->dl_lock);
9002 +               put_dl_info(dli);
9003 +       }
9004 +       vxlprintk(VXD_CBIT(dlim, 0),
9005 +               "ALLOC (%p,#%d)%c inode (%d)",
9006 +               sb, tag, __dlimit_char(dli), ret, _file, _line);
9007 +       return ret;
9008 +}
9009 +
9010 +/*
9011 + * Uncount one inode for (sb, tag), if a dl_info exists; stops at zero.
9012 + */
9013 +static inline void __dl_free_inode(struct super_block *sb,
9014 +       tag_t tag, const char *_file, int _line)
9015 +{
9016 +       struct dl_info *dli = locate_dl_info(sb, tag);
9017 +
9018 +       if (dli) {
9019 +               spin_lock(&dli->dl_lock);
9020 +               if (dli->dl_inodes_used > 1)
9021 +                       dli->dl_inodes_used--;
9022 +               else
9023 +                       dli->dl_inodes_used = 0;
9024 +               spin_unlock(&dli->dl_lock);
9025 +               put_dl_info(dli);
9026 +       }
9027 +       vxlprintk(VXD_CBIT(dlim, 0),
9028 +               "FREE  (%p,#%d)%c inode",
9029 +               sb, tag, __dlimit_char(dli), _file, _line);
9030 +}
9031 +
9032 +/*
9033 + * Clamp *free_blocks / *root_blocks (each may be NULL) to the block
9034 + * counts derived from the dl_info of (sb, tag); no-op without one.
9035 + */
9036 +static inline void __dl_adjust_block(struct super_block *sb, tag_t tag,
9037 +       unsigned long long *free_blocks, unsigned long long *root_blocks,
9038 +       const char *_file, int _line)
9039 +{
9040 +       struct dl_info *dli = locate_dl_info(sb, tag);
9041 +       uint64_t broot, bfree;
9042 +
9043 +       if (!dli)
9044 +               return;
9045 +
9046 +       spin_lock(&dli->dl_lock);
9047 +       broot = (dli->dl_space_total -
9048 +               (dli->dl_space_total >> 10) * dli->dl_nrlmult)
9049 +               >> sb->s_blocksize_bits;
9050 +       bfree = (dli->dl_space_total - dli->dl_space_used)
9051 +                       >> sb->s_blocksize_bits;
9052 +       spin_unlock(&dli->dl_lock);
9053 +
9054 +       vxlprintk(VXD_CBIT(dlim, 2),
9055 +               "ADJUST: %lld,%lld on %lld,%lld [mult=%d]",
9056 +               (long long)bfree, (long long)broot,
9057 +               free_blocks ? *free_blocks : 0ULL,
9058 +               root_blocks ? *root_blocks : 0ULL,
9059 +               dli->dl_nrlmult, _file, _line);
9060 +       if (free_blocks && *free_blocks > bfree)
9061 +               *free_blocks = bfree;
9062 +       if (root_blocks && *root_blocks > broot)
9063 +               *root_blocks = broot;
9064 +       put_dl_info(dli);
9065 +}
9066 +
9067 +#define DLIMIT_ALLOC_SPACE(in, bytes) \
9068 +       __dl_alloc_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \
9069 +               __FILE__, __LINE__ )
9070 +
9071 +#define DLIMIT_FREE_SPACE(in, bytes) \
9072 +       __dl_free_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \
9073 +               __FILE__, __LINE__ )
9074 +
9075 +#define DLIMIT_ALLOC_BLOCK(in, nr) \
9076 +       __dl_alloc_space((in)->i_sb, (in)->i_tag, \
9077 +               ((dlsize_t)(nr)) << (in)->i_sb->s_blocksize_bits, \
9078 +               __FILE__, __LINE__ )
9079 +
9080 +#define DLIMIT_FREE_BLOCK(in, nr) \
9081 +       __dl_free_space((in)->i_sb, (in)->i_tag, \
9082 +               ((dlsize_t)(nr)) << (in)->i_sb->s_blocksize_bits, \
9083 +               __FILE__, __LINE__ )
9084 +
9085 +
9086 +#define DLIMIT_ALLOC_INODE(in) \
9087 +       __dl_alloc_inode((in)->i_sb, (in)->i_tag, __FILE__, __LINE__ )
9088 +
9089 +#define DLIMIT_FREE_INODE(in) \
9090 +       __dl_free_inode((in)->i_sb, (in)->i_tag, __FILE__, __LINE__ )
9091 +
9092 +
9093 +#define DLIMIT_ADJUST_BLOCK(sb, tag, fb, rb) \
9094 +       __dl_adjust_block(sb, tag, fb, rb, __FILE__, __LINE__ )
9095 +
9096 +
9097 +#else
9098 +#warning duplicate inclusion
9099 +#endif
9100 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/base.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/base.h
9101 --- linux-2.6.30.2/include/linux/vserver/base.h 1970-01-01 01:00:00.000000000 +0100
9102 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/base.h      2009-07-04 01:11:39.000000000 +0200
9103 @@ -0,0 +1,157 @@
9104 +#ifndef _VX_BASE_H
9105 +#define _VX_BASE_H
9106 +
9107 +
9108 +/* context state changes */
9109 +
9110 +enum {
9111 +       VSC_STARTUP = 1,
9112 +       VSC_SHUTDOWN,
9113 +
9114 +       VSC_NETUP,
9115 +       VSC_NETDOWN,
9116 +};
9117 +
9118 +
9119 +
9120 +#define vx_task_xid(t) ((t)->xid)
9121 +
9122 +#define vx_current_xid() vx_task_xid(current)
9123 +
9124 +#define current_vx_info() (current->vx_info)
9125 +
9126 +
9127 +#define nx_task_nid(t) ((t)->nid)
9128 +
9129 +#define nx_current_nid() nx_task_nid(current)
9130 +
9131 +#define current_nx_info() (current->nx_info)
9132 +
9133 +
9134 +/* generic flag merging */
9135 +
9136 +#define vs_check_flags(v, m, f)        (((v) & (m)) ^ (f))
9137 +
9138 +#define vs_mask_flags(v, f, m) (((v) & ~(m)) | ((f) & (m)))
9139 +
9140 +#define vs_mask_mask(v, f, m)  (((v) & ~(m)) | ((v) & (f) & (m)))
9141 +
9142 +#define vs_check_bit(v, n)     ((v) & (1LL << (n)))
9143 +
9144 +
9145 +/* context flags */
9146 +
9147 +#define __vx_flags(v)  ((v) ? (v)->vx_flags : 0)
9148 +
9149 +#define vx_current_flags()     __vx_flags(current->vx_info)
9150 +
9151 +#define vx_info_flags(v, m, f) \
9152 +       vs_check_flags(__vx_flags(v), m, f)
9153 +
9154 +#define task_vx_flags(t, m, f) \
9155 +       ((t) && vx_info_flags((t)->vx_info, m, f))
9156 +
9157 +#define vx_flags(m, f) vx_info_flags(current->vx_info, m, f)
9158 +
9159 +
9160 +/* context caps */
9161 +
9162 +#define __vx_ccaps(v)  ((v) ? (v)->vx_ccaps : 0)
9163 +
9164 +#define vx_current_ccaps()     __vx_ccaps(current->vx_info)
9165 +
9166 +#define vx_info_ccaps(v, c)    (__vx_ccaps(v) & (c))
9167 +
9168 +#define vx_ccaps(c)    vx_info_ccaps(current->vx_info, (c))
9169 +
9170 +
9171 +
9172 +/* network flags */
9173 +
9174 +#define __nx_flags(n)  ((n) ? (n)->nx_flags : 0)
9175 +
9176 +#define nx_current_flags()     __nx_flags(current->nx_info)
9177 +
9178 +#define nx_info_flags(n, m, f) \
9179 +       vs_check_flags(__nx_flags(n), m, f)
9180 +
9181 +#define task_nx_flags(t, m, f) \
9182 +       ((t) && nx_info_flags((t)->nx_info, m, f))
9183 +
9184 +#define nx_flags(m, f) nx_info_flags(current->nx_info, m, f)
9185 +
9186 +
9187 +/* network caps */
9188 +
9189 +#define __nx_ncaps(n)  ((n) ? (n)->nx_ncaps : 0)
9190 +
9191 +#define nx_current_ncaps()     __nx_ncaps(current->nx_info)
9192 +
9193 +#define nx_info_ncaps(n, c)    (__nx_ncaps(n) & (c))
9194 +
9195 +#define nx_ncaps(c)    nx_info_ncaps(current->nx_info, c)
9196 +
9197 +
9198 +/* context mask capabilities */
9199 +
9200 +#define __vx_mcaps(v)  ((v) ? (v)->vx_ccaps >> 32UL : ~0 )
9201 +
9202 +#define vx_info_mcaps(v, c)    (__vx_mcaps(v) & (c))
9203 +
9204 +#define vx_mcaps(c)    vx_info_mcaps(current->vx_info, c)
9205 +
9206 +
9207 +/* context bcap mask */
9208 +
9209 +#define __vx_bcaps(v)          ((v)->vx_bcaps)
9210 +
9211 +#define vx_current_bcaps()     __vx_bcaps(current->vx_info)
9212 +
9213 +
9214 +/* mask given bcaps */
9215 +
9216 +#define vx_info_mbcaps(v, c)   ((v) ? cap_intersect(__vx_bcaps(v), c) : c)
9217 +
9218 +#define vx_mbcaps(c)           vx_info_mbcaps(current->vx_info, c)
9219 +
9220 +
9221 +/* masked cap_bset */
9222 +
9223 +#define vx_info_cap_bset(v)    vx_info_mbcaps(v, current->cap_bset)
9224 +
9225 +#define vx_current_cap_bset()  vx_info_cap_bset(current->vx_info)
9226 +
9227 +#if 0
9228 +#define vx_info_mbcap(v, b) \
9229 +       (!vx_info_flags(v, VXF_STATE_SETUP, 0) ? \
9230 +       vx_info_bcaps(v, b) : (b))
9231 +
9232 +#define task_vx_mbcap(t, b) \
9233 +       vx_info_mbcap((t)->vx_info, (t)->b)
9234 +
9235 +#define vx_mbcap(b)    task_vx_mbcap(current, b)
9236 +#endif
9237 +
9238 +#define vx_cap_raised(v, c, f) cap_raised(vx_info_mbcaps(v, c), f)
9239 +
9240 +#define vx_capable(b, c) (capable(b) || \
9241 +       (cap_raised(current_cap(), b) && vx_ccaps(c)))
9242 +
9243 +#define nx_capable(b, c) (capable(b) || \
9244 +       (cap_raised(current_cap(), b) && nx_ncaps(c)))
9245 +
9246 +#define vx_current_initpid(n) \
9247 +       (current->vx_info && \
9248 +       (current->vx_info->vx_initpid == (n)))
9249 +
9250 +
9251 +#define __vx_state(v)  ((v) ? ((v)->vx_state) : 0)
9252 +
9253 +#define vx_info_state(v, m)    (__vx_state(v) & (m))
9254 +
9255 +
9256 +#define __nx_state(n)  ((n) ? ((n)->nx_state) : 0)
9257 +
9258 +#define nx_info_state(n, m)    (__nx_state(n) & (m))
9259 +
9260 +#endif
9261 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/cacct_cmd.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/cacct_cmd.h
9262 --- linux-2.6.30.2/include/linux/vserver/cacct_cmd.h    1970-01-01 01:00:00.000000000 +0100
9263 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/cacct_cmd.h 2009-07-04 01:11:39.000000000 +0200
9264 @@ -0,0 +1,23 @@
9265 +#ifndef _VX_CACCT_CMD_H
9266 +#define _VX_CACCT_CMD_H
9267 +
9268 +
9269 +/* socket accounting statistics command */
9270 +
9271 +#define VCMD_sock_stat         VC_CMD(VSTAT, 5, 0)
9272 +
9273 +struct vcmd_sock_stat_v0 {
9274 +       uint32_t field;
9275 +       uint32_t count[3];
9276 +       uint64_t total[3];
9277 +};
9278 +
9279 +
9280 +#ifdef __KERNEL__
9281 +
9282 +#include <linux/compiler.h>
9283 +
9284 +extern int vc_sock_stat(struct vx_info *, void __user *);
9285 +
9286 +#endif /* __KERNEL__ */
9287 +#endif /* _VX_CACCT_CMD_H */
9288 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/cacct_def.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/cacct_def.h
9289 --- linux-2.6.30.2/include/linux/vserver/cacct_def.h    1970-01-01 01:00:00.000000000 +0100
9290 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/cacct_def.h 2009-07-04 01:11:39.000000000 +0200
9291 @@ -0,0 +1,43 @@
9292 +#ifndef _VX_CACCT_DEF_H
9293 +#define _VX_CACCT_DEF_H
9294 +
9295 +#include <asm/atomic.h>
9296 +#include <linux/vserver/cacct.h>
9297 +
9298 +
9299 +struct _vx_sock_acc {
9300 +       atomic_long_t count;
9301 +       atomic_long_t total;
9302 +};
9303 +
9304 +/* context sub struct */
9305 +
9306 +struct _vx_cacct {
9307 +       struct _vx_sock_acc sock[VXA_SOCK_SIZE][3];
9308 +       atomic_t slab[8];
9309 +       atomic_t page[6][8];
9310 +};
9311 +
9312 +#ifdef CONFIG_VSERVER_DEBUG
9313 +/* debug helper: printk the count/total pair of every cacct->sock[][] slot */
9314 +static inline void __dump_vx_cacct(struct _vx_cacct *cacct)
9315 +{
9316 +       int i, j;
9317 +
9318 +       printk("\t_vx_cacct:");
9319 +       for (i = 0; i < VXA_SOCK_SIZE; i++) {
9320 +               struct _vx_sock_acc *ptr = cacct->sock[i];
9321 +
9322 +               printk("\t [%d] =", i);
9323 +               for (j = 0; j < 3; j++) {
9324 +                       printk(" [%d] = %8lu, %8lu", j,
9325 +                               atomic_long_read(&ptr[j].count),
9326 +                               atomic_long_read(&ptr[j].total));
9327 +               }
9328 +               printk("\n");
9329 +       }
9330 +}
9331 +
9332 +#endif
9333 +
9334 +#endif /* _VX_CACCT_DEF_H */
9335 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/cacct.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/cacct.h
9336 --- linux-2.6.30.2/include/linux/vserver/cacct.h        1970-01-01 01:00:00.000000000 +0100
9337 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/cacct.h     2009-07-04 01:11:39.000000000 +0200
9338 @@ -0,0 +1,15 @@
9339 +#ifndef _VX_CACCT_H
9340 +#define _VX_CACCT_H
9341 +
9342 +
9343 +enum sock_acc_field {
9344 +       VXA_SOCK_UNSPEC = 0,
9345 +       VXA_SOCK_UNIX,
9346 +       VXA_SOCK_INET,
9347 +       VXA_SOCK_INET6,
9348 +       VXA_SOCK_PACKET,
9349 +       VXA_SOCK_OTHER,
9350 +       VXA_SOCK_SIZE   /* array size */
9351 +};
9352 +
9353 +#endif /* _VX_CACCT_H */
9354 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/cacct_int.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/cacct_int.h
9355 --- linux-2.6.30.2/include/linux/vserver/cacct_int.h    1970-01-01 01:00:00.000000000 +0100
9356 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/cacct_int.h 2009-07-04 01:11:39.000000000 +0200
9357 @@ -0,0 +1,21 @@
9358 +#ifndef _VX_CACCT_INT_H
9359 +#define _VX_CACCT_INT_H
9360 +
9361 +
9362 +#ifdef __KERNEL__
9363 +/* atomic_long_read() of cacct->sock[type][pos].count; indices are not checked */
9364 +static inline
9365 +unsigned long vx_sock_count(struct _vx_cacct *cacct, int type, int pos)
9366 +{
9367 +       return atomic_long_read(&cacct->sock[type][pos].count);
9368 +}
9369 +
9370 +/* atomic_long_read() of cacct->sock[type][pos].total; indices are not checked */
9371 +static inline
9372 +unsigned long vx_sock_total(struct _vx_cacct *cacct, int type, int pos)
9373 +{
9374 +       return atomic_long_read(&cacct->sock[type][pos].total);
9375 +}
9376 +
9377 +#endif /* __KERNEL__ */
9378 +#endif /* _VX_CACCT_INT_H */
9379 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/check.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/check.h
9380 --- linux-2.6.30.2/include/linux/vserver/check.h        1970-01-01 01:00:00.000000000 +0100
9381 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/check.h     2009-07-04 01:11:39.000000000 +0200
9382 @@ -0,0 +1,89 @@
9383 +#ifndef _VS_CHECK_H
9384 +#define _VS_CHECK_H
9385 +
9386 +
9387 +#define MAX_S_CONTEXT  65535   /* Arbitrary limit */
9388 +
9389 +#ifdef CONFIG_VSERVER_DYNAMIC_IDS
9390 +#define MIN_D_CONTEXT  49152   /* dynamic contexts start here */
9391 +#else
9392 +#define MIN_D_CONTEXT  65536
9393 +#endif
9394 +
9395 +/* check conditions */
9396 +
9397 +#define VS_ADMIN       0x0001
9398 +#define VS_WATCH       0x0002
9399 +#define VS_HIDE                0x0004
9400 +#define VS_HOSTID      0x0008
9401 +
9402 +#define VS_IDENT       0x0010
9403 +#define VS_EQUIV       0x0020
9404 +#define VS_PARENT      0x0040
9405 +#define VS_CHILD       0x0080
9406 +
9407 +#define VS_ARG_MASK    0x00F0
9408 +
9409 +#define VS_DYNAMIC     0x0100
9410 +#define VS_STATIC      0x0200
9411 +
9412 +#define VS_ATR_MASK    0x0F00
9413 +
9414 +#ifdef CONFIG_VSERVER_PRIVACY
9415 +#define VS_ADMIN_P     (0)
9416 +#define VS_WATCH_P     (0)
9417 +#else
9418 +#define VS_ADMIN_P     VS_ADMIN
9419 +#define VS_WATCH_P     VS_WATCH
9420 +#endif
9421 +
9422 +#define VS_HARDIRQ     0x1000
9423 +#define VS_SOFTIRQ     0x2000
9424 +#define VS_IRQ         0x4000
9425 +
9426 +#define VS_IRQ_MASK    0xF000
9427 +
9428 +#include <linux/hardirq.h>
9429 +
9430 +/*
9431 + * nonzero if id == cid (VS_IDENT), id is in a requested dynamic/static range,
9432 + * a requested IRQ state holds, or cid/id satisfy VS_ADMIN/VS_WATCH/VS_HOSTID
9433 + */
9434 +static inline int __vs_check(int cid, int id, unsigned int mode)
9435 +{
9436 +       if (mode & VS_ARG_MASK) {
9437 +               if ((mode & VS_IDENT) && (id == cid))
9438 +                       return 1;
9439 +       }
9440 +       if (mode & VS_ATR_MASK) {
9441 +               if ((mode & VS_DYNAMIC) &&
9442 +                       (id >= MIN_D_CONTEXT) &&
9443 +                       (id <= MAX_S_CONTEXT))
9444 +                       return 1;
9445 +               if ((mode & VS_STATIC) &&
9446 +                       (id > 1) && (id < MIN_D_CONTEXT))
9447 +                       return 1;
9448 +       }
9449 +       if (mode & VS_IRQ_MASK) {
9450 +               if ((mode & VS_IRQ) && unlikely(in_interrupt()))
9451 +                       return 1;
9452 +               if ((mode & VS_HARDIRQ) && unlikely(in_irq()))
9453 +                       return 1;
9454 +               if ((mode & VS_SOFTIRQ) && unlikely(in_softirq()))
9455 +                       return 1;
9456 +       }
9457 +       return (((mode & VS_ADMIN) && (cid == 0)) ||
9458 +               ((mode & VS_WATCH) && (cid == 1)) ||
9459 +               ((mode & VS_HOSTID) && (id == 0)));
9460 +}
9461 +/* xid checks: VS_IRQ is always or'ed into the mode; *_weak_check yields 1 if m is 0 */
9462 +#define vx_check(c, m) __vs_check(vx_current_xid(), c, (m) | VS_IRQ)
9463 +
9464 +#define vx_weak_check(c, m)    ((m) ? vx_check(c, m) : 1)
9465 +
9466 +/* nid checks: same helper, but the mode is passed through unchanged (no VS_IRQ) */
9467 +#define nx_check(c, m) __vs_check(nx_current_nid(), c, m)
9468 +
9469 +#define nx_weak_check(c, m)    ((m) ? nx_check(c, m) : 1)
9470 +
9471 +#endif
9472 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/context_cmd.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/context_cmd.h
9473 --- linux-2.6.30.2/include/linux/vserver/context_cmd.h  1970-01-01 01:00:00.000000000 +0100
9474 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/context_cmd.h       2009-07-04 01:11:39.000000000 +0200
9475 @@ -0,0 +1,128 @@
9476 +#ifndef _VX_CONTEXT_CMD_H
9477 +#define _VX_CONTEXT_CMD_H
9478 +
9479 +
9480 +/* vinfo commands */
9481 +
9482 +#define VCMD_task_xid          VC_CMD(VINFO, 1, 0)
9483 +
9484 +#ifdef __KERNEL__
9485 +extern int vc_task_xid(uint32_t);
9486 +
9487 +#endif /* __KERNEL__ */
9488 +
9489 +#define VCMD_vx_info           VC_CMD(VINFO, 5, 0)
9490 +
9491 +struct vcmd_vx_info_v0 {
9492 +       uint32_t xid;
9493 +       uint32_t initpid;
9494 +       /* more to come */
9495 +};
9496 +
9497 +#ifdef __KERNEL__
9498 +extern int vc_vx_info(struct vx_info *, void __user *);
9499 +
9500 +#endif /* __KERNEL__ */
9501 +
9502 +#define VCMD_ctx_stat          VC_CMD(VSTAT, 0, 0)
9503 +
9504 +struct vcmd_ctx_stat_v0 {
9505 +       uint32_t usecnt;
9506 +       uint32_t tasks;
9507 +       /* more to come */
9508 +};
9509 +
9510 +#ifdef __KERNEL__
9511 +extern int vc_ctx_stat(struct vx_info *, void __user *);
9512 +
9513 +#endif /* __KERNEL__ */
9514 +
9515 +/* context commands */
9516 +
9517 +#define VCMD_ctx_create_v0     VC_CMD(VPROC, 1, 0)
9518 +#define VCMD_ctx_create                VC_CMD(VPROC, 1, 1)
9519 +
9520 +struct vcmd_ctx_create {
9521 +       uint64_t flagword;
9522 +};
9523 +
9524 +#define VCMD_ctx_migrate_v0    VC_CMD(PROCMIG, 1, 0)
9525 +#define VCMD_ctx_migrate       VC_CMD(PROCMIG, 1, 1)
9526 +
9527 +struct vcmd_ctx_migrate {
9528 +       uint64_t flagword;
9529 +};
9530 +
9531 +#ifdef __KERNEL__
9532 +extern int vc_ctx_create(uint32_t, void __user *);
9533 +extern int vc_ctx_migrate(struct vx_info *, void __user *);
9534 +
9535 +#endif /* __KERNEL__ */
9536 +
9537 +
9538 +/* flag commands */
9539 +
9540 +#define VCMD_get_cflags                VC_CMD(FLAGS, 1, 0)
9541 +#define VCMD_set_cflags                VC_CMD(FLAGS, 2, 0)
9542 +
9543 +struct vcmd_ctx_flags_v0 {
9544 +       uint64_t flagword;
9545 +       uint64_t mask;
9546 +};
9547 +
9548 +#ifdef __KERNEL__
9549 +extern int vc_get_cflags(struct vx_info *, void __user *);
9550 +extern int vc_set_cflags(struct vx_info *, void __user *);
9551 +
9552 +#endif /* __KERNEL__ */
9553 +
9554 +
9555 +/* context caps commands */
9556 +
9557 +#define VCMD_get_ccaps         VC_CMD(FLAGS, 3, 1)
9558 +#define VCMD_set_ccaps         VC_CMD(FLAGS, 4, 1)
9559 +
9560 +struct vcmd_ctx_caps_v1 {
9561 +       uint64_t ccaps;
9562 +       uint64_t cmask;
9563 +};
9564 +
9565 +#ifdef __KERNEL__
9566 +extern int vc_get_ccaps(struct vx_info *, void __user *);
9567 +extern int vc_set_ccaps(struct vx_info *, void __user *);
9568 +
9569 +#endif /* __KERNEL__ */
9570 +
9571 +
9572 +/* bcaps commands */
9573 +
9574 +#define VCMD_get_bcaps         VC_CMD(FLAGS, 9, 0)
9575 +#define VCMD_set_bcaps         VC_CMD(FLAGS, 10, 0)
9576 +
9577 +struct vcmd_bcaps {
9578 +       uint64_t bcaps;
9579 +       uint64_t bmask;
9580 +};
9581 +
9582 +#ifdef __KERNEL__
9583 +extern int vc_get_bcaps(struct vx_info *, void __user *);
9584 +extern int vc_set_bcaps(struct vx_info *, void __user *);
9585 +
9586 +#endif /* __KERNEL__ */
9587 +
9588 +
9589 +/* OOM badness */
9590 +
9591 +#define VCMD_get_badness       VC_CMD(MEMCTRL, 5, 0)
9592 +#define VCMD_set_badness       VC_CMD(MEMCTRL, 6, 0)
9593 +
9594 +struct vcmd_badness_v0 {
9595 +       int64_t bias;
9596 +};
9597 +
9598 +#ifdef __KERNEL__
9599 +extern int vc_get_badness(struct vx_info *, void __user *);
9600 +extern int vc_set_badness(struct vx_info *, void __user *);
9601 +
9602 +#endif /* __KERNEL__ */
9603 +#endif /* _VX_CONTEXT_CMD_H */
9604 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/context.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/context.h
9605 --- linux-2.6.30.2/include/linux/vserver/context.h      1970-01-01 01:00:00.000000000 +0100
9606 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/context.h   2009-07-04 01:11:39.000000000 +0200
9607 @@ -0,0 +1,179 @@
9608 +#ifndef _VX_CONTEXT_H
9609 +#define _VX_CONTEXT_H
9610 +
9611 +#include <linux/types.h>
9612 +#include <linux/capability.h>
9613 +
9614 +
9615 +/* context flags */
9616 +
9617 +#define VXF_INFO_SCHED         0x00000002
9618 +#define VXF_INFO_NPROC         0x00000004
9619 +#define VXF_INFO_PRIVATE       0x00000008
9620 +
9621 +#define VXF_INFO_INIT          0x00000010
9622 +#define VXF_INFO_HIDE          0x00000020
9623 +#define VXF_INFO_ULIMIT                0x00000040
9624 +#define VXF_INFO_NSPACE                0x00000080
9625 +
9626 +#define VXF_SCHED_HARD         0x00000100
9627 +#define VXF_SCHED_PRIO         0x00000200
9628 +#define VXF_SCHED_PAUSE                0x00000400
9629 +
9630 +#define VXF_VIRT_MEM           0x00010000
9631 +#define VXF_VIRT_UPTIME                0x00020000
9632 +#define VXF_VIRT_CPU           0x00040000
9633 +#define VXF_VIRT_LOAD          0x00080000
9634 +#define VXF_VIRT_TIME          0x00100000
9635 +
9636 +#define VXF_HIDE_MOUNT         0x01000000
9637 +/* was VXF_HIDE_NETIF          0x02000000 */
9638 +#define VXF_HIDE_VINFO         0x04000000
9639 +
9640 +#define VXF_STATE_SETUP                (1ULL << 32)
9641 +#define VXF_STATE_INIT         (1ULL << 33)
9642 +#define VXF_STATE_ADMIN                (1ULL << 34)
9643 +
9644 +#define VXF_SC_HELPER          (1ULL << 36)
9645 +#define VXF_REBOOT_KILL                (1ULL << 37)
9646 +#define VXF_PERSISTENT         (1ULL << 38)
9647 +
9648 +#define VXF_FORK_RSS           (1ULL << 48)
9649 +#define VXF_PROLIFIC           (1ULL << 49)
9650 +
9651 +#define VXF_IGNEG_NICE         (1ULL << 52)
9652 +
9653 +#define VXF_ONE_TIME           (0x0007ULL << 32)
9654 +
9655 +#define VXF_INIT_SET           (VXF_STATE_SETUP | VXF_STATE_INIT | VXF_STATE_ADMIN)
9656 +
9657 +
9658 +/* context migration */
9659 +
9660 +#define VXM_SET_INIT           0x00000001
9661 +#define VXM_SET_REAPER         0x00000002
9662 +
9663 +/* context caps */
9664 +
9665 +#define VXC_CAP_MASK           0x00000000
9666 +
9667 +#define VXC_SET_UTSNAME                0x00000001
9668 +#define VXC_SET_RLIMIT         0x00000002
9669 +#define VXC_FS_SECURITY                0x00000004
9670 +
9671 +/* was VXC_RAW_ICMP            0x00000100 */
9672 +#define VXC_SYSLOG             0x00001000
9673 +
9674 +#define VXC_SECURE_MOUNT       0x00010000
9675 +#define VXC_SECURE_REMOUNT     0x00020000
9676 +#define VXC_BINARY_MOUNT       0x00040000
9677 +
9678 +#define VXC_QUOTA_CTL          0x00100000
9679 +#define VXC_ADMIN_MAPPER       0x00200000
9680 +#define VXC_ADMIN_CLOOP                0x00400000
9681 +
9682 +#define VXC_KTHREAD            0x01000000
9683 +
9684 +
9685 +#ifdef __KERNEL__
9686 +
9687 +#include <linux/list.h>
9688 +#include <linux/spinlock.h>
9689 +#include <linux/rcupdate.h>
9690 +
9691 +#include "limit_def.h"
9692 +#include "sched_def.h"
9693 +#include "cvirt_def.h"
9694 +#include "cacct_def.h"
9695 +#include "device_def.h"
9696 +
9697 +#define VX_SPACES      2
9698 +
9699 +struct _vx_info_pc {
9700 +       struct _vx_sched_pc sched_pc;
9701 +       struct _vx_cvirt_pc cvirt_pc;
9702 +};
9703 +
9704 +struct vx_info {
9705 +       struct hlist_node vx_hlist;             /* linked list of contexts */
9706 +       xid_t vx_id;                            /* context id */
9707 +       atomic_t vx_usecnt;                     /* usage count */
9708 +       atomic_t vx_tasks;                      /* tasks count */
9709 +       struct vx_info *vx_parent;              /* parent context */
9710 +       int vx_state;                           /* context state */
9711 +
9712 +       unsigned long vx_nsmask[VX_SPACES];     /* assignment mask */
9713 +       struct nsproxy *vx_nsproxy[VX_SPACES];  /* private namespaces */
9714 +       struct fs_struct *vx_fs[VX_SPACES];     /* private namespace fs */
9715 +
9716 +       uint64_t vx_flags;                      /* context flags */
9717 +       uint64_t vx_ccaps;                      /* context caps (vserver) */
9718 +       kernel_cap_t vx_bcaps;                  /* bounding caps (system) */
9719 +       // kernel_cap_t vx_cap_bset;            /* the guest's bset */
9720 +
9721 +       struct task_struct *vx_reaper;          /* guest reaper process */
9722 +       pid_t vx_initpid;                       /* PID of guest init */
9723 +       int64_t vx_badness_bias;                /* OOM points bias */
9724 +
9725 +       struct _vx_limit limit;                 /* vserver limits */
9726 +       struct _vx_sched sched;                 /* vserver scheduler */
9727 +       struct _vx_cvirt cvirt;                 /* virtual/bias stuff */
9728 +       struct _vx_cacct cacct;                 /* context accounting */
9729 +
9730 +       struct _vx_device dmap;                 /* default device map targets */
9731 +
9732 +#ifndef CONFIG_SMP
9733 +       struct _vx_info_pc info_pc;             /* per cpu data */
9734 +#else
9735 +       struct _vx_info_pc *ptr_pc;             /* per cpu array */
9736 +#endif
9737 +
9738 +       wait_queue_head_t vx_wait;              /* context exit waitqueue */
9739 +       int reboot_cmd;                         /* last sys_reboot() cmd */
9740 +       int exit_code;                          /* last process exit code */
9741 +
9742 +       char vx_name[65];                       /* vserver name */
9743 +};
9744 +/* per-cpu data: the embedded info_pc without SMP (id unused), per_cpu_ptr(ptr_pc, id) with SMP */
9745 +#ifndef CONFIG_SMP
9746 +#define        vx_ptr_pc(vxi)          (&(vxi)->info_pc)
9747 +#define        vx_per_cpu(vxi, v, id)  vx_ptr_pc(vxi)->v
9748 +#else
9749 +#define        vx_ptr_pc(vxi)          ((vxi)->ptr_pc)
9750 +#define        vx_per_cpu(vxi, v, id)  per_cpu_ptr(vx_ptr_pc(vxi), id)->v
9751 +#endif
9752 +/* per-cpu field v selected with the id returned by smp_processor_id() */
9753 +#define        vx_cpu(vxi, v)          vx_per_cpu(vxi, v, smp_processor_id())
9754 +
9755 +
9756 +struct vx_info_save {
9757 +       struct vx_info *vxi;
9758 +       xid_t xid;
9759 +};
9760 +
9761 +
9762 +/* status flags */
9763 +
9764 +#define VXS_HASHED     0x0001
9765 +#define VXS_PAUSED     0x0010
9766 +#define VXS_SHUTDOWN   0x0100
9767 +#define VXS_HELPER     0x1000
9768 +#define VXS_RELEASED   0x8000
9769 +
9770 +
9771 +extern void claim_vx_info(struct vx_info *, struct task_struct *);
9772 +extern void release_vx_info(struct vx_info *, struct task_struct *);
9773 +
9774 +extern struct vx_info *lookup_vx_info(int);
9775 +extern struct vx_info *lookup_or_create_vx_info(int);
9776 +
9777 +extern int get_xid_list(int, unsigned int *, int);
9778 +extern int xid_is_hashed(xid_t);
9779 +
9780 +extern int vx_migrate_task(struct task_struct *, struct vx_info *, int);
9781 +
9782 +extern long vs_state_change(struct vx_info *, unsigned int);
9783 +
9784 +
9785 +#endif /* __KERNEL__ */
9786 +#endif /* _VX_CONTEXT_H */
9787 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/cvirt_cmd.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/cvirt_cmd.h
9788 --- linux-2.6.30.2/include/linux/vserver/cvirt_cmd.h    1970-01-01 01:00:00.000000000 +0100
9789 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/cvirt_cmd.h 2009-07-04 01:11:39.000000000 +0200
9790 @@ -0,0 +1,53 @@
9791 +#ifndef _VX_CVIRT_CMD_H
9792 +#define _VX_CVIRT_CMD_H
9793 +
9794 +
9795 +/* virtual host info name commands */
9796 +
9797 +#define VCMD_set_vhi_name      VC_CMD(VHOST, 1, 0)
9798 +#define VCMD_get_vhi_name      VC_CMD(VHOST, 2, 0)
9799 +
9800 +struct vcmd_vhi_name_v0 {
9801 +       uint32_t field;
9802 +       char name[65];
9803 +};
9804 +
9805 +
9806 +enum vhi_name_field {
9807 +       VHIN_CONTEXT = 0,
9808 +       VHIN_SYSNAME,
9809 +       VHIN_NODENAME,
9810 +       VHIN_RELEASE,
9811 +       VHIN_VERSION,
9812 +       VHIN_MACHINE,
9813 +       VHIN_DOMAINNAME,
9814 +};
9815 +
9816 +
9817 +#ifdef __KERNEL__
9818 +
9819 +#include <linux/compiler.h>
9820 +
9821 +extern int vc_set_vhi_name(struct vx_info *, void __user *);
9822 +extern int vc_get_vhi_name(struct vx_info *, void __user *);
9823 +
9824 +#endif /* __KERNEL__ */
9825 +
9826 +#define VCMD_virt_stat         VC_CMD(VSTAT, 3, 0)
9827 +
9828 +struct vcmd_virt_stat_v0 {
9829 +       uint64_t offset;
9830 +       uint64_t uptime;
9831 +       uint32_t nr_threads;
9832 +       uint32_t nr_running;
9833 +       uint32_t nr_uninterruptible;
9834 +       uint32_t nr_onhold;
9835 +       uint32_t nr_forks;
9836 +       uint32_t load[3];
9837 +};
9838 +
9839 +#ifdef __KERNEL__
9840 +extern int vc_virt_stat(struct vx_info *, void __user *);
9841 +
9842 +#endif /* __KERNEL__ */
9843 +#endif /* _VX_CVIRT_CMD_H */
9844 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/cvirt_def.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/cvirt_def.h
9845 --- linux-2.6.30.2/include/linux/vserver/cvirt_def.h    1970-01-01 01:00:00.000000000 +0100
9846 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/cvirt_def.h 2009-07-04 01:11:39.000000000 +0200
9847 @@ -0,0 +1,80 @@
9848 +#ifndef _VX_CVIRT_DEF_H
9849 +#define _VX_CVIRT_DEF_H
9850 +
9851 +#include <linux/jiffies.h>
9852 +#include <linux/spinlock.h>
9853 +#include <linux/wait.h>
9854 +#include <linux/time.h>
9855 +#include <asm/atomic.h>
9856 +
9857 +
9858 +struct _vx_usage_stat {
9859 +       uint64_t user;
9860 +       uint64_t nice;
9861 +       uint64_t system;
9862 +       uint64_t softirq;
9863 +       uint64_t irq;
9864 +       uint64_t idle;
9865 +       uint64_t iowait;
9866 +};
9867 +
9868 +struct _vx_syslog {
9869 +       wait_queue_head_t log_wait;
9870 +       spinlock_t logbuf_lock;         /* lock for the log buffer */
9871 +
9872 +       unsigned long log_start;        /* next char to be read by syslog() */
9873 +       unsigned long con_start;        /* next char to be sent to consoles */
9874 +       unsigned long log_end;  /* most-recently-written-char + 1 */
9875 +       unsigned long logged_chars;     /* #chars since last read+clear operation */
9876 +
9877 +       char log_buf[1024];
9878 +};
9879 +
9880 +
9881 +/* context sub struct */
9882 +
9883 +struct _vx_cvirt {
9884 +       atomic_t nr_threads;            /* number of current threads */
9885 +       atomic_t nr_running;            /* number of running threads */
9886 +       atomic_t nr_uninterruptible;    /* number of uninterruptible threads */
9887 +
9888 +       atomic_t nr_onhold;             /* processes on hold */
9889 +       uint32_t onhold_last;           /* jiffies when put on hold */
9890 +
9891 +       struct timeval bias_tv;         /* time offset to the host */
9892 +       struct timespec bias_idle;
9893 +       struct timespec bias_uptime;    /* context creation point */
9894 +       uint64_t bias_clock;            /* offset in clock_t */
9895 +
9896 +       spinlock_t load_lock;           /* lock for the load averages */
9897 +       atomic_t load_updates;          /* nr of load updates done so far */
9898 +       uint32_t load_last;             /* last time load was calculated */
9899 +       uint32_t load[3];               /* load averages 1,5,15 */
9900 +
9901 +       atomic_t total_forks;           /* number of forks so far */
9902 +
9903 +       struct _vx_syslog syslog;
9904 +};
9905 +
9906 +struct _vx_cvirt_pc {
9907 +       struct _vx_usage_stat cpustat;
9908 +};
9909 +
9910 +
9911 +#ifdef CONFIG_VSERVER_DEBUG
9912 +/* debug helper: printk the four thread counters and total_forks of *cvirt */
9913 +static inline void __dump_vx_cvirt(struct _vx_cvirt *cvirt)
9914 +{
9915 +       printk("\t_vx_cvirt:\n");
9916 +       printk("\t threads: %4d, %4d, %4d, %4d\n",
9917 +               atomic_read(&cvirt->nr_threads),
9918 +               atomic_read(&cvirt->nr_running),
9919 +               atomic_read(&cvirt->nr_uninterruptible),
9920 +               atomic_read(&cvirt->nr_onhold));
9921 +       /* add rest here */
9922 +       printk("\t total_forks = %d\n", atomic_read(&cvirt->total_forks));
9923 +}
9924 +
9925 +#endif
9926 +
9927 +#endif /* _VX_CVIRT_DEF_H */
9928 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/cvirt.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/cvirt.h
9929 --- linux-2.6.30.2/include/linux/vserver/cvirt.h        1970-01-01 01:00:00.000000000 +0100
9930 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/cvirt.h     2009-07-04 01:11:39.000000000 +0200
9931 @@ -0,0 +1,20 @@
9932 +#ifndef _VX_CVIRT_H
9933 +#define _VX_CVIRT_H
9934 +
9935 +
9936 +#ifdef __KERNEL__
9937 +
9938 +struct timespec;
9939 +
9940 +void vx_vsi_uptime(struct timespec *, struct timespec *);
9941 +
9942 +
9943 +struct vx_info;
9944 +
9945 +void vx_update_load(struct vx_info *);
9946 +
9947 +
9948 +int vx_do_syslog(int, char __user *, int);
9949 +
9950 +#endif /* __KERNEL__ */
9951 +#endif /* _VX_CVIRT_H */
9952 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/debug_cmd.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/debug_cmd.h
9953 --- linux-2.6.30.2/include/linux/vserver/debug_cmd.h    1970-01-01 01:00:00.000000000 +0100
9954 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/debug_cmd.h 2009-07-04 01:11:39.000000000 +0200
9955 @@ -0,0 +1,58 @@
9956 +#ifndef _VX_DEBUG_CMD_H
9957 +#define _VX_DEBUG_CMD_H
9958 +
9959 +
9960 +/* debug commands */
9961 +
9962 +#define VCMD_dump_history      VC_CMD(DEBUG, 1, 0)
9963 +
9964 +#define VCMD_read_history      VC_CMD(DEBUG, 5, 0)
9965 +#define VCMD_read_monitor      VC_CMD(DEBUG, 6, 0)
9966 +
9967 +struct  vcmd_read_history_v0 {
9968 +       uint32_t index;
9969 +       uint32_t count;
9970 +       char __user *data;
9971 +};
9972 +
9973 +struct  vcmd_read_monitor_v0 {
9974 +       uint32_t index;
9975 +       uint32_t count;
9976 +       char __user *data;
9977 +};
9978 +
9979 +
9980 +#ifdef __KERNEL__
9981 +
9982 +#ifdef CONFIG_COMPAT
9983 +
9984 +#include <asm/compat.h>
9985 +
9986 +struct vcmd_read_history_v0_x32 {
9987 +       uint32_t index;
9988 +       uint32_t count;
9989 +       compat_uptr_t data_ptr;
9990 +};
9991 +
9992 +struct vcmd_read_monitor_v0_x32 {
9993 +       uint32_t index;
9994 +       uint32_t count;
9995 +       compat_uptr_t data_ptr;
9996 +};
9997 +
9998 +#endif  /* CONFIG_COMPAT */
9999 +
10000 +extern int vc_dump_history(uint32_t);
10001 +
10002 +extern int vc_read_history(uint32_t, void __user *);
10003 +extern int vc_read_monitor(uint32_t, void __user *);
10004 +
10005 +#ifdef CONFIG_COMPAT
10006 +
10007 +extern int vc_read_history_x32(uint32_t, void __user *);
10008 +extern int vc_read_monitor_x32(uint32_t, void __user *);
10009 +
10010 +#endif  /* CONFIG_COMPAT */
10011 +
10012 +#endif /* __KERNEL__ */
10013 +#endif /* _VX_DEBUG_CMD_H */
10014 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/debug.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/debug.h
10015 --- linux-2.6.30.2/include/linux/vserver/debug.h        1970-01-01 01:00:00.000000000 +0100
10016 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/debug.h     2009-07-04 01:11:39.000000000 +0200
10017 @@ -0,0 +1,127 @@
10018 +#ifndef _VX_DEBUG_H
10019 +#define _VX_DEBUG_H
10020 +
10021 +
10022 +#define VXD_CBIT(n, m) (vx_debug_ ## n & (1 << (m)))
10023 +#define VXD_CMIN(n, m) (vx_debug_ ## n > (m))
10024 +#define VXD_MASK(n, m) (vx_debug_ ## n & (m))
10025 +
10026 +#define VXD_DEV(d)     (d), (d)->bd_inode->i_ino,              \
10027 +                       imajor((d)->bd_inode), iminor((d)->bd_inode)
10028 +#define VXF_DEV                "%p[%lu,%d:%d]"
10029 +
10030 +/* d_path() into a static buffer: each use site has one buffer shared by all callers */
10031 +#define vxd_path(p)                                            \
10032 +       ({ static char _buffer[PATH_MAX];                       \
10033 +          d_path(p, _buffer, sizeof(_buffer)); })
10034 +
10035 +#define vxd_cond_path(n)                                       \
10036 +       ((n) ? vxd_path(&(n)->path) : "<null>" )
10037 +
10038 +
10039 +#ifdef CONFIG_VSERVER_DEBUG
10040 +
10041 +extern unsigned int vx_debug_switch;
10042 +extern unsigned int vx_debug_xid;
10043 +extern unsigned int vx_debug_nid;
10044 +extern unsigned int vx_debug_tag;
10045 +extern unsigned int vx_debug_net;
10046 +extern unsigned int vx_debug_limit;
10047 +extern unsigned int vx_debug_cres;
10048 +extern unsigned int vx_debug_dlim;
10049 +extern unsigned int vx_debug_quota;
10050 +extern unsigned int vx_debug_cvirt;
10051 +extern unsigned int vx_debug_space;
10052 +extern unsigned int vx_debug_misc;
10053 +
10054 +
10055 +#define VX_LOGLEVEL    "vxD: "
10056 +#define VX_PROC_FMT    "%p: "
10057 +#define VX_PROCESS     current
10058 +
10059 +#define vxdprintk(c, f, x...)                                  \
10060 +       do {                                                    \
10061 +               if (c)                                          \
10062 +                       printk(VX_LOGLEVEL VX_PROC_FMT f "\n",  \
10063 +                               VX_PROCESS , ##x);              \
10064 +       } while (0)
10065 +
10066 +#define vxlprintk(c, f, x...)                                  \
10067 +       do {                                                    \
10068 +               if (c)                                          \
10069 +                       printk(VX_LOGLEVEL f " @%s:%d\n", x);   \
10070 +       } while (0)
10071 +
10072 +#define vxfprintk(c, f, x...)                                  \
10073 +       do {                                                    \
10074 +               if (c)                                          \
10075 +                       printk(VX_LOGLEVEL f " %s@%s:%d\n", x); \
10076 +       } while (0)
10077 +
10078 +
10079 +struct vx_info;
10080 +
10081 +void dump_vx_info(struct vx_info *, int);
10082 +void dump_vx_info_inactive(int);
10083 +
10084 +#else  /* CONFIG_VSERVER_DEBUG */
10085 +
10086 +#define vx_debug_switch 0
10087 +#define vx_debug_xid   0
10088 +#define vx_debug_nid   0
10089 +#define vx_debug_tag   0
10090 +#define vx_debug_net   0
10091 +#define vx_debug_limit 0
10092 +#define vx_debug_cres  0
10093 +#define vx_debug_dlim  0
10094 +#define vx_debug_cvirt 0
10095 +
10096 +#define vxdprintk(x...) do { } while (0)
10097 +#define vxlprintk(x...) do { } while (0)
10098 +#define vxfprintk(x...) do { } while (0)
10099 +
10100 +#endif /* CONFIG_VSERVER_DEBUG */
10101 +
10102 +
10103 +#ifdef CONFIG_VSERVER_WARN
10104 +
10105 +#define VX_WARNLEVEL   KERN_WARNING "vxW: "
10106 +#define VX_WARN_TASK   "[»%s«,%u:#%u|%u|%u] "
10107 +#define VX_WARN_XID    "[xid #%u] "
10108 +#define VX_WARN_NID    "[nid #%u] "
10109 +#define VX_WARN_TAG    "[tag #%u] "
10110 +
10111 +#define vxwprintk(c, f, x...)                                  \
10112 +       do {                                                    \
10113 +               if (c)                                          \
10114 +                       printk(VX_WARNLEVEL f "\n", ##x);       \
10115 +       } while (0)
10116 +
10117 +#else  /* CONFIG_VSERVER_WARN */
10118 +
10119 +#define vxwprintk(x...) do { } while (0)
10120 +
10121 +#endif /* CONFIG_VSERVER_WARN */
10122 +/* warnings prefixed with task/xid/nid/tag info; ##x allows calls without extra args */
10123 +#define vxwprintk_task(c, f, x...)                             \
10124 +       vxwprintk(c, VX_WARN_TASK f,                            \
10125 +               current->comm, current->pid,                    \
10126 +               current->xid, current->nid, current->tag, ##x)
10127 +#define vxwprintk_xid(c, f, x...)                              \
10128 +       vxwprintk(c, VX_WARN_XID f, current->xid, ##x)
10129 +#define vxwprintk_nid(c, f, x...)                              \
10130 +       vxwprintk(c, VX_WARN_NID f, current->nid, ##x)
10131 +#define vxwprintk_tag(c, f, x...)                              \
10132 +       vxwprintk(c, VX_WARN_TAG f, current->tag, ##x)
10133 +
10134 +#ifdef CONFIG_VSERVER_DEBUG
10135 +#define vxd_assert_lock(l)     assert_spin_locked(l)
10136 +#define vxd_assert(c, f, x...) vxlprintk(!(c), \
10137 +       "assertion [" f "] failed.", ##x, __FILE__, __LINE__)
10138 +#else
10139 +#define vxd_assert_lock(l)     do { } while (0)
10140 +#define vxd_assert(c, f, x...) do { } while (0)
10141 +#endif
10142 +
10143 +
10144 +#endif /* _VX_DEBUG_H */
10145 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/device_cmd.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/device_cmd.h
10146 --- linux-2.6.30.2/include/linux/vserver/device_cmd.h   1970-01-01 01:00:00.000000000 +0100
10147 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/device_cmd.h        2009-07-04 01:11:39.000000000 +0200
10148 @@ -0,0 +1,44 @@
10149 +#ifndef _VX_DEVICE_CMD_H
10150 +#define _VX_DEVICE_CMD_H
10151 +
10152 +
10153 +/*  device vserver commands */
10154 +
10155 +#define VCMD_set_mapping       VC_CMD(DEVICE, 1, 0)
10156 +#define VCMD_unset_mapping     VC_CMD(DEVICE, 2, 0)
10157 +
10158 +struct vcmd_set_mapping_v0 {
10159 +       const char __user *device;
10160 +       const char __user *target;
10161 +       uint32_t flags;
10162 +};
10163 +
10164 +
10165 +#ifdef __KERNEL__
10166 +
10167 +#ifdef CONFIG_COMPAT
10168 +
10169 +#include <asm/compat.h>
10170 +
10171 +struct vcmd_set_mapping_v0_x32 {
10172 +       compat_uptr_t device_ptr;
10173 +       compat_uptr_t target_ptr;
10174 +       uint32_t flags;
10175 +};
10176 +
10177 +#endif /* CONFIG_COMPAT */
10178 +
10179 +#include <linux/compiler.h>
10180 +
10181 +extern int vc_set_mapping(struct vx_info *, void __user *);
10182 +extern int vc_unset_mapping(struct vx_info *, void __user *);
10183 +
10184 +#ifdef CONFIG_COMPAT
10185 +
10186 +extern int vc_set_mapping_x32(struct vx_info *, void __user *);
10187 +extern int vc_unset_mapping_x32(struct vx_info *, void __user *);
10188 +
10189 +#endif /* CONFIG_COMPAT */
10190 +
10191 +#endif /* __KERNEL__ */
10192 +#endif /* _VX_DEVICE_CMD_H */
10193 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/device_def.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/device_def.h
10194 --- linux-2.6.30.2/include/linux/vserver/device_def.h   1970-01-01 01:00:00.000000000 +0100
10195 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/device_def.h        2009-07-04 01:11:39.000000000 +0200
10196 @@ -0,0 +1,17 @@
10197 +#ifndef _VX_DEVICE_DEF_H
10198 +#define _VX_DEVICE_DEF_H
10199 +
10200 +#include <linux/types.h>
10201 +
10202 +struct vx_dmap_target {
10203 +       dev_t target;
10204 +       uint32_t flags;
10205 +};
10206 +
10207 +struct _vx_device {
10208 +#ifdef CONFIG_VSERVER_DEVICE
10209 +       struct vx_dmap_target targets[2];
10210 +#endif
10211 +};
10212 +
10213 +#endif /* _VX_DEVICE_DEF_H */
10214 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/device.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/device.h
10215 --- linux-2.6.30.2/include/linux/vserver/device.h       1970-01-01 01:00:00.000000000 +0100
10216 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/device.h    2009-07-04 01:11:39.000000000 +0200
10217 @@ -0,0 +1,15 @@
10218 +#ifndef _VX_DEVICE_H
10219 +#define _VX_DEVICE_H
10220 +
10221 +
10222 +#define DATTR_CREATE   0x00000001
10223 +#define DATTR_OPEN     0x00000002
10224 +
10225 +#define DATTR_REMAP    0x00000010
10226 +
10227 +#define DATTR_MASK     0x00000013
10228 +
10229 +
10230 +#else  /* _VX_DEVICE_H */
10231 +#warning duplicate inclusion
10232 +#endif /* _VX_DEVICE_H */
10233 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/dlimit_cmd.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/dlimit_cmd.h
10234 --- linux-2.6.30.2/include/linux/vserver/dlimit_cmd.h   1970-01-01 01:00:00.000000000 +0100
10235 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/dlimit_cmd.h        2009-07-04 01:11:39.000000000 +0200
10236 @@ -0,0 +1,74 @@
10237 +#ifndef _VX_DLIMIT_CMD_H
10238 +#define _VX_DLIMIT_CMD_H
10239 +
10240 +
10241 +/*  dlimit vserver commands */
10242 +
10243 +#define VCMD_add_dlimit                VC_CMD(DLIMIT, 1, 0)
10244 +#define VCMD_rem_dlimit                VC_CMD(DLIMIT, 2, 0)
10245 +
10246 +#define VCMD_set_dlimit                VC_CMD(DLIMIT, 5, 0)
10247 +#define VCMD_get_dlimit                VC_CMD(DLIMIT, 6, 0)
10248 +
10249 +struct vcmd_ctx_dlimit_base_v0 {
10250 +       const char __user *name;
10251 +       uint32_t flags;
10252 +};
10253 +
10254 +struct vcmd_ctx_dlimit_v0 {
10255 +       const char __user *name;
10256 +       uint32_t space_used;                    /* used space in kbytes */
10257 +       uint32_t space_total;                   /* maximum space in kbytes */
10258 +       uint32_t inodes_used;                   /* used inodes */
10259 +       uint32_t inodes_total;                  /* maximum inodes */
10260 +       uint32_t reserved;                      /* reserved for root in % */
10261 +       uint32_t flags;
10262 +};
10263 +
10264 +#define CDLIM_UNSET            ((uint32_t)0UL)
10265 +#define CDLIM_INFINITY         ((uint32_t)~0UL)
10266 +#define CDLIM_KEEP             ((uint32_t)~1UL)
10267 +
10268 +#ifdef __KERNEL__
10269 +
10270 +#ifdef CONFIG_COMPAT
10271 +
10272 +#include <asm/compat.h>
10273 +
10274 +struct vcmd_ctx_dlimit_base_v0_x32 {
10275 +       compat_uptr_t name_ptr;
10276 +       uint32_t flags;
10277 +};
10278 +
10279 +struct vcmd_ctx_dlimit_v0_x32 {
10280 +       compat_uptr_t name_ptr;
10281 +       uint32_t space_used;                    /* used space in kbytes */
10282 +       uint32_t space_total;                   /* maximum space in kbytes */
10283 +       uint32_t inodes_used;                   /* used inodes */
10284 +       uint32_t inodes_total;                  /* maximum inodes */
10285 +       uint32_t reserved;                      /* reserved for root in % */
10286 +       uint32_t flags;
10287 +};
10288 +
10289 +#endif /* CONFIG_COMPAT */
10290 +
10291 +#include <linux/compiler.h>
10292 +
10293 +extern int vc_add_dlimit(uint32_t, void __user *);
10294 +extern int vc_rem_dlimit(uint32_t, void __user *);
10295 +
10296 +extern int vc_set_dlimit(uint32_t, void __user *);
10297 +extern int vc_get_dlimit(uint32_t, void __user *);
10298 +
10299 +#ifdef CONFIG_COMPAT
10300 +
10301 +extern int vc_add_dlimit_x32(uint32_t, void __user *);
10302 +extern int vc_rem_dlimit_x32(uint32_t, void __user *);
10303 +
10304 +extern int vc_set_dlimit_x32(uint32_t, void __user *);
10305 +extern int vc_get_dlimit_x32(uint32_t, void __user *);
10306 +
10307 +#endif /* CONFIG_COMPAT */
10308 +
10309 +#endif /* __KERNEL__ */
10310 +#endif /* _VX_DLIMIT_CMD_H */
10311 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/dlimit.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/dlimit.h
10312 --- linux-2.6.30.2/include/linux/vserver/dlimit.h       1970-01-01 01:00:00.000000000 +0100
10313 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/dlimit.h    2009-07-04 01:11:39.000000000 +0200
10314 @@ -0,0 +1,54 @@
10315 +#ifndef _VX_DLIMIT_H
10316 +#define _VX_DLIMIT_H
10317 +
10318 +#include "switch.h"
10319 +
10320 +
10321 +#ifdef __KERNEL__
10322 +
10323 +/*      keep in sync with CDLIM_INFINITY       */
10324 +
10325 +#define DLIM_INFINITY          (~0ULL)
10326 +
10327 +#include <linux/spinlock.h>
10328 +#include <linux/rcupdate.h>
10329 +
10330 +struct super_block;
10331 +
10332 +struct dl_info {
10333 +       struct hlist_node dl_hlist;             /* linked list of contexts */
10334 +       struct rcu_head dl_rcu;                 /* the rcu head */
10335 +       tag_t dl_tag;                           /* context tag */
10336 +       atomic_t dl_usecnt;                     /* usage count */
10337 +       atomic_t dl_refcnt;                     /* reference count */
10338 +
10339 +       struct super_block *dl_sb;              /* associated superblock */
10340 +
10341 +       spinlock_t dl_lock;                     /* protect the values */
10342 +
10343 +       unsigned long long dl_space_used;       /* used space in bytes */
10344 +       unsigned long long dl_space_total;      /* maximum space in bytes */
10345 +       unsigned long dl_inodes_used;           /* used inodes */
10346 +       unsigned long dl_inodes_total;          /* maximum inodes */
10347 +
10348 +       unsigned int dl_nrlmult;                /* non root limit mult */
10349 +};
10350 +
10351 +struct rcu_head;
10352 +
10353 +extern void rcu_free_dl_info(struct rcu_head *);
10354 +extern void unhash_dl_info(struct dl_info *);
10355 +
10356 +extern struct dl_info *locate_dl_info(struct super_block *, tag_t);
10357 +
10358 +
10359 +struct kstatfs;
10360 +
10361 +extern void vx_vsi_statfs(struct super_block *, struct kstatfs *);
10362 +
10363 +typedef uint64_t dlsize_t;
10364 +
10365 +#endif /* __KERNEL__ */
10366 +#else  /* _VX_DLIMIT_H */
10367 +#warning duplicate inclusion
10368 +#endif /* _VX_DLIMIT_H */
10369 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/global.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/global.h
10370 --- linux-2.6.30.2/include/linux/vserver/global.h       1970-01-01 01:00:00.000000000 +0100
10371 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/global.h    2009-07-04 04:51:55.000000000 +0200
10372 @@ -0,0 +1,19 @@
10373 +#ifndef _VX_GLOBAL_H
10374 +#define _VX_GLOBAL_H
10375 +
10376 +
10377 +extern atomic_t vx_global_ctotal;
10378 +extern atomic_t vx_global_cactive;
10379 +
10380 +extern atomic_t nx_global_ctotal;
10381 +extern atomic_t nx_global_cactive;
10382 +
10383 +extern atomic_t vs_global_nsproxy;
10384 +extern atomic_t vs_global_fs;
10385 +extern atomic_t vs_global_mnt_ns;
10386 +extern atomic_t vs_global_uts_ns;
10387 +extern atomic_t vs_global_user_ns;
10388 +extern atomic_t vs_global_pid_ns;
10389 +
10390 +
10391 +#endif /* _VX_GLOBAL_H */
10392 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/history.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/history.h
10393 --- linux-2.6.30.2/include/linux/vserver/history.h      1970-01-01 01:00:00.000000000 +0100
10394 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/history.h   2009-07-04 01:11:39.000000000 +0200
10395 @@ -0,0 +1,197 @@
10396 +#ifndef _VX_HISTORY_H
10397 +#define _VX_HISTORY_H
10398 +
10399 +
10400 +enum {
10401 +       VXH_UNUSED = 0,
10402 +       VXH_THROW_OOPS = 1,
10403 +
10404 +       VXH_GET_VX_INFO,
10405 +       VXH_PUT_VX_INFO,
10406 +       VXH_INIT_VX_INFO,
10407 +       VXH_SET_VX_INFO,
10408 +       VXH_CLR_VX_INFO,
10409 +       VXH_CLAIM_VX_INFO,
10410 +       VXH_RELEASE_VX_INFO,
10411 +       VXH_ALLOC_VX_INFO,
10412 +       VXH_DEALLOC_VX_INFO,
10413 +       VXH_HASH_VX_INFO,
10414 +       VXH_UNHASH_VX_INFO,
10415 +       VXH_LOC_VX_INFO,
10416 +       VXH_LOOKUP_VX_INFO,
10417 +       VXH_CREATE_VX_INFO,
10418 +};
10419 +
10420 +struct _vxhe_vxi {
10421 +       struct vx_info *ptr;
10422 +       unsigned xid;
10423 +       unsigned usecnt;
10424 +       unsigned tasks;
10425 +};
10426 +
10427 +struct _vxhe_set_clr {
10428 +       void *data;
10429 +};
10430 +
10431 +struct _vxhe_loc_lookup {
10432 +       unsigned arg;
10433 +};
10434 +
10435 +struct _vx_hist_entry {
10436 +       void *loc;
10437 +       unsigned short seq;
10438 +       unsigned short type;
10439 +       struct _vxhe_vxi vxi;
10440 +       union {
10441 +               struct _vxhe_set_clr sc;
10442 +               struct _vxhe_loc_lookup ll;
10443 +       };
10444 +};
10445 +
10446 +#ifdef CONFIG_VSERVER_HISTORY
10447 +
10448 +extern unsigned volatile int vxh_active;
10449 +
10450 +struct _vx_hist_entry *vxh_advance(void *loc);
10451 +
10452 +
10453 +static inline
10454 +void   __vxh_copy_vxi(struct _vx_hist_entry *entry, struct vx_info *vxi)
10455 +{
10456 +       entry->vxi.ptr = vxi;
10457 +       if (vxi) {
10458 +               entry->vxi.usecnt = atomic_read(&vxi->vx_usecnt);
10459 +               entry->vxi.tasks = atomic_read(&vxi->vx_tasks);
10460 +               entry->vxi.xid = vxi->vx_id;
10461 +       }
10462 +}
10463 +
10464 +
10465 +#define        __HERE__ current_text_addr()
10466 +
10467 +#define __VXH_BODY(__type, __data, __here)     \
10468 +       struct _vx_hist_entry *entry;           \
10469 +                                               \
10470 +       preempt_disable();                      \
10471 +       entry = vxh_advance(__here);            \
10472 +       __data;                                 \
10473 +       entry->type = __type;                   \
10474 +       preempt_enable();
10475 +
10476 +
10477 +       /* pass vxi only */
10478 +
10479 +#define __VXH_SMPL                             \
10480 +       __vxh_copy_vxi(entry, vxi)
10481 +
10482 +static inline
10483 +void   __vxh_smpl(struct vx_info *vxi, int __type, void *__here)
10484 +{
10485 +       __VXH_BODY(__type, __VXH_SMPL, __here)
10486 +}
10487 +
10488 +       /* pass vxi and data (void *) */
10489 +
10490 +#define __VXH_DATA                             \
10491 +       __vxh_copy_vxi(entry, vxi);             \
10492 +       entry->sc.data = data
10493 +
10494 +static inline
10495 +void   __vxh_data(struct vx_info *vxi, void *data,
10496 +                       int __type, void *__here)
10497 +{
10498 +       __VXH_BODY(__type, __VXH_DATA, __here)
10499 +}
10500 +
10501 +       /* pass vxi and arg (long) */
10502 +
10503 +#define __VXH_LONG                             \
10504 +       __vxh_copy_vxi(entry, vxi);             \
10505 +       entry->ll.arg = arg
10506 +
10507 +static inline
10508 +void   __vxh_long(struct vx_info *vxi, long arg,
10509 +                       int __type, void *__here)
10510 +{
10511 +       __VXH_BODY(__type, __VXH_LONG, __here)
10512 +}
10513 +
10514 +
10515 +static inline
10516 +void   __vxh_throw_oops(void *__here)
10517 +{
10518 +       __VXH_BODY(VXH_THROW_OOPS, {}, __here);
10519 +       /* prevent further acquisition */
10520 +       vxh_active = 0;
10521 +}
10522 +
10523 +/* no trailing ';' below: callers supply it, as with the disabled stubs */
10524 +#define vxh_throw_oops()       __vxh_throw_oops(__HERE__)
10525 +
10526 +#define __vxh_get_vx_info(v, h)        __vxh_smpl(v, VXH_GET_VX_INFO, h)
10527 +#define __vxh_put_vx_info(v, h)        __vxh_smpl(v, VXH_PUT_VX_INFO, h)
10528 +
10529 +#define __vxh_init_vx_info(v, d, h) \
10530 +       __vxh_data(v, d, VXH_INIT_VX_INFO, h)
10531 +#define __vxh_set_vx_info(v, d, h) \
10532 +       __vxh_data(v, d, VXH_SET_VX_INFO, h)
10533 +#define __vxh_clr_vx_info(v, d, h) \
10534 +       __vxh_data(v, d, VXH_CLR_VX_INFO, h)
10535 +
10536 +#define __vxh_claim_vx_info(v, d, h) \
10537 +       __vxh_data(v, d, VXH_CLAIM_VX_INFO, h)
10538 +#define __vxh_release_vx_info(v, d, h) \
10539 +       __vxh_data(v, d, VXH_RELEASE_VX_INFO, h)
10540 +
10541 +#define vxh_alloc_vx_info(v) \
10542 +       __vxh_smpl(v, VXH_ALLOC_VX_INFO, __HERE__)
10543 +#define vxh_dealloc_vx_info(v) \
10544 +       __vxh_smpl(v, VXH_DEALLOC_VX_INFO, __HERE__)
10545 +
10546 +#define vxh_hash_vx_info(v) \
10547 +       __vxh_smpl(v, VXH_HASH_VX_INFO, __HERE__)
10548 +#define vxh_unhash_vx_info(v) \
10549 +       __vxh_smpl(v, VXH_UNHASH_VX_INFO, __HERE__)
10550 +
10551 +#define vxh_loc_vx_info(v, l) \
10552 +       __vxh_long(v, l, VXH_LOC_VX_INFO, __HERE__)
10553 +#define vxh_lookup_vx_info(v, l) \
10554 +       __vxh_long(v, l, VXH_LOOKUP_VX_INFO, __HERE__)
10555 +#define vxh_create_vx_info(v, l) \
10556 +       __vxh_long(v, l, VXH_CREATE_VX_INFO, __HERE__)
10557 +
10558 +extern void vxh_dump_history(void);
10559 +
10560 +
10561 +#else  /* CONFIG_VSERVER_HISTORY */
10562 +
10563 +#define        __HERE__        0
10564 +
10565 +#define vxh_throw_oops()               do { } while (0)
10566 +
10567 +#define __vxh_get_vx_info(v, h)                do { } while (0)
10568 +#define __vxh_put_vx_info(v, h)                do { } while (0)
10569 +
10570 +#define __vxh_init_vx_info(v, d, h)    do { } while (0)
10571 +#define __vxh_set_vx_info(v, d, h)     do { } while (0)
10572 +#define __vxh_clr_vx_info(v, d, h)     do { } while (0)
10573 +
10574 +#define __vxh_claim_vx_info(v, d, h)   do { } while (0)
10575 +#define __vxh_release_vx_info(v, d, h) do { } while (0)
10576 +
10577 +#define vxh_alloc_vx_info(v)           do { } while (0)
10578 +#define vxh_dealloc_vx_info(v)         do { } while (0)
10579 +
10580 +#define vxh_hash_vx_info(v)            do { } while (0)
10581 +#define vxh_unhash_vx_info(v)          do { } while (0)
10582 +
10583 +#define vxh_loc_vx_info(v, l)          do { } while (0)
10584 +#define vxh_lookup_vx_info(v, l)       do { } while (0)
10585 +#define vxh_create_vx_info(v, l)       do { } while (0)
10586 +
10587 +#define vxh_dump_history()             do { } while (0)
10588 +
10589 +
10590 +#endif /* CONFIG_VSERVER_HISTORY */
10591 +
10592 +#endif /* _VX_HISTORY_H */
10593 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/inode_cmd.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/inode_cmd.h
10594 --- linux-2.6.30.2/include/linux/vserver/inode_cmd.h    1970-01-01 01:00:00.000000000 +0100
10595 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/inode_cmd.h 2009-07-04 01:11:39.000000000 +0200
10596 @@ -0,0 +1,59 @@
10597 +#ifndef _VX_INODE_CMD_H
10598 +#define _VX_INODE_CMD_H
10599 +
10600 +
10601 +/*  inode vserver commands */
10602 +
10603 +#define VCMD_get_iattr         VC_CMD(INODE, 1, 1)
10604 +#define VCMD_set_iattr         VC_CMD(INODE, 2, 1)
10605 +
10606 +#define VCMD_fget_iattr                VC_CMD(INODE, 3, 0)
10607 +#define VCMD_fset_iattr                VC_CMD(INODE, 4, 0)
10608 +
10609 +struct vcmd_ctx_iattr_v1 {
10610 +       const char __user *name;
10611 +       uint32_t tag;
10612 +       uint32_t flags;
10613 +       uint32_t mask;
10614 +};
10615 +
10616 +struct vcmd_ctx_fiattr_v0 {
10617 +       uint32_t tag;
10618 +       uint32_t flags;
10619 +       uint32_t mask;
10620 +};
10621 +
10622 +
10623 +#ifdef __KERNEL__
10624 +
10625 +
10626 +#ifdef CONFIG_COMPAT
10627 +
10628 +#include <asm/compat.h>
10629 +
10630 +struct vcmd_ctx_iattr_v1_x32 {
10631 +       compat_uptr_t name_ptr;
10632 +       uint32_t tag;
10633 +       uint32_t flags;
10634 +       uint32_t mask;
10635 +};
10636 +
10637 +#endif /* CONFIG_COMPAT */
10638 +
10639 +#include <linux/compiler.h>
10640 +
10641 +extern int vc_get_iattr(void __user *);
10642 +extern int vc_set_iattr(void __user *);
10643 +
10644 +extern int vc_fget_iattr(uint32_t, void __user *);
10645 +extern int vc_fset_iattr(uint32_t, void __user *);
10646 +
10647 +#ifdef CONFIG_COMPAT
10648 +
10649 +extern int vc_get_iattr_x32(void __user *);
10650 +extern int vc_set_iattr_x32(void __user *);
10651 +
10652 +#endif /* CONFIG_COMPAT */
10653 +
10654 +#endif /* __KERNEL__ */
10655 +#endif /* _VX_INODE_CMD_H */
10656 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/inode.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/inode.h
10657 --- linux-2.6.30.2/include/linux/vserver/inode.h        1970-01-01 01:00:00.000000000 +0100
10658 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/inode.h     2009-07-04 01:11:39.000000000 +0200
10659 @@ -0,0 +1,38 @@
10660 +#ifndef _VX_INODE_H
10661 +#define _VX_INODE_H
10662 +
10663 +
10664 +#define IATTR_TAG      0x01000000
10665 +
10666 +#define IATTR_ADMIN    0x00000001
10667 +#define IATTR_WATCH    0x00000002
10668 +#define IATTR_HIDE     0x00000004
10669 +#define IATTR_FLAGS    0x00000007
10670 +
10671 +#define IATTR_BARRIER  0x00010000
10672 +#define IATTR_IXUNLINK 0x00020000
10673 +#define IATTR_IMMUTABLE 0x00040000
10674 +
10675 +#ifdef __KERNEL__
10676 +
10677 +
10678 +#ifdef CONFIG_VSERVER_PROC_SECURE
10679 +#define IATTR_PROC_DEFAULT     ( IATTR_ADMIN | IATTR_HIDE )
10680 +#define IATTR_PROC_SYMLINK     ( IATTR_ADMIN )
10681 +#else
10682 +#define IATTR_PROC_DEFAULT     ( IATTR_ADMIN )
10683 +#define IATTR_PROC_SYMLINK     ( IATTR_ADMIN )
10684 +#endif
10685 +
10686 +#define vx_hide_check(c, m)    (((m) & IATTR_HIDE) ? vx_check(c, m) : 1)
10687 +
10688 +#endif /* __KERNEL__ */
10689 +
10690 +/* inode ioctls */
10691 +
10692 +#define FIOC_GETXFLG   _IOR('x', 5, long)
10693 +#define FIOC_SETXFLG   _IOW('x', 6, long)
10694 +
10695 +#else  /* _VX_INODE_H */
10696 +#warning duplicate inclusion
10697 +#endif /* _VX_INODE_H */
10698 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/Kbuild linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/Kbuild
10699 --- linux-2.6.30.2/include/linux/vserver/Kbuild 1970-01-01 01:00:00.000000000 +0100
10700 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/Kbuild      2009-07-04 01:11:39.000000000 +0200
10701 @@ -0,0 +1,8 @@
10702 +
10703 +unifdef-y += context_cmd.h network_cmd.h space_cmd.h \
10704 +       cacct_cmd.h cvirt_cmd.h limit_cmd.h dlimit_cmd.h \
10705 +       inode_cmd.h tag_cmd.h sched_cmd.h signal_cmd.h \
10706 +       debug_cmd.h device_cmd.h
10707 +
10708 +unifdef-y += switch.h network.h monitor.h inode.h device.h
10709 +
10710 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/limit_cmd.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/limit_cmd.h
10711 --- linux-2.6.30.2/include/linux/vserver/limit_cmd.h    1970-01-01 01:00:00.000000000 +0100
10712 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/limit_cmd.h 2009-07-04 01:11:39.000000000 +0200
10713 @@ -0,0 +1,69 @@
10714 +#ifndef _VX_LIMIT_CMD_H
10715 +#define _VX_LIMIT_CMD_H
10716 +
10717 +
10718 +/*  rlimit vserver commands */
10719 +
10720 +#define VCMD_get_rlimit                VC_CMD(RLIMIT, 1, 0)
10721 +#define VCMD_set_rlimit                VC_CMD(RLIMIT, 2, 0)
10722 +#define VCMD_get_rlimit_mask   VC_CMD(RLIMIT, 3, 0)
10723 +#define VCMD_reset_minmax      VC_CMD(RLIMIT, 9, 0)
10724 +
10725 +struct vcmd_ctx_rlimit_v0 {
10726 +       uint32_t id;
10727 +       uint64_t minimum;
10728 +       uint64_t softlimit;
10729 +       uint64_t maximum;
10730 +};
10731 +
10732 +struct vcmd_ctx_rlimit_mask_v0 {
10733 +       uint32_t minimum;
10734 +       uint32_t softlimit;
10735 +       uint32_t maximum;
10736 +};
10737 +
10738 +#define VCMD_rlimit_stat       VC_CMD(VSTAT, 1, 0)
10739 +
10740 +struct vcmd_rlimit_stat_v0 {
10741 +       uint32_t id;
10742 +       uint32_t hits;
10743 +       uint64_t value;
10744 +       uint64_t minimum;
10745 +       uint64_t maximum;
10746 +};
10747 +
10748 +#define CRLIM_UNSET            (0ULL)
10749 +#define CRLIM_INFINITY         (~0ULL)
10750 +#define CRLIM_KEEP             (~1ULL)
10751 +
10752 +#ifdef __KERNEL__
10753 +
10754 +#ifdef CONFIG_IA32_EMULATION
10755 +
10756 +struct vcmd_ctx_rlimit_v0_x32 {
10757 +       uint32_t id;
10758 +       uint64_t minimum;
10759 +       uint64_t softlimit;
10760 +       uint64_t maximum;
10761 +} __attribute__ ((packed));
10762 +
10763 +#endif /* CONFIG_IA32_EMULATION */
10764 +
10765 +#include <linux/compiler.h>
10766 +
10767 +extern int vc_get_rlimit_mask(uint32_t, void __user *);
10768 +extern int vc_get_rlimit(struct vx_info *, void __user *);
10769 +extern int vc_set_rlimit(struct vx_info *, void __user *);
10770 +extern int vc_reset_minmax(struct vx_info *, void __user *);
10771 +
10772 +extern int vc_rlimit_stat(struct vx_info *, void __user *);
10773 +
10774 +#ifdef CONFIG_IA32_EMULATION
10775 +
10776 +extern int vc_get_rlimit_x32(struct vx_info *, void __user *);
10777 +extern int vc_set_rlimit_x32(struct vx_info *, void __user *);
10778 +
10779 +#endif /* CONFIG_IA32_EMULATION */
10780 +
10781 +#endif /* __KERNEL__ */
10782 +#endif /* _VX_LIMIT_CMD_H */
10783 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/limit_def.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/limit_def.h
10784 --- linux-2.6.30.2/include/linux/vserver/limit_def.h    1970-01-01 01:00:00.000000000 +0100
10785 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/limit_def.h 2009-07-04 01:11:39.000000000 +0200
10786 @@ -0,0 +1,47 @@
10787 +#ifndef _VX_LIMIT_DEF_H
10788 +#define _VX_LIMIT_DEF_H
10789 +
10790 +#include <asm/atomic.h>
10791 +#include <asm/resource.h>
10792 +
10793 +#include "limit.h"
10794 +
10795 +
10796 +struct _vx_res_limit {
10797 +       rlim_t soft;            /* Context soft limit */
10798 +       rlim_t hard;            /* Context hard limit */
10799 +
10800 +       rlim_atomic_t rcur;     /* Current value */
10801 +       rlim_t rmin;            /* Context minimum */
10802 +       rlim_t rmax;            /* Context maximum */
10803 +
10804 +       atomic_t lhit;          /* Limit hits */
10805 +};
10806 +
10807 +/* context sub struct */
10808 +
10809 +struct _vx_limit {
10810 +       struct _vx_res_limit res[NUM_LIMITS];
10811 +};
10812 +
10813 +#ifdef CONFIG_VSERVER_DEBUG
10814 +
10815 +static inline void __dump_vx_limit(struct _vx_limit *limit)
10816 +{
10817 +       int i;
10818 +
10819 +       printk("\t_vx_limit:");
10820 +       for (i = 0; i < NUM_LIMITS; i++) {
10821 +               printk("\t [%2d] = %8lu %8lu/%8lu, %8ld/%8ld, %8d\n",
10822 +                       i, (unsigned long)__rlim_get(limit, i),
10823 +                       (unsigned long)__rlim_rmin(limit, i),
10824 +                       (unsigned long)__rlim_rmax(limit, i),
10825 +                       (long)__rlim_soft(limit, i),
10826 +                       (long)__rlim_hard(limit, i),
10827 +                       atomic_read(&__rlim_lhit(limit, i)));
10828 +       }
10829 +}
10830 +
10831 +#endif
10832 +
10833 +#endif /* _VX_LIMIT_DEF_H */
10834 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/limit.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/limit.h
10835 --- linux-2.6.30.2/include/linux/vserver/limit.h        1970-01-01 01:00:00.000000000 +0100
10836 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/limit.h     2009-07-04 01:11:39.000000000 +0200
10837 @@ -0,0 +1,70 @@
10838 +#ifndef _VX_LIMIT_H
10839 +#define _VX_LIMIT_H
10840 +
10841 +#define VLIMIT_NSOCK   16
10842 +#define VLIMIT_OPENFD  17
10843 +#define VLIMIT_ANON    18
10844 +#define VLIMIT_SHMEM   19
10845 +#define VLIMIT_SEMARY  20
10846 +#define VLIMIT_NSEMS   21
10847 +#define VLIMIT_DENTRY  22
10848 +#define VLIMIT_MAPPED  23
10849 +
10850 +
10851 +#ifdef __KERNEL__
10852 +
10853 +#define        VLIM_NOCHECK    ((1L << VLIMIT_DENTRY) | (1L << RLIMIT_RSS))
10854 +
10855 +/*     keep in sync with CRLIM_INFINITY */
10856 +
10857 +#define        VLIM_INFINITY   (~0ULL)
10858 +
10859 +#include <asm/atomic.h>
10860 +#include <asm/resource.h>
10861 +
10862 +#ifndef RLIM_INFINITY
10863 +#warning RLIM_INFINITY is undefined
10864 +#endif
10865 +
10866 +#define __rlim_val(l, r, v)    ((l)->res[r].v)
10867 +
10868 +#define __rlim_soft(l, r)      __rlim_val(l, r, soft)
10869 +#define __rlim_hard(l, r)      __rlim_val(l, r, hard)
10870 +
10871 +#define __rlim_rcur(l, r)      __rlim_val(l, r, rcur)
10872 +#define __rlim_rmin(l, r)      __rlim_val(l, r, rmin)
10873 +#define __rlim_rmax(l, r)      __rlim_val(l, r, rmax)
10874 +
10875 +#define __rlim_lhit(l, r)      __rlim_val(l, r, lhit)
10876 +#define __rlim_hit(l, r)       atomic_inc(&__rlim_lhit(l, r))
10877 +
10878 +typedef atomic_long_t rlim_atomic_t;
10879 +typedef unsigned long rlim_t;
10880 +
10881 +#define __rlim_get(l, r)       atomic_long_read(&__rlim_rcur(l, r))
10882 +#define __rlim_set(l, r, v)    atomic_long_set(&__rlim_rcur(l, r), v)
10883 +#define __rlim_inc(l, r)       atomic_long_inc(&__rlim_rcur(l, r))
10884 +#define __rlim_dec(l, r)       atomic_long_dec(&__rlim_rcur(l, r))
10885 +#define __rlim_add(l, r, v)    atomic_long_add(v, &__rlim_rcur(l, r))
10886 +#define __rlim_sub(l, r, v)    atomic_long_sub(v, &__rlim_rcur(l, r))
10887 +
10888 +
10889 +#if    (RLIM_INFINITY == VLIM_INFINITY)
10890 +#define        VX_VLIM(r) ((long long)(long)(r))
10891 +#define        VX_RLIM(v) ((rlim_t)(v))
10892 +#else
10893 +#define        VX_VLIM(r) (((r) == RLIM_INFINITY) \
10894 +               ? VLIM_INFINITY : (long long)(r))
10895 +#define        VX_RLIM(v) (((v) == VLIM_INFINITY) \
10896 +               ? RLIM_INFINITY : (rlim_t)(v))
10897 +#endif
10898 +
10899 +struct sysinfo;
10900 +
10901 +void vx_vsi_meminfo(struct sysinfo *);
10902 +void vx_vsi_swapinfo(struct sysinfo *);
10903 +
10904 +#define NUM_LIMITS     24
10905 +
10906 +#endif /* __KERNEL__ */
10907 +#endif /* _VX_LIMIT_H */
10908 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/limit_int.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/limit_int.h
10909 --- linux-2.6.30.2/include/linux/vserver/limit_int.h    1970-01-01 01:00:00.000000000 +0100
10910 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/limit_int.h 2009-07-04 01:11:39.000000000 +0200
10911 @@ -0,0 +1,198 @@
10912 +#ifndef _VX_LIMIT_INT_H
10913 +#define _VX_LIMIT_INT_H
10914 +
10915 +#include "context.h"
10916 +
10917 +#ifdef __KERNEL__
10918 +
10919 +#define VXD_RCRES_COND(r)      VXD_CBIT(cres, r)
10920 +#define VXD_RLIMIT_COND(r)     VXD_CBIT(limit, r)
10921 +
10922 +extern const char *vlimit_name[NUM_LIMITS];
10923 +
10924 +static inline void __vx_acc_cres(struct vx_info *vxi,
10925 +       int res, int dir, void *_data, char *_file, int _line)
10926 +{
10927 +       if (VXD_RCRES_COND(res))
10928 +               vxlprintk(1, "vx_acc_cres[%5d,%s,%2d]: %5ld%s (%p)",
10929 +                       (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
10930 +                       (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
10931 +                       (dir > 0) ? "++" : "--", _data, _file, _line);
10932 +       if (!vxi)
10933 +               return;
10934 +
10935 +       if (dir > 0)
10936 +               __rlim_inc(&vxi->limit, res);
10937 +       else
10938 +               __rlim_dec(&vxi->limit, res);
10939 +}
10940 +
10941 +static inline void __vx_add_cres(struct vx_info *vxi,
10942 +       int res, int amount, void *_data, char *_file, int _line)
10943 +{
10944 +       if (VXD_RCRES_COND(res))
10945 +               vxlprintk(1, "vx_add_cres[%5d,%s,%2d]: %5ld += %5d (%p)",
10946 +                       (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
10947 +                       (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
10948 +                       amount, _data, _file, _line);
10949 +       if (amount == 0)
10950 +               return;
10951 +       if (!vxi)
10952 +               return;
10953 +       __rlim_add(&vxi->limit, res, amount);
10954 +}
10955 +
10956 +static inline
10957 +int __vx_cres_adjust_max(struct _vx_limit *limit, int res, rlim_t value)
10958 +{
10959 +       int cond = (value > __rlim_rmax(limit, res));
10960 +
10961 +       if (cond)
10962 +               __rlim_rmax(limit, res) = value;
10963 +       return cond;
10964 +}
10965 +
10966 +static inline
10967 +int __vx_cres_adjust_min(struct _vx_limit *limit, int res, rlim_t value)
10968 +{
10969 +       int cond = (value < __rlim_rmin(limit, res));
10970 +
10971 +       if (cond)
10972 +               __rlim_rmin(limit, res) = value;
10973 +       return cond;
10974 +}
10975 +
10976 +static inline
10977 +void __vx_cres_fixup(struct _vx_limit *limit, int res, rlim_t value)
10978 +{
10979 +       if (!__vx_cres_adjust_max(limit, res, value))
10980 +               __vx_cres_adjust_min(limit, res, value);
10981 +}
10982 +
10983 +
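10984 +/*     return values (nonzero means the request fits):
10985 +        -1 ... within soft limit (or soft limit unlimited)
10986 +        +1 ... no context/num == 0, or over soft but within hard limit
10987 +         0 ... over hard limit (limit hit is counted)  */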
10988 +
10989 +static inline int __vx_cres_avail(struct vx_info *vxi,
10990 +       int res, int num, char *_file, int _line)
10991 +{
10992 +       struct _vx_limit *limit;
10993 +       rlim_t value;
10994 +
10995 +       if (VXD_RLIMIT_COND(res))
10996 +               vxlprintk(1, "vx_cres_avail[%5d,%s,%2d]: %5ld/%5ld > %5ld + %5d",
10997 +                       (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
10998 +                       (vxi ? (long)__rlim_soft(&vxi->limit, res) : -1),
10999 +                       (vxi ? (long)__rlim_hard(&vxi->limit, res) : -1),
11000 +                       (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
11001 +                       num, _file, _line);
11002 +       if (!vxi)
11003 +               return 1;
11004 +
11005 +       limit = &vxi->limit;
11006 +       value = __rlim_get(limit, res);
11007 +
11008 +       if (!__vx_cres_adjust_max(limit, res, value))
11009 +               __vx_cres_adjust_min(limit, res, value);
11010 +
11011 +       if (num == 0)
11012 +               return 1;
11013 +
11014 +       if (__rlim_soft(limit, res) == RLIM_INFINITY)
11015 +               return -1;
11016 +       if (value + num <= __rlim_soft(limit, res))
11017 +               return -1;
11018 +
11019 +       if (__rlim_hard(limit, res) == RLIM_INFINITY)
11020 +               return 1;
11021 +       if (value + num <= __rlim_hard(limit, res))
11022 +               return 1;
11023 +
11024 +       __rlim_hit(limit, res);
11025 +       return 0;
11026 +}
11027 +
11028 +
11029 +static const int VLA_RSS[] = { RLIMIT_RSS, VLIMIT_ANON, VLIMIT_MAPPED, 0 };
11030 +
11031 +static inline
11032 +rlim_t __vx_cres_array_sum(struct _vx_limit *limit, const int *array)
11033 +{
11034 +       rlim_t value, sum = 0;
11035 +       int res;
11036 +
11037 +       while ((res = *array++)) {
11038 +               value = __rlim_get(limit, res);
11039 +               __vx_cres_fixup(limit, res, value);
11040 +               sum += value;
11041 +       }
11042 +       return sum;
11043 +}
11044 +
11045 +static inline    /* stores the sum of array[1..] into the resource named by array[0]; returns that sum */
11046 +rlim_t __vx_cres_array_fixup(struct _vx_limit *limit, const int *array)
11047 +{
11048 +       rlim_t value = __vx_cres_array_sum(limit, array + 1);
11049 +       int res = *array;
11050 +
11051 +       if (value == __rlim_get(limit, res))    /* stored value already matches the sum: nothing to write */
11052 +               return value;
11053 +
11054 +       __rlim_set(limit, res, value);
11055 +       /* now adjust min/max; the min helper is only called when the max helper returns 0 */
11056 +       if (!__vx_cres_adjust_max(limit, res, value))
11057 +               __vx_cres_adjust_min(limit, res, value);
11058 +
11059 +       return value;
11060 +}
11061 +
11062 +static inline int __vx_cres_array_avail(struct vx_info *vxi,    /* availability check for a resource that is the sum of others */
11063 +       const int *array, int num, char *_file, int _line)      /* _file/_line are handed through to __vx_cres_avail() */
11064 +{
11065 +       struct _vx_limit *limit;
11066 +       rlim_t value = 0;
11067 +       int res;
11068 +
11069 +       if (num == 0)   /* zero-sized request: report 1 without touching any state */
11070 +               return 1;
11071 +       if (!vxi)       /* no context given: report 1 */
11072 +               return 1;
11073 +
11074 +       limit = &vxi->limit;
11075 +       res = *array;   /* array[0] names the resource that is checked */
11076 +       value = __vx_cres_array_sum(limit, array + 1);
11077 +
11078 +       __rlim_set(limit, res, value);  /* unlike __vx_cres_array_fixup(), the sum is stored unconditionally */
11079 +       __vx_cres_fixup(limit, res, value);
11080 +
11081 +       return __vx_cres_avail(vxi, res, num, _file, _line);    /* result of the single-resource check on array[0] */
11082 +}
11083 +
11084 +
11085 +static inline void vx_limit_fixup(struct _vx_limit *limit, int id)      /* id < 0: fix up every resource; id > 0: only that resource */
11086 +{
11087 +       rlim_t value;
11088 +       int res;
11089 +
11090 +       /* complex resources first: RLIMIT_RSS is rebuilt from the VLA_RSS list */
11091 +       if ((id < 0) || (id == RLIMIT_RSS))
11092 +               __vx_cres_array_fixup(limit, VLA_RSS);
11093 +
11094 +       for (res = 0; res < NUM_LIMITS; res++) {
11095 +               if ((id > 0) && (res != id))    /* NOTE(review): id == 0 is not filtered here, so it walks every resource like id < 0 -- confirm intended */
11096 +                       continue;
11097 +
11098 +               value = __rlim_get(limit, res);
11099 +               __vx_cres_fixup(limit, res, value);
11100 +
11101 +               /* not supposed to happen, maybe warn? (clamps the recorded rmax down to the hard limit) */
11102 +               if (__rlim_rmax(limit, res) > __rlim_hard(limit, res))
11103 +                       __rlim_rmax(limit, res) = __rlim_hard(limit, res);
11104 +       }
11105 +}
11106 +
11107 +
11108 +#endif /* __KERNEL__ */
11109 +#endif /* _VX_LIMIT_INT_H */
11110 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/monitor.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/monitor.h
11111 --- linux-2.6.30.2/include/linux/vserver/monitor.h      1970-01-01 01:00:00.000000000 +0100
11112 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/monitor.h   2009-07-04 01:11:39.000000000 +0200
11113 @@ -0,0 +1,96 @@
11114 +#ifndef _VX_MONITOR_H
11115 +#define _VX_MONITOR_H
11116 +
11117 +#include <linux/types.h>
11118 +
11119 +enum {   /* monitor codes; implicit enumerator values are spelled out below */
11120 +       VXM_UNUSED = 0,
11121 +
11122 +       VXM_SYNC = 0x10,
11123 +
11124 +       VXM_UPDATE = 0x20,
11125 +       VXM_UPDATE_1,   /* 0x21 */
11126 +       VXM_UPDATE_2,   /* 0x22 */
11127 +
11128 +       VXM_RQINFO_1 = 0x24,
11129 +       VXM_RQINFO_2,   /* 0x25 */
11130 +
11131 +       VXM_ACTIVATE = 0x40,
11132 +       VXM_DEACTIVATE, /* 0x41 */
11133 +       VXM_IDLE,       /* 0x42 */
11134 +
11135 +       VXM_HOLD = 0x44,
11136 +       VXM_UNHOLD,     /* 0x45 */
11137 +
11138 +       VXM_MIGRATE = 0x48,
11139 +       VXM_RESCHED,    /* 0x49 */
11140 +
11141 +       /* all other bits are flags */
11142 +       VXM_SCHED = 0x80,
11143 +};
11144 +
11145 +struct _vxm_update_1 {   /* three 32-bit words; member u1 of the union in struct _vx_mon_entry */
11146 +       uint32_t tokens_max;
11147 +       uint32_t fill_rate;
11148 +       uint32_t interval;
11149 +};
11150 +
11151 +struct _vxm_update_2 {   /* same shape as _vxm_update_1 except the first word is tokens_min; member u2 of struct _vx_mon_entry */
11152 +       uint32_t tokens_min;
11153 +       uint32_t fill_rate;
11154 +       uint32_t interval;
11155 +};
11156 +
11157 +struct _vxm_rqinfo_1 {   /* four 16-bit fields plus one 32-bit word; member q1 of struct _vx_mon_entry */
11158 +       uint16_t running;
11159 +       uint16_t onhold;
11160 +       uint16_t iowait;
11161 +       uint16_t uintr;
11162 +       uint32_t idle_tokens;
11163 +};
11164 +
11165 +struct _vxm_rqinfo_2 {   /* three 32-bit words; member q2 of struct _vx_mon_entry */
11166 +       uint32_t norm_time;
11167 +       uint32_t idle_time;
11168 +       uint32_t idle_skip;
11169 +};
11170 +
11171 +struct _vxm_sched {      /* three 32-bit words; member sd of struct _vx_mon_entry */
11172 +       uint32_t tokens;
11173 +       uint32_t norm_time;
11174 +       uint32_t idle_time;
11175 +};
11176 +
11177 +struct _vxm_task {       /* two 16-bit fields; embedded as tsk in struct _vxm_event */
11178 +       uint16_t pid;   /* NOTE(review): only 16 bits wide -- presumably a truncated pid, confirm against the writer */
11179 +       uint16_t state;
11180 +};
11181 +
11182 +struct _vxm_event {      /* three 32-bit words; member ev of struct _vx_mon_entry */
11183 +       uint32_t jif;   /* presumably a jiffies stamp -- confirm against the writer */
11184 +       union {         /* anonymous: seq and sec share the second word */
11185 +               uint32_t seq;
11186 +               uint32_t sec;
11187 +       };
11188 +       union {         /* anonymous: tokens, nsec and tsk share the third word */
11189 +               uint32_t tokens;
11190 +               uint32_t nsec;
11191 +               struct _vxm_task tsk;
11192 +       };
11193 +};
11194 +
11195 +struct _vx_mon_entry {   /* one monitor record: 16-bit type, 16-bit xid, then one payload variant */
11196 +       uint16_t type;  /* presumably one of the VXM_* codes selecting the payload -- confirm against the writers */
11197 +       uint16_t xid;
11198 +       union {         /* anonymous; every variant declared above is three 32-bit words */
11199 +               struct _vxm_event ev;
11200 +               struct _vxm_sched sd;
11201 +               struct _vxm_update_1 u1;
11202 +               struct _vxm_update_2 u2;
11203 +               struct _vxm_rqinfo_1 q1;
11204 +               struct _vxm_rqinfo_2 q2;
11205 +       };
11206 +};
11207 +
11208 +
11209 +#endif /* _VX_MONITOR_H */
11210 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/network_cmd.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/network_cmd.h
11211 --- linux-2.6.30.2/include/linux/vserver/network_cmd.h  1970-01-01 01:00:00.000000000 +0100
11212 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/network_cmd.h       2009-07-04 01:11:39.000000000 +0200
11213 @@ -0,0 +1,150 @@
11214 +#ifndef _VX_NETWORK_CMD_H
11215 +#define _VX_NETWORK_CMD_H
11216 +
11217 +
11218 +/* vinfo commands */
11219 +
11220 +#define VCMD_task_nid          VC_CMD(VINFO, 2, 0)
11221 +
11222 +#ifdef __KERNEL__
11223 +extern int vc_task_nid(uint32_t);
11224 +
11225 +#endif /* __KERNEL__ */
11226 +
11227 +#define VCMD_nx_info           VC_CMD(VINFO, 6, 0)     /* category VINFO, command 6, version 0 */
11228 +
11229 +struct vcmd_nx_info_v0 { /* presumably the data block of VCMD_nx_info -- confirm in vc_nx_info() */
11230 +       uint32_t nid;
11231 +       /* more to come */
11232 +};
11233 +
11234 +#ifdef __KERNEL__
11235 +extern int vc_nx_info(struct nx_info *, void __user *);
11236 +
11237 +#endif /* __KERNEL__ */
11238 +
11239 +#include <linux/in.h>
11240 +#include <linux/in6.h>
11241 +
11242 +#define VCMD_net_create_v0     VC_CMD(VNET, 1, 0)      /* category VNET, command 1, version 0 */
11243 +#define VCMD_net_create                VC_CMD(VNET, 1, 1)      /* same command, version 1 */
11244 +
11245 +struct  vcmd_net_create {        /* presumably the version 1 argument; flagword presumably carries NXF_* bits -- confirm in vc_net_create() */
11246 +       uint64_t flagword;
11247 +};
11248 +
11249 +#define VCMD_net_migrate       VC_CMD(NETMIG, 1, 0)
11250 +
11251 +#define VCMD_net_add           VC_CMD(NETALT, 1, 0)    /* category NETALT, command 1, version 0 */
11252 +#define VCMD_net_remove                VC_CMD(NETALT, 2, 0)    /* category NETALT, command 2, version 0 */
11253 +
11254 +struct vcmd_net_addr_v0 {        /* room for up to four IPv4 address/mask pairs */
11255 +       uint16_t type;
11256 +       uint16_t count; /* presumably the number of ip[]/mask[] slots in use -- confirm in vc_net_add() */
11257 +       struct in_addr ip[4];
11258 +       struct in_addr mask[4];
11259 +};
11260 +
11261 +#define VCMD_net_add_ipv4      VC_CMD(NETALT, 1, 1)    /* same category/command as VCMD_net_add, version 1 */
11262 +#define VCMD_net_remove_ipv4   VC_CMD(NETALT, 2, 1)    /* same category/command as VCMD_net_remove, version 1 */
11263 +
11264 +struct vcmd_net_addr_ipv4_v1 {   /* a single IPv4 address/mask pair, in contrast to the four-slot v0 layout */
11265 +       uint16_t type;  /* presumably NXA_TYPE_* bits from network.h -- confirm */
11266 +       uint16_t flags;
11267 +       struct in_addr ip;
11268 +       struct in_addr mask;
11269 +};
11270 +
11271 +#define VCMD_net_add_ipv6      VC_CMD(NETALT, 3, 1)    /* category NETALT, command 3, version 1 */
11272 +#define VCMD_net_remove_ipv6   VC_CMD(NETALT, 4, 1)    /* category NETALT, command 4, version 1 */
11273 +
11274 +struct vcmd_net_addr_ipv6_v1 {   /* IPv6 counterpart of vcmd_net_addr_ipv4_v1, with an extra 32-bit prefix field */
11275 +       uint16_t type;  /* presumably NXA_TYPE_* bits from network.h -- confirm */
11276 +       uint16_t flags;
11277 +       uint32_t prefix;
11278 +       struct in6_addr ip;
11279 +       struct in6_addr mask;
11280 +};
11281 +
11282 +#define VCMD_add_match_ipv4    VC_CMD(NETALT, 5, 0)    /* category NETALT, command 5, version 0 */
11283 +#define VCMD_get_match_ipv4    VC_CMD(NETALT, 6, 0)    /* category NETALT, command 6, version 0 */
11284 +
11285 +struct vcmd_match_ipv4_v0 {      /* four 16-bit fields followed by three IPv4 addresses */
11286 +       uint16_t type;
11287 +       uint16_t flags;
11288 +       uint16_t parent;
11289 +       uint16_t prefix;
11290 +       struct in_addr ip;
11291 +       struct in_addr ip2;     /* presumably the upper bound when type describes a range -- confirm */
11292 +       struct in_addr mask;
11293 +};
11294 +
11295 +#define VCMD_add_match_ipv6    VC_CMD(NETALT, 7, 0)    /* category NETALT, command 7, version 0 */
11296 +#define VCMD_get_match_ipv6    VC_CMD(NETALT, 8, 0)    /* category NETALT, command 8, version 0 */
11297 +
11298 +struct vcmd_match_ipv6_v0 {      /* same field order as vcmd_match_ipv4_v0, with IPv6 addresses */
11299 +       uint16_t type;
11300 +       uint16_t flags;
11301 +       uint16_t parent;
11302 +       uint16_t prefix;
11303 +       struct in6_addr ip;
11304 +       struct in6_addr ip2;    /* presumably the upper bound when type describes a range -- confirm */
11305 +       struct in6_addr mask;
11306 +};
11307 +
11308 +
11309 +#ifdef __KERNEL__
11310 +extern int vc_net_create(uint32_t, void __user *);
11311 +extern int vc_net_migrate(struct nx_info *, void __user *);
11312 +
11313 +extern int vc_net_add(struct nx_info *, void __user *);
11314 +extern int vc_net_remove(struct nx_info *, void __user *);
11315 +
11316 +extern int vc_net_add_ipv4(struct nx_info *, void __user *);
11317 +extern int vc_net_remove_ipv4(struct nx_info *, void __user *);
11318 +
11319 +extern int vc_net_add_ipv6(struct nx_info *, void __user *);
11320 +extern int vc_net_remove_ipv6(struct nx_info *, void __user *);
11321 +
11322 +extern int vc_add_match_ipv4(struct nx_info *, void __user *);
11323 +extern int vc_get_match_ipv4(struct nx_info *, void __user *);
11324 +
11325 +extern int vc_add_match_ipv6(struct nx_info *, void __user *);
11326 +extern int vc_get_match_ipv6(struct nx_info *, void __user *);
11327 +
11328 +#endif /* __KERNEL__ */
11329 +
11330 +
11331 +/* flag commands */
11332 +
11333 +#define VCMD_get_nflags                VC_CMD(FLAGS, 5, 0)     /* category FLAGS, command 5, version 0 */
11334 +#define VCMD_set_nflags                VC_CMD(FLAGS, 6, 0)     /* category FLAGS, command 6, version 0 */
11335 +
11336 +struct vcmd_net_flags_v0 {       /* two 64-bit words */
11337 +       uint64_t flagword;      /* presumably NXF_* bits from network.h -- confirm in vc_get_nflags()/vc_set_nflags() */
11338 +       uint64_t mask;          /* presumably selects which flagword bits are affected -- confirm */
11339 +};
11340 +
11341 +#ifdef __KERNEL__
11342 +extern int vc_get_nflags(struct nx_info *, void __user *);
11343 +extern int vc_set_nflags(struct nx_info *, void __user *);
11344 +
11345 +#endif /* __KERNEL__ */
11346 +
11347 +
11348 +/* network caps commands */
11349 +
11350 +#define VCMD_get_ncaps         VC_CMD(FLAGS, 7, 0)     /* category FLAGS, command 7, version 0 */
11351 +#define VCMD_set_ncaps         VC_CMD(FLAGS, 8, 0)     /* category FLAGS, command 8, version 0 */
11352 +
11353 +struct vcmd_net_caps_v0 {        /* two 64-bit words */
11354 +       uint64_t ncaps;         /* presumably NXC_* bits from network.h -- confirm in vc_get_ncaps()/vc_set_ncaps() */
11355 +       uint64_t cmask;         /* presumably selects which ncaps bits are affected -- confirm */
11356 +};
11357 +
11358 +#ifdef __KERNEL__
11359 +extern int vc_get_ncaps(struct nx_info *, void __user *);
11360 +extern int vc_set_ncaps(struct nx_info *, void __user *);
11361 +
11362 +#endif /* __KERNEL__ */
11363 +#endif /* _VX_NETWORK_CMD_H */
11364 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/network.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/network.h
11365 --- linux-2.6.30.2/include/linux/vserver/network.h      1970-01-01 01:00:00.000000000 +0100
11366 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/network.h   2009-07-04 01:11:39.000000000 +0200
11367 @@ -0,0 +1,146 @@
11368 +#ifndef _VX_NETWORK_H
11369 +#define _VX_NETWORK_H
11370 +
11371 +#include <linux/types.h>
11372 +
11373 +
11374 +#define MAX_N_CONTEXT  65535   /* Arbitrary limit */
11375 +
11376 +
11377 +/* network flags */
11378 +
11379 +#define NXF_INFO_PRIVATE       0x00000008
11380 +
11381 +#define NXF_SINGLE_IP          0x00000100
11382 +#define NXF_LBACK_REMAP                0x00000200
11383 +#define NXF_LBACK_ALLOW                0x00000400
11384 +
11385 +#define NXF_HIDE_NETIF         0x02000000
11386 +#define NXF_HIDE_LBACK         0x04000000
11387 +
11388 +#define NXF_STATE_SETUP                (1ULL << 32)    /* flags from here on need a 64-bit flag word */
11389 +#define NXF_STATE_ADMIN                (1ULL << 34)
11390 +
11391 +#define NXF_SC_HELPER          (1ULL << 36)
11392 +#define NXF_PERSISTENT         (1ULL << 38)
11393 +
11394 +#define NXF_ONE_TIME           (0x0005ULL << 32)       /* bits 32 and 34, i.e. NXF_STATE_SETUP | NXF_STATE_ADMIN */
11395 +
11396 +
11397 +#define        NXF_INIT_SET            (__nxf_init_set())      /* evaluates to the value built by the function below */
11398 +
11399 +static inline uint64_t __nxf_init_set(void) {    /* builds the NXF_INIT_SET flag word from the kernel configuration */
11400 +       return    NXF_STATE_ADMIN       /* always included */
11401 +#ifdef CONFIG_VSERVER_AUTO_LBACK
11402 +               | NXF_LBACK_REMAP       /* only with CONFIG_VSERVER_AUTO_LBACK */
11403 +               | NXF_HIDE_LBACK        /* only with CONFIG_VSERVER_AUTO_LBACK */
11404 +#endif
11405 +#ifdef CONFIG_VSERVER_AUTO_SINGLE
11406 +               | NXF_SINGLE_IP         /* only with CONFIG_VSERVER_AUTO_SINGLE */
11407 +#endif
11408 +               | NXF_HIDE_NETIF;       /* always included */
11409 +}
11410 +
11411 +
11412 +/* network caps */
11413 +
11414 +#define NXC_TUN_CREATE         0x00000001
11415 +
11416 +#define NXC_RAW_ICMP           0x00000100
11417 +
11418 +
11419 +/* address types */
11420 +
11421 +#define NXA_TYPE_IPV4          0x0001
11422 +#define NXA_TYPE_IPV6          0x0002
11423 +
11424 +#define NXA_TYPE_NONE          0x0000  /* NX_IPV4()/NX_IPV6() compare an address type against this value */
11425 +#define NXA_TYPE_ANY           0x00FF  /* all eight low bits set */
11426 +
11427 +#define NXA_TYPE_ADDR          0x0010
11428 +#define NXA_TYPE_MASK          0x0020
11429 +#define NXA_TYPE_RANGE         0x0040
11430 +
11431 +#define NXA_MASK_ALL           (NXA_TYPE_ADDR | NXA_TYPE_MASK | NXA_TYPE_RANGE)        /* 0x0070 */
11432 +
11433 +#define NXA_MOD_BCAST          0x0100
11434 +#define NXA_MOD_LBACK          0x0200
11435 +
11436 +#define NXA_LOOPBACK           0x1000
11437 +
11438 +#define NXA_MASK_BIND          (NXA_MASK_ALL | NXA_MOD_BCAST | NXA_MOD_LBACK)  /* 0x0370 */
11439 +#define NXA_MASK_SHOW          (NXA_MASK_ALL | NXA_LOOPBACK)   /* 0x1070 */
11440 +
11441 +#ifdef __KERNEL__
11442 +
11443 +#include <linux/list.h>
11444 +#include <linux/spinlock.h>
11445 +#include <linux/rcupdate.h>
11446 +#include <linux/in.h>
11447 +#include <linux/in6.h>
11448 +#include <asm/atomic.h>
11449 +
11450 +struct nx_addr_v4 {      /* one IPv4 entry; struct nx_info embeds the first one as member v4 */
11451 +       struct nx_addr_v4 *next;        /* link to a further entry of the same type */
11452 +       struct in_addr ip[2];           /* presumably [0] is the address and [1] a second bound for ranges -- confirm */
11453 +       struct in_addr mask;
11454 +       uint16_t type;                  /* compared against NXA_TYPE_NONE by NX_IPV4() */
11455 +       uint16_t flags;
11456 +};
11457 +
11458 +struct nx_addr_v6 {      /* one IPv6 entry; struct nx_info embeds the first one as member v6 under CONFIG_IPV6 */
11459 +       struct nx_addr_v6 *next;        /* link to a further entry of the same type */
11460 +       struct in6_addr ip;
11461 +       struct in6_addr mask;
11462 +       uint32_t prefix;
11463 +       uint16_t type;                  /* compared against NXA_TYPE_NONE by NX_IPV6() */
11464 +       uint16_t flags;
11465 +};
11466 +
11467 +struct nx_info {         /* per network context state */
11468 +       struct hlist_node nx_hlist;     /* linked list of nxinfos */
11469 +       nid_t nx_id;                    /* vnet id */
11470 +       atomic_t nx_usecnt;             /* usage count */
11471 +       atomic_t nx_tasks;              /* tasks count */
11472 +       int nx_state;                   /* context state (presumably NXS_* bits defined below -- confirm) */
11473 +
11474 +       uint64_t nx_flags;              /* network flag word (presumably NXF_* bits -- confirm) */
11475 +       uint64_t nx_ncaps;              /* network capabilities (presumably NXC_* bits -- confirm) */
11476 +
11477 +       struct in_addr v4_lback;        /* Loopback address */
11478 +       struct in_addr v4_bcast;        /* Broadcast address */
11479 +       struct nx_addr_v4 v4;           /* First/Single ipv4 address */
11480 +#ifdef CONFIG_IPV6
11481 +       struct nx_addr_v6 v6;           /* First/Single ipv6 address */
11482 +#endif
11483 +       char nx_name[65];               /* network context name */
11484 +};
11485 +
11486 +
11487 +/* status flags */
11488 +
11489 +#define NXS_HASHED      0x0001
11490 +#define NXS_SHUTDOWN    0x0100
11491 +#define NXS_RELEASED    0x8000
11492 +
11493 +extern struct nx_info *lookup_nx_info(int);
11494 +
11495 +extern int get_nid_list(int, unsigned int *, int);
11496 +extern int nid_is_hashed(nid_t);
11497 +
11498 +extern int nx_migrate_task(struct task_struct *, struct nx_info *);
11499 +
11500 +extern long vs_net_change(struct nx_info *, unsigned int);
11501 +
11502 +struct sock;
11503 +
11504 +
11505 +#define NX_IPV4(n)     ((n)->v4.type != NXA_TYPE_NONE) /* true when the embedded first IPv4 entry has a type set */
11506 +#ifdef  CONFIG_IPV6
11507 +#define NX_IPV6(n)     ((n)->v6.type != NXA_TYPE_NONE) /* same test on the embedded first IPv6 entry */
11508 +#else
11509 +#define NX_IPV6(n)     (0)     /* no v6 member without CONFIG_IPV6; n is not evaluated */
11510 +#endif
11511 +
11512 +#endif /* __KERNEL__ */
11513 +#endif /* _VX_NETWORK_H */
11514 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/percpu.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/percpu.h
11515 --- linux-2.6.30.2/include/linux/vserver/percpu.h       1970-01-01 01:00:00.000000000 +0100
11516 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/percpu.h    2009-07-04 01:11:39.000000000 +0200
11517 @@ -0,0 +1,14 @@
11518 +#ifndef _VX_PERCPU_H
11519 +#define _VX_PERCPU_H
11520 +
11521 +#include "cvirt_def.h"
11522 +#include "sched_def.h"
11523 +
11524 +struct _vx_percpu {      /* bundles the cvirt and sched "_pc" sub-structs into one object */
11525 +       struct _vx_cvirt_pc cvirt;      /* declared in cvirt_def.h (included above) */
11526 +       struct _vx_sched_pc sched;      /* declared in sched_def.h (included above) */
11527 +};
11528 +
11529 +#define        PERCPU_PERCTX   (sizeof(struct _vx_percpu))     /* size in bytes of one such bundle */
11530 +
11531 +#endif /* _VX_PERCPU_H */
11532 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/pid.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/pid.h
11533 --- linux-2.6.30.2/include/linux/vserver/pid.h  1970-01-01 01:00:00.000000000 +0100
11534 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/pid.h       2009-07-04 01:11:39.000000000 +0200
11535 @@ -0,0 +1,51 @@
11536 +#ifndef _VSERVER_PID_H
11537 +#define _VSERVER_PID_H
11538 +
11539 +/* pid faking stuff */
11540 +
11541 +#define vx_info_map_pid(v, p) \
11542 +       __vx_info_map_pid((v), (p), __func__, __FILE__, __LINE__)       /* records the call site for the debug output */
11543 +#define vx_info_map_tgid(v,p)  vx_info_map_pid(v,p)    /* tgids use the same mapping */
11544 +#define vx_map_pid(p) vx_info_map_pid(current->vx_info, p)     /* maps against the current task's context */
11545 +#define vx_map_tgid(p) vx_map_pid(p)
11546 +/* Maps a pid for presentation: when the VXF_INFO_INIT check on vxi holds, vxi->vx_initpid is reported as 1; every other pid is returned unchanged. */
11547 +static inline int __vx_info_map_pid(struct vx_info *vxi, int pid,
11548 +       const char *func, const char *file, int line)
11549 +{
11550 +       if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) {
11551 +               vxfprintk(VXD_CBIT(cvirt, 2),
11552 +                       "vx_map_tgid: %p/%llx: %d -> %d",
11553 +                       vxi, (long long)vxi->vx_flags, pid,
11554 +                       (pid && pid == vxi->vx_initpid) ? 1 : pid,      /* the same value the code below returns */
11555 +                       func, file, line);
11556 +               if (pid == 0)   /* checked first, so 0 stays 0 even when vx_initpid is 0 */
11557 +                       return 0;
11558 +               if (pid == vxi->vx_initpid)
11559 +                       return 1;
11560 +       }
11561 +       return pid;
11562 +}
11563 +
11564 +#define vx_info_rmap_pid(v, p) \
11565 +       __vx_info_rmap_pid((v), (p), __func__, __FILE__, __LINE__)      /* records the call site for the debug output */
11566 +#define vx_rmap_pid(p) vx_info_rmap_pid(current->vx_info, p)   /* maps against the current task's context */
11567 +#define vx_rmap_tgid(p) vx_rmap_pid(p)
11568 +/* Reverse of __vx_info_map_pid(): when the VXF_INFO_INIT check on vxi holds, pid 1 becomes vxi->vx_initpid (if set) and vx_initpid itself yields ~0U. */
11569 +static inline int __vx_info_rmap_pid(struct vx_info *vxi, int pid,
11570 +       const char *func, const char *file, int line)
11571 +{
11572 +       if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) {
11573 +               vxfprintk(VXD_CBIT(cvirt, 2),
11574 +                       "vx_rmap_tgid: %p/%llx: %d -> %d",
11575 +                       vxi, (long long)vxi->vx_flags, pid,
11576 +                       (pid == 1) ? vxi->vx_initpid : pid,     /* logged value; differs from the return value in the ~0U case and when vx_initpid is 0 */
11577 +                       func, file, line);
11578 +               if ((pid == 1) && vxi->vx_initpid)      /* pid 1 is only remapped once an init pid is recorded */
11579 +                       return vxi->vx_initpid;
11580 +               if (pid == vxi->vx_initpid)
11581 +                       return ~0U;     /* unsigned all-ones converted to the int return type */
11582 +       }
11583 +       return pid;
11584 +}
11585 +
11586 +#endif
11587 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/sched_cmd.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/sched_cmd.h
11588 --- linux-2.6.30.2/include/linux/vserver/sched_cmd.h    1970-01-01 01:00:00.000000000 +0100
11589 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/sched_cmd.h 2009-07-04 01:11:39.000000000 +0200
11590 @@ -0,0 +1,108 @@
11591 +#ifndef _VX_SCHED_CMD_H
11592 +#define _VX_SCHED_CMD_H
11593 +
11594 +
11595 +/*  sched vserver commands */
11596 +
11597 +#define VCMD_set_sched_v2      VC_CMD(SCHED, 1, 2)
11598 +#define VCMD_set_sched_v3      VC_CMD(SCHED, 1, 3)
11599 +#define VCMD_set_sched_v4      VC_CMD(SCHED, 1, 4)
11600 +
11601 +struct vcmd_set_sched_v2 {       /* presumably the argument of VCMD_set_sched_v2 -- confirm in vc_set_sched_v2() */
11602 +       int32_t fill_rate;
11603 +       int32_t interval;
11604 +       int32_t tokens;
11605 +       int32_t tokens_min;
11606 +       int32_t tokens_max;
11607 +       uint64_t cpu_mask;      /* NOTE(review): follows five int32_t, so padding before it depends on the ABI's uint64_t alignment -- check 32-bit compat callers */
11608 +};
11609 +
11610 +struct vcmd_set_sched_v3 {       /* seven 32-bit words; presumably the argument of VCMD_set_sched_v3 -- confirm in vc_set_sched_v3() */
11611 +       uint32_t set_mask;      /* presumably VXSM_* bits selecting which fields apply -- confirm */
11612 +       int32_t fill_rate;
11613 +       int32_t interval;
11614 +       int32_t tokens;
11615 +       int32_t tokens_min;
11616 +       int32_t tokens_max;
11617 +       int32_t priority_bias;  /* the v4/v5 layouts name this field prio_bias */
11618 +};
11619 +
11620 +struct vcmd_set_sched_v4 {       /* the v3 field sequence followed by cpu_id and bucket_id */
11621 +       uint32_t set_mask;      /* presumably VXSM_* bits selecting which fields apply -- confirm */
11622 +       int32_t fill_rate;
11623 +       int32_t interval;
11624 +       int32_t tokens;
11625 +       int32_t tokens_min;
11626 +       int32_t tokens_max;
11627 +       int32_t prio_bias;
11628 +       int32_t cpu_id;
11629 +       int32_t bucket_id;
11630 +};
11631 +
11632 +#define VCMD_set_sched         VC_CMD(SCHED, 1, 5)     /* category SCHED, command 1, version 5 */
11633 +#define VCMD_get_sched         VC_CMD(SCHED, 2, 5)     /* category SCHED, command 2, version 5 */
11634 +
11635 +struct vcmd_sched_v5 {   /* eleven 32-bit words */
11636 +       uint32_t mask;          /* presumably VXSM_* bits -- confirm in vc_set_sched()/vc_get_sched() */
11637 +       int32_t cpu_id;
11638 +       int32_t bucket_id;
11639 +       int32_t fill_rate[2];   /* two entries, matching fill_rate[2] in struct _vx_sched (sched_def.h) */
11640 +       int32_t interval[2];    /* two entries, matching interval[2] in struct _vx_sched (sched_def.h) */
11641 +       int32_t tokens;
11642 +       int32_t tokens_min;
11643 +       int32_t tokens_max;
11644 +       int32_t prio_bias;
11645 +};
11646 +
11647 +#define VXSM_FILL_RATE         0x0001
11648 +#define VXSM_INTERVAL          0x0002
11649 +#define VXSM_FILL_RATE2                0x0004
11650 +#define VXSM_INTERVAL2         0x0008
11651 +#define VXSM_TOKENS            0x0010
11652 +#define VXSM_TOKENS_MIN                0x0020
11653 +#define VXSM_TOKENS_MAX                0x0040
11654 +#define VXSM_PRIO_BIAS         0x0100  /* 0x0080 has no name in this list */
11655 +
11656 +#define VXSM_IDLE_TIME         0x0200
11657 +#define VXSM_FORCE             0x0400
11658 +
11659 +#define        VXSM_V3_MASK            0x0173  /* FILL_RATE | INTERVAL | TOKENS | TOKENS_MIN | TOKENS_MAX | PRIO_BIAS */
11660 +#define        VXSM_SET_MASK           0x01FF  /* bits 0..8: V3_MASK plus FILL_RATE2, INTERVAL2 and the unnamed 0x0080 */
11661 +
11662 +#define VXSM_CPU_ID            0x1000
11663 +#define VXSM_BUCKET_ID         0x2000
11664 +
11665 +#define VXSM_MSEC              0x4000
11666 +
11667 +#define SCHED_KEEP             (-2)    /* only for v2 */
11668 +
11669 +#ifdef __KERNEL__
11670 +
11671 +#include <linux/compiler.h>
11672 +
11673 +extern int vc_set_sched_v2(struct vx_info *, void __user *);
11674 +extern int vc_set_sched_v3(struct vx_info *, void __user *);
11675 +extern int vc_set_sched_v4(struct vx_info *, void __user *);
11676 +extern int vc_set_sched(struct vx_info *, void __user *);
11677 +extern int vc_get_sched(struct vx_info *, void __user *);
11678 +
11679 +#endif /* __KERNEL__ */
11680 +
11681 +#define VCMD_sched_info                VC_CMD(SCHED, 3, 0)
11682 +
11683 +struct vcmd_sched_info { /* presumably the data block of VCMD_sched_info -- confirm in vc_sched_info() */
11684 +       int32_t cpu_id;
11685 +       int32_t bucket_id;
11686 +       uint64_t user_msec;     /* the two int32_t above fill one 8-byte slot, so the 64-bit fields start at offset 8 */
11687 +       uint64_t sys_msec;
11688 +       uint64_t hold_msec;
11689 +       uint32_t token_usec;
11690 +       int32_t vavavoom;       /* struct _vx_sched_pc (sched_def.h) has a field of the same name */
11691 +};
11692 +
11693 +#ifdef __KERNEL__
11694 +
11695 +extern int vc_sched_info(struct vx_info *, void __user *);
11696 +
11697 +#endif /* __KERNEL__ */
11698 +#endif /* _VX_SCHED_CMD_H */
11699 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/sched_def.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/sched_def.h
11700 --- linux-2.6.30.2/include/linux/vserver/sched_def.h    1970-01-01 01:00:00.000000000 +0100
11701 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/sched_def.h 2009-07-04 01:11:39.000000000 +0200
11702 @@ -0,0 +1,68 @@
11703 +#ifndef _VX_SCHED_DEF_H
11704 +#define _VX_SCHED_DEF_H
11705 +
11706 +#include <linux/spinlock.h>
11707 +#include <linux/jiffies.h>
11708 +#include <linux/cpumask.h>
11709 +#include <asm/atomic.h>
11710 +#include <asm/param.h>
11711 +
11712 +
11713 +/* context sub struct */
11714 +
11715 +struct _vx_sched {       /* token bucket settings; vx_update_sched_param() in sched.h takes one together with a _vx_sched_pc */
11716 +       spinlock_t tokens_lock;         /* lock for token bucket */
11717 +
11718 +       int tokens;                     /* number of CPU tokens */
11719 +       int fill_rate[2];               /* Fill rate: add X tokens... ([1] presumably pairs with VXSM_FILL_RATE2 -- confirm) */
11720 +       int interval[2];                /* Divisor:   per Y jiffies   ([1] presumably pairs with VXSM_INTERVAL2 -- confirm) */
11721 +       int tokens_min;                 /* Limit:     minimum for unhold */
11722 +       int tokens_max;                 /* Limit:     no more than N tokens */
11723 +
11724 +       int prio_bias;                  /* bias offset for priority */
11725 +
11726 +       unsigned update_mask;           /* which features should be updated */
11727 +       cpumask_t update;               /* CPUs which should update */
11728 +};
11729 +
11730 +struct _vx_sched_pc {    /* embedded as member sched of struct _vx_percpu (percpu.h); repeats the settings of _vx_sched and adds accounting */
11731 +       int tokens;                     /* number of CPU tokens */
11732 +       int flags;                      /* bucket flags (presumably the VXSF_* bits defined below -- confirm) */
11733 +
11734 +       int fill_rate[2];               /* Fill rate: add X tokens... */
11735 +       int interval[2];                /* Divisor:   per Y jiffies   */
11736 +       int tokens_min;                 /* Limit:     minimum for unhold */
11737 +       int tokens_max;                 /* Limit:     no more than N tokens */
11738 +
11739 +       int prio_bias;                  /* bias offset for priority */
11740 +       int vavavoom;                   /* last calculated vavavoom */
11741 +
11742 +       unsigned long norm_time;        /* last time accounted */
11743 +       unsigned long idle_time;        /* non linear time for fair sched */
11744 +       unsigned long token_time;       /* token time for accounting */
11745 +       unsigned long onhold;           /* jiffies when put on hold */
11746 +
11747 +       uint64_t user_ticks;            /* token tick events */
11748 +       uint64_t sys_ticks;             /* token tick events */
11749 +       uint64_t hold_ticks;            /* token ticks paused */
11750 +};
11751 +
11752 +
11753 +#define VXSF_ONHOLD    0x0001
11754 +#define VXSF_IDLE_TIME 0x0100
11755 +
11756 +#ifdef CONFIG_VSERVER_DEBUG
11757 +
11758 +static inline void __dump_vx_sched(struct _vx_sched *sched)     /* debug dump via printk; only built with CONFIG_VSERVER_DEBUG */
11759 +{
11760 +       printk("\t_vx_sched:\n");
11761 +       printk("\t tokens: %4d/%4d, %4d/%4d, %4d, %4d\n",       /* prints rate/interval pairs and min/max; sched->tokens itself is not printed */
11762 +               sched->fill_rate[0], sched->interval[0],
11763 +               sched->fill_rate[1], sched->interval[1],
11764 +               sched->tokens_min, sched->tokens_max);
11765 +       printk("\t priority = %4d\n", sched->prio_bias);
11766 +}
11767 +
11768 +#endif
11769 +
11770 +#endif /* _VX_SCHED_DEF_H */
11771 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/sched.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/sched.h
11772 --- linux-2.6.30.2/include/linux/vserver/sched.h        1970-01-01 01:00:00.000000000 +0100
11773 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/sched.h     2009-07-04 01:11:39.000000000 +0200
11774 @@ -0,0 +1,26 @@
11775 +#ifndef _VX_SCHED_H
11776 +#define _VX_SCHED_H
11777 +
11778 +
11779 +#ifdef __KERNEL__
11780 +
11781 +struct timespec;
11782 +
11783 +void vx_vsi_uptime(struct timespec *, struct timespec *);
11784 +
11785 +
11786 +struct vx_info;
11787 +
11788 +void vx_update_load(struct vx_info *);
11789 +
11790 +
11791 +int vx_tokens_recalc(struct _vx_sched_pc *,
11792 +       unsigned long *, unsigned long *, int [2]);
11793 +
11794 +void vx_update_sched_param(struct _vx_sched *sched,
11795 +       struct _vx_sched_pc *sched_pc);
11796 +
11797 +#endif /* __KERNEL__ */
11798 +#else  /* _VX_SCHED_H */
11799 +#warning duplicate inclusion
11800 +#endif /* _VX_SCHED_H */
11801 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/signal_cmd.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/signal_cmd.h
11802 --- linux-2.6.30.2/include/linux/vserver/signal_cmd.h   1970-01-01 01:00:00.000000000 +0100
11803 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/signal_cmd.h        2009-07-04 01:11:39.000000000 +0200
11804 @@ -0,0 +1,43 @@
11805 +#ifndef _VX_SIGNAL_CMD_H
11806 +#define _VX_SIGNAL_CMD_H
11807 +
11808 +
11809 +/*  signalling vserver commands */
11810 +
11811 +#define VCMD_ctx_kill          VC_CMD(PROCTRL, 1, 0)   /* category PROCTRL, command 1, version 0 */
11812 +#define VCMD_wait_exit         VC_CMD(EVENT, 99, 0)    /* category EVENT, command 99, version 0 */
11813 +
11814 +struct vcmd_ctx_kill_v0 {        /* presumably the argument of VCMD_ctx_kill -- confirm in vc_ctx_kill() */
11815 +       int32_t pid;
11816 +       int32_t sig;
11817 +};
11818 +
11819 +struct vcmd_wait_exit_v0 {       /* presumably the data block of VCMD_wait_exit -- confirm in vc_wait_exit() */
11820 +       int32_t reboot_cmd;
11821 +       int32_t exit_code;
11822 +};
11823 +
11824 +#ifdef __KERNEL__
11825 +
11826 +extern int vc_ctx_kill(struct vx_info *, void __user *);
11827 +extern int vc_wait_exit(struct vx_info *, void __user *);
11828 +
11829 +#endif /* __KERNEL__ */
11830 +
11831 +/*  process alteration commands */
11832 +
11833 +#define VCMD_get_pflags                VC_CMD(PROCALT, 5, 0)   /* category PROCALT, command 5, version 0 */
11834 +#define VCMD_set_pflags                VC_CMD(PROCALT, 6, 0)   /* category PROCALT, command 6, version 0 */
11835 +
11836 +struct vcmd_pflags_v0 {  /* two 32-bit words, unlike the 64-bit flagword/mask of the network flag commands */
11837 +       uint32_t flagword;
11838 +       uint32_t mask;          /* presumably selects which flagword bits are affected -- confirm in vc_set_pflags() */
11839 +};
11840 +
11841 +#ifdef __KERNEL__
11842 +
11843 +extern int vc_get_pflags(uint32_t pid, void __user *);
11844 +extern int vc_set_pflags(uint32_t pid, void __user *);
11845 +
11846 +#endif /* __KERNEL__ */
11847 +#endif /* _VX_SIGNAL_CMD_H */
11848 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/signal.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/signal.h
11849 --- linux-2.6.30.2/include/linux/vserver/signal.h       1970-01-01 01:00:00.000000000 +0100
11850 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/signal.h    2009-07-04 01:11:39.000000000 +0200
11851 @@ -0,0 +1,14 @@
11852 +#ifndef _VX_SIGNAL_H
11853 +#define _VX_SIGNAL_H
11854 +
11855 +
11856 +#ifdef __KERNEL__
11857 +
11858 +struct vx_info;
11859 +
11860 +int vx_info_kill(struct vx_info *, int, int);
11861 +
11862 +#endif /* __KERNEL__ */
11863 +#else  /* _VX_SIGNAL_H */
11864 +#warning duplicate inclusion
11865 +#endif /* _VX_SIGNAL_H */
11866 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/space_cmd.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/space_cmd.h
11867 --- linux-2.6.30.2/include/linux/vserver/space_cmd.h    1970-01-01 01:00:00.000000000 +0100
11868 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/space_cmd.h 2009-07-04 01:11:39.000000000 +0200
11869 @@ -0,0 +1,38 @@
11870 +#ifndef _VX_SPACE_CMD_H
11871 +#define _VX_SPACE_CMD_H
11872 +
11873 +
11874 +#define VCMD_enter_space_v0    VC_CMD(PROCALT, 1, 0)   /* category PROCALT, command 1, versions 0..2 */
11875 +#define VCMD_enter_space_v1    VC_CMD(PROCALT, 1, 1)
11876 +#define VCMD_enter_space       VC_CMD(PROCALT, 1, 2)
11877 +
11878 +#define VCMD_set_space_v0      VC_CMD(PROCALT, 3, 0)   /* category PROCALT, command 3, versions 0..2 */
11879 +#define VCMD_set_space_v1      VC_CMD(PROCALT, 3, 1)
11880 +#define VCMD_set_space         VC_CMD(PROCALT, 3, 2)
11881 +
11882 +#define VCMD_get_space_mask_v0 VC_CMD(PROCALT, 4, 0)   /* the v0 form sits in category PROCALT ... */
11883 +
11884 +#define VCMD_get_space_mask    VC_CMD(VSPACE, 0, 1)    /* ... while the current form is VSPACE command 0, version 1 */
11885 +#define VCMD_get_space_default VC_CMD(VSPACE, 1, 0)
11886 +
11887 +
11888 +struct vcmd_space_mask_v1 {      /* a single 64-bit mask; presumably used by the _v1 space commands -- confirm */
11889 +       uint64_t mask;
11890 +};
11891 +
11892 +struct vcmd_space_mask_v2 {      /* the v1 mask plus a 32-bit index */
11893 +       uint64_t mask;
11894 +       uint32_t index;         /* NOTE(review): trailing padding after this field depends on the ABI's uint64_t alignment -- check 32-bit compat callers */
11895 +};
11896 +
11897 +
11898 +#ifdef __KERNEL__
11899 +
11900 +extern int vc_enter_space_v1(struct vx_info *, void __user *);
11901 +extern int vc_set_space_v1(struct vx_info *, void __user *);
11902 +extern int vc_enter_space(struct vx_info *, void __user *);
11903 +extern int vc_set_space(struct vx_info *, void __user *);
11904 +extern int vc_get_space_mask(void __user *, int);
11905 +
11906 +#endif /* __KERNEL__ */
11907 +#endif /* _VX_SPACE_CMD_H */
11908 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/space.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/space.h
11909 --- linux-2.6.30.2/include/linux/vserver/space.h        1970-01-01 01:00:00.000000000 +0100
11910 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/space.h     2009-07-04 01:11:39.000000000 +0200
11911 @@ -0,0 +1,12 @@
11912 +#ifndef _VX_SPACE_H
11913 +#define _VX_SPACE_H
11914 +
11915 +#include <linux/types.h>
11916 +
11917 +struct vx_info;
11918 +
11919 +int vx_set_space(struct vx_info *vxi, unsigned long mask, unsigned index);
11920 +
11921 +#else  /* _VX_SPACE_H */
11922 +#warning duplicate inclusion
11923 +#endif /* _VX_SPACE_H */
11924 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/switch.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/switch.h
11925 --- linux-2.6.30.2/include/linux/vserver/switch.h       1970-01-01 01:00:00.000000000 +0100
11926 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/switch.h    2009-07-04 01:11:39.000000000 +0200
11927 @@ -0,0 +1,98 @@
11928 +#ifndef _VX_SWITCH_H
11929 +#define _VX_SWITCH_H
11930 +
11931 +#include <linux/types.h>
11932 +
11933 +
11934 +#define VC_CATEGORY(c)         (((c) >> 24) & 0x3F)    /* bits 24..29 of a command word */
11935 +#define VC_COMMAND(c)          (((c) >> 16) & 0xFF)    /* bits 16..23 of a command word */
11936 +#define VC_VERSION(c)          ((c) & 0xFFF)           /* bits 0..11 of a command word */
11937 +
11938 +#define VC_CMD(c, i, v)                ((((VC_CAT_ ## c) & 0x3F) << 24) \
11939 +                               | (((i) & 0xFF) << 16) | ((v) & 0xFFF)) /* c is pasted onto VC_CAT_, so it must be a bare category name; i = command, v = version */
11940 +
11941 +/*
11942 +
11943 +  Syscall Matrix V2.8
11944 +
11945 +        |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL|
11946 +        |STATS  |DESTROY|ALTER  |CHANGE |LIMIT  |TEST   | |       |       |
11947 +        |INFO   |SETUP  |       |MOVE   |       |       | |       |       |
11948 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
11949 +  SYSTEM |VERSION|VSETUP |VHOST  |       |       |       | |DEVICE |       |
11950 +  HOST   |     00|     01|     02|     03|     04|     05| |     06|     07|
11951 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
11952 +  CPU    |       |VPROC  |PROCALT|PROCMIG|PROCTRL|       | |SCHED. |       |
11953 +  PROCESS|     08|     09|     10|     11|     12|     13| |     14|     15|
11954 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
11955 +  MEMORY |       |       |       |       |MEMCTRL|       | |SWAP   |       |
11956 +        |     16|     17|     18|     19|     20|     21| |     22|     23|
11957 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
11958 +  NETWORK|       |VNET   |NETALT |NETMIG |NETCTRL|       | |SERIAL |       |
11959 +        |     24|     25|     26|     27|     28|     29| |     30|     31|
11960 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
11961 +  DISK   |       |       |       |TAGMIG |DLIMIT |       | |INODE  |       |
11962 +  VFS    |     32|     33|     34|     35|     36|     37| |     38|     39|
11963 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
11964 +  OTHER  |VSTAT  |       |       |       |       |       | |VINFO  |       |
11965 +        |     40|     41|     42|     43|     44|     45| |     46|     47|
11966 +  =======+=======+=======+=======+=======+=======+=======+ +=======+=======+
11967 +  SPECIAL|EVENT  |       |       |       |FLAGS  |       | |VSPACE |       |
11968 +        |     48|     49|     50|     51|     52|     53| |     54|     55|
11969 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
11970 +  SPECIAL|DEBUG  |       |       |       |RLIMIT |SYSCALL| |       |COMPAT |
11971 +        |     56|     57|     58|     59|     60|TEST 61| |     62|     63|
11972 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
11973 +
11974 +*/
11975 +
11976 +#define VC_CAT_VERSION         0
11977 +
11978 +#define VC_CAT_VSETUP          1
11979 +#define VC_CAT_VHOST           2
11980 +
11981 +#define VC_CAT_DEVICE          6
11982 +
11983 +#define VC_CAT_VPROC           9
11984 +#define VC_CAT_PROCALT         10
11985 +#define VC_CAT_PROCMIG         11
11986 +#define VC_CAT_PROCTRL         12
11987 +
11988 +#define VC_CAT_SCHED           14
11989 +#define VC_CAT_MEMCTRL         20
11990 +
11991 +#define VC_CAT_VNET            25
11992 +#define VC_CAT_NETALT          26
11993 +#define VC_CAT_NETMIG          27
11994 +#define VC_CAT_NETCTRL         28
11995 +
11996 +#define VC_CAT_TAGMIG          35
11997 +#define VC_CAT_DLIMIT          36
11998 +#define VC_CAT_INODE           38
11999 +
12000 +#define VC_CAT_VSTAT           40
12001 +#define VC_CAT_VINFO           46
12002 +#define VC_CAT_EVENT           48
12003 +
12004 +#define VC_CAT_FLAGS           52
12005 +#define VC_CAT_VSPACE          54
12006 +#define VC_CAT_DEBUG           56
12007 +#define VC_CAT_RLIMIT          60
12008 +
12009 +#define VC_CAT_SYSTEST         61
12010 +#define VC_CAT_COMPAT          63
12011 +
12012 +/*  query version */
12013 +
12014 +#define VCMD_get_version       VC_CMD(VERSION, 0, 0)
12015 +#define VCMD_get_vci           VC_CMD(VERSION, 1, 0)
12016 +
12017 +
12018 +#ifdef __KERNEL__
12019 +
12020 +#include <linux/errno.h>
12021 +
12022 +#endif /* __KERNEL__ */
12023 +
12024 +#endif /* _VX_SWITCH_H */
12025 +
12026 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/tag_cmd.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/tag_cmd.h
12027 --- linux-2.6.30.2/include/linux/vserver/tag_cmd.h      1970-01-01 01:00:00.000000000 +0100
12028 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/tag_cmd.h   2009-07-04 01:11:39.000000000 +0200
12029 @@ -0,0 +1,22 @@
12030 +#ifndef _VX_TAG_CMD_H
12031 +#define _VX_TAG_CMD_H
12032 +
12033 +
12034 +/* vinfo commands */
12035 +
12036 +#define VCMD_task_tag          VC_CMD(VINFO, 3, 0)
12037 +
12038 +#ifdef __KERNEL__
12039 +extern int vc_task_tag(uint32_t);
12040 +
12041 +#endif /* __KERNEL__ */
12042 +
12043 +/* context commands */
12044 +
12045 +#define VCMD_tag_migrate       VC_CMD(TAGMIG, 1, 0)
12046 +
12047 +#ifdef __KERNEL__
12048 +extern int vc_tag_migrate(uint32_t);
12049 +
12050 +#endif /* __KERNEL__ */
12051 +#endif /* _VX_TAG_CMD_H */
12052 diff -NurpP --minimal linux-2.6.30.2/include/linux/vserver/tag.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/tag.h
12053 --- linux-2.6.30.2/include/linux/vserver/tag.h  1970-01-01 01:00:00.000000000 +0100
12054 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vserver/tag.h       2009-07-04 01:11:39.000000000 +0200
12055 @@ -0,0 +1,143 @@
12056 +#ifndef _DX_TAG_H
12057 +#define _DX_TAG_H
12058 +
12059 +#include <linux/types.h>
12060 +
12061 +
12062 +#define DX_TAG(in)     (IS_TAGGED(in))
12063 +
12064 +
12065 +#ifdef CONFIG_TAG_NFSD
12066 +#define DX_TAG_NFSD    1
12067 +#else
12068 +#define DX_TAG_NFSD    0
12069 +#endif
12070 +
12071 +
12072 +#ifdef CONFIG_TAGGING_NONE
12073 +
12074 +#define MAX_UID                0xFFFFFFFF
12075 +#define MAX_GID                0xFFFFFFFF
12076 +
12077 +#define INOTAG_TAG(cond, uid, gid, tag)        (0)
12078 +
12079 +#define TAGINO_UID(cond, uid, tag)     (uid)
12080 +#define TAGINO_GID(cond, gid, tag)     (gid)
12081 +
12082 +#endif
12083 +
12084 +
12085 +#ifdef CONFIG_TAGGING_GID16
12086 +
12087 +#define MAX_UID                0xFFFFFFFF
12088 +#define MAX_GID                0x0000FFFF
12089 +
12090 +#define INOTAG_TAG(cond, uid, gid, tag)        \
12091 +       ((cond) ? (((gid) >> 16) & 0xFFFF) : 0)
12092 +
12093 +#define TAGINO_UID(cond, uid, tag)     (uid)
12094 +#define TAGINO_GID(cond, gid, tag)     \
12095 +       ((cond) ? (((gid) & 0xFFFF) | ((tag) << 16)) : (gid))
12096 +
12097 +#endif
12098 +
12099 +
12100 +#ifdef CONFIG_TAGGING_ID24
12101 +
12102 +#define MAX_UID                0x00FFFFFF
12103 +#define MAX_GID                0x00FFFFFF
12104 +
12105 +#define INOTAG_TAG(cond, uid, gid, tag)        \
12106 +       ((cond) ? ((((uid) >> 16) & 0xFF00) | (((gid) >> 24) & 0xFF)) : 0)
12107 +
12108 +#define TAGINO_UID(cond, uid, tag)     \
12109 +       ((cond) ? (((uid) & 0xFFFFFF) | (((tag) & 0xFF00) << 16)) : (uid))
12110 +#define TAGINO_GID(cond, gid, tag)     \
12111 +       ((cond) ? (((gid) & 0xFFFFFF) | (((tag) & 0x00FF) << 24)) : (gid))
12112 +
12113 +#endif
12114 +
12115 +
12116 +#ifdef CONFIG_TAGGING_UID16
12117 +
12118 +#define MAX_UID                0x0000FFFF
12119 +#define MAX_GID                0xFFFFFFFF
12120 +
12121 +#define INOTAG_TAG(cond, uid, gid, tag)        \
12122 +       ((cond) ? (((uid) >> 16) & 0xFFFF) : 0)
12123 +
12124 +#define TAGINO_UID(cond, uid, tag)     \
12125 +       ((cond) ? (((uid) & 0xFFFF) | ((tag) << 16)) : (uid))
12126 +#define TAGINO_GID(cond, gid, tag)     (gid)
12127 +
12128 +#endif
12129 +
12130 +
12131 +#ifdef CONFIG_TAGGING_INTERN
12132 +
12133 +#define MAX_UID                0xFFFFFFFF
12134 +#define MAX_GID                0xFFFFFFFF
12135 +
12136 +#define INOTAG_TAG(cond, uid, gid, tag)        \
12137 +       ((cond) ? (tag) : 0)
12138 +
12139 +#define TAGINO_UID(cond, uid, tag)     (uid)
12140 +#define TAGINO_GID(cond, gid, tag)     (gid)
12141 +
12142 +#endif
12143 +
12144 +
12145 +#ifndef CONFIG_TAGGING_NONE
12146 +#define dx_current_fstag(sb)   \
12147 +       ((sb)->s_flags & MS_TAGGED ? dx_current_tag() : 0)
12148 +#else
12149 +#define dx_current_fstag(sb)   (0)
12150 +#endif
12151 +
12152 +#ifndef CONFIG_TAGGING_INTERN
12153 +#define TAGINO_TAG(cond, tag)  (0)
12154 +#else
12155 +#define TAGINO_TAG(cond, tag)  ((cond) ? (tag) : 0)
12156 +#endif
12157 +
12158 +#define INOTAG_UID(cond, uid, gid)     \
12159 +       ((cond) ? ((uid) & MAX_UID) : (uid))
12160 +#define INOTAG_GID(cond, uid, gid)     \
12161 +       ((cond) ? ((gid) & MAX_GID) : (gid))
12162 +
12163 +/* map ids: values above MAX_UID/MAX_GID (except -1) become -2, then mask */
12164 +static inline uid_t dx_map_uid(uid_t uid)
12165 +{
12166 +       if ((uid > MAX_UID) && (uid != -1))
12167 +               uid = -2;
12168 +       return (uid & MAX_UID);
12169 +}
12170 +
12171 +static inline gid_t dx_map_gid(gid_t gid)
12172 +{
12173 +       if ((gid > MAX_GID) && (gid != -1))
12174 +               gid = -2;
12175 +       return (gid & MAX_GID);
12176 +}
12177 +
12178 +struct peer_tag {
12179 +       int32_t xid;
12180 +       int32_t nid;
12181 +};
12182 +
12183 +#define dx_notagcheck(sb) ((sb) && ((sb)->s_flags & MS_NOTAGCHECK))
12184 +
12185 +int dx_parse_tag(char *string, tag_t *tag, int remove, int *mnt_flags,
12186 +                unsigned long *flags);
12187 +
12188 +#ifdef CONFIG_PROPAGATE
12189 +
12190 +void __dx_propagate_tag(struct nameidata *nd, struct inode *inode);
12191 +
12192 +#define dx_propagate_tag(n, i) __dx_propagate_tag(n, i)
12193 +
12194 +#else
12195 +#define dx_propagate_tag(n, i) do { } while (0)
12196 +#endif
12197 +
12198 +#endif /* _DX_TAG_H */
12199 diff -NurpP --minimal linux-2.6.30.2/include/linux/vs_inet6.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_inet6.h
12200 --- linux-2.6.30.2/include/linux/vs_inet6.h     1970-01-01 01:00:00.000000000 +0100
12201 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_inet6.h  2009-07-04 01:11:39.000000000 +0200
12202 @@ -0,0 +1,246 @@
12203 +#ifndef _VS_INET6_H
12204 +#define _VS_INET6_H
12205 +
12206 +#include "vserver/base.h"
12207 +#include "vserver/network.h"
12208 +#include "vserver/debug.h"
12209 +
12210 +#include <net/ipv6.h>
12211 +
12212 +#define NXAV6(a)       &(a)->ip, &(a)->mask, (a)->prefix, (a)->type
12213 +#define NXAV6_FMT      "[%pI6/%pI6/%d:%04x]"
12214 +
12215 +
12216 +#ifdef CONFIG_IPV6
12217 +
12218 +static inline
12219 +int v6_addr_match(struct nx_addr_v6 *nxa,
12220 +       const struct in6_addr *addr, uint16_t mask)
12221 +{
12222 +       int ret = 0;
12223 +
12224 +       switch (nxa->type & mask) {
12225 +       case NXA_TYPE_MASK:
12226 +               ret = ipv6_masked_addr_cmp(&nxa->ip, &nxa->mask, addr);
12227 +               break;
12228 +       case NXA_TYPE_ADDR:
12229 +               ret = ipv6_addr_equal(&nxa->ip, addr);
12230 +               break;
12231 +       case NXA_TYPE_ANY:
12232 +               ret = 1;
12233 +               break;
12234 +       }
12235 +       vxdprintk(VXD_CBIT(net, 0),
12236 +               "v6_addr_match(%p" NXAV6_FMT ",%pI6,%04x) = %d",
12237 +               nxa, NXAV6(nxa), addr, mask, ret);
12238 +       return ret;
12239 +}
12240 +
12241 +static inline
12242 +int v6_addr_in_nx_info(struct nx_info *nxi,
12243 +       const struct in6_addr *addr, uint16_t mask)
12244 +{
12245 +       struct nx_addr_v6 *nxa;
12246 +       int ret = 1;
12247 +
12248 +       if (!nxi)
12249 +               goto out;
12250 +       for (nxa = &nxi->v6; nxa; nxa = nxa->next)
12251 +               if (v6_addr_match(nxa, addr, mask))
12252 +                       goto out;
12253 +       ret = 0;
12254 +out:
12255 +       vxdprintk(VXD_CBIT(net, 0),
12256 +               "v6_addr_in_nx_info(%p[#%u],%pI6,%04x) = %d",
12257 +               nxi, nxi ? nxi->nx_id : 0, addr, mask, ret);
12258 +       return ret;
12259 +}
12260 +
12261 +static inline
12262 +int v6_nx_addr_match(struct nx_addr_v6 *nxa, struct nx_addr_v6 *addr, uint16_t mask)
12263 +{
12264 +       /* FIXME: needs full range checks */
12265 +       return v6_addr_match(nxa, &addr->ip, mask);
12266 +}
12267 +
12268 +static inline
12269 +int v6_nx_addr_in_nx_info(struct nx_info *nxi, struct nx_addr_v6 *nxa, uint16_t mask)
12270 +{
12271 +       struct nx_addr_v6 *ptr;
12272 +
12273 +       for (ptr = &nxi->v6; ptr; ptr = ptr->next)
12274 +               if (v6_nx_addr_match(ptr, nxa, mask))
12275 +                       return 1;
12276 +       return 0;
12277 +}
12278 +
12279 +
12280 +/*
12281 + *     Check if a given address matches for a socket
12282 + *
12283 + *     nxi:            the socket's nx_info if any
12284 + *     addr:           to be verified address
12285 + */
12286 +static inline
12287 +int v6_sock_addr_match (
12288 +       struct nx_info *nxi,
12289 +       struct inet_sock *inet,
12290 +       struct in6_addr *addr)
12291 +{
12292 +       struct sock *sk = &inet->sk;
12293 +       struct in6_addr *saddr = inet6_rcv_saddr(sk);
12294 +
12295 +       if (!ipv6_addr_any(addr) &&
12296 +               ipv6_addr_equal(saddr, addr))
12297 +               return 1;
12298 +       if (ipv6_addr_any(saddr))
12299 +               return v6_addr_in_nx_info(nxi, addr, -1);
12300 +       return 0;
12301 +}
12302 +
12303 +/*
12304 + *     check if address is covered by socket
12305 + *
12306 + *     sk:     the socket to check against
12307 + *     addr:   the address in question (must be != 0)
12308 + */
12309 +
12310 +static inline
12311 +int __v6_addr_match_socket(const struct sock *sk, struct nx_addr_v6 *nxa)
12312 +{
12313 +       struct nx_info *nxi = sk->sk_nx_info;
12314 +       struct in6_addr *saddr = inet6_rcv_saddr(sk);
12315 +
12316 +       vxdprintk(VXD_CBIT(net, 5),
12317 +               "__v6_addr_in_socket(%p," NXAV6_FMT ") %p:%pI6 %p;%lx",
12318 +               sk, NXAV6(nxa), nxi, saddr, sk->sk_socket,
12319 +               (sk->sk_socket?sk->sk_socket->flags:0));
12320 +
12321 +       if (!ipv6_addr_any(saddr)) {    /* direct address match */
12322 +               return v6_addr_match(nxa, saddr, -1);
12323 +       } else if (nxi) {               /* match against nx_info */
12324 +               return v6_nx_addr_in_nx_info(nxi, nxa, -1);
12325 +       } else {                        /* unrestricted any socket */
12326 +               return 1;
12327 +       }
12328 +}
12329 +
12330 +
12331 +/* inet related checks and helpers */
12332 +
12333 +
12334 +struct in_ifaddr;
12335 +struct net_device;
12336 +struct sock;
12337 +
12338 +
12339 +#include <linux/netdevice.h>
12340 +#include <linux/inetdevice.h>
12341 +#include <net/inet_timewait_sock.h>
12342 +
12343 +
12344 +int dev_in_nx_info(struct net_device *, struct nx_info *);
12345 +int v6_dev_in_nx_info(struct net_device *, struct nx_info *);
12346 +int nx_v6_addr_conflict(struct nx_info *, struct nx_info *);
12347 +
12348 +
12349 +
12350 +static inline
12351 +int v6_ifa_in_nx_info(struct inet6_ifaddr *ifa, struct nx_info *nxi)
12352 +{
12353 +       if (!nxi)
12354 +               return 1;
12355 +       if (!ifa)
12356 +               return 0;
12357 +       return v6_addr_in_nx_info(nxi, &ifa->addr, -1);
12358 +}
12359 +
12360 +static inline
12361 +int nx_v6_ifa_visible(struct nx_info *nxi, struct inet6_ifaddr *ifa)
12362 +{
12363 +       vxdprintk(VXD_CBIT(net, 1), "nx_v6_ifa_visible(%p[#%u],%p) %d",
12364 +               nxi, nxi ? nxi->nx_id : 0, ifa,
12365 +               nxi ? v6_ifa_in_nx_info(ifa, nxi) : 0);
12366 +
12367 +       if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0))
12368 +               return 1;
12369 +       if (v6_ifa_in_nx_info(ifa, nxi))
12370 +               return 1;
12371 +       return 0;
12372 +}
12373 +
12374 +
12375 +struct nx_v6_sock_addr {
12376 +       struct in6_addr saddr;  /* Address used for validation */
12377 +       struct in6_addr baddr;  /* Address used for socket bind */
12378 +};
12379 +
12380 +static inline
12381 +int v6_map_sock_addr(struct inet_sock *inet, struct sockaddr_in6 *addr,
12382 +       struct nx_v6_sock_addr *nsa)
12383 +{
12384 +       // struct sock *sk = &inet->sk;
12385 +       // struct nx_info *nxi = sk->sk_nx_info;
12386 +       struct in6_addr saddr = addr->sin6_addr;
12387 +       struct in6_addr baddr = saddr;
12388 +
12389 +       nsa->saddr = saddr;
12390 +       nsa->baddr = baddr;
12391 +       return 0;
12392 +}
12393 +
12394 +static inline
12395 +void v6_set_sock_addr(struct inet_sock *inet, struct nx_v6_sock_addr *nsa)
12396 +{
12397 +       // struct sock *sk = &inet->sk;
12398 +       // struct in6_addr *saddr = inet6_rcv_saddr(sk);
12399 +
12400 +       // *saddr = nsa->baddr;
12401 +       // inet->saddr = nsa->baddr;
12402 +}
12403 +
12404 +static inline
12405 +int nx_info_has_v6(struct nx_info *nxi)
12406 +{
12407 +       if (!nxi)
12408 +               return 1;
12409 +       if (NX_IPV6(nxi))
12410 +               return 1;
12411 +       return 0;
12412 +}
12413 +
12414 +#else /* CONFIG_IPV6 */
12415 +
12416 +static inline
12417 +int nx_v6_dev_visible(struct nx_info *n, struct net_device *d)
12418 +{
12419 +       return 1;
12420 +}
12421 +
12422 +/* !CONFIG_IPV6 stub, always 1. NOTE(review): args differ from the IPV6 prototype - confirm */
12423 +static inline
12424 +int nx_v6_addr_conflict(struct nx_info *n, uint32_t a, const struct sock *s)
12425 +{
12426 +       return 1;
12427 +}
12428 +
12429 +static inline
12430 +int v6_ifa_in_nx_info(struct in_ifaddr *a, struct nx_info *n)
12431 +{
12432 +       return 1;
12433 +}
12434 +
12435 +static inline
12436 +int nx_info_has_v6(struct nx_info *nxi)
12437 +{
12438 +       return 0;
12439 +}
12440 +
12441 +#endif /* CONFIG_IPV6 */
12442 +
12443 +#define current_nx_info_has_v6() \
12444 +       nx_info_has_v6(current_nx_info())
12445 +
12446 +#else
12447 +#warning duplicate inclusion
12448 +#endif
12449 diff -NurpP --minimal linux-2.6.30.2/include/linux/vs_inet.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_inet.h
12450 --- linux-2.6.30.2/include/linux/vs_inet.h      1970-01-01 01:00:00.000000000 +0100
12451 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_inet.h   2009-07-04 01:11:39.000000000 +0200
12452 @@ -0,0 +1,342 @@
12453 +#ifndef _VS_INET_H
12454 +#define _VS_INET_H
12455 +
12456 +#include "vserver/base.h"
12457 +#include "vserver/network.h"
12458 +#include "vserver/debug.h"
12459 +
12460 +#define IPI_LOOPBACK   htonl(INADDR_LOOPBACK)
12461 +
12462 +#define NXAV4(a)       NIPQUAD((a)->ip[0]), NIPQUAD((a)->ip[1]), \
12463 +                       NIPQUAD((a)->mask), (a)->type
12464 +#define NXAV4_FMT      "[" NIPQUAD_FMT "-" NIPQUAD_FMT "/" NIPQUAD_FMT ":%04x]"
12465 +
12466 +/* match addr against one nx_addr_v4 entry; returns nonzero on a match */
12467 +static inline
12468 +int v4_addr_match(struct nx_addr_v4 *nxa, __be32 addr, uint16_t tmask)
12469 +{
12470 +       __be32 ip = nxa->ip[0].s_addr;
12471 +       __be32 mask = nxa->mask.s_addr;
12472 +       __be32 bcast = ip | ~mask;
12473 +       int ret = 0;
12474 +
12475 +       switch (nxa->type & tmask) {
12476 +       case NXA_TYPE_MASK:
12477 +               ret = (ip == (addr & mask));
12478 +               break;
12479 +       case NXA_TYPE_ADDR:
12480 +               ret = 3;
12481 +               if (addr == ip)
12482 +                       break;
12483 +               /* fall through to broadcast */
12484 +       case NXA_MOD_BCAST:
12485 +               ret = ((tmask & NXA_MOD_BCAST) && (addr == bcast));
12486 +               break;
12487 +       case NXA_TYPE_RANGE:    /* [ip[0], ip[1]) ordered in host byte order */
12488 +               ret = ((ntohl(nxa->ip[0].s_addr) <= ntohl(addr)) &&
12489 +                       (ntohl(nxa->ip[1].s_addr) > ntohl(addr)));
12490 +               break;
12491 +       case NXA_TYPE_ANY:
12492 +               ret = 2;
12493 +               break;
12494 +       }
12495 +
12496 +       vxdprintk(VXD_CBIT(net, 0),
12497 +               "v4_addr_match(%p" NXAV4_FMT "," NIPQUAD_FMT ",%04x) = %d",
12498 +               nxa, NXAV4(nxa), NIPQUAD(addr), tmask, ret);
12499 +       return ret;
12500 +}
12501 +
12502 +static inline
12503 +int v4_addr_in_nx_info(struct nx_info *nxi, __be32 addr, uint16_t tmask)
12504 +{
12505 +       struct nx_addr_v4 *nxa;
12506 +       int ret = 1;
12507 +
12508 +       if (!nxi)
12509 +               goto out;
12510 +
12511 +       ret = 2;
12512 +       /* allow 127.0.0.1 when remapping lback */
12513 +       if ((tmask & NXA_LOOPBACK) &&
12514 +               (addr == IPI_LOOPBACK) &&
12515 +               nx_info_flags(nxi, NXF_LBACK_REMAP, 0))
12516 +               goto out;
12517 +       ret = 3;
12518 +       /* check for lback address */
12519 +       if ((tmask & NXA_MOD_LBACK) &&
12520 +               (nxi->v4_lback.s_addr == addr))
12521 +               goto out;
12522 +       ret = 4;
12523 +       /* check for broadcast address */
12524 +       if ((tmask & NXA_MOD_BCAST) &&
12525 +               (nxi->v4_bcast.s_addr == addr))
12526 +               goto out;
12527 +       ret = 5;
12528 +       /* check for v4 addresses */
12529 +       for (nxa = &nxi->v4; nxa; nxa = nxa->next)
12530 +               if (v4_addr_match(nxa, addr, tmask))
12531 +                       goto out;
12532 +       ret = 0;
12533 +out:
12534 +       vxdprintk(VXD_CBIT(net, 0),
12535 +               "v4_addr_in_nx_info(%p[#%u]," NIPQUAD_FMT ",%04x) = %d",
12536 +               nxi, nxi ? nxi->nx_id : 0, NIPQUAD(addr), tmask, ret);
12537 +       return ret;
12538 +}
12539 +
12540 +static inline
12541 +int v4_nx_addr_match(struct nx_addr_v4 *nxa, struct nx_addr_v4 *addr, uint16_t mask)
12542 +{
12543 +       /* FIXME: needs full range checks */
12544 +       return v4_addr_match(nxa, addr->ip[0].s_addr, mask);
12545 +}
12546 +
12547 +static inline
12548 +int v4_nx_addr_in_nx_info(struct nx_info *nxi, struct nx_addr_v4 *nxa, uint16_t mask)
12549 +{
12550 +       struct nx_addr_v4 *ptr;
12551 +
12552 +       for (ptr = &nxi->v4; ptr; ptr = ptr->next)
12553 +               if (v4_nx_addr_match(ptr, nxa, mask))
12554 +                       return 1;
12555 +       return 0;
12556 +}
12557 +
12558 +#include <net/inet_sock.h>
12559 +
12560 +/*
12561 + *     Check if a given address matches for a socket
12562 + *
12563 + *     nxi:            the socket's nx_info if any
12564 + *     addr:           to be verified address
12565 + */
12566 +static inline
12567 +int v4_sock_addr_match (
12568 +       struct nx_info *nxi,
12569 +       struct inet_sock *inet,
12570 +       __be32 addr)
12571 +{
12572 +       __be32 saddr = inet->rcv_saddr;
12573 +       __be32 bcast = nxi ? nxi->v4_bcast.s_addr : INADDR_BROADCAST;
12574 +
12575 +       if (addr && (saddr == addr || bcast == addr))
12576 +               return 1;
12577 +       if (!saddr)
12578 +               return v4_addr_in_nx_info(nxi, addr, NXA_MASK_BIND);
12579 +       return 0;
12580 +}
12581 +
12582 +
12583 +/* inet related checks and helpers */
12584 +
12585 +
12586 +struct in_ifaddr;
12587 +struct net_device;
12588 +struct sock;
12589 +
12590 +#ifdef CONFIG_INET
12591 +
12592 +#include <linux/netdevice.h>
12593 +#include <linux/inetdevice.h>
12594 +#include <net/inet_sock.h>
12595 +#include <net/inet_timewait_sock.h>
12596 +
12597 +
12598 +int dev_in_nx_info(struct net_device *, struct nx_info *);
12599 +int v4_dev_in_nx_info(struct net_device *, struct nx_info *);
12600 +int nx_v4_addr_conflict(struct nx_info *, struct nx_info *);
12601 +
12602 +
12603 +/*
12604 + *     check if address is covered by socket
12605 + *
12606 + *     sk:     the socket to check against
12607 + *     addr:   the address in question (must be != 0)
12608 + */
12609 +
12610 +static inline
12611 +int __v4_addr_match_socket(const struct sock *sk, struct nx_addr_v4 *nxa)
12612 +{
12613 +       struct nx_info *nxi = sk->sk_nx_info;
12614 +       __be32 saddr = inet_rcv_saddr(sk);
12615 +
12616 +       vxdprintk(VXD_CBIT(net, 5),
12617 +               "__v4_addr_in_socket(%p," NXAV4_FMT ") %p:" NIPQUAD_FMT " %p;%lx",
12618 +               sk, NXAV4(nxa), nxi, NIPQUAD(saddr), sk->sk_socket,
12619 +               (sk->sk_socket?sk->sk_socket->flags:0));
12620 +
12621 +       if (saddr) {            /* direct address match */
12622 +               return v4_addr_match(nxa, saddr, -1);
12623 +       } else if (nxi) {       /* match against nx_info */
12624 +               return v4_nx_addr_in_nx_info(nxi, nxa, -1);
12625 +       } else {                /* unrestricted any socket */
12626 +               return 1;
12627 +       }
12628 +}
12629 +
12630 +
12631 +/* 1 if the NXF_HIDE_NETIF flag test fails or dev_in_nx_info() accepts dev */
12632 +static inline
12633 +int nx_dev_visible(struct nx_info *nxi, struct net_device *dev)
12634 +{
12635 +       vxdprintk(VXD_CBIT(net, 1), "nx_dev_visible(%p[#%u],%p »%s«) %d",
12636 +               nxi, nxi ? nxi->nx_id : 0, dev, dev->name,
12637 +               nxi ? dev_in_nx_info(dev, nxi) : 0);
12638 +
12639 +       if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0))
12640 +               return 1;
12641 +       if (dev_in_nx_info(dev, nxi))
12642 +               return 1;
12643 +       return 0;
12644 +}
12645 +
12646 +
12647 +static inline
12648 +int v4_ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
12649 +{
12650 +       if (!nxi)
12651 +               return 1;
12652 +       if (!ifa)
12653 +               return 0;
12654 +       return v4_addr_in_nx_info(nxi, ifa->ifa_local, NXA_MASK_SHOW);
12655 +}
12656 +
12657 +static inline
12658 +int nx_v4_ifa_visible(struct nx_info *nxi, struct in_ifaddr *ifa)
12659 +{
12660 +       vxdprintk(VXD_CBIT(net, 1), "nx_v4_ifa_visible(%p[#%u],%p) %d",
12661 +               nxi, nxi ? nxi->nx_id : 0, ifa,
12662 +               nxi ? v4_ifa_in_nx_info(ifa, nxi) : 0);
12663 +
12664 +       if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0))
12665 +               return 1;
12666 +       if (v4_ifa_in_nx_info(ifa, nxi))
12667 +               return 1;
12668 +       return 0;
12669 +}
12670 +
12671 +
12672 +struct nx_v4_sock_addr {
12673 +       __be32 saddr;   /* Address used for validation */
12674 +       __be32 baddr;   /* Address used for socket bind */
12675 +};
12676 +
12677 +static inline
12678 +int v4_map_sock_addr(struct inet_sock *inet, struct sockaddr_in *addr,
12679 +       struct nx_v4_sock_addr *nsa)
12680 +{
12681 +       struct sock *sk = &inet->sk;
12682 +       struct nx_info *nxi = sk->sk_nx_info;
12683 +       __be32 saddr = addr->sin_addr.s_addr;
12684 +       __be32 baddr = saddr;
12685 +
12686 +       vxdprintk(VXD_CBIT(net, 3),
12687 +               "inet_bind(%p)* %p,%p;%lx " NIPQUAD_FMT,
12688 +               sk, sk->sk_nx_info, sk->sk_socket,
12689 +               (sk->sk_socket ? sk->sk_socket->flags : 0),
12690 +               NIPQUAD(saddr));
12691 +
12692 +       if (nxi) {
12693 +               if (saddr == INADDR_ANY) {
12694 +                       if (nx_info_flags(nxi, NXF_SINGLE_IP, 0))
12695 +                               baddr = nxi->v4.ip[0].s_addr;
12696 +               } else if (saddr == IPI_LOOPBACK) {
12697 +                       if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0))
12698 +                               baddr = nxi->v4_lback.s_addr;
12699 +               } else {        /* normal address bind */
12700 +                       if (!v4_addr_in_nx_info(nxi, saddr, NXA_MASK_BIND))
12701 +                               return -EADDRNOTAVAIL;
12702 +               }
12703 +       }
12704 +
12705 +       vxdprintk(VXD_CBIT(net, 3),
12706 +               "inet_bind(%p) " NIPQUAD_FMT ", " NIPQUAD_FMT,
12707 +               sk, NIPQUAD(saddr), NIPQUAD(baddr));
12708 +
12709 +       nsa->saddr = saddr;
12710 +       nsa->baddr = baddr;
12711 +       return 0;
12712 +}
12713 +
12714 +static inline
12715 +void v4_set_sock_addr(struct inet_sock *inet, struct nx_v4_sock_addr *nsa)
12716 +{
12717 +       inet->saddr = nsa->baddr;
12718 +       inet->rcv_saddr = nsa->baddr;
12719 +}
12720 +
12721 +
12722 +/*
12723 + *      helper to simplify inet_lookup_listener
12724 + *
12725 + *      nxi:   the socket's nx_info if any
12726 + *      addr:  to be verified address
12727 + *      saddr: socket address
12728 + */
12729 +static inline int v4_inet_addr_match (
12730 +       struct nx_info *nxi,
12731 +       __be32 addr,
12732 +       __be32 saddr)
12733 +{
12734 +       if (addr && (saddr == addr))
12735 +               return 1;
12736 +       if (!saddr)
12737 +               return nxi ? v4_addr_in_nx_info(nxi, addr, NXA_MASK_BIND) : 1;
12738 +       return 0;
12739 +}
12740 +
12741 +static inline __be32 nx_map_sock_lback(struct nx_info *nxi, __be32 addr)
12742 +{
12743 +       if (nx_info_flags(nxi, NXF_HIDE_LBACK, 0) &&
12744 +               (addr == nxi->v4_lback.s_addr))
12745 +               return IPI_LOOPBACK;
12746 +       return addr;
12747 +}
12748 +
12749 +static inline
12750 +int nx_info_has_v4(struct nx_info *nxi)
12751 +{
12752 +       if (!nxi)
12753 +               return 1;
12754 +       if (NX_IPV4(nxi))
12755 +               return 1;
12756 +       if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0))
12757 +               return 1;
12758 +       return 0;
12759 +}
12760 +
12761 +#else /* CONFIG_INET */
12762 +
12763 +static inline
12764 +int nx_dev_visible(struct nx_info *n, struct net_device *d)
12765 +{
12766 +       return 1;
12767 +}
12768 +
12769 +static inline
12770 +int nx_v4_addr_conflict(struct nx_info *n, uint32_t a, const struct sock *s)
12771 +{
12772 +       return 1;
12773 +}
12774 +
12775 +static inline
12776 +int v4_ifa_in_nx_info(struct in_ifaddr *a, struct nx_info *n)
12777 +{
12778 +       return 1;
12779 +}
12780 +
12781 +static inline
12782 +int nx_info_has_v4(struct nx_info *nxi)
12783 +{
12784 +       return 0;
12785 +}
12786 +
12787 +#endif /* CONFIG_INET */
12788 +
12789 +#define current_nx_info_has_v4() \
12790 +       nx_info_has_v4(current_nx_info())
12791 +
12792 +#else
12793 +// #warning duplicate inclusion
12794 +#endif
12795 diff -NurpP --minimal linux-2.6.30.2/include/linux/vs_limit.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_limit.h
12796 --- linux-2.6.30.2/include/linux/vs_limit.h     1970-01-01 01:00:00.000000000 +0100
12797 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_limit.h  2009-07-04 01:11:39.000000000 +0200
12798 @@ -0,0 +1,140 @@
12799 +#ifndef _VS_LIMIT_H
12800 +#define _VS_LIMIT_H
12801 +
12802 +#include "vserver/limit.h"
12803 +#include "vserver/base.h"
12804 +#include "vserver/context.h"
12805 +#include "vserver/debug.h"
12806 +#include "vserver/context.h"
12807 +#include "vserver/limit_int.h"
12808 +
12809 +
12810 +#define vx_acc_cres(v, d, p, r) \
12811 +       __vx_acc_cres(v, r, d, p, __FILE__, __LINE__)
12812 +
12813 +#define vx_acc_cres_cond(x, d, p, r) \
12814 +       __vx_acc_cres(((x) == vx_current_xid()) ? current->vx_info : 0, \
12815 +       r, d, p, __FILE__, __LINE__)
12816 +
12817 +
12818 +#define vx_add_cres(v, a, p, r) \
12819 +       __vx_add_cres(v, r, a, p, __FILE__, __LINE__)
12820 +#define vx_sub_cres(v, a, p, r)                vx_add_cres(v, -(a), p, r)
12821 +
12822 +#define vx_add_cres_cond(x, a, p, r) \
12823 +       __vx_add_cres(((x) == vx_current_xid()) ? current->vx_info : 0, \
12824 +       r, a, p, __FILE__, __LINE__)
12825 +#define vx_sub_cres_cond(x, a, p, r)   vx_add_cres_cond(x, -(a), p, r)
12826 +
12827 +
12828 +/* process and file limits */
12829 +
12830 +#define vx_nproc_inc(p) \
12831 +       vx_acc_cres((p)->vx_info, 1, p, RLIMIT_NPROC)
12832 +
12833 +#define vx_nproc_dec(p) \
12834 +       vx_acc_cres((p)->vx_info,-1, p, RLIMIT_NPROC)
12835 +
12836 +#define vx_files_inc(f) \
12837 +       vx_acc_cres_cond((f)->f_xid, 1, f, RLIMIT_NOFILE)
12838 +
12839 +#define vx_files_dec(f) \
12840 +       vx_acc_cres_cond((f)->f_xid,-1, f, RLIMIT_NOFILE)
12841 +
12842 +#define vx_locks_inc(l) \
12843 +       vx_acc_cres_cond((l)->fl_xid, 1, l, RLIMIT_LOCKS)
12844 +
12845 +#define vx_locks_dec(l) \
12846 +       vx_acc_cres_cond((l)->fl_xid,-1, l, RLIMIT_LOCKS)
12847 +
12848 +#define vx_openfd_inc(f) \
12849 +       vx_acc_cres(current->vx_info, 1, (void *)(long)(f), VLIMIT_OPENFD)
12850 +
12851 +#define vx_openfd_dec(f) \
12852 +       vx_acc_cres(current->vx_info,-1, (void *)(long)(f), VLIMIT_OPENFD)
12853 +
12854 +
12855 +#define vx_cres_avail(v, n, r) \
12856 +       __vx_cres_avail(v, r, n, __FILE__, __LINE__)
12857 +
12858 +
12859 +#define vx_nproc_avail(n) \
12860 +       vx_cres_avail(current->vx_info, n, RLIMIT_NPROC)
12861 +
12862 +#define vx_files_avail(n) \
12863 +       vx_cres_avail(current->vx_info, n, RLIMIT_NOFILE)
12864 +
12865 +#define vx_locks_avail(n) \
12866 +       vx_cres_avail(current->vx_info, n, RLIMIT_LOCKS)
12867 +
12868 +#define vx_openfd_avail(n) \
12869 +       vx_cres_avail(current->vx_info, n, VLIMIT_OPENFD)
12870 +
12871 +
12872 +/* dentry limits */
12873 +/* note: (d) is expanded twice below; pass an expression without side effects */
12874 +#define vx_dentry_inc(d) do {                                          \
12875 +       if (atomic_read(&(d)->d_count) == 1)                            \
12876 +               vx_acc_cres(current->vx_info, 1, d, VLIMIT_DENTRY);     \
12877 +       } while (0)
12878 +
12879 +#define vx_dentry_dec(d) do {                                          \
12880 +       if (atomic_read(&(d)->d_count) == 0)                            \
12881 +               vx_acc_cres(current->vx_info,-1, d, VLIMIT_DENTRY);     \
12882 +       } while (0)
12883 +
12884 +#define vx_dentry_avail(n) \
12885 +       vx_cres_avail(current->vx_info, n, VLIMIT_DENTRY)
12886 +
12887 +
12888 +/* socket limits */
12889 +
12890 +#define vx_sock_inc(s) \
12891 +       vx_acc_cres((s)->sk_vx_info, 1, s, VLIMIT_NSOCK)
12892 +
12893 +#define vx_sock_dec(s) \
12894 +       vx_acc_cres((s)->sk_vx_info,-1, s, VLIMIT_NSOCK)
12895 +
12896 +#define vx_sock_avail(n) \
12897 +       vx_cres_avail(current->vx_info, n, VLIMIT_NSOCK)
12898 +
12899 +
12900 +/* ipc resource limits */
12901 +
12902 +#define vx_ipcmsg_add(v, u, a) \
12903 +       vx_add_cres(v, a, u, RLIMIT_MSGQUEUE)
12904 +
12905 +#define vx_ipcmsg_sub(v, u, a) \
12906 +       vx_sub_cres(v, a, u, RLIMIT_MSGQUEUE)
12907 +
12908 +#define vx_ipcmsg_avail(v, a) \
12909 +       vx_cres_avail(v, a, RLIMIT_MSGQUEUE)
12910 +
12911 +
12912 +#define vx_ipcshm_add(v, k, a) \
12913 +       vx_add_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM)
12914 +
12915 +#define vx_ipcshm_sub(v, k, a) \
12916 +       vx_sub_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM)
12917 +
12918 +#define vx_ipcshm_avail(v, a) \
12919 +       vx_cres_avail(v, a, VLIMIT_SHMEM)
12920 +
12921 +
12922 +#define vx_semary_inc(a) \
12923 +       vx_acc_cres(current->vx_info, 1, a, VLIMIT_SEMARY)
12924 +
12925 +#define vx_semary_dec(a) \
12926 +       vx_acc_cres(current->vx_info, -1, a, VLIMIT_SEMARY)
12927 +
12928 +
12929 +#define vx_nsems_add(a,n) \
12930 +       vx_add_cres(current->vx_info, n, a, VLIMIT_NSEMS)
12931 +
12932 +#define vx_nsems_sub(a,n) \
12933 +       vx_sub_cres(current->vx_info, n, a, VLIMIT_NSEMS)
12934 +
12935 +
12936 +#else
12937 +#warning duplicate inclusion
12938 +#endif
12939 diff -NurpP --minimal linux-2.6.30.2/include/linux/vs_memory.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_memory.h
12940 --- linux-2.6.30.2/include/linux/vs_memory.h    1970-01-01 01:00:00.000000000 +0100
12941 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_memory.h 2009-07-04 01:11:39.000000000 +0200
12942 @@ -0,0 +1,159 @@
12943 +#ifndef _VS_MEMORY_H
12944 +#define _VS_MEMORY_H
12945 +
12946 +#include "vserver/limit.h"
12947 +#include "vserver/base.h"
12948 +#include "vserver/context.h"
12949 +#include "vserver/debug.h"
12950 +#include "vserver/context.h"
12951 +#include "vserver/limit_int.h"
12952 +
12953 +
12954 +#define __acc_add_long(a, v)   (*(v) += (a))
12955 +#define __acc_inc_long(v)      (++*(v))
12956 +#define __acc_dec_long(v)      (--*(v))
12957 +
12958 +#if    NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
12959 +#define __acc_add_atomic(a, v) atomic_long_add(a, v)
12960 +#define __acc_inc_atomic(v)    atomic_long_inc(v)
12961 +#define __acc_dec_atomic(v)    atomic_long_dec(v)
12962 +#else  /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
12963 +#define __acc_add_atomic(a, v) __acc_add_long(a, v)
12964 +#define __acc_inc_atomic(v)    __acc_inc_long(v)
12965 +#define __acc_dec_atomic(v)    __acc_dec_long(v)
12966 +#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
12967 +
12968 +
12969 +#define vx_acc_page(m, d, v, r) do {                                   \
12970 +       if ((d) > 0)                                                    \
12971 +               __acc_inc_long(&(m)->v);                                \
12972 +       else                                                            \
12973 +               __acc_dec_long(&(m)->v);                                \
12974 +       __vx_acc_cres(m->mm_vx_info, r, d, m, __FILE__, __LINE__);      \
12975 +} while (0)
12976 +
12977 +#define vx_acc_page_atomic(m, d, v, r) do {                            \
12978 +       if ((d) > 0)                                                    \
12979 +               __acc_inc_atomic(&(m)->v);                              \
12980 +       else                                                            \
12981 +               __acc_dec_atomic(&(m)->v);                              \
12982 +       __vx_acc_cres(m->mm_vx_info, r, d, m, __FILE__, __LINE__);      \
12983 +} while (0)
12984 +
12985 +
12986 +#define vx_acc_pages(m, p, v, r) do {                                  \
12987 +       unsigned long __p = (p);                                        \
12988 +       __acc_add_long(__p, &(m)->v);                                   \
12989 +       __vx_add_cres(m->mm_vx_info, r, __p, m, __FILE__, __LINE__);    \
12990 +} while (0)
12991 +
12992 +#define vx_acc_pages_atomic(m, p, v, r) do {                           \
12993 +       unsigned long __p = (p);                                        \
12994 +       __acc_add_atomic(__p, &(m)->v);                                 \
12995 +       __vx_add_cres(m->mm_vx_info, r, __p, m, __FILE__, __LINE__);    \
12996 +} while (0)
12997 +
12998 +
12999 +
13000 +#define vx_acc_vmpage(m, d) \
13001 +       vx_acc_page(m, d, total_vm,  RLIMIT_AS)
13002 +#define vx_acc_vmlpage(m, d) \
13003 +       vx_acc_page(m, d, locked_vm, RLIMIT_MEMLOCK)
13004 +#define vx_acc_file_rsspage(m, d) \
13005 +       vx_acc_page_atomic(m, d, _file_rss, VLIMIT_MAPPED)
13006 +#define vx_acc_anon_rsspage(m, d) \
13007 +       vx_acc_page_atomic(m, d, _anon_rss, VLIMIT_ANON)
13008 +
13009 +#define vx_acc_vmpages(m, p) \
13010 +       vx_acc_pages(m, p, total_vm,  RLIMIT_AS)
13011 +#define vx_acc_vmlpages(m, p) \
13012 +       vx_acc_pages(m, p, locked_vm, RLIMIT_MEMLOCK)
13013 +#define vx_acc_file_rsspages(m, p) \
13014 +       vx_acc_pages_atomic(m, p, _file_rss, VLIMIT_MAPPED)
13015 +#define vx_acc_anon_rsspages(m, p) \
13016 +       vx_acc_pages_atomic(m, p, _anon_rss, VLIMIT_ANON)
13017 +
13018 +#define vx_pages_add(s, r, p)  __vx_add_cres(s, r, p, 0, __FILE__, __LINE__)
13019 +#define vx_pages_sub(s, r, p)  vx_pages_add(s, r, -(p))
13020 +
13021 +#define vx_vmpages_inc(m)              vx_acc_vmpage(m, 1)
13022 +#define vx_vmpages_dec(m)              vx_acc_vmpage(m, -1)
13023 +#define vx_vmpages_add(m, p)           vx_acc_vmpages(m, p)
13024 +#define vx_vmpages_sub(m, p)           vx_acc_vmpages(m, -(p))
13025 +
13026 +#define vx_vmlocked_inc(m)             vx_acc_vmlpage(m, 1)
13027 +#define vx_vmlocked_dec(m)             vx_acc_vmlpage(m, -1)
13028 +#define vx_vmlocked_add(m, p)          vx_acc_vmlpages(m, p)
13029 +#define vx_vmlocked_sub(m, p)          vx_acc_vmlpages(m, -(p))
13030 +
13031 +#define vx_file_rsspages_inc(m)                vx_acc_file_rsspage(m, 1)
13032 +#define vx_file_rsspages_dec(m)                vx_acc_file_rsspage(m, -1)
13033 +#define vx_file_rsspages_add(m, p)     vx_acc_file_rsspages(m, p)
13034 +#define vx_file_rsspages_sub(m, p)     vx_acc_file_rsspages(m, -(p))
13035 +
13036 +#define vx_anon_rsspages_inc(m)                vx_acc_anon_rsspage(m, 1)
13037 +#define vx_anon_rsspages_dec(m)                vx_acc_anon_rsspage(m, -1)
13038 +#define vx_anon_rsspages_add(m, p)     vx_acc_anon_rsspages(m, p)
13039 +#define vx_anon_rsspages_sub(m, p)     vx_acc_anon_rsspages(m, -(p))
13040 +
13041 +
13042 +#define vx_pages_avail(m, p, r) \
13043 +       __vx_cres_avail((m)->mm_vx_info, r, p, __FILE__, __LINE__)
13044 +
13045 +#define vx_vmpages_avail(m, p) vx_pages_avail(m, p, RLIMIT_AS)
13046 +#define vx_vmlocked_avail(m, p)        vx_pages_avail(m, p, RLIMIT_MEMLOCK)
13047 +#define vx_anon_avail(m, p)    vx_pages_avail(m, p, VLIMIT_ANON)
13048 +#define vx_mapped_avail(m, p)  vx_pages_avail(m, p, VLIMIT_MAPPED)
13049 +
13050 +#define vx_rss_avail(m, p) \
13051 +       __vx_cres_array_avail((m)->mm_vx_info, VLA_RSS, p, __FILE__, __LINE__)
13052 +
13053 +
13054 +enum {
13055 +       VXPT_UNKNOWN = 0,
13056 +       VXPT_ANON,
13057 +       VXPT_NONE,
13058 +       VXPT_FILE,
13059 +       VXPT_SWAP,
13060 +       VXPT_WRITE
13061 +};
13062 +
13063 +#if 0
13064 +#define        vx_page_fault(mm, vma, type, ret)
13065 +#else
13066 +
13067 +static inline
13068 +void __vx_page_fault(struct mm_struct *mm,
13069 +       struct vm_area_struct *vma, int type, int ret)
13070 +{
13071 +       struct vx_info *ctx = mm->mm_vx_info;
13072 +       int slot;
13073 +
13074 +       /*
13075 +        * Bump one per-context page fault counter for @mm.
13076 +        *
13077 +        * The counter row is @type; the column is built from the low
13078 +        * two bits of @ret plus 0x4 when VM_FAULT_WRITE is set in it.
13079 +        * @vma is accepted but not looked at here.
13080 +        */
13081 +
13082 +       /* an mm that belongs to no context is not accounted */
13083 +       if (ctx == NULL)
13084 +               return;
13085 +
13086 +       slot = ret & 0x3;
13087 +       if ((ret & VM_FAULT_WRITE) != 0)
13088 +               slot |= 0x4;
13089 +
13090 +       atomic_inc(&ctx->cacct.page[type][slot]);
13091 +}
13092 +
13093 +#define        vx_page_fault(mm, vma, type, ret)       __vx_page_fault(mm, vma, type, ret)
13094 +#endif
13095 +
13096 +
13097 +extern unsigned long vx_badness(struct task_struct *task, struct mm_struct *mm);
13098 +
13099 +#else
13100 +#warning duplicate inclusion
13101 +#endif
13102 diff -NurpP --minimal linux-2.6.30.2/include/linux/vs_network.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_network.h
13103 --- linux-2.6.30.2/include/linux/vs_network.h   1970-01-01 01:00:00.000000000 +0100
13104 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_network.h        2009-07-04 01:11:39.000000000 +0200
13105 @@ -0,0 +1,169 @@
13106 +#ifndef _NX_VS_NETWORK_H
13107 +#define _NX_VS_NETWORK_H
13108 +
13109 +#include "vserver/context.h"
13110 +#include "vserver/network.h"
13111 +#include "vserver/base.h"
13112 +#include "vserver/check.h"
13113 +#include "vserver/debug.h"
13114 +
13115 +#include <linux/sched.h>
13116 +
13117 +
13118 +#define get_nx_info(i) __get_nx_info(i, __FILE__, __LINE__)
13119 +
13120 +static inline struct nx_info *__get_nx_info(struct nx_info *nxi,
13121 +       const char *_file, int _line)
13122 +{
13123 +       /* take a reference unless there is no context to reference */
13124 +       if (nxi != NULL) {
13125 +               vxlprintk(VXD_CBIT(nid, 2), "get_nx_info(%p[#%d.%d])",
13126 +                       nxi, nxi->nx_id,
13127 +                       atomic_read(&nxi->nx_usecnt),
13128 +                       _file, _line);
13129 +
13130 +               atomic_inc(&nxi->nx_usecnt);
13131 +       }
13132 +       return nxi;
13133 +}
13134 +
13135 +
13136 +extern void free_nx_info(struct nx_info *);
13137 +
13138 +#define put_nx_info(i) __put_nx_info(i, __FILE__, __LINE__)
13139 +
13140 +static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line)
13141 +{
13142 +       if (nxi == NULL)
13143 +               return;
13144 +
13145 +       vxlprintk(VXD_CBIT(nid, 2), "put_nx_info(%p[#%d.%d])",
13146 +               nxi, nxi->nx_id, atomic_read(&nxi->nx_usecnt),
13147 +               _file, _line);
13148 +
13149 +       /* the last reference to go hands the context to free_nx_info() */
13150 +       if (atomic_dec_and_test(&nxi->nx_usecnt))
13151 +               free_nx_info(nxi);
13152 +}
13153 +
13154 +
13155 +#define init_nx_info(p, i) __init_nx_info(p, i, __FILE__, __LINE__)
13156 +
13157 +static inline void __init_nx_info(struct nx_info **nxp, struct nx_info *nxi,
13158 +               const char *_file, int _line)
13159 +{
13160 +       if (nxi != NULL) {
13161 +               vxlprintk(VXD_CBIT(nid, 3), "init_nx_info(%p[#%d.%d])",
13162 +                       nxi, nxi->nx_id, atomic_read(&nxi->nx_usecnt),
13163 +                       _file, _line);
13164 +               /* the slot gets a reference of its own */
13165 +               atomic_inc(&nxi->nx_usecnt);
13166 +       }
13167 +
13168 +       /* plain store; the previous value of *nxp is not released */
13169 +       *nxp = nxi;
13170 +}
13171 +
13172 +
13173 +#define set_nx_info(p, i) __set_nx_info(p, i, __FILE__, __LINE__)
13174 +
13175 +static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi,
13176 +       const char *_file, int _line)
13177 +{
13178 +       struct nx_info *prev;
13179 +
13180 +       /* nothing to install */
13181 +       if (nxi == NULL)
13182 +               return;
13183 +
13184 +       vxlprintk(VXD_CBIT(nid, 3), "set_nx_info(%p[#%d.%d])",
13185 +               nxi, nxi->nx_id, atomic_read(&nxi->nx_usecnt),
13186 +               _file, _line);
13187 +
13188 +       atomic_inc(&nxi->nx_usecnt);
13189 +       prev = xchg(nxp, nxi);
13190 +       BUG_ON(prev != NULL);   /* the slot must have been empty */
13191 +}
13192 +
13193 +#define clr_nx_info(p) __clr_nx_info(p, __FILE__, __LINE__)
13194 +
13195 +static inline void __clr_nx_info(struct nx_info **nxp,
13196 +       const char *_file, int _line)
13197 +{
13198 +       /* empty the slot first, then deal with what it contained */
13199 +       struct nx_info *prev = xchg(nxp, NULL);
13200 +
13201 +       if (prev == NULL)
13202 +               return;
13203 +
13204 +       vxlprintk(VXD_CBIT(nid, 3), "clr_nx_info(%p[#%d.%d])",
13205 +               prev, prev->nx_id, atomic_read(&prev->nx_usecnt),
13206 +               _file, _line);
13207 +
13208 +       /* give up the slot's reference; the last one calls free_nx_info() */
13209 +       if (atomic_dec_and_test(&prev->nx_usecnt))
13210 +               free_nx_info(prev);
13211 +}
13212 +
13213 +
13214 +#define claim_nx_info(v, p) __claim_nx_info(v, p, __FILE__, __LINE__)
13215 +/* count one more task in nxi->nx_tasks; nxi is dereferenced even though the trace allows NULL */
13216 +static inline void __claim_nx_info(struct nx_info *nxi,
13217 +       struct task_struct *task, const char *_file, int _line)
13218 +{
13219 +       vxlprintk(VXD_CBIT(nid, 3), "claim_nx_info(%p[#%d.%d.%d]) %p",
13220 +               nxi, nxi ? nxi->nx_id : 0,
13221 +               nxi?atomic_read(&nxi->nx_usecnt):0,
13222 +               nxi?atomic_read(&nxi->nx_tasks):0,
13223 +               task, _file, _line);
13224 +
13225 +       atomic_inc(&nxi->nx_tasks);
13226 +}
13227 +
13228 +
13229 +extern void unhash_nx_info(struct nx_info *);
13230 +
13231 +#define release_nx_info(v, p) __release_nx_info(v, p, __FILE__, __LINE__)
13232 +/* drop one task from nxi->nx_tasks, unhashing the context on the last one; nxi must not be NULL */
13233 +static inline void __release_nx_info(struct nx_info *nxi,
13234 +       struct task_struct *task, const char *_file, int _line)
13235 +{
13236 +       vxlprintk(VXD_CBIT(nid, 3), "release_nx_info(%p[#%d.%d.%d]) %p",
13237 +               nxi, nxi ? nxi->nx_id : 0,
13238 +               nxi ? atomic_read(&nxi->nx_usecnt) : 0,
13239 +               nxi ? atomic_read(&nxi->nx_tasks) : 0,
13240 +               task, _file, _line);
13241 +
13242 +       might_sleep();
13243 +
13244 +       if (atomic_dec_and_test(&nxi->nx_tasks))
13245 +               unhash_nx_info(nxi);
13246 +}
13247 +
13248 +
13249 +#define task_get_nx_info(i)    __task_get_nx_info(i, __FILE__, __LINE__)
13250 +/* reference p->nx_info under task_lock(p); yields NULL when the task has no nx_info */
13251 +static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p,
13252 +       const char *_file, int _line)
13253 +{
13254 +       struct nx_info *nxi;
13255 +
13256 +       task_lock(p);
13257 +       vxlprintk(VXD_CBIT(nid, 5), "task_get_nx_info(%p)",
13258 +               p, _file, _line);
13259 +       nxi = __get_nx_info(p->nx_info, _file, _line);
13260 +       task_unlock(p);
13261 +       return nxi;
13262 +}
13263 +
13264 +/* undo the task's claim on p->nx_info via release_nx_info(), if it is set */
13265 +static inline void exit_nx_info(struct task_struct *p)
13266 +{
13267 +       if (p->nx_info)
13268 +               release_nx_info(p->nx_info, p);
13269 +}
13270 +
13271 +
13272 +#else
13273 +#warning duplicate inclusion
13274 +#endif
13275 diff -NurpP --minimal linux-2.6.30.2/include/linux/vs_pid.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_pid.h
13276 --- linux-2.6.30.2/include/linux/vs_pid.h       1970-01-01 01:00:00.000000000 +0100
13277 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_pid.h    2009-07-04 01:11:39.000000000 +0200
13278 @@ -0,0 +1,95 @@
13279 +#ifndef _VS_PID_H
13280 +#define _VS_PID_H
13281 +
13282 +#include "vserver/base.h"
13283 +#include "vserver/check.h"
13284 +#include "vserver/context.h"
13285 +#include "vserver/debug.h"
13286 +#include "vserver/pid.h"
13287 +#include <linux/pid_namespace.h>
13288 +
13289 +
13290 +#define VXF_FAKE_INIT  (VXF_INFO_INIT | VXF_STATE_INIT)
13291 +
13292 +static inline
13293 +int vx_proc_task_visible(struct task_struct *task)
13294 +{
13295 +       /* pid 1 shows through unless vx_flags() reports VXF_FAKE_INIT set */
13296 +       if ((task->pid == 1) && !vx_flags(VXF_FAKE_INIT, VXF_FAKE_INIT))
13297 +               return 1;
13298 +
13299 +       /* any other task has to pass the WATCH/IDENT context check */
13300 +       if (vx_check(vx_task_xid(task), VS_WATCH | VS_IDENT))
13301 +               return 1;
13302 +
13303 +       return 0;
13304 +}
13305 +
13306 +#define find_task_by_real_pid(pid) find_task_by_pid_ns(pid, &init_pid_ns)
13307 +
13308 +#if 0
13309 +
13310 +static inline
13311 +struct task_struct *vx_find_proc_task_by_pid(int pid)
13312 +{
13313 +       struct task_struct *task = find_task_by_real_pid(pid);
13314 +
13315 +       if (task && !vx_proc_task_visible(task)) {
13316 +               vxdprintk(VXD_CBIT(misc, 6),
13317 +                       "dropping task (find) %p[#%u,%u] for %p[#%u,%u]",
13318 +                       task, task->xid, task->pid,
13319 +                       current, current->xid, current->pid);
13320 +               task = NULL;
13321 +       }
13322 +       return task;
13323 +}
13324 +
13325 +#endif
13326 +
13327 +static inline
13328 +struct task_struct *vx_get_proc_task(struct inode *inode, struct pid *pid)
13329 +{
13330 +       struct task_struct *tsk = get_pid_task(pid, PIDTYPE_PID);
13331 +
13332 +       if (tsk == NULL || vx_proc_task_visible(tsk))
13333 +               return tsk;
13334 +
13335 +       /* not visible to the caller: log it and give the reference back */
13336 +       vxdprintk(VXD_CBIT(misc, 6),
13337 +               "dropping task (get) %p[#%u,%u] for %p[#%u,%u]",
13338 +               tsk, tsk->xid, tsk->pid, current, current->xid, current->pid);
13339 +       put_task_struct(tsk);
13340 +       return NULL;
13341 +}
13342 +
13343 +#if 0
13344 +
13345 +static inline
13346 +struct task_struct *vx_child_reaper(struct task_struct *p)
13347 +{
13348 +       struct vx_info *vxi = p->vx_info;
13349 +       struct task_struct *reaper = child_reaper(p);
13350 +
13351 +       if (!vxi)
13352 +               goto out;
13353 +
13354 +       BUG_ON(!p->vx_info->vx_reaper);
13355 +
13356 +       /* child reaper for the guest reaper */
13357 +       if (vxi->vx_reaper == p)
13358 +               goto out;
13359 +
13360 +       reaper = vxi->vx_reaper;
13361 +out:
13362 +       vxdprintk(VXD_CBIT(xid, 7),
13363 +               "vx_child_reaper(%p[#%u,%u]) = %p[#%u,%u]",
13364 +               p, p->xid, p->pid, reaper, reaper->xid, reaper->pid);
13365 +       return reaper;
13366 +}
13367 +
13368 +#endif
13369 +
13370 +
13371 +#else
13372 +#warning duplicate inclusion
13373 +#endif
13374 diff -NurpP --minimal linux-2.6.30.2/include/linux/vs_sched.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_sched.h
13375 --- linux-2.6.30.2/include/linux/vs_sched.h     1970-01-01 01:00:00.000000000 +0100
13376 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_sched.h  2009-07-04 01:11:39.000000000 +0200
13377 @@ -0,0 +1,110 @@
13378 +#ifndef _VS_SCHED_H
13379 +#define _VS_SCHED_H
13380 +
13381 +#include "vserver/base.h"
13382 +#include "vserver/context.h"
13383 +#include "vserver/sched.h"
13384 +
13385 +
13386 +#define VAVAVOOM_RATIO          50
13387 +
13388 +#define MAX_PRIO_BIAS           20
13389 +#define MIN_PRIO_BIAS          -20
13390 +
13391 +
13392 +#ifdef CONFIG_VSERVER_HARDCPU
13393 +
13394 +/*
13395 + * vx_effective_vavavoom - return the token based bonus/penalty
13396 + * that vx_adjust_prio() adds to a task's priority.
13397 + *
13398 + * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
13399 + * into a -4 ... 0 ... +4 bonus/penalty range.
13400 + *
13401 + * Additionally, we scale another amount based on the number of
13402 + * CPU tokens currently held by the context, if the process is
13403 + * part of a context (and the appropriate SCHED flag is set).
13404 + * This ranges from -5 ... 0 ... +15, quadratically.
13405 + *
13406 + * So, the total bonus is -9 .. 0 .. +19
13407 + * We use ~50% of the full 0...39 priority range so that:
13408 + *
13409 + * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
13410 + * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
13411 + *    unless that context is far exceeding its CPU allocation.
13412 + *
13413 + * Both properties are important to certain workloads.
13414 + */
13415 +static inline
13416 +int vx_effective_vavavoom(struct _vx_sched_pc *sched_pc, int max_prio)
13417 +{
13418 +       int tokens = sched_pc->tokens;
13419 +       int max, missing;
13420 +
13421 +       /* no bonus or penalty while the token count is negative */
13422 +       if (tokens < 0)
13423 +               return 0;
13424 +
13425 +       /* quadratic in the number of tokens missing up to tokens_max */
13426 +       max = sched_pc->tokens_max;
13427 +       missing = max - tokens;
13428 +       max *= max;
13429 +       return max_prio * VAVAVOOM_RATIO / 100
13430 +               * (missing * missing - (max >> 2)) / max;
13431 +}
13432 +
13433 +
13434 +static inline
13435 +int vx_adjust_prio(struct task_struct *p, int prio, int max_user)
13436 +{
13437 +       struct vx_info *vxi = p->vx_info;
13438 +       struct _vx_sched_pc *pc;
13439 +
13440 +       if (vxi == NULL)
13441 +               return prio;
13442 +
13443 +       pc = &vx_cpu(vxi, sched_pc);
13444 +       if (vx_info_flags(vxi, VXF_SCHED_PRIO, 0)) {
13445 +               int bonus = vx_effective_vavavoom(pc, max_user);
13446 +
13447 +               /* remember the token based bonus, then apply it */
13448 +               pc->vavavoom = bonus;
13449 +               prio += bonus;
13450 +       }
13451 +       return prio + pc->prio_bias;
13452 +}
13453 +
13454 +#else /* !CONFIG_VSERVER_HARDCPU */
13455 +
13456 +static inline
13457 +int vx_adjust_prio(struct task_struct *p, int prio, int max_user)
13458 +{
13459 +       struct vx_info *ctx = p->vx_info;
13460 +
13461 +       /* in this variant only the per-context
13462 +        * priority bias applies; max_user is unused */
13463 +       return ctx ? prio + vx_cpu(ctx, sched_pc).prio_bias : prio;
13464 +}
13465 +
13466 +#endif /* CONFIG_VSERVER_HARDCPU */
13467 +
13468 +
13469 +static inline void vx_account_user(struct vx_info *vxi,
13470 +       cputime_t cputime, int nice)
13471 +{
13472 +       /* add to the context's user_ticks, if there is a context */
13473 +       if (vxi != NULL)
13474 +               vx_cpu(vxi, sched_pc).user_ticks += cputime;
13475 +}
13476 +
13477 +static inline void vx_account_system(struct vx_info *vxi,
13478 +       cputime_t cputime, int idle)
13479 +{
13480 +       /* add to the context's sys_ticks, if there is a context */
13481 +       if (vxi != NULL)
13482 +               vx_cpu(vxi, sched_pc).sys_ticks += cputime;
13483 +}
13484 +
13485 +#else
13486 +#warning duplicate inclusion
13487 +#endif
13488 diff -NurpP --minimal linux-2.6.30.2/include/linux/vs_socket.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_socket.h
13489 --- linux-2.6.30.2/include/linux/vs_socket.h    1970-01-01 01:00:00.000000000 +0100
13490 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_socket.h 2009-07-04 01:11:39.000000000 +0200
13491 @@ -0,0 +1,67 @@
13492 +#ifndef _VS_SOCKET_H
13493 +#define _VS_SOCKET_H
13494 +
13495 +#include "vserver/debug.h"
13496 +#include "vserver/base.h"
13497 +#include "vserver/cacct.h"
13498 +#include "vserver/context.h"
13499 +#include "vserver/tag.h"
13500 +
13501 +
13502 +/* socket accounting */
13503 +
13504 +#include <linux/socket.h>
13505 +
13506 +static inline int vx_sock_type(int family)
13507 +{
13508 +       /* map a protocol family onto its VXA_SOCK_* accounting index */
13509 +       if (family == PF_UNSPEC)
13510 +               return VXA_SOCK_UNSPEC;
13511 +       if (family == PF_UNIX)
13512 +               return VXA_SOCK_UNIX;
13513 +       if (family == PF_INET)
13514 +               return VXA_SOCK_INET;
13515 +       if (family == PF_INET6)
13516 +               return VXA_SOCK_INET6;
13517 +       if (family == PF_PACKET)
13518 +               return VXA_SOCK_PACKET;
13519 +
13520 +       /* every other family shares one index */
13521 +       return VXA_SOCK_OTHER;
13522 +}
13523 +
13524 +#define vx_acc_sock(v, f, p, s) \
13525 +       __vx_acc_sock(v, f, p, s, __FILE__, __LINE__)
13526 +/* bump count and add size to total in cacct.sock[type][pos]; file and line are unused */
13527 +static inline void __vx_acc_sock(struct vx_info *vxi,
13528 +       int family, int pos, int size, const char *file, int line)
13529 +{
13530 +       if (vxi) {
13531 +               int type = vx_sock_type(family);
13532 +
13533 +               atomic_long_inc(&vxi->cacct.sock[type][pos].count);
13534 +               atomic_long_add(size, &vxi->cacct.sock[type][pos].total);
13535 +       }
13536 +}
13537 +
13538 +#define vx_sock_recv(sk, s) \
13539 +       vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 0, s)
13540 +#define vx_sock_send(sk, s) \
13541 +       vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 1, s)
13542 +#define vx_sock_fail(sk, s) \
13543 +       vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, s)
13544 +
13545 +
13546 +#define sock_vx_init(s) do {           \
13547 +       (s)->sk_xid = 0;                \
13548 +       (s)->sk_vx_info = NULL;         \
13549 +       } while (0)
13550 +
13551 +#define sock_nx_init(s) do {           \
13552 +       (s)->sk_nid = 0;                \
13553 +       (s)->sk_nx_info = NULL;         \
13554 +       } while (0)
13555 +
13556 +#else
13557 +#warning duplicate inclusion
13558 +#endif
13559 diff -NurpP --minimal linux-2.6.30.2/include/linux/vs_tag.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_tag.h
13560 --- linux-2.6.30.2/include/linux/vs_tag.h       1970-01-01 01:00:00.000000000 +0100
13561 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_tag.h    2009-07-04 01:11:39.000000000 +0200
13562 @@ -0,0 +1,47 @@
13563 +#ifndef _VS_TAG_H
13564 +#define _VS_TAG_H
13565 +
13566 +#include <linux/vserver/tag.h>
13567 +
13568 +/* check conditions */
13569 +
13570 +#define DX_ADMIN       0x0001
13571 +#define DX_WATCH       0x0002
13572 +#define DX_HOSTID      0x0008
13573 +
13574 +#define DX_IDENT       0x0010
13575 +
13576 +#define DX_ARG_MASK    0x0010
13577 +
13578 +
13579 +#define dx_task_tag(t) ((t)->tag)
13580 +
13581 +#define dx_current_tag() dx_task_tag(current)
13582 +
13583 +#define dx_check(c, m) __dx_check(dx_current_tag(), c, m)
13584 +
13585 +#define dx_weak_check(c, m)    ((m) ? dx_check(c, m) : 1)
13586 +
13587 +
13588 +/*
13589 + * check current context for ADMIN/WATCH and
13590 + * optionally against supplied argument
13591 + */
13592 +static inline int __dx_check(tag_t cid, tag_t id, unsigned int mode)
13593 +{
13594 +       if ((mode & DX_ARG_MASK) && (mode & DX_IDENT) && (id == cid))
13595 +               return 1;       /* argument matches the caller's tag */
13596 +       if ((mode & DX_ADMIN) && (cid == 0))
13597 +               return 1;
13598 +       if ((mode & DX_WATCH) && (cid == 1))
13599 +               return 1;
13600 +       return (mode & DX_HOSTID) && (id == 0);
13601 +}
13602 +
13603 +struct inode;
13604 +int dx_permission(struct inode *inode, int mask);
13605 +
13606 +
13607 +#else
13608 +#warning duplicate inclusion
13609 +#endif
13610 diff -NurpP --minimal linux-2.6.30.2/include/linux/vs_time.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_time.h
13611 --- linux-2.6.30.2/include/linux/vs_time.h      1970-01-01 01:00:00.000000000 +0100
13612 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/linux/vs_time.h   2009-07-04 01:11:39.000000000 +0200
13613 @@ -0,0 +1,19 @@
13614 +#ifndef _VS_TIME_H
13615 +#define _VS_TIME_H
13616 +
13617 +
13618 +/* time faking stuff */
13619 +
13620 +#ifdef CONFIG_VSERVER_VTIME
13621 +
13622 +extern void vx_gettimeofday(struct timeval *tv);
13623 +extern int vx_settimeofday(struct timespec *ts);
13624 +
13625 +#else
13626 +#define        vx_gettimeofday(t)      do_gettimeofday(t)
13627 +#define        vx_settimeofday(t)      do_settimeofday(t)
13628 +#endif
13629 +
13630 +#else
13631 +#warning duplicate inclusion
13632 +#endif
13633 diff -NurpP --minimal linux-2.6.30.2/include/net/addrconf.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/net/addrconf.h
13634 --- linux-2.6.30.2/include/net/addrconf.h       2009-06-11 17:13:18.000000000 +0200
13635 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/net/addrconf.h    2009-07-04 01:11:39.000000000 +0200
13636 @@ -84,7 +84,8 @@ extern int                    ipv6_dev_get_saddr(struct n
13637                                                struct net_device *dev,
13638                                                const struct in6_addr *daddr,
13639                                                unsigned int srcprefs,
13640 -                                              struct in6_addr *saddr);
13641 +                                              struct in6_addr *saddr,
13642 +                                              struct nx_info *nxi);
13643  extern int                     ipv6_get_lladdr(struct net_device *dev,
13644                                                 struct in6_addr *addr,
13645                                                 unsigned char banned_flags);
13646 diff -NurpP --minimal linux-2.6.30.2/include/net/af_unix.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/net/af_unix.h
13647 --- linux-2.6.30.2/include/net/af_unix.h        2008-12-25 00:26:37.000000000 +0100
13648 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/net/af_unix.h     2009-07-04 01:11:39.000000000 +0200
13649 @@ -4,6 +4,7 @@
13650  #include <linux/socket.h>
13651  #include <linux/un.h>
13652  #include <linux/mutex.h>
13653 +#include <linux/vs_base.h>
13654  #include <net/sock.h>
13655  
13656  extern void unix_inflight(struct file *fp);
13657 diff -NurpP --minimal linux-2.6.30.2/include/net/inet_timewait_sock.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/net/inet_timewait_sock.h
13658 --- linux-2.6.30.2/include/net/inet_timewait_sock.h     2009-03-24 14:22:44.000000000 +0100
13659 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/net/inet_timewait_sock.h  2009-07-04 01:11:39.000000000 +0200
13660 @@ -15,15 +15,14 @@
13661  #ifndef _INET_TIMEWAIT_SOCK_
13662  #define _INET_TIMEWAIT_SOCK_
13663  
13664 +// #include <net/inet_sock.h>
13665 +#include <net/sock.h>
13666  
13667  #include <linux/list.h>
13668  #include <linux/module.h>
13669  #include <linux/timer.h>
13670  #include <linux/types.h>
13671  #include <linux/workqueue.h>
13672 -
13673 -#include <net/inet_sock.h>
13674 -#include <net/sock.h>
13675  #include <net/tcp_states.h>
13676  #include <net/timewait_sock.h>
13677  
13678 @@ -116,6 +115,10 @@ struct inet_timewait_sock {
13679  #define tw_hash                        __tw_common.skc_hash
13680  #define tw_prot                        __tw_common.skc_prot
13681  #define tw_net                 __tw_common.skc_net
13682 +#define tw_xid                 __tw_common.skc_xid
13683 +#define tw_vx_info             __tw_common.skc_vx_info
13684 +#define tw_nid                 __tw_common.skc_nid
13685 +#define tw_nx_info             __tw_common.skc_nx_info
13686         int                     tw_timeout;
13687         volatile unsigned char  tw_substate;
13688         /* 3 bits hole, try to pack */
13689 diff -NurpP --minimal linux-2.6.30.2/include/net/route.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/net/route.h
13690 --- linux-2.6.30.2/include/net/route.h  2008-12-25 00:26:37.000000000 +0100
13691 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/net/route.h       2009-07-04 01:11:39.000000000 +0200
13692 @@ -135,6 +135,9 @@ static inline void ip_rt_put(struct rtab
13693                 dst_release(&rt->u.dst);
13694  }
13695  
13696 +#include <linux/vs_base.h>
13697 +#include <linux/vs_inet.h>
13698 +
13699  #define IPTOS_RT_MASK  (IPTOS_TOS_MASK & ~3)
13700  
13701  extern const __u8 ip_tos2prio[16];
13702 @@ -144,6 +147,9 @@ static inline char rt_tos2priority(u8 to
13703         return ip_tos2prio[IPTOS_TOS(tos)>>1];
13704  }
13705  
13706 +extern int ip_v4_find_src(struct net *net, struct nx_info *,
13707 +       struct rtable **, struct flowi *);
13708 +
13709  static inline int ip_route_connect(struct rtable **rp, __be32 dst,
13710                                    __be32 src, u32 tos, int oif, u8 protocol,
13711                                    __be16 sport, __be16 dport, struct sock *sk,
13712 @@ -161,11 +167,24 @@ static inline int ip_route_connect(struc
13713  
13714         int err;
13715         struct net *net = sock_net(sk);
13716 +       struct nx_info *nx_info = current->nx_info;
13717  
13718         if (inet_sk(sk)->transparent)
13719                 fl.flags |= FLOWI_FLAG_ANYSRC;
13720  
13721 -       if (!dst || !src) {
13722 +       if (sk)
13723 +               nx_info = sk->sk_nx_info;
13724 +
13725 +       vxdprintk(VXD_CBIT(net, 4),
13726 +               "ip_route_connect(%p) %p,%p;%lx",
13727 +               sk, nx_info, sk->sk_socket,
13728 +               (sk->sk_socket?sk->sk_socket->flags:0));
13729 +
13730 +       err = ip_v4_find_src(net, nx_info, rp, &fl);
13731 +       if (err)
13732 +               return err;
13733 +
13734 +       if (!fl.fl4_dst || !fl.fl4_src) {
13735                 err = __ip_route_output_key(net, rp, &fl);
13736                 if (err)
13737                         return err;
13738 diff -NurpP --minimal linux-2.6.30.2/include/net/sock.h linux-2.6.30.2-vs2.3.0.36.14-pre4/include/net/sock.h
13739 --- linux-2.6.30.2/include/net/sock.h   2009-06-11 17:13:23.000000000 +0200
13740 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/include/net/sock.h        2009-07-04 01:11:39.000000000 +0200
13741 @@ -134,6 +134,10 @@ struct sock_common {
13742  #ifdef CONFIG_NET_NS
13743         struct net              *skc_net;
13744  #endif
13745 +       xid_t                   skc_xid;
13746 +       struct vx_info          *skc_vx_info;
13747 +       nid_t                   skc_nid;
13748 +       struct nx_info          *skc_nx_info;
13749  };
13750  
13751  /**
13752 @@ -218,6 +222,10 @@ struct sock {
13753  #define sk_hash                        __sk_common.skc_hash
13754  #define sk_prot                        __sk_common.skc_prot
13755  #define sk_net                 __sk_common.skc_net
13756 +#define sk_xid                 __sk_common.skc_xid
13757 +#define sk_vx_info             __sk_common.skc_vx_info
13758 +#define sk_nid                 __sk_common.skc_nid
13759 +#define sk_nx_info             __sk_common.skc_nx_info
13760         unsigned char           sk_shutdown : 2,
13761                                 sk_no_check : 2,
13762                                 sk_userlocks : 4;
13763 diff -NurpP --minimal linux-2.6.30.2/init/main.c linux-2.6.30.2-vs2.3.0.36.14-pre4/init/main.c
13764 --- linux-2.6.30.2/init/main.c  2009-06-11 17:13:25.000000000 +0200
13765 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/init/main.c       2009-07-04 01:11:39.000000000 +0200
13766 @@ -65,6 +65,7 @@
13767  #include <linux/ftrace.h>
13768  #include <linux/async.h>
13769  #include <trace/boot.h>
13770 +#include <linux/vserver/percpu.h>
13771  
13772  #include <asm/io.h>
13773  #include <asm/bugs.h>
13774 @@ -383,12 +384,14 @@ EXPORT_SYMBOL(__per_cpu_offset);
13775  
13776  static void __init setup_per_cpu_areas(void)
13777  {
13778 -       unsigned long size, i;
13779 +       unsigned long size, vspc, i;
13780         char *ptr;
13781         unsigned long nr_possible_cpus = num_possible_cpus();
13782  
13783 +       vspc = PERCPU_PERCTX * CONFIG_VSERVER_CONTEXTS;
13784 +
13785         /* Copy section for each CPU (we discard the original) */
13786 -       size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
13787 +       size = ALIGN(PERCPU_ENOUGH_ROOM + vspc, PAGE_SIZE);
13788         ptr = alloc_bootmem_pages(size * nr_possible_cpus);
13789  
13790         for_each_possible_cpu(i) {
13791 diff -NurpP --minimal linux-2.6.30.2/ipc/mqueue.c linux-2.6.30.2-vs2.3.0.36.14-pre4/ipc/mqueue.c
13792 --- linux-2.6.30.2/ipc/mqueue.c 2009-06-11 17:13:25.000000000 +0200
13793 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/ipc/mqueue.c      2009-07-04 01:40:51.000000000 +0200
13794 @@ -32,6 +32,8 @@
13795  #include <linux/nsproxy.h>
13796  #include <linux/pid.h>
13797  #include <linux/ipc_namespace.h>
13798 +#include <linux/vs_context.h>
13799 +#include <linux/vs_limit.h>
13800  
13801  #include <net/sock.h>
13802  #include "util.h"
13803 @@ -65,6 +67,7 @@ struct mqueue_inode_info {
13804         struct sigevent notify;
13805         struct pid* notify_owner;
13806         struct user_struct *user;       /* user who created, for accounting */
13807 +       struct vx_info *vxi;
13808         struct sock *notify_sock;
13809         struct sk_buff *notify_cookie;
13810  
13811 @@ -124,6 +127,7 @@ static struct inode *mqueue_get_inode(st
13812                 if (S_ISREG(mode)) {
13813                         struct mqueue_inode_info *info;
13814                         struct task_struct *p = current;
13815 +                       struct vx_info *vxi = p->vx_info;
13816                         unsigned long mq_bytes, mq_msg_tblsz;
13817  
13818                         inode->i_fop = &mqueue_file_operations;
13819 @@ -138,6 +142,7 @@ static struct inode *mqueue_get_inode(st
13820                         info->notify_owner = NULL;
13821                         info->qsize = 0;
13822                         info->user = NULL;      /* set when all is ok */
13823 +                       info->vxi = NULL;
13824                         memset(&info->attr, 0, sizeof(info->attr));
13825                         info->attr.mq_maxmsg = ipc_ns->mq_msg_max;
13826                         info->attr.mq_msgsize = ipc_ns->mq_msgsize_max;
13827 @@ -152,22 +157,26 @@ static struct inode *mqueue_get_inode(st
13828                         spin_lock(&mq_lock);
13829                         if (u->mq_bytes + mq_bytes < u->mq_bytes ||
13830                             u->mq_bytes + mq_bytes >
13831 -                           p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur) {
13832 +                           p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur ||
13833 +                           !vx_ipcmsg_avail(vxi, mq_bytes)) {
13834                                 spin_unlock(&mq_lock);
13835                                 goto out_inode;
13836                         }
13837                         u->mq_bytes += mq_bytes;
13838 +                       vx_ipcmsg_add(vxi, u, mq_bytes);
13839                         spin_unlock(&mq_lock);
13840  
13841                         info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
13842                         if (!info->messages) {
13843                                 spin_lock(&mq_lock);
13844                                 u->mq_bytes -= mq_bytes;
13845 +                               vx_ipcmsg_sub(vxi, u, mq_bytes);
13846                                 spin_unlock(&mq_lock);
13847                                 goto out_inode;
13848                         }
13849                         /* all is ok */
13850                         info->user = get_uid(u);
13851 +                       info->vxi = get_vx_info(vxi);
13852                 } else if (S_ISDIR(mode)) {
13853                         inc_nlink(inode);
13854                         /* Some things misbehave if size == 0 on a directory */
13855 @@ -268,8 +277,11 @@ static void mqueue_delete_inode(struct i
13856                    (info->attr.mq_maxmsg * info->attr.mq_msgsize));
13857         user = info->user;
13858         if (user) {
13859 +               struct vx_info *vxi = info->vxi;
13860 +
13861                 spin_lock(&mq_lock);
13862                 user->mq_bytes -= mq_bytes;
13863 +               vx_ipcmsg_sub(vxi, user, mq_bytes);
13864                 /*
13865                  * get_ns_from_inode() ensures that the
13866                  * (ipc_ns = sb->s_fs_info) is either a valid ipc_ns
13867 @@ -279,6 +291,7 @@ static void mqueue_delete_inode(struct i
13868                 if (ipc_ns)
13869                         ipc_ns->mq_queues_count--;
13870                 spin_unlock(&mq_lock);
13871 +               put_vx_info(vxi);
13872                 free_uid(user);
13873         }
13874         if (ipc_ns)
13875 diff -NurpP --minimal linux-2.6.30.2/ipc/msg.c linux-2.6.30.2-vs2.3.0.36.14-pre4/ipc/msg.c
13876 --- linux-2.6.30.2/ipc/msg.c    2009-03-24 14:22:44.000000000 +0100
13877 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/ipc/msg.c 2009-07-04 01:11:39.000000000 +0200
13878 @@ -38,6 +38,7 @@
13879  #include <linux/rwsem.h>
13880  #include <linux/nsproxy.h>
13881  #include <linux/ipc_namespace.h>
13882 +#include <linux/vs_base.h>
13883  
13884  #include <asm/current.h>
13885  #include <asm/uaccess.h>
13886 @@ -190,6 +191,7 @@ static int newque(struct ipc_namespace *
13887  
13888         msq->q_perm.mode = msgflg & S_IRWXUGO;
13889         msq->q_perm.key = key;
13890 +       msq->q_perm.xid = vx_current_xid();
13891  
13892         msq->q_perm.security = NULL;
13893         retval = security_msg_queue_alloc(msq);
13894 diff -NurpP --minimal linux-2.6.30.2/ipc/namespace.c linux-2.6.30.2-vs2.3.0.36.14-pre4/ipc/namespace.c
13895 --- linux-2.6.30.2/ipc/namespace.c      2009-06-11 17:13:26.000000000 +0200
13896 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/ipc/namespace.c   2009-07-04 04:52:12.000000000 +0200
13897 @@ -11,6 +11,8 @@
13898  #include <linux/slab.h>
13899  #include <linux/fs.h>
13900  #include <linux/mount.h>
13901 +#include <linux/vs_base.h>
13902 +#include <linux/vserver/global.h>
13903  
13904  #include "util.h"
13905  
13906 diff -NurpP --minimal linux-2.6.30.2/ipc/sem.c linux-2.6.30.2-vs2.3.0.36.14-pre4/ipc/sem.c
13907 --- linux-2.6.30.2/ipc/sem.c    2009-03-24 14:22:44.000000000 +0100
13908 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/ipc/sem.c 2009-07-04 01:11:39.000000000 +0200
13909 @@ -83,6 +83,8 @@
13910  #include <linux/rwsem.h>
13911  #include <linux/nsproxy.h>
13912  #include <linux/ipc_namespace.h>
13913 +#include <linux/vs_base.h>
13914 +#include <linux/vs_limit.h>
13915  
13916  #include <asm/uaccess.h>
13917  #include "util.h"
13918 @@ -255,6 +257,7 @@ static int newary(struct ipc_namespace *
13919  
13920         sma->sem_perm.mode = (semflg & S_IRWXUGO);
13921         sma->sem_perm.key = key;
13922 +       sma->sem_perm.xid = vx_current_xid();
13923  
13924         sma->sem_perm.security = NULL;
13925         retval = security_sem_alloc(sma);
13926 @@ -270,6 +273,9 @@ static int newary(struct ipc_namespace *
13927                 return id;
13928         }
13929         ns->used_sems += nsems;
13930 +       /* FIXME: obsoleted? */
13931 +       vx_semary_inc(sma);
13932 +       vx_nsems_add(sma, nsems);
13933  
13934         sma->sem_base = (struct sem *) &sma[1];
13935         INIT_LIST_HEAD(&sma->sem_pending);
13936 @@ -546,6 +552,9 @@ static void freeary(struct ipc_namespace
13937         sem_unlock(sma);
13938  
13939         ns->used_sems -= sma->sem_nsems;
13940 +       /* FIXME: obsoleted? */
13941 +       vx_nsems_sub(sma, sma->sem_nsems);
13942 +       vx_semary_dec(sma);
13943         security_sem_free(sma);
13944         ipc_rcu_putref(sma);
13945  }
13946 diff -NurpP --minimal linux-2.6.30.2/ipc/shm.c linux-2.6.30.2-vs2.3.0.36.14-pre4/ipc/shm.c
13947 --- linux-2.6.30.2/ipc/shm.c    2009-06-11 17:13:26.000000000 +0200
13948 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/ipc/shm.c 2009-07-04 01:48:00.000000000 +0200
13949 @@ -40,6 +40,8 @@
13950  #include <linux/mount.h>
13951  #include <linux/ipc_namespace.h>
13952  #include <linux/ima.h>
13953 +#include <linux/vs_context.h>
13954 +#include <linux/vs_limit.h>
13955  
13956  #include <asm/uaccess.h>
13957  
13958 @@ -169,7 +171,12 @@ static void shm_open(struct vm_area_stru
13959   */
13960  static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
13961  {
13962 -       ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
13963 +       struct vx_info *vxi = lookup_vx_info(shp->shm_perm.xid);
13964 +       int numpages = (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
13965 +
13966 +       vx_ipcshm_sub(vxi, shp, numpages);
13967 +       ns->shm_tot -= numpages;
13968 +
13969         shm_rmid(ns, shp);
13970         shm_unlock(shp);
13971         if (!is_file_hugepages(shp->shm_file))
13972 @@ -179,6 +186,7 @@ static void shm_destroy(struct ipc_names
13973                                                 shp->mlock_user);
13974         fput (shp->shm_file);
13975         security_shm_free(shp);
13976 +       put_vx_info(vxi);
13977         ipc_rcu_putref(shp);
13978  }
13979  
13980 @@ -349,11 +357,15 @@ static int newseg(struct ipc_namespace *
13981         if (ns->shm_tot + numpages > ns->shm_ctlall)
13982                 return -ENOSPC;
13983  
13984 +       if (!vx_ipcshm_avail(current->vx_info, numpages))
13985 +               return -ENOSPC;
13986 +
13987         shp = ipc_rcu_alloc(sizeof(*shp));
13988         if (!shp)
13989                 return -ENOMEM;
13990  
13991         shp->shm_perm.key = key;
13992 +       shp->shm_perm.xid = vx_current_xid();
13993         shp->shm_perm.mode = (shmflg & S_IRWXUGO);
13994         shp->mlock_user = NULL;
13995  
13996 @@ -408,6 +420,7 @@ static int newseg(struct ipc_namespace *
13997         ns->shm_tot += numpages;
13998         error = shp->shm_perm.id;
13999         shm_unlock(shp);
14000 +       vx_ipcshm_add(current->vx_info, key, numpages);
14001         return error;
14002  
14003  no_id:
14004 diff -NurpP --minimal linux-2.6.30.2/kernel/capability.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/capability.c
14005 --- linux-2.6.30.2/kernel/capability.c  2009-03-24 14:22:44.000000000 +0100
14006 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/capability.c       2009-07-04 01:11:39.000000000 +0200
14007 @@ -14,6 +14,7 @@
14008  #include <linux/security.h>
14009  #include <linux/syscalls.h>
14010  #include <linux/pid_namespace.h>
14011 +#include <linux/vs_context.h>
14012  #include <asm/uaccess.h>
14013  #include "cred-internals.h"
14014  
14015 @@ -122,6 +123,7 @@ static int cap_validate_magic(cap_user_h
14016         return 0;
14017  }
14018  
14019 +
14020  /*
14021   * The only thing that can change the capabilities of the current
14022   * process is the current process. As such, we can't be in this code
14023 @@ -289,6 +291,8 @@ error:
14024         return ret;
14025  }
14026  
14027 +#include <linux/vserver/base.h>
14028 +
14029  /**
14030   * capable - Determine if the current task has a superior capability in effect
14031   * @cap: The capability to be tested for
14032 @@ -301,6 +305,9 @@ error:
14033   */
14034  int capable(int cap)
14035  {
14036 +       /* here for now so we don't require task locking */
14037 +       if (vs_check_bit(VXC_CAP_MASK, cap) && !vx_mcaps(1L << cap))
14038 +               return 0;
14039         if (unlikely(!cap_valid(cap))) {
14040                 printk(KERN_CRIT "capable() called with invalid cap=%u\n", cap);
14041                 BUG();
14042 diff -NurpP --minimal linux-2.6.30.2/kernel/compat.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/compat.c
14043 --- linux-2.6.30.2/kernel/compat.c      2009-03-24 14:22:44.000000000 +0100
14044 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/compat.c   2009-07-04 01:11:39.000000000 +0200
14045 @@ -891,7 +891,7 @@ asmlinkage long compat_sys_time(compat_t
14046         compat_time_t i;
14047         struct timeval tv;
14048  
14049 -       do_gettimeofday(&tv);
14050 +       vx_gettimeofday(&tv);
14051         i = tv.tv_sec;
14052  
14053         if (tloc) {
14054 @@ -916,7 +916,7 @@ asmlinkage long compat_sys_stime(compat_
14055         if (err)
14056                 return err;
14057  
14058 -       do_settimeofday(&tv);
14059 +       vx_settimeofday(&tv);
14060         return 0;
14061  }
14062  
14063 diff -NurpP --minimal linux-2.6.30.2/kernel/exit.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/exit.c
14064 --- linux-2.6.30.2/kernel/exit.c        2009-06-11 17:13:26.000000000 +0200
14065 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/exit.c     2009-07-04 01:11:39.000000000 +0200
14066 @@ -48,6 +48,10 @@
14067  #include <linux/tracehook.h>
14068  #include <linux/fs_struct.h>
14069  #include <linux/init_task.h>
14070 +#include <linux/vs_limit.h>
14071 +#include <linux/vs_context.h>
14072 +#include <linux/vs_network.h>
14073 +#include <linux/vs_pid.h>
14074  #include <trace/sched.h>
14075  
14076  #include <asm/uaccess.h>
14077 @@ -489,9 +493,11 @@ static void close_files(struct files_str
14078                                         filp_close(file, files);
14079                                         cond_resched();
14080                                 }
14081 +                               vx_openfd_dec(i);
14082                         }
14083                         i++;
14084                         set >>= 1;
14085 +                       cond_resched();
14086                 }
14087         }
14088  }
14089 @@ -1007,10 +1013,15 @@ NORET_TYPE void do_exit(long code)
14090         if (tsk->splice_pipe)
14091                 __free_pipe_info(tsk->splice_pipe);
14092  
14093 +       /* needs to stay after exit_notify() */
14094 +       exit_vx_info(tsk, code);
14095 +       exit_nx_info(tsk);
14096 +
14097         preempt_disable();
14098         /* causes final put_task_struct in finish_task_switch(). */
14099         tsk->state = TASK_DEAD;
14100         schedule();
14101 +       printk("bad task: %p [%lx]\n", current, current->state);
14102         BUG();
14103         /* Avoid "noreturn function does return".  */
14104         for (;;)
14105 diff -NurpP --minimal linux-2.6.30.2/kernel/fork.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/fork.c
14106 --- linux-2.6.30.2/kernel/fork.c        2009-06-11 17:13:26.000000000 +0200
14107 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/fork.c     2009-07-04 01:50:45.000000000 +0200
14108 @@ -63,6 +63,10 @@
14109  #include <linux/fs_struct.h>
14110  #include <trace/sched.h>
14111  #include <linux/magic.h>
14112 +#include <linux/vs_context.h>
14113 +#include <linux/vs_network.h>
14114 +#include <linux/vs_limit.h>
14115 +#include <linux/vs_memory.h>
14116  
14117  #include <asm/pgtable.h>
14118  #include <asm/pgalloc.h>
14119 @@ -142,6 +146,8 @@ void free_task(struct task_struct *tsk)
14120         prop_local_destroy_single(&tsk->dirties);
14121         free_thread_info(tsk->stack);
14122         rt_mutex_debug_task_free(tsk);
14123 +       clr_vx_info(&tsk->vx_info);
14124 +       clr_nx_info(&tsk->nx_info);
14125         ftrace_graph_exit_task(tsk);
14126         free_task_struct(tsk);
14127  }
14128 @@ -285,6 +291,8 @@ static int dup_mmap(struct mm_struct *mm
14129         mm->free_area_cache = oldmm->mmap_base;
14130         mm->cached_hole_size = ~0UL;
14131         mm->map_count = 0;
14132 +       __set_mm_counter(mm, file_rss, 0);
14133 +       __set_mm_counter(mm, anon_rss, 0);
14134         cpumask_clear(mm_cpumask(mm));
14135         mm->mm_rb = RB_ROOT;
14136         rb_link = &mm->mm_rb.rb_node;
14137 @@ -296,7 +304,7 @@ static int dup_mmap(struct mm_struct *mm
14138  
14139                 if (mpnt->vm_flags & VM_DONTCOPY) {
14140                         long pages = vma_pages(mpnt);
14141 -                       mm->total_vm -= pages;
14142 +                       vx_vmpages_sub(mm, pages);
14143                         vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
14144                                                                 -pages);
14145                         continue;
14146 @@ -429,8 +437,8 @@ static struct mm_struct * mm_init(struct
14147         mm->flags = (current->mm) ? current->mm->flags : default_dump_filter;
14148         mm->core_state = NULL;
14149         mm->nr_ptes = 0;
14150 -       set_mm_counter(mm, file_rss, 0);
14151 -       set_mm_counter(mm, anon_rss, 0);
14152 +       __set_mm_counter(mm, file_rss, 0);
14153 +       __set_mm_counter(mm, anon_rss, 0);
14154         spin_lock_init(&mm->page_table_lock);
14155         spin_lock_init(&mm->ioctx_lock);
14156         INIT_HLIST_HEAD(&mm->ioctx_list);
14157 @@ -441,6 +449,7 @@ static struct mm_struct * mm_init(struct
14158         if (likely(!mm_alloc_pgd(mm))) {
14159                 mm->def_flags = 0;
14160                 mmu_notifier_mm_init(mm);
14161 +               set_vx_info(&mm->mm_vx_info, p->vx_info);
14162                 return mm;
14163         }
14164  
14165 @@ -474,6 +483,7 @@ void __mmdrop(struct mm_struct *mm)
14166         mm_free_pgd(mm);
14167         destroy_context(mm);
14168         mmu_notifier_mm_destroy(mm);
14169 +       clr_vx_info(&mm->mm_vx_info);
14170         free_mm(mm);
14171  }
14172  EXPORT_SYMBOL_GPL(__mmdrop);
14173 @@ -600,6 +610,7 @@ struct mm_struct *dup_mm(struct task_str
14174                 goto fail_nomem;
14175  
14176         memcpy(mm, oldmm, sizeof(*mm));
14177 +       mm->mm_vx_info = NULL;
14178  
14179         /* Initializing for Swap token stuff */
14180         mm->token_priority = 0;
14181 @@ -633,6 +644,7 @@ fail_nocontext:
14182          * If init_new_context() failed, we cannot use mmput() to free the mm
14183          * because it calls destroy_context()
14184          */
14185 +       clr_vx_info(&mm->mm_vx_info);
14186         mm_free_pgd(mm);
14187         free_mm(mm);
14188         return NULL;
14189 @@ -954,6 +966,8 @@ static struct task_struct *copy_process(
14190         int retval;
14191         struct task_struct *p;
14192         int cgroup_callbacks_done = 0;
14193 +       struct vx_info *vxi;
14194 +       struct nx_info *nxi;
14195  
14196         if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
14197                 return ERR_PTR(-EINVAL);
14198 @@ -988,12 +1002,28 @@ static struct task_struct *copy_process(
14199         DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
14200         DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
14201  #endif
14202 +       init_vx_info(&p->vx_info, current->vx_info);
14203 +       init_nx_info(&p->nx_info, current->nx_info);
14204 +
14205 +       /* check vserver memory */
14206 +       if (p->mm && !(clone_flags & CLONE_VM)) {
14207 +               if (vx_vmpages_avail(p->mm, p->mm->total_vm))
14208 +                       vx_pages_add(p->vx_info, RLIMIT_AS, p->mm->total_vm);
14209 +               else
14210 +                       goto bad_fork_free;
14211 +       }
14212 +       if (p->mm && vx_flags(VXF_FORK_RSS, 0)) {
14213 +               if (!vx_rss_avail(p->mm, get_mm_counter(p->mm, file_rss)))
14214 +                       goto bad_fork_cleanup_vm;
14215 +       }
14216         retval = -EAGAIN;
14217 +       if (!vx_nproc_avail(1))
14218 +               goto bad_fork_cleanup_vm;
14219         if (atomic_read(&p->real_cred->user->processes) >=
14220                         p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
14221                 if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
14222                     p->real_cred->user != INIT_USER)
14223 -                       goto bad_fork_free;
14224 +                       goto bad_fork_cleanup_vm;
14225         }
14226  
14227         retval = copy_creds(p, clone_flags);
14228 @@ -1263,6 +1293,18 @@ static struct task_struct *copy_process(
14229  
14230         total_forks++;
14231         spin_unlock(&current->sighand->siglock);
14232 +
14233 +       /* p is copy of current */
14234 +       vxi = p->vx_info;
14235 +       if (vxi) {
14236 +               claim_vx_info(vxi, p);
14237 +               atomic_inc(&vxi->cvirt.nr_threads);
14238 +               atomic_inc(&vxi->cvirt.total_forks);
14239 +               vx_nproc_inc(p);
14240 +       }
14241 +       nxi = p->nx_info;
14242 +       if (nxi)
14243 +               claim_nx_info(nxi, p);
14244         write_unlock_irq(&tasklist_lock);
14245         proc_fork_connector(p);
14246         cgroup_post_fork(p);
14247 @@ -1307,6 +1349,9 @@ bad_fork_cleanup_count:
14248         atomic_dec(&p->cred->user->processes);
14249         put_cred(p->real_cred);
14250         put_cred(p->cred);
14251 +bad_fork_cleanup_vm:
14252 +       if (p->mm && !(clone_flags & CLONE_VM))
14253 +               vx_pages_sub(p->vx_info, RLIMIT_AS, p->mm->total_vm);
14254  bad_fork_free:
14255         free_task(p);
14256  fork_out:
14257 diff -NurpP --minimal linux-2.6.30.2/kernel/kthread.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/kthread.c
14258 --- linux-2.6.30.2/kernel/kthread.c     2009-06-11 17:13:26.000000000 +0200
14259 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/kthread.c  2009-07-04 01:11:39.000000000 +0200
14260 @@ -13,6 +13,7 @@
14261  #include <linux/file.h>
14262  #include <linux/module.h>
14263  #include <linux/mutex.h>
14264 +#include <linux/vs_pid.h>
14265  #include <trace/sched.h>
14266  
14267  #define KTHREAD_NICE_LEVEL (-5)
14268 diff -NurpP --minimal linux-2.6.30.2/kernel/Makefile linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/Makefile
14269 --- linux-2.6.30.2/kernel/Makefile      2009-06-11 17:13:26.000000000 +0200
14270 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/Makefile   2009-07-04 01:11:39.000000000 +0200
14271 @@ -22,6 +22,7 @@ CFLAGS_REMOVE_cgroup-debug.o = -pg
14272  CFLAGS_REMOVE_sched_clock.o = -pg
14273  endif
14274  
14275 +obj-y += vserver/
14276  obj-$(CONFIG_FREEZER) += freezer.o
14277  obj-$(CONFIG_PROFILING) += profile.o
14278  obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
14279 diff -NurpP --minimal linux-2.6.30.2/kernel/nsproxy.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/nsproxy.c
14280 --- linux-2.6.30.2/kernel/nsproxy.c     2009-03-24 14:22:44.000000000 +0100
14281 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/nsproxy.c  2009-07-04 01:11:39.000000000 +0200
14282 @@ -19,6 +19,8 @@
14283  #include <linux/mnt_namespace.h>
14284  #include <linux/utsname.h>
14285  #include <linux/pid_namespace.h>
14286 +#include <linux/vserver/global.h>
14287 +#include <linux/vserver/debug.h>
14288  #include <net/net_namespace.h>
14289  #include <linux/ipc_namespace.h>
14290  
14291 @@ -37,6 +39,9 @@ static inline struct nsproxy *clone_nspr
14292         if (ns) {
14293                 memcpy(ns, orig, sizeof(struct nsproxy));
14294                 atomic_set(&ns->count, 1);
14295 +               vxdprintk(VXD_CBIT(space, 2), "clone_nsproxy(%p[%u] = %p[1]",
14296 +                       orig, atomic_read(&orig->count), ns);
14297 +               atomic_inc(&vs_global_nsproxy);
14298         }
14299         return ns;
14300  }
14301 @@ -46,41 +51,52 @@ static inline struct nsproxy *clone_nspr
14302   * Return the newly created nsproxy.  Do not attach this to the task,
14303   * leave it to the caller to do proper locking and attach it to task.
14304   */
14305 -static struct nsproxy *create_new_namespaces(unsigned long flags,
14306 -                       struct task_struct *tsk, struct fs_struct *new_fs)
14307 +static struct nsproxy *unshare_namespaces(unsigned long flags,
14308 +                       struct nsproxy *orig, struct fs_struct *new_fs)
14309  {
14310         struct nsproxy *new_nsp;
14311         int err;
14312  
14313 -       new_nsp = clone_nsproxy(tsk->nsproxy);
14314 +       vxdprintk(VXD_CBIT(space, 4),
14315 +               "unshare_namespaces(0x%08lx,%p,%p)",
14316 +               flags, orig, new_fs);
14317 +
14318 +       new_nsp = clone_nsproxy(orig);
14319         if (!new_nsp)
14320                 return ERR_PTR(-ENOMEM);
14321  
14322 -       new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs);
14323 +       new_nsp->mnt_ns = copy_mnt_ns(flags, orig->mnt_ns, new_fs);
14324         if (IS_ERR(new_nsp->mnt_ns)) {
14325                 err = PTR_ERR(new_nsp->mnt_ns);
14326                 goto out_ns;
14327         }
14328  
14329 -       new_nsp->uts_ns = copy_utsname(flags, tsk->nsproxy->uts_ns);
14330 +       new_nsp->uts_ns = copy_utsname(flags, orig->uts_ns);
14331         if (IS_ERR(new_nsp->uts_ns)) {
14332                 err = PTR_ERR(new_nsp->uts_ns);
14333                 goto out_uts;
14334         }
14335  
14336 -       new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns);
14337 +       new_nsp->ipc_ns = copy_ipcs(flags, orig->ipc_ns);
14338         if (IS_ERR(new_nsp->ipc_ns)) {
14339                 err = PTR_ERR(new_nsp->ipc_ns);
14340                 goto out_ipc;
14341         }
14342  
14343 -       new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk));
14344 +       new_nsp->pid_ns = copy_pid_ns(flags, orig->pid_ns);
14345         if (IS_ERR(new_nsp->pid_ns)) {
14346                 err = PTR_ERR(new_nsp->pid_ns);
14347                 goto out_pid;
14348         }
14349  
14350 -       new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns);
14351 +       /* disabled now?
14352 +       new_nsp->user_ns = copy_user_ns(flags, orig->user_ns);
14353 +       if (IS_ERR(new_nsp->user_ns)) {
14354 +               err = PTR_ERR(new_nsp->user_ns);
14355 +               goto out_user;
14356 +       } */
14357 +
14358 +       new_nsp->net_ns = copy_net_ns(flags, orig->net_ns);
14359         if (IS_ERR(new_nsp->net_ns)) {
14360                 err = PTR_ERR(new_nsp->net_ns);
14361                 goto out_net;
14362 @@ -105,6 +121,35 @@ out_ns:
14363         return ERR_PTR(err);
14364  }
14365  
14366 +static struct nsproxy *create_new_namespaces(unsigned long flags,
14367 +                       struct task_struct *tsk, struct fs_struct *new_fs)
14368 +{      /* flags stay unsigned long, matching unshare_namespaces() */
14369 +       return unshare_namespaces(flags, tsk->nsproxy, new_fs);
14370 +}
14371 +
14372 +/*
14373 + * Duplicate @orig via clone_nsproxy() and take one reference on each
14374 + * non-NULL mnt/uts/ipc/pid/net namespace; NULL if no clone was made.
14375 + */
14376 +struct nsproxy *copy_nsproxy(struct nsproxy *orig)
14377 +{
14378 +       struct nsproxy *dup = clone_nsproxy(orig);
14379 +
14380 +       if (!dup)
14381 +               return NULL;
14382 +       if (dup->mnt_ns)
14383 +               get_mnt_ns(dup->mnt_ns);
14384 +       if (dup->uts_ns)
14385 +               get_uts_ns(dup->uts_ns);
14386 +       if (dup->ipc_ns)
14387 +               get_ipc_ns(dup->ipc_ns);
14388 +       if (dup->pid_ns)
14389 +               get_pid_ns(dup->pid_ns);
14390 +       if (dup->net_ns)
14391 +               get_net(dup->net_ns);
14392 +       return dup;
14393 +}
14394 +
14395  /*
14396   * called from clone.  This now handles copy for nsproxy and all
14397   * namespaces therein.
14398 @@ -112,9 +157,12 @@ out_ns:
14399  int copy_namespaces(unsigned long flags, struct task_struct *tsk)
14400  {
14401         struct nsproxy *old_ns = tsk->nsproxy;
14402 -       struct nsproxy *new_ns;
14403 +       struct nsproxy *new_ns = NULL;
14404         int err = 0;
14405  
14406 +       vxdprintk(VXD_CBIT(space, 7), "copy_namespaces(0x%08lx,%p[%p])",
14407 +               flags, tsk, old_ns);
14408 +
14409         if (!old_ns)
14410                 return 0;
14411  
14412 @@ -151,6 +199,9 @@ int copy_namespaces(unsigned long flags,
14413  
14414  out:
14415         put_nsproxy(old_ns);
14416 +       vxdprintk(VXD_CBIT(space, 3),
14417 +               "copy_namespaces(0x%08lx,%p[%p]) = %d [%p]",
14418 +               flags, tsk, old_ns, err, new_ns);
14419         return err;
14420  }
14421  
14422 @@ -164,7 +215,9 @@ void free_nsproxy(struct nsproxy *ns)
14423                 put_ipc_ns(ns->ipc_ns);
14424         if (ns->pid_ns)
14425                 put_pid_ns(ns->pid_ns);
14426 -       put_net(ns->net_ns);
14427 +       if (ns->net_ns)
14428 +               put_net(ns->net_ns);
14429 +       atomic_dec(&vs_global_nsproxy);
14430         kmem_cache_free(nsproxy_cachep, ns);
14431  }
14432  
14433 @@ -177,6 +230,10 @@ int unshare_nsproxy_namespaces(unsigned 
14434  {
14435         int err = 0;
14436  
14437 +       vxdprintk(VXD_CBIT(space, 4),
14438 +               "unshare_nsproxy_namespaces(0x%08lx,[%p])",
14439 +               unshare_flags, current->nsproxy);
14440 +
14441         if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
14442                                CLONE_NEWNET)))
14443                 return 0;
14444 diff -NurpP --minimal linux-2.6.30.2/kernel/pid.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/pid.c
14445 --- linux-2.6.30.2/kernel/pid.c 2009-06-11 17:13:26.000000000 +0200
14446 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/pid.c      2009-07-04 01:11:39.000000000 +0200
14447 @@ -36,6 +36,7 @@
14448  #include <linux/pid_namespace.h>
14449  #include <linux/init_task.h>
14450  #include <linux/syscalls.h>
14451 +#include <linux/vs_pid.h>
14452  
14453  #define pid_hashfn(nr, ns)     \
14454         hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
14455 @@ -305,7 +306,7 @@ EXPORT_SYMBOL_GPL(find_pid_ns);
14456  
14457  struct pid *find_vpid(int nr)
14458  {
14459 -       return find_pid_ns(nr, current->nsproxy->pid_ns);
14460 +       return find_pid_ns(vx_rmap_pid(nr), current->nsproxy->pid_ns);
14461  }
14462  EXPORT_SYMBOL_GPL(find_vpid);
14463  
14464 @@ -365,6 +366,9 @@ void transfer_pid(struct task_struct *ol
14465  struct task_struct *pid_task(struct pid *pid, enum pid_type type)
14466  {
14467         struct task_struct *result = NULL;
14468 +
14469 +       if (type == PIDTYPE_REALPID)
14470 +               type = PIDTYPE_PID;
14471         if (pid) {
14472                 struct hlist_node *first;
14473                 first = rcu_dereference(pid->tasks[type].first);
14474 @@ -388,14 +392,14 @@ EXPORT_SYMBOL(find_task_by_pid_type_ns);
14475  
14476  struct task_struct *find_task_by_vpid(pid_t vnr)
14477  {
14478 -       return find_task_by_pid_type_ns(PIDTYPE_PID, vnr,
14479 +       return find_task_by_pid_type_ns(PIDTYPE_PID, vx_rmap_pid(vnr),
14480                         current->nsproxy->pid_ns);
14481  }
14482  EXPORT_SYMBOL(find_task_by_vpid);
14483  
14484  struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
14485  {
14486 -       return find_task_by_pid_type_ns(PIDTYPE_PID, nr, ns);
14487 +       return find_task_by_pid_type_ns(PIDTYPE_PID, vx_rmap_pid(nr), ns);
14488  }
14489  EXPORT_SYMBOL(find_task_by_pid_ns);
14490  
14491 @@ -433,7 +437,7 @@ struct pid *find_get_pid(pid_t nr)
14492  }
14493  EXPORT_SYMBOL_GPL(find_get_pid);
14494  
14495 -pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
14496 +pid_t pid_unmapped_nr_ns(struct pid *pid, struct pid_namespace *ns)
14497  {
14498         struct upid *upid;
14499         pid_t nr = 0;
14500 @@ -446,6 +450,11 @@ pid_t pid_nr_ns(struct pid *pid, struct 
14501         return nr;
14502  }
14503  
14504 +pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
14505 +{
14506 +       return vx_map_pid(pid_unmapped_nr_ns(pid, ns));
14507 +}
14508 +
14509  pid_t pid_vnr(struct pid *pid)
14510  {
14511         return pid_nr_ns(pid, current->nsproxy->pid_ns);
14512 diff -NurpP --minimal linux-2.6.30.2/kernel/pid_namespace.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/pid_namespace.c
14513 --- linux-2.6.30.2/kernel/pid_namespace.c       2009-06-11 17:13:26.000000000 +0200
14514 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/pid_namespace.c    2009-07-04 01:11:39.000000000 +0200
14515 @@ -13,6 +13,7 @@
14516  #include <linux/syscalls.h>
14517  #include <linux/err.h>
14518  #include <linux/acct.h>
14519 +#include <linux/vserver/global.h>
14520  
14521  #define BITS_PER_PAGE          (PAGE_SIZE*8)
14522  
14523 @@ -85,6 +86,7 @@ static struct pid_namespace *create_pid_
14524                 goto out_free_map;
14525  
14526         kref_init(&ns->kref);
14527 +       atomic_inc(&vs_global_pid_ns);
14528         ns->level = level;
14529  
14530         set_bit(0, ns->pidmap[0].page);
14531 @@ -109,6 +111,7 @@ static void destroy_pid_namespace(struct
14532  
14533         for (i = 0; i < PIDMAP_ENTRIES; i++)
14534                 kfree(ns->pidmap[i].page);
14535 +       atomic_dec(&vs_global_pid_ns);
14536         kmem_cache_free(pid_ns_cachep, ns);
14537  }
14538  
14539 diff -NurpP --minimal linux-2.6.30.2/kernel/posix-timers.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/posix-timers.c
14540 --- linux-2.6.30.2/kernel/posix-timers.c        2009-03-24 14:22:44.000000000 +0100
14541 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/posix-timers.c     2009-07-04 01:11:39.000000000 +0200
14542 @@ -46,6 +46,7 @@
14543  #include <linux/wait.h>
14544  #include <linux/workqueue.h>
14545  #include <linux/module.h>
14546 +#include <linux/vs_context.h>
14547  
14548  /*
14549   * Management arrays for POSIX timers.  Timers are kept in slab memory
14550 @@ -321,6 +322,7 @@ int posix_timer_event(struct k_itimer *t
14551  {
14552         struct task_struct *task;
14553         int shared, ret = -1;
14554 +
14555         /*
14556          * FIXME: if ->sigq is queued we can race with
14557          * dequeue_signal()->do_schedule_next_timer().
14558 @@ -337,10 +339,18 @@ int posix_timer_event(struct k_itimer *t
14559         rcu_read_lock();
14560         task = pid_task(timr->it_pid, PIDTYPE_PID);
14561         if (task) {
14562 +               struct vx_info_save vxis;
14563 +               struct vx_info *vxi;
14564 +
14565 +               vxi = task_get_vx_info(task);
14566 +               enter_vx_info(vxi, &vxis);
14567                 shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
14568                 ret = send_sigqueue(timr->sigq, task, shared);
14569 +               leave_vx_info(&vxis);
14570 +               put_vx_info(vxi);
14571         }
14572         rcu_read_unlock();
14573 +
14574         /* If we failed to send the signal the timer stops. */
14575         return ret > 0;
14576  }
14577 diff -NurpP --minimal linux-2.6.30.2/kernel/printk.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/printk.c
14578 --- linux-2.6.30.2/kernel/printk.c      2009-06-11 17:13:26.000000000 +0200
14579 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/printk.c   2009-07-04 01:56:52.000000000 +0200
14580 @@ -33,6 +33,7 @@
14581  #include <linux/bootmem.h>
14582  #include <linux/syscalls.h>
14583  #include <linux/kexec.h>
14584 +#include <linux/vs_cvirt.h>
14585  
14586  #include <asm/uaccess.h>
14587  
14588 @@ -270,18 +271,13 @@ int do_syslog(int type, char __user *buf
14589         unsigned i, j, limit, count;
14590         int do_clear = 0;
14591         char c;
14592 -       int error = 0;
14593 +       int error;
14594  
14595         error = security_syslog(type);
14596         if (error)
14597                 return error;
14598  
14599 -       switch (type) {
14600 -       case 0:         /* Close log */
14601 -               break;
14602 -       case 1:         /* Open log */
14603 -               break;
14604 -       case 2:         /* Read from log */
14605 +       if ((type >= 2) && (type <= 4)) {
14606                 error = -EINVAL;
14607                 if (!buf || len < 0)
14608                         goto out;
14609 @@ -292,6 +288,16 @@ int do_syslog(int type, char __user *buf
14610                         error = -EFAULT;
14611                         goto out;
14612                 }
14613 +       }
14614 +       if (!vx_check(0, VS_ADMIN|VS_WATCH))
14615 +               return vx_do_syslog(type, buf, len);
14616 +
14617 +       switch (type) {
14618 +       case 0:         /* Close log */
14619 +               break;
14620 +       case 1:         /* Open log */
14621 +               break;
14622 +       case 2:         /* Read from log */
14623                 error = wait_event_interruptible(log_wait,
14624                                                         (log_start - log_end));
14625                 if (error)
14626 @@ -316,16 +322,6 @@ int do_syslog(int type, char __user *buf
14627                 do_clear = 1;
14628                 /* FALL THRU */
14629         case 3:         /* Read last kernel messages */
14630 -               error = -EINVAL;
14631 -               if (!buf || len < 0)
14632 -                       goto out;
14633 -               error = 0;
14634 -               if (!len)
14635 -                       goto out;
14636 -               if (!access_ok(VERIFY_WRITE, buf, len)) {
14637 -                       error = -EFAULT;
14638 -                       goto out;
14639 -               }
14640                 count = len;
14641                 if (count > log_buf_len)
14642                         count = log_buf_len;
14643 diff -NurpP --minimal linux-2.6.30.2/kernel/ptrace.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/ptrace.c
14644 --- linux-2.6.30.2/kernel/ptrace.c      2009-06-11 17:13:26.000000000 +0200
14645 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/ptrace.c   2009-07-04 02:00:10.000000000 +0200
14646 @@ -22,6 +22,7 @@
14647  #include <linux/pid_namespace.h>
14648  #include <linux/syscalls.h>
14649  #include <linux/uaccess.h>
14650 +#include <linux/vs_context.h>
14651  
14652  
14653  /*
14654 @@ -161,6 +162,11 @@ int __ptrace_may_access(struct task_stru
14655                 dumpable = get_dumpable(task->mm);
14656         if (!dumpable && !capable(CAP_SYS_PTRACE))
14657                 return -EPERM;
14658 +       if (!vx_check(task->xid, VS_ADMIN_P|VS_IDENT))
14659 +               return -EPERM;
14660 +       if (!vx_check(task->xid, VS_IDENT) &&
14661 +               !task_vx_flags(task, VXF_STATE_ADMIN, 0))
14662 +               return -EACCES;
14663  
14664         return security_ptrace_may_access(task, mode);
14665  }
14666 @@ -672,6 +678,10 @@ SYSCALL_DEFINE4(ptrace, long, request, l
14667                 goto out;
14668         }
14669  
14670 +       ret = -EPERM;
14671 +       if (!vx_check(vx_task_xid(child), VS_WATCH_P | VS_IDENT))
14672 +               goto out_put_task_struct;
14673 +
14674         if (request == PTRACE_ATTACH) {
14675                 ret = ptrace_attach(child);
14676                 /*
14677 diff -NurpP --minimal linux-2.6.30.2/kernel/sched.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/sched.c
14678 --- linux-2.6.30.2/kernel/sched.c       2009-06-11 17:13:26.000000000 +0200
14679 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/sched.c    2009-07-04 01:59:40.000000000 +0200
14680 @@ -72,6 +72,8 @@
14681  #include <linux/debugfs.h>
14682  #include <linux/ctype.h>
14683  #include <linux/ftrace.h>
14684 +#include <linux/vs_sched.h>
14685 +#include <linux/vs_cvirt.h>
14686  #include <trace/sched.h>
14687  
14688  #include <asm/tlb.h>
14689 @@ -637,6 +639,16 @@ struct rq {
14690  #endif
14691         struct hrtimer hrtick_timer;
14692  #endif
14693 +       unsigned long norm_time;
14694 +       unsigned long idle_time;
14695 +#ifdef CONFIG_VSERVER_IDLETIME
14696 +       int idle_skip;
14697 +#endif
14698 +#ifdef CONFIG_VSERVER_HARDCPU
14699 +       struct list_head hold_queue;
14700 +       unsigned long nr_onhold;
14701 +       int idle_tokens;
14702 +#endif
14703  
14704  #ifdef CONFIG_SCHEDSTATS
14705         /* latency stats */
14706 @@ -1910,6 +1922,8 @@ static inline void check_class_changed(s
14707                 p->sched_class->prio_changed(rq, p, oldprio, running);
14708  }
14709  
14710 +#include "sched_mon.h"
14711 +
14712  #ifdef CONFIG_SMP
14713  
14714  /* Used instead of source_load when we know the type == 0 */
14715 @@ -1997,6 +2011,7 @@ migrate_task(struct task_struct *p, int 
14716  {
14717         struct rq *rq = task_rq(p);
14718  
14719 +       vxm_migrate_task(p, rq, dest_cpu);
14720         /*
14721          * If the task is not on a runqueue (and not running), then
14722          * it is sufficient to simply update the task's cpu field.
14723 @@ -2324,6 +2339,8 @@ static int sched_balance_self(int cpu, i
14724  
14725  #endif /* CONFIG_SMP */
14726  
14727 +#include "sched_hard.h"
14728 +
14729  /***
14730   * try_to_wake_up - wake up a thread
14731   * @p: the to-be-woken-up thread
14732 @@ -2368,6 +2385,13 @@ static int try_to_wake_up(struct task_st
14733         rq = task_rq_lock(p, &flags);
14734         update_rq_clock(rq);
14735         old_state = p->state;
14736 +
14737 +       /* we need to unhold suspended tasks */
14738 +       if (old_state & TASK_ONHOLD) {
14739 +               vx_unhold_task(p, rq);
14740 +               old_state = p->state;
14741 +       }
14742 +
14743         if (!(old_state & state))
14744                 goto out;
14745  
14746 @@ -2389,6 +2413,12 @@ static int try_to_wake_up(struct task_st
14747                 /* might preempt at this point */
14748                 rq = task_rq_lock(p, &flags);
14749                 old_state = p->state;
14750 +
14751 +       /* we need to unhold suspended tasks
14752 +       if (old_state & TASK_ONHOLD) {
14753 +               vx_unhold_task(p, rq);
14754 +               old_state = p->state;
14755 +       } */
14756                 if (!(old_state & state))
14757                         goto out;
14758                 if (p->se.on_rq)
14759 @@ -4607,16 +4637,19 @@ void account_user_time(struct task_struc
14760                        cputime_t cputime_scaled)
14761  {
14762         struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
14763 +       struct vx_info *vxi = p->vx_info;  /* p is _always_ current */
14764         cputime64_t tmp;
14765 +       int nice = (TASK_NICE(p) > 0);
14766  
14767         /* Add user time to process. */
14768         p->utime = cputime_add(p->utime, cputime);
14769         p->utimescaled = cputime_add(p->utimescaled, cputime_scaled);
14770 +       vx_account_user(vxi, cputime, nice);
14771         account_group_user_time(p, cputime);
14772  
14773         /* Add user time to cpustat. */
14774         tmp = cputime_to_cputime64(cputime);
14775 -       if (TASK_NICE(p) > 0)
14776 +       if (nice)
14777                 cpustat->nice = cputime64_add(cpustat->nice, tmp);
14778         else
14779                 cpustat->user = cputime64_add(cpustat->user, tmp);
14780 @@ -4662,6 +4695,7 @@ void account_system_time(struct task_str
14781                          cputime_t cputime, cputime_t cputime_scaled)
14782  {
14783         struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
14784 +       struct vx_info *vxi = p->vx_info;  /* p is _always_ current */
14785         cputime64_t tmp;
14786  
14787         if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
14788 @@ -4672,6 +4706,7 @@ void account_system_time(struct task_str
14789         /* Add system time to process. */
14790         p->stime = cputime_add(p->stime, cputime);
14791         p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
14792 +       vx_account_system(vxi, cputime, 0 /* do we have idle time? */);
14793         account_group_system_time(p, cputime);
14794  
14795         /* Add system time to cpustat. */
14796 @@ -5049,6 +5084,11 @@ need_resched_nonpreemptible:
14797                 idle_balance(cpu, rq);
14798  
14799         put_prev_task(rq, prev);
14800 +
14801 +       vx_set_rq_time(rq, jiffies);    /* update time */
14802 +       vx_schedule(prev, rq, cpu);     /* hold if over limit */
14803 +       vx_try_unhold(rq, cpu);         /* unhold if refilled */
14804 +
14805         next = pick_next_task(rq);
14806  
14807         if (likely(prev != next)) {
14808 @@ -5696,7 +5736,7 @@ SYSCALL_DEFINE1(nice, int, increment)
14809                 nice = 19;
14810  
14811         if (increment < 0 && !can_nice(current, nice))
14812 -               return -EPERM;
14813 +               return vx_flags(VXF_IGNEG_NICE, 0) ? 0 : -EPERM;
14814  
14815         retval = security_task_setnice(current, nice);
14816         if (retval)
14817 @@ -8985,7 +9025,10 @@ void __init sched_init(void)
14818  
14819  #endif
14820  #endif /* CONFIG_FAIR_GROUP_SCHED */
14821 -
14822 +#ifdef CONFIG_VSERVER_HARDCPU
14823 +               INIT_LIST_HEAD(&rq->hold_queue);
14824 +               rq->nr_onhold = 0;
14825 +#endif
14826                 rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
14827  #ifdef CONFIG_RT_GROUP_SCHED
14828                 INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
14829 diff -NurpP --minimal linux-2.6.30.2/kernel/sched_fair.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/sched_fair.c
14830 --- linux-2.6.30.2/kernel/sched_fair.c  2009-06-11 17:13:26.000000000 +0200
14831 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/sched_fair.c       2009-07-04 01:11:39.000000000 +0200
14832 @@ -717,6 +717,9 @@ enqueue_entity(struct cfs_rq *cfs_rq, st
14833         check_spread(cfs_rq, se);
14834         if (se != cfs_rq->curr)
14835                 __enqueue_entity(cfs_rq, se);
14836 +
14837 +       if (entity_is_task(se))
14838 +               vx_activate_task(task_of(se));
14839  }
14840  
14841  static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
14842 @@ -760,6 +763,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
14843  
14844         if (se != cfs_rq->curr)
14845                 __dequeue_entity(cfs_rq, se);
14846 +       if (entity_is_task(se))
14847 +               vx_deactivate_task(task_of(se));
14848         account_entity_dequeue(cfs_rq, se);
14849         update_min_vruntime(cfs_rq);
14850  }
14851 diff -NurpP --minimal linux-2.6.30.2/kernel/sched_hard.h linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/sched_hard.h
14852 --- linux-2.6.30.2/kernel/sched_hard.h  1970-01-01 01:00:00.000000000 +0100
14853 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/sched_hard.h       2009-07-04 01:11:39.000000000 +0200
14854 @@ -0,0 +1,353 @@
14855 +
14856 +#ifdef CONFIG_VSERVER_IDLELIMIT
14857 +
14858 +/*
14859 + * vx_idle_resched - reschedule after maxidle
14860 + */
14861 +static inline
14862 +void vx_idle_resched(struct rq *rq)
14863 +{
14864 +       /* maybe have a better criterion for paused */
14865 +       if (!--rq->idle_tokens && !list_empty(&rq->hold_queue))
14866 +               set_need_resched();
14867 +}
14868 +
14869 +#else /* !CONFIG_VSERVER_IDLELIMIT */
14870 +
14871 +#define vx_idle_resched(rq)
14872 +
14873 +#endif /* CONFIG_VSERVER_IDLELIMIT */
14874 +
14875 +
14876 +
14877 +#ifdef CONFIG_VSERVER_IDLETIME
14878 +
14879 +#define vx_set_rq_min_skip(rq, min)            \
14880 +       (rq)->idle_skip = (min)
14881 +
14882 +#define vx_save_min_skip(ret, min, val)                \
14883 +       __vx_save_min_skip(ret, min, val)
14884 +
14885 +static inline
14886 +void __vx_save_min_skip(int ret, int *min, int val)
14887 +{
14888 +       if (ret > -2)
14889 +               return;
14890 +       if ((*min > val) || !*min)
14891 +               *min = val;
14892 +}
14893 +
14894 +static inline
14895 +int vx_try_skip(struct rq *rq, int cpu)
14896 +{
14897 +       /* artificially advance time */
14898 +       if (rq->idle_skip > 0) {
14899 +               vxdprintk(list_empty(&rq->hold_queue),
14900 +                       "hold queue empty on cpu %d", cpu);
14901 +               rq->idle_time += rq->idle_skip;
14902 +               vxm_idle_skip(rq, cpu);
14903 +               return 1;
14904 +       }
14905 +       return 0;
14906 +}
14907 +
14908 +#else /* !CONFIG_VSERVER_IDLETIME */
14909 +
14910 +#define vx_set_rq_min_skip(rq, min)            \
14911 +       ({ int dummy = (min); dummy; })
14912 +
14913 +#define vx_save_min_skip(ret, min, val)
14914 +
14915 +static inline
14916 +int vx_try_skip(struct rq *rq, int cpu)
14917 +{
14918 +       return 0;
14919 +}
14920 +
14921 +#endif /* CONFIG_VSERVER_IDLETIME */
14922 +
14923 +
14924 +
14925 +#ifdef CONFIG_VSERVER_HARDCPU
14926 +
14927 +#define vx_set_rq_max_idle(rq, max)            \
14928 +       (rq)->idle_tokens = (max)
14929 +
14930 +#define vx_save_max_idle(ret, min, val)                \
14931 +       __vx_save_max_idle(ret, min, val)
14932 +
14933 +static inline
14934 +void __vx_save_max_idle(int ret, int *min, int val)
14935 +{
14936 +       if (*min > val)
14937 +               *min = val;
14938 +}
14939 +
14940 +
14941 +/*
14942 + * vx_hold_task - put a task on the hold queue
14943 + */
14944 +static inline
14945 +void vx_hold_task(struct task_struct *p, struct rq *rq)
14946 +{
14947 +       // printk("@ hold_task(%p[%lx])\n", p, p->state);
14948 +
14949 +       /* ignore dead/killed tasks */
14950 +       if (unlikely(p->state & (TASK_DEAD | TASK_WAKEKILL)))
14951 +               return;
14952 +
14953 +       /* ignore sleeping tasks */
14954 +       if (unlikely(p->state & TASK_NORMAL))
14955 +               return;
14956 +
14957 +       /* remove task from runqueue */
14958 +       if (likely(p->se.on_rq))
14959 +               dequeue_task(rq, p, 0);
14960 +       else
14961 +               printk("@ woops, task %p not on runqueue?\n", p);
14962 +
14963 +       p->state |= TASK_ONHOLD;
14964 +       /* a new one on hold */
14965 +       rq->nr_onhold++;
14966 +       vxm_hold_task(p, rq);
14967 +       list_add_tail(&p->hq, &rq->hold_queue);
14968 +       // list_add_tail(&p->run_list, &rq->hold_queue);
14969 +}
14970 +
14971 +/*
14972 + * vx_unhold_task - put a task back to the runqueue
14973 + */
14974 +static inline
14975 +void vx_unhold_task(struct task_struct *p, struct rq *rq)
14976 +{
14977 +       // printk("@ unhold_task(%p[%lx])\n", p, p->state);
14978 +       list_del_init(&p->hq);
14979 +       // list_del(&p->run_list);
14980 +       /* one less waiting */
14981 +       rq->nr_onhold--;
14982 +       p->state &= ~TASK_ONHOLD;
14983 +       enqueue_task(rq, p, 0);
14984 +       // ? inc_nr_running(p, rq);
14985 +       vxm_unhold_task(p, rq);
14986 +}
14987 +
14988 +/*
14989 + * vx_remove_hold - remove a task from the hold queue
14990 + */
14991 +static inline
14992 +void vx_remove_hold(struct task_struct *p, struct rq *rq)
14993 +{
14994 +       printk("@ remove_hold(%p[%lx])\n", p, p->state);
14995 +       list_del_init(&p->hq);
14996 +       // list_del(&p->run_list);
14997 +       /* one less waiting */
14998 +       rq->nr_onhold--;
14999 +       p->state &= ~TASK_ONHOLD;
15000 +}
15001 +
15002 +unsigned long nr_onhold(void)
15003 +{
15004 +       unsigned long i, sum = 0;
15005 +
15006 +       for_each_online_cpu(i)
15007 +               sum += cpu_rq(i)->nr_onhold;
15008 +
15009 +       return sum;
15010 +}
15011 +
15012 +
15013 +
15014 +static inline
15015 +int __vx_tokens_avail(struct _vx_sched_pc *sched_pc)
15016 +{
15017 +       return sched_pc->tokens;
15018 +}
15019 +
15020 +static inline
15021 +void __vx_consume_token(struct _vx_sched_pc *sched_pc)
15022 +{
15023 +       sched_pc->tokens--;
15024 +}
15025 +/* vx_need_resched - consume a token where applicable; returns (slice == 0) */
15026 +static inline
15027 +int vx_need_resched(struct task_struct *p, int slice, int cpu)
15028 +{
15029 +       struct vx_info *vxi = p->vx_info;
15030 +
15031 +       if (vx_info_flags(vxi, VXF_SCHED_HARD|VXF_SCHED_PRIO, 0)) {
15032 +               struct _vx_sched_pc *sched_pc =
15033 +                       &vx_per_cpu(vxi, sched_pc, cpu);
15034 +               int tokens;
15035 +
15036 +               /* TODO: maybe this can be simplified by decrementing
15037 +                  the token counter unconditionally? */
15038 +
15039 +               if ((tokens = __vx_tokens_avail(sched_pc)) > 0)
15040 +                       __vx_consume_token(sched_pc);
15041 +
15042 +               /* tokens is the pre-decrement count: < 2 means none left */
15043 +               if (tokens < 2)
15044 +                       slice = 0;
15045 +       }
15046 +       vxm_need_resched(p, slice, cpu);
15047 +       return (slice == 0);
15048 +}
15049 +
15050 +/* vx_set_rq_time - store @time in the runqueue's norm_time field */
15051 +#define vx_set_rq_time(rq, time) do {  \
15052 +       (rq)->norm_time = (time);       \
15053 +} while (0)
15054 +
15055 +/* vx_try_unhold - walk the hold queue; unhold the first task whose recalc > 0 */
15056 +static inline
15057 +void vx_try_unhold(struct rq *rq, int cpu)
15058 +{
15059 +       struct vx_info *vxi = NULL;
15060 +       struct list_head *l, *n;
15061 +       int maxidle = HZ;
15062 +       int minskip = 0;
15063 +
15064 +       /* nothing on hold, nothing to do (open question: what about pause?) */
15065 +       if (list_empty(&rq->hold_queue))
15066 +               return;
15067 +
15068 +       list_for_each_safe(l, n, &rq->hold_queue) {
15069 +               int ret, delta_min[2];
15070 +               struct _vx_sched_pc *sched_pc;
15071 +               struct task_struct *p;
15072 +
15073 +               p = list_entry(l, struct task_struct, hq);
15074 +               /* skip tasks of the context that was examined last */
15075 +               if (vxi == p->vx_info)
15076 +                       continue;
15077 +
15078 +               vxi = p->vx_info;
15079 +               /* ignore paused contexts */
15080 +               if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
15081 +                       continue;
15082 +
15083 +               sched_pc = &vx_per_cpu(vxi, sched_pc, cpu);
15084 +
15085 +               /* recalc tokens (monitor entries logged before and after) */
15086 +               vxm_sched_info(sched_pc, vxi, cpu);
15087 +               ret = vx_tokens_recalc(sched_pc,
15088 +                       &rq->norm_time, &rq->idle_time, delta_min);
15089 +               vxm_tokens_recalc(sched_pc, rq, vxi, cpu);
15090 +
15091 +               if (ret > 0) {
15092 +                       /* we found a runnable context */
15093 +                       vx_unhold_task(p, rq);
15094 +                       break;
15095 +               }
15096 +               vx_save_max_idle(ret, &maxidle, delta_min[0]);
15097 +               vx_save_min_skip(ret, &minskip, delta_min[1]);
15098 +       }
15099 +       vx_set_rq_max_idle(rq, maxidle);
15100 +       vx_set_rq_min_skip(rq, minskip);
15101 +       vxm_rq_max_min(rq, cpu);
15102 +}
15103 +
15104 +/* vx_schedule - recalc tokens; returns 0 after vx_hold_task(next), else 1 */
15105 +static inline
15106 +int vx_schedule(struct task_struct *next, struct rq *rq, int cpu)
15107 +{
15108 +       struct vx_info *vxi = next->vx_info;
15109 +       struct _vx_sched_pc *sched_pc;
15110 +       int delta_min[2];
15111 +       int flags, ret;
15112 +
15113 +       if (!vxi)
15114 +               return 1;
15115 +
15116 +       flags = vxi->vx_flags;
15117 +
15118 +       if (unlikely(vs_check_flags(flags, VXF_SCHED_PAUSE, 0)))
15119 +               goto put_on_hold;
15120 +       if (!vs_check_flags(flags, VXF_SCHED_HARD | VXF_SCHED_PRIO, 0))
15121 +               return 1;
15122 +
15123 +       sched_pc = &vx_per_cpu(vxi, sched_pc, cpu);
15124 +#ifdef CONFIG_SMP
15125 +       /* apply a pending scheduler parameter update flagged for this cpu */
15126 +       if (cpu_isset(cpu, vxi->sched.update)) {
15127 +               vx_update_sched_param(&vxi->sched, sched_pc);
15128 +               vxm_update_sched(sched_pc, vxi, cpu);
15129 +               cpu_clear(cpu, vxi->sched.update);
15130 +       }
15131 +#endif
15132 +       vxm_sched_info(sched_pc, vxi, cpu);
15133 +       ret  = vx_tokens_recalc(sched_pc,
15134 +               &rq->norm_time, &rq->idle_time, delta_min);
15135 +       vxm_tokens_recalc(sched_pc, rq, vxi, cpu);
15136 +
15137 +       if (!vs_check_flags(flags, VXF_SCHED_HARD, 0))
15138 +               return 1;
15139 +
15140 +       if (unlikely(ret < 0)) {
15141 +               vx_save_max_idle(ret, &rq->idle_tokens, delta_min[0]);
15142 +               vx_save_min_skip(ret, &rq->idle_skip, delta_min[1]);
15143 +               vxm_rq_max_min(rq, cpu);
15144 +       put_on_hold:    /* also reached by goto from the VXF_SCHED_PAUSE check */
15145 +               vx_hold_task(next, rq);
15146 +               return 0;
15147 +       }
15148 +       return 1;
15149 +}
15150 +
15151 +
15152 +#else /* CONFIG_VSERVER_HARDCPU */
15153 +
15154 +static inline
15155 +void vx_hold_task(struct task_struct *p, struct rq *rq)
15156 +{
15157 +       return;
15158 +}
15159 +
15160 +static inline
15161 +void vx_unhold_task(struct task_struct *p, struct rq *rq)
15162 +{
15163 +       return;
15164 +}
15165 +
15166 +unsigned long nr_onhold(void)
15167 +{
15168 +       return 0;
15169 +}
15170 +
15171 +
15172 +static inline
15173 +int vx_need_resched(struct task_struct *p, int slice, int cpu)
15174 +{
15175 +       return (slice == 0);
15176 +}
15177 +
15178 +
15179 +#define vx_set_rq_time(rq, time)
15180 +
15181 +static inline
15182 +void vx_try_unhold(struct rq *rq, int cpu)
15183 +{
15184 +       return;
15185 +}
15186 +
15187 +static inline
15188 +int vx_schedule(struct task_struct *next, struct rq *rq, int cpu)
15189 +{
15190 +       struct vx_info *vxi = next->vx_info;
15191 +       struct _vx_sched_pc *sched_pc;
15192 +       int delta_min[2];
15193 +       int ret;
15194 +
15195 +       if (!vx_info_flags(vxi, VXF_SCHED_PRIO, 0))
15196 +               return 1;
15197 +
15198 +       sched_pc = &vx_per_cpu(vxi, sched_pc, cpu);
15199 +       vxm_sched_info(sched_pc, vxi, cpu);
15200 +       ret  = vx_tokens_recalc(sched_pc,
15201 +               &rq->norm_time, &rq->idle_time, delta_min);
15202 +       vxm_tokens_recalc(sched_pc, rq, vxi, cpu);
15203 +       return 1;
15204 +}
15205 +
15206 +#endif /* CONFIG_VSERVER_HARDCPU */
15207 +
15208 diff -NurpP --minimal linux-2.6.30.2/kernel/sched_mon.h linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/sched_mon.h
15209 --- linux-2.6.30.2/kernel/sched_mon.h   1970-01-01 01:00:00.000000000 +0100
15210 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/sched_mon.h        2009-07-04 01:11:39.000000000 +0200
15211 @@ -0,0 +1,200 @@
15212 +
15213 +#include <linux/vserver/monitor.h>
15214 +
15215 +#ifdef  CONFIG_VSERVER_MONITOR
15216 +
15217 +#ifdef CONFIG_VSERVER_HARDCPU
15218 +#define HARDCPU(x) (x)
15219 +#else
15220 +#define HARDCPU(x) (0)
15221 +#endif
15222 +
15223 +#ifdef CONFIG_VSERVER_IDLETIME
15224 +#define IDLETIME(x) (x)
15225 +#else
15226 +#define IDLETIME(x) (0)
15227 +#endif
15228 +
15229 +struct _vx_mon_entry *vxm_advance(int cpu);
15230 +
15231 +
15232 +static inline
15233 +void   __vxm_basic(struct _vx_mon_entry *entry, xid_t xid, int type)
15234 +{
15235 +       entry->type = type;
15236 +       entry->xid = xid;
15237 +}
15238 +
15239 +static inline
15240 +void   __vxm_sync(int cpu)
15241 +{
15242 +       struct _vx_mon_entry *entry = vxm_advance(cpu);
15243 +
15244 +       __vxm_basic(entry, 0, VXM_SYNC);
15245 +       entry->ev.sec = xtime.tv_sec;
15246 +       entry->ev.nsec = xtime.tv_nsec;
15247 +}
15248 +
15249 +static inline
15250 +void   __vxm_task(struct task_struct *p, int type)
15251 +{
15252 +       struct _vx_mon_entry *entry = vxm_advance(task_cpu(p));
15253 +
15254 +       __vxm_basic(entry, p->xid, type);
15255 +       entry->ev.tsk.pid = p->pid;
15256 +       entry->ev.tsk.state = p->state;
15257 +}
15258 +
15259 +static inline
15260 +void   __vxm_sched(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
15261 +{
15262 +       struct _vx_mon_entry *entry = vxm_advance(cpu);
15263 +
15264 +       __vxm_basic(entry, vxi->vx_id, (VXM_SCHED | s->flags));
15265 +       entry->sd.tokens = s->tokens;
15266 +       entry->sd.norm_time = s->norm_time;
15267 +       entry->sd.idle_time = s->idle_time;
15268 +}
15269 +/* log rq counters as VXM_RQINFO_1; HARDCPU-only fields read 0 without it */
15270 +static inline
15271 +void   __vxm_rqinfo1(struct rq *q, int cpu)
15272 +{
15273 +       struct _vx_mon_entry *entry = vxm_advance(cpu);
15274 +
15275 +       entry->type = VXM_RQINFO_1;
15276 +       entry->xid = ((unsigned long)q >> 16) & 0xffff;
15277 +       entry->q1.running = q->nr_running;
15278 +       entry->q1.onhold = HARDCPU(q->nr_onhold);
15279 +       entry->q1.iowait = atomic_read(&q->nr_iowait);
15280 +       entry->q1.uintr = q->nr_uninterruptible;
15281 +       entry->q1.idle_tokens = HARDCPU(q->idle_tokens);
15282 +}
15283 +
15284 +static inline
15285 +void   __vxm_rqinfo2(struct rq *q, int cpu)
15286 +{
15287 +       struct _vx_mon_entry *entry = vxm_advance(cpu);
15288 +
15289 +       entry->type = VXM_RQINFO_2;
15290 +       entry->xid = (unsigned long)q & 0xffff;
15291 +       entry->q2.norm_time = q->norm_time;
15292 +       entry->q2.idle_time = q->idle_time;
15293 +       entry->q2.idle_skip = IDLETIME(q->idle_skip);
15294 +}
15295 +
15296 +static inline
15297 +void   __vxm_update(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
15298 +{
15299 +       struct _vx_mon_entry *entry = vxm_advance(cpu);
15300 +
15301 +       __vxm_basic(entry, vxi->vx_id, VXM_UPDATE);
15302 +       entry->ev.tokens = s->tokens;
15303 +}
15304 +
15305 +static inline
15306 +void   __vxm_update1(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
15307 +{
15308 +       struct _vx_mon_entry *entry = vxm_advance(cpu);
15309 +
15310 +       __vxm_basic(entry, vxi->vx_id, VXM_UPDATE_1);
15311 +       entry->u1.tokens_max = s->tokens_max;
15312 +       entry->u1.fill_rate = s->fill_rate[0];
15313 +       entry->u1.interval = s->interval[0];
15314 +}
15315 +
15316 +static inline
15317 +void   __vxm_update2(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
15318 +{
15319 +       struct _vx_mon_entry *entry = vxm_advance(cpu);
15320 +
15321 +       __vxm_basic(entry, vxi->vx_id, VXM_UPDATE_2);
15322 +       entry->u2.tokens_min = s->tokens_min;
15323 +       entry->u2.fill_rate = s->fill_rate[1];
15324 +       entry->u2.interval = s->interval[1];
15325 +}
15326 +
15327 +/* task hooks: each forwards p to __vxm_task() with its event code; q is not used */
15328 +#define        vxm_activate_task(p,q)          __vxm_task(p, VXM_ACTIVATE)
15329 +#define        vxm_activate_idle(p,q)          __vxm_task(p, VXM_IDLE)
15330 +#define        vxm_deactivate_task(p,q)        __vxm_task(p, VXM_DEACTIVATE)
15331 +#define        vxm_hold_task(p,q)              __vxm_task(p, VXM_HOLD)
15332 +#define        vxm_unhold_task(p,q)            __vxm_task(p, VXM_UNHOLD)
15333 +/* log VXM_MIGRATE for p, then both rq info entries on task_cpu(p); dest is unused */
15334 +static inline
15335 +void   vxm_migrate_task(struct task_struct *p, struct rq *rq, int dest)
15336 +{
15337 +       __vxm_task(p, VXM_MIGRATE);
15338 +       __vxm_rqinfo1(rq, task_cpu(p));
15339 +       __vxm_rqinfo2(rq, task_cpu(p));
15340 +}
15341 +/* idle skip hook: log both run queue info entries (__vxm_rqinfo1/2) for rq on cpu */
15342 +static inline
15343 +void   vxm_idle_skip(struct rq *rq, int cpu)
15344 +{
15345 +       __vxm_rqinfo1(rq, cpu);
15346 +       __vxm_rqinfo2(rq, cpu);
15347 +}
15348 +
15349 +static inline
15350 +void   vxm_need_resched(struct task_struct *p, int slice, int cpu)
15351 +{
15352 +       /* a VXM_RESCHED event is logged only when slice is zero;
15353 +        * the cpu argument is not used here */
15354 +       if (!slice)
15355 +               __vxm_task(p, VXM_RESCHED);
15356 +}
15357 +
15358 +static inline
15359 +void   vxm_sync(unsigned long now, int cpu)
15360 +{
15361 +       /* log a sync entry whenever now is a multiple of the configured
15362 +        * period; a period of 0 disables it and skips the modulo */
15363 +       if (CONFIG_VSERVER_MONITOR_SYNC &&
15364 +               !(now % CONFIG_VSERVER_MONITOR_SYNC))
15365 +               __vxm_sync(cpu);
15366 +}
15367 +/* scheduler info hook: plain alias for __vxm_sched() */
15368 +#define        vxm_sched_info(s,v,c)           __vxm_sched(s,v,c)
15369 +/* token recalc hook: logs __vxm_sched() for vxi, then __vxm_rqinfo2() for rq */
15370 +static inline
15371 +void   vxm_tokens_recalc(struct _vx_sched_pc *s, struct rq *rq,
15372 +       struct vx_info *vxi, int cpu)
15373 +{
15374 +       __vxm_sched(s, vxi, cpu);
15375 +       __vxm_rqinfo2(rq, cpu);
15376 +}
15377 +/* scheduler update hook: logs __vxm_sched() followed by all three update entries */
15378 +static inline
15379 +void   vxm_update_sched(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
15380 +{
15381 +       __vxm_sched(s, vxi, cpu);
15382 +       __vxm_update(s, vxi, cpu);
15383 +       __vxm_update1(s, vxi, cpu);
15384 +       __vxm_update2(s, vxi, cpu);
15385 +}
15386 +/* rq max/min hook: logs the same two rq info entries as vxm_idle_skip() */
15387 +static inline
15388 +void   vxm_rq_max_min(struct rq *rq, int cpu)
15389 +{
15390 +       __vxm_rqinfo1(rq, cpu);
15391 +       __vxm_rqinfo2(rq, cpu);
15392 +}
15393 +
15394 +#else  /* CONFIG_VSERVER_MONITOR */
15395 +/* monitor support compiled out: every hook expands to an empty statement */
15396 +#define        vxm_activate_task(t,q)          do { } while (0)
15397 +#define        vxm_activate_idle(t,q)          do { } while (0)
15398 +#define        vxm_deactivate_task(t,q)        do { } while (0)
15399 +#define        vxm_hold_task(t,q)              do { } while (0)
15400 +#define        vxm_unhold_task(t,q)            do { } while (0)
15401 +#define        vxm_migrate_task(t,q,d)         do { } while (0)
15402 +#define        vxm_idle_skip(q,c)              do { } while (0)
15403 +#define        vxm_need_resched(t,s,c)         do { } while (0)
15404 +#define        vxm_sync(s,c)                   do { } while (0)
15405 +#define        vxm_sched_info(s,v,c)           do { } while (0)
15406 +#define        vxm_tokens_recalc(s,q,v,c)      do { } while (0)
15407 +#define        vxm_update_sched(s,v,c)         do { } while (0)
15408 +#define        vxm_rq_max_min(q,c)             do { } while (0)
15409 +
15410 +#endif /* CONFIG_VSERVER_MONITOR */
15411 +
15412 diff -NurpP --minimal linux-2.6.30.2/kernel/signal.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/signal.c
15413 --- linux-2.6.30.2/kernel/signal.c      2009-06-11 17:13:26.000000000 +0200
15414 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/signal.c   2009-07-04 01:11:39.000000000 +0200
15415 @@ -27,6 +27,8 @@
15416  #include <linux/freezer.h>
15417  #include <linux/pid_namespace.h>
15418  #include <linux/nsproxy.h>
15419 +#include <linux/vs_context.h>
15420 +#include <linux/vs_pid.h>
15421  #include <trace/sched.h>
15422  
15423  #include <asm/param.h>
15424 @@ -595,6 +597,14 @@ static int check_kill_permission(int sig
15425         if (!valid_signal(sig))
15426                 return -EINVAL;
15427  
15428 +       if ((info != SEND_SIG_NOINFO) &&
15429 +               (is_si_special(info) || !SI_FROMUSER(info)))
15430 +               goto skip;
15431 +
15432 +       vxdprintk(VXD_CBIT(misc, 7),
15433 +               "check_kill_permission(%d,%p,%p[#%u,%u])",
15434 +               sig, info, t, vx_task_xid(t), t->pid);
15435 +
15436         if (info != SEND_SIG_NOINFO && (is_si_special(info) || SI_FROMKERNEL(info)))
15437                 return 0;
15438  
15439 @@ -622,6 +632,20 @@ static int check_kill_permission(int sig
15440                 }
15441         }
15442  
15443 +       error = -EPERM;
15444 +       if (t->pid == 1 && current->xid)
15445 +               return error;
15446 +
15447 +       error = -ESRCH;
15448 +       /* FIXME: we shouldn't return ESRCH ever, to avoid
15449 +                 loops, maybe ENOENT or EACCES? */
15450 +       if (!vx_check(vx_task_xid(t), VS_WATCH_P | VS_IDENT)) {
15451 +               vxdprintk(current->xid || VXD_CBIT(misc, 7),
15452 +                       "signal %d[%p] xid mismatch %p[#%u,%u] xid=#%u",
15453 +                       sig, info, t, vx_task_xid(t), t->pid, current->xid);
15454 +               return error;
15455 +       }
15456 +skip:
15457         return security_task_kill(t, info, sig, 0);
15458  }
15459  
15460 @@ -1104,7 +1128,7 @@ int kill_pid_info(int sig, struct siginf
15461         rcu_read_lock();
15462  retry:
15463         p = pid_task(pid, PIDTYPE_PID);
15464 -       if (p) {
15465 +       if (p && vx_check(vx_task_xid(p), VS_IDENT)) {
15466                 error = group_send_sig_info(sig, info, p);
15467                 if (unlikely(error == -ESRCH))
15468                         /*
15469 @@ -1143,7 +1167,7 @@ int kill_pid_info_as_uid(int sig, struct
15470  
15471         read_lock(&tasklist_lock);
15472         p = pid_task(pid, PIDTYPE_PID);
15473 -       if (!p) {
15474 +       if (!p || !vx_check(vx_task_xid(p), VS_IDENT)) {
15475                 ret = -ESRCH;
15476                 goto out_unlock;
15477         }
15478 @@ -1197,8 +1221,10 @@ static int kill_something_info(int sig, 
15479                 struct task_struct * p;
15480  
15481                 for_each_process(p) {
15482 -                       if (task_pid_vnr(p) > 1 &&
15483 -                                       !same_thread_group(p, current)) {
15484 +                       if (vx_check(vx_task_xid(p), VS_ADMIN|VS_IDENT) &&
15485 +                               task_pid_vnr(p) > 1 &&
15486 +                               !same_thread_group(p, current) &&
15487 +                               !vx_current_initpid(p->pid)) {
15488                                 int err = group_send_sig_info(sig, info, p);
15489                                 ++count;
15490                                 if (err != -EPERM)
15491 @@ -1884,6 +1910,11 @@ relock:
15492                                 !sig_kernel_only(signr))
15493                         continue;
15494  
15495 +               /* virtual init is protected against user signals */
15496 +               if ((info->si_code == SI_USER) &&
15497 +                       vx_current_initpid(current->pid))
15498 +                       continue;
15499 +
15500                 if (sig_kernel_stop(signr)) {
15501                         /*
15502                          * The default action is to stop all threads in
15503 diff -NurpP --minimal linux-2.6.30.2/kernel/softirq.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/softirq.c
15504 --- linux-2.6.30.2/kernel/softirq.c     2009-06-11 17:13:26.000000000 +0200
15505 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/softirq.c  2009-07-04 01:48:28.000000000 +0200
15506 @@ -25,6 +25,7 @@
15507  #include <linux/smp.h>
15508  #include <linux/tick.h>
15509  #include <trace/irq.h>
15510 +#include <linux/vs_context.h>
15511  
15512  #include <asm/irq.h>
15513  /*
15514 diff -NurpP --minimal linux-2.6.30.2/kernel/sys.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/sys.c
15515 --- linux-2.6.30.2/kernel/sys.c 2009-06-11 17:13:26.000000000 +0200
15516 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/sys.c      2009-07-04 01:11:39.000000000 +0200
15517 @@ -40,6 +40,7 @@
15518  #include <linux/syscalls.h>
15519  #include <linux/kprobes.h>
15520  #include <linux/user_namespace.h>
15521 +#include <linux/vs_pid.h>
15522  
15523  #include <asm/uaccess.h>
15524  #include <asm/io.h>
15525 @@ -129,7 +130,10 @@ static int set_one_prio(struct task_stru
15526                 goto out;
15527         }
15528         if (niceval < task_nice(p) && !can_nice(p, niceval)) {
15529 -               error = -EACCES;
15530 +               if (vx_flags(VXF_IGNEG_NICE, 0))
15531 +                       error = 0;
15532 +               else
15533 +                       error = -EACCES;
15534                 goto out;
15535         }
15536         no_nice = security_task_setnice(p, niceval);
15537 @@ -178,6 +182,8 @@ SYSCALL_DEFINE3(setpriority, int, which,
15538                         else
15539                                 pgrp = task_pgrp(current);
15540                         do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
15541 +                               if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
15542 +                                       continue;
15543                                 error = set_one_prio(p, niceval, error);
15544                         } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
15545                         break;
15546 @@ -239,6 +245,8 @@ SYSCALL_DEFINE2(getpriority, int, which,
15547                         else
15548                                 pgrp = task_pgrp(current);
15549                         do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
15550 +                               if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
15551 +                                       continue;
15552                                 niceval = 20 - task_nice(p);
15553                                 if (niceval > retval)
15554                                         retval = niceval;
15555 @@ -348,6 +356,9 @@ void kernel_power_off(void)
15556         machine_power_off();
15557  }
15558  EXPORT_SYMBOL_GPL(kernel_power_off);
15559 +
15560 +long vs_reboot(unsigned int, void __user *);
15561 +
15562  /*
15563   * Reboot system call: for obvious reasons only root may call it,
15564   * and even root needs to set up some magic numbers in the registers
15565 @@ -380,6 +391,9 @@ SYSCALL_DEFINE4(reboot, int, magic1, int
15566         if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
15567                 cmd = LINUX_REBOOT_CMD_HALT;
15568  
15569 +       if (!vx_check(0, VS_ADMIN|VS_WATCH))
15570 +               return vs_reboot(cmd, arg);
15571 +
15572         lock_kernel();
15573         switch (cmd) {
15574         case LINUX_REBOOT_CMD_RESTART:
15575 @@ -1413,7 +1427,7 @@ SYSCALL_DEFINE2(sethostname, char __user
15576         int errno;
15577         char tmp[__NEW_UTS_LEN];
15578  
15579 -       if (!capable(CAP_SYS_ADMIN))
15580 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SET_UTSNAME))
15581                 return -EPERM;
15582         if (len < 0 || len > __NEW_UTS_LEN)
15583                 return -EINVAL;
15584 @@ -1462,7 +1476,7 @@ SYSCALL_DEFINE2(setdomainname, char __us
15585         int errno;
15586         char tmp[__NEW_UTS_LEN];
15587  
15588 -       if (!capable(CAP_SYS_ADMIN))
15589 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SET_UTSNAME))
15590                 return -EPERM;
15591         if (len < 0 || len > __NEW_UTS_LEN)
15592                 return -EINVAL;
15593 @@ -1531,7 +1545,7 @@ SYSCALL_DEFINE2(setrlimit, unsigned int,
15594                 return -EINVAL;
15595         old_rlim = current->signal->rlim + resource;
15596         if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
15597 -           !capable(CAP_SYS_RESOURCE))
15598 +           !vx_capable(CAP_SYS_RESOURCE, VXC_SET_RLIMIT))
15599                 return -EPERM;
15600         if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
15601                 return -EPERM;
15602 diff -NurpP --minimal linux-2.6.30.2/kernel/sysctl.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/sysctl.c
15603 --- linux-2.6.30.2/kernel/sysctl.c      2009-07-23 13:28:48.000000000 +0200
15604 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/sysctl.c   2009-07-23 13:50:49.000000000 +0200
15605 @@ -115,6 +115,7 @@ static int ngroups_max = NGROUPS_MAX;
15606  #ifdef CONFIG_MODULES
15607  extern char modprobe_path[];
15608  #endif
15609 +extern char vshelper_path[];
15610  #ifdef CONFIG_CHR_DEV_SG
15611  extern int sg_big_buff;
15612  #endif
15613 @@ -546,6 +547,15 @@ static struct ctl_table kern_table[] = {
15614                 .strategy       = &sysctl_string,
15615         },
15616  #endif
15617 +       {
15618 +               .ctl_name       = KERN_VSHELPER,
15619 +               .procname       = "vshelper",
15620 +               .data           = &vshelper_path,
15621 +               .maxlen         = 256,
15622 +               .mode           = 0644,
15623 +               .proc_handler   = &proc_dostring,
15624 +               .strategy       = &sysctl_string,
15625 +       },
15626  #ifdef CONFIG_CHR_DEV_SG
15627         {
15628                 .ctl_name       = KERN_SG_BIG_BUFF,
15629 diff -NurpP --minimal linux-2.6.30.2/kernel/sysctl_check.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/sysctl_check.c
15630 --- linux-2.6.30.2/kernel/sysctl_check.c        2009-06-11 17:13:26.000000000 +0200
15631 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/sysctl_check.c     2009-07-04 01:11:39.000000000 +0200
15632 @@ -39,6 +39,7 @@ static const struct trans_ctl_table tran
15633  
15634         { KERN_PANIC,                   "panic" },
15635         { KERN_REALROOTDEV,             "real-root-dev" },
15636 +       { KERN_VSHELPER,                "vshelper", },
15637  
15638         { KERN_SPARC_REBOOT,            "reboot-cmd" },
15639         { KERN_CTLALTDEL,               "ctrl-alt-del" },
15640 @@ -1217,6 +1218,22 @@ static const struct trans_ctl_table tran
15641         {}
15642  };
15643  
15644 +static const struct trans_ctl_table trans_vserver_table[] = { /* CTL_VSERVER names */
15645 +       { 1,    "debug_switch" },
15646 +       { 2,    "debug_xid" },
15647 +       { 3,    "debug_nid" },
15648 +       { 4,    "debug_tag" },
15649 +       { 5,    "debug_net" },
15650 +       { 6,    "debug_limit" },
15651 +       { 7,    "debug_cres" },
15652 +       { 8,    "debug_dlim" },
15653 +       { 9,    "debug_quota" },
15654 +       { 10,   "debug_cvirt" },
15655 +       { 11,   "debug_space" },
15656 +       { 12,   "debug_misc" },
15657 +       {}
15658 +};
15659 +
15660  static const struct trans_ctl_table trans_root_table[] = {
15661         { CTL_KERN,     "kernel",       trans_kern_table },
15662         { CTL_VM,       "vm",           trans_vm_table },
15663 @@ -1233,6 +1250,7 @@ static const struct trans_ctl_table tran
15664         { CTL_SUNRPC,   "sunrpc",       trans_sunrpc_table },
15665         { CTL_PM,       "pm",           trans_pm_table },
15666         { CTL_FRV,      "frv",          trans_frv_table },
15667 +       { CTL_VSERVER,  "vserver",      trans_vserver_table },
15668         {}
15669  };
15670  
15671 diff -NurpP --minimal linux-2.6.30.2/kernel/time.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/time.c
15672 --- linux-2.6.30.2/kernel/time.c        2009-03-24 14:22:45.000000000 +0100
15673 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/time.c     2009-07-04 01:11:39.000000000 +0200
15674 @@ -63,6 +63,7 @@ EXPORT_SYMBOL(sys_tz);
15675  SYSCALL_DEFINE1(time, time_t __user *, tloc)
15676  {
15677         time_t i = get_seconds();
15678 +/*     FIXME: do_gettimeofday(&tv) -> vx_gettimeofday(&tv) */
15679  
15680         if (tloc) {
15681                 if (put_user(i,tloc))
15682 @@ -93,7 +94,7 @@ SYSCALL_DEFINE1(stime, time_t __user *, 
15683         if (err)
15684                 return err;
15685  
15686 -       do_settimeofday(&tv);
15687 +       vx_settimeofday(&tv);
15688         return 0;
15689  }
15690  
15691 @@ -104,7 +105,7 @@ SYSCALL_DEFINE2(gettimeofday, struct tim
15692  {
15693         if (likely(tv != NULL)) {
15694                 struct timeval ktv;
15695 -               do_gettimeofday(&ktv);
15696 +               vx_gettimeofday(&ktv);
15697                 if (copy_to_user(tv, &ktv, sizeof(ktv)))
15698                         return -EFAULT;
15699         }
15700 @@ -179,7 +180,7 @@ int do_sys_settimeofday(struct timespec 
15701                 /* SMP safe, again the code in arch/foo/time.c should
15702                  * globally block out interrupts when it runs.
15703                  */
15704 -               return do_settimeofday(tv);
15705 +               return vx_settimeofday(tv);
15706         }
15707         return 0;
15708  }
15709 @@ -311,7 +312,7 @@ void getnstimeofday(struct timespec *tv)
15710  {
15711         struct timeval x;
15712  
15713 -       do_gettimeofday(&x);
15714 +       vx_gettimeofday(&x);
15715         tv->tv_sec = x.tv_sec;
15716         tv->tv_nsec = x.tv_usec * NSEC_PER_USEC;
15717  }
15718 diff -NurpP --minimal linux-2.6.30.2/kernel/timer.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/timer.c
15719 --- linux-2.6.30.2/kernel/timer.c       2009-06-11 17:13:26.000000000 +0200
15720 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/timer.c    2009-07-04 01:11:39.000000000 +0200
15721 @@ -37,6 +37,10 @@
15722  #include <linux/delay.h>
15723  #include <linux/tick.h>
15724  #include <linux/kallsyms.h>
15725 +#include <linux/vs_base.h>
15726 +#include <linux/vs_cvirt.h>
15727 +#include <linux/vs_pid.h>
15728 +#include <linux/vserver/sched.h>
15729  
15730  #include <asm/uaccess.h>
15731  #include <asm/unistd.h>
15732 @@ -1103,6 +1107,25 @@ unsigned long get_next_timer_interrupt(u
15733  }
15734  #endif
15735  
15736 +static inline
15737 +void __vx_consume_token(struct _vx_sched_pc *sched_pc)
15738 +{
15739 +       sched_pc->tokens--;     /* one token per call; no lower bound is applied here */
15740 +}
15741 +
15742 +static inline
15743 +void vx_hard_tick(struct task_struct *p, int cpu)
15744 +{
15745 +       struct vx_info *vxi = p->vx_info;
15746 +
15747 +       /* nothing to charge unless the VXF_SCHED_HARD|VXF_SCHED_PRIO
15748 +        * flag check on the task's context succeeds */
15749 +       if (!vx_info_flags(vxi, VXF_SCHED_HARD|VXF_SCHED_PRIO, 0))
15750 +               return;
15751 +
15752 +       __vx_consume_token(&vx_per_cpu(vxi, sched_pc, cpu));
15753 +}
15754 +
15755  /*
15756   * Called from the timer interrupt handler to charge one tick to the current
15757   * process.  user_tick is 1 if the tick is user time, 0 for system.
15758 @@ -1119,6 +1142,7 @@ void update_process_times(int user_tick)
15759                 rcu_check_callbacks(cpu, user_tick);
15760         printk_tick();
15761         scheduler_tick();
15762 +       vx_hard_tick(p, cpu);
15763         run_posix_cpu_timers(p);
15764  }
15765  
15766 @@ -1221,12 +1245,6 @@ SYSCALL_DEFINE1(alarm, unsigned int, sec
15767  
15768  #endif
15769  
15770 -#ifndef __alpha__
15771 -
15772 -/*
15773 - * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
15774 - * should be moved into arch/i386 instead?
15775 - */
15776  
15777  /**
15778   * sys_getpid - return the thread group id of the current process
15779 @@ -1255,10 +1273,23 @@ SYSCALL_DEFINE0(getppid)
15780         rcu_read_lock();
15781         pid = task_tgid_vnr(current->real_parent);
15782         rcu_read_unlock();
15783 +       return vx_map_pid(pid);
15784 +}
15785  
15786 -       return pid;
15787 +#ifdef __alpha__
15788 +
15789 +/*
15790 + * The Alpha uses getxpid, getxuid, and getxgid instead.
15791 + */
15792 +/* returns sys_getpid(); the result of sys_getppid() is stored through ppid */
15793 +asmlinkage long do_getxpid(long *ppid)
15794 +{
15795 +       *ppid = sys_getppid();
15796 +       return sys_getpid();
15797 +}
15798  
15799 +#else /* __alpha__ */
15800 +
15801  SYSCALL_DEFINE0(getuid)
15802  {
15803         /* Only we change this so SMP safe */
15804 @@ -1429,6 +1460,8 @@ int do_sysinfo(struct sysinfo *info)
15805                         tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
15806                         tp.tv_sec++;
15807                 }
15808 +               if (vx_flags(VXF_VIRT_UPTIME, 0))
15809 +                       vx_vsi_uptime(&tp, NULL);
15810                 info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
15811  
15812                 info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
15813 diff -NurpP --minimal linux-2.6.30.2/kernel/user.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/user.c
15814 --- linux-2.6.30.2/kernel/user.c        2009-06-11 17:13:27.000000000 +0200
15815 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/user.c     2009-07-04 01:11:39.000000000 +0200
15816 @@ -249,10 +249,10 @@ static struct kobj_type uids_ktype = {
15817   *
15818   * See Documentation/scheduler/sched-design-CFS.txt for ramifications.
15819   */
15820 -static int uids_user_create(struct user_struct *up)
15821 +static int uids_user_create(struct user_namespace *ns, struct user_struct *up)
15822  {
15823         struct kobject *kobj = &up->kobj;
15824 -       int error;
15825 +       int error = 0;
15826  
15827         memset(kobj, 0, sizeof(struct kobject));
15828         if (up->user_ns != &init_user_ns)
15829 @@ -280,7 +280,7 @@ int __init uids_sysfs_init(void)
15830         if (!uids_kset)
15831                 return -ENOMEM;
15832  
15833 -       return uids_user_create(&root_user);
15834 +       return uids_user_create(NULL, &root_user);
15835  }
15836  
15837  /* work function to remove sysfs directory for a user and free up
15838 @@ -342,7 +342,8 @@ static void free_user(struct user_struct
15839  #else  /* CONFIG_USER_SCHED && CONFIG_SYSFS */
15840  
15841  int uids_sysfs_init(void) { return 0; }
15842 -static inline int uids_user_create(struct user_struct *up) { return 0; }
15843 +static inline int uids_user_create(struct user_namespace *ns,
15844 +       struct user_struct *up) { return 0; }
15845  static inline void uids_mutex_lock(void) { }
15846  static inline void uids_mutex_unlock(void) { }
15847  
15848 @@ -439,7 +440,7 @@ struct user_struct *alloc_uid(struct use
15849  
15850                 new->user_ns = get_user_ns(ns);
15851  
15852 -               if (uids_user_create(new))
15853 +               if (uids_user_create(ns, new))
15854                         goto out_destoy_sched;
15855  
15856                 /*
15857 diff -NurpP --minimal linux-2.6.30.2/kernel/user_namespace.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/user_namespace.c
15858 --- linux-2.6.30.2/kernel/user_namespace.c      2009-03-24 14:22:45.000000000 +0100
15859 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/user_namespace.c   2009-07-04 01:11:39.000000000 +0200
15860 @@ -10,6 +10,7 @@
15861  #include <linux/slab.h>
15862  #include <linux/user_namespace.h>
15863  #include <linux/cred.h>
15864 +#include <linux/vserver/global.h>
15865  
15866  /*
15867   * Create a new user namespace, deriving the creator from the user in the
15868 @@ -30,6 +31,7 @@ int create_user_ns(struct cred *new)
15869                 return -ENOMEM;
15870  
15871         kref_init(&ns->kref);
15872 +       atomic_inc(&vs_global_user_ns);
15873  
15874         for (n = 0; n < UIDHASH_SZ; ++n)
15875                 INIT_HLIST_HEAD(ns->uidhash_table + n);
15876 @@ -78,6 +80,8 @@ void free_user_ns(struct kref *kref)
15877         struct user_namespace *ns =
15878                 container_of(kref, struct user_namespace, kref);
15879  
15880 +       /* FIXME: maybe move into destroyer? */
15881 +       atomic_dec(&vs_global_user_ns);
15882         INIT_WORK(&ns->destroyer, free_user_ns_work);
15883         schedule_work(&ns->destroyer);
15884  }
15885 diff -NurpP --minimal linux-2.6.30.2/kernel/utsname.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/utsname.c
15886 --- linux-2.6.30.2/kernel/utsname.c     2008-12-25 00:26:37.000000000 +0100
15887 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/utsname.c  2009-07-04 01:11:39.000000000 +0200
15888 @@ -14,6 +14,7 @@
15889  #include <linux/utsname.h>
15890  #include <linux/err.h>
15891  #include <linux/slab.h>
15892 +#include <linux/vserver/global.h>
15893  
15894  /*
15895   * Clone a new ns copying an original utsname, setting refcount to 1
15896 @@ -32,6 +33,7 @@ static struct uts_namespace *clone_uts_n
15897         memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
15898         up_read(&uts_sem);
15899         kref_init(&ns->kref);
15900 +       atomic_inc(&vs_global_uts_ns);
15901         return ns;
15902  }
15903  
15904 @@ -62,5 +64,6 @@ void free_uts_ns(struct kref *kref)
15905         struct uts_namespace *ns;
15906  
15907         ns = container_of(kref, struct uts_namespace, kref);
15908 +       atomic_dec(&vs_global_uts_ns);
15909         kfree(ns);
15910  }
15911 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/cacct.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/cacct.c
15912 --- linux-2.6.30.2/kernel/vserver/cacct.c       1970-01-01 01:00:00.000000000 +0100
15913 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/cacct.c    2009-07-04 01:11:39.000000000 +0200
15914 @@ -0,0 +1,42 @@
15915 +/*
15916 + *  linux/kernel/vserver/cacct.c
15917 + *
15918 + *  Virtual Server: Context Accounting
15919 + *
15920 + *  Copyright (C) 2006-2007 Herbert Pötzl
15921 + *
15922 + *  V0.01  added accounting stats
15923 + *
15924 + */
15925 +
15926 +#include <linux/types.h>
15927 +#include <linux/vs_context.h>
15928 +#include <linux/vserver/cacct_cmd.h>
15929 +#include <linux/vserver/cacct_int.h>
15930 +
15931 +#include <asm/errno.h>
15932 +#include <asm/uaccess.h>
15933 +
15934 +
15935 +int vc_sock_stat(struct vx_info *vxi, void __user *data)
15936 +{
15937 +       struct vcmd_sock_stat_v0 stat;
15938 +       int family, pos = 0;
15939 +
15940 +       /* the request names the accounting row to report in .field */
15941 +       if (copy_from_user(&stat, data, sizeof(stat)))
15942 +               return -EFAULT;
15943 +       family = stat.field;
15944 +       if (!((family >= 0) && (family < VXA_SOCK_SIZE)))
15945 +               return -EINVAL;
15946 +
15947 +       /* fill in count and total for each of the three columns */
15948 +       while (pos < 3) {
15949 +               stat.count[pos] = vx_sock_count(&vxi->cacct, family, pos);
15950 +               stat.total[pos] = vx_sock_total(&vxi->cacct, family, pos);
15951 +               pos++;
15952 +       }
15953 +
15954 +       return copy_to_user(data, &stat, sizeof(stat)) ? -EFAULT : 0;
15955 +}
15956 +
15957 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/cacct_init.h linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/cacct_init.h
15958 --- linux-2.6.30.2/kernel/vserver/cacct_init.h  1970-01-01 01:00:00.000000000 +0100
15959 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/cacct_init.h       2009-07-04 01:11:39.000000000 +0200
15960 @@ -0,0 +1,25 @@
15961 +
15962 +
15963 +static inline void vx_info_init_cacct(struct _vx_cacct *cacct)
15964 +{
15965 +       int i, j;
15966 +
15967 +       /* reset the socket, slab and page counters of a new context */
15968 +       for (i = 0; i < VXA_SOCK_SIZE; i++) {
15969 +               for (j = 0; j < 3; j++) {
15970 +                       atomic_set(&cacct->sock[i][j].count, 0);
15971 +                       atomic_set(&cacct->sock[i][j].total, 0);
15972 +               }
15973 +       }
15974 +       for (i = 0; i < 8; i++)
15975 +               atomic_set(&cacct->slab[i], 0);
15976 +       for (i = 0; i < 5; i++)
15977 +               for (j = 0; j < 8; j++) /* vx_info_proc_cacct() reads columns 0..7 */
15978 +                       atomic_set(&cacct->page[i][j], 0);
15979 +}
15980 +/* counterpart of vx_info_init_cacct(); currently performs no work */
15981 +static inline void vx_info_exit_cacct(struct _vx_cacct *cacct)
15982 +{
15983 +       return;
15984 +}
15985 +
15986 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/cacct_proc.h linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/cacct_proc.h
15987 --- linux-2.6.30.2/kernel/vserver/cacct_proc.h  1970-01-01 01:00:00.000000000 +0100
15988 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/cacct_proc.h       2009-07-04 01:11:39.000000000 +0200
15989 @@ -0,0 +1,53 @@
15990 +#ifndef _VX_CACCT_PROC_H
15991 +#define _VX_CACCT_PROC_H
15992 +
15993 +#include <linux/vserver/cacct_int.h>
15994 +
15995 +
15996 +#define VX_SOCKA_TOP   \
15997 +       "Type\t    recv #/bytes\t\t   send #/bytes\t\t    fail #/bytes\n"
15998 +/* print sock, slab and page counters of cacct into buffer (unbounded sprintf); returns the length */
15999 +static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer)
16000 +{
16001 +       int i, j, length = 0;
16002 +       static const char * const type[VXA_SOCK_SIZE] = {
16003 +               "UNSPEC", "UNIX", "INET", "INET6", "PACKET", "OTHER"
16004 +       };
16005 +
16006 +       length += sprintf(buffer + length, VX_SOCKA_TOP);
16007 +       for (i = 0; i < VXA_SOCK_SIZE; i++) {
16008 +               length += sprintf(buffer + length, "%s:", type[i]);
16009 +               for (j = 0; j < 3; j++) {
16010 +                       length += sprintf(buffer + length,
16011 +                               "\t%10lu/%-10lu",
16012 +                               vx_sock_count(cacct, i, j),
16013 +                               vx_sock_total(cacct, i, j));
16014 +               }
16015 +               buffer[length++] = '\n';
16016 +       }
16017 +
16018 +       length += sprintf(buffer + length, "\n");
16019 +       length += sprintf(buffer + length,
16020 +               "slab:\t %8u %8u %8u %8u\n",
16021 +               atomic_read(&cacct->slab[1]),
16022 +               atomic_read(&cacct->slab[4]),
16023 +               atomic_read(&cacct->slab[0]),
16024 +               atomic_read(&cacct->slab[2]));
16025 +
16026 +       length += sprintf(buffer + length, "\n");
16027 +       for (i = 0; i < 5; i++) {
16028 +               length += sprintf(buffer + length,
16029 +                       "page[%d]: %8u %8u %8u %8u\t %8u %8u %8u %8u\n", i,
16030 +                       atomic_read(&cacct->page[i][0]),
16031 +                       atomic_read(&cacct->page[i][1]),
16032 +                       atomic_read(&cacct->page[i][2]),
16033 +                       atomic_read(&cacct->page[i][3]),
16034 +                       atomic_read(&cacct->page[i][4]),
16035 +                       atomic_read(&cacct->page[i][5]),
16036 +                       atomic_read(&cacct->page[i][6]),
16037 +                       atomic_read(&cacct->page[i][7]));
16038 +       }
16039 +       return length;
16040 +}
16041 +
16042 +#endif /* _VX_CACCT_PROC_H */
16043 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/context.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/context.c
16044 --- linux-2.6.30.2/kernel/vserver/context.c     1970-01-01 01:00:00.000000000 +0100
16045 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/context.c  2009-07-04 02:46:55.000000000 +0200
16046 @@ -0,0 +1,1031 @@
16047 +/*
16048 + *  linux/kernel/vserver/context.c
16049 + *
16050 + *  Virtual Server: Context Support
16051 + *
16052 + *  Copyright (C) 2003-2007  Herbert Pötzl
16053 + *
16054 + *  V0.01  context helper
16055 + *  V0.02  vx_ctx_kill syscall command
16056 + *  V0.03  replaced context_info calls
16057 + *  V0.04  redesign of struct (de)alloc
16058 + *  V0.05  rlimit basic implementation
16059 + *  V0.06  task_xid and info commands
16060 + *  V0.07  context flags and caps
16061 + *  V0.08  switch to RCU based hash
16062 + *  V0.09  revert to non RCU for now
16063 + *  V0.10  and back to working RCU hash
16064 + *  V0.11  and back to locking again
16065 + *  V0.12  referenced context store
16066 + *  V0.13  separate per cpu data
16067 + *  V0.14  changed vcmds to vxi arg
16068 + *  V0.15  added context stat
16069 + *  V0.16  have __create claim() the vxi
16070 + *  V0.17  removed older and legacy stuff
16071 + *
16072 + */
16073 +
16074 +#include <linux/slab.h>
16075 +#include <linux/types.h>
16076 +#include <linux/security.h>
16077 +#include <linux/pid_namespace.h>
16078 +
16079 +#include <linux/vserver/context.h>
16080 +#include <linux/vserver/network.h>
16081 +#include <linux/vserver/debug.h>
16082 +#include <linux/vserver/limit.h>
16083 +#include <linux/vserver/limit_int.h>
16084 +#include <linux/vserver/space.h>
16085 +#include <linux/init_task.h>
16086 +#include <linux/fs_struct.h>
16087 +
16088 +#include <linux/vs_context.h>
16089 +#include <linux/vs_limit.h>
16090 +#include <linux/vs_pid.h>
16091 +#include <linux/vserver/context_cmd.h>
16092 +
16093 +#include "cvirt_init.h"
16094 +#include "cacct_init.h"
16095 +#include "limit_init.h"
16096 +#include "sched_init.h"
16097 +
16098 +
16099 +atomic_t vx_global_ctotal      = ATOMIC_INIT(0);       /* allocated vx_info structs */
16100 +atomic_t vx_global_cactive     = ATOMIC_INIT(0);       /* currently hashed contexts */
16101 +
16102 +
16103 +/*     contexts removed from the hash but not yet freed */
16104 +
16105 +static struct hlist_head vx_info_inactive = HLIST_HEAD_INIT;
16106 +
16107 +static spinlock_t vx_info_inactive_lock = SPIN_LOCK_UNLOCKED;  /* taken around inactive list updates */
16108 +
16109 +
16110 +/*     __alloc_vx_info()
16111 +
16112 +       * allocate a zeroed and initialized vx_info struct for xid
16113 +       * doesn't make it visible (hash); null pointer on failure */
16114 +
16115 +static struct vx_info *__alloc_vx_info(xid_t xid)
16116 +{
16117 +       struct vx_info *new = NULL;
16118 +       int cpu, index;
16119 +
16120 +       vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid);
16121 +
16122 +       /* would this benefit from a slab cache? */
16123 +       new = kmalloc(sizeof(struct vx_info), GFP_KERNEL);
16124 +       if (!new)
16125 +               return 0;       /* allocation failed */
16126 +
16127 +       memset(new, 0, sizeof(struct vx_info));
16128 +#ifdef CONFIG_SMP
16129 +       new->ptr_pc = alloc_percpu(struct _vx_info_pc);
16130 +       if (!new->ptr_pc)
16131 +               goto error;     /* frees new below */
16132 +#endif
16133 +       new->vx_id = xid;
16134 +       INIT_HLIST_NODE(&new->vx_hlist);
16135 +       atomic_set(&new->vx_usecnt, 0);
16136 +       atomic_set(&new->vx_tasks, 0);
16137 +       new->vx_parent = NULL;
16138 +       new->vx_state = 0;
16139 +       init_waitqueue_head(&new->vx_wait);
16140 +
16141 +       /* default reaper: the init pid namespace's child reaper (referenced) */
16142 +       get_task_struct(init_pid_ns.child_reaper);
16143 +       new->vx_reaper = init_pid_ns.child_reaper;
16144 +       new->vx_badness_bias = 0;
16145 +
16146 +       /* initialize the limit, sched, cvirt and cacct sub-structures */
16147 +       vx_info_init_limit(&new->limit);
16148 +       vx_info_init_sched(&new->sched);
16149 +       vx_info_init_cvirt(&new->cvirt);
16150 +       vx_info_init_cacct(&new->cacct);
16151 +
16152 +       /* per cpu data structures */
16153 +       for_each_possible_cpu(cpu) {
16154 +               vx_info_init_sched_pc(
16155 +                       &vx_per_cpu(new, sched_pc, cpu), cpu);
16156 +               vx_info_init_cvirt_pc(
16157 +                       &vx_per_cpu(new, cvirt_pc, cpu), cpu);
16158 +       }
16159 +
16160 +       new->vx_flags = VXF_INIT_SET;
16161 +       cap_set_init_eff(new->vx_bcaps);
16162 +       new->vx_ccaps = 0;
16163 +       // new->vx_cap_bset = current->cap_bset;
16164 +
16165 +       new->reboot_cmd = 0;
16166 +       new->exit_code = 0;
16167 +
16168 +       // preconfig fs entries: every space starts out pointing at init_fs
16169 +       for (index = 0; index < VX_SPACES; index++) {
16170 +               write_lock(&init_fs.lock);
16171 +               init_fs.users++;        /* one user count per slot */
16172 +               write_unlock(&init_fs.lock);
16173 +               new->vx_fs[index] = &init_fs;
16174 +       }
16175 +
16176 +       vxdprintk(VXD_CBIT(xid, 0),
16177 +               "alloc_vx_info(%d) = %p", xid, new);
16178 +       vxh_alloc_vx_info(new);
16179 +       atomic_inc(&vx_global_ctotal);  /* decremented in __dealloc_vx_info() */
16180 +       return new;
16181 +#ifdef CONFIG_SMP
16182 +error:
16183 +       kfree(new);     /* only reached when alloc_percpu() failed */
16184 +       return 0;
16185 +#endif
16186 +}
16187 +
16188 +/*     __dealloc_vx_info()
16189 +
16190 +       * final disposal of vx_info (per cpu data and the struct) */
16191 +
16192 +static void __dealloc_vx_info(struct vx_info *vxi)
16193 +{
16194 +       struct vx_info_save vxis;
16195 +       int cpu;
16196 +
16197 +       vxdprintk(VXD_CBIT(xid, 0),
16198 +               "dealloc_vx_info(%p)", vxi);
16199 +       vxh_dealloc_vx_info(vxi);
16200 +       /* the exit hooks below are compiled in only with CONFIG_VSERVER_WARN */
16201 +#ifdef CONFIG_VSERVER_WARN
16202 +       enter_vx_info(vxi, &vxis);
16203 +       vx_info_exit_limit(&vxi->limit);
16204 +       vx_info_exit_sched(&vxi->sched);
16205 +       vx_info_exit_cvirt(&vxi->cvirt);
16206 +       vx_info_exit_cacct(&vxi->cacct);
16207 +
16208 +       for_each_possible_cpu(cpu) {
16209 +               vx_info_exit_sched_pc(
16210 +                       &vx_per_cpu(vxi, sched_pc, cpu), cpu);
16211 +               vx_info_exit_cvirt_pc(
16212 +                       &vx_per_cpu(vxi, cvirt_pc, cpu), cpu);
16213 +       }
16214 +       leave_vx_info(&vxis);
16215 +#endif
16216 +
16217 +       vxi->vx_id = -1;        /* id and state are overwritten just before the free */
16218 +       vxi->vx_state |= VXS_RELEASED;
16219 +
16220 +#ifdef CONFIG_SMP
16221 +       free_percpu(vxi->ptr_pc);
16222 +#endif
16223 +       kfree(vxi);
16224 +       atomic_dec(&vx_global_ctotal);  /* pairs with __alloc_vx_info() */
16225 +}
16226 +
16227 +static void __shutdown_vx_info(struct vx_info *vxi)
16228 +{      /* flag vxi as shut down and drop its per-space nsproxy/fs references */
16229 +       struct nsproxy *nsproxy;
16230 +       struct fs_struct *fs;
16231 +       int index, kill;
16232 +
16233 +       might_sleep();
16234 +
16235 +       vxi->vx_state |= VXS_SHUTDOWN;
16236 +       vs_state_change(vxi, VSC_SHUTDOWN);
16237 +
16238 +       for (index = 0; index < VX_SPACES; index++) {
16239 +               nsproxy = xchg(&vxi->vx_nsproxy[index], NULL);
16240 +               if (nsproxy)
16241 +                       put_nsproxy(nsproxy);
16242 +
16243 +               fs = xchg(&vxi->vx_fs[index], NULL);
16244 +               write_lock(&fs->lock);  /* NOTE(review): fs is not NULL-checked, unlike nsproxy */
16245 +               kill = !--fs->users;    /* true when this was the last user */
16246 +               write_unlock(&fs->lock);
16247 +               if (kill)
16248 +                       free_fs_struct(fs);
16249 +       }
16250 +}
16251 +
16252 +/* exported stuff */
16253 +
16254 +void free_vx_info(struct vx_info *vxi)
16255 +{      /* BUG unless vxi is unused, unhashed and shut down, then dispose of it */
16256 +       unsigned long flags;
16257 +       unsigned index;
16258 +
16259 +       /* check for reference counts first */
16260 +       BUG_ON(atomic_read(&vxi->vx_usecnt));
16261 +       BUG_ON(atomic_read(&vxi->vx_tasks));
16262 +
16263 +       /* context must not be hashed */
16264 +       BUG_ON(vx_info_state(vxi, VXS_HASHED));
16265 +
16266 +       /* context shutdown is mandatory */
16267 +       BUG_ON(!vx_info_state(vxi, VXS_SHUTDOWN));
16268 +
16269 +       /* all nsproxy and fs slots must already be cleared */
16270 +       for (index = 0; index < VX_SPACES; index++) {
16271 +               BUG_ON(vxi->vx_nsproxy[index]);
16272 +               BUG_ON(vxi->vx_fs[index]);
16273 +       }
16274 +       /* take it off the inactive list it was put on by __unhash_vx_info() */
16275 +       spin_lock_irqsave(&vx_info_inactive_lock, flags);
16276 +       hlist_del(&vxi->vx_hlist);
16277 +       spin_unlock_irqrestore(&vx_info_inactive_lock, flags);
16278 +
16279 +       __dealloc_vx_info(vxi);
16280 +}
16281 +
16282 +
16283 +/*     hash table of hashed contexts, bucket = xid % VX_HASH_SIZE */
16284 +
16285 +#define VX_HASH_SIZE   13
16286 +
16287 +static struct hlist_head vx_info_hash[VX_HASH_SIZE] =
16288 +       { [0 ... VX_HASH_SIZE-1] = HLIST_HEAD_INIT };
16289 +
16290 +static spinlock_t vx_info_hash_lock = SPIN_LOCK_UNLOCKED;      /* held for all vx_info_hash accesses below */
16291 +
16292 +
16293 +static inline unsigned int __hashval(xid_t xid)
16294 +{
16295 +       return (xid % VX_HASH_SIZE);    /* bucket index into vx_info_hash */
16296 +}
16297 +
16298 +
16299 +
16300 +/*     __hash_vx_info()
16301 +
16302 +       * add the vxi to the global hash table (sets VXS_HASHED)
16303 +       * requires the hash_lock to be held                     */
16304 +
16305 +static inline void __hash_vx_info(struct vx_info *vxi)
16306 +{
16307 +       struct hlist_head *head;
16308 +
16309 +       vxd_assert_lock(&vx_info_hash_lock);
16310 +       vxdprintk(VXD_CBIT(xid, 4),
16311 +               "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id);
16312 +       vxh_hash_vx_info(vxi);
16313 +
16314 +       /* context must not be hashed */
16315 +       BUG_ON(vx_info_state(vxi, VXS_HASHED));
16316 +
16317 +       vxi->vx_state |= VXS_HASHED;
16318 +       head = &vx_info_hash[__hashval(vxi->vx_id)];
16319 +       hlist_add_head(&vxi->vx_hlist, head);
16320 +       atomic_inc(&vx_global_cactive); /* decremented in __unhash_vx_info() */
16321 +}
16322 +
16323 +/*     __unhash_vx_info()
16324 +
16325 +       * move the vxi from the global hash table to the inactive list
16326 +       * requires the hash_lock to be held                     */
16327 +
16328 +static inline void __unhash_vx_info(struct vx_info *vxi)
16329 +{
16330 +       unsigned long flags;
16331 +
16332 +       vxd_assert_lock(&vx_info_hash_lock);
16333 +       vxdprintk(VXD_CBIT(xid, 4),
16334 +               "__unhash_vx_info: %p[#%d.%d.%d]", vxi, vxi->vx_id,
16335 +               atomic_read(&vxi->vx_usecnt), atomic_read(&vxi->vx_tasks));
16336 +       vxh_unhash_vx_info(vxi);
16337 +
16338 +       /* context must be hashed */
16339 +       BUG_ON(!vx_info_state(vxi, VXS_HASHED));
16340 +       /* but without tasks */
16341 +       BUG_ON(atomic_read(&vxi->vx_tasks));
16342 +
16343 +       vxi->vx_state &= ~VXS_HASHED;
16344 +       hlist_del_init(&vxi->vx_hlist);
16345 +       spin_lock_irqsave(&vx_info_inactive_lock, flags);
16346 +       hlist_add_head(&vxi->vx_hlist, &vx_info_inactive);     /* removed again in free_vx_info() */
16347 +       spin_unlock_irqrestore(&vx_info_inactive_lock, flags);
16348 +       atomic_dec(&vx_global_cactive);
16349 +}
16350 +
16351 +
16352 +/*     __lookup_vx_info()
16353 +
16354 +       * find the hashed vx_info for xid (NULL if none); needs hash_lock
16355 +       * doesn't take a reference on the result                */
16356 +
16357 +static inline struct vx_info *__lookup_vx_info(xid_t xid)
16358 +{
16359 +       struct hlist_head *head = &vx_info_hash[__hashval(xid)];
16360 +       struct hlist_node *pos;
16361 +       struct vx_info *vxi;
16362 +
16363 +       vxd_assert_lock(&vx_info_hash_lock);
16364 +       hlist_for_each(pos, head) {
16365 +               vxi = hlist_entry(pos, struct vx_info, vx_hlist);
16366 +
16367 +               if (vxi->vx_id == xid)
16368 +                       goto found;
16369 +       }
16370 +       vxi = NULL;     /* bucket walked without a match */
16371 +found:
16372 +       vxdprintk(VXD_CBIT(xid, 0),
16373 +               "__lookup_vx_info(#%u): %p[#%u]",
16374 +               xid, vxi, vxi ? vxi->vx_id : 0);
16375 +       vxh_lookup_vx_info(vxi, xid);
16376 +       return vxi;
16377 +}
16378 +
16379 +
16380 +/*     __create_vx_info()
16381 +
16382 +       * create the requested context, get(), claim() and hash it
16383 +       * returns the vxi or ERR_PTR(-ENOMEM / -EBUSY / -EEXIST) */
16384 +
16385 +static struct vx_info *__create_vx_info(int id)
16386 +{
16387 +       struct vx_info *new, *vxi = NULL;
16388 +
16389 +       vxdprintk(VXD_CBIT(xid, 1), "create_vx_info(%d)*", id);
16390 +
16391 +       if (!(new = __alloc_vx_info(id)))
16392 +               return ERR_PTR(-ENOMEM);
16393 +
16394 +       /* lookup and insert happen under one hold of the hash lock */
16395 +       spin_lock(&vx_info_hash_lock);
16396 +
16397 +       /* id already hashed: report -EBUSY or -EEXIST per the VXF_STATE_SETUP check */
16398 +       if ((vxi = __lookup_vx_info(id))) {
16399 +               vxdprintk(VXD_CBIT(xid, 0),
16400 +                       "create_vx_info(%d) = %p (already there)", id, vxi);
16401 +               if (vx_info_flags(vxi, VXF_STATE_SETUP, 0))
16402 +                       vxi = ERR_PTR(-EBUSY);
16403 +               else
16404 +                       vxi = ERR_PTR(-EEXIST);
16405 +               goto out_unlock;
16406 +       }
16407 +       /* new context */
16408 +       vxdprintk(VXD_CBIT(xid, 0),
16409 +               "create_vx_info(%d) = %p (new)", id, new);
16410 +       claim_vx_info(new, NULL);
16411 +       __hash_vx_info(get_vx_info(new));
16412 +       vxi = new, new = NULL;  /* new is in use now, nothing to free below */
16413 +
16414 +out_unlock:
16415 +       spin_unlock(&vx_info_hash_lock);
16416 +       vxh_create_vx_info(IS_ERR(vxi) ? NULL : vxi, id);
16417 +       if (new)
16418 +               __dealloc_vx_info(new); /* allocation was not needed */
16419 +       return vxi;
16420 +}
16421 +
16422 +
16423 +/*     exported stuff                                          */
16424 +
16425 +/* shut vxi down, move it off the hash, then call __wakeup_vx_info() */
16426 +void unhash_vx_info(struct vx_info *vxi)
16427 +{
16428 +       __shutdown_vx_info(vxi);        /* done before the hash lock is taken */
16429 +       spin_lock(&vx_info_hash_lock);
16430 +       __unhash_vx_info(vxi);
16431 +       spin_unlock(&vx_info_hash_lock);
16432 +       __wakeup_vx_info(vxi);
16433 +}
16434 +
16435 +
16436 +/*     lookup_vx_info()
16437 +
16438 +       * search for a vx_info and get() it; ids 0 and 1 yield NULL
16439 +       * negative id means current                             */
16440 +
16441 +struct vx_info *lookup_vx_info(int id)
16442 +{
16443 +       struct vx_info *vxi = NULL;
16444 +
16445 +       if (id < 0) {
16446 +               vxi = get_vx_info(current->vx_info);
16447 +       } else if (id > 1) {
16448 +               spin_lock(&vx_info_hash_lock);
16449 +               vxi = get_vx_info(__lookup_vx_info(id));        /* get() while the lock is held */
16450 +               spin_unlock(&vx_info_hash_lock);
16451 +       }
16452 +       return vxi;
16453 +}
16454 +
16455 +/*     xid_is_hashed()
16456 +
16457 +       * returns 1 if xid is currently hashed, 0 otherwise     */
16458 +
16459 +int xid_is_hashed(xid_t xid)
16460 +{
16461 +       int hashed;
16462 +
16463 +       spin_lock(&vx_info_hash_lock);
16464 +       hashed = (__lookup_vx_info(xid) != NULL);
16465 +       spin_unlock(&vx_info_hash_lock);
16466 +       return hashed;
16467 +}
16468 +
16469 +#ifdef CONFIG_PROC_FS
16470 +
16471 +/*     get_xid_list()
16472 +
16473 +       * get a subset of hashed xids for proc, returns how many
16474 +       * assumes size is at least one                          */
16475 +
16476 +int get_xid_list(int index, unsigned int *xids, int size)
16477 +{
16478 +       int hindex, nr_xids = 0;
16479 +
16480 +       /* if the vx_check() fails, only the caller's own xid is reported */
16481 +       if (!vx_check(0, VS_ADMIN | VS_WATCH)) {
16482 +               if (index > 0)
16483 +                       return 0;
16484 +               xids[nr_xids] = vx_current_xid();
16485 +               return 1;
16486 +       }
16487 +
16488 +       for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) {
16489 +               struct hlist_head *head = &vx_info_hash[hindex];
16490 +               struct hlist_node *pos;
16491 +
16492 +               spin_lock(&vx_info_hash_lock);
16493 +               hlist_for_each(pos, head) {
16494 +                       struct vx_info *vxi;
16495 +
16496 +                       if (--index > 0)        /* countdown runs across buckets */
16497 +                               continue;
16498 +
16499 +                       vxi = hlist_entry(pos, struct vx_info, vx_hlist);
16500 +                       xids[nr_xids] = vxi->vx_id;
16501 +                       if (++nr_xids >= size) {        /* output buffer is full */
16502 +                               spin_unlock(&vx_info_hash_lock);
16503 +                               goto out;
16504 +                       }
16505 +               }
16506 +               /* the lock is dropped between buckets to keep hold times short */
16507 +               spin_unlock(&vx_info_hash_lock);
16508 +       }
16509 +out:
16510 +       return nr_xids;
16511 +}
16512 +#endif
16513 +
16514 +#ifdef CONFIG_VSERVER_DEBUG
16515 +/* debug helper: call dump_vx_info() for every entry on the inactive list */
16516 +void   dump_vx_info_inactive(int level)
16517 +{
16518 +       struct hlist_node *entry, *next;
16519 +       /* NOTE(review): the list is walked without vx_info_inactive_lock */
16520 +       hlist_for_each_safe(entry, next, &vx_info_inactive) {
16521 +               struct vx_info *vxi =
16522 +                       list_entry(entry, struct vx_info, vx_hlist);
16523 +
16524 +               dump_vx_info(vxi, level);
16525 +       }
16526 +}
16527 +
16528 +#endif
16529 +
16530 +#if 0
16531 +int vx_migrate_user(struct task_struct *p, struct vx_info *vxi)
16532 +{      /* compiled out (#if 0): switch p to the user_struct for (vxi->vx_id, p->uid) */
16533 +       struct user_struct *new_user, *old_user;
16534 +
16535 +       if (!p || !vxi)
16536 +               BUG();
16537 +
16538 +       if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0))
16539 +               return -EACCES;
16540 +
16541 +       new_user = alloc_uid(vxi->vx_id, p->uid);
16542 +       if (!new_user)
16543 +               return -ENOMEM;
16544 +
16545 +       old_user = p->user;
16546 +       if (new_user != old_user) {
16547 +               atomic_inc(&new_user->processes);       /* move the process count over */
16548 +               atomic_dec(&old_user->processes);
16549 +               p->user = new_user;
16550 +       }
16551 +       free_uid(old_user);
16552 +       return 0;
16553 +}
16554 +#endif
16555 +
16556 +#if 0
16557 +void vx_mask_cap_bset(struct vx_info *vxi, struct task_struct *p)
16558 +{      /* compiled out (#if 0): intersect p's three capability sets with vxi->cap_bset */
16559 +       // p->cap_effective &= vxi->vx_cap_bset;
16560 +       p->cap_effective =
16561 +               cap_intersect(p->cap_effective, vxi->cap_bset);
16562 +       // p->cap_inheritable &= vxi->vx_cap_bset;
16563 +       p->cap_inheritable =
16564 +               cap_intersect(p->cap_inheritable, vxi->cap_bset);
16565 +       // p->cap_permitted &= vxi->vx_cap_bset;
16566 +       p->cap_permitted =
16567 +               cap_intersect(p->cap_permitted, vxi->cap_bset);
16568 +}
16569 +#endif
16570 +
16571 +
16572 +#include <linux/file.h>
16573 +#include <linux/fdtable.h>
16574 +
16575 +static int vx_openfd_task(struct task_struct *tsk)
16576 +{      /* return the number of bits set in tsk's open_fds bitmap */
16577 +       struct files_struct *files = tsk->files;
16578 +       struct fdtable *fdt;
16579 +       const unsigned long *bptr;
16580 +       int count, total;
16581 +
16582 +       /* no rcu_read_lock() because of spin_lock() */
16583 +       spin_lock(&files->file_lock);
16584 +       fdt = files_fdtable(files);
16585 +       bptr = fdt->open_fds->fds_bits;
16586 +       count = fdt->max_fds / (sizeof(unsigned long) * 8);     /* bitmap length in longs */
16587 +       for (total = 0; count > 0; count--) {
16588 +               if (*bptr)
16589 +                       total += hweight_long(*bptr);   /* set bits in this word */
16590 +               bptr++;
16591 +       }
16592 +       spin_unlock(&files->file_lock);
16593 +       return total;
16594 +}
16595 +
16596 +
16597 +/*     for *space compatibility */
16598 +
16599 +asmlinkage long sys_unshare(unsigned long);
16600 +
16601 +/*
16602 + *     migrate task p into context vxi, moving its accounting along
16603 + *     gets vxi, puts old_vxi on change; returns 0 or a negative errno
16604 + *     optionally unshares namespaces (hack)
16605 + */
16606 +
16607 +int vx_migrate_task(struct task_struct *p, struct vx_info *vxi, int unshare)
16608 +{
16609 +       struct vx_info *old_vxi;
16610 +       int ret = 0;
16611 +
16612 +       if (!p || !vxi)
16613 +               BUG();
16614 +
16615 +       vxdprintk(VXD_CBIT(xid, 5),
16616 +               "vx_migrate_task(%p,%p[#%d.%d])", p, vxi,
16617 +               vxi->vx_id, atomic_read(&vxi->vx_usecnt));
16618 +
16619 +       if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0) &&
16620 +               !vx_info_flags(vxi, VXF_STATE_SETUP, 0))
16621 +               return -EACCES;
16622 +
16623 +       if (vx_info_state(vxi, VXS_SHUTDOWN))
16624 +               return -EFAULT;
16625 +
16626 +       old_vxi = task_get_vx_info(p);
16627 +       if (old_vxi == vxi)
16628 +               goto out;       /* already in vxi, nothing to move */
16629 +
16630 +//     if (!(ret = vx_migrate_user(p, vxi))) {
16631 +       {
16632 +               int openfd;
16633 +
16634 +               task_lock(p);
16635 +               openfd = vx_openfd_task(p);
16636 +
16637 +               if (old_vxi) {  /* remove p from the old context's counters */
16638 +                       atomic_dec(&old_vxi->cvirt.nr_threads);
16639 +                       atomic_dec(&old_vxi->cvirt.nr_running);
16640 +                       __rlim_dec(&old_vxi->limit, RLIMIT_NPROC);
16641 +                       /* FIXME: what about the struct files here? */
16642 +                       __rlim_sub(&old_vxi->limit, VLIMIT_OPENFD, openfd);
16643 +                       /* account for the executable */
16644 +                       __rlim_dec(&old_vxi->limit, VLIMIT_DENTRY);
16645 +               }
16646 +               atomic_inc(&vxi->cvirt.nr_threads);     /* and add it to the new one */
16647 +               atomic_inc(&vxi->cvirt.nr_running);
16648 +               __rlim_inc(&vxi->limit, RLIMIT_NPROC);
16649 +               /* FIXME: what about the struct files here? */
16650 +               __rlim_add(&vxi->limit, VLIMIT_OPENFD, openfd);
16651 +               /* account for the executable */
16652 +               __rlim_inc(&vxi->limit, VLIMIT_DENTRY);
16653 +
16654 +               if (old_vxi) {
16655 +                       release_vx_info(old_vxi, p);
16656 +                       clr_vx_info(&p->vx_info);
16657 +               }
16658 +               claim_vx_info(vxi, p);
16659 +               set_vx_info(&p->vx_info, vxi);
16660 +               p->xid = vxi->vx_id;
16661 +
16662 +               vxdprintk(VXD_CBIT(xid, 5),
16663 +                       "moved task %p into vxi:%p[#%d]",
16664 +                       p, vxi, vxi->vx_id);
16665 +
16666 +               // vx_mask_cap_bset(vxi, p);
16667 +               task_unlock(p);
16668 +
16669 +               /* hack for *spaces to provide compatibility */
16670 +               if (unshare) {
16671 +                       struct nsproxy *old_nsp, *new_nsp;
16672 +
16673 +                       ret = unshare_nsproxy_namespaces(
16674 +                               CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER,
16675 +                               &new_nsp, NULL);
16676 +                       if (ret)
16677 +                               goto out;       /* NOTE(review): p has already been moved here */
16678 +
16679 +                       old_nsp = xchg(&p->nsproxy, new_nsp);
16680 +                       vx_set_space(vxi,
16681 +                               CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER, 0);
16682 +                       put_nsproxy(old_nsp);
16683 +               }
16684 +       }
16685 +out:
16686 +       put_vx_info(old_vxi);   /* pairs with task_get_vx_info() above */
16687 +       return ret;
16688 +}
16689 +
16690 +int vx_set_reaper(struct vx_info *vxi, struct task_struct *p)
16691 +{      /* make p the reaper of vxi; -EINVAL if vxi is NULL, 0 otherwise */
16692 +       struct task_struct *old_reaper;
16693 +
16694 +       if (!vxi)
16695 +               return -EINVAL;
16696 +
16697 +       vxdprintk(VXD_CBIT(xid, 6),
16698 +               "vx_set_reaper(%p[#%d],%p[#%d,%d])",
16699 +               vxi, vxi->vx_id, p, p->xid, p->pid);
16700 +
16701 +       old_reaper = vxi->vx_reaper;
16702 +       if (old_reaper == p)
16703 +               return 0;       /* unchanged, reference counts stay as they are */
16704 +
16705 +       /* set new child reaper: reference p, drop the reference on the old one */
16706 +       get_task_struct(p);
16707 +       vxi->vx_reaper = p;
16708 +       put_task_struct(old_reaper);
16709 +       return 0;
16710 +}
16711 +
16712 +int vx_set_init(struct vx_info *vxi, struct task_struct *p)
16713 +{      /* record p's tgid as the context's init pid; -EINVAL if vxi is NULL */
16714 +       if (!vxi)
16715 +               return -EINVAL;
16716 +
16717 +       vxdprintk(VXD_CBIT(xid, 6),
16718 +               "vx_set_init(%p[#%d],%p[#%d,%d,%d])",
16719 +               vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
16720 +
16721 +       vxi->vx_flags &= ~VXF_STATE_INIT;       /* the flag is cleared once init is set */
16722 +       vxi->vx_initpid = p->tgid;
16723 +       return 0;
16724 +}
16725 +
16726 +void vx_exit_init(struct vx_info *vxi, struct task_struct *p, int code)
16727 +{      /* store the exit code and reset the context's init pid to 0 */
16728 +       vxdprintk(VXD_CBIT(xid, 6),
16729 +               "vx_exit_init(%p[#%d],%p[#%d,%d,%d])",
16730 +               vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
16731 +
16732 +       vxi->exit_code = code;
16733 +       vxi->vx_initpid = 0;
16734 +}
16735 +
16736 +
16737 +void vx_set_persistent(struct vx_info *vxi)
16738 +{      /* take an extra get()/claim() pair, undone by vx_clear_persistent() */
16739 +       vxdprintk(VXD_CBIT(xid, 6),
16740 +               "vx_set_persistent(%p[#%d])", vxi, vxi->vx_id);
16741 +
16742 +       get_vx_info(vxi);
16743 +       claim_vx_info(vxi, NULL);
16744 +}
16745 +
16746 +void vx_clear_persistent(struct vx_info *vxi)
16747 +{      /* drop the release()/put() pair matching vx_set_persistent() */
16748 +       vxdprintk(VXD_CBIT(xid, 6),
16749 +               "vx_clear_persistent(%p[#%d])", vxi, vxi->vx_id);
16750 +
16751 +       release_vx_info(vxi, NULL);
16752 +       put_vx_info(vxi);
16753 +}
16754 +
16755 +void vx_update_persistent(struct vx_info *vxi)
16756 +{      /* set or clear the persistent references per the VXF_PERSISTENT check */
16757 +       if (vx_info_flags(vxi, VXF_PERSISTENT, 0))
16758 +               vx_set_persistent(vxi);
16759 +       else
16760 +               vx_clear_persistent(vxi);       /* vc_set_cflags() calls this only when the bit changes */
16761 +}
16762 +
16763 +
16764 +/*     task must be current or locked          */
16765 +
16766 +void   exit_vx_info(struct task_struct *p, int code)
16767 +{      /* undo p's thread/nproc accounting, store code, release p's claim */
16768 +       struct vx_info *vxi = p->vx_info;
16769 +
16770 +       if (vxi) {
16771 +               atomic_dec(&vxi->cvirt.nr_threads);
16772 +               vx_nproc_dec(p);
16773 +
16774 +               vxi->exit_code = code;
16775 +               release_vx_info(vxi, p);
16776 +       }
16777 +}
16778 +
16779 +void   exit_vx_info_early(struct task_struct *p, int code)
16780 +{      /* detach p from its context's init and reaper roles, if it holds them */
16781 +       struct vx_info *vxi = p->vx_info;
16782 +
16783 +       if (vxi) {
16784 +               if (vxi->vx_initpid == p->tgid) /* p belongs to the context's init */
16785 +                       vx_exit_init(vxi, p, code);
16786 +               if (vxi->vx_reaper == p)        /* hand reaping back to the init_pid_ns reaper */
16787 +                       vx_set_reaper(vxi, init_pid_ns.child_reaper);
16788 +       }
16789 +}
16790 +
16791 +
16792 +/* vserver syscall commands below here */
16793 +
16794 +/* task xid and vx_info functions */
16795 +
16796 +#include <asm/uaccess.h>
16797 +
16798 +
16799 +int vc_task_xid(uint32_t id)
16800 +{      /* xid of the task with pid id (0: vx_current_xid()), -ESRCH if not found */
16801 +       xid_t xid;
16802 +
16803 +       if (id) {
16804 +               struct task_struct *tsk;
16805 +
16806 +               read_lock(&tasklist_lock);
16807 +               tsk = find_task_by_real_pid(id);
16808 +               xid = (tsk) ? tsk->xid : -ESRCH;        /* read while tasklist_lock is held */
16809 +               read_unlock(&tasklist_lock);
16810 +       } else
16811 +               xid = vx_current_xid();
16812 +       return xid;
16813 +}
16814 +
16815 +
16816 +int vc_vx_info(struct vx_info *vxi, void __user *data)
16817 +{      /* copy the context's xid and init pid to user space */
16818 +       struct vcmd_vx_info_v0 vc_data;
16819 +
16820 +       vc_data.xid = vxi->vx_id;
16821 +       vc_data.initpid = vxi->vx_initpid;
16822 +
16823 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
16824 +               return -EFAULT; /* copy to user space failed */
16825 +       return 0;
16826 +}
16827 +
16828 +
16829 +int vc_ctx_stat(struct vx_info *vxi, void __user *data)
16830 +{      /* copy the current vx_usecnt and vx_tasks counters to user space */
16831 +       struct vcmd_ctx_stat_v0 vc_data;
16832 +
16833 +       vc_data.usecnt = atomic_read(&vxi->vx_usecnt);
16834 +       vc_data.tasks = atomic_read(&vxi->vx_tasks);
16835 +
16836 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
16837 +               return -EFAULT; /* copy to user space failed */
16838 +       return 0;
16839 +}
16840 +
16841 +
16842 +/* context functions */
16843 +
16844 +int vc_ctx_create(uint32_t xid, void __user *data)
16845 +{      /* create context xid and move the caller into it; returns xid or -errno */
16846 +       struct vcmd_ctx_create vc_data = { .flagword = VXF_INIT_SET };
16847 +       struct vx_info *new_vxi;
16848 +       int ret;
16849 +
16850 +       if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
16851 +               return -EFAULT;
16852 +
16853 +       if ((xid > MAX_S_CONTEXT) || (xid < 2))
16854 +               return -EINVAL;
16855 +
16856 +       new_vxi = __create_vx_info(xid);
16857 +       if (IS_ERR(new_vxi))
16858 +               return PTR_ERR(new_vxi);
16859 +
16860 +       /* initial flags: VXF_INIT_SET unless the caller supplied a flagword */
16861 +       new_vxi->vx_flags = vc_data.flagword;
16862 +
16863 +       ret = -ENOEXEC;
16864 +       if (vs_state_change(new_vxi, VSC_STARTUP))
16865 +               goto out;
16866 +
16867 +       ret = vx_migrate_task(current, new_vxi, (!data));       /* unshare only if no data was passed */
16868 +       if (ret)
16869 +               goto out;
16870 +
16871 +       /* return context id on success */
16872 +       ret = new_vxi->vx_id;
16873 +
16874 +       /* get a reference for persistent contexts */
16875 +       if ((vc_data.flagword & VXF_PERSISTENT))
16876 +               vx_set_persistent(new_vxi);
16877 +out:
16878 +       release_vx_info(new_vxi, NULL); /* drop the claim and reference from __create_vx_info() */
16879 +       put_vx_info(new_vxi);
16880 +       return ret;
16881 +}
16882 +
16883 +
16884 +int vc_ctx_migrate(struct vx_info *vxi, void __user *data)
16885 +{      /* move the caller into vxi, optionally making it init and/or reaper */
16886 +       struct vcmd_ctx_migrate vc_data = { .flagword = 0 };
16887 +       int ret;
16888 +
16889 +       if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
16890 +               return -EFAULT;
16891 +
16892 +       ret = vx_migrate_task(current, vxi, 0); /* no namespace unshare here */
16893 +       if (ret)
16894 +               return ret;
16895 +       if (vc_data.flagword & VXM_SET_INIT)
16896 +               ret = vx_set_init(vxi, current);
16897 +       if (ret)
16898 +               return ret;
16899 +       if (vc_data.flagword & VXM_SET_REAPER)
16900 +               ret = vx_set_reaper(vxi, current);
16901 +       return ret;
16902 +}
16903 +
16904 +
16905 +int vc_get_cflags(struct vx_info *vxi, void __user *data)
16906 +{      /* copy the context flags plus a vs_mask_flags() derived mask to user space */
16907 +       struct vcmd_ctx_flags_v0 vc_data;
16908 +
16909 +       vc_data.flagword = vxi->vx_flags;
16910 +
16911 +       /* special STATE flag handling */
16912 +       vc_data.mask = vs_mask_flags(~0ULL, vxi->vx_flags, VXF_ONE_TIME);
16913 +
16914 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
16915 +               return -EFAULT; /* copy to user space failed */
16916 +       return 0;
16917 +}
16918 +
16919 +int vc_set_cflags(struct vx_info *vxi, void __user *data)
16920 +{      /* apply user supplied flagword/mask to vxi->vx_flags and act on changed bits */
16921 +       struct vcmd_ctx_flags_v0 vc_data;
16922 +       uint64_t mask, trigger;
16923 +
16924 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16925 +               return -EFAULT;
16926 +
16927 +       /* special STATE flag handling */
16928 +       mask = vs_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME);
16929 +       trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword);  /* masked bits that differ */
16930 +
16931 +       if (vxi == current->vx_info) {
16932 +               /* if (trigger & VXF_STATE_SETUP)
16933 +                       vx_mask_cap_bset(vxi, current); */
16934 +               if (trigger & VXF_STATE_INIT) { /* caller becomes init and reaper */
16935 +                       int ret;
16936 +
16937 +                       ret = vx_set_init(vxi, current);
16938 +                       if (ret)
16939 +                               return ret;
16940 +                       ret = vx_set_reaper(vxi, current);
16941 +                       if (ret)
16942 +                               return ret;
16943 +               }
16944 +       }
16945 +
16946 +       vxi->vx_flags = vs_mask_flags(vxi->vx_flags,
16947 +               vc_data.flagword, mask);
16948 +       if (trigger & VXF_PERSISTENT)   /* only when the persistent bit changed */
16949 +               vx_update_persistent(vxi);
16950 +
16951 +       return 0;
16952 +}
16953 +
16954 +
16955 +static inline uint64_t caps_from_cap_t(kernel_cap_t c)
16956 +{      /* pack cap[0] (low 32 bits) and cap[1] (high 32 bits) into one value */
16957 +       uint64_t v = c.cap[0] | ((uint64_t)c.cap[1] << 32);
16958 +
16959 +       // printk("caps_from_cap_t(%08x:%08x) = %016llx\n", c.cap[1], c.cap[0], v);
16960 +       return v;
16961 +}
16962 +
16963 +static inline kernel_cap_t cap_t_from_caps(uint64_t v)
16964 +{      /* inverse of caps_from_cap_t(): split v into cap[0] and cap[1] */
16965 +       kernel_cap_t c = __cap_empty_set;
16966 +
16967 +       c.cap[0] = v & 0xFFFFFFFF;      /* low 32 bits */
16968 +       c.cap[1] = (v >> 32) & 0xFFFFFFFF;      /* high 32 bits */
16969 +
16970 +       // printk("cap_t_from_caps(%016llx) = %08x:%08x\n", v, c.cap[1], c.cap[0]);
16971 +       return c;
16972 +}
16973 +
16974 +
16975 +static int do_get_caps(struct vx_info *vxi, uint64_t *bcaps, uint64_t *ccaps)
16976 +{      /* store vx_bcaps and/or vx_ccaps through the non-NULL pointers; always 0 */
16977 +       if (bcaps)
16978 +               *bcaps = caps_from_cap_t(vxi->vx_bcaps);
16979 +       if (ccaps)
16980 +               *ccaps = vxi->vx_ccaps;
16981 +
16982 +       return 0;
16983 +}
16984 +
16985 +int vc_get_ccaps(struct vx_info *vxi, void __user *data)
16986 +{      /* copy vx_ccaps with an all-ones cmask to user space */
16987 +       struct vcmd_ctx_caps_v1 vc_data;
16988 +       int ret;
16989 +
16990 +       ret = do_get_caps(vxi, NULL, &vc_data.ccaps);
16991 +       if (ret)
16992 +               return ret;
16993 +       vc_data.cmask = ~0ULL;
16994 +
16995 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
16996 +               return -EFAULT; /* copy to user space failed */
16997 +       return 0;
16998 +}
16999 +
17000 +static int do_set_caps(struct vx_info *vxi,
17001 +       uint64_t bcaps, uint64_t bmask, uint64_t ccaps, uint64_t cmask)
17002 +{      /* update vx_bcaps and vx_ccaps via vs_mask_flags(); always returns 0 */
17003 +       uint64_t bcold = caps_from_cap_t(vxi->vx_bcaps);
17004 +
17005 +#if 0
17006 +       printk("do_set_caps(%16llx, %16llx, %16llx, %16llx)\n",
17007 +               bcaps, bmask, ccaps, cmask);
17008 +#endif
17009 +       vxi->vx_bcaps = cap_t_from_caps(
17010 +               vs_mask_flags(bcold, bcaps, bmask));
17011 +       vxi->vx_ccaps = vs_mask_flags(vxi->vx_ccaps, ccaps, cmask);
17012 +
17013 +       return 0;
17014 +}
17015 +
17016 +int vc_set_ccaps(struct vx_info *vxi, void __user *data)
17017 +{      /* set the context ccaps from a user supplied value/mask pair */
17018 +       struct vcmd_ctx_caps_v1 vc_data;
17019 +
17020 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17021 +               return -EFAULT;
17022 +
17023 +       return do_set_caps(vxi, 0, 0, vc_data.ccaps, vc_data.cmask);   /* bmask 0: presumably leaves vx_bcaps alone */
17024 +}
17025 +
17026 +int vc_get_bcaps(struct vx_info *vxi, void __user *data)
17027 +{      /* copy vx_bcaps (as 64 bit value) with an all-ones bmask to user space */
17028 +       struct vcmd_bcaps vc_data;
17029 +       int ret;
17030 +
17031 +       ret = do_get_caps(vxi, &vc_data.bcaps, NULL);
17032 +       if (ret)
17033 +               return ret;
17034 +       vc_data.bmask = ~0ULL;
17035 +
17036 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17037 +               return -EFAULT; /* copy to user space failed */
17038 +       return 0;
17039 +}
17040 +
17041 +int vc_set_bcaps(struct vx_info *vxi, void __user *data)
17042 +{      /* set the context bcaps from a user supplied value/mask pair */
17043 +       struct vcmd_bcaps vc_data;
17044 +
17045 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17046 +               return -EFAULT;
17047 +
17048 +       return do_set_caps(vxi, vc_data.bcaps, vc_data.bmask, 0, 0);   /* cmask 0: presumably leaves vx_ccaps alone */
17049 +}
17050 +
17051 +
17052 +int vc_get_badness(struct vx_info *vxi, void __user *data)
17053 +{
17054 +       /* hand the context's vx_badness_bias back to the caller */
17055 +       struct vcmd_badness_v0 reply;
17056 +       unsigned long left;
17057 +
17058 +       reply.bias = vxi->vx_badness_bias;
17059 +       left = copy_to_user(data, &reply, sizeof(reply));
17060 +       return left ? -EFAULT : 0;
17061 +}
17062 +
17063 +int vc_set_badness(struct vx_info *vxi, void __user *data)
17064 +{
17065 +       struct vcmd_badness_v0 req;
17066 +
17067 +       /* nothing is stored unless the whole request could be read */
17068 +       if (copy_from_user(&req, data, sizeof(req)) != 0)
17069 +               return -EFAULT;
17070 +       vxi->vx_badness_bias = req.bias;
17071 +       return 0;
17072 +}
17073 +
17074 +#include <linux/module.h>
17075 +
17076 +EXPORT_SYMBOL_GPL(free_vx_info);
17077 +
17078 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/cvirt.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/cvirt.c
17079 --- linux-2.6.30.2/kernel/vserver/cvirt.c       1970-01-01 01:00:00.000000000 +0100
17080 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/cvirt.c    2009-07-04 01:11:39.000000000 +0200
17081 @@ -0,0 +1,300 @@
17082 +/*
17083 + *  linux/kernel/vserver/cvirt.c
17084 + *
17085 + *  Virtual Server: Context Virtualization
17086 + *
17087 + *  Copyright (C) 2004-2007  Herbert Pötzl
17088 + *
17089 + *  V0.01  broken out from limit.c
17090 + *  V0.02  added utsname stuff
17091 + *  V0.03  changed vcmds to vxi arg
17092 + *
17093 + */
17094 +
17095 +#include <linux/types.h>
17096 +#include <linux/utsname.h>
17097 +#include <linux/vs_cvirt.h>
17098 +#include <linux/vserver/switch.h>
17099 +#include <linux/vserver/cvirt_cmd.h>
17100 +
17101 +#include <asm/uaccess.h>
17102 +
17103 +
17104 +void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle)
17105 +{
17106 +       struct vx_info *ctx = current->vx_info;
17107 +
17108 +       /* rebase the uptime on the bias stored in the caller's context */
17109 +       set_normalized_timespec(uptime,
17110 +               uptime->tv_sec - ctx->cvirt.bias_uptime.tv_sec,
17111 +               uptime->tv_nsec - ctx->cvirt.bias_uptime.tv_nsec);
17112 +       /* idle is optional and gets the same treatment when present */
17113 +       if (idle)
17114 +               set_normalized_timespec(idle,
17115 +                       idle->tv_sec - ctx->cvirt.bias_idle.tv_sec,
17116 +                       idle->tv_nsec - ctx->cvirt.bias_idle.tv_nsec);
17117 +}
17118 +
17119 +uint64_t vx_idle_jiffies(void)
17120 +{      /* sum of init_task's utime and stime, used as the idle time base */
17121 +       return init_task.utime + init_task.stime;
17122 +}
17123 +
17124 +
17125 +
17126 +static inline uint32_t __update_loadavg(uint32_t load,
17127 +       int wsize, int delta, int n)
17128 +{      /* blend sample @n into @load, weighted by @delta out of a @wsize window */
17129 +       unsigned long long calc, prev;
17130 +
17131 +       /* window fully elapsed (or delta wrapped negative): just set it to n */
17132 +       if (unlikely(delta < 0 || delta >= wsize))
17133 +               return (n << FSHIFT);
17134 +
17135 +       calc = (unsigned long long)delta * n;   /* widen first: int product may overflow */
17136 +       calc <<= FSHIFT;
17137 +       prev = (wsize - delta);
17138 +       prev *= load;
17139 +       calc += prev;
17140 +       do_div(calc, wsize);    /* ((delta * n) << FSHIFT + (wsize - delta) * load) / wsize */
17141 +       return calc;
17142 +}
17143 +
17144 +
17145 +void vx_update_load(struct vx_info *vxi)
17146 +{      /* recompute the three load values of @vxi once 5*HZ jiffies have passed */
17147 +       uint32_t now, last, delta;
17148 +       unsigned int nr_running, nr_uninterruptible;
17149 +       unsigned int total;
17150 +       unsigned long flags;
17151 +
17152 +       spin_lock_irqsave(&vxi->cvirt.load_lock, flags);
17153 +
17154 +       now = jiffies;
17155 +       last = vxi->cvirt.load_last;
17156 +       delta = now - last;     /* jiffies since the last recalculation */
17157 +
17158 +       if (delta < 5*HZ)
17159 +               goto out;
17160 +
17161 +       nr_running = atomic_read(&vxi->cvirt.nr_running);
17162 +       nr_uninterruptible = atomic_read(&vxi->cvirt.nr_uninterruptible);
17163 +       total = nr_running + nr_uninterruptible;
17164 +       /* one sample for all slots; window sizes 60*HZ, 5*60*HZ and 15*60*HZ */
17165 +       vxi->cvirt.load[0] = __update_loadavg(vxi->cvirt.load[0],
17166 +               60*HZ, delta, total);
17167 +       vxi->cvirt.load[1] = __update_loadavg(vxi->cvirt.load[1],
17168 +               5*60*HZ, delta, total);
17169 +       vxi->cvirt.load[2] = __update_loadavg(vxi->cvirt.load[2],
17170 +               15*60*HZ, delta, total);
17171 +
17172 +       vxi->cvirt.load_last = now;
17173 +out:   /* load_updates is bumped on every call, skipped recalculations included */
17174 +       atomic_inc(&vxi->cvirt.load_updates);
17175 +       spin_unlock_irqrestore(&vxi->cvirt.load_lock, flags);
17176 +}
17177 +
17178 +
17179 +/*
17180 + * vx_do_syslog(): syslog commands against the caller's context log
17181 + * (current->vx_info->cvirt.syslog).  @type takes the do_syslog numbers:
17182 + *      0 -- Close the log.  Currently a NOP.
17183 + *      1 -- Open the log. Currently a NOP.
17184 + *      2 -- Read from the log.
17185 + *      3 -- Read all messages remaining in the ring buffer.
17186 + *      4 -- Read and clear all messages remaining in the ring buffer
17187 + *      5 -- Clear ring buffer.
17188 + *      6 -- Disable printk's to console
17189 + *      7 -- Enable printk's to console
17190 + *      8 -- Set level of messages printed to console
17191 + *      9 -- Return number of unread characters in the log buffer
17192 + *     10 -- Return size of the log buffer
17193 + */
17194 +int vx_do_syslog(int type, char __user *buf, int len)
17195 +{      /* @buf and @len are not used by any branch below */
17196 +       int error = 0;
17197 +       int do_clear = 0;       /* set for type 4, never read afterwards */
17198 +       struct vx_info *vxi = current->vx_info;
17199 +       struct _vx_syslog *log;
17200 +
17201 +       if (!vxi)
17202 +               return -EINVAL; /* caller has no context */
17203 +       log = &vxi->cvirt.syslog;
17204 +
17205 +       switch (type) {
17206 +       case 0:         /* Close log */
17207 +       case 1:         /* Open log */
17208 +               break;
17209 +       case 2:         /* Read from log */
17210 +               error = wait_event_interruptible(log->log_wait,
17211 +                       (log->log_start - log->log_end));
17212 +               if (error)
17213 +                       break;
17214 +               spin_lock_irq(&log->logbuf_lock);       /* no data is copied out yet */
17215 +               spin_unlock_irq(&log->logbuf_lock);
17216 +               break;
17217 +       case 4:         /* Read/clear last kernel messages */
17218 +               do_clear = 1;
17219 +               /* fall through */
17220 +       case 3:         /* Read last kernel messages */
17221 +               return 0;
17222 +
17223 +       case 5:         /* Clear ring buffer */
17224 +               return 0;
17225 +
17226 +       case 6:         /* Disable logging to console */
17227 +       case 7:         /* Enable logging to console */
17228 +       case 8:         /* Set level of messages printed to console */
17229 +               break;
17230 +
17231 +       case 9:         /* Number of chars in the log buffer */
17232 +               return 0;
17233 +       case 10:        /* Size of the log buffer */
17234 +               return 0;
17235 +       default:
17236 +               error = -EINVAL;
17237 +               break;
17238 +       }
17239 +       return error;   /* 0, -EINVAL, or the error of an interrupted wait */
17240 +}
17241 +
17242 +
17243 +/* virtual host info names */
17244 +
17245 +static char *vx_vhi_name(struct vx_info *vxi, int id)
17246 +{
17247 +       struct nsproxy *proxy;
17248 +       struct uts_namespace *ns_uts;
17249 +
17250 +       /* the context name is kept in the vx_info itself */
17251 +       if (id == VHIN_CONTEXT)
17252 +               return vxi->vx_name;
17253 +
17254 +       /* all other ids are served from the uts namespace of nsproxy slot 0 */
17255 +       proxy = vxi->vx_nsproxy[0];
17256 +       ns_uts = proxy ? proxy->uts_ns : NULL;
17257 +       if (!ns_uts)
17258 +               return NULL;
17259 +
17260 +       switch (id) {
17261 +       case VHIN_SYSNAME:
17262 +               return ns_uts->name.sysname;
17263 +       case VHIN_NODENAME:
17264 +               return ns_uts->name.nodename;
17265 +       case VHIN_RELEASE:
17266 +               return ns_uts->name.release;
17267 +       case VHIN_VERSION:
17268 +               return ns_uts->name.version;
17269 +       case VHIN_MACHINE:
17270 +               return ns_uts->name.machine;
17271 +       case VHIN_DOMAINNAME:
17272 +               return ns_uts->name.domainname;
17273 +       default:
17274 +               break;
17275 +       }
17276 +       /* unknown id: there is no buffer to hand out */
17277 +       return NULL;
17278 +}
17279 +
17280 +int vc_set_vhi_name(struct vx_info *vxi, void __user *data)
17281 +{      /* overwrite the name selected by the request's field id; 0 or -errno */
17282 +       struct vcmd_vhi_name_v0 vc_data;
17283 +       char *name;
17284 +
17285 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17286 +               return -EFAULT;
17287 +
17288 +       name = vx_vhi_name(vxi, vc_data.field); /* NULL: unknown id or no uts ns */
17289 +       if (!name)
17290 +               return -EINVAL;
17291 +       vc_data.name[64] = 0;   /* user data may lack a NUL; never store an unterminated name */
17292 +       memcpy(name, vc_data.name, 65);
17293 +       return 0;
17294 +}
17295 +
17296 +int vc_get_vhi_name(struct vx_info *vxi, void __user *data)
17297 +{
17298 +       struct vcmd_vhi_name_v0 xfer;
17299 +       char *src;
17300 +
17301 +       /* the request is read first because it carries the field selector */
17302 +       if (copy_from_user(&xfer, data, sizeof(xfer)) != 0)
17303 +               return -EFAULT;
17304 +
17305 +       src = vx_vhi_name(vxi, xfer.field);
17306 +       if (src == NULL)
17307 +               return -EINVAL;
17308 +
17309 +       /* fixed 65 byte copy, then the whole struct goes back to the caller */
17310 +       memcpy(xfer.name, src, 65);
17311 +       return copy_to_user(data, &xfer, sizeof(xfer)) ? -EFAULT : 0;
17312 +}
17313 +
17314 +
17315 +int vc_virt_stat(struct vx_info *vxi, void __user *data)
17316 +{
17317 +       struct _vx_cvirt *cv = &vxi->cvirt;
17318 +       struct vcmd_virt_stat_v0 st;
17319 +       struct timespec up;
17320 +       int i;
17321 +
17322 +       /* context uptime = monotonic clock minus the stored uptime bias */
17323 +       do_posix_clock_monotonic_gettime(&up);
17324 +       set_normalized_timespec(&up,
17325 +               up.tv_sec - cv->bias_uptime.tv_sec,
17326 +               up.tv_nsec - cv->bias_uptime.tv_nsec);
17327 +       st.offset = timeval_to_ns(&cv->bias_tv);
17328 +       st.uptime = timespec_to_ns(&up);
17329 +
17330 +       st.nr_threads = atomic_read(&cv->nr_threads);
17331 +       st.nr_running = atomic_read(&cv->nr_running);
17332 +       st.nr_uninterruptible = atomic_read(&cv->nr_uninterruptible);
17333 +       st.nr_onhold = atomic_read(&cv->nr_onhold);
17334 +       st.nr_forks = atomic_read(&cv->total_forks);
17335 +       for (i = 0; i < 3; i++)
17336 +               st.load[i] = cv->load[i];
17337 +
17338 +       /* snapshot complete; a failed copy-out is reported as -EFAULT */
17339 +       return copy_to_user(data, &st, sizeof(st)) ? -EFAULT : 0;
17340 +}
17341 +
17342 +
17343 +#ifdef CONFIG_VSERVER_VTIME
17344 +
17345 +/* virtualized time base */
17346 +
17347 +void vx_gettimeofday(struct timeval *tv)
17348 +{      /* host time of day, shifted by the context's bias_tv when VXF_VIRT_TIME applies */
17349 +       do_gettimeofday(tv);
17350 +       if (!vx_flags(VXF_VIRT_TIME, 0))
17351 +               return;         /* not virtualized: *tv keeps the host time */
17352 +
17353 +       tv->tv_sec += current->vx_info->cvirt.bias_tv.tv_sec;
17354 +       tv->tv_usec += current->vx_info->cvirt.bias_tv.tv_usec;
17355 +       /* a single carry or borrow step follows; the sum may also be negative */
17356 +       if (tv->tv_usec >= USEC_PER_SEC) {
17357 +               tv->tv_sec++;
17358 +               tv->tv_usec -= USEC_PER_SEC;
17359 +       } else if (tv->tv_usec < 0) {
17360 +               tv->tv_sec--;
17361 +               tv->tv_usec += USEC_PER_SEC;
17362 +       }
17363 +}
17364 +
17365 +int vx_settimeofday(struct timespec *ts)
17366 +{      /* set the host clock, or with VXF_VIRT_TIME only record an offset to it */
17367 +       struct timeval tv;
17368 +
17369 +       if (!vx_flags(VXF_VIRT_TIME, 0))
17370 +               return do_settimeofday(ts);
17371 +       if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
17372 +               return -EINVAL;         /* same range check do_settimeofday() applies */
17373 +       do_gettimeofday(&tv);           /* bias = requested time - current host time */
17374 +       current->vx_info->cvirt.bias_tv.tv_sec = ts->tv_sec - tv.tv_sec;
17375 +       current->vx_info->cvirt.bias_tv.tv_usec =
17376 +               (ts->tv_nsec/NSEC_PER_USEC) - tv.tv_usec;
17377 +       return 0;
17378 +}
17379 +
17380 +#endif
17381 +
17382 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/cvirt_init.h linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/cvirt_init.h
17383 --- linux-2.6.30.2/kernel/vserver/cvirt_init.h  1970-01-01 01:00:00.000000000 +0100
17384 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/cvirt_init.h       2009-07-04 01:11:39.000000000 +0200
17385 @@ -0,0 +1,69 @@
17386 +
17387 +
17388 +extern uint64_t vx_idle_jiffies(void);
17389 +
17390 +static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt)
17391 +{      /* set up the biases, counters, load state and syslog state of @cvirt */
17392 +       uint64_t idle_jiffies = vx_idle_jiffies();
17393 +       uint64_t nsuptime;
17394 +       /* uptime bias = monotonic clock right now; also kept in clock_t units */
17395 +       do_posix_clock_monotonic_gettime(&cvirt->bias_uptime);
17396 +       nsuptime = (unsigned long long)cvirt->bias_uptime.tv_sec
17397 +               * NSEC_PER_SEC + cvirt->bias_uptime.tv_nsec;
17398 +       cvirt->bias_clock = nsec_to_clock_t(nsuptime);
17399 +       cvirt->bias_tv.tv_sec = 0;      /* no time-of-day offset initially */
17400 +       cvirt->bias_tv.tv_usec = 0;
17401 +
17402 +       jiffies_to_timespec(idle_jiffies, &cvirt->bias_idle);
17403 +       atomic_set(&cvirt->nr_threads, 0);
17404 +       atomic_set(&cvirt->nr_running, 0);
17405 +       atomic_set(&cvirt->nr_uninterruptible, 0);
17406 +       atomic_set(&cvirt->nr_onhold, 0);
17407 +       /* load state: all values zero, last update stamped with jiffies */
17408 +       spin_lock_init(&cvirt->load_lock);
17409 +       cvirt->load_last = jiffies;
17410 +       atomic_set(&cvirt->load_updates, 0);
17411 +       cvirt->load[0] = 0;
17412 +       cvirt->load[1] = 0;
17413 +       cvirt->load[2] = 0;
17414 +       atomic_set(&cvirt->total_forks, 0);
17415 +       /* per-context syslog: lock, wait queue and zeroed positions */
17416 +       spin_lock_init(&cvirt->syslog.logbuf_lock);
17417 +       init_waitqueue_head(&cvirt->syslog.log_wait);
17418 +       cvirt->syslog.log_start = 0;
17419 +       cvirt->syslog.log_end = 0;
17420 +       cvirt->syslog.con_start = 0;
17421 +       cvirt->syslog.logged_chars = 0;
17422 +}
17423 +
17424 +static inline
17425 +void vx_info_init_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu)
17426 +{
17427 +       /* nothing is initialized here; the cpustat reset is disabled: cvirt_pc->cpustat = { 0 }; */
17428 +}
17429 +
17430 +static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt)
17431 +{      /* hand each task counter of @cvirt to vxwprintk_xid() at teardown */
17432 +       int value;
17433 +       /* NOTE(review): presumably a message is only printed for a non-zero counter; confirm in vxwprintk_xid */
17434 +       vxwprintk_xid((value = atomic_read(&cvirt->nr_threads)),
17435 +               "!!! cvirt: %p[nr_threads] = %d on exit.",
17436 +               cvirt, value);
17437 +       vxwprintk_xid((value = atomic_read(&cvirt->nr_running)),
17438 +               "!!! cvirt: %p[nr_running] = %d on exit.",
17439 +               cvirt, value);
17440 +       vxwprintk_xid((value = atomic_read(&cvirt->nr_uninterruptible)),
17441 +               "!!! cvirt: %p[nr_uninterruptible] = %d on exit.",
17442 +               cvirt, value);
17443 +       vxwprintk_xid((value = atomic_read(&cvirt->nr_onhold)),
17444 +               "!!! cvirt: %p[nr_onhold] = %d on exit.",
17445 +               cvirt, value);
17446 +       return;
17447 +}
17448 +
17449 +static inline
17450 +void vx_info_exit_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu)
17451 +{      /* no per-cpu teardown work; both arguments are unused */
17452 +       return;
17453 +}
17454 +
17455 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/cvirt_proc.h linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/cvirt_proc.h
17456 --- linux-2.6.30.2/kernel/vserver/cvirt_proc.h  1970-01-01 01:00:00.000000000 +0100
17457 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/cvirt_proc.h       2009-07-04 01:11:39.000000000 +0200
17458 @@ -0,0 +1,135 @@
17459 +#ifndef _VX_CVIRT_PROC_H
17460 +#define _VX_CVIRT_PROC_H
17461 +
17462 +#include <linux/nsproxy.h>
17463 +#include <linux/mnt_namespace.h>
17464 +#include <linux/ipc_namespace.h>
17465 +#include <linux/utsname.h>
17466 +#include <linux/ipc.h>
17467 +
17468 +
17469 +static inline
17470 +int vx_info_proc_nsproxy(struct nsproxy *nsproxy, char *buffer)
17471 +{      /* format @nsproxy's mnt, uts and ipc details into @buffer; returns chars written */
17472 +       struct mnt_namespace *ns;
17473 +       struct uts_namespace *uts;
17474 +       struct ipc_namespace *ipc;
17475 +       struct path path;
17476 +       char *pstr, *root;
17477 +       int length = 0;
17478 +
17479 +       if (!nsproxy)
17480 +               goto out;       /* nothing to report, length stays 0 */
17481 +
17482 +       length += sprintf(buffer + length,
17483 +               "NSProxy:\t%p [%p,%p,%p]\n",
17484 +               nsproxy, nsproxy->mnt_ns,
17485 +               nsproxy->uts_ns, nsproxy->ipc_ns);
17486 +
17487 +       ns = nsproxy->mnt_ns;
17488 +       if (!ns)
17489 +               goto skip_ns;
17490 +       /* scratch buffer for d_path(); the section is silently skipped on failure */
17491 +       pstr = kmalloc(PATH_MAX, GFP_KERNEL);
17492 +       if (!pstr)
17493 +               goto skip_ns;
17494 +       /* NOTE(review): root is printed unchecked; d_path() can return an ERR_PTR - confirm */
17495 +       path.mnt = ns->root;
17496 +       path.dentry = ns->root->mnt_root;
17497 +       root = d_path(&path, pstr, PATH_MAX - 2);
17498 +       length += sprintf(buffer + length,
17499 +               "Namespace:\t%p [#%u]\n"
17500 +               "RootPath:\t%s\n",
17501 +               ns, atomic_read(&ns->count),
17502 +               root);
17503 +       kfree(pstr);
17504 +skip_ns:
17505 +
17506 +       uts = nsproxy->uts_ns;
17507 +       if (!uts)
17508 +               goto skip_uts;
17509 +       /* each name is printed with a precision of __NEW_UTS_LEN characters */
17510 +       length += sprintf(buffer + length,
17511 +               "SysName:\t%.*s\n"
17512 +               "NodeName:\t%.*s\n"
17513 +               "Release:\t%.*s\n"
17514 +               "Version:\t%.*s\n"
17515 +               "Machine:\t%.*s\n"
17516 +               "DomainName:\t%.*s\n",
17517 +               __NEW_UTS_LEN, uts->name.sysname,
17518 +               __NEW_UTS_LEN, uts->name.nodename,
17519 +               __NEW_UTS_LEN, uts->name.release,
17520 +               __NEW_UTS_LEN, uts->name.version,
17521 +               __NEW_UTS_LEN, uts->name.machine,
17522 +               __NEW_UTS_LEN, uts->name.domainname);
17523 +skip_uts:
17524 +
17525 +       ipc = nsproxy->ipc_ns;
17526 +       if (!ipc)
17527 +               goto skip_ipc;
17528 +       /* semaphore, message queue and shared memory settings of the ipc namespace */
17529 +       length += sprintf(buffer + length,
17530 +               "SEMS:\t\t%d %d %d %d  %d\n"
17531 +               "MSG:\t\t%d %d %d\n"
17532 +               "SHM:\t\t%lu %lu  %d %d\n",
17533 +               ipc->sem_ctls[0], ipc->sem_ctls[1],
17534 +               ipc->sem_ctls[2], ipc->sem_ctls[3],
17535 +               ipc->used_sems,
17536 +               ipc->msg_ctlmax, ipc->msg_ctlmnb, ipc->msg_ctlmni,
17537 +               (unsigned long)ipc->shm_ctlmax,
17538 +               (unsigned long)ipc->shm_ctlall,
17539 +               ipc->shm_ctlmni, ipc->shm_tot);
17540 +skip_ipc:
17541 +out:
17542 +       return length;
17543 +}
17544 +
17545 +
17546 +#include <linux/sched.h>
17547 +
17548 +#define LOAD_INT(x) ((x) >> FSHIFT)    /* integer part of an FSHIFT fixed-point value */
17549 +#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100)     /* fraction in hundredths */
17550 +
17551 +static inline
17552 +int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer)
17553 +{      /* format the counters and load values of @cvirt into @buffer; returns chars written */
17554 +       int length = 0;
17555 +       int a, b, c;
17556 +       /* uptime bias as seconds plus hundredths of a second */
17557 +       length += sprintf(buffer + length,
17558 +               "BiasUptime:\t%lu.%02lu\n",
17559 +               (unsigned long)cvirt->bias_uptime.tv_sec,
17560 +               (cvirt->bias_uptime.tv_nsec / (NSEC_PER_SEC / 100)));
17561 +       /* FIXED_1 / 200 is added before the split into integer part and hundredths */
17562 +       a = cvirt->load[0] + (FIXED_1 / 200);
17563 +       b = cvirt->load[1] + (FIXED_1 / 200);
17564 +       c = cvirt->load[2] + (FIXED_1 / 200);
17565 +       length += sprintf(buffer + length,
17566 +               "nr_threads:\t%d\n"
17567 +               "nr_running:\t%d\n"
17568 +               "nr_unintr:\t%d\n"
17569 +               "nr_onhold:\t%d\n"
17570 +               "load_updates:\t%d\n"
17571 +               "loadavg:\t%d.%02d %d.%02d %d.%02d\n"
17572 +               "total_forks:\t%d\n",
17573 +               atomic_read(&cvirt->nr_threads),
17574 +               atomic_read(&cvirt->nr_running),
17575 +               atomic_read(&cvirt->nr_uninterruptible),
17576 +               atomic_read(&cvirt->nr_onhold),
17577 +               atomic_read(&cvirt->load_updates),
17578 +               LOAD_INT(a), LOAD_FRAC(a),
17579 +               LOAD_INT(b), LOAD_FRAC(b),
17580 +               LOAD_INT(c), LOAD_FRAC(c),
17581 +               atomic_read(&cvirt->total_forks));
17582 +       return length;
17583 +}
17584 +
17585 +static inline
17586 +int vx_info_proc_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc,
17587 +       char *buffer, int cpu)
17588 +{
17589 +       /* no per-cpu fields are reported: nothing is written to buffer */
17590 +       return 0;
17591 +}
17592 +
17593 +#endif /* _VX_CVIRT_PROC_H */
17594 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/debug.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/debug.c
17595 --- linux-2.6.30.2/kernel/vserver/debug.c       1970-01-01 01:00:00.000000000 +0100
17596 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/debug.c    2009-07-04 01:11:39.000000000 +0200
17597 @@ -0,0 +1,32 @@
17598 +/*
17599 + *  kernel/vserver/debug.c
17600 + *
17601 + *  Copyright (C) 2005-2007 Herbert Pötzl
17602 + *
17603 + *  V0.01  vx_info dump support
17604 + *
17605 + */
17606 +
17607 +#include <linux/module.h>
17608 +
17609 +#include <linux/vserver/context.h>
17610 +
17611 +
17612 +void   dump_vx_info(struct vx_info *vxi, int level)
17613 +{      /* printk a one-line summary of @vxi; @level > 0 adds the sub-structure dumps */
17614 +       printk("vx_info %p[#%d, %d.%d, %4x]\n", vxi, vxi->vx_id,
17615 +               atomic_read(&vxi->vx_usecnt),
17616 +               atomic_read(&vxi->vx_tasks),
17617 +               vxi->vx_state);
17618 +       if (level > 0) {
17619 +               __dump_vx_limit(&vxi->limit);
17620 +               __dump_vx_sched(&vxi->sched);
17621 +               __dump_vx_cvirt(&vxi->cvirt);
17622 +               __dump_vx_cacct(&vxi->cacct);
17623 +       }
17624 +       printk("---\n");        /* end-of-dump marker, printed at every level */
17625 +}
17626 +
17627 +
17628 +EXPORT_SYMBOL_GPL(dump_vx_info);
17629 +
17630 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/device.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/device.c
17631 --- linux-2.6.30.2/kernel/vserver/device.c      1970-01-01 01:00:00.000000000 +0100
17632 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/device.c   2009-07-04 01:11:39.000000000 +0200
17633 @@ -0,0 +1,443 @@
17634 +/*
17635 + *  linux/kernel/vserver/device.c
17636 + *
17637 + *  Linux-VServer: Device Support
17638 + *
17639 + *  Copyright (C) 2006  Herbert Pötzl
17640 + *  Copyright (C) 2007  Daniel Hokka Zakrisson
17641 + *
17642 + *  V0.01  device mapping basics
17643 + *  V0.02  added defaults
17644 + *
17645 + */
17646 +
17647 +#include <linux/slab.h>
17648 +#include <linux/rcupdate.h>
17649 +#include <linux/fs.h>
17650 +#include <linux/namei.h>
17651 +#include <linux/hash.h>
17652 +
17653 +#include <asm/errno.h>
17654 +#include <asm/uaccess.h>
17655 +#include <linux/vserver/base.h>
17656 +#include <linux/vserver/debug.h>
17657 +#include <linux/vserver/context.h>
17658 +#include <linux/vserver/device.h>
17659 +#include <linux/vserver/device_cmd.h>
17660 +
17661 +
17662 +#define DMAP_HASH_BITS 4
17663 +
17664 +
17665 +struct vs_mapping {
17666 +       union {
17667 +               struct hlist_node hlist;        /* link in a dmap_main_hash chain */
17668 +               struct list_head list;
17669 +       } u;
17670 +#define dm_hlist       u.hlist
17671 +#define dm_list                u.list
17672 +       xid_t xid;                      /* owning context id; 0 is matched as the global entry */
17673 +       dev_t device;                   /* device number the lookup is keyed on */
17674 +       struct vx_dmap_target target;   /* target device and flags for this mapping */
17675 +};
17676 +
17677 +
17678 +static struct hlist_head dmap_main_hash[1 << DMAP_HASH_BITS];
17679 +/* taken around every access to dmap_main_hash and the default targets */
17680 +static DEFINE_SPINLOCK(dmap_main_hash_lock);
17681 +/* fallback targets, indexed by __mode_to_default(): [0] block, [1] char */
17682 +static struct vx_dmap_target dmap_defaults[2] = {
17683 +       { .flags = DATTR_OPEN },
17684 +       { .flags = DATTR_OPEN },
17685 +};
17686 +
17687 +
17688 +struct kmem_cache *dmap_cachep __read_mostly;  /* slab cache for struct vs_mapping */
17689 +
17690 +int __init dmap_cache_init(void)
17691 +{      /* create the cache; SLAB_PANIC is passed, so the result is not checked here */
17692 +       dmap_cachep = kmem_cache_create("dmap_cache",
17693 +               sizeof(struct vs_mapping), 0,
17694 +               SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
17695 +       return 0;
17696 +}
17697 +
17698 +__initcall(dmap_cache_init);
17699 +
17700 +
17701 +static inline unsigned int __hashval(dev_t dev, int bits)
17702 +{      /* bucket index for @dev: hash_long() of the device number, @bits wide */
17703 +       return hash_long((unsigned long)dev, bits);
17704 +}
17705 +
17706 +
17707 +/*     __hash_mapping()
17708 + *     add the mapping to the hash table (at the head of its bucket);
17709 + *     takes dmap_main_hash_lock itself; vxi is only used for the debug message */
17710 +static inline void __hash_mapping(struct vx_info *vxi, struct vs_mapping *vdm)
17711 +{
17712 +       spinlock_t *hash_lock = &dmap_main_hash_lock;
17713 +       struct hlist_head *head, *hash = dmap_main_hash;
17714 +       int device = vdm->device;
17715 +
17716 +       spin_lock(hash_lock);
17717 +       vxdprintk(VXD_CBIT(misc, 8), "__hash_mapping: %p[#%d] %08x:%08x",
17718 +               vxi, vxi ? vxi->vx_id : 0, device, vdm->target.target);
17719 +
17720 +       head = &hash[__hashval(device, DMAP_HASH_BITS)];
17721 +       hlist_add_head(&vdm->dm_hlist, head);
17722 +       spin_unlock(hash_lock);
17723 +}
17724 +
17725 +
17726 +static inline int __mode_to_default(umode_t mode)
17727 +{
17728 +       /* slot 0 belongs to block devices, slot 1 to character devices */
17729 +       if (mode == S_IFBLK)
17730 +               return 0;
17731 +       if (mode == S_IFCHR)
17732 +               return 1;
17733 +
17734 +       /* any other mode value is treated as a fatal caller error */
17735 +       BUG();
17736 +}
17737 +
17738 +
17739 +/*     __set_default()
17740 + *     set a default: copy *vdmt into the slot selected by mode, either in
17741 + *     vxi->dmap.targets or, for a NULL vxi, in the global dmap_defaults */
17742 +static inline void __set_default(struct vx_info *vxi, umode_t mode,
17743 +       struct vx_dmap_target *vdmt)
17744 +{
17745 +       spinlock_t *hash_lock = &dmap_main_hash_lock;
17746 +       spin_lock(hash_lock);
17747 +
17748 +       if (vxi)
17749 +               vxi->dmap.targets[__mode_to_default(mode)] = *vdmt;
17750 +       else
17751 +               dmap_defaults[__mode_to_default(mode)] = *vdmt;
17752 +
17753 +
17754 +       spin_unlock(hash_lock);
17755 +       /* the debug message is emitted after the lock has been dropped */
17756 +       vxdprintk(VXD_CBIT(misc, 8), "__set_default: %p[#%u] %08x %04x",
17757 +                 vxi, vxi ? vxi->vx_id : 0, vdmt->target, vdmt->flags);
17758 +}
17759 +
17760 +
17761 +/*     __remove_default()
17762 + *     remove a default: a context slot gets flags 0, the global slot is
17763 + *     reset to DATTR_OPEN | mode; always returns 0 */
17764 +static inline int __remove_default(struct vx_info *vxi, umode_t mode)
17765 +{
17766 +       spinlock_t *hash_lock = &dmap_main_hash_lock;
17767 +       spin_lock(hash_lock);
17768 +
17769 +       if (vxi)
17770 +               vxi->dmap.targets[__mode_to_default(mode)].flags = 0;
17771 +       else    /* remove == reset */
17772 +               dmap_defaults[__mode_to_default(mode)].flags = DATTR_OPEN | mode;
17773 +
17774 +       spin_unlock(hash_lock);
17775 +       return 0;
17776 +}
17777 +
17778 +
17779 +/*     __find_mapping()
17780 + *     find a mapping in the hash table; returns 1 with *local set,
17781 + *     0 with only *global (an xid 0 entry) set, or -ENOENT
17782 + *     caller must hold hash_lock
17783 + */
17784 +static inline int __find_mapping(xid_t xid, dev_t device, umode_t mode,
17785 +       struct vs_mapping **local, struct vs_mapping **global)
17786 +{
17787 +       struct hlist_head *hash = dmap_main_hash;
17788 +       struct hlist_head *head = &hash[__hashval(device, DMAP_HASH_BITS)];
17789 +       struct hlist_node *pos;
17790 +       struct vs_mapping *vdm;
17791 +
17792 +       *local = NULL;
17793 +       if (global)     /* global is optional, local is not */
17794 +               *global = NULL;
17795 +
17796 +       hlist_for_each(pos, head) {
17797 +               vdm = hlist_entry(pos, struct vs_mapping, dm_hlist);
17798 +               /* same device, and the S_IFMT bits kept in target.flags equal those of mode */
17799 +               if ((vdm->device == device) &&
17800 +                       !((vdm->target.flags ^ mode) & S_IFMT)) {
17801 +                       if (vdm->xid == xid) {
17802 +                               *local = vdm;
17803 +                               return 1;
17804 +                       } else if (global && vdm->xid == 0)
17805 +                               *global = vdm;  /* remembered, the scan continues */
17806 +               }
17807 +       }
17808 +
17809 +       if (global && *global)
17810 +               return 0;
17811 +       else
17812 +               return -ENOENT;
17813 +}
17814 +
17815 +
17816 +/*     __lookup_mapping()
17817 + *     find a mapping and store the result in target and flags; returns
17818 + *     1: context mapping, 2: context default, 3: xid 0 mapping, 4: global default */
17819 +static inline int __lookup_mapping(struct vx_info *vxi,
17820 +       dev_t device, dev_t *target, int *flags, umode_t mode)
17821 +{
17822 +       spinlock_t *hash_lock = &dmap_main_hash_lock;
17823 +       struct vs_mapping *vdm, *global;
17824 +       struct vx_dmap_target *vdmt;
17825 +       int ret = 0;
17826 +       xid_t xid = vxi ? vxi->vx_id : 0;       /* NULL vxi means xid 0, as in __remove_mapping() */
17827 +       int index;
17828 +
17829 +       spin_lock(hash_lock);
17830 +       if (__find_mapping(xid, device, mode, &vdm, &global) > 0) {
17831 +               ret = 1;
17832 +               vdmt = &vdm->target;
17833 +               goto found;
17834 +       }
17835 +       /* no context mapping: context default, then xid 0 mapping, then global default */
17836 +       index = __mode_to_default(mode);
17837 +       if (vxi && vxi->dmap.targets[index].flags) {
17838 +               ret = 2;
17839 +               vdmt = &vxi->dmap.targets[index];
17840 +       } else if (global) {
17841 +               ret = 3;
17842 +               vdmt = &global->target;
17843 +               goto found;
17844 +       } else {
17845 +               ret = 4;
17846 +               vdmt = &dmap_defaults[index];
17847 +       }
17848 +
17849 +found:         /* both output pointers are optional */
17850 +       if (target && (vdmt->flags & DATTR_REMAP))
17851 +               *target = vdmt->target;
17852 +       else if (target)
17853 +               *target = device;       /* without DATTR_REMAP the device maps to itself */
17854 +       if (flags)
17855 +               *flags = vdmt->flags;
17856 +
17857 +       spin_unlock(hash_lock);
17858 +
17859 +       return ret;
17860 +}
17861 +
17862 +
17863 +/*     __remove_mapping()
17864 + *     remove a mapping from the hash table and free it; returns the
17865 + *     __find_mapping() result: 1 when an entry was removed, else -ENOENT */
17866 +static inline int __remove_mapping(struct vx_info *vxi, dev_t device,
17867 +       umode_t mode)
17868 +{
17869 +       spinlock_t *hash_lock = &dmap_main_hash_lock;
17870 +       struct vs_mapping *vdm = NULL;
17871 +       int ret = 0;
17872 +
17873 +       spin_lock(hash_lock);
17874 +       /* global is passed as NULL, so only an exact xid match is considered */
17875 +       ret = __find_mapping((vxi ? vxi->vx_id : 0), device, mode, &vdm,
17876 +               NULL);
17877 +       vxdprintk(VXD_CBIT(misc, 8), "__remove_mapping: %p[#%d] %08x %04x",
17878 +               vxi, vxi ? vxi->vx_id : 0, device, mode);
17879 +       if (ret < 0)
17880 +               goto out;
17881 +       hlist_del(&vdm->dm_hlist);
17882 +
17883 +out:
17884 +       spin_unlock(hash_lock);
17885 +       if (vdm)        /* still NULL when nothing was found; freed outside the lock */
17886 +               kmem_cache_free(dmap_cachep, vdm);
17887 +       return ret;
17888 +}
17889 +
17890 +
17891 +
17892 +int vs_map_device(struct vx_info *vxi,
17893 +       dev_t device, dev_t *target, umode_t mode)
17894 +{      /* look up @device for @vxi; returns the resulting flags masked with DATTR_MASK */
17895 +       int ret, flags = DATTR_MASK;
17896 +
17897 +       if (!vxi) {     /* no context: identity mapping, full DATTR_MASK returned */
17898 +               if (target)
17899 +                       *target = device;
17900 +               goto out;
17901 +       }
17902 +       ret = __lookup_mapping(vxi, device, target, &flags, mode);
17903 +       vxdprintk(VXD_CBIT(misc, 8), "vs_map_device: %08x target: %08x flags: %04x mode: %04x mapped=%d",
17904 +               device, target ? *target : 0, flags, mode, ret);
17905 +out:
17906 +       return (flags & DATTR_MASK);
17907 +}
17908 +
17909 +
17910 +
17911 +static int do_set_mapping(struct vx_info *vxi,
17912 +       dev_t device, dev_t target, int flags, umode_t mode)
17913 +{      /* non-zero @device: add a hash entry; 0: set the default for @mode. 0 or -ENOMEM */
17914 +       if (device) {
17915 +               struct vs_mapping *new;
17916 +               /* no lookup is done first: an existing entry is not replaced here */
17917 +               new = kmem_cache_alloc(dmap_cachep, GFP_KERNEL);
17918 +               if (!new)
17919 +                       return -ENOMEM;
17920 +
17921 +               INIT_HLIST_NODE(&new->dm_hlist);
17922 +               new->device = device;
17923 +               new->target.target = target;
17924 +               new->target.flags = flags | mode;       /* mode bits are stored with the flags */
17925 +               new->xid = (vxi ? vxi->vx_id : 0);     /* NULL vxi is recorded as xid 0 */
17926 +
17927 +               vxdprintk(VXD_CBIT(misc, 8), "do_set_mapping: %08x target: %08x flags: %04x", device, target, flags);
17928 +               __hash_mapping(vxi, new);
17929 +       } else {
17930 +               struct vx_dmap_target new = {
17931 +                       .target = target,
17932 +                       .flags = flags | mode,
17933 +               };
17934 +               __set_default(vxi, mode, &new);
17935 +       }
17936 +       return 0;
17937 +}
17938 +
17939 +
17940 +static int do_unset_mapping(struct vx_info *vxi,
17941 +       dev_t device, dev_t target, int flags, umode_t mode)
17942 +{
17943 +       int status;
17944 +
17945 +       /*
17946 +        * A non-zero device selects a hashed mapping, device 0 selects the
17947 +        * per-mode default.  The target and flags arguments play no part
17948 +        * in the removal.
17949 +        */
17950 +       if (device)
17951 +               status = __remove_mapping(vxi, device, mode);
17952 +       else
17953 +               status = __remove_default(vxi, mode);
17954 +
17955 +       /* the helper's result is handed back without translation */
17956 +       return status;
17957 +}
17958 +
17959 +/* __user_device(): look up user path @name, report its i_rdev and i_mode */
17960 +static inline int __user_device(const char __user *name, dev_t *dev,
17961 +       umode_t *mode)
17962 +{
17963 +       struct path path;       /* a bare path suffices, no nameidata needed */
17964 +       int ret;
17965 +
17966 +       if (!name) {            /* no path given: report device 0 */
17967 +               *dev = 0;
17968 +               return 0;
17969 +       }
17970 +       ret = user_lpath(name, &path);
17971 +       if (ret)
17972 +               return ret;
17973 +       if (path.dentry->d_inode) {     /* outputs stay untouched otherwise */
17974 +               *dev = path.dentry->d_inode->i_rdev;
17975 +               *mode = path.dentry->d_inode->i_mode;
17976 +       }
17977 +       path_put(&path);
17978 +       return 0;
17979 +}
17980 +
17981 +static inline int __mapping_mode(dev_t device, dev_t target,
17982 +       umode_t device_mode, umode_t target_mode, umode_t *mode)
17983 +{
17984 +       /* neither side given: there is no file type to derive */
17985 +       if (!device && !target)
17986 +               return -EINVAL;
17987 +
17988 +       /* the device side takes precedence for the resulting type */
17989 +       *mode = (device ? device_mode : target_mode) & S_IFMT;
17990 +
17991 +       /* with both sides present their file types have to agree */
17992 +       if (device && target &&
17993 +               (device_mode & S_IFMT) != (target_mode & S_IFMT))
17994 +               return -EINVAL;
17995 +       return 0;
17996 +}
17997 +
17998 +
17999 +static inline int do_mapping(struct vx_info *vxi, const char __user *device_path,
18000 +       const char __user *target_path, int flags, int set)
18001 +{
18002 +       umode_t device_mode = 0, target_mode = 0, mode;
18003 +       dev_t device = ~0, target = ~0;
18004 +       int ret;
18005 +
18006 +       /* resolve both paths; a NULL path yields device number 0 */
18007 +       ret = __user_device(device_path, &device, &device_mode);
18008 +       if (!ret)
18009 +               ret = __user_device(target_path, &target, &target_mode);
18010 +       /* derive the common file type, rejecting mismatching sides */
18011 +       if (!ret)
18012 +               ret = __mapping_mode(device, target,
18013 +                       device_mode, target_mode, &mode);
18014 +       if (ret)
18015 +               return ret;
18016 +
18017 +       /* @set selects between installing and removing the mapping */
18018 +       if (!set)
18019 +               return do_unset_mapping(vxi, device, target,
18020 +                       flags, mode);
18021 +
18022 +       return do_set_mapping(vxi, device, target,
18023 +               flags, mode);
18024 +}
18025 +
18026 +
18027 +int vc_set_mapping(struct vx_info *vxi, void __user *data)
18028 +{
18029 +       struct vcmd_set_mapping_v0 req; /* kernel copy of the user request */
18030 +
18031 +       if (copy_from_user(&req, data, sizeof(req)) != 0)
18032 +               return -EFAULT;
18033 +
18034 +       /* last argument 1: install the mapping */
18035 +       return do_mapping(vxi, req.device, req.target, req.flags, 1);
18036 +}
18037 +
18038 +int vc_unset_mapping(struct vx_info *vxi, void __user *data)
18039 +{
18040 +       struct vcmd_set_mapping_v0 req; /* kernel copy of the user request */
18041 +
18042 +       if (copy_from_user(&req, data, sizeof(req)) != 0)
18043 +               return -EFAULT;
18044 +
18045 +       /* last argument 0: remove the mapping */
18046 +       return do_mapping(vxi, req.device, req.target, req.flags, 0);
18047 +}
18048 +
18049 +
18050 +#ifdef CONFIG_COMPAT
18051 +
18052 +int vc_set_mapping_x32(struct vx_info *vxi, void __user *data)
18053 +{
18054 +       struct vcmd_set_mapping_v0_x32 req;     /* compat request layout */
18055 +
18056 +       if (copy_from_user(&req, data, sizeof(req)) != 0)
18057 +               return -EFAULT;
18058 +       /* convert both compat pointers; last argument 1: install */
18059 +       return do_mapping(vxi, compat_ptr(req.device_ptr),
18060 +               compat_ptr(req.target_ptr), req.flags, 1);
18061 +}
18062 +
18063 +int vc_unset_mapping_x32(struct vx_info *vxi, void __user *data)
18064 +{
18065 +       struct vcmd_set_mapping_v0_x32 req;     /* compat request layout */
18066 +
18067 +       if (copy_from_user(&req, data, sizeof(req)) != 0)
18068 +               return -EFAULT;
18069 +       /* convert both compat pointers; last argument 0: remove */
18070 +       return do_mapping(vxi, compat_ptr(req.device_ptr),
18071 +               compat_ptr(req.target_ptr), req.flags, 0);
18072 +}
18073 +
18074 +#endif /* CONFIG_COMPAT */
18075 +
18076 +
18077 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/dlimit.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/dlimit.c
18078 --- linux-2.6.30.2/kernel/vserver/dlimit.c      1970-01-01 01:00:00.000000000 +0100
18079 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/dlimit.c   2009-07-04 01:11:39.000000000 +0200
18080 @@ -0,0 +1,522 @@
18081 +/*
18082 + *  linux/kernel/vserver/dlimit.c
18083 + *
18084 + *  Virtual Server: Context Disk Limits
18085 + *
18086 + *  Copyright (C) 2004-2007  Herbert Pötzl
18087 + *
18088 + *  V0.01  initial version
18089 + *  V0.02  compat32 splitup
18090 + *
18091 + */
18092 +
18093 +#include <linux/statfs.h>
18094 +#include <linux/sched.h>
18095 +#include <linux/namei.h>
18096 +#include <linux/vs_tag.h>
18097 +#include <linux/vs_dlimit.h>
18098 +#include <linux/vserver/dlimit_cmd.h>
18099 +
18100 +#include <asm/uaccess.h>
18101 +
18102 +/*     __alloc_dl_info()
18103 +
18104 +       * allocate a zeroed dl_info for (sb, tag); NULL on failure
18105 +       * doesn't make it visible (hash)                        */
18106 +
18107 +static struct dl_info *__alloc_dl_info(struct super_block *sb, tag_t tag)
18108 +{
18109 +       struct dl_info *new;
18110 +
18111 +       vxdprintk(VXD_CBIT(dlim, 5),
18112 +               "alloc_dl_info(%p,%d)*", sb, tag);
18113 +
18114 +       /* would this benefit from a slab cache? */
18115 +       new = kzalloc(sizeof(*new), GFP_KERNEL);
18116 +       if (!new)
18117 +               return NULL;
18118 +
18119 +       /* kzalloc() already cleared the struct; only set what differs */
18120 +       new->dl_tag = tag;
18121 +       new->dl_sb = sb;
18122 +       INIT_RCU_HEAD(&new->dl_rcu);
18123 +       INIT_HLIST_NODE(&new->dl_hlist);
18124 +       spin_lock_init(&new->dl_lock);
18125 +       atomic_set(&new->dl_refcnt, 0);
18126 +       atomic_set(&new->dl_usecnt, 0);
18127 +
18128 +       /* rest of init goes here */
18129 +
18130 +       vxdprintk(VXD_CBIT(dlim, 4),
18131 +               "alloc_dl_info(%p,%d) = %p", sb, tag, new);
18132 +       return new;
18133 +}
18134 +
18135 +/*     __dealloc_dl_info()
18136 +
18137 +       * final disposal of dl_info; both counters must be 0    */
18138 +
18139 +static void __dealloc_dl_info(struct dl_info *dli)
18140 +{
18141 +       vxdprintk(VXD_CBIT(dlim, 4), "dealloc_dl_info(%p)", dli);
18142 +
18143 +       /* scrub the identifying fields (presumably to expose stale users) */
18144 +       dli->dl_sb = NULL;
18145 +       dli->dl_tag = -1;
18146 +       dli->dl_hlist.next = LIST_POISON1;
18147 +
18148 +       /* a dl_info that is still used or referenced must not get here */
18149 +       BUG_ON(atomic_read(&dli->dl_usecnt) != 0);
18150 +       BUG_ON(atomic_read(&dli->dl_refcnt) != 0);
18151 +       kfree(dli);
18152 +}
18153 +
18154 +
18155 +/*     hash table for dl_info hash */
18156 +
18157 +#define DL_HASH_SIZE   13      /* number of hash buckets */
18158 +
18159 +struct hlist_head dl_info_hash[DL_HASH_SIZE];
18160 +
18161 +static DEFINE_SPINLOCK(dl_info_hash_lock);      /* taken around hash/unhash */
18162 +
18163 +
18164 +static inline unsigned int __hashval(struct super_block *sb, tag_t tag)
18165 +{
18166 +       return ((unsigned long)sb ^ tag) % DL_HASH_SIZE;        /* bucket index */
18167 +}
18168 +
18169 +
18170 +
18171 +/*     __hash_dl_info()
18172 +
18173 +       * add the dli to the global hash table (bucket by sb/tag)
18174 +       * requires the hash_lock to be held                     */
18175 +
18176 +static inline void __hash_dl_info(struct dl_info *dli)
18177 +{
18178 +       struct hlist_head *head;
18179 +
18180 +       vxdprintk(VXD_CBIT(dlim, 6),
18181 +               "__hash_dl_info: %p[#%d]", dli, dli->dl_tag);
18182 +       get_dl_info(dli);       /* paired with put_dl_info() in __unhash_dl_info() */
18183 +       head = &dl_info_hash[__hashval(dli->dl_sb, dli->dl_tag)];
18184 +       hlist_add_head_rcu(&dli->dl_hlist, head);       /* insert at the bucket head */
18185 +}
18186 +
18187 +/*     __unhash_dl_info()
18188 +
18189 +       * remove the dli from the global hash table
18190 +       * requires the hash_lock to be held                     */
18191 +
18192 +static inline void __unhash_dl_info(struct dl_info *dli)
18193 +{
18194 +       vxdprintk(VXD_CBIT(dlim, 6),
18195 +               "__unhash_dl_info: %p[#%d]", dli, dli->dl_tag);
18196 +       hlist_del_rcu(&dli->dl_hlist);  /* unlink first, then drop the hash's hold */
18197 +       put_dl_info(dli);       /* pairs with get_dl_info() in __hash_dl_info() */
18198 +}
18199 +
18200 +
18201 +/*     __lookup_dl_info()
18202 +
18203 +       * returns the entry for (sb, tag) or NULL if none is hashed
18204 +       * requires the rcu_read_lock()
18205 +       * doesn't increment the dl_refcnt                       */
18206 +static inline struct dl_info *__lookup_dl_info(struct super_block *sb, tag_t tag)
18207 +{
18208 +       struct hlist_head *head = &dl_info_hash[__hashval(sb, tag)];
18209 +       struct hlist_node *pos;
18210 +       struct dl_info *dli;
18211 +
18212 +       hlist_for_each_entry_rcu(dli, pos, head, dl_hlist) {
18213 +               /* a match needs both the tag and the superblock */
18214 +               if (dli->dl_tag == tag && dli->dl_sb == sb) {
18215 +                       return dli;
18216 +               }
18217 +       }
18218 +       return NULL;
18219 +}
18220 +
18221 +/* locate_dl_info(): look up (sb, tag) under rcu_read_lock() and pass the result through get_dl_info() */
18222 +struct dl_info *locate_dl_info(struct super_block *sb, tag_t tag)
18223 +{
18224 +       struct dl_info *dli;
18225 +
18226 +       rcu_read_lock();
18227 +       dli = get_dl_info(__lookup_dl_info(sb, tag));   /* NOTE(review): lookup may yield NULL; confirm get_dl_info() accepts it */
18228 +       vxdprintk(VXD_CBIT(dlim, 7),
18229 +               "locate_dl_info(%p,#%d) = %p", sb, tag, dli);
18230 +       rcu_read_unlock();
18231 +       return dli;     /* callers in this file release it with put_dl_info() */
18232 +}
18233 +
18234 +void rcu_free_dl_info(struct rcu_head *head)
18235 +{
18236 +       struct dl_info *dli = container_of(head, struct dl_info, dl_rcu);
18237 +       int usecnt, refcnt;
18238 +       /* presumably run as an RCU callback; the call site is not in this file */
18239 +       BUG_ON(!dli || !head);
18240 +       /* negative counters indicate unbalanced get/put calls */
18241 +       usecnt = atomic_read(&dli->dl_usecnt);
18242 +       BUG_ON(usecnt < 0);
18243 +
18244 +       refcnt = atomic_read(&dli->dl_refcnt);
18245 +       BUG_ON(refcnt < 0);
18246 +
18247 +       vxdprintk(VXD_CBIT(dlim, 3),
18248 +               "rcu_free_dl_info(%p)", dli);
18249 +       if (!usecnt)
18250 +               __dealloc_dl_info(dli);
18251 +       else    /* still in use: only complain, the dl_info is not freed here */
18252 +               printk("!!! rcu didn't free\n");
18253 +}
18254 +
18255 +/*     do_addrem_dlimit()
18256 +       * add (add != 0) or remove the dl_info for tag id on the
18257 +       * superblock holding name; flags is currently unused    */
18258 +static int do_addrem_dlimit(uint32_t id, const char __user *name,
18259 +       uint32_t flags, int add)
18260 +{
18261 +       struct path path;
18262 +       int ret;
18263 +
18264 +       ret = user_lpath(name, &path);
18265 +       if (!ret) {
18266 +               struct super_block *sb;
18267 +               struct dl_info *dli;
18268 +
18269 +               ret = -EINVAL;
18270 +               if (!path.dentry->d_inode)
18271 +                       goto out_release;
18272 +               if (!(sb = path.dentry->d_inode->i_sb))
18273 +                       goto out_release;
18274 +               if (add) {
18275 +                       dli = __alloc_dl_info(sb, id);  /* may sleep, so done before locking */
18276 +                       ret = -ENOMEM;
18277 +                       if (!dli)       /* never hand a NULL entry to __hash_dl_info() */
18278 +                               goto out_release;
18279 +                       spin_lock(&dl_info_hash_lock);
18280 +                       ret = -EEXIST;
18281 +                       if (__lookup_dl_info(sb, id))
18282 +                               goto out_unlock;
18283 +                       __hash_dl_info(dli);
18284 +                       dli = NULL;     /* hashed: must not be freed below */
18285 +               } else {
18286 +                       spin_lock(&dl_info_hash_lock);
18287 +                       dli = __lookup_dl_info(sb, id);
18288 +                       ret = -ESRCH;
18289 +                       if (!dli)
18290 +                               goto out_unlock;
18291 +                       __unhash_dl_info(dli);
18292 +               }
18293 +               ret = 0;
18294 +       out_unlock:
18295 +               spin_unlock(&dl_info_hash_lock);
18296 +               if (add && dli)         /* duplicate entry: drop the unused allocation */
18297 +                       __dealloc_dl_info(dli);
18298 +       out_release:
18299 +               path_put(&path);
18300 +       }
18301 +       return ret;
18302 +}
18303 +
18304 +int vc_add_dlimit(uint32_t id, void __user *data)
18305 +{
18306 +       struct vcmd_ctx_dlimit_base_v0 req;     /* kernel copy of the request */
18307 +
18308 +       if (copy_from_user(&req, data, sizeof(req)) != 0)
18309 +               return -EFAULT;
18310 +       /* last argument 1: add the disk limit entry */
18311 +       return do_addrem_dlimit(id, req.name, req.flags, 1);
18312 +}
18313 +
18314 +int vc_rem_dlimit(uint32_t id, void __user *data)
18315 +{
18316 +       struct vcmd_ctx_dlimit_base_v0 req;     /* kernel copy of the request */
18317 +
18318 +       if (copy_from_user(&req, data, sizeof(req)) != 0)
18319 +               return -EFAULT;
18320 +       /* last argument 0: remove the disk limit entry */
18321 +       return do_addrem_dlimit(id, req.name, req.flags, 0);
18322 +}
18323 +
18324 +#ifdef CONFIG_COMPAT
18325 +
18326 +int vc_add_dlimit_x32(uint32_t id, void __user *data)
18327 +{
18328 +       struct vcmd_ctx_dlimit_base_v0_x32 req; /* compat request layout */
18329 +
18330 +       if (copy_from_user(&req, data, sizeof(req)) != 0)
18331 +               return -EFAULT;
18332 +       /* convert the compat name pointer; last argument 1: add */
18333 +       return do_addrem_dlimit(id,
18334 +               compat_ptr(req.name_ptr), req.flags, 1);
18335 +}
18336 +
18337 +int vc_rem_dlimit_x32(uint32_t id, void __user *data)
18338 +{
18339 +       struct vcmd_ctx_dlimit_base_v0_x32 req; /* compat request layout */
18340 +
18341 +       if (copy_from_user(&req, data, sizeof(req)) != 0)
18342 +               return -EFAULT;
18343 +       /* convert the compat name pointer; last argument 0: remove */
18344 +       return do_addrem_dlimit(id,
18345 +               compat_ptr(req.name_ptr), req.flags, 0);
18346 +}
18347 +
18348 +#endif /* CONFIG_COMPAT */
18349 +
18350 +/* do_set_dlimit(): update the limits stored for tag id on name's superblock */
18351 +static inline
18352 +int do_set_dlimit(uint32_t id, const char __user *name,
18353 +       uint32_t space_used, uint32_t space_total,
18354 +       uint32_t inodes_used, uint32_t inodes_total,
18355 +       uint32_t reserved, uint32_t flags)
18356 +{
18357 +       struct super_block *sb;
18358 +       struct dl_info *dli;
18359 +       struct path path;
18360 +       int ret;
18361 +
18362 +       ret = user_lpath(name, &path);
18363 +       if (ret)
18364 +               return ret;
18365 +
18366 +       /* every rejection up to the lookup reports -EINVAL */
18367 +       ret = -EINVAL;
18368 +       if (!path.dentry->d_inode)
18369 +               goto out_release;
18370 +       sb = path.dentry->d_inode->i_sb;
18371 +       if (!sb)
18372 +               goto out_release;
18373 +       if (reserved != CDLIM_KEEP && reserved > 100)
18374 +               goto out_release;
18375 +       if (inodes_used != CDLIM_KEEP && inodes_used > inodes_total)
18376 +               goto out_release;
18377 +       if (space_used != CDLIM_KEEP && space_used > space_total)
18378 +               goto out_release;
18379 +
18380 +       ret = -ESRCH;
18381 +       dli = locate_dl_info(sb, id);
18382 +       if (!dli)
18383 +               goto out_release;
18384 +
18385 +       /* CDLIM_KEEP leaves the corresponding stored value untouched */
18386 +       spin_lock(&dli->dl_lock);
18387 +       if (inodes_used != CDLIM_KEEP)
18388 +               dli->dl_inodes_used = inodes_used;
18389 +       if (inodes_total != CDLIM_KEEP)
18390 +               dli->dl_inodes_total = inodes_total;
18391 +       if (space_used != CDLIM_KEEP) {
18392 +               /* store first, then scale by 1024 in the field's own type */
18393 +               dli->dl_space_used = space_used;
18394 +               dli->dl_space_used <<= 10;
18395 +       }
18396 +       if (space_total == CDLIM_INFINITY) {
18397 +               dli->dl_space_total = DLIM_INFINITY;
18398 +       } else if (space_total != CDLIM_KEEP) {
18399 +               dli->dl_space_total = space_total;
18400 +               dli->dl_space_total <<= 10;
18401 +       }
18402 +       if (reserved != CDLIM_KEEP)
18403 +               dli->dl_nrlmult = (1 << 10) * (100 - reserved) / 100;
18404 +       spin_unlock(&dli->dl_lock);
18405 +
18406 +       put_dl_info(dli);
18407 +       ret = 0;
18408 +out_release:
18409 +       path_put(&path);
18410 +       return ret;
18411 +}
18412 +
18413 +int vc_set_dlimit(uint32_t id, void __user *data)
18414 +{
18415 +       struct vcmd_ctx_dlimit_v0 req;  /* kernel copy of the request */
18416 +
18417 +       if (copy_from_user(&req, data, sizeof(req)) != 0)
18418 +               return -EFAULT;
18419 +       /* all values are handed on unchanged */
18420 +       return do_set_dlimit(id, req.name,
18421 +               req.space_used, req.space_total,
18422 +               req.inodes_used, req.inodes_total,
18423 +               req.reserved, req.flags);
18424 +}
18425 +
18426 +#ifdef CONFIG_COMPAT
18427 +
18428 +int vc_set_dlimit_x32(uint32_t id, void __user *data)
18429 +{
18430 +       struct vcmd_ctx_dlimit_v0_x32 req;      /* compat request layout */
18431 +
18432 +       if (copy_from_user(&req, data, sizeof(req)) != 0)
18433 +               return -EFAULT;
18434 +       /* only the name pointer needs converting */
18435 +       return do_set_dlimit(id, compat_ptr(req.name_ptr),
18436 +               req.space_used, req.space_total,
18437 +               req.inodes_used, req.inodes_total,
18438 +               req.reserved, req.flags);
18439 +}
18440 +
18441 +#endif /* CONFIG_COMPAT */
18442 +
18443 +
18444 +static inline
18445 +int do_get_dlimit(uint32_t id, const char __user *name,
18446 +       uint32_t *space_used, uint32_t *space_total,
18447 +       uint32_t *inodes_used, uint32_t *inodes_total,
18448 +       uint32_t *reserved, uint32_t *flags)
18449 +{
18450 +       struct path path;
18451 +       int ret;
18452 +
18453 +       ret = user_lpath(name, &path);
18454 +       if (!ret) {
18455 +               struct super_block *sb;
18456 +               struct dl_info *dli;
18457 +
18458 +               ret = -EINVAL;
18459 +               if (!path.dentry->d_inode)
18460 +                       goto out_release;
18461 +               if (!(sb = path.dentry->d_inode->i_sb))
18462 +                       goto out_release;
18463 +
18464 +               ret = -ESRCH;   /* no limit registered for (sb, id) */
18465 +               dli = locate_dl_info(sb, id);
18466 +               if (!dli)
18467 +                       goto out_release;
18468 +
18469 +               spin_lock(&dli->dl_lock);
18470 +               *inodes_used = dli->dl_inodes_used;
18471 +               *inodes_total = dli->dl_inodes_total;
18472 +               *space_used = dli->dl_space_used >> 10; /* stored << 10 by do_set_dlimit() */
18473 +               if (dli->dl_space_total == DLIM_INFINITY)
18474 +                       *space_total = CDLIM_INFINITY;
18475 +               else
18476 +                       *space_total = dli->dl_space_total >> 10;
18477 +               /* inverse of the dl_nrlmult formula in do_set_dlimit(), rounded */
18478 +               *reserved = 100 - ((dli->dl_nrlmult * 100 + 512) >> 10);
18479 +               spin_unlock(&dli->dl_lock);
18480 +
18481 +               put_dl_info(dli);
18482 +               /* *flags is never written here: the caller's value is
18483 +                  left as it was passed in */
18484 +               ret = 0;
18485 +       out_release:
18486 +               path_put(&path);
18487 +       }
18488 +       return ret;
18489 +}
18490 +
18491 +
18492 +int vc_get_dlimit(uint32_t id, void __user *data)
18493 +{
18494 +       struct vcmd_ctx_dlimit_v0 req;
18495 +       int err;
18496 +
18497 +       /* fetch the request: it carries the path to look up */
18498 +       if (copy_from_user(&req, data, sizeof(req)) != 0)
18499 +               return -EFAULT;
18500 +
18501 +       err = do_get_dlimit(id, req.name,
18502 +               &req.space_used, &req.space_total,
18503 +               &req.inodes_used, &req.inodes_total,
18504 +               &req.reserved, &req.flags);
18505 +       if (err)
18506 +               return err;
18507 +
18508 +       /* hand the filled-in structure back to the caller */
18509 +       return copy_to_user(data, &req, sizeof(req)) ? -EFAULT : 0;
18510 +}
18511 +
18512 +#ifdef CONFIG_COMPAT
18513 +
18514 +int vc_get_dlimit_x32(uint32_t id, void __user *data)
18515 +{
18516 +       struct vcmd_ctx_dlimit_v0_x32 req;
18517 +       int err;
18518 +
18519 +       /* fetch the compat request: it carries the path to look up */
18520 +       if (copy_from_user(&req, data, sizeof(req)) != 0)
18521 +               return -EFAULT;
18522 +
18523 +       err = do_get_dlimit(id, compat_ptr(req.name_ptr),
18524 +               &req.space_used, &req.space_total,
18525 +               &req.inodes_used, &req.inodes_total,
18526 +               &req.reserved, &req.flags);
18527 +       if (err)
18528 +               return err;
18529 +
18530 +       /* hand the filled-in structure back to the caller */
18531 +       return copy_to_user(data, &req, sizeof(req)) ? -EFAULT : 0;
18532 +}
18533 +
18534 +#endif /* CONFIG_COMPAT */
18535 +
18536 +
18537 +void vx_vsi_statfs(struct super_block *sb, struct kstatfs *buf)
18538 +{
18539 +       struct dl_info *dli;
18540 +       __u64 blimit, bfree, bavail;
18541 +       __u32 ifree;
18542 +       /* clamp the statfs values in buf to the limit of the current tag on sb */
18543 +       dli = locate_dl_info(sb, dx_current_tag());
18544 +       if (!dli)       /* no limit registered: buf is left untouched */
18545 +               return;
18546 +
18547 +       spin_lock(&dli->dl_lock);
18548 +       if (dli->dl_inodes_total == (unsigned long)DLIM_INFINITY)
18549 +               goto no_ilim;
18550 +
18551 +       /* reduce max inodes available to limit */
18552 +       if (buf->f_files > dli->dl_inodes_total)
18553 +               buf->f_files = dli->dl_inodes_total;
18554 +       /* NOTE(review): wraps if used exceeds total; confirm that cannot happen */
18555 +       ifree = dli->dl_inodes_total - dli->dl_inodes_used;
18556 +       /* reduce free inodes to min */
18557 +       if (ifree < buf->f_ffree)
18558 +               buf->f_ffree = ifree;
18559 +
18560 +no_ilim:
18561 +       if (dli->dl_space_total == DLIM_INFINITY)
18562 +               goto no_blim;
18563 +       /* the space limits are shifted down into filesystem blocks */
18564 +       blimit = dli->dl_space_total >> sb->s_blocksize_bits;
18565 +
18566 +       if (dli->dl_space_total < dli->dl_space_used)
18567 +               bfree = 0;
18568 +       else
18569 +               bfree = (dli->dl_space_total - dli->dl_space_used)
18570 +                       >> sb->s_blocksize_bits;
18571 +       /* dl_nrlmult is a fraction of 1024 (see do_set_dlimit): scale the total by it */
18572 +       bavail = ((dli->dl_space_total >> 10) * dli->dl_nrlmult);
18573 +       if (bavail < dli->dl_space_used)
18574 +               bavail = 0;
18575 +       else
18576 +               bavail = (bavail - dli->dl_space_used)
18577 +                       >> sb->s_blocksize_bits;
18578 +
18579 +       /* reduce max space available to limit */
18580 +       if (buf->f_blocks > blimit)
18581 +               buf->f_blocks = blimit;
18582 +
18583 +       /* reduce free space to min */
18584 +       if (bfree < buf->f_bfree)
18585 +               buf->f_bfree = bfree;
18586 +
18587 +       /* reduce avail space to min */
18588 +       if (bavail < buf->f_bavail)
18589 +               buf->f_bavail = bavail;
18590 +
18591 +no_blim:
18592 +       spin_unlock(&dli->dl_lock);
18593 +       put_dl_info(dli);       /* release what locate_dl_info() returned */
18594 +
18595 +       return;
18596 +}
18597 +
18598 +#include <linux/module.h>
18599 +
18600 +EXPORT_SYMBOL_GPL(locate_dl_info);
18601 +EXPORT_SYMBOL_GPL(rcu_free_dl_info);
18602 +
18603 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/helper.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/helper.c
18604 --- linux-2.6.30.2/kernel/vserver/helper.c      1970-01-01 01:00:00.000000000 +0100
18605 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/helper.c   2009-07-04 01:11:39.000000000 +0200
18606 @@ -0,0 +1,199 @@
18607 +/*
18608 + *  linux/kernel/vserver/helper.c
18609 + *
18610 + *  Virtual Context Support
18611 + *
18612 + *  Copyright (C) 2004-2007  Herbert Pötzl
18613 + *
18614 + *  V0.01  basic helper
18615 + *
18616 + */
18617 +
18618 +#include <linux/kmod.h>
18619 +#include <linux/reboot.h>
18620 +#include <linux/vs_context.h>
18621 +#include <linux/vs_network.h>
18622 +#include <linux/vserver/signal.h>
18623 +
18624 +
18625 +char vshelper_path[255] = "/sbin/vshelper";     /* default helper binary; used as argv[0] of every helper call in this file */
18626 +
18627 +
18628 +static int do_vshelper(char *name, char *argv[], char *envp[], int sync)
18629 +{
18630 +       const char *how = sync ? "sync" : "async";
18631 +       int ret = call_usermodehelper(name, argv, envp, sync);
18632 +
18633 +       /* warn on a non-zero result; the debug trace is emitted either way */
18634 +       if (ret != 0)
18635 +               printk( KERN_WARNING
18636 +                       "%s: (%s %s) returned %s with %d\n",
18637 +                       name, argv[1], argv[2], how, ret);
18638 +       vxdprintk(VXD_CBIT(switch, 4),
18639 +               "%s: (%s %s) returned %s with %d",
18640 +               name, argv[1], argv[2], how, ret);
18641 +       return ret;
18642 +}
18643 +
18644 +/*
18645 + *      vshelper path is set via /proc/sys
18646 + *      invoked by vserver sys_reboot(), with
18647 + *      the following arguments
18648 + *
18649 + *      argv [0] = vshelper_path;
18650 + *      argv [1] = action: "restart", "halt", "poweroff", ...
18651 + *      argv [2] = context identifier
18652 + *
18653 + *      envp [*] = type-specific parameters
18654 + */
18655 +
18656 +long vs_reboot_helper(struct vx_info *vxi, int cmd, void __user *arg)
18657 +{
18658 +       char id_buf[8], cmd_buf[16];
18659 +       char uid_buf[20], pid_buf[16];  /* "VS_UID=" plus any 32bit %d fits in 20 */
18660 +       int ret;
18661 +
18662 +       char *argv[] = {vshelper_path, NULL, id_buf, 0};
18663 +       char *envp[] = {"HOME=/", "TERM=linux",
18664 +                       "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
18665 +                       uid_buf, pid_buf, cmd_buf, 0};
18666 +       /* NOTE(review): VXS_HELPER is tested and set in two steps; confirm callers serialize */
18667 +       if (vx_info_state(vxi, VXS_HELPER))     /* a helper run is already flagged */
18668 +               return -EAGAIN;
18669 +       vxi->vx_state |= VXS_HELPER;
18670 +
18671 +       /* pass full sizes: snprintf() keeps room for the NUL; "VS_CMD=%08x" needs all 16 bytes */
18672 +       snprintf(id_buf, sizeof(id_buf), "%d", vxi->vx_id);
18673 +       snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
18674 +       snprintf(uid_buf, sizeof(uid_buf), "VS_UID=%d", current_uid());
18675 +       snprintf(pid_buf, sizeof(pid_buf), "VS_PID=%d", current->pid);
18676 +
18677 +       switch (cmd) {
18678 +       case LINUX_REBOOT_CMD_RESTART:
18679 +               argv[1] = "restart";
18680 +               break;
18681 +
18682 +       case LINUX_REBOOT_CMD_HALT:
18683 +               argv[1] = "halt";
18684 +               break;
18685 +
18686 +       case LINUX_REBOOT_CMD_POWER_OFF:
18687 +               argv[1] = "poweroff";
18688 +               break;
18689 +
18690 +       case LINUX_REBOOT_CMD_SW_SUSPEND:
18691 +               argv[1] = "swsusp";
18692 +               break;
18693 +
18694 +       default:        /* any other command: no helper run, report success */
18695 +               vxi->vx_state &= ~VXS_HELPER;
18696 +               return 0;
18697 +       }
18698 +       /* last argument 0 is passed on as do_vshelper()'s sync parameter */
18699 +       ret = do_vshelper(vshelper_path, argv, envp, 0);
18700 +       vxi->vx_state &= ~VXS_HELPER;
18701 +       __wakeup_vx_info(vxi);
18702 +       return (ret) ? -EPERM : 0;      /* any helper failure is reported as -EPERM */
18703 +}
18704 +
18705 +
18706 +long vs_reboot(unsigned int cmd, void __user *arg)
18707 +{
18708 +       struct vx_info *vxi = current->vx_info;
18709 +       long err;
18710 +
18711 +       vxdprintk(VXD_CBIT(misc, 5),
18712 +               "vs_reboot(%p[#%d],%d)",
18713 +               vxi, vxi ? vxi->vx_id : 0, cmd);
18714 +
18715 +       /* run the userspace helper first; its failure ends the request */
18716 +       err = vs_reboot_helper(vxi, cmd, arg);
18717 +       if (err)
18718 +               return err;
18719 +
18720 +       vxi->reboot_cmd = cmd;
18721 +       if (!vx_info_flags(vxi, VXF_REBOOT_KILL, 0))
18722 +               return 0;
18723 +
18724 +       /* only these three commands lead to the two SIGKILL calls */
18725 +       if (cmd == LINUX_REBOOT_CMD_RESTART ||
18726 +           cmd == LINUX_REBOOT_CMD_HALT ||
18727 +           cmd == LINUX_REBOOT_CMD_POWER_OFF) {
18728 +               vx_info_kill(vxi, 0, SIGKILL);
18729 +               vx_info_kill(vxi, 1, SIGKILL);
18730 +       }
18731 +       return 0;
18732 +}
18733 +
18734 +
18735 +/*
18736 + *      argv [0] = vshelper_path;
18737 + *      argv [1] = action: "startup", "shutdown"
18738 + *      argv [2] = context identifier
18739 + *
18740 + *      envp [*] = type-specific parameters
18741 + */
18742 +
18743 +long vs_state_change(struct vx_info *vxi, unsigned int cmd)
18744 +{
18745 +       char id_buf[8], cmd_buf[16];
18746 +       char *argv[] = {vshelper_path, NULL, id_buf, 0};
18747 +       char *envp[] = {"HOME=/", "TERM=linux",
18748 +                       "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0};
18749 +
18750 +       if (!vx_info_flags(vxi, VXF_SC_HELPER, 0))
18751 +               return 0;
18752 +       /* pass full sizes: "VS_CMD=%08x" needs all 16 bytes including the NUL */
18753 +       snprintf(id_buf, sizeof(id_buf), "%d", vxi->vx_id);
18754 +       snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
18755 +
18756 +       switch (cmd) {
18757 +       case VSC_STARTUP:
18758 +               argv[1] = "startup";
18759 +               break;
18760 +       case VSC_SHUTDOWN:
18761 +               argv[1] = "shutdown";
18762 +               break;
18763 +       default:        /* unknown command: no helper run, report success */
18764 +               return 0;
18765 +       }
18766 +       /* last argument 1 is passed on as do_vshelper()'s sync parameter */
18767 +       return do_vshelper(vshelper_path, argv, envp, 1);
18768 +}
18769 +
18770 +
18771 +/*
18772 + *      argv [0] = vshelper_path;
18773 + *      argv [1] = action: "netup", "netdown"
18774 + *      argv [2] = context identifier
18775 + *
18776 + *      envp [*] = type-specific parameters
18777 + */
18778 +
18779 +long vs_net_change(struct nx_info *nxi, unsigned int cmd)
18780 +{
18781 +       char id_buf[8], cmd_buf[16];
18782 +       char *argv[] = {vshelper_path, NULL, id_buf, 0};
18783 +       char *envp[] = {"HOME=/", "TERM=linux",
18784 +                       "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0};
18785 +
18786 +       if (!nx_info_flags(nxi, NXF_SC_HELPER, 0))
18787 +               return 0;
18788 +       /* pass full sizes: "VS_CMD=%08x" needs all 16 bytes including the NUL */
18789 +       snprintf(id_buf, sizeof(id_buf), "%d", nxi->nx_id);
18790 +       snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
18791 +
18792 +       switch (cmd) {
18793 +       case VSC_NETUP:
18794 +               argv[1] = "netup";
18795 +               break;
18796 +       case VSC_NETDOWN:
18797 +               argv[1] = "netdown";
18798 +               break;
18799 +       default:        /* unknown command: no helper run, report success */
18800 +               return 0;
18801 +       }
18802 +       /* last argument 1 is passed on as do_vshelper()'s sync parameter */
18803 +       return do_vshelper(vshelper_path, argv, envp, 1);
18804 +}
18805 +
18806 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/history.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/history.c
18807 --- linux-2.6.30.2/kernel/vserver/history.c     1970-01-01 01:00:00.000000000 +0100
18808 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/history.c  2009-07-04 01:11:39.000000000 +0200
18809 @@ -0,0 +1,258 @@
18810 +/*
18811 + *  kernel/vserver/history.c
18812 + *
18813 + *  Virtual Context History Backtrace
18814 + *
18815 + *  Copyright (C) 2004-2007  Herbert Pötzl
18816 + *
18817 + *  V0.01  basic structure
18818 + *  V0.02  hash/unhash and trace
18819 + *  V0.03  preemption fixes
18820 + *
18821 + */
18822 +
18823 +#include <linux/module.h>
18824 +#include <asm/uaccess.h>
18825 +
18826 +#include <linux/vserver/context.h>
18827 +#include <linux/vserver/debug.h>
18828 +#include <linux/vserver/debug_cmd.h>
18829 +#include <linux/vserver/history.h>
18830 +
18831 +
18832 +#ifdef CONFIG_VSERVER_HISTORY
18833 +#define VXH_SIZE       CONFIG_VSERVER_HISTORY_SIZE
18834 +#else
18835 +#define VXH_SIZE       64
18836 +#endif
18837 +
18838 +struct _vx_history {
18839 +       unsigned int counter;   /* advanced per recorded entry; slot is counter % VXH_SIZE */
18840 +
18841 +       struct _vx_hist_entry entry[VXH_SIZE + 1];      /* slot VXH_SIZE is handed out while vxh_active is 0 */
18842 +};
18843 +
18844 +
18845 +DEFINE_PER_CPU(struct _vx_history, vx_history_buffer);  /* one history buffer per CPU */
18846 +
18847 +unsigned volatile int vxh_active = 1;   /* non-zero: vxh_advance() records into the ring slots */
18848 +
18849 +static atomic_t sequence = ATOMIC_INIT(0);      /* incremented once per vxh_advance() call */
18850 +
18851 +
18852 +/*     vxh_advance()
18853 +
18854 +       * requires disabled preemption                          */
18855 +
18856 +struct _vx_hist_entry *vxh_advance(void *loc)
18857 +{
18858 +       unsigned int cpu = smp_processor_id(), index = VXH_SIZE;
18859 +       struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu);
18860 +       struct _vx_hist_entry *slot;
18861 +
18862 +       /* while inactive every caller gets the spare slot past the ring */
18863 +       if (vxh_active)
18864 +               index = hist->counter++ % VXH_SIZE;
18865 +       slot = &hist->entry[index];
18866 +       slot->seq = atomic_inc_return(&sequence);
18867 +       slot->loc = loc;
18868 +       return slot;
18869 +}
18870 +
18871 +EXPORT_SYMBOL_GPL(vxh_advance);
18872 +
18873 +
18874 +#define VXH_LOC_FMTS   "(#%04x,*%d):%p"
18875 +/* matching arguments; `cpu` is not a parameter, it is taken from the expansion site */
18876 +#define VXH_LOC_ARGS(e)        (e)->seq, cpu, (e)->loc
18877 +
18878 +/* format for the recorded vxi: pointer, xid, usecnt, tasks */
18879 +#define VXH_VXI_FMTS   "%p[#%d,%d.%d]"
18880 +/* matching arguments; the three numbers print as 0 when no vxi pointer was recorded */
18881 +#define VXH_VXI_ARGS(e)        (e)->vxi.ptr,                           \
18882 +                       (e)->vxi.ptr ? (e)->vxi.xid : 0,        \
18883 +                       (e)->vxi.ptr ? (e)->vxi.usecnt : 0,     \
18884 +                       (e)->vxi.ptr ? (e)->vxi.tasks : 0
18885 +/* vxh_dump_entry: printk one history entry, format chosen by e->type; cpu is picked up by VXH_LOC_ARGS() */
18886 +void   vxh_dump_entry(struct _vx_hist_entry *e, unsigned cpu)
18887 +{
18888 +       switch (e->type) {
18889 +       case VXH_THROW_OOPS:
18890 +               printk( VXH_LOC_FMTS " oops \n", VXH_LOC_ARGS(e));
18891 +               break;
18892 +
18893 +       case VXH_GET_VX_INFO:
18894 +       case VXH_PUT_VX_INFO:
18895 +               printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n",
18896 +                       VXH_LOC_ARGS(e),
18897 +                       (e->type == VXH_GET_VX_INFO) ? "get" : "put",
18898 +                       VXH_VXI_ARGS(e));
18899 +               break;
18900 +               /* the next two groups additionally print e->sc.data after '@' */
18901 +       case VXH_INIT_VX_INFO:
18902 +       case VXH_SET_VX_INFO:
18903 +       case VXH_CLR_VX_INFO:
18904 +               printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n",
18905 +                       VXH_LOC_ARGS(e),
18906 +                       (e->type == VXH_INIT_VX_INFO) ? "init" :
18907 +                       ((e->type == VXH_SET_VX_INFO) ? "set" : "clr"),
18908 +                       VXH_VXI_ARGS(e), e->sc.data);
18909 +               break;
18910 +
18911 +       case VXH_CLAIM_VX_INFO:
18912 +       case VXH_RELEASE_VX_INFO:
18913 +               printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n",
18914 +                       VXH_LOC_ARGS(e),
18915 +                       (e->type == VXH_CLAIM_VX_INFO) ? "claim" : "release",
18916 +                       VXH_VXI_ARGS(e), e->sc.data);
18917 +               break;
18918 +
18919 +       case VXH_ALLOC_VX_INFO:
18920 +       case VXH_DEALLOC_VX_INFO:
18921 +               printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n",
18922 +                       VXH_LOC_ARGS(e),
18923 +                       (e->type == VXH_ALLOC_VX_INFO) ? "alloc" : "dealloc",
18924 +                       VXH_VXI_ARGS(e));
18925 +               break;
18926 +
18927 +       case VXH_HASH_VX_INFO:
18928 +       case VXH_UNHASH_VX_INFO:
18929 +               printk( VXH_LOC_FMTS " __%s_vx_info " VXH_VXI_FMTS "\n",
18930 +                       VXH_LOC_ARGS(e),
18931 +                       (e->type == VXH_HASH_VX_INFO) ? "hash" : "unhash",
18932 +                       VXH_VXI_ARGS(e));
18933 +               break;
18934 +               /* lookup-style entries also print the requested id from e->ll.arg */
18935 +       case VXH_LOC_VX_INFO:
18936 +       case VXH_LOOKUP_VX_INFO:
18937 +       case VXH_CREATE_VX_INFO:
18938 +               printk( VXH_LOC_FMTS " __%s_vx_info [#%d] -> " VXH_VXI_FMTS "\n",
18939 +                       VXH_LOC_ARGS(e),
18940 +                       (e->type == VXH_CREATE_VX_INFO) ? "create" :
18941 +                       ((e->type == VXH_LOC_VX_INFO) ? "loc" : "lookup"),
18942 +                       e->ll.arg, VXH_VXI_ARGS(e));
18943 +               break;
18944 +       }       /* entry types not listed above print nothing */
18945 +}
18946 +/* __vxh_dump_history: printk a header line, then the rings of all online CPUs, interleaved slot by slot */
18947 +static void __vxh_dump_history(void)
18948 +{
18949 +       unsigned int i, cpu;
18950 +
18951 +       printk("History:\tSEQ: %8x\tNR_CPUS: %d\n",
18952 +               atomic_read(&sequence), NR_CPUS);
18953 +       /* step i walks backwards from each CPU's current counter position */
18954 +       for (i = 0; i < VXH_SIZE; i++) {
18955 +               for_each_online_cpu(cpu) {
18956 +                       struct _vx_history *hist =
18957 +                               &per_cpu(vx_history_buffer, cpu);
18958 +                       unsigned int index = (hist->counter - i) % VXH_SIZE;
18959 +                       struct _vx_hist_entry *entry = &hist->entry[index];
18960 +
18961 +                       vxh_dump_entry(entry, cpu);
18962 +               }
18963 +       }
18964 +}
18965 +/* vxh_dump_history: turn recording off and dump all rings; vxh_active is not set back to 1 here */
18966 +void   vxh_dump_history(void)
18967 +{
18968 +       vxh_active = 0;
18969 +#ifdef CONFIG_SMP
18970 +       local_irq_enable();     /* NOTE(review): presumably needed so smp_send_stop() can halt the other CPUs - confirm */
18971 +       smp_send_stop();
18972 +       local_irq_disable();
18973 +#endif
18974 +       __vxh_dump_history();
18975 +}
18976 +
18977 +
18978 +/* vserver syscall commands below here */
18979 +
18980 +/* vc_dump_history: vserver command; dumps the history with recording paused; id is unused; always returns 0 */
18981 +int vc_dump_history(uint32_t id)
18982 +{
18983 +       vxh_active = 0;
18984 +       __vxh_dump_history();
18985 +       vxh_active = 1;         /* recording is switched on again unconditionally */
18986 +
18987 +       return 0;
18988 +}
18989 +
18990 +/* copy entries [*index, counter) of one CPU's ring to user space; 1 = more left, 0 = caught up, -EFAULT = bad buffer */
18991 +int do_read_history(struct __user _vx_hist_entry *data,
18992 +       int cpu, uint32_t *index, uint32_t *count)
18993 +{
18994 +       int pos, ret = 0;
18995 +       struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu);
18996 +       int end = hist->counter;
18997 +       int start = end - VXH_SIZE + 2;
18998 +       int idx = *index;
18999 +
19000 +       /* special case: get current pos */
19001 +       if (!*count) {
19002 +               *index = end;
19003 +               return 0;
19004 +       }
19005 +
19006 +       /* have we lost some data? */
19007 +       if (idx < start)
19008 +               idx = start;
19009 +
19010 +       for (pos = 0; (pos < *count) && (idx < end); pos++, idx++) {
19011 +               struct _vx_hist_entry *entry =
19012 +                       &hist->entry[idx % VXH_SIZE];
19013 +               /* copy_to_user() returns the bytes NOT copied; report -EFAULT, not that count */
19014 +               if (copy_to_user(&data[pos], entry, sizeof(*entry))) {
19015 +                       ret = -EFAULT;
19016 +                       break;
19017 +               }
19018 +       }
19019 +       /* save new index and count (also after a failed copy, so the caller sees the progress) */
19020 +       *index = idx;
19021 +       *count = pos;
19022 +       return ret ? ret : (*index < end);
19023 +}
19024 +/* vc_read_history: vserver command; id selects the CPU whose ring is read through do_read_history() */
19025 +int vc_read_history(uint32_t id, void __user *data)
19026 +{
19027 +       struct vcmd_read_history_v0 vc_data;
19028 +       int ret;
19029 +
19030 +       if (id >= NR_CPUS)
19031 +               return -EINVAL;
19032 +
19033 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19034 +               return -EFAULT;
19035 +       /* vc_data.data is the user buffer for entries; index and count are updated in place */
19036 +       ret = do_read_history((struct __user _vx_hist_entry *)vc_data.data,
19037 +               id, &vc_data.index, &vc_data.count);
19038 +       /* the updated request is written back whatever do_read_history() returned */
19039 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19040 +               return -EFAULT;
19041 +       return ret;
19042 +}
19043 +
19044 +#ifdef CONFIG_COMPAT
19045 +/* vc_read_history_x32: compat variant of vc_read_history(); the buffer pointer comes from compat_ptr(data_ptr) */
19046 +int vc_read_history_x32(uint32_t id, void __user *data)
19047 +{
19048 +       struct vcmd_read_history_v0_x32 vc_data;
19049 +       int ret;
19050 +
19051 +       if (id >= NR_CPUS)
19052 +               return -EINVAL;
19053 +
19054 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19055 +               return -EFAULT;
19056 +
19057 +       ret = do_read_history((struct __user _vx_hist_entry *)
19058 +               compat_ptr(vc_data.data_ptr),
19059 +               id, &vc_data.index, &vc_data.count);
19060 +       /* the updated request is written back whatever do_read_history() returned */
19061 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19062 +               return -EFAULT;
19063 +       return ret;
19064 +}
19065 +
19066 +#endif /* CONFIG_COMPAT */
19067 +
19068 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/inet.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/inet.c
19069 --- linux-2.6.30.2/kernel/vserver/inet.c        1970-01-01 01:00:00.000000000 +0100
19070 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/inet.c     2009-07-04 01:11:39.000000000 +0200
19071 @@ -0,0 +1,225 @@
19072 +
19073 +#include <linux/in.h>
19074 +#include <linux/inetdevice.h>
19075 +#include <linux/vs_inet.h>
19076 +#include <linux/vs_inet6.h>
19077 +#include <linux/vserver/debug.h>
19078 +#include <net/route.h>
19079 +#include <net/addrconf.h>
19080 +
19081 +/* nx_v4_addr_conflict: 1 if a context is NULL, both are the same, or an IPv4 entry of nxi1 is found in nxi2; else 0 */
19082 +int nx_v4_addr_conflict(struct nx_info *nxi1, struct nx_info *nxi2)
19083 +{
19084 +       int ret = 0;
19085 +
19086 +       if (!nxi1 || !nxi2 || nxi1 == nxi2)
19087 +               ret = 1;
19088 +       else {
19089 +               struct nx_addr_v4 *ptr;
19090 +               /* walk nxi1's address list, starting with the entry embedded in nx_info */
19091 +               for (ptr = &nxi1->v4; ptr; ptr = ptr->next) {
19092 +                       if (v4_nx_addr_in_nx_info(nxi2, ptr, -1)) {
19093 +                               ret = 1;
19094 +                               break;
19095 +                       }
19096 +               }
19097 +       }
19098 +
19099 +       vxdprintk(VXD_CBIT(net, 2),
19100 +               "nx_v4_addr_conflict(%p,%p): %d",
19101 +               nxi1, nxi2, ret);
19102 +
19103 +       return ret;
19104 +}
19105 +
19106 +
19107 +#ifdef CONFIG_IPV6
19108 +/* nx_v6_addr_conflict: 1 if a context is NULL, both are the same, or an IPv6 entry of nxi1 is found in nxi2; else 0 */
19109 +int nx_v6_addr_conflict(struct nx_info *nxi1, struct nx_info *nxi2)
19110 +{
19111 +       int ret = 0;
19112 +
19113 +       if (!nxi1 || !nxi2 || nxi1 == nxi2)
19114 +               ret = 1;
19115 +       else {
19116 +               struct nx_addr_v6 *ptr;
19117 +               /* walk nxi1's address list, starting with the entry embedded in nx_info */
19118 +               for (ptr = &nxi1->v6; ptr; ptr = ptr->next) {
19119 +                       if (v6_nx_addr_in_nx_info(nxi2, ptr, -1)) {
19120 +                               ret = 1;
19121 +                               break;
19122 +                       }
19123 +               }
19124 +       }
19125 +
19126 +       vxdprintk(VXD_CBIT(net, 2),
19127 +               "nx_v6_addr_conflict(%p,%p): %d",
19128 +               nxi1, nxi2, ret);
19129 +
19130 +       return ret;
19131 +}
19132 +
19133 +#endif
19134 +/* v4_dev_in_nx_info: 1 if any IPv4 address on dev passes v4_addr_in_nx_info(nxi, ..., NXA_MASK_SHOW), else 0 */
19135 +int v4_dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
19136 +{
19137 +       struct in_device *in_dev;
19138 +       struct in_ifaddr **ifap;
19139 +       struct in_ifaddr *ifa;
19140 +       int ret = 0;
19141 +
19142 +       if (!dev)
19143 +               goto out;
19144 +       in_dev = in_dev_get(dev);
19145 +       if (!in_dev)
19146 +               goto out;
19147 +       /* a NULL dev or a dev without in_device yields 0 */
19148 +       for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
19149 +               ifap = &ifa->ifa_next) {
19150 +               if (v4_addr_in_nx_info(nxi, ifa->ifa_local, NXA_MASK_SHOW)) {
19151 +                       ret = 1;
19152 +                       break;
19153 +               }
19154 +       }
19155 +       in_dev_put(in_dev);     /* pairs with in_dev_get() above */
19156 +out:
19157 +       return ret;
19158 +}
19159 +
19160 +
19161 +#ifdef CONFIG_IPV6
19162 +/* v6_dev_in_nx_info: 1 if any IPv6 address on dev passes v6_addr_in_nx_info(nxi, ..., -1), else 0 */
19163 +int v6_dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
19164 +{
19165 +       struct inet6_dev *in_dev;
19166 +       struct inet6_ifaddr **ifap;
19167 +       struct inet6_ifaddr *ifa;
19168 +       int ret = 0;
19169 +
19170 +       if (!dev)
19171 +               goto out;
19172 +       in_dev = in6_dev_get(dev);
19173 +       if (!in_dev)
19174 +               goto out;
19175 +       /* a NULL dev or a dev without inet6_dev yields 0 */
19176 +       for (ifap = &in_dev->addr_list; (ifa = *ifap) != NULL;
19177 +               ifap = &ifa->if_next) {
19178 +               if (v6_addr_in_nx_info(nxi, &ifa->addr, -1)) {
19179 +                       ret = 1;
19180 +                       break;
19181 +               }
19182 +       }
19183 +       in6_dev_put(in_dev);    /* pairs with in6_dev_get() above */
19184 +out:
19185 +       return ret;
19186 +}
19187 +
19188 +#endif
19189 +/* dev_in_nx_info: 1 if nxi is NULL or dev matches via IPv4, 2 if it matches via IPv6 (CONFIG_IPV6), else 0 */
19190 +int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
19191 +{
19192 +       int ret = 1;
19193 +
19194 +       if (!nxi)
19195 +               goto out;
19196 +       if (nxi->v4.type && v4_dev_in_nx_info(dev, nxi))
19197 +               goto out;
19198 +#ifdef CONFIG_IPV6
19199 +       ret = 2;
19200 +       if (nxi->v6.type && v6_dev_in_nx_info(dev, nxi))
19201 +               goto out;
19202 +#endif
19203 +       ret = 0;
19204 +out:
19205 +       vxdprintk(VXD_CBIT(net, 3),
19206 +               "dev_in_nx_info(%p,%p[#%d]) = %d",
19207 +               dev, nxi, nxi ? nxi->nx_id : 0, ret);
19208 +       return ret;
19209 +}
19210 +/* ip_v4_find_src: choose or validate fl->fl4_src for network context nxi; 0 on success (also for nxi == NULL), else -EPERM */
19211 +int ip_v4_find_src(struct net *net, struct nx_info *nxi,
19212 +       struct rtable **rp, struct flowi *fl)
19213 +{
19214 +       if (!nxi)
19215 +               return 0;
19216 +
19217 +       /* FIXME: handle lback only case */
19218 +       if (!NX_IPV4(nxi))
19219 +               return -EPERM;
19220 +
19221 +       vxdprintk(VXD_CBIT(net, 4),
19222 +               "ip_v4_find_src(%p[#%u]) " NIPQUAD_FMT " -> " NIPQUAD_FMT,
19223 +               nxi, nxi ? nxi->nx_id : 0,
19224 +               NIPQUAD(fl->fl4_src), NIPQUAD(fl->fl4_dst));
19225 +
19226 +       /* single IP is unconditional */
19227 +       if (nx_info_flags(nxi, NXF_SINGLE_IP, 0) &&
19228 +               (fl->fl4_src == INADDR_ANY))
19229 +               fl->fl4_src = nxi->v4.ip[0].s_addr;
19230 +
19231 +       if (fl->fl4_src == INADDR_ANY) {
19232 +               struct nx_addr_v4 *ptr;
19233 +               __be32 found = 0;
19234 +               int err;
19235 +               /* first try: route lookup with no source set; keep its rt_src if the context may bind to it */
19236 +               err = __ip_route_output_key(net, rp, fl);
19237 +               if (!err) {
19238 +                       found = (*rp)->rt_src;
19239 +                       ip_rt_put(*rp);
19240 +                       vxdprintk(VXD_CBIT(net, 4),
19241 +                               "ip_v4_find_src(%p[#%u]) rok[%u]: " NIPQUAD_FMT,
19242 +                               nxi, nxi ? nxi->nx_id : 0, fl->oif, NIPQUAD(found));
19243 +                       if (v4_addr_in_nx_info(nxi, found, NXA_MASK_BIND))
19244 +                               goto found;
19245 +               }
19246 +               /* otherwise retry with each context entry whose network contains the last found source */
19247 +               for (ptr = &nxi->v4; ptr; ptr = ptr->next) {
19248 +                       __be32 primary = ptr->ip[0].s_addr;
19249 +                       __be32 mask = ptr->mask.s_addr;
19250 +                       __be32 neta = primary & mask;
19251 +
19252 +                       vxdprintk(VXD_CBIT(net, 4), "ip_v4_find_src(%p[#%u]) chk: "
19253 +                               NIPQUAD_FMT "/" NIPQUAD_FMT "/" NIPQUAD_FMT,
19254 +                               nxi, nxi ? nxi->nx_id : 0, NIPQUAD(primary),
19255 +                               NIPQUAD(mask), NIPQUAD(neta));
19256 +                       if ((found & mask) != neta)
19257 +                               continue;
19258 +                       /* accept primary only if the route lookup confirms it as source */
19259 +                       fl->fl4_src = primary;
19260 +                       err = __ip_route_output_key(net, rp, fl);
19261 +                       vxdprintk(VXD_CBIT(net, 4),
19262 +                               "ip_v4_find_src(%p[#%u]) rok[%u]: " NIPQUAD_FMT,
19263 +                               nxi, nxi ? nxi->nx_id : 0, fl->oif, NIPQUAD(primary));
19264 +                       if (!err) {
19265 +                               found = (*rp)->rt_src;
19266 +                               ip_rt_put(*rp);
19267 +                               if (found == primary)
19268 +                                       goto found;
19269 +                       }
19270 +               }
19271 +               /* still no source ip? */
19272 +               found = ipv4_is_loopback(fl->fl4_dst)
19273 +                       ? IPI_LOOPBACK : nxi->v4.ip[0].s_addr;
19274 +       found:
19275 +               /* assign src ip to flow */
19276 +               fl->fl4_src = found;
19277 +
19278 +       } else {
19279 +               if (!v4_addr_in_nx_info(nxi, fl->fl4_src, NXA_MASK_BIND))
19280 +                       return -EPERM;
19281 +       }
19282 +       /* loopback: remap src/dst to the context's v4_lback, or refuse a loopback dst unless NXF_LBACK_ALLOW is set */
19283 +       if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0)) {
19284 +               if (ipv4_is_loopback(fl->fl4_dst))
19285 +                       fl->fl4_dst = nxi->v4_lback.s_addr;
19286 +               if (ipv4_is_loopback(fl->fl4_src))
19287 +                       fl->fl4_src = nxi->v4_lback.s_addr;
19288 +       } else if (ipv4_is_loopback(fl->fl4_dst) &&
19289 +               !nx_info_flags(nxi, NXF_LBACK_ALLOW, 0))
19290 +               return -EPERM;
19291 +
19292 +       return 0;
19293 +}
19294 +
19295 +EXPORT_SYMBOL_GPL(ip_v4_find_src);
19296 +
19297 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/init.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/init.c
19298 --- linux-2.6.30.2/kernel/vserver/init.c        1970-01-01 01:00:00.000000000 +0100
19299 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/init.c     2009-07-04 01:11:39.000000000 +0200
19300 @@ -0,0 +1,45 @@
19301 +/*
19302 + *  linux/kernel/vserver/init.c
19303 + *
19304 + *  Virtual Server Init
19305 + *
19306 + *  Copyright (C) 2004-2007  Herbert Pötzl
19307 + *
19308 + *  V0.01  basic structure
19309 + *
19310 + */
19311 +
19312 +#include <linux/init.h>
19313 +
19314 +int    vserver_register_sysctl(void);
19315 +void   vserver_unregister_sysctl(void);
19316 +
19317 +/* init_vserver: registers the vserver sysctls when CONFIG_VSERVER_DEBUG is set; always returns 0 */
19318 +static int __init init_vserver(void)
19319 +{
19320 +       int ret = 0;
19321 +
19322 +#ifdef CONFIG_VSERVER_DEBUG
19323 +       vserver_register_sysctl();      /* NOTE(review): return value is ignored */
19324 +#endif
19325 +       return ret;
19326 +}
19327 +
19328 +/* exit_vserver: unregisters the vserver sysctls when CONFIG_VSERVER_DEBUG is set */
19329 +static void __exit exit_vserver(void)
19330 +{
19331 +
19332 +#ifdef CONFIG_VSERVER_DEBUG
19333 +       vserver_unregister_sysctl();
19334 +#endif
19335 +       return;
19336 +}
19337 +
19338 +/* FIXME: GFP_ZONETYPES gone
19339 +long vx_slab[GFP_ZONETYPES]; */
19340 +long vx_area;
19341 +
19342 +
19343 +module_init(init_vserver);
19344 +module_exit(exit_vserver);
19345 +
19346 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/inode.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/inode.c
19347 --- linux-2.6.30.2/kernel/vserver/inode.c       1970-01-01 01:00:00.000000000 +0100
19348 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/inode.c    2009-07-04 01:11:39.000000000 +0200
19349 @@ -0,0 +1,422 @@
19350 +/*
19351 + *  linux/kernel/vserver/inode.c
19352 + *
19353 + *  Virtual Server: File System Support
19354 + *
19355 + *  Copyright (C) 2004-2007  Herbert Pötzl
19356 + *
19357 + *  V0.01  separated from vcontext V0.05
19358 + *  V0.02  moved to tag (instead of xid)
19359 + *
19360 + */
19361 +
19362 +#include <linux/tty.h>
19363 +#include <linux/proc_fs.h>
19364 +#include <linux/devpts_fs.h>
19365 +#include <linux/fs.h>
19366 +#include <linux/file.h>
19367 +#include <linux/mount.h>
19368 +#include <linux/parser.h>
19369 +#include <linux/namei.h>
19370 +#include <linux/vserver/inode.h>
19371 +#include <linux/vserver/inode_cmd.h>
19372 +#include <linux/vs_base.h>
19373 +#include <linux/vs_tag.h>
19374 +
19375 +#include <asm/uaccess.h>
19376 +
19377 +/* __vc_get_iattr: fill *flags and *mask (and *tag where the inode carries one) from inode in; -ESRCH without inode/sb, else 0 */
19378 +static int __vc_get_iattr(struct inode *in, uint32_t *tag, uint32_t *flags, uint32_t *mask)
19379 +{
19380 +       struct proc_dir_entry *entry;
19381 +
19382 +       if (!in || !in->i_sb)
19383 +               return -ESRCH;
19384 +       /* IATTR_TAG is always reported in *flags; *mask lists the attributes that apply to this inode */
19385 +       *flags = IATTR_TAG
19386 +               | (IS_BARRIER(in) ? IATTR_BARRIER : 0)
19387 +               | (IS_IXUNLINK(in) ? IATTR_IXUNLINK : 0)
19388 +               | (IS_IMMUTABLE(in) ? IATTR_IMMUTABLE : 0);
19389 +       *mask = IATTR_IXUNLINK | IATTR_IMMUTABLE;
19390 +
19391 +       if (S_ISDIR(in->i_mode))
19392 +               *mask |= IATTR_BARRIER;
19393 +       /* *tag is written only for tagged inodes here and for devpts below; otherwise it keeps the caller's value */
19394 +       if (IS_TAGGED(in)) {
19395 +               *tag = in->i_tag;
19396 +               *mask |= IATTR_TAG;
19397 +       }
19398 +
19399 +       switch (in->i_sb->s_magic) {
19400 +       case PROC_SUPER_MAGIC:
19401 +               entry = PROC_I(in)->pde;
19402 +
19403 +               /* check for specific inodes? */
19404 +               if (entry)
19405 +                       *mask |= IATTR_FLAGS;
19406 +               if (entry)
19407 +                       *flags |= (entry->vx_flags & IATTR_FLAGS);
19408 +               else
19409 +                       *flags |= (PROC_I(in)->vx_flags & IATTR_FLAGS);
19410 +               break;
19411 +
19412 +       case DEVPTS_SUPER_MAGIC:
19413 +               *tag = in->i_tag;
19414 +               *mask |= IATTR_TAG;
19415 +               break;
19416 +
19417 +       default:
19418 +               break;
19419 +       }
19420 +       return 0;
19421 +}
19422 +/* vc_get_iattr: vserver command; looks up vc_data.name and returns the inode's tag/flags/mask to userspace */
19423 +int vc_get_iattr(void __user *data)
19424 +{
19425 +       struct path path;
19426 +       struct vcmd_ctx_iattr_v1 vc_data = { .tag = -1 };
19427 +       int ret;
19428 +
19429 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19430 +               return -EFAULT;
19431 +
19432 +       ret = user_lpath(vc_data.name, &path);
19433 +       if (!ret) {
19434 +               ret = __vc_get_iattr(path.dentry->d_inode,
19435 +                       &vc_data.tag, &vc_data.flags, &vc_data.mask);
19436 +               path_put(&path);
19437 +       }
19438 +       if (ret)
19439 +               return ret;
19440 +       /* nothing is copied back when the lookup or the query failed */
19441 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19442 +               ret = -EFAULT;
19443 +       return ret;
19444 +}
19445 +
19446 +#ifdef CONFIG_COMPAT
19447 +/* vc_get_iattr_x32: compat variant of vc_get_iattr(); the path pointer comes from compat_ptr(name_ptr) */
19448 +int vc_get_iattr_x32(void __user *data)
19449 +{
19450 +       struct path path;
19451 +       struct vcmd_ctx_iattr_v1_x32 vc_data = { .tag = -1 };
19452 +       int ret;
19453 +
19454 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19455 +               return -EFAULT;
19456 +
19457 +       ret = user_lpath(compat_ptr(vc_data.name_ptr), &path);
19458 +       if (!ret) {
19459 +               ret = __vc_get_iattr(path.dentry->d_inode,
19460 +                       &vc_data.tag, &vc_data.flags, &vc_data.mask);
19461 +               path_put(&path);
19462 +       }
19463 +       if (ret)
19464 +               return ret;
19465 +       /* nothing is copied back when the lookup or the query failed */
19466 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19467 +               ret = -EFAULT;
19468 +       return ret;
19469 +}
19470 +
19471 +#endif /* CONFIG_COMPAT */
19472 +
19473 +/* vc_fget_iattr: like vc_get_iattr(), but for the inode behind open file descriptor fd; -EBADF for an unusable fd */
19474 +int vc_fget_iattr(uint32_t fd, void __user *data)
19475 +{
19476 +       struct file *filp;
19477 +       struct vcmd_ctx_fiattr_v0 vc_data = { .tag = -1 };
19478 +       int ret;
19479 +
19480 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19481 +               return -EFAULT;
19482 +       filp = fget(fd);
19483 +       if (!filp)
19484 +               return -EBADF;
19485 +       if (!filp->f_dentry || !filp->f_dentry->d_inode) {
19486 +               fput(filp);     /* fget() succeeded: drop its reference before bailing out */
19487 +               return -EBADF;
19488 +       }
19489 +       ret = __vc_get_iattr(filp->f_dentry->d_inode,
19490 +               &vc_data.tag, &vc_data.flags, &vc_data.mask);
19491 +       fput(filp);
19492 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19493 +               ret = -EFAULT;
19494 +       return ret;
19495 +}
19496 +
19497 +/* __vc_set_iattr: apply the attributes selected by *mask (values in *flags, *tag) to de's inode under i_mutex; 0 or -errno */
19498 +static int __vc_set_iattr(struct dentry *de, uint32_t *tag, uint32_t *flags, uint32_t *mask)
19499 +{
19500 +       struct inode *in = de->d_inode;
19501 +       int error = 0, is_proc = 0, has_tag = 0;
19502 +       struct iattr attr = { 0 };
19503 +
19504 +       if (!in || !in->i_sb)
19505 +               return -ESRCH;
19506 +       /* IATTR_FLAGS bits are accepted on procfs inodes only */
19507 +       is_proc = (in->i_sb->s_magic == PROC_SUPER_MAGIC);
19508 +       if ((*mask & IATTR_FLAGS) && !is_proc)
19509 +               return -EINVAL;
19510 +       /* a tag can only be set on tagged inodes or on devpts */
19511 +       has_tag = IS_TAGGED(in) ||
19512 +               (in->i_sb->s_magic == DEVPTS_SUPER_MAGIC);
19513 +       if ((*mask & IATTR_TAG) && !has_tag)
19514 +               return -EINVAL;
19515 +
19516 +       mutex_lock(&in->i_mutex);
19517 +       if (*mask & IATTR_TAG) {
19518 +               attr.ia_tag = *tag;
19519 +               attr.ia_valid |= ATTR_TAG;
19520 +       }
19521 +       /* procfs flags: stored in the proc inode and mirrored into its proc_dir_entry, if any */
19522 +       if (*mask & IATTR_FLAGS) {
19523 +               struct proc_dir_entry *entry = PROC_I(in)->pde;
19524 +               unsigned int iflags = PROC_I(in)->vx_flags;
19525 +
19526 +               iflags = (iflags & ~(*mask & IATTR_FLAGS))
19527 +                       | (*flags & IATTR_FLAGS);
19528 +               PROC_I(in)->vx_flags = iflags;
19529 +               if (entry)
19530 +                       entry->vx_flags = iflags;
19531 +       }
19532 +       /* in-core inode flags are changed first, then handed to the filesystem's sync_flags() if it has one */
19533 +       if (*mask & (IATTR_BARRIER | IATTR_IXUNLINK | IATTR_IMMUTABLE)) {
19534 +               if (*mask & IATTR_IMMUTABLE) {
19535 +                       if (*flags & IATTR_IMMUTABLE)
19536 +                               in->i_flags |= S_IMMUTABLE;
19537 +                       else
19538 +                               in->i_flags &= ~S_IMMUTABLE;
19539 +               }
19540 +               if (*mask & IATTR_IXUNLINK) {
19541 +                       if (*flags & IATTR_IXUNLINK)
19542 +                               in->i_flags |= S_IXUNLINK;
19543 +                       else
19544 +                               in->i_flags &= ~S_IXUNLINK;
19545 +               }
19546 +               if (S_ISDIR(in->i_mode) && (*mask & IATTR_BARRIER)) {
19547 +                       if (*flags & IATTR_BARRIER)
19548 +                               in->i_vflags |= V_BARRIER;
19549 +                       else
19550 +                               in->i_vflags &= ~V_BARRIER;
19551 +               }
19552 +               if (in->i_op && in->i_op->sync_flags) {
19553 +                       error = in->i_op->sync_flags(in);
19554 +                       if (error)
19555 +                               goto out;
19556 +               }
19557 +       }
19558 +       /* the tag change goes through setattr(), or the generic inode_change_ok()/inode_setattr() pair */
19559 +       if (attr.ia_valid) {
19560 +               if (in->i_op && in->i_op->setattr)
19561 +                       error = in->i_op->setattr(de, &attr);
19562 +               else {
19563 +                       error = inode_change_ok(in, &attr);
19564 +                       if (!error)
19565 +                               error = inode_setattr(in, &attr);
19566 +               }
19567 +       }
19568 +
19569 +out:
19570 +       mutex_unlock(&in->i_mutex);
19571 +       return error;
19572 +}
19573 +/* vc_set_iattr: vserver command; needs CAP_LINUX_IMMUTABLE; applies tag/flags/mask to the inode named by vc_data.name */
19574 +int vc_set_iattr(void __user *data)
19575 +{
19576 +       struct path path;
19577 +       struct vcmd_ctx_iattr_v1 vc_data;
19578 +       int ret;
19579 +
19580 +       if (!capable(CAP_LINUX_IMMUTABLE))
19581 +               return -EPERM;
19582 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19583 +               return -EFAULT;
19584 +
19585 +       ret = user_lpath(vc_data.name, &path);
19586 +       if (!ret) {
19587 +               ret = __vc_set_iattr(path.dentry,
19588 +                       &vc_data.tag, &vc_data.flags, &vc_data.mask);
19589 +               path_put(&path);
19590 +       }
19591 +       /* vc_data is copied back even when the lookup or the update failed */
19592 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19593 +               ret = -EFAULT;
19594 +       return ret;
19595 +}
19596 +
19597 +#ifdef CONFIG_COMPAT
19598 +/* vc_set_iattr_x32: compat variant of vc_set_iattr(); the path pointer comes from compat_ptr(name_ptr) */
19599 +int vc_set_iattr_x32(void __user *data)
19600 +{
19601 +       struct path path;
19602 +       struct vcmd_ctx_iattr_v1_x32 vc_data;
19603 +       int ret;
19604 +
19605 +       if (!capable(CAP_LINUX_IMMUTABLE))
19606 +               return -EPERM;
19607 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19608 +               return -EFAULT;
19609 +
19610 +       ret = user_lpath(compat_ptr(vc_data.name_ptr), &path);
19611 +       if (!ret) {
19612 +               ret = __vc_set_iattr(path.dentry,
19613 +                       &vc_data.tag, &vc_data.flags, &vc_data.mask);
19614 +               path_put(&path);
19615 +       }
19616 +       /* vc_data is copied back even when the lookup or the update failed */
19617 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19618 +               ret = -EFAULT;
19619 +       return ret;
19620 +}
19621 +
19622 +#endif /* CONFIG_COMPAT */
19623 +/* vc_fset_iattr: like vc_set_iattr(), but for the inode behind open file descriptor fd; -EBADF for an unusable fd */
19624 +int vc_fset_iattr(uint32_t fd, void __user *data)
19625 +{
19626 +       struct file *filp;
19627 +       struct vcmd_ctx_fiattr_v0 vc_data;
19628 +       int ret;
19629 +
19630 +       if (!capable(CAP_LINUX_IMMUTABLE))
19631 +               return -EPERM;
19632 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19633 +               return -EFAULT;
19634 +       filp = fget(fd);
19635 +       if (!filp)
19636 +               return -EBADF;
19637 +       if (!filp->f_dentry || !filp->f_dentry->d_inode) {
19638 +               fput(filp);     /* fget() succeeded: drop its reference before bailing out */
19639 +               return -EBADF;
19640 +       }
19641 +       ret = __vc_set_iattr(filp->f_dentry, &vc_data.tag,
19642 +               &vc_data.flags, &vc_data.mask);
19643 +       fput(filp);
19644 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19645 +               return -EFAULT;
19646 +       return ret;
19647 +}
19648 +
19649 +/* mount option tokens matched by dx_parse_tag(); the tag/notag/tagid patterns exist only with CONFIG_PROPAGATE */
19650 +enum { Opt_notagcheck, Opt_tag, Opt_notag, Opt_tagid, Opt_err };
19651 +
19652 +static match_table_t tokens = {
19653 +       {Opt_notagcheck, "notagcheck"},
19654 +#ifdef CONFIG_PROPAGATE
19655 +       {Opt_notag, "notag"},
19656 +       {Opt_tag, "tag"},
19657 +       {Opt_tagid, "tagid=%u"},
19658 +#endif
19659 +       {Opt_err, NULL}
19660 +};
19661 +
19662 +/* __dx_parse_remove: cut the first occurrence of opt, up to but not including the next ',', out of string in place */
19663 +static void __dx_parse_remove(char *string, char *opt)
19664 +{
19665 +       char *p = strstr(string, opt);
19666 +       char *q = p;
19667 +       /* q skips to the end of the option, the tail is shifted down to p, leftover bytes are zeroed */
19668 +       if (p) {
19669 +               while (*q != '\0' && *q != ',')
19670 +                       q++;
19671 +               while (*q)
19672 +                       *p++ = *q++;
19673 +               while (*p)
19674 +                       *p++ = '\0';
19675 +       }
19676 +}
19677 +/* dx_parse_tag: scan a comma separated option string for tagging options; returns the OR of the flag bits that were set (0 if none) */
19678 +int dx_parse_tag(char *string, tag_t *tag, int remove, int *mnt_flags,
19679 +                unsigned long *flags)
19680 +{
19681 +       int set = 0;
19682 +       substring_t args[MAX_OPT_ARGS];
19683 +       int token, option = 0;
19684 +       char *s, *p, *opts;
19685 +
19686 +       if (!string)
19687 +               return 0;
19688 +       s = kstrdup(string, GFP_KERNEL | GFP_ATOMIC);   /* NOTE(review): unusual mix of GFP flags - confirm intent */
19689 +       if (!s)
19690 +               return 0;
19691 +       /* NOTE(review): strsep() NUL-terminates tokens inside s before __dx_parse_remove(s, ...) and the final strcpy() run - verify the result */
19692 +       opts = s;
19693 +       while ((p = strsep(&opts, ",")) != NULL) {
19694 +               token = match_token(p, tokens, args);
19695 +
19696 +               vxdprintk(VXD_CBIT(tag, 7),
19697 +                       "dx_parse_tag(»%s«): %d:#%d",
19698 +                       p, token, option);
19699 +
19700 +               switch (token) {
19701 +#ifdef CONFIG_PROPAGATE
19702 +               case Opt_tag:
19703 +                       if (tag)
19704 +                               *tag = 0;
19705 +                       if (remove)
19706 +                               __dx_parse_remove(s, "tag");
19707 +                       *mnt_flags |= MNT_TAGID;
19708 +                       set |= MNT_TAGID;
19709 +                       break;
19710 +               case Opt_notag:
19711 +                       if (remove)
19712 +                               __dx_parse_remove(s, "notag");
19713 +                       *mnt_flags |= MNT_NOTAG;
19714 +                       set |= MNT_NOTAG;
19715 +                       break;
19716 +               case Opt_tagid:
19717 +                       if (tag && !match_int(args, &option))
19718 +                               *tag = option;
19719 +                       if (remove)
19720 +                               __dx_parse_remove(s, "tagid");
19721 +                       *mnt_flags |= MNT_TAGID;
19722 +                       set |= MNT_TAGID;
19723 +                       break;
19724 +#endif
19725 +               case Opt_notagcheck:
19726 +                       if (remove)
19727 +                               __dx_parse_remove(s, "notagcheck");
19728 +                       *flags |= MS_NOTAGCHECK;
19729 +                       set |= MS_NOTAGCHECK;
19730 +                       break;
19731 +               }       /* any other token is left alone */
19732 +       }
19733 +       if (set)
19734 +               strcpy(string, s);      /* the caller's string is rewritten only when an option was recognized */
19735 +       kfree(s);
19736 +       return set;
19737 +}
19738 +
19739 +#ifdef CONFIG_PROPAGATE
19740 +/* __dx_propagate_tag: if nd's mount has MNT_TAGID set, overwrite inode->i_tag with the mount's tag; inode must be non-NULL */
19741 +void __dx_propagate_tag(struct nameidata *nd, struct inode *inode)
19742 +{
19743 +       tag_t new_tag = 0;
19744 +       struct vfsmount *mnt;
19745 +       int propagate;
19746 +
19747 +       if (!nd)
19748 +               return;
19749 +       mnt = nd->path.mnt;
19750 +       if (!mnt)
19751 +               return;
19752 +
19753 +       propagate = (mnt->mnt_flags & MNT_TAGID);
19754 +       if (propagate)
19755 +               new_tag = mnt->mnt_tag;
19756 +
19757 +       vxdprintk(VXD_CBIT(tag, 7),
19758 +               "dx_propagate_tag(%p[#%lu.%d]): %d,%d",
19759 +               inode, inode->i_ino, inode->i_tag,
19760 +               new_tag, (propagate) ? 1 : 0);
19761 +
19762 +       if (propagate)
19763 +               inode->i_tag = new_tag;
19764 +}
19765 +
19766 +#include <linux/module.h>
19767 +
19768 +EXPORT_SYMBOL_GPL(__dx_propagate_tag);
19769 +
19770 +#endif /* CONFIG_PROPAGATE */
19771 +
19772 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/Kconfig linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/Kconfig
19773 --- linux-2.6.30.2/kernel/vserver/Kconfig       1970-01-01 01:00:00.000000000 +0100
19774 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/Kconfig    2009-07-04 01:11:39.000000000 +0200
19775 @@ -0,0 +1,251 @@
19776 +#
19777 +# Linux VServer configuration
19778 +#
19779 +
19780 +menu "Linux VServer"
19781 +
19782 +config VSERVER_AUTO_LBACK
19783 +       bool    "Automatically Assign Loopback IP"
19784 +       default y
19785 +       help
19786 +         Automatically assign a guest specific loopback
19787 +         IP and add it to the kernel network stack on
19788 +         startup.
19789 +
19790 +config VSERVER_AUTO_SINGLE
19791 +       bool    "Automatic Single IP Special Casing"
19792 +       depends on EXPERIMENTAL
19793 +       default y
19794 +       help
19795 +         This allows network contexts with a single IP to
19796 +         automatically remap 0.0.0.0 bindings to that IP,
19797 +         avoiding further network checks and improving
19798 +         performance.
19799 +
19800 +         (note: such guests do not allow changing the IP
19801 +          on the fly and do not show loopback addresses)
19802 +
19803 +config VSERVER_COWBL
19804 +       bool    "Enable COW Immutable Link Breaking"
19805 +       default y
19806 +       help
19807 +         This enables the COW (Copy-On-Write) link break code.
19808 +         It allows you to treat unified files like normal files
19809 +         when writing to them (which will implicitly break the
19810 +         link and create a copy of the unified file)
19811 +
19812 +config VSERVER_VTIME
19813 +       bool    "Enable Virtualized Guest Time"
19814 +       depends on EXPERIMENTAL
19815 +       default n
19816 +       help
19817 +         This enables per guest time offsets to allow for
19818 +         adjusting the system clock individually per guest.
19819 +         This adds some overhead to the time functions and
19820 +         therefore should not be enabled without good reason.
19821 +
19822 +config VSERVER_DEVICE
19823 +       bool    "Enable Guest Device Mapping"
19824 +       depends on EXPERIMENTAL
19825 +       default n
19826 +       help
19827 +         This enables generic device remapping.
19828 +
19829 +config VSERVER_PROC_SECURE
19830 +       bool    "Enable Proc Security"
19831 +       depends on PROC_FS
19832 +       default y
19833 +       help
19834 +         This configures ProcFS security to initially hide
19835 +         non-process entries for all contexts except the main and
19836 +         spectator context (i.e. for all guests), which is a secure
19837 +         default.
19838 +
19839 +         (note: on 1.2x the entries were visible by default)
19840 +
19841 +config VSERVER_HARDCPU
19842 +       bool    "Enable Hard CPU Limits"
19843 +       default y
19844 +       help
19845 +         Activate the Hard CPU Limits
19846 +
19847 +         This will compile in code that allows the Token Bucket
19848 +         Scheduler to put processes on hold when a context's
19849 +         tokens are depleted (provided that its per-context
19850 +         sched_hard flag is set).
19851 +
19852 +         Processes belonging to that context will not be able
19853 +         to consume CPU resources again until a per-context
19854 +         configured minimum of tokens has been reached.
19855 +
19856 +config VSERVER_IDLETIME
19857 +       bool    "Avoid idle CPUs by skipping Time"
19858 +       depends on VSERVER_HARDCPU
19859 +       default y
19860 +       help
19861 +         This option allows the scheduler to artificially
19862 +         advance time (per cpu) when otherwise the idle
19863 +         task would be scheduled, thus keeping the cpu
19864 +         busy and sharing the available resources among
19865 +         certain contexts.
19866 +
19867 +config VSERVER_IDLELIMIT
19868 +       bool    "Limit the IDLE task"
19869 +       depends on VSERVER_HARDCPU
19870 +       default n
19871 +       help
19872 +         Limit the idle slices, so that the next context
19873 +         will be scheduled as soon as possible.
19874 +
19875 +         This might improve interactivity and latency, but
19876 +         will also marginally increase scheduling overhead.
19877 +
19878 +choice
19879 +       prompt  "Persistent Inode Tagging"
19880 +       default TAGGING_ID24
19881 +       help
19882 +         This adds persistent context information to filesystems
19883 +         mounted with the tagxid option. Tagging is a requirement
19884 +         for per-context disk limits and per-context quota.
19885 +
19886 +
19887 +config TAGGING_NONE
19888 +       bool    "Disabled"
19889 +       help
19890 +         do not store per-context information in inodes.
19891 +
19892 +config TAGGING_UID16
19893 +       bool    "UID16/GID32"
19894 +       help
19895 +         reduces UID to 16 bit, but leaves GID at 32 bit.
19896 +
19897 +config TAGGING_GID16
19898 +       bool    "UID32/GID16"
19899 +       help
19900 +         reduces GID to 16 bit, but leaves UID at 32 bit.
19901 +
19902 +config TAGGING_ID24
19903 +       bool    "UID24/GID24"
19904 +       help
19905 +         uses the upper 8bit from UID and GID for XID tagging
19906 +         which leaves 24bit for UID/GID each, which should be
19907 +         more than sufficient for normal use.
19908 +
19909 +config TAGGING_INTERN
19910 +       bool    "UID32/GID32"
19911 +       help
19912 +         this uses otherwise reserved inode fields in the on
19913 +         disk representation, which limits the use to a few
19914 +         filesystems (currently ext2 and ext3)
19915 +
19916 +endchoice
19917 +
19918 +config TAG_NFSD
19919 +       bool    "Tag NFSD User Auth and Files"
19920 +       default n
19921 +       help
19922 +         Enable this if you do want the in-kernel NFS
19923 +         Server to use the tagging specified above.
19924 +         (will require patched clients too)
19925 +
19926 +config VSERVER_PRIVACY
19927 +       bool    "Honor Privacy Aspects of Guests"
19928 +       default n
19929 +       help
19930 +         When enabled, most context checks will disallow
19931 +         access to structures assigned to a specific context,
19932 +         like ptys or loop devices.
19933 +
19934 +config VSERVER_CONTEXTS
19935 +       int     "Maximum number of Contexts (1-65533)"  if EMBEDDED
19936 +       range 1 65533
19937 +       default "768"   if 64BIT
19938 +       default "256"
19939 +       help
19940 +         This setting will optimize certain data structures
19941 +         and memory allocations according to the expected
19942 +         maximum.
19943 +
19944 +         note: this is not a strict upper limit.
19945 +
19946 +config VSERVER_WARN
19947 +       bool    "VServer Warnings"
19948 +       default y
19949 +       help
19950 +         This enables various runtime warnings, which will
19951 +         notify about potential manipulation attempts or
19952 +         resource shortage. It is generally considered to
19953 +         be a good idea to have that enabled.
19954 +
19955 +config VSERVER_DEBUG
19956 +       bool    "VServer Debugging Code"
19957 +       default n
19958 +       help
19959 +         Set this to yes if you want to be able to activate
19960 +         debugging output at runtime. It adds a very small
19961 +         overhead to all vserver related functions and
19962 +         increases the kernel size by about 20k.
19963 +
19964 +config VSERVER_HISTORY
19965 +       bool    "VServer History Tracing"
19966 +       depends on VSERVER_DEBUG
19967 +       default n
19968 +       help
19969 +         Set this to yes if you want to record the history of
19970 +         linux-vserver activities, so they can be replayed in
19971 +         the event of a kernel panic or oops.
19972 +
19973 +config VSERVER_HISTORY_SIZE
19974 +       int     "Per-CPU History Size (32-65536)"
19975 +       depends on VSERVER_HISTORY
19976 +       range 32 65536
19977 +       default 64
19978 +       help
19979 +         This allows you to specify the number of entries in
19980 +         the per-CPU history buffer.
19981 +
19982 +config VSERVER_MONITOR
19983 +       bool    "VServer Scheduling Monitor"
19984 +       depends on VSERVER_DISABLED
19985 +       default n
19986 +       help
19987 +         Set this to yes if you want to record the scheduling
19988 +         decisions, so that they can be relayed to userspace
19989 +         for detailed analysis.
19990 +
19991 +config VSERVER_MONITOR_SIZE
19992 +       int     "Per-CPU Monitor Queue Size (32-65536)"
19993 +       depends on VSERVER_MONITOR
19994 +       range 32 65536
19995 +       default 1024
19996 +       help
19997 +         This allows you to specify the number of entries in
19998 +         the per-CPU scheduling monitor buffer.
19999 +
20000 +config VSERVER_MONITOR_SYNC
20001 +       int     "Per-CPU Monitor Sync Interval (0-65536)"
20002 +       depends on VSERVER_MONITOR
20003 +       range 0 65536
20004 +       default 256
20005 +       help
20006 +         This allows you to specify the interval in ticks
20007 +         when a time sync entry is inserted.
20008 +
20009 +endmenu
20010 +
20011 +
20012 +config VSERVER
20013 +       bool
20014 +       default y
20015 +       select NAMESPACES
20016 +       select UTS_NS
20017 +       select IPC_NS
20018 +       select USER_NS
20019 +       select SYSVIPC
20020 +
20021 +config VSERVER_SECURITY
20022 +       bool
20023 +       depends on SECURITY
20024 +       default y
20025 +       select SECURITY_CAPABILITIES
20026 +
20027 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/limit.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/limit.c
20028 --- linux-2.6.30.2/kernel/vserver/limit.c       1970-01-01 01:00:00.000000000 +0100
20029 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/limit.c    2009-07-04 01:11:39.000000000 +0200
20030 @@ -0,0 +1,319 @@
20031 +/*
20032 + *  linux/kernel/vserver/limit.c
20033 + *
20034 + *  Virtual Server: Context Limits
20035 + *
20036 + *  Copyright (C) 2004-2007  Herbert Pötzl
20037 + *
20038 + *  V0.01  broken out from vcontext V0.05
20039 + *  V0.02  changed vcmds to vxi arg
20040 + *
20041 + */
20042 +
20043 +#include <linux/sched.h>
20044 +#include <linux/module.h>
20045 +#include <linux/vs_limit.h>
20046 +#include <linux/vserver/limit.h>
20047 +#include <linux/vserver/limit_cmd.h>
20048 +
20049 +#include <asm/uaccess.h>
20050 +
20051 +
20052 +const char *vlimit_name[NUM_LIMITS] = {
20053 +       [RLIMIT_CPU]            = "CPU",
20054 +       [RLIMIT_RSS]            = "RSS",
20055 +       [RLIMIT_NPROC]          = "NPROC",
20056 +       [RLIMIT_NOFILE]         = "NOFILE",
20057 +       [RLIMIT_MEMLOCK]        = "VML",
20058 +       [RLIMIT_AS]             = "VM",
20059 +       [RLIMIT_LOCKS]          = "LOCKS",
20060 +       [RLIMIT_SIGPENDING]     = "SIGP",
20061 +       [RLIMIT_MSGQUEUE]       = "MSGQ",
20062 +
20063 +       [VLIMIT_NSOCK]          = "NSOCK",
20064 +       [VLIMIT_OPENFD]         = "OPENFD",
20065 +       [VLIMIT_ANON]           = "ANON",
20066 +       [VLIMIT_SHMEM]          = "SHMEM",
20067 +       [VLIMIT_DENTRY]         = "DENTRY",
20068 +};
20069 +
20070 +EXPORT_SYMBOL_GPL(vlimit_name);
20071 +
20072 +#define MASK_ENTRY(x)  (1 << (x))
20073 +
20074 +const struct vcmd_ctx_rlimit_mask_v0 vlimit_mask = {
20075 +               /* minimum */
20076 +       0
20077 +       ,       /* softlimit */
20078 +       MASK_ENTRY( RLIMIT_RSS          ) |
20079 +       MASK_ENTRY( VLIMIT_ANON         ) |
20080 +       0
20081 +       ,       /* maximum */
20082 +       MASK_ENTRY( RLIMIT_RSS          ) |
20083 +       MASK_ENTRY( RLIMIT_NPROC        ) |
20084 +       MASK_ENTRY( RLIMIT_NOFILE       ) |
20085 +       MASK_ENTRY( RLIMIT_MEMLOCK      ) |
20086 +       MASK_ENTRY( RLIMIT_AS           ) |
20087 +       MASK_ENTRY( RLIMIT_LOCKS        ) |
20088 +       MASK_ENTRY( RLIMIT_MSGQUEUE     ) |
20089 +
20090 +       MASK_ENTRY( VLIMIT_NSOCK        ) |
20091 +       MASK_ENTRY( VLIMIT_OPENFD       ) |
20092 +       MASK_ENTRY( VLIMIT_ANON         ) |
20093 +       MASK_ENTRY( VLIMIT_SHMEM        ) |
20094 +       MASK_ENTRY( VLIMIT_DENTRY       ) |
20095 +       0
20096 +};
20097 +               /* accounting only */
20098 +uint32_t account_mask =
20099 +       MASK_ENTRY( VLIMIT_SEMARY       ) |
20100 +       MASK_ENTRY( VLIMIT_NSEMS        ) |
20101 +       MASK_ENTRY( VLIMIT_MAPPED       ) |
20102 +       0;
20103 +
20104 +/* ids arrive from userspace: only in-range ids listed in vlimit_mask pass */
20105 +static int is_valid_vlimit(int id)
20106 +{
20107 +       uint32_t mask = vlimit_mask.minimum |
20108 +               vlimit_mask.softlimit | vlimit_mask.maximum;
20109 +       return id >= 0 && id < NUM_LIMITS && (mask & (1U << id));
20110 +}
20111 +
20112 +static int is_accounted_vlimit(int id)
20113 +{
20114 +       /* accounted = valid or accounting-only; a bad id is never shifted */
20115 +       return id >= 0 && id < NUM_LIMITS &&
20116 +               (is_valid_vlimit(id) || (account_mask & (1U << id)));
20117 +}
20118 +
20119 +
20120 +static inline uint64_t vc_get_soft(struct vx_info *vxi, int id)
20121 +{
20122 +       rlim_t limit = __rlim_soft(&vxi->limit, id);
20123 +       return VX_VLIM(limit);
20124 +}
20125 +
20126 +static inline uint64_t vc_get_hard(struct vx_info *vxi, int id)
20127 +{
20128 +       rlim_t limit = __rlim_hard(&vxi->limit, id);
20129 +       return VX_VLIM(limit);
20130 +}
20131 +
20132 +static int do_get_rlimit(struct vx_info *vxi, uint32_t id,
20133 +       uint64_t *minimum, uint64_t *softlimit, uint64_t *maximum)
20134 +{
20135 +       if (!is_valid_vlimit(id))
20136 +               return -EINVAL;
20137 +
20138 +       if (minimum)
20139 +               *minimum = CRLIM_UNSET;
20140 +       if (softlimit)
20141 +               *softlimit = vc_get_soft(vxi, id);
20142 +       if (maximum)
20143 +               *maximum = vc_get_hard(vxi, id);
20144 +       return 0;
20145 +}
20146 +
20147 +int vc_get_rlimit(struct vx_info *vxi, void __user *data)
20148 +{
20149 +       struct vcmd_ctx_rlimit_v0 vc_data;
20150 +       int ret;
20151 +
20152 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
20153 +               return -EFAULT;
20154 +
20155 +       ret = do_get_rlimit(vxi, vc_data.id,
20156 +               &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum);
20157 +       if (ret)
20158 +               return ret;
20159 +
20160 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
20161 +               return -EFAULT;
20162 +       return 0;
20163 +}
20164 +
20165 +static int do_set_rlimit(struct vx_info *vxi, uint32_t id,
20166 +       uint64_t minimum, uint64_t softlimit, uint64_t maximum)
20167 +{
20168 +       if (!is_valid_vlimit(id))
20169 +               return -EINVAL;
20170 +
20171 +       if (maximum != CRLIM_KEEP)
20172 +               __rlim_hard(&vxi->limit, id) = VX_RLIM(maximum);
20173 +       if (softlimit != CRLIM_KEEP)
20174 +               __rlim_soft(&vxi->limit, id) = VX_RLIM(softlimit);
20175 +
20176 +       /* clamp soft limit */
20177 +       if (__rlim_soft(&vxi->limit, id) > __rlim_hard(&vxi->limit, id))
20178 +               __rlim_soft(&vxi->limit, id) = __rlim_hard(&vxi->limit, id);
20179 +
20180 +       return 0;
20181 +}
20182 +
20183 +int vc_set_rlimit(struct vx_info *vxi, void __user *data)
20184 +{
20185 +       struct vcmd_ctx_rlimit_v0 vc_data;
20186 +
20187 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
20188 +               return -EFAULT;
20189 +
20190 +       return do_set_rlimit(vxi, vc_data.id,
20191 +               vc_data.minimum, vc_data.softlimit, vc_data.maximum);
20192 +}
20193 +
20194 +#ifdef CONFIG_IA32_EMULATION
20195 +
20196 +int vc_set_rlimit_x32(struct vx_info *vxi, void __user *data)
20197 +{
20198 +       struct vcmd_ctx_rlimit_v0_x32 vc_data;
20199 +
20200 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
20201 +               return -EFAULT;
20202 +
20203 +       return do_set_rlimit(vxi, vc_data.id,
20204 +               vc_data.minimum, vc_data.softlimit, vc_data.maximum);
20205 +}
20206 +
20207 +int vc_get_rlimit_x32(struct vx_info *vxi, void __user *data)
20208 +{
20209 +       struct vcmd_ctx_rlimit_v0_x32 vc_data;
20210 +       int ret;
20211 +
20212 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
20213 +               return -EFAULT;
20214 +
20215 +       ret = do_get_rlimit(vxi, vc_data.id,
20216 +               &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum);
20217 +       if (ret)
20218 +               return ret;
20219 +
20220 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
20221 +               return -EFAULT;
20222 +       return 0;
20223 +}
20224 +
20225 +#endif /* CONFIG_IA32_EMULATION */
20226 +
20227 +
20228 +int vc_get_rlimit_mask(uint32_t id, void __user *data)
20229 +{
20230 +       if (copy_to_user(data, &vlimit_mask, sizeof(vlimit_mask)))
20231 +               return -EFAULT;
20232 +       return 0;
20233 +}
20234 +
20235 +/* restart min/max tracking: set both to each limit's current value */
20236 +static inline void vx_reset_minmax(struct _vx_limit *limit)
20237 +{
20238 +       rlim_t value;
20239 +       int lim;
20240 +
20241 +       for (lim = 0; lim < NUM_LIMITS; lim++) {
20242 +               value = __rlim_get(limit, lim);
20243 +               __rlim_rmax(limit, lim) = value;
20244 +               __rlim_rmin(limit, lim) = value;
20245 +       }
20246 +}
20247 +
20248 +
20249 +int vc_reset_minmax(struct vx_info *vxi, void __user *data)
20250 +{
20251 +       vx_reset_minmax(&vxi->limit);
20252 +       return 0;
20253 +}
20254 +
20255 +
20256 +int vc_rlimit_stat(struct vx_info *vxi, void __user *data)
20257 +{
20258 +       struct vcmd_rlimit_stat_v0 vc_data;
20259 +       struct _vx_limit *limit = &vxi->limit;
20260 +       int id;
20261 +
20262 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
20263 +               return -EFAULT;
20264 +
20265 +       id = vc_data.id;
20266 +       if (!is_accounted_vlimit(id))
20267 +               return -EINVAL;
20268 +
20269 +       vx_limit_fixup(limit, id);
20270 +       vc_data.hits = atomic_read(&__rlim_lhit(limit, id));
20271 +       vc_data.value = __rlim_get(limit, id);
20272 +       vc_data.minimum = __rlim_rmin(limit, id);
20273 +       vc_data.maximum = __rlim_rmax(limit, id);
20274 +
20275 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
20276 +               return -EFAULT;
20277 +       return 0;
20278 +}
20279 +
20280 +/* rewrite *val's RAM fields from the current context's soft RSS limit */
20281 +void vx_vsi_meminfo(struct sysinfo *val)
20282 +{
20283 +       struct vx_info *vxi = current->vx_info;
20284 +       unsigned long totalram, freeram;
20285 +       rlim_t v;
20286 +
20287 +       /* we blindly accept the max */
20288 +       v = __rlim_soft(&vxi->limit, RLIMIT_RSS);
20289 +       totalram = (v != RLIM_INFINITY) ? v : val->totalram;
20290 +
20291 +       /* total minus used equals free */
20292 +       v = __vx_cres_array_fixup(&vxi->limit, VLA_RSS);
20293 +       freeram = (v < totalram) ? totalram - v : 0;
20294 +
20295 +       val->totalram = totalram;
20296 +       val->freeram = freeram;
20297 +       val->bufferram = 0;
20298 +       val->totalhigh = 0;
20299 +       val->freehigh = 0;
20300 +       return;
20301 +}
20302 +
20303 +void vx_vsi_swapinfo(struct sysinfo *val)
20304 +{
20305 +       struct vx_info *vxi = current->vx_info;
20306 +       unsigned long totalswap, freeswap;
20307 +       rlim_t v, w;
20308 +
20309 +       v = __rlim_soft(&vxi->limit, RLIMIT_RSS);
20310 +       if (v == RLIM_INFINITY) {
20311 +               val->freeswap = val->totalswap;
20312 +               return;
20313 +       }
20314 +
20315 +       /* we blindly accept the max */
20316 +       w = __rlim_hard(&vxi->limit, RLIMIT_RSS);
20317 +       totalswap = (w != RLIM_INFINITY) ? (w - v) : val->totalswap;
20318 +
20319 +       /* currently 'used' swap */
20320 +       w = __vx_cres_array_fixup(&vxi->limit, VLA_RSS);
20321 +       w -= (w > v) ? v : w;
20322 +
20323 +       /* total minus used equals free */
20324 +       freeswap = (w < totalswap) ? totalswap - w : 0;
20325 +
20326 +       val->totalswap = totalswap;
20327 +       val->freeswap = freeswap;
20328 +       return;
20329 +}
20330 +
20331 +/* context's bias plus the VLA_RSS value above the soft RSS limit; 0 if no vxi */
20332 +unsigned long vx_badness(struct task_struct *task, struct mm_struct *mm)
20333 +{
20334 +       struct vx_info *vxi = mm->mm_vx_info;
20335 +       unsigned long points;
20336 +       rlim_t v, w;
20337 +
20338 +       if (!vxi)
20339 +               return 0;
20340 +
20341 +       points = vxi->vx_badness_bias;
20342 +
20343 +       v = __vx_cres_array_fixup(&vxi->limit, VLA_RSS);
20344 +       w = __rlim_soft(&vxi->limit, RLIMIT_RSS);
20345 +       points += (v > w) ? (v - w) : 0;
20346 +
20347 +       return points;
20348 +}
20349 +
20350 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/limit_init.h linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/limit_init.h
20351 --- linux-2.6.30.2/kernel/vserver/limit_init.h  1970-01-01 01:00:00.000000000 +0100
20352 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/limit_init.h       2009-07-04 01:11:39.000000000 +0200
20353 @@ -0,0 +1,31 @@
20354 +
20355 +
20356 +static inline void vx_info_init_limit(struct _vx_limit *limit)
20357 +{
20358 +       int lim;
20359 +
20360 +       for (lim = 0; lim < NUM_LIMITS; lim++) {
20361 +               __rlim_soft(limit, lim) = RLIM_INFINITY;
20362 +               __rlim_hard(limit, lim) = RLIM_INFINITY;
20363 +               __rlim_set(limit, lim, 0);
20364 +               atomic_set(&__rlim_lhit(limit, lim), 0);
20365 +               __rlim_rmin(limit, lim) = 0;
20366 +               __rlim_rmax(limit, lim) = 0;
20367 +       }
20368 +}
20369 +
20370 +static inline void vx_info_exit_limit(struct _vx_limit *limit)
20371 +{
20372 +       rlim_t value;
20373 +       int lim;
20374 +
20375 +       for (lim = 0; lim < NUM_LIMITS; lim++) {
20376 +               if ((1 << lim) & VLIM_NOCHECK)
20377 +                       continue;
20378 +               value = __rlim_get(limit, lim);
20379 +               vxwprintk_xid(value,
20380 +                       "!!! limit: %p[%s,%d] = %ld on exit.",
20381 +                       limit, vlimit_name[lim], lim, (long)value);
20382 +       }
20383 +}
20384 +
20385 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/limit_proc.h linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/limit_proc.h
20386 --- linux-2.6.30.2/kernel/vserver/limit_proc.h  1970-01-01 01:00:00.000000000 +0100
20387 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/limit_proc.h       2009-07-04 01:11:39.000000000 +0200
20388 @@ -0,0 +1,57 @@
20389 +#ifndef _VX_LIMIT_PROC_H
20390 +#define _VX_LIMIT_PROC_H
20391 +
20392 +#include <linux/vserver/limit_int.h>
20393 +
20394 +
20395 +#define VX_LIMIT_FMT   ":\t%8ld\t%8ld/%8ld\t%8lld/%8lld\t%6d\n"
20396 +#define VX_LIMIT_TOP   \
20397 +       "Limit\t current\t     min/max\t\t    soft/hard\t\thits\n"
20398 +
20399 +#define VX_LIMIT_ARG(r)                                \
20400 +       (unsigned long)__rlim_get(limit, r),    \
20401 +       (unsigned long)__rlim_rmin(limit, r),   \
20402 +       (unsigned long)__rlim_rmax(limit, r),   \
20403 +       VX_VLIM(__rlim_soft(limit, r)),         \
20404 +       VX_VLIM(__rlim_hard(limit, r)),         \
20405 +       atomic_read(&__rlim_lhit(limit, r))
20406 +
20407 +static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer)
20408 +{
20409 +       vx_limit_fixup(limit, -1);
20410 +       return sprintf(buffer, VX_LIMIT_TOP
20411 +               "PROC"  VX_LIMIT_FMT
20412 +               "VM"    VX_LIMIT_FMT
20413 +               "VML"   VX_LIMIT_FMT
20414 +               "RSS"   VX_LIMIT_FMT
20415 +               "ANON"  VX_LIMIT_FMT
20416 +               "RMAP"  VX_LIMIT_FMT
20417 +               "FILES" VX_LIMIT_FMT
20418 +               "OFD"   VX_LIMIT_FMT
20419 +               "LOCKS" VX_LIMIT_FMT
20420 +               "SOCK"  VX_LIMIT_FMT
20421 +               "MSGQ"  VX_LIMIT_FMT
20422 +               "SHM"   VX_LIMIT_FMT
20423 +               "SEMA"  VX_LIMIT_FMT
20424 +               "SEMS"  VX_LIMIT_FMT
20425 +               "DENT"  VX_LIMIT_FMT,
20426 +               VX_LIMIT_ARG(RLIMIT_NPROC),
20427 +               VX_LIMIT_ARG(RLIMIT_AS),
20428 +               VX_LIMIT_ARG(RLIMIT_MEMLOCK),
20429 +               VX_LIMIT_ARG(RLIMIT_RSS),
20430 +               VX_LIMIT_ARG(VLIMIT_ANON),
20431 +               VX_LIMIT_ARG(VLIMIT_MAPPED),
20432 +               VX_LIMIT_ARG(RLIMIT_NOFILE),
20433 +               VX_LIMIT_ARG(VLIMIT_OPENFD),
20434 +               VX_LIMIT_ARG(RLIMIT_LOCKS),
20435 +               VX_LIMIT_ARG(VLIMIT_NSOCK),
20436 +               VX_LIMIT_ARG(RLIMIT_MSGQUEUE),
20437 +               VX_LIMIT_ARG(VLIMIT_SHMEM),
20438 +               VX_LIMIT_ARG(VLIMIT_SEMARY),
20439 +               VX_LIMIT_ARG(VLIMIT_NSEMS),
20440 +               VX_LIMIT_ARG(VLIMIT_DENTRY));
20441 +}
20442 +
20443 +#endif /* _VX_LIMIT_PROC_H */
20444 +
20445 +
20446 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/Makefile linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/Makefile
20447 --- linux-2.6.30.2/kernel/vserver/Makefile      1970-01-01 01:00:00.000000000 +0100
20448 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/Makefile   2009-07-04 01:11:39.000000000 +0200
20449 @@ -0,0 +1,18 @@
20450 +#
20451 +# Makefile for the Linux vserver routines.
20452 +#
20453 +
20454 +
20455 +obj-y          += vserver.o
20456 +
20457 +vserver-y      := switch.o context.o space.o sched.o network.o inode.o \
20458 +                  limit.o cvirt.o cacct.o signal.o helper.o init.o \
20459 +                  dlimit.o tag.o
20460 +
20461 +vserver-$(CONFIG_INET) += inet.o
20462 +vserver-$(CONFIG_PROC_FS) += proc.o
20463 +vserver-$(CONFIG_VSERVER_DEBUG) += sysctl.o debug.o
20464 +vserver-$(CONFIG_VSERVER_HISTORY) += history.o
20465 +vserver-$(CONFIG_VSERVER_MONITOR) += monitor.o
20466 +vserver-$(CONFIG_VSERVER_DEVICE) += device.o
20467 +
20468 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/monitor.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/monitor.c
20469 --- linux-2.6.30.2/kernel/vserver/monitor.c     1970-01-01 01:00:00.000000000 +0100
20470 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/monitor.c  2009-07-04 01:11:39.000000000 +0200
20471 @@ -0,0 +1,138 @@
20472 +/*
20473 + *  kernel/vserver/monitor.c
20474 + *
20475 + *  Virtual Context Scheduler Monitor
20476 + *
20477 + *  Copyright (C) 2006-2007 Herbert Pötzl
20478 + *
20479 + *  V0.01  basic design
20480 + *
20481 + */
20482 +
20483 +#include <linux/module.h>
20484 +#include <linux/jiffies.h>
20485 +#include <asm/uaccess.h>
20486 +#include <asm/atomic.h>
20487 +
20488 +#include <linux/vserver/monitor.h>
20489 +#include <linux/vserver/debug_cmd.h>
20490 +
20491 +
20492 +#ifdef CONFIG_VSERVER_MONITOR
20493 +#define VXM_SIZE       CONFIG_VSERVER_MONITOR_SIZE
20494 +#else
20495 +#define VXM_SIZE       64
20496 +#endif
20497 +
20498 +struct _vx_monitor {
20499 +       unsigned int counter;
20500 +
20501 +       struct _vx_mon_entry entry[VXM_SIZE+1];
20502 +};
20503 +
20504 +
20505 +DEFINE_PER_CPU(struct _vx_monitor, vx_monitor_buffer);
20506 +
20507 +unsigned volatile int vxm_active = 1;
20508 +
20509 +static atomic_t sequence = ATOMIC_INIT(0);
20510 +
20511 +
20512 +/*     vxm_advance()
20513 +
20514 +       * requires disabled preemption                          */
20515 +
20516 +struct _vx_mon_entry *vxm_advance(int cpu)
20517 +{
20518 +       struct _vx_monitor *mon = &per_cpu(vx_monitor_buffer, cpu);
20519 +       struct _vx_mon_entry *entry;
20520 +       unsigned int index;
20521 +
20522 +       index = vxm_active ? (mon->counter++ % VXM_SIZE) : VXM_SIZE;
20523 +       entry = &mon->entry[index];
20524 +
20525 +       entry->ev.seq = atomic_inc_return(&sequence);
20526 +       entry->ev.jif = jiffies;
20527 +       return entry;
20528 +}
20529 +
20530 +EXPORT_SYMBOL_GPL(vxm_advance);
20531 +
20532 +
20533 +int do_read_monitor(struct __user _vx_mon_entry *data,
20534 +       int cpu, uint32_t *index, uint32_t *count)
20535 +{
20536 +       int pos, ret = 0;
20537 +       struct _vx_monitor *mon = &per_cpu(vx_monitor_buffer, cpu);
20538 +       int end = mon->counter;
20539 +       int start = end - VXM_SIZE + 2;
20540 +       int idx = *index;
20541 +
20542 +       /* special case: get current pos */
20543 +       if (!*count) {
20544 +               *index = end;
20545 +               return 0;
20546 +       }
20547 +
20548 +       /* have we lost some data? */
20549 +       if (idx < start)
20550 +               idx = start;
20551 +
20552 +       for (pos = 0; (pos < *count) && (idx < end); pos++, idx++) {
20553 +               struct _vx_mon_entry *entry =
20554 +                       &mon->entry[idx % VXM_SIZE];
20555 +
20556 +               /* send entry to userspace */
20557 +               ret = copy_to_user(&data[pos], entry, sizeof(*entry));
20558 +               if (ret)
20559 +                       break;
20560 +       }
20561 +       /* save new index and count */
20562 +       *index = idx;
20563 +       *count = pos;
20564 +       return ret ? ret : (*index < end);
20565 +}
20566 +
20567 +int vc_read_monitor(uint32_t id, void __user *data)
20568 +{
20569 +       struct vcmd_read_monitor_v0 vc_data;
20570 +       int ret;
20571 +
20572 +       if (id >= NR_CPUS)
20573 +               return -EINVAL;
20574 +
20575 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
20576 +               return -EFAULT;
20577 +
20578 +       ret = do_read_monitor((struct __user _vx_mon_entry *)vc_data.data,
20579 +               id, &vc_data.index, &vc_data.count);
20580 +
20581 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
20582 +               return -EFAULT;
20583 +       return ret;
20584 +}
20585 +
20586 +#ifdef CONFIG_COMPAT
20587 +
20588 +int vc_read_monitor_x32(uint32_t id, void __user *data)
20589 +{
20590 +       struct vcmd_read_monitor_v0_x32 vc_data;
20591 +       int ret;
20592 +
20593 +       if (id >= NR_CPUS)
20594 +               return -EINVAL;
20595 +
20596 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
20597 +               return -EFAULT;
20598 +
20599 +       ret = do_read_monitor((struct __user _vx_mon_entry *)
20600 +               compat_ptr(vc_data.data_ptr),
20601 +               id, &vc_data.index, &vc_data.count);
20602 +
20603 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
20604 +               return -EFAULT;
20605 +       return ret;
20606 +}
20607 +
20608 +#endif /* CONFIG_COMPAT */
20609 +
20610 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/network.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/network.c
20611 --- linux-2.6.30.2/kernel/vserver/network.c     1970-01-01 01:00:00.000000000 +0100
20612 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/network.c  2009-07-04 01:11:39.000000000 +0200
20613 @@ -0,0 +1,864 @@
20614 +/*
20615 + *  linux/kernel/vserver/network.c
20616 + *
20617 + *  Virtual Server: Network Support
20618 + *
20619 + *  Copyright (C) 2003-2007  Herbert Pötzl
20620 + *
20621 + *  V0.01  broken out from vcontext V0.05
20622 + *  V0.02  cleaned up implementation
20623 + *  V0.03  added equiv nx commands
20624 + *  V0.04  switch to RCU based hash
20625 + *  V0.05  and back to locking again
20626 + *  V0.06  changed vcmds to nxi arg
20627 + *  V0.07  have __create claim() the nxi
20628 + *
20629 + */
20630 +
20631 +#include <linux/err.h>
20632 +#include <linux/slab.h>
20633 +#include <linux/rcupdate.h>
20634 +
20635 +#include <linux/vs_network.h>
20636 +#include <linux/vs_pid.h>
20637 +#include <linux/vserver/network_cmd.h>
20638 +
20639 +
20640 +atomic_t nx_global_ctotal      = ATOMIC_INIT(0);
20641 +atomic_t nx_global_cactive     = ATOMIC_INIT(0);
20642 +
20643 +static struct kmem_cache *nx_addr_v4_cachep = NULL;
20644 +static struct kmem_cache *nx_addr_v6_cachep = NULL;
20645 +
20646 +
20647 +static int __init init_network(void)
20648 +{
20649 +       nx_addr_v4_cachep = kmem_cache_create("nx_v4_addr_cache",
20650 +               sizeof(struct nx_addr_v4), 0,
20651 +               SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
20652 +       nx_addr_v6_cachep = kmem_cache_create("nx_v6_addr_cache",
20653 +               sizeof(struct nx_addr_v6), 0,
20654 +               SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
20655 +       return 0;
20656 +}
20657 +
20658 +
20659 +/*     __alloc_nx_addr_v4(): zeroed entry or ERR_PTR(-ENOMEM)  */
20660 +
20661 +static inline struct nx_addr_v4 *__alloc_nx_addr_v4(void)
20662 +{
20663 +       struct nx_addr_v4 *nxa = kmem_cache_zalloc(
20664 +               nx_addr_v4_cachep, GFP_KERNEL);
20665 +
20666 +       if (!nxa)       /* slab returns NULL on failure; callers test IS_ERR() */
20667 +               return ERR_PTR(-ENOMEM);
20668 +       return nxa;
20669 +}
20670 +
20671 +/*     __dealloc_nx_addr_v4(): hand one entry back to the v4 slab cache */
20672 +
20673 +static inline void __dealloc_nx_addr_v4(struct nx_addr_v4 *nxa)
20674 +{
20675 +       kmem_cache_free(nx_addr_v4_cachep, nxa);
20676 +}
20677 +
20678 +/*     __dealloc_nx_addr_v4_all(): free nxa and everything chained via ->next */
20679 +
20680 +static inline void __dealloc_nx_addr_v4_all(struct nx_addr_v4 *nxa)
20681 +{
20682 +       struct nx_addr_v4 *next;
20683 +
20684 +       for (; nxa; nxa = next) {
20685 +               next = nxa->next;       /* read before the node is freed */
20686 +               __dealloc_nx_addr_v4(nxa);
20687 +       }
20688 +}
20689 +
20690 +
20691 +#ifdef CONFIG_IPV6
20692 +
20693 +/*     __alloc_nx_addr_v6(): zeroed entry or ERR_PTR(-ENOMEM)  */
20694 +
20695 +static inline struct nx_addr_v6 *__alloc_nx_addr_v6(void)
20696 +{
20697 +       struct nx_addr_v6 *nxa = kmem_cache_zalloc(
20698 +               nx_addr_v6_cachep, GFP_KERNEL);
20699 +
20700 +       if (!nxa)       /* slab returns NULL on failure; callers test IS_ERR() */
20701 +               return ERR_PTR(-ENOMEM);
20702 +       return nxa;
20703 +}
20704 +
20705 +/*     __dealloc_nx_addr_v6(): hand one entry back to the v6 slab cache */
20706 +
20707 +static inline void __dealloc_nx_addr_v6(struct nx_addr_v6 *nxa)
20708 +{
20709 +       kmem_cache_free(nx_addr_v6_cachep, nxa);
20710 +}
20711 +
20712 +/*     __dealloc_nx_addr_v6_all(): free nxa and everything chained via ->next */
20713 +
20714 +static inline void __dealloc_nx_addr_v6_all(struct nx_addr_v6 *nxa)
20715 +{
20716 +       struct nx_addr_v6 *next;
20717 +
20718 +       for (; nxa; nxa = next) {
20719 +               next = nxa->next;       /* read before the node is freed */
20720 +               __dealloc_nx_addr_v6(nxa);
20721 +       }
20722 +}
20723 +
20724 +#endif /* CONFIG_IPV6 */
20725 +
20726 +/*     __alloc_nx_info()
20727 +
20728 +       * returns a zeroed, initialized nx_info (NULL on failure)
20729 +       * the result is not hashed, so lookups cannot find it   */
20730 +
20731 +static struct nx_info *__alloc_nx_info(nid_t nid)
20732 +{
20733 +       struct nx_info *nxi;
20734 +
20735 +       vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid);
20736 +
20737 +       /* would this benefit from a slab cache? */
20738 +       nxi = kzalloc(sizeof(*nxi), GFP_KERNEL);
20739 +       if (!nxi)
20740 +               return NULL;
20741 +
20742 +       /* identity and bookkeeping: both counters start at zero */
20743 +       nxi->nx_id = nid;
20744 +       INIT_HLIST_NODE(&nxi->nx_hlist);
20745 +       atomic_set(&nxi->nx_usecnt, 0);
20746 +       atomic_set(&nxi->nx_tasks, 0);
20747 +       nxi->nx_state = 0;
20748 +
20749 +       nxi->nx_flags = NXF_INIT_SET;
20750 +
20751 +       /* rest of init goes here */
20752 +
20753 +       nxi->v4_lback.s_addr = htonl(INADDR_LOOPBACK);
20754 +       nxi->v4_bcast.s_addr = htonl(INADDR_BROADCAST);
20755 +
20756 +       vxdprintk(VXD_CBIT(nid, 0),
20757 +               "alloc_nx_info(%d) = %p", nid, nxi);
20758 +       atomic_inc(&nx_global_ctotal);
20759 +       return nxi;
20760 +}
20761 +
20762 +/*     __dealloc_nx_info()
20763 +
20764 +       * final disposal of nx_info                             */
20765 +
20766 +static void __dealloc_nx_info(struct nx_info *nxi)
20767 +{
20768 +       vxdprintk(VXD_CBIT(nid, 0),
20769 +               "dealloc_nx_info(%p)", nxi);
20770 +
20771 +       nxi->nx_hlist.next = LIST_POISON1;
20772 +       nxi->nx_id = -1;
20773 +
20774 +       BUG_ON(atomic_read(&nxi->nx_usecnt));
20775 +       BUG_ON(atomic_read(&nxi->nx_tasks));
20776 +
20777 +       __dealloc_nx_addr_v4_all(nxi->v4.next);
20778 +
20779 +       nxi->nx_state |= NXS_RELEASED;
20780 +       kfree(nxi);
20781 +       atomic_dec(&nx_global_ctotal);
20782 +}
20783 +
20784 +static void __shutdown_nx_info(struct nx_info *nxi)     /* mark nxi as shut down */
20785 +{
20786 +       nxi->nx_state |= NXS_SHUTDOWN;  /* flag is set before the notification */
20787 +       vs_net_change(nxi, VSC_NETDOWN);        /* result is not checked here */
20788 +}
20789 +
20790 +/*     exported stuff                                          */
20791 +
20792 +void free_nx_info(struct nx_info *nxi)  /* sanity-check nxi, then dispose of it */
20793 +{
20794 +       /* context shutdown is mandatory: state must be exactly NXS_SHUTDOWN */
20795 +       BUG_ON(nxi->nx_state != NXS_SHUTDOWN);
20796 +
20797 +       /* context must not be hashed (already implied by the check above) */
20798 +       BUG_ON(nxi->nx_state & NXS_HASHED);
20799 +       /* no references and no tasks may be left */
20800 +       BUG_ON(atomic_read(&nxi->nx_usecnt));
20801 +       BUG_ON(atomic_read(&nxi->nx_tasks));
20802 +
20803 +       __dealloc_nx_info(nxi);
20804 +}
20805 +
20806 +
20807 +void __nx_set_lback(struct nx_info *nxi)
20808 +{
20809 +       const int nid = nxi->nx_id;
20810 +
20811 +       /* per-context loopback: low 16 nid bits XORed in, shifted up by 8 */
20812 +       nxi->v4_lback.s_addr = htonl(INADDR_LOOPBACK ^ ((nid & 0xFFFF) << 8));
20813 +}
20814 +
20815 +extern int __nx_inet_add_lback(__be32 addr);
20816 +extern int __nx_inet_del_lback(__be32 addr);
20817 +
20818 +
20819 +/*     nid -> nx_info hash table, guarded by nx_info_hash_lock */
20820 +
20821 +#define NX_HASH_SIZE   13
20822 +
20823 +struct hlist_head nx_info_hash[NX_HASH_SIZE];
20824 +
20825 +static DEFINE_SPINLOCK(nx_info_hash_lock);
20826 +
20827 +
20828 +static inline unsigned int __hashval(nid_t nid) /* bucket index for nid */
20829 +{
20830 +       return (nid % NX_HASH_SIZE);
20831 +}
20832 +
20833 +
20834 +
20835 +/*     __hash_nx_info()
20836 +
20837 +       * add the nxi to the global hash table (sets NXS_HASHED)
20838 +       * requires the hash_lock to be held                     */
20839 +
20840 +static inline void __hash_nx_info(struct nx_info *nxi)
20841 +{
20842 +       struct hlist_head *head;
20843 +
20844 +       vxd_assert_lock(&nx_info_hash_lock);
20845 +       vxdprintk(VXD_CBIT(nid, 4),
20846 +               "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id);
20847 +
20848 +       /* context must not be hashed */
20849 +       BUG_ON(nx_info_state(nxi, NXS_HASHED));
20850 +
20851 +       nxi->nx_state |= NXS_HASHED;
20852 +       head = &nx_info_hash[__hashval(nxi->nx_id)];    /* bucket chosen by nx_id */
20853 +       hlist_add_head(&nxi->nx_hlist, head);
20854 +       atomic_inc(&nx_global_cactive); /* one more hashed context */
20855 +}
20856 +
20857 +/*     __unhash_nx_info()
20858 +
20859 +       * remove the nxi from the global hash table (clears NXS_HASHED)
20860 +       * requires the hash_lock to be held                     */
20861 +
20862 +static inline void __unhash_nx_info(struct nx_info *nxi)
20863 +{
20864 +       vxd_assert_lock(&nx_info_hash_lock);
20865 +       vxdprintk(VXD_CBIT(nid, 4),
20866 +               "__unhash_nx_info: %p[#%d.%d.%d]", nxi, nxi->nx_id,
20867 +               atomic_read(&nxi->nx_usecnt), atomic_read(&nxi->nx_tasks));
20868 +
20869 +       /* context must be hashed */
20870 +       BUG_ON(!nx_info_state(nxi, NXS_HASHED));
20871 +       /* but without tasks */
20872 +       BUG_ON(atomic_read(&nxi->nx_tasks));
20873 +
20874 +       nxi->nx_state &= ~NXS_HASHED;
20875 +       hlist_del(&nxi->nx_hlist);
20876 +       atomic_dec(&nx_global_cactive); /* one hashed context less */
20877 +}
20878 +
20879 +
20880 +/*     __lookup_nx_info()
20881 +
20882 +       * requires the hash_lock to be held
20883 +       * takes no reference: no counter of the nxi is touched */
20884 +
20885 +static inline struct nx_info *__lookup_nx_info(nid_t nid)
20886 +{
20887 +       struct hlist_head *bucket = &nx_info_hash[__hashval(nid)];
20888 +       struct nx_info *match = NULL;
20889 +       struct hlist_node *node;
20890 +
20891 +       vxd_assert_lock(&nx_info_hash_lock);
20892 +       hlist_for_each(node, bucket) {
20893 +               match = hlist_entry(node, struct nx_info, nx_hlist);
20894 +               if (match->nx_id == nid)
20895 +                       break;
20896 +               /* not ours: forget it so that a full miss yields NULL */
20897 +               match = NULL;
20898 +       }
20899 +
20900 +       vxdprintk(VXD_CBIT(nid, 0),
20901 +               "__lookup_nx_info(#%u): %p[#%u]",
20902 +               nid, match, match ? match->nx_id : 0);
20903 +       return match;
20904 +}
20905 +
20906 +
20907 +/*     __create_nx_info()
20908 +
20909 +       * create the requested context, or return an ERR_PTR()
20910 +       * get(), claim() and hash it                            */
20911 +
20912 +static struct nx_info *__create_nx_info(int id)
20913 +{
20914 +       struct nx_info *new, *nxi = NULL;
20915 +
20916 +       vxdprintk(VXD_CBIT(nid, 1), "create_nx_info(%d)*", id);
20917 +
20918 +       if (!(new = __alloc_nx_info(id)))
20919 +               return ERR_PTR(-ENOMEM);
20920 +
20921 +       /* lookup and insert happen under one lock hold, so an id is hashed once */
20922 +       spin_lock(&nx_info_hash_lock);
20923 +
20924 +       /* id already hashed: refuse, the fresh allocation is dropped below */
20925 +       if ((nxi = __lookup_nx_info(id))) {
20926 +               vxdprintk(VXD_CBIT(nid, 0),
20927 +                       "create_nx_info(%d) = %p (already there)", id, nxi);
20928 +               if (nx_info_flags(nxi, NXF_STATE_SETUP, 0))
20929 +                       nxi = ERR_PTR(-EBUSY);
20930 +               else
20931 +                       nxi = ERR_PTR(-EEXIST);
20932 +               goto out_unlock;
20933 +       }
20934 +       /* new context */
20935 +       vxdprintk(VXD_CBIT(nid, 0),
20936 +               "create_nx_info(%d) = %p (new)", id, new);
20937 +       claim_nx_info(new, NULL);
20938 +       __nx_set_lback(new);
20939 +       __hash_nx_info(get_nx_info(new));
20940 +       nxi = new, new = NULL;  /* ownership moves to nxi; nothing to free */
20941 +
20942 +out_unlock:
20943 +       spin_unlock(&nx_info_hash_lock);
20944 +       if (new)        /* only still set on the "already there" path */
20945 +               __dealloc_nx_info(new);
20946 +       return nxi;
20947 +}
20948 +
20949 +
20950 +
20951 +/*     exported stuff                                          */
20952 +
20953 +
20954 +void unhash_nx_info(struct nx_info *nxi)        /* shut nxi down and unhash it */
20955 +{
20956 +       __shutdown_nx_info(nxi);        /* done before the hash lock is taken */
20957 +       spin_lock(&nx_info_hash_lock);
20958 +       __unhash_nx_info(nxi);
20959 +       spin_unlock(&nx_info_hash_lock);
20960 +}
20961 +
20962 +/*     lookup_nx_info()
20963 +
20964 +       * search for a nx_info and get() it
20965 +       * negative id means current; ids 0 and 1 always yield NULL */
20966 +
20967 +struct nx_info *lookup_nx_info(int id)
20968 +{
20969 +       struct nx_info *nxi = NULL;
20970 +
20971 +       if (id < 0) {
20972 +               nxi = get_nx_info(current->nx_info);
20973 +       } else if (id > 1) {
20974 +               spin_lock(&nx_info_hash_lock);
20975 +               nxi = get_nx_info(__lookup_nx_info(id));        /* get() under the lock */
20976 +               spin_unlock(&nx_info_hash_lock);
20977 +       }
20978 +       return nxi;
20979 +}
20980 +
20981 +/*     nid_is_hashed()
20982 +
20983 +       * 1 if a context with this nid is in the hash, else 0   */
20984 +
20985 +int nid_is_hashed(nid_t nid)
20986 +{
20987 +       struct nx_info *nxi;
20988 +
20989 +       spin_lock(&nx_info_hash_lock);
20990 +       nxi = __lookup_nx_info(nid);
20991 +       spin_unlock(&nx_info_hash_lock);
20992 +       return nxi != NULL;     /* pointer is only compared, never used */
20993 +}
20994 +
20995 +
20996 +#ifdef CONFIG_PROC_FS
20997 +
20998 +/*     get_nid_list()
20999 +
21000 +       * get a subset of hashed nids for proc; returns how many were stored
21001 +       * assumes size is at least one                          */
21002 +
21003 +int get_nid_list(int index, unsigned int *nids, int size)
21004 +{
21005 +       int hindex, nr_nids = 0;
21006 +
21007 +       /* callers failing this nx_check() only get their own nid, once */
21008 +       if (!nx_check(0, VS_ADMIN | VS_WATCH)) {
21009 +               if (index > 0)
21010 +                       return 0;
21011 +               nids[nr_nids] = nx_current_nid();
21012 +               return 1;
21013 +       }
21014 +
21015 +       for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
21016 +               struct hlist_head *head = &nx_info_hash[hindex];
21017 +               struct hlist_node *pos;
21018 +
21019 +               spin_lock(&nx_info_hash_lock);
21020 +               hlist_for_each(pos, head) {
21021 +                       struct nx_info *nxi;
21022 +
21023 +                       if (--index > 0)        /* skip leading entries; index keeps counting down */
21024 +                               continue;
21025 +
21026 +                       nxi = hlist_entry(pos, struct nx_info, nx_hlist);
21027 +                       nids[nr_nids] = nxi->nx_id;
21028 +                       if (++nr_nids >= size) {        /* output array is full */
21029 +                               spin_unlock(&nx_info_hash_lock);
21030 +                               goto out;
21031 +                       }
21032 +               }
21033 +               /* keep the lock time short: it is dropped between buckets */
21034 +               spin_unlock(&nx_info_hash_lock);
21035 +       }
21036 +out:
21037 +       return nr_nids;
21038 +}
21039 +#endif
21040 +
21041 +
21042 +/*
21043 + *     migrate task to new network
21044 + *     gets nxi, puts old_nxi on change
21045 + *     returns 0, -EACCES or -EFAULT (the latter for a shut down nxi)
21046 +
21047 +int nx_migrate_task(struct task_struct *p, struct nx_info *nxi)
21048 +{
21049 +       struct nx_info *old_nxi;
21050 +       int ret = 0;
21051 +
21052 +       if (!p || !nxi)
21053 +               BUG();
21054 +
21055 +       vxdprintk(VXD_CBIT(nid, 5),
21056 +               "nx_migrate_task(%p,%p[#%d.%d.%d])",
21057 +               p, nxi, nxi->nx_id,
21058 +               atomic_read(&nxi->nx_usecnt),
21059 +               atomic_read(&nxi->nx_tasks));
21060 +
21061 +       if (nx_info_flags(nxi, NXF_INFO_PRIVATE, 0) &&
21062 +               !nx_info_flags(nxi, NXF_STATE_SETUP, 0))
21063 +               return -EACCES;
21064 +
21065 +       if (nx_info_state(nxi, NXS_SHUTDOWN))
21066 +               return -EFAULT;
21067 +
21068 +       /* maybe disallow this completely? */
21069 +       old_nxi = task_get_nx_info(p);
21070 +       if (old_nxi == nxi)     /* already there: only drop the reference just taken */
21071 +               goto out;
21072 +
21073 +       task_lock(p);
21074 +       if (old_nxi)
21075 +               clr_nx_info(&p->nx_info);
21076 +       claim_nx_info(nxi, p);
21077 +       set_nx_info(&p->nx_info, nxi);
21078 +       p->nid = nxi->nx_id;
21079 +       task_unlock(p);
21080 +
21081 +       vxdprintk(VXD_CBIT(nid, 5),
21082 +               "moved task %p into nxi:%p[#%d]",
21083 +               p, nxi, nxi->nx_id);
21084 +
21085 +       if (old_nxi)    /* released only after task_lock() has been dropped */
21086 +               release_nx_info(old_nxi, p);
21087 +       ret = 0;
21088 +out:
21089 +       put_nx_info(old_nxi);   /* pairs with task_get_nx_info() above */
21090 +       return ret;
21091 +}
21092 +
21093 +
21094 +void nx_set_persistent(struct nx_info *nxi)     /* pin nxi: get() plus claim() */
21095 +{
21096 +       vxdprintk(VXD_CBIT(nid, 6),
21097 +               "nx_set_persistent(%p[#%d])", nxi, nxi->nx_id);
21098 +
21099 +       get_nx_info(nxi);
21100 +       claim_nx_info(nxi, NULL);       /* NULL: the claim is not tied to a task */
21101 +}
21102 +
21103 +void nx_clear_persistent(struct nx_info *nxi)   /* undo nx_set_persistent() */
21104 +{
21105 +       vxdprintk(VXD_CBIT(nid, 6),
21106 +               "nx_clear_persistent(%p[#%d])", nxi, nxi->nx_id);
21107 +
21108 +       release_nx_info(nxi, NULL);     /* reverse order of the set path */
21109 +       put_nx_info(nxi);
21110 +}
21111 +
21112 +void nx_update_persistent(struct nx_info *nxi)  /* follow the NXF_PERSISTENT flag */
21113 +{
21114 +       if (!nx_info_flags(nxi, NXF_PERSISTENT, 0))
21115 +               nx_clear_persistent(nxi);
21116 +       else
21117 +               nx_set_persistent(nxi);
21118 +}
21119 +
21120 +/* vserver syscall commands below here */
21121 +
21122 +/* task nid and nx_info functions */
21123 +
21124 +#include <asm/uaccess.h>
21125 +
21126 +
21127 +int vc_task_nid(uint32_t id)    /* nid of task id, or of current when id == 0 */
21128 +{
21129 +       nid_t nid;
21130 +
21131 +       if (id) {
21132 +               struct task_struct *tsk;
21133 +
21134 +               read_lock(&tasklist_lock);
21135 +               tsk = find_task_by_real_pid(id);
21136 +               nid = (tsk) ? tsk->nid : -ESRCH;        /* -ESRCH passes through nid_t */
21137 +               read_unlock(&tasklist_lock);
21138 +       } else
21139 +               nid = nx_current_nid();
21140 +       return nid;
21141 +}
21142 +
21143 +
21144 +int vc_nx_info(struct nx_info *nxi, void __user *data)  /* copy nxi's nid to user */
21145 +{
21146 +       struct vcmd_nx_info_v0 vc_data;
21147 +
21148 +       vc_data.nid = nxi->nx_id;       /* the only field filled in here */
21149 +
21150 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
21151 +               return -EFAULT;
21152 +       return 0;
21153 +}
21154 +
21155 +
21156 +/* network functions */
21157 +
21158 +int vc_net_create(uint32_t nid, void __user *data)      /* returns the new nid or -errno */
21159 +{
21160 +       struct vcmd_net_create vc_data = { .flagword = NXF_INIT_SET };
21161 +       struct nx_info *new_nxi;
21162 +       int ret;
21163 +
21164 +       if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))    /* data is optional */
21165 +               return -EFAULT;
21166 +
21167 +       if ((nid > MAX_S_CONTEXT) || (nid < 2))
21168 +               return -EINVAL;
21169 +
21170 +       new_nxi = __create_nx_info(nid);
21171 +       if (IS_ERR(new_nxi))
21172 +               return PTR_ERR(new_nxi);
21173 +
21174 +       /* initial flags */
21175 +       new_nxi->nx_flags = vc_data.flagword;
21176 +
21177 +       ret = -ENOEXEC;
21178 +       if (vs_net_change(new_nxi, VSC_NETUP))
21179 +               goto out;
21180 +
21181 +       ret = nx_migrate_task(current, new_nxi);
21182 +       if (ret)
21183 +               goto out;
21184 +
21185 +       /* return context id on success */
21186 +       ret = new_nxi->nx_id;
21187 +
21188 +       /* get a reference for persistent contexts */
21189 +       if ((vc_data.flagword & NXF_PERSISTENT))
21190 +               nx_set_persistent(new_nxi);
21191 +out:
21192 +       release_nx_info(new_nxi, NULL); /* undoes claim()/get() done in __create_nx_info() */
21193 +       put_nx_info(new_nxi);
21194 +       return ret;
21195 +}
21196 +
21197 +
21198 +int vc_net_migrate(struct nx_info *nxi, void __user *data)      /* data is unused */
21199 +{
21200 +       return nx_migrate_task(current, nxi);
21201 +}
21202 +
21203 +
21204 +
21205 +int do_add_v4_addr(struct nx_info *nxi, __be32 ip, __be32 ip2, __be32 mask,
21206 +       uint16_t type, uint16_t flags)
21207 +{
21208 +       struct nx_addr_v4 *nxa = &nxi->v4;
21209 +
21210 +       if (NX_IPV4(nxi)) {
21211 +               /* locate last entry */
21212 +               for (; nxa->next; nxa = nxa->next);
21213 +               nxa->next = __alloc_nx_addr_v4();
21214 +               nxa = nxa->next;
21215 +
21216 +               if (IS_ERR(nxa))
21217 +                       return PTR_ERR(nxa);
21218 +       }
21219 +
21220 +       if (nxi->v4.next)
21221 +               /* remove single ip for ip list */
21222 +               nxi->nx_flags &= ~NXF_SINGLE_IP;
21223 +
21224 +       nxa->ip[0].s_addr = ip;
21225 +       nxa->ip[1].s_addr = ip2;
21226 +       nxa->mask.s_addr = mask;
21227 +       nxa->type = type;
21228 +       nxa->flags = flags;
21229 +       return 0;
21230 +}
21231 +
21232 +
21233 +int vc_net_add(struct nx_info *nxi, void __user *data)  /* returns count added or -errno */
21234 +{
21235 +       struct vcmd_net_addr_v0 vc_data;
21236 +       int index, ret = 0;
21237 +
21238 +       if (!data || copy_from_user(&vc_data, data, sizeof(vc_data)))   /* never use vc_data unfilled */
21239 +               return -EFAULT;
21240 +
21241 +       switch (vc_data.type) {
21242 +       case NXA_TYPE_IPV4:
21243 +               if ((vc_data.count < 1) || (vc_data.count > 4))
21244 +                       return -EINVAL;
21245 +
21246 +               index = 0;
21247 +               while (index < vc_data.count) {
21248 +                       ret = do_add_v4_addr(nxi, vc_data.ip[index].s_addr, 0,
21249 +                               vc_data.mask[index].s_addr, NXA_TYPE_ADDR, 0);
21250 +                       if (ret)        /* entries added so far stay in place */
21251 +                               return ret;
21252 +                       index++;
21253 +               }
21254 +               ret = index;
21255 +               break;
21256 +
21257 +       case NXA_TYPE_IPV4|NXA_MOD_BCAST:
21258 +               nxi->v4_bcast = vc_data.ip[0];
21259 +               ret = 1;
21260 +               break;
21261 +
21262 +       case NXA_TYPE_IPV4|NXA_MOD_LBACK:
21263 +               nxi->v4_lback = vc_data.ip[0];
21264 +               ret = 1;
21265 +               break;
21266 +
21267 +       default:
21268 +               ret = -EINVAL;
21269 +               break;
21270 +       }
21271 +       return ret;
21272 +}
21273 +
21274 +int vc_net_remove(struct nx_info *nxi, void __user *data)       /* 0 or -errno */
21275 +{
21276 +       struct vcmd_net_addr_v0 vc_data;
21277 +
21278 +       if (!data || copy_from_user(&vc_data, data, sizeof(vc_data)))   /* never use vc_data unfilled */
21279 +               return -EFAULT;
21280 +
21281 +       switch (vc_data.type) {
21282 +       case NXA_TYPE_ANY:
21283 +               /* detach the chain first, free it, then clear the embedded slot */
21284 +               __dealloc_nx_addr_v4_all(xchg(&nxi->v4.next, NULL));
21285 +               memset(&nxi->v4, 0, sizeof(nxi->v4));
21286 +               break;
21287 +       default:
21288 +               return -EINVAL;
21289 +       }
21290 +       return 0;
21291 +}
21292 +
21293 +
21294 +int vc_net_add_ipv4(struct nx_info *nxi, void __user *data)     /* 0 or -errno */
21295 +{
21296 +       struct vcmd_net_addr_ipv4_v1 vc_data;
21297 +
21298 +       if (!data || copy_from_user(&vc_data, data, sizeof(vc_data)))   /* never use vc_data unfilled */
21299 +               return -EFAULT;
21300 +
21301 +       switch (vc_data.type) {
21302 +       case NXA_TYPE_ADDR:
21303 +       case NXA_TYPE_RANGE:
21304 +       case NXA_TYPE_MASK:
21305 +               return do_add_v4_addr(nxi, vc_data.ip.s_addr, 0,
21306 +                       vc_data.mask.s_addr, vc_data.type, vc_data.flags);
21307 +
21308 +       case NXA_TYPE_ADDR | NXA_MOD_BCAST:
21309 +               nxi->v4_bcast = vc_data.ip;
21310 +               break;
21311 +
21312 +       case NXA_TYPE_ADDR | NXA_MOD_LBACK:
21313 +               nxi->v4_lback = vc_data.ip;
21314 +               break;
21315 +
21316 +       default:
21317 +               return -EINVAL;
21318 +       }
21319 +       return 0;
21320 +}
21321 +
21322 +int vc_net_remove_ipv4(struct nx_info *nxi, void __user *data)  /* 0 or -errno */
21323 +{
21324 +       struct vcmd_net_addr_ipv4_v1 vc_data;
21325 +
21326 +       if (!data || copy_from_user(&vc_data, data, sizeof(vc_data)))   /* never use vc_data unfilled */
21327 +               return -EFAULT;
21328 +
21329 +       switch (vc_data.type) {
21330 +/*     case NXA_TYPE_ADDR:
21331 +               break;          */
21332 +
21333 +       case NXA_TYPE_ANY:      /* only "remove everything" is implemented */
21334 +               __dealloc_nx_addr_v4_all(xchg(&nxi->v4.next, NULL));
21335 +               memset(&nxi->v4, 0, sizeof(nxi->v4));
21336 +               break;
21337 +
21338 +       default:
21339 +               return -EINVAL;
21340 +       }
21341 +       return 0;
21342 +}
21343 +
21344 +
21345 +#ifdef CONFIG_IPV6
21346 +
21347 +int do_add_v6_addr(struct nx_info *nxi,
21348 +       struct in6_addr *ip, struct in6_addr *mask,
21349 +       uint32_t prefix, uint16_t type, uint16_t flags)
21350 +{
21351 +       struct nx_addr_v6 *nxa = &nxi->v6;
21352 +
21353 +       if (NX_IPV6(nxi)) {
21354 +               /* locate last entry */
21355 +               for (; nxa->next; nxa = nxa->next);
21356 +               nxa->next = __alloc_nx_addr_v6();
21357 +               nxa = nxa->next;
21358 +
21359 +               if (IS_ERR(nxa))
21360 +                       return PTR_ERR(nxa);
21361 +       }
21362 +
21363 +       nxa->ip = *ip;
21364 +       nxa->mask = *mask;
21365 +       nxa->prefix = prefix;
21366 +       nxa->type = type;
21367 +       nxa->flags = flags;
21368 +       return 0;
21369 +}
21370 +
21371 +
21372 +int vc_net_add_ipv6(struct nx_info *nxi, void __user *data)     /* 0 or -errno */
21373 +{
21374 +       struct vcmd_net_addr_ipv6_v1 vc_data;
21375 +
21376 +       if (!data || copy_from_user(&vc_data, data, sizeof(vc_data)))   /* never use vc_data unfilled */
21377 +               return -EFAULT;
21378 +
21379 +       switch (vc_data.type) {
21380 +       case NXA_TYPE_ADDR:
21381 +       case NXA_TYPE_MASK:
21382 +               return do_add_v6_addr(nxi, &vc_data.ip, &vc_data.mask,
21383 +                       vc_data.prefix, vc_data.type, vc_data.flags);
21384 +       default:
21385 +               return -EINVAL;
21386 +       }
21387 +       return 0;       /* not reached: every case above returns */
21388 +}
21389 +
21390 +int vc_net_remove_ipv6(struct nx_info *nxi, void __user *data)  /* 0 or -errno */
21391 +{
21392 +       struct vcmd_net_addr_ipv6_v1 vc_data;
21393 +
21394 +       if (!data || copy_from_user(&vc_data, data, sizeof(vc_data)))   /* never use vc_data unfilled */
21395 +               return -EFAULT;
21396 +
21397 +       switch (vc_data.type) {
21398 +       case NXA_TYPE_ANY:
21399 +               /* detach the chain first, free it, then clear the embedded slot */
21400 +               __dealloc_nx_addr_v6_all(xchg(&nxi->v6.next, NULL));
21401 +               memset(&nxi->v6, 0, sizeof(nxi->v6));
21402 +               break;
21403 +       default:
21404 +               return -EINVAL;
21405 +       }
21406 +       return 0;
21407 +}
21408 +
21409 +#endif /* CONFIG_IPV6 */
21410 +
21411 +
21412 +int vc_get_nflags(struct nx_info *nxi, void __user *data)       /* report nx_flags and a mask */
21413 +{
21414 +       struct vcmd_net_flags_v0 vc_data;
21415 +
21416 +       vc_data.flagword = nxi->nx_flags;
21417 +
21418 +       /* special STATE flag handling */
21419 +       vc_data.mask = vs_mask_flags(~0ULL, nxi->nx_flags, NXF_ONE_TIME);
21420 +
21421 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
21422 +               return -EFAULT;
21423 +       return 0;
21424 +}
21425 +
21426 +int vc_set_nflags(struct nx_info *nxi, void __user *data)       /* update nx_flags under a mask */
21427 +{
21428 +       struct vcmd_net_flags_v0 vc_data;
21429 +       uint64_t mask, trigger;
21430 +
21431 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21432 +               return -EFAULT;
21433 +
21434 +       /* special STATE flag handling */
21435 +       mask = vs_mask_mask(vc_data.mask, nxi->nx_flags, NXF_ONE_TIME);
21436 +       trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword);   /* masked bits that change */
21437 +
21438 +       nxi->nx_flags = vs_mask_flags(nxi->nx_flags,
21439 +               vc_data.flagword, mask);
21440 +       if (trigger & NXF_PERSISTENT)   /* runs after the new flags are stored */
21441 +               nx_update_persistent(nxi);
21442 +
21443 +       return 0;
21444 +}
21445 +
21446 +int vc_get_ncaps(struct nx_info *nxi, void __user *data)        /* report nx_ncaps to user */
21447 +{
21448 +       struct vcmd_net_caps_v0 vc_data;
21449 +
21450 +       vc_data.ncaps = nxi->nx_ncaps;
21451 +       vc_data.cmask = ~0ULL;  /* mask is reported with all bits set */
21452 +
21453 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
21454 +               return -EFAULT;
21455 +       return 0;
21456 +}
21457 +
21458 +int vc_set_ncaps(struct nx_info *nxi, void __user *data)        /* update nx_ncaps under cmask */
21459 +{
21460 +       struct vcmd_net_caps_v0 vc_data;
21461 +
21462 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21463 +               return -EFAULT;
21464 +
21465 +       nxi->nx_ncaps = vs_mask_flags(nxi->nx_ncaps,
21466 +               vc_data.ncaps, vc_data.cmask);
21467 +       return 0;
21468 +}
21469 +
21470 +
21471 +#include <linux/module.h>
21472 +
21473 +module_init(init_network);
21474 +
21475 +EXPORT_SYMBOL_GPL(free_nx_info);
21476 +EXPORT_SYMBOL_GPL(unhash_nx_info);
21477 +
21478 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/proc.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/proc.c
21479 --- linux-2.6.30.2/kernel/vserver/proc.c        1970-01-01 01:00:00.000000000 +0100
21480 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/proc.c     2009-07-04 04:52:43.000000000 +0200
21481 @@ -0,0 +1,1097 @@
21482 +/*
21483 + *  linux/kernel/vserver/proc.c
21484 + *
21485 + *  Virtual Context Support
21486 + *
21487 + *  Copyright (C) 2003-2007  Herbert Pötzl
21488 + *
21489 + *  V0.01  basic structure
21490 + *  V0.02  adaptation vs1.3.0
21491 + *  V0.03  proc permissions
21492 + *  V0.04  locking/generic
21493 + *  V0.05  next generation procfs
21494 + *  V0.06  inode validation
21495 + *  V0.07  generic rewrite vid
21496 + *  V0.08  remove inode type
21497 + *
21498 + */
21499 +
21500 +#include <linux/proc_fs.h>
21501 +#include <linux/fs_struct.h>
21502 +#include <asm/unistd.h>
21503 +
21504 +#include <linux/vs_context.h>
21505 +#include <linux/vs_network.h>
21506 +#include <linux/vs_cvirt.h>
21507 +
21508 +#include <linux/in.h>
21509 +#include <linux/inetdevice.h>
21510 +#include <linux/vs_inet.h>
21511 +#include <linux/vs_inet6.h>
21512 +
21513 +#include <linux/vserver/global.h>
21514 +
21515 +#include "cvirt_proc.h"
21516 +#include "cacct_proc.h"
21517 +#include "limit_proc.h"
21518 +#include "sched_proc.h"
21519 +#include "vci_config.h"
21520 +
21521 +
21522 +static inline char *print_cap_t(char *buffer, kernel_cap_t *c)
21523 +{
21524 +       unsigned int word;
21525 +       /* index is mirrored (U32S-1 - word); 8 hex digits per word */
21526 +       CAP_FOR_EACH_U32(word) {
21527 +               buffer += sprintf(buffer, "%08x",
21528 +                       c->cap[(_KERNEL_CAPABILITY_U32S-1) - word]);
21529 +       }
21530 +       return buffer;
21531 +}
21532 +
21533 +
21534 +static struct proc_dir_entry *proc_virtual;
21535 +
21536 +static struct proc_dir_entry *proc_virtnet;
21537 +
21538 +
21539 +/* first the actual feeds */
21540 +
21541 +
21542 +static int proc_vci(char *buffer)       /* returns sprintf()'s character count */
21543 +{
21544 +       return sprintf(buffer,
21545 +               "VCIVersion:\t%04x:%04x\n"
21546 +               "VCISyscall:\t%d\n"
21547 +               "VCIKernel:\t%08x\n",
21548 +               VCI_VERSION >> 16,      /* upper 16 bits of the version */
21549 +               VCI_VERSION & 0xFFFF,   /* lower 16 bits of the version */
21550 +               __NR_vserver,
21551 +               vci_kernel_config());
21552 +}
21553 +
21554 +static int proc_virtual_info(char *buffer)      /* same output as proc_vci() */
21555 +{
21556 +       return proc_vci(buffer);
21557 +}
21558 +
21559 +static int proc_virtual_status(char *buffer)    /* global counters; returns length */
21560 +{
21561 +       return sprintf(buffer,
21562 +               "#CTotal:\t%d\n"
21563 +               "#CActive:\t%d\n"
21564 +               "#NSProxy:\t%d\t%d %d %d %d %d %d\n"
21565 +               "#InitTask:\t%d\t%d %d\n",
21566 +               atomic_read(&vx_global_ctotal),
21567 +               atomic_read(&vx_global_cactive),
21568 +               atomic_read(&vs_global_nsproxy),        /* first of the seven #NSProxy values */
21569 +               atomic_read(&vs_global_fs),
21570 +               atomic_read(&vs_global_mnt_ns),
21571 +               atomic_read(&vs_global_uts_ns),
21572 +               atomic_read(&nr_ipc_ns),
21573 +               atomic_read(&vs_global_user_ns),
21574 +               atomic_read(&vs_global_pid_ns),
21575 +               atomic_read(&init_task.usage),  /* first of the three #InitTask values */
21576 +               atomic_read(&init_task.nsproxy->count),
21577 +               init_task.fs->users);
21578 +}
21579 +
21580 +
21581 +int proc_vxi_info(struct vx_info *vxi, char *buffer)    /* returns formatted length */
21582 +{
21583 +       int length;
21584 +
21585 +       length = sprintf(buffer,
21586 +               "ID:\t%d\n"
21587 +               "Info:\t%p\n"
21588 +               "Init:\t%d\n"
21589 +               "OOM:\t%lld\n",
21590 +               vxi->vx_id,
21591 +               vxi,
21592 +               vxi->vx_initpid,
21593 +               vxi->vx_badness_bias);  /* printed with %lld, no cast */
21594 +       return length;
21595 +}
21596 +
21597 +int proc_vxi_status(struct vx_info *vxi, char *buffer)  /* returns formatted length */
21598 +{
21599 +       char *orig = buffer;    /* start of output, used for the final length */
21600 +
21601 +       buffer += sprintf(buffer,
21602 +               "UseCnt:\t%d\n"
21603 +               "Tasks:\t%d\n"
21604 +               "Flags:\t%016llx\n",
21605 +               atomic_read(&vxi->vx_usecnt),
21606 +               atomic_read(&vxi->vx_tasks),
21607 +               (unsigned long long)vxi->vx_flags);
21608 +
21609 +       buffer += sprintf(buffer, "BCaps:\t");
21610 +       buffer = print_cap_t(buffer, &vxi->vx_bcaps);   /* returns the advanced cursor */
21611 +       buffer += sprintf(buffer, "\n");
21612 +
21613 +       buffer += sprintf(buffer,
21614 +               "CCaps:\t%016llx\n"
21615 +               "Spaces:\t%08lx %08lx\n",
21616 +               (unsigned long long)vxi->vx_ccaps,
21617 +               vxi->vx_nsmask[0], vxi->vx_nsmask[1]);
21618 +       return buffer - orig;
21619 +}
21620 +
21621 +int proc_vxi_limit(struct vx_info *vxi, char *buffer)   /* forwards vxi->limit */
21622 +{
21623 +       return vx_info_proc_limit(&vxi->limit, buffer);
21624 +}
21625 +
21626 +int proc_vxi_sched(struct vx_info *vxi, char *buffer)   /* returns total length */
21627 +{
21628 +       char *pos = buffer;     /* write cursor: global part, then one part per cpu */
21629 +       int cpu;
21630 +
21631 +       pos += vx_info_proc_sched(&vxi->sched, pos);
21632 +       for_each_online_cpu(cpu) {
21633 +               pos += vx_info_proc_sched_pc(
21634 +                       &vx_per_cpu(vxi, sched_pc, cpu), pos, cpu);
21635 +       }
21636 +       return pos - buffer;
21637 +}
21638 +
21639 +int proc_vxi_nsproxy0(struct vx_info *vxi, char *buffer)        /* forwards vx_nsproxy[0] */
21640 +{
21641 +       return vx_info_proc_nsproxy(vxi->vx_nsproxy[0], buffer);
21642 +}
21643 +
21644 +int proc_vxi_nsproxy1(struct vx_info *vxi, char *buffer)        /* forwards vx_nsproxy[1] */
21645 +{
21646 +       return vx_info_proc_nsproxy(vxi->vx_nsproxy[1], buffer);
21647 +}
21648 +/* Reader for the per-context "cvirt" file: context-wide part first, then one section per online CPU; returns total length. */
21649 +int proc_vxi_cvirt(struct vx_info *vxi, char *buffer)
21650 +{
21651 +       int cpu, length;
21652 +
21653 +       vx_update_load(vxi);    /* presumably refreshes the load figures before they are printed -- confirm */
21654 +       length = vx_info_proc_cvirt(&vxi->cvirt, buffer);
21655 +       for_each_online_cpu(cpu) {
21656 +               length += vx_info_proc_cvirt_pc(
21657 +                       &vx_per_cpu(vxi, cvirt_pc, cpu),
21658 +                       buffer + length, cpu);  /* append after what has been written so far */
21659 +       }
21660 +       return length;
21661 +}
21662 +/* Reader for the per-context "cacct" file: hands &vxi->cacct to vx_info_proc_cacct() and returns its result. */
21663 +int proc_vxi_cacct(struct vx_info *vxi, char *buffer)
21664 +{
21665 +       return vx_info_proc_cacct(&vxi->cacct, buffer);
21666 +}
21667 +
21668 +/* Reader for the global virtnet "info" file (see nx_virtnet_stuff); delegates to proc_vci(). */
21669 +static int proc_virtnet_info(char *buffer)
21670 +{
21671 +       return proc_vci(buffer);
21672 +}
21673 +/* Reader for the global virtnet "status" file: prints the nx_global_ctotal and nx_global_cactive counters. */
21674 +static int proc_virtnet_status(char *buffer)
21675 +{
21676 +       return sprintf(buffer,
21677 +               "#CTotal:\t%d\n"
21678 +               "#CActive:\t%d\n",
21679 +               atomic_read(&nx_global_ctotal),
21680 +               atomic_read(&nx_global_cactive));
21681 +}
21682 +/* Reader for the per-network-context "info" file: id, pointer, broadcast/loopback and the address lists; returns length. */
21683 +int proc_nxi_info(struct nx_info *nxi, char *buffer)
21684 +{
21685 +       struct nx_addr_v4 *v4a;
21686 +#ifdef CONFIG_IPV6
21687 +       struct nx_addr_v6 *v6a;
21688 +#endif
21689 +       int length, i;
21690 +       /* NOTE(review): output grows by unbounded sprintf(); proc_nx_info_read() passes one page -- confirm long lists fit */
21691 +       length = sprintf(buffer,
21692 +               "ID:\t%d\n"
21693 +               "Info:\t%p\n"
21694 +               "Bcast:\t" NIPQUAD_FMT "\n"
21695 +               "Lback:\t" NIPQUAD_FMT "\n",
21696 +               nxi->nx_id,
21697 +               nxi,
21698 +               NIPQUAD(nxi->v4_bcast.s_addr),
21699 +               NIPQUAD(nxi->v4_lback.s_addr));
21700 +       /* each address list is walked only when NX_IPV4()/NX_IPV6() holds for this context */
21701 +       if (!NX_IPV4(nxi))
21702 +               goto skip_v4;
21703 +       for (i = 0, v4a = &nxi->v4; v4a; i++, v4a = v4a->next)
21704 +               length += sprintf(buffer + length, "%d:\t" NXAV4_FMT "\n",
21705 +                       i, NXAV4(v4a));
21706 +skip_v4:
21707 +#ifdef CONFIG_IPV6
21708 +       if (!NX_IPV6(nxi))
21709 +               goto skip_v6;
21710 +       for (i = 0, v6a = &nxi->v6; v6a; i++, v6a = v6a->next)
21711 +               length += sprintf(buffer + length, "%d:\t" NXAV6_FMT "\n",
21712 +                       i, NXAV6(v6a));
21713 +skip_v6:
21714 +#endif
21715 +       return length;
21716 +}
21717 +/* Reader for the per-network-context "status" file: use count, task count, flags and ncaps; returns length. */
21718 +int proc_nxi_status(struct nx_info *nxi, char *buffer)
21719 +{
21720 +       int length;
21721 +
21722 +       length = sprintf(buffer,
21723 +               "UseCnt:\t%d\n"
21724 +               "Tasks:\t%d\n"
21725 +               "Flags:\t%016llx\n"
21726 +               "NCaps:\t%016llx\n",
21727 +               atomic_read(&nxi->nx_usecnt),
21728 +               atomic_read(&nxi->nx_tasks),
21729 +               (unsigned long long)nxi->nx_flags,
21730 +               (unsigned long long)nxi->nx_ncaps);
21731 +       return length;
21732 +}
21733 +
21734 +
21735 +
21736 +/* here the inode helpers */
21737 +/* Template for one proc entry; tables of these are built with the NOD/DIR/INF/VINF/NINF macros. */
21738 +struct vs_entry {
21739 +       int len;        /* length of name, set to sizeof(NAME) - 1 by NOD() */
21740 +       char *name;     /* entry name; NULL ends the name scans in the lookup/readdir loops */
21741 +       mode_t mode;    /* copied to inode->i_mode */
21742 +       struct inode_operations *iop;   /* installed as i_op when non-NULL */
21743 +       struct file_operations *fop;    /* installed as i_fop when non-NULL */
21744 +       union proc_op op;       /* copied to PROC_I(inode)->op; holds the reader callback */
21745 +};
21746 +/* Allocate an inode on @sb from template @p: uid/gid/tag 0, immutable, times set to now; returns NULL if new_inode() fails. */
21747 +static struct inode *vs_proc_make_inode(struct super_block *sb, struct vs_entry *p)
21748 +{
21749 +       struct inode *inode = new_inode(sb);
21750 +
21751 +       if (!inode)
21752 +               goto out;
21753 +
21754 +       inode->i_mode = p->mode;
21755 +       if (p->iop)             /* templates without ops keep what new_inode() set up */
21756 +               inode->i_op = p->iop;
21757 +       if (p->fop)
21758 +               inode->i_fop = p->fop;
21759 +       /* S_ISDIR() compares the whole type field; a bare '& S_IFDIR' would also match S_IFBLK and S_IFSOCK */
21760 +       inode->i_nlink = S_ISDIR(p->mode) ? 2 : 1;
21761 +       inode->i_flags |= S_IMMUTABLE;
21762 +
21763 +       inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
21764 +
21765 +       inode->i_uid = 0;
21766 +       inode->i_gid = 0;
21767 +       inode->i_tag = 0;
21768 +out:
21769 +       return inode;
21770 +}
21771 +/* Make an inode from template @ptr (a struct vs_entry), store its op and @id in the proc inode, bind it to @dentry. */
21772 +static struct dentry *vs_proc_instantiate(struct inode *dir,
21773 +       struct dentry *dentry, int id, void *ptr)
21774 +{
21775 +       struct vs_entry *p = ptr;
21776 +       struct inode *inode = vs_proc_make_inode(dir->i_sb, p);
21777 +       struct dentry *error = ERR_PTR(-EINVAL);        /* returned when no inode could be made */
21778 +
21779 +       if (!inode)
21780 +               goto out;
21781 +
21782 +       PROC_I(inode)->op = p->op;
21783 +       PROC_I(inode)->fd = id; /* the fd field carries the context id (0 for global entries) */
21784 +       d_add(dentry, inode);
21785 +       error = NULL;           /* NULL signals success to the callers */
21786 +out:
21787 +       return error;
21788 +}
21789 +
21790 +/* Lookups */
21791 +/* Callback type used by proc_fill_cache(): (parent inode, new dentry, id, template pointer). */
21792 +typedef struct dentry *instantiate_t(struct inode *, struct dentry *, int, void *);
21793 +
21794 +/*
21795 + * Fill a directory entry.
21796 + *
21797 + * If possible create the dcache entry and derive our inode number and
21798 + * file type from dcache entry.
21799 + *
21800 + * Since all of the proc inode numbers are dynamically generated, the inode
21801 + * numbers do not exist until the inode is cached.  This means creating
21802 + * the dcache entry in readdir is necessary to keep the inode numbers
21803 + * reported by readdir in sync with the inode numbers reported
21804 + * by stat.
21805 + */
21806 +static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
21807 +       char *name, int len, instantiate_t instantiate, int id, void *ptr)
21808 +{
21809 +       struct dentry *child, *dir = filp->f_dentry;
21810 +       struct inode *inode;
21811 +       struct qstr qname;
21812 +       ino_t ino = 0;
21813 +       unsigned type = DT_UNKNOWN;
21814 +
21815 +       qname.name = name;
21816 +       qname.len  = len;
21817 +       qname.hash = full_name_hash(name, len);
21818 +
21819 +       child = d_lookup(dir, &qname);  /* is the entry already in the dcache? */
21820 +       if (!child) {
21821 +               struct dentry *new;
21822 +               new = d_alloc(dir, &qname);
21823 +               if (new) {
21824 +                       child = instantiate(dir->d_inode, new, id, ptr);
21825 +                       if (child)      /* non-NULL result (e.g. an ERR_PTR): drop the fresh dentry */
21826 +                               dput(new);
21827 +                       else            /* NULL result: the new dentry is the child */
21828 +                               child = new;
21829 +               }
21830 +       }
21831 +       if (!child || IS_ERR(child) || !child->d_inode)        /* nothing usable: use the fallbacks below */
21832 +               goto end_instantiate;
21833 +       inode = child->d_inode;
21834 +       if (inode) {
21835 +               ino = inode->i_ino;
21836 +               type = inode->i_mode >> 12;     /* file-type bits of i_mode, passed to filldir as the entry type */
21837 +       }
21838 +       dput(child);
21839 +end_instantiate:
21840 +       if (!ino)
21841 +               ino = find_inode_number(dir, &qname);
21842 +       if (!ino)
21843 +               ino = 1;        /* last resort when no inode number could be found */
21844 +       return filldir(dirent, name, len, filp->f_pos, ino, type);
21845 +}
21846 +
21847 +
21848 +
21849 +/* get and revalidate vx_info/xid */
21850 +/* Look up the vx_info for the xid kept in the proc inode's fd field. NOTE(review): presumably a counted reference -- confirm. */
21851 +static inline
21852 +struct vx_info *get_proc_vx_info(struct inode *inode)
21853 +{
21854 +       return lookup_vx_info(PROC_I(inode)->fd);
21855 +}
21856 +/* d_revalidate hook: the dentry stays valid while its xid is 0 or still hashed; otherwise it is dropped. */
21857 +static int proc_xid_revalidate(struct dentry *dentry, struct nameidata *nd)
21858 +{
21859 +       xid_t ctx = PROC_I(dentry->d_inode)->fd;
21860 +
21861 +       if (ctx && !xid_is_hashed(ctx)) {
21862 +               d_drop(dentry); /* context no longer hashed: unhash the stale dentry */
21863 +               return 0;
21864 +       }
21865 +       return 1;
21866 +}
21867 +
21868 +
21869 +/* get and revalidate nx_info/nid */
21870 +/* d_revalidate hook: the dentry stays valid while its nid is 0 or still hashed; otherwise it is dropped. */
21871 +static int proc_nid_revalidate(struct dentry *dentry, struct nameidata *nd)
21872 +{
21873 +       nid_t ctx = PROC_I(dentry->d_inode)->fd;
21874 +
21875 +       if (ctx && !nid_is_hashed(ctx)) {
21876 +               d_drop(dentry); /* context no longer hashed: unhash the stale dentry */
21877 +               return 0;
21878 +       }
21879 +       return 1;
21880 +}
21881 +
21882 +
21883 +/* Upper bound applied to the user's read count in the *_info_read() handlers below. */
21884 +#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024)
21885 +/* read() for global info files: the inode's proc_vs_read op fills a temporary page, the requested window is copied out. */
21886 +static ssize_t proc_vs_info_read(struct file *file, char __user *buf,
21887 +                         size_t count, loff_t *ppos)
21888 +{
21889 +       struct inode *inode = file->f_dentry->d_inode;
21890 +       unsigned long page;
21891 +       ssize_t length = 0;
21892 +
21893 +       if (count > PROC_BLOCK_SIZE)
21894 +               count = PROC_BLOCK_SIZE;
21895 +
21896 +       /* fade that out as soon as stable */
21897 +       WARN_ON(PROC_I(inode)->fd);     /* global files are expected to carry id 0 */
21898 +
21899 +       if (!(page = __get_free_page(GFP_KERNEL)))
21900 +               return -ENOMEM;
21901 +
21902 +       BUG_ON(!PROC_I(inode)->op.proc_vs_read);
21903 +       length = PROC_I(inode)->op.proc_vs_read((char *)page);
21904 +
21905 +       if (length >= 0)        /* a negative length from the reader is returned unchanged */
21906 +               length = simple_read_from_buffer(buf, count, ppos,
21907 +                       (char *)page, length);
21908 +
21909 +       free_page(page);
21910 +       return length;
21911 +}
21912 +/* read() for per-context files: looks up the vx_info by xid, lets proc_vxi_read fill a page, copies the window out. */
21913 +static ssize_t proc_vx_info_read(struct file *file, char __user *buf,
21914 +                         size_t count, loff_t *ppos)
21915 +{
21916 +       struct inode *inode = file->f_dentry->d_inode;
21917 +       struct vx_info *vxi = NULL;
21918 +       xid_t xid = PROC_I(inode)->fd;
21919 +       unsigned long page;
21920 +       ssize_t length = 0;
21921 +
21922 +       if (count > PROC_BLOCK_SIZE)
21923 +               count = PROC_BLOCK_SIZE;
21924 +
21925 +       /* fade that out as soon as stable */
21926 +       WARN_ON(!xid);
21927 +       vxi = lookup_vx_info(xid);
21928 +       if (!vxi)               /* context not found: return 0 bytes */
21929 +               goto out;
21930 +
21931 +       length = -ENOMEM;
21932 +       if (!(page = __get_free_page(GFP_KERNEL)))
21933 +               goto out_put;
21934 +
21935 +       BUG_ON(!PROC_I(inode)->op.proc_vxi_read);
21936 +       length = PROC_I(inode)->op.proc_vxi_read(vxi, (char *)page);
21937 +
21938 +       if (length >= 0)        /* a negative length from the reader is returned unchanged */
21939 +               length = simple_read_from_buffer(buf, count, ppos,
21940 +                       (char *)page, length);
21941 +
21942 +       free_page(page);
21943 +out_put:
21944 +       put_vx_info(vxi);       /* pairs with lookup_vx_info() above */
21945 +out:
21946 +       return length;
21947 +}
21948 +/* read() for per-network-context files: looks up the nx_info by nid, lets proc_nxi_read fill a page, copies the window out. */
21949 +static ssize_t proc_nx_info_read(struct file *file, char __user *buf,
21950 +                         size_t count, loff_t *ppos)
21951 +{
21952 +       struct inode *inode = file->f_dentry->d_inode;
21953 +       struct nx_info *nxi = NULL;
21954 +       nid_t nid = PROC_I(inode)->fd;
21955 +       unsigned long page;
21956 +       ssize_t length = 0;
21957 +
21958 +       if (count > PROC_BLOCK_SIZE)
21959 +               count = PROC_BLOCK_SIZE;
21960 +
21961 +       /* fade that out as soon as stable */
21962 +       WARN_ON(!nid);
21963 +       nxi = lookup_nx_info(nid);
21964 +       if (!nxi)               /* context not found: return 0 bytes */
21965 +               goto out;
21966 +
21967 +       length = -ENOMEM;
21968 +       if (!(page = __get_free_page(GFP_KERNEL)))
21969 +               goto out_put;
21970 +
21971 +       BUG_ON(!PROC_I(inode)->op.proc_nxi_read);
21972 +       length = PROC_I(inode)->op.proc_nxi_read(nxi, (char *)page);
21973 +
21974 +       if (length >= 0)        /* a negative length from the reader is returned unchanged */
21975 +               length = simple_read_from_buffer(buf, count, ppos,
21976 +                       (char *)page, length);
21977 +
21978 +       free_page(page);
21979 +out_put:
21980 +       put_nx_info(nxi);       /* pairs with lookup_nx_info() above */
21981 +out:
21982 +       return length;
21983 +}
21984 +
21985 +
21986 +
21987 +/* here comes the lower level */
21988 +
21989 +/* NOD: initializer for one struct vs_entry; .len is the name length without the terminating NUL. */
21990 +#define NOD(NAME, MODE, IOP, FOP, OP) {        \
21991 +       .len  = sizeof(NAME) - 1,       \
21992 +       .name = (NAME),                 \
21993 +       .mode = MODE,                   \
21994 +       .iop  = IOP,                    \
21995 +       .fop  = FOP,                    \
21996 +       .op   = OP,                     \
21997 +}
21998 +
21999 +/* DIR: directory entry wired to proc_<OTYPE>_inode_operations and proc_<OTYPE>_file_operations, with an empty op. */
22000 +#define DIR(NAME, MODE, OTYPE)                         \
22001 +       NOD(NAME, (S_IFDIR | (MODE)),                   \
22002 +               &proc_ ## OTYPE ## _inode_operations,   \
22003 +               &proc_ ## OTYPE ## _file_operations, { } )
22004 +/* INF: regular file served by proc_vs_info_file_operations; reader is proc_<OTYPE>(buffer). */
22005 +#define INF(NAME, MODE, OTYPE)                         \
22006 +       NOD(NAME, (S_IFREG | (MODE)), NULL,             \
22007 +               &proc_vs_info_file_operations,          \
22008 +               { .proc_vs_read = &proc_##OTYPE } )
22009 +/* VINF: regular file served by proc_vx_info_file_operations; reader is proc_<OTYPE>(vxi, buffer). */
22010 +#define VINF(NAME, MODE, OTYPE)                                \
22011 +       NOD(NAME, (S_IFREG | (MODE)), NULL,             \
22012 +               &proc_vx_info_file_operations,          \
22013 +               { .proc_vxi_read = &proc_##OTYPE } )
22014 +/* NINF: regular file served by proc_nx_info_file_operations; reader is proc_<OTYPE>(nxi, buffer). */
22015 +#define NINF(NAME, MODE, OTYPE)                                \
22016 +       NOD(NAME, (S_IFREG | (MODE)), NULL,             \
22017 +               &proc_nx_info_file_operations,          \
22018 +               { .proc_nxi_read = &proc_##OTYPE } )
22019 +
22020 +/* Operation tables referenced by the INF/VINF entries and by proc_xid_instantiate(). */
22021 +static struct file_operations proc_vs_info_file_operations = {
22022 +       .read =         proc_vs_info_read,
22023 +};
22024 +
22025 +static struct file_operations proc_vx_info_file_operations = {
22026 +       .read =         proc_vx_info_read,
22027 +};
22028 +
22029 +static struct dentry_operations proc_xid_dentry_operations = {
22030 +       .d_revalidate = proc_xid_revalidate,
22031 +};
22032 +/* Files offered by proc_xid_lookup()/proc_xid_readdir(); the empty entry (NULL name) terminates the table. */
22033 +static struct vs_entry vx_base_stuff[] = {
22034 +       VINF("info",    S_IRUGO, vxi_info),
22035 +       VINF("status",  S_IRUGO, vxi_status),
22036 +       VINF("limit",   S_IRUGO, vxi_limit),
22037 +       VINF("sched",   S_IRUGO, vxi_sched),
22038 +       VINF("nsproxy", S_IRUGO, vxi_nsproxy0),
22039 +       VINF("nsproxy1",S_IRUGO, vxi_nsproxy1),
22040 +       VINF("cvirt",   S_IRUGO, vxi_cvirt),
22041 +       VINF("cacct",   S_IRUGO, vxi_cacct),
22042 +       {}
22043 +};
22044 +
22045 +
22046 +
22047 +/* Like vs_proc_instantiate(), but first attaches the xid revalidation hook to @dentry. */
22048 +static struct dentry *proc_xid_instantiate(struct inode *dir,
22049 +       struct dentry *dentry, int id, void *ptr)
22050 +{
22051 +       dentry->d_op = &proc_xid_dentry_operations;
22052 +       return vs_proc_instantiate(dir, dentry, id, ptr);
22053 +}
22054 +/* lookup inside a context directory: match the name against vx_base_stuff and instantiate the hit with the directory's id. */
22055 +static struct dentry *proc_xid_lookup(struct inode *dir,
22056 +       struct dentry *dentry, struct nameidata *nd)
22057 +{
22058 +       const struct qstr *want = &dentry->d_name;
22059 +       struct vs_entry *ent;
22060 +
22061 +       /* scan until the terminating entry, whose name is NULL */
22062 +       for (ent = vx_base_stuff; ent->name; ent++) {
22063 +               if (ent->len == want->len &&
22064 +                   !memcmp(want->name, ent->name, ent->len))
22065 +                       break;
22066 +       }
22067 +
22068 +       /* ran off the end of the table: no such entry */
22069 +       if (!ent->name)
22070 +               return ERR_PTR(-ENOENT);
22071 +
22072 +       return proc_xid_instantiate(dir, dentry, PROC_I(dir)->fd, ent);
22073 +}
22074 +/* readdir for a context directory: emits ".", ".." and then the vx_base_stuff entries, resuming at filp->f_pos. */
22075 +static int proc_xid_readdir(struct file *filp,
22076 +       void *dirent, filldir_t filldir)
22077 +{
22078 +       struct dentry *dentry = filp->f_dentry;
22079 +       struct inode *inode = dentry->d_inode;
22080 +       struct vs_entry *p = vx_base_stuff;
22081 +       int size = sizeof(vx_base_stuff) / sizeof(struct vs_entry);    /* includes the terminating entry */
22082 +       int pos, index;
22083 +       u64 ino;
22084 +
22085 +       pos = filp->f_pos;
22086 +       switch (pos) {
22087 +       case 0:
22088 +               ino = inode->i_ino;
22089 +               if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
22090 +                       goto out;
22091 +               pos++;
22092 +               /* fall through */
22093 +       case 1:
22094 +               ino = parent_ino(dentry);
22095 +               if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
22096 +                       goto out;
22097 +               pos++;
22098 +               /* fall through */
22099 +       default:
22100 +               index = pos - 2;        /* table index: position minus the two dot entries */
22101 +               if (index >= size)
22102 +                       goto out;
22103 +               for (p += index; p->name; p++) {
22104 +                       if (proc_fill_cache(filp, dirent, filldir, p->name, p->len,
22105 +                               vs_proc_instantiate, PROC_I(inode)->fd, p))
22106 +                               goto out;       /* filldir refused the entry: stop, keep pos for the next call */
22107 +                       pos++;
22108 +               }
22109 +       }
22110 +out:
22111 +       filp->f_pos = pos;
22112 +       return 1;       /* NOTE(review): returns 1 while proc_virtual_readdir() returns 0 -- confirm this is intended */
22113 +}
22114 +
22115 +
22116 +/* Operation tables referenced by the NINF entries and by proc_nid_instantiate(). */
22117 +static struct file_operations proc_nx_info_file_operations = {
22118 +       .read =         proc_nx_info_read,
22119 +};
22120 +
22121 +static struct dentry_operations proc_nid_dentry_operations = {
22122 +       .d_revalidate = proc_nid_revalidate,
22123 +};
22124 +/* Files offered by proc_nid_lookup()/proc_nid_readdir(); the empty entry (NULL name) terminates the table. */
22125 +static struct vs_entry nx_base_stuff[] = {
22126 +       NINF("info",    S_IRUGO, nxi_info),
22127 +       NINF("status",  S_IRUGO, nxi_status),
22128 +       {}
22129 +};
22130 +
22131 +/* Like vs_proc_instantiate(), but first attaches the nid revalidation hook to @dentry. */
22132 +static struct dentry *proc_nid_instantiate(struct inode *dir,
22133 +       struct dentry *dentry, int id, void *ptr)
22134 +{
22135 +       dentry->d_op = &proc_nid_dentry_operations;
22136 +       return vs_proc_instantiate(dir, dentry, id, ptr);
22137 +}
22138 +/* lookup inside a network-context directory: match the name against nx_base_stuff and instantiate the hit with the directory's id. */
22139 +static struct dentry *proc_nid_lookup(struct inode *dir,
22140 +       struct dentry *dentry, struct nameidata *nd)
22141 +{
22142 +       const struct qstr *want = &dentry->d_name;
22143 +       struct vs_entry *ent;
22144 +
22145 +       /* scan until the terminating entry, whose name is NULL */
22146 +       for (ent = nx_base_stuff; ent->name; ent++) {
22147 +               if (ent->len == want->len &&
22148 +                   !memcmp(want->name, ent->name, ent->len))
22149 +                       break;
22150 +       }
22151 +
22152 +       /* ran off the end of the table: no such entry */
22153 +       if (!ent->name)
22154 +               return ERR_PTR(-ENOENT);
22155 +
22156 +       return proc_nid_instantiate(dir, dentry, PROC_I(dir)->fd, ent);
22157 +}
22158 +/* readdir for a network-context directory: emits ".", ".." and then the nx_base_stuff entries, resuming at filp->f_pos. */
22159 +static int proc_nid_readdir(struct file *filp,
22160 +       void *dirent, filldir_t filldir)
22161 +{
22162 +       struct dentry *dentry = filp->f_dentry;
22163 +       struct inode *inode = dentry->d_inode;
22164 +       struct vs_entry *p = nx_base_stuff;
22165 +       int size = sizeof(nx_base_stuff) / sizeof(struct vs_entry);    /* includes the terminating entry */
22166 +       int pos, index;
22167 +       u64 ino;
22168 +
22169 +       pos = filp->f_pos;
22170 +       switch (pos) {
22171 +       case 0:
22172 +               ino = inode->i_ino;
22173 +               if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
22174 +                       goto out;
22175 +               pos++;
22176 +               /* fall through */
22177 +       case 1:
22178 +               ino = parent_ino(dentry);
22179 +               if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
22180 +                       goto out;
22181 +               pos++;
22182 +               /* fall through */
22183 +       default:
22184 +               index = pos - 2;        /* table index: position minus the two dot entries */
22185 +               if (index >= size)
22186 +                       goto out;
22187 +               for (p += index; p->name; p++) {
22188 +                       if (proc_fill_cache(filp, dirent, filldir, p->name, p->len,
22189 +                               vs_proc_instantiate, PROC_I(inode)->fd, p))
22190 +                               goto out;       /* filldir refused the entry: stop, keep pos for the next call */
22191 +                       pos++;
22192 +               }
22193 +       }
22194 +out:
22195 +       filp->f_pos = pos;
22196 +       return 1;       /* NOTE(review): returns 1 while proc_virtnet_readdir() returns 0 -- confirm this is intended */
22197 +}
22198 +
22199 +
22200 +#define MAX_MULBY10    (((~0U >> 1) - 9) / 10) /* below this, vid * 10 + 9 still fits a signed int */
22201 +/* Parse @len decimal digits at @str into an id; -1 on a non-digit, a leading zero or overflow (0 when @len <= 0). */
22202 +static inline int atovid(const char *str, int len)
22203 +{
22204 +       int vid, c;
22205 +
22206 +       vid = 0;
22207 +       while (len-- > 0) {
22208 +               c = *str - '0';
22209 +               str++;
22210 +               if (c < 0 || c > 9)     /* c is signed: bytes below '0' go negative and must be rejected too */
22211 +                       return -1;
22212 +               if (vid >= MAX_MULBY10) /* one more digit could overflow int */
22213 +                       return -1;
22214 +               vid *= 10;
22215 +               vid += c;
22216 +               if (!vid)               /* rejects "0" and any leading zero */
22217 +                       return -1;
22218 +       }
22219 +       return vid;
22220 +}
22221 +
22222 +/* now the upper level (virtual) */
22223 +
22224 +/* Directory operations installed on context directories through DIR(..., xid). */
22225 +static struct file_operations proc_xid_file_operations = {
22226 +       .read =         generic_read_dir,
22227 +       .readdir =      proc_xid_readdir,
22228 +};
22229 +
22230 +static struct inode_operations proc_xid_inode_operations = {
22231 +       .lookup =       proc_xid_lookup,
22232 +};
22233 +/* Top-level entries; the nameless DIR() ends the name scans and serves as the template for context directories. */
22234 +static struct vs_entry vx_virtual_stuff[] = {
22235 +       INF("info",     S_IRUGO, virtual_info),
22236 +       INF("status",   S_IRUGO, virtual_status),
22237 +       DIR(NULL,       S_IRUGO | S_IXUGO, xid),
22238 +};
22239 +
22240 +/* Top-level lookup: a vx_virtual_stuff name is instantiated with id 0; any other name must parse as a hashed xid. */
22241 +static struct dentry *proc_virtual_lookup(struct inode *dir,
22242 +       struct dentry *dentry, struct nameidata *nd)
22243 +{
22244 +       struct vs_entry *p = vx_virtual_stuff;
22245 +       struct dentry *error = ERR_PTR(-ENOENT);
22246 +       int id = 0;
22247 +
22248 +       for (; p->name; p++) {
22249 +               if (p->len != dentry->d_name.len)
22250 +                       continue;
22251 +               if (!memcmp(dentry->d_name.name, p->name, p->len))
22252 +                       break;
22253 +       }
22254 +       if (p->name)            /* matched one of the named entries */
22255 +               goto instantiate;
22256 +       /* no match: p now points at the nameless DIR() entry, which is used as the template below */
22257 +       id = atovid(dentry->d_name.name, dentry->d_name.len);
22258 +       if ((id < 0) || !xid_is_hashed(id))
22259 +               goto out;
22260 +
22261 +instantiate:
22262 +       error = proc_xid_instantiate(dir, dentry, id, p);
22263 +out:
22264 +       return error;
22265 +}
22266 +/* Directory operations installed on network-context directories through DIR(..., nid). */
22267 +static struct file_operations proc_nid_file_operations = {
22268 +       .read =         generic_read_dir,
22269 +       .readdir =      proc_nid_readdir,
22270 +};
22271 +
22272 +static struct inode_operations proc_nid_inode_operations = {
22273 +       .lookup =       proc_nid_lookup,
22274 +};
22275 +/* Top-level entries; the nameless DIR() ends the name scans and serves as the template for nid directories. */
22276 +static struct vs_entry nx_virtnet_stuff[] = {
22277 +       INF("info",     S_IRUGO, virtnet_info),
22278 +       INF("status",   S_IRUGO, virtnet_status),
22279 +       DIR(NULL,       S_IRUGO | S_IXUGO, nid),
22280 +};
22281 +
22282 +/* Top-level lookup: a nx_virtnet_stuff name is instantiated with id 0; any other name must parse as a hashed nid. */
22283 +static struct dentry *proc_virtnet_lookup(struct inode *dir,
22284 +       struct dentry *dentry, struct nameidata *nd)
22285 +{
22286 +       struct vs_entry *p = nx_virtnet_stuff;
22287 +       struct dentry *error = ERR_PTR(-ENOENT);
22288 +       int id = 0;
22289 +
22290 +       for (; p->name; p++) {
22291 +               if (p->len != dentry->d_name.len)
22292 +                       continue;
22293 +               if (!memcmp(dentry->d_name.name, p->name, p->len))
22294 +                       break;
22295 +       }
22296 +       if (p->name)            /* matched one of the named entries */
22297 +               goto instantiate;
22298 +       /* no match: p now points at the nameless DIR() entry, which is used as the template below */
22299 +       id = atovid(dentry->d_name.name, dentry->d_name.len);
22300 +       if ((id < 0) || !nid_is_hashed(id))
22301 +               goto out;
22302 +
22303 +instantiate:
22304 +       error = proc_nid_instantiate(dir, dentry, id, p);
22305 +out:
22306 +       return error;
22307 +}
22308 +
22309 +/* PROC_MAXVIDS: number of ids fetched from get_xid_list()/get_nid_list() per readdir call. */
22310 +#define PROC_MAXVIDS 32
22311 +/* Top-level readdir: ".", "..", the named vx_virtual_stuff entries, then one directory per xid from get_xid_list(). */
22312 +int proc_virtual_readdir(struct file *filp,
22313 +       void *dirent, filldir_t filldir)
22314 +{
22315 +       struct dentry *dentry = filp->f_dentry;
22316 +       struct inode *inode = dentry->d_inode;
22317 +       struct vs_entry *p = vx_virtual_stuff;
22318 +       int size = sizeof(vx_virtual_stuff) / sizeof(struct vs_entry);
22319 +       int pos, index;
22320 +       unsigned int xid_array[PROC_MAXVIDS];
22321 +       char buf[PROC_NUMBUF];
22322 +       unsigned int nr_xids, i;
22323 +       u64 ino;
22324 +
22325 +       pos = filp->f_pos;
22326 +       switch (pos) {
22327 +       case 0:
22328 +               ino = inode->i_ino;
22329 +               if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
22330 +                       goto out;
22331 +               pos++;
22332 +               /* fall through */
22333 +       case 1:
22334 +               ino = parent_ino(dentry);
22335 +               if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
22336 +                       goto out;
22337 +               pos++;
22338 +               /* fall through */
22339 +       default:
22340 +               index = pos - 2;        /* table index: position minus the two dot entries */
22341 +               if (index >= size)
22342 +                       goto entries;
22343 +               for (p += index; p->name; p++) {
22344 +                       if (proc_fill_cache(filp, dirent, filldir, p->name, p->len,
22345 +                               vs_proc_instantiate, 0, p))
22346 +                               goto out;
22347 +                       pos++;
22348 +               }
22349 +       entries:
22350 +               index = pos - size;     /* NOTE(review): offset handed to get_xid_list(); its base is defined there -- confirm */
22351 +               p = &vx_virtual_stuff[size - 1];        /* last slot: the nameless DIR() template */
22352 +               nr_xids = get_xid_list(index, xid_array, PROC_MAXVIDS);
22353 +               for (i = 0; i < nr_xids; i++) {
22354 +                       int n, xid = xid_array[i];
22355 +                       unsigned int j = PROC_NUMBUF;
22356 +
22357 +                       n = xid;        /* render xid as decimal, filling buf from its end */
22358 +                       do
22359 +                               buf[--j] = '0' + (n % 10);
22360 +                       while (n /= 10);
22361 +
22362 +                       if (proc_fill_cache(filp, dirent, filldir,
22363 +                               buf + j, PROC_NUMBUF - j,
22364 +                               vs_proc_instantiate, xid, p))
22365 +                               goto out;
22366 +                       pos++;
22367 +               }
22368 +       }
22369 +out:
22370 +       filp->f_pos = pos;
22371 +       return 0;
22372 +}
22373 +/* getattr for the top-level directory: generic attributes, with nlink set to 2 + vx_global_cactive. */
22374 +static int proc_virtual_getattr(struct vfsmount *mnt,
22375 +       struct dentry *dentry, struct kstat *stat)
22376 +{
22377 +       struct inode *inode = dentry->d_inode;
22378 +
22379 +       generic_fillattr(inode, stat);
22380 +       stat->nlink = 2 + atomic_read(&vx_global_cactive);
22381 +       return 0;
22382 +}
22383 +/* Operation tables that proc_vx_init() installs on the "virtual" directory. */
22384 +static struct file_operations proc_virtual_dir_operations = {
22385 +       .read =         generic_read_dir,
22386 +       .readdir =      proc_virtual_readdir,
22387 +};
22388 +
22389 +static struct inode_operations proc_virtual_dir_inode_operations = {
22390 +       .getattr =      proc_virtual_getattr,
22391 +       .lookup =       proc_virtual_lookup,
22392 +};
22393 +
22394 +
22395 +
22396 +
22397 +/* Top-level readdir: ".", "..", the named nx_virtnet_stuff entries, then one directory per nid from get_nid_list(). */
22398 +int proc_virtnet_readdir(struct file *filp,
22399 +       void *dirent, filldir_t filldir)
22400 +{
22401 +       struct dentry *dentry = filp->f_dentry;
22402 +       struct inode *inode = dentry->d_inode;
22403 +       struct vs_entry *p = nx_virtnet_stuff;
22404 +       int size = sizeof(nx_virtnet_stuff) / sizeof(struct vs_entry);
22405 +       int pos, index;
22406 +       unsigned int nid_array[PROC_MAXVIDS];
22407 +       char buf[PROC_NUMBUF];
22408 +       unsigned int nr_nids, i;
22409 +       u64 ino;
22410 +
22411 +       pos = filp->f_pos;
22412 +       switch (pos) {
22413 +       case 0:
22414 +               ino = inode->i_ino;
22415 +               if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
22416 +                       goto out;
22417 +               pos++;
22418 +               /* fall through */
22419 +       case 1:
22420 +               ino = parent_ino(dentry);
22421 +               if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
22422 +                       goto out;
22423 +               pos++;
22424 +               /* fall through */
22425 +       default:
22426 +               index = pos - 2;        /* table index: position minus the two dot entries */
22427 +               if (index >= size)
22428 +                       goto entries;
22429 +               for (p += index; p->name; p++) {
22430 +                       if (proc_fill_cache(filp, dirent, filldir, p->name, p->len,
22431 +                               vs_proc_instantiate, 0, p))
22432 +                               goto out;
22433 +                       pos++;
22434 +               }
22435 +       entries:
22436 +               index = pos - size;     /* NOTE(review): offset handed to get_nid_list(); its base is defined there -- confirm */
22437 +               p = &nx_virtnet_stuff[size - 1];        /* last slot: the nameless DIR() template */
22438 +               nr_nids = get_nid_list(index, nid_array, PROC_MAXVIDS);
22439 +               for (i = 0; i < nr_nids; i++) {
22440 +                       int n, nid = nid_array[i];
22441 +                       unsigned int j = PROC_NUMBUF;
22442 +
22443 +                       n = nid;        /* render nid as decimal, filling buf from its end */
22444 +                       do
22445 +                               buf[--j] = '0' + (n % 10);
22446 +                       while (n /= 10);
22447 +
22448 +                       if (proc_fill_cache(filp, dirent, filldir,
22449 +                               buf + j, PROC_NUMBUF - j,
22450 +                               vs_proc_instantiate, nid, p))
22451 +                               goto out;
22452 +                       pos++;
22453 +               }
22454 +       }
22455 +out:
22456 +       filp->f_pos = pos;
22457 +       return 0;
22458 +}
22459 +/* getattr for the top-level directory: generic attributes, with nlink set to 2 + nx_global_cactive. */
22460 +static int proc_virtnet_getattr(struct vfsmount *mnt,
22461 +       struct dentry *dentry, struct kstat *stat)
22462 +{
22463 +       struct inode *inode = dentry->d_inode;
22464 +
22465 +       generic_fillattr(inode, stat);
22466 +       stat->nlink = 2 + atomic_read(&nx_global_cactive);
22467 +       return 0;
22468 +}
22469 +/* Operation tables that proc_vx_init() installs on the "virtnet" directory. */
22470 +static struct file_operations proc_virtnet_dir_operations = {
22471 +       .read =         generic_read_dir,
22472 +       .readdir =      proc_virtnet_readdir,
22473 +};
22474 +
22475 +static struct inode_operations proc_virtnet_dir_inode_operations = {
22476 +       .getattr =      proc_virtnet_getattr,
22477 +       .lookup =       proc_virtnet_lookup,
22478 +};
22479 +
22480 +
22481 +/* Create the "virtual" and "virtnet" proc directories (no parent given) and hook up their directory operations. */
22482 +void proc_vx_init(void)
22483 +{
22484 +       struct proc_dir_entry *dir;
22485 +
22486 +       dir = proc_mkdir("virtual", NULL);
22487 +       if (dir) {
22488 +               dir->proc_fops = &proc_virtual_dir_operations;
22489 +               dir->proc_iops = &proc_virtual_dir_inode_operations;
22490 +       }
22491 +       proc_virtual = dir;     /* stays NULL when the directory could not be created */
22492 +
22493 +       dir = proc_mkdir("virtnet", NULL);
22494 +       if (dir) {
22495 +               dir->proc_fops = &proc_virtnet_dir_operations;
22496 +               dir->proc_iops = &proc_virtnet_dir_inode_operations;
22497 +       }
22498 +       proc_virtnet = dir;     /* stays NULL when the directory could not be created */
22499 +}
22500 +
22501 +
22502 +
22503 +
22504 +/* per pid info */
22505 +
22506 +/* Per-task reader: prints the task's XID and, if it has a vx_info, its capability sets, flags and init pid; returns length. */
22507 +int proc_pid_vx_info(struct task_struct *p, char *buffer)
22508 +{
22509 +       struct vx_info *vxi;
22510 +       char *orig = buffer;    /* start of the output, for the final length */
22511 +
22512 +       buffer += sprintf(buffer, "XID:\t%d\n", vx_task_xid(p));
22513 +
22514 +       vxi = task_get_vx_info(p);
22515 +       if (!vxi)               /* no context attached: only the XID line is emitted */
22516 +               goto out;
22517 +
22518 +       buffer += sprintf(buffer, "BCaps:\t");
22519 +       buffer = print_cap_t(buffer, &vxi->vx_bcaps);
22520 +       buffer += sprintf(buffer, "\n");
22521 +       buffer += sprintf(buffer, "CCaps:\t%016llx\n",
22522 +               (unsigned long long)vxi->vx_ccaps);
22523 +       buffer += sprintf(buffer, "CFlags:\t%016llx\n",
22524 +               (unsigned long long)vxi->vx_flags);
22525 +       buffer += sprintf(buffer, "CIPid:\t%d\n", vxi->vx_initpid);
22526 +
22527 +       put_vx_info(vxi);       /* pairs with task_get_vx_info() above */
22528 +out:
22529 +       return buffer - orig;
22530 +}
22531 +
22532 +/* Per-task reader: prints the task's NID and, if it has an nx_info, its caps, flags and address lists; returns length. */
22533 +int proc_pid_nx_info(struct task_struct *p, char *buffer)
22534 +{
22535 +       struct nx_info *nxi;
22536 +       struct nx_addr_v4 *v4a;
22537 +#ifdef CONFIG_IPV6
22538 +       struct nx_addr_v6 *v6a;
22539 +#endif
22540 +       char *orig = buffer;    /* start of the output, for the final length */
22541 +       int i;
22542 +
22543 +       buffer += sprintf(buffer, "NID:\t%d\n", nx_task_nid(p));
22544 +
22545 +       nxi = task_get_nx_info(p);
22546 +       if (!nxi)               /* no network context attached: only the NID line is emitted */
22547 +               goto out;
22548 +
22549 +       buffer += sprintf(buffer, "NCaps:\t%016llx\n",
22550 +               (unsigned long long)nxi->nx_ncaps);
22551 +       buffer += sprintf(buffer, "NFlags:\t%016llx\n",
22552 +               (unsigned long long)nxi->nx_flags);
22553 +
22554 +       buffer += sprintf(buffer,
22555 +               "V4Root[bcast]:\t" NIPQUAD_FMT "\n",
22556 +               NIPQUAD(nxi->v4_bcast.s_addr));
22557 +       buffer += sprintf (buffer,
22558 +               "V4Root[lback]:\t" NIPQUAD_FMT "\n",
22559 +               NIPQUAD(nxi->v4_lback.s_addr));
22560 +       if (!NX_IPV4(nxi))      /* each address list is walked only when its NX_IPVx() test holds */
22561 +               goto skip_v4;
22562 +       for (i = 0, v4a = &nxi->v4; v4a; i++, v4a = v4a->next)
22563 +               buffer += sprintf(buffer, "V4Root[%d]:\t" NXAV4_FMT "\n",
22564 +                       i, NXAV4(v4a));
22565 +skip_v4:
22566 +#ifdef CONFIG_IPV6
22567 +       if (!NX_IPV6(nxi))
22568 +               goto skip_v6;
22569 +       for (i = 0, v6a = &nxi->v6; v6a; i++, v6a = v6a->next)
22570 +               buffer += sprintf(buffer, "V6Root[%d]:\t" NXAV6_FMT "\n",
22571 +                       i, NXAV6(v6a));
22572 +skip_v6:
22573 +#endif
22574 +       put_nx_info(nxi);       /* pairs with task_get_nx_info() above */
22575 +out:
22576 +       return buffer - orig;
22577 +}
22578 +
22579 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/sched.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/sched.c
22580 --- linux-2.6.30.2/kernel/vserver/sched.c       1970-01-01 01:00:00.000000000 +0100
22581 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/sched.c    2009-07-04 01:11:39.000000000 +0200
22582 @@ -0,0 +1,414 @@
22583 +/*
22584 + *  linux/kernel/vserver/sched.c
22585 + *
22586 + *  Virtual Server: Scheduler Support
22587 + *
22588 + *  Copyright (C) 2004-2007  Herbert Pötzl
22589 + *
22590 + *  V0.01  adapted Sam Vilains version to 2.6.3
22591 + *  V0.02  removed legacy interface
22592 + *  V0.03  changed vcmds to vxi arg
22593 + *  V0.04  removed older and legacy interfaces
22594 + *
22595 + */
22596 +
22597 +#include <linux/vs_context.h>
22598 +#include <linux/vs_sched.h>
22599 +#include <linux/vserver/sched_cmd.h>
22600 +
22601 +#include <asm/uaccess.h>
22602 +
22603 +
22604 +#define vxd_check_range(val, min, max) do {            \
22605 +       vxlprintk((val < min) || (val > max),           \
22606 +               "check_range(%ld,%ld,%ld)",             \
22607 +               (long)val, (long)min, (long)max,        \
22608 +               __FILE__, __LINE__);                    \
22609 +       } while (0)
22610 +/* Copy the fields selected by sched->update_mask from the context-wide */
22611 +/* settings into one per-cpu bucket and restart its norm_time at jiffies. */
22612 +void vx_update_sched_param(struct _vx_sched *sched,
22613 +       struct _vx_sched_pc *sched_pc)
22614 +{
22615 +       unsigned int set_mask = sched->update_mask;
22616 +
22617 +       if (set_mask & VXSM_FILL_RATE)
22618 +               sched_pc->fill_rate[0] = sched->fill_rate[0];
22619 +       if (set_mask & VXSM_INTERVAL)
22620 +               sched_pc->interval[0] = sched->interval[0];
22621 +       if (set_mask & VXSM_FILL_RATE2)
22622 +               sched_pc->fill_rate[1] = sched->fill_rate[1];
22623 +       if (set_mask & VXSM_INTERVAL2)
22624 +               sched_pc->interval[1] = sched->interval[1];
22625 +       if (set_mask & VXSM_TOKENS)
22626 +               sched_pc->tokens = sched->tokens;
22627 +       if (set_mask & VXSM_TOKENS_MIN)
22628 +               sched_pc->tokens_min = sched->tokens_min;
22629 +       if (set_mask & VXSM_TOKENS_MAX)
22630 +               sched_pc->tokens_max = sched->tokens_max;
22631 +       if (set_mask & VXSM_PRIO_BIAS)
22632 +               sched_pc->prio_bias = sched->prio_bias;
22633 +       /* the idle-time flag follows the mask bit: set if present, else cleared */
22634 +       if (set_mask & VXSM_IDLE_TIME)
22635 +               sched_pc->flags |= VXSF_IDLE_TIME;
22636 +       else
22637 +               sched_pc->flags &= ~VXSF_IDLE_TIME;
22638 +
22639 +       /* reset time */
22640 +       sched_pc->norm_time = jiffies;
22641 +}
22642 +
22643 +
22644 +/*
22645 + * recalculate the context's scheduling tokens
22646 + *
22647 + * ret > 0 : number of tokens available
22648 + * ret < 0 : on hold, check delta_min[]
22649 + *          -1 only jiffies
22650 + *          -2 also idle time
22651 + * ret = 0 : on hold while CONFIG_VSERVER_HARDCPU is not configured
22652 + */
22653 +int vx_tokens_recalc(struct _vx_sched_pc *sched_pc,
22654 +       unsigned long *norm_time, unsigned long *idle_time, int delta_min[2])
22655 +{
22656 +       long delta;
22657 +       long tokens = 0;
22658 +       int flags = sched_pc->flags;
22659 +       /* note: the refill blocks below declare their own, shadowing 'tokens' */
22660 +       /* how much time did pass? */
22661 +       delta = *norm_time - sched_pc->norm_time;
22662 +       // printk("@ %ld, %ld, %ld\n", *norm_time, sched_pc->norm_time, jiffies);
22663 +       vxd_check_range(delta, 0, INT_MAX);
22664 +
22665 +       if (delta >= sched_pc->interval[0]) {
22666 +               long tokens, integral;
22667 +
22668 +               /* calc integral token part */
22669 +               tokens = delta / sched_pc->interval[0];
22670 +               integral = tokens * sched_pc->interval[0];
22671 +               tokens *= sched_pc->fill_rate[0];
22672 +#ifdef CONFIG_VSERVER_HARDCPU
22673 +               delta_min[0] = delta - integral;
22674 +               vxd_check_range(delta_min[0], 0, sched_pc->interval[0]);
22675 +#endif
22676 +               /* advance time; NOTE(review): by delta, idle branch uses integral */
22677 +               sched_pc->norm_time += delta;
22678 +
22679 +               /* add tokens */
22680 +               sched_pc->tokens += tokens;
22681 +               sched_pc->token_time += tokens;
22682 +       } else
22683 +               delta_min[0] = delta;
22684 +
22685 +#ifdef CONFIG_VSERVER_IDLETIME
22686 +       if (!(flags & VXSF_IDLE_TIME))
22687 +               goto skip_idle;
22688 +
22689 +       /* how much was the idle skip? */
22690 +       delta = *idle_time - sched_pc->idle_time;
22691 +       vxd_check_range(delta, 0, INT_MAX);
22692 +
22693 +       if (delta >= sched_pc->interval[1]) {
22694 +               long tokens, integral;
22695 +
22696 +               /* calc fair share token part */
22697 +               tokens = delta / sched_pc->interval[1];
22698 +               integral = tokens * sched_pc->interval[1];
22699 +               tokens *= sched_pc->fill_rate[1];
22700 +               delta_min[1] = delta - integral;
22701 +               vxd_check_range(delta_min[1], 0, sched_pc->interval[1]);
22702 +
22703 +               /* advance idle time */
22704 +               sched_pc->idle_time += integral;
22705 +
22706 +               /* add tokens */
22707 +               sched_pc->tokens += tokens;
22708 +               sched_pc->token_time += tokens;
22709 +       } else
22710 +               delta_min[1] = delta;
22711 +skip_idle:
22712 +#endif
22713 +
22714 +       /* clip at maximum */
22715 +       if (sched_pc->tokens > sched_pc->tokens_max)
22716 +               sched_pc->tokens = sched_pc->tokens_max;
22717 +       tokens = sched_pc->tokens;
22718 +
22719 +       if ((flags & VXSF_ONHOLD)) {
22720 +               /* can we unhold? */
22721 +               if (tokens >= sched_pc->tokens_min) {
22722 +                       flags &= ~VXSF_ONHOLD;
22723 +                       sched_pc->hold_ticks +=
22724 +                               *norm_time - sched_pc->onhold;
22725 +               } else
22726 +                       goto on_hold;
22727 +       } else {
22728 +               /* put on hold? */
22729 +               if (tokens <= 0) {
22730 +                       flags |= VXSF_ONHOLD;
22731 +                       sched_pc->onhold = *norm_time;
22732 +                       goto on_hold;
22733 +               }
22734 +       }
22735 +       sched_pc->flags = flags;
22736 +       return tokens;
22737 +       /* on hold: 'tokens' becomes the shortfall against tokens_min */
22738 +on_hold:
22739 +       tokens = sched_pc->tokens_min - tokens;
22740 +       sched_pc->flags = flags;
22741 +       // BUG_ON(tokens < 0); probably doesn't hold anymore
22742 +
22743 +#ifdef CONFIG_VSERVER_HARDCPU
22744 +       /* next interval? */
22745 +       if (!sched_pc->fill_rate[0])
22746 +               delta_min[0] = HZ;
22747 +       else if (tokens > sched_pc->fill_rate[0])
22748 +               delta_min[0] += sched_pc->interval[0] *
22749 +                       tokens / sched_pc->fill_rate[0];
22750 +       else
22751 +               delta_min[0] = sched_pc->interval[0] - delta_min[0];
22752 +       vxd_check_range(delta_min[0], 0, INT_MAX);
22753 +
22754 +#ifdef CONFIG_VSERVER_IDLETIME
22755 +       if (!(flags & VXSF_IDLE_TIME))
22756 +               return -1;
22757 +
22758 +       /* next interval? */
22759 +       if (!sched_pc->fill_rate[1])
22760 +               delta_min[1] = HZ;
22761 +       else if (tokens > sched_pc->fill_rate[1])
22762 +               delta_min[1] += sched_pc->interval[1] *
22763 +                       tokens / sched_pc->fill_rate[1];
22764 +       else
22765 +               delta_min[1] = sched_pc->interval[1] - delta_min[1];
22766 +       vxd_check_range(delta_min[1], 0, INT_MAX);
22767 +
22768 +       return -2;
22769 +#else
22770 +       return -1;
22771 +#endif /* CONFIG_VSERVER_IDLETIME */
22772 +#else
22773 +       return 0;
22774 +#endif /* CONFIG_VSERVER_HARDCPU */
22775 +}
22776 +/* Convert milliseconds to ticks; thin wrapper around msecs_to_jiffies(). */
22777 +static inline unsigned long msec_to_ticks(unsigned long msec)
22778 +{
22779 +       return msecs_to_jiffies(msec);
22780 +}
22781 +/* Convert ticks to milliseconds; thin wrapper around jiffies_to_msecs(). */
22782 +static inline unsigned long ticks_to_msec(unsigned long ticks)
22783 +{
22784 +       return jiffies_to_msecs(ticks);
22785 +}
22786 +/* Convert ticks to microseconds; thin wrapper around jiffies_to_usecs(). */
22787 +static inline unsigned long ticks_to_usec(unsigned long ticks)
22788 +{
22789 +       return jiffies_to_usecs(ticks);
22790 +}
22791 +
22792 +
22793 +static int do_set_sched(struct vx_info *vxi, struct vcmd_sched_v5 *data)
22794 +{
22795 +       unsigned int set_mask = data->mask;
22796 +       unsigned int update_mask;
22797 +       int i, cpu;
22798 +
22799 +       /* Sanity check data values */
22800 +       if (data->tokens_max <= 0)
22801 +               data->tokens_max = HZ;
22802 +       if (data->tokens_min < 0)
22803 +               data->tokens_min = HZ / 3;
22804 +       if (data->tokens_min >= data->tokens_max)
22805 +               data->tokens_min = data->tokens_max;
22806 +
22807 +       if (data->prio_bias > MAX_PRIO_BIAS)
22808 +               data->prio_bias = MAX_PRIO_BIAS;
22809 +       if (data->prio_bias < MIN_PRIO_BIAS)
22810 +               data->prio_bias = MIN_PRIO_BIAS;
22811 +
22812 +       spin_lock(&vxi->sched.tokens_lock);
22813 +
22814 +       /* sync up on delayed updates */
22815 +       for_each_cpu_mask(cpu, vxi->sched.update)
22816 +               vx_update_sched_param(&vxi->sched,
22817 +                       &vx_per_cpu(vxi, sched_pc, cpu));
22818 +
22819 +       if (set_mask & VXSM_FILL_RATE)
22820 +               vxi->sched.fill_rate[0] = data->fill_rate[0];
22821 +       if (set_mask & VXSM_FILL_RATE2)
22822 +               vxi->sched.fill_rate[1] = data->fill_rate[1];
22823 +       if (set_mask & VXSM_INTERVAL)
22824 +               vxi->sched.interval[0] = (set_mask & VXSM_MSEC) ?
22825 +                       msec_to_ticks(data->interval[0]) : data->interval[0];
22826 +       if (set_mask & VXSM_INTERVAL2)
22827 +               vxi->sched.interval[1] = (set_mask & VXSM_MSEC) ?
22828 +                       msec_to_ticks(data->interval[1]) : data->interval[1];
22829 +       if (set_mask & VXSM_TOKENS)
22830 +               vxi->sched.tokens = data->tokens;
22831 +       if (set_mask & VXSM_TOKENS_MIN)
22832 +               vxi->sched.tokens_min = data->tokens_min;
22833 +       if (set_mask & VXSM_TOKENS_MAX)
22834 +               vxi->sched.tokens_max = data->tokens_max;
22835 +       if (set_mask & VXSM_PRIO_BIAS)
22836 +               vxi->sched.prio_bias = data->prio_bias;
22837 +
22838 +       /* Sanity check rate/interval */
22839 +       for (i = 0; i < 2; i++) {
22840 +               if (data->fill_rate[i] < 0)
22841 +                       data->fill_rate[i] = 0;
22842 +               if (data->interval[i] <= 0)
22843 +                       data->interval[i] = HZ;
22844 +       }
22845 +
22846 +       update_mask = vxi->sched.update_mask & VXSM_SET_MASK;
22847 +       update_mask |= (set_mask & (VXSM_SET_MASK | VXSM_IDLE_TIME));
22848 +       vxi->sched.update_mask = update_mask;
22849 +
22850 +#ifdef CONFIG_SMP
22851 +       rmb();
22852 +       if (set_mask & VXSM_CPU_ID) {
22853 +               vxi->sched.update = cpumask_of_cpu(data->cpu_id);
22854 +               cpus_and(vxi->sched.update, cpu_online_map,
22855 +                       vxi->sched.update);
22856 +       } else
22857 +               vxi->sched.update = cpu_online_map;
22858 +
22859 +       /* forced reload? */
22860 +       if (set_mask & VXSM_FORCE) {
22861 +               for_each_cpu_mask(cpu, vxi->sched.update)
22862 +                       vx_update_sched_param(&vxi->sched,
22863 +                               &vx_per_cpu(vxi, sched_pc, cpu));
22864 +               vxi->sched.update = CPU_MASK_NONE;
22865 +       }
22866 +#else
22867 +       /* on UP we update immediately */
22868 +       vx_update_sched_param(&vxi->sched,
22869 +               &vx_per_cpu(vxi, sched_pc, 0));
22870 +#endif
22871 +
22872 +       spin_unlock(&vxi->sched.tokens_lock);
22873 +       return 0;
22874 +}
22875 +/* Field-list helpers: each applies the macro C to a group of member names. */
22876 +/* COPY_VALUE copies one member from *data into the local vc_data. */
22877 +#define COPY_IDS(C) C(cpu_id); C(bucket_id)
22878 +#define COPY_PRI(C) C(prio_bias)
22879 +#define COPY_TOK(C) C(tokens); C(tokens_min); C(tokens_max)
22880 +#define COPY_FRI(C) C(fill_rate[0]); C(interval[0]);   \
22881 +                   C(fill_rate[1]); C(interval[1]);
22882 +
22883 +#define COPY_VALUE(name) vc_data.name = data->name
22884 +/* Translate a v4 request into a v5 one and pass it on to do_set_sched(). */
22885 +static int do_set_sched_v4(struct vx_info *vxi, struct vcmd_set_sched_v4 *data)
22886 +{
22887 +       struct vcmd_sched_v5 vc_data;
22888 +
22889 +       vc_data.mask = data->set_mask;
22890 +       COPY_IDS(COPY_VALUE);
22891 +       COPY_PRI(COPY_VALUE);
22892 +       COPY_TOK(COPY_VALUE);
22893 +       vc_data.fill_rate[0] = vc_data.fill_rate[1] = data->fill_rate; /* v4 has one rate */
22894 +       vc_data.interval[0] = vc_data.interval[1] = data->interval; /* and one interval */
22895 +       return do_set_sched(vxi, &vc_data);
22896 +}
22897 +/* vcmd entry: copy a v4 request from user space; -EFAULT if the copy fails. */
22898 +int vc_set_sched_v4(struct vx_info *vxi, void __user *data)
22899 +{
22900 +       struct vcmd_set_sched_v4 vc_data;
22901 +
22902 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
22903 +               return -EFAULT;
22904 +
22905 +       return do_set_sched_v4(vxi, &vc_data);
22906 +}
22907 +
22908 +       /* latest interface is v5 */
22909 +/* vcmd entry: copy a v5 request from user space; -EFAULT if the copy fails. */
22910 +int vc_set_sched(struct vx_info *vxi, void __user *data)
22911 +{
22912 +       struct vcmd_sched_v5 vc_data;
22913 +
22914 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
22915 +               return -EFAULT;
22916 +
22917 +       return do_set_sched(vxi, &vc_data);
22918 +}
22919 +/* These repeat the COPY_PRI/COPY_TOK/COPY_FRI/COPY_VALUE definitions given */
22920 +/* earlier in this file with the same replacement text (COPY_IDS is not repeated). */
22921 +#define COPY_PRI(C) C(prio_bias)
22922 +#define COPY_TOK(C) C(tokens); C(tokens_min); C(tokens_max)
22923 +#define COPY_FRI(C) C(fill_rate[0]); C(interval[0]);    \
22924 +                   C(fill_rate[1]); C(interval[1]);
22925 +
22926 +#define COPY_VALUE(name) vc_data.name = data->name
22927 +/* vcmd entry: report scheduler values to user space, per cpu if the request */
22928 +/* mask has VXSM_CPU_ID, else context-wide; returns 0, -EFAULT or -EINVAL. */
22929 +int vc_get_sched(struct vx_info *vxi, void __user *data)
22930 +{
22931 +       struct vcmd_sched_v5 vc_data;
22932 +
22933 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
22934 +               return -EFAULT;
22935 +       /* in both branches a local 'data' shadows the parameter for COPY_VALUE */
22936 +       if (vc_data.mask & VXSM_CPU_ID) {
22937 +               int cpu = vc_data.cpu_id;
22938 +               struct _vx_sched_pc *data;
22939 +
22940 +               if (!cpu_possible(cpu))
22941 +                       return -EINVAL;
22942 +
22943 +               data = &vx_per_cpu(vxi, sched_pc, cpu);
22944 +               COPY_TOK(COPY_VALUE);
22945 +               COPY_PRI(COPY_VALUE);
22946 +               COPY_FRI(COPY_VALUE);
22947 +
22948 +               if (data->flags & VXSF_IDLE_TIME)
22949 +                       vc_data.mask |= VXSM_IDLE_TIME;
22950 +       } else {
22951 +               struct _vx_sched *data = &vxi->sched;
22952 +
22953 +               COPY_TOK(COPY_VALUE);
22954 +               COPY_PRI(COPY_VALUE);
22955 +               COPY_FRI(COPY_VALUE);
22956 +       }
22957 +       /* intervals are reported in msec when the caller set VXSM_MSEC */
22958 +       if (vc_data.mask & VXSM_MSEC) {
22959 +               vc_data.interval[0] = ticks_to_msec(vc_data.interval[0]);
22960 +               vc_data.interval[1] = ticks_to_msec(vc_data.interval[1]);
22961 +       }
22962 +
22963 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
22964 +               return -EFAULT;
22965 +       return 0;
22966 +}
22967 +/* vcmd entry: report per-cpu accounting (user/sys/hold msec, vavavoom) and */
22968 +/* the length of one tick in usec; returns 0, -EFAULT or -EINVAL. */
22969 +int vc_sched_info(struct vx_info *vxi, void __user *data)
22970 +{
22971 +       struct vcmd_sched_info vc_data;
22972 +       int cpu;
22973 +
22974 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
22975 +               return -EFAULT;
22976 +
22977 +       cpu = vc_data.cpu_id;
22978 +       if (!cpu_possible(cpu))
22979 +               return -EINVAL;
22980 +       /* with a NULL vxi the accounting fields keep what the caller passed in */
22981 +       if (vxi) {
22982 +               struct _vx_sched_pc *sched_pc =
22983 +                       &vx_per_cpu(vxi, sched_pc, cpu);
22984 +
22985 +               vc_data.user_msec = ticks_to_msec(sched_pc->user_ticks);
22986 +               vc_data.sys_msec = ticks_to_msec(sched_pc->sys_ticks);
22987 +               vc_data.hold_msec = ticks_to_msec(sched_pc->hold_ticks);
22988 +               vc_data.vavavoom = sched_pc->vavavoom;
22989 +       }
22990 +       vc_data.token_usec = ticks_to_usec(1);
22991 +
22992 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
22993 +               return -EFAULT;
22994 +       return 0;
22995 +}
22996 +
22997 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/sched_init.h linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/sched_init.h
22998 --- linux-2.6.30.2/kernel/vserver/sched_init.h  1970-01-01 01:00:00.000000000 +0100
22999 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/sched_init.h       2009-07-04 01:11:39.000000000 +0200
23000 @@ -0,0 +1,50 @@
23001 +/* Set the context-wide scheduler defaults and initialise tokens_lock. */
23002 +static inline void vx_info_init_sched(struct _vx_sched *sched)
23003 +{
23004 +       static struct lock_class_key tokens_lock_key;
23005 +
23006 +       /* scheduling; hard code starting values as constants */
23007 +       sched->fill_rate[0]     = 1;
23008 +       sched->interval[0]      = 4;
23009 +       sched->fill_rate[1]     = 1;
23010 +       sched->interval[1]      = 8;
23011 +       sched->tokens           = HZ >> 2;
23012 +       sched->tokens_min       = HZ >> 4;
23013 +       sched->tokens_max       = HZ >> 1;
23014 +       sched->tokens_lock      = SPIN_LOCK_UNLOCKED;
23015 +       sched->prio_bias        = 0;
23016 +       /* give tokens_lock the lockdep class keyed by the static key above */
23017 +       lockdep_set_class(&sched->tokens_lock, &tokens_lock_key);
23018 +}
23019 +/* Set one per-cpu bucket to its starting values; 'cpu' is not used here. */
23020 +static inline
23021 +void vx_info_init_sched_pc(struct _vx_sched_pc *sched_pc, int cpu)
23022 +{
23023 +       sched_pc->fill_rate[0]  = 1;
23024 +       sched_pc->interval[0]   = 4;
23025 +       sched_pc->fill_rate[1]  = 1;
23026 +       sched_pc->interval[1]   = 8;
23027 +       sched_pc->tokens        = HZ >> 2;
23028 +       sched_pc->tokens_min    = HZ >> 4;
23029 +       sched_pc->tokens_max    = HZ >> 1;
23030 +       sched_pc->prio_bias     = 0;
23031 +       sched_pc->vavavoom      = 0;
23032 +       sched_pc->token_time    = 0;
23033 +       sched_pc->idle_time     = 0;
23034 +       sched_pc->norm_time     = jiffies;
23035 +       /* tick accounting starts from zero */
23036 +       sched_pc->user_ticks = 0;
23037 +       sched_pc->sys_ticks = 0;
23038 +       sched_pc->hold_ticks = 0;
23039 +}
23040 +/* Exit hook for the context-wide scheduler state; currently does nothing. */
23041 +static inline void vx_info_exit_sched(struct _vx_sched *sched)
23042 +{
23043 +       return;
23044 +}
23045 +/* Exit hook for one per-cpu scheduler bucket; currently does nothing. */
23046 +static inline
23047 +void vx_info_exit_sched_pc(struct _vx_sched_pc *sched_pc, int cpu)
23048 +{
23049 +       return;
23050 +}
23051 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/sched_proc.h linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/sched_proc.h
23052 --- linux-2.6.30.2/kernel/vserver/sched_proc.h  1970-01-01 01:00:00.000000000 +0100
23053 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/sched_proc.h       2009-07-04 01:11:39.000000000 +0200
23054 @@ -0,0 +1,57 @@
23055 +#ifndef _VX_SCHED_PROC_H
23056 +#define _VX_SCHED_PROC_H
23057 +/* Print the context-wide fill rates, intervals, token limits and prio bias */
23058 +/* into buffer; returns the length reported by sprintf(). */
23059 +static inline
23060 +int vx_info_proc_sched(struct _vx_sched *sched, char *buffer)
23061 +{
23062 +       int length = 0;
23063 +
23064 +       length += sprintf(buffer,
23065 +               "FillRate:\t%8d,%d\n"
23066 +               "Interval:\t%8d,%d\n"
23067 +               "TokensMin:\t%8d\n"
23068 +               "TokensMax:\t%8d\n"
23069 +               "PrioBias:\t%8d\n",
23070 +               sched->fill_rate[0],
23071 +               sched->fill_rate[1],
23072 +               sched->interval[0],
23073 +               sched->interval[1],
23074 +               sched->tokens_min,
23075 +               sched->tokens_max,
23076 +               sched->prio_bias);
23077 +       return length;
23078 +}
23079 +/* Print one "cpu N: ..." line for a per-cpu bucket; returns its length. */
23080 +static inline
23081 +int vx_info_proc_sched_pc(struct _vx_sched_pc *sched_pc,
23082 +       char *buffer, int cpu)
23083 +{
23084 +       int length = 0;
23085 +       /* tick counters, then token_time and idle_time */
23086 +       length += sprintf(buffer + length,
23087 +               "cpu %d: %lld %lld %lld %ld %ld", cpu,
23088 +               (unsigned long long)sched_pc->user_ticks,
23089 +               (unsigned long long)sched_pc->sys_ticks,
23090 +               (unsigned long long)sched_pc->hold_ticks,
23091 +               sched_pc->token_time,
23092 +               sched_pc->idle_time);
23093 +       length += sprintf(buffer + length, /* H/R = on hold or not, I/- = idle time */
23094 +               " %c%c %d %d %d %d/%d %d/%d",
23095 +               (sched_pc->flags & VXSF_ONHOLD) ? 'H' : 'R',
23096 +               (sched_pc->flags & VXSF_IDLE_TIME) ? 'I' : '-',
23097 +               sched_pc->tokens,
23098 +               sched_pc->tokens_min,
23099 +               sched_pc->tokens_max,
23100 +               sched_pc->fill_rate[0],
23101 +               sched_pc->interval[0],
23102 +               sched_pc->fill_rate[1],
23103 +               sched_pc->interval[1]);
23104 +       length += sprintf(buffer + length,
23105 +               " %d %d\n",
23106 +               sched_pc->prio_bias,
23107 +               sched_pc->vavavoom);
23108 +       return length;
23109 +}
23110 +
23111 +#endif /* _VX_SCHED_PROC_H */
23112 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/signal.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/signal.c
23113 --- linux-2.6.30.2/kernel/vserver/signal.c      1970-01-01 01:00:00.000000000 +0100
23114 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/signal.c   2009-07-04 01:11:39.000000000 +0200
23115 @@ -0,0 +1,132 @@
23116 +/*
23117 + *  linux/kernel/vserver/signal.c
23118 + *
23119 + *  Virtual Server: Signal Support
23120 + *
23121 + *  Copyright (C) 2003-2007  Herbert Pötzl
23122 + *
23123 + *  V0.01  broken out from vcontext V0.05
23124 + *  V0.02  changed vcmds to vxi arg
23125 + *  V0.03  adjusted siginfo for kill
23126 + *
23127 + */
23128 +
23129 +#include <asm/uaccess.h>
23130 +
23131 +#include <linux/vs_context.h>
23132 +#include <linux/vs_pid.h>
23133 +#include <linux/vserver/signal_cmd.h>
23134 +/* Send sig to tasks of context vxi: pid 0 or -1 signals all of its processes, */
23135 +/* pid 1 targets the context's init, any other pid one task; -ESRCH if none. */
23136 +int vx_info_kill(struct vx_info *vxi, int pid, int sig)
23137 +{
23138 +       int retval, count = 0;
23139 +       struct task_struct *p;
23140 +       struct siginfo *sip = SEND_SIG_PRIV;
23141 +
23142 +       retval = -ESRCH;
23143 +       vxdprintk(VXD_CBIT(misc, 4),
23144 +               "vx_info_kill(%p[#%d],%d,%d)*",
23145 +               vxi, vxi->vx_id, pid, sig);
23146 +       read_lock(&tasklist_lock);
23147 +       switch (pid) {
23148 +       case  0:
23149 +       case -1:
23150 +               for_each_process(p) {
23151 +                       int err = 0;
23152 +                       /* skip other contexts, pid <= 1 and, for pid -1 only, the init */
23153 +                       if (vx_task_xid(p) != vxi->vx_id || p->pid <= 1 ||
23154 +                               (pid && vxi->vx_initpid == p->pid))
23155 +                               continue;
23156 +
23157 +                       err = group_send_sig_info(sig, sip, p);
23158 +                       ++count; /* not read anywhere else in this function */
23159 +                       if (err != -EPERM)
23160 +                               retval = err;
23161 +               }
23162 +               break;
23163 +
23164 +       case 1:
23165 +               if (vxi->vx_initpid) {
23166 +                       pid = vxi->vx_initpid;
23167 +                       /* for now, only SIGINT to private init ... */
23168 +                       if (!vx_info_flags(vxi, VXF_STATE_ADMIN, 0) &&
23169 +                               /* ... as long as there are tasks left */
23170 +                               (atomic_read(&vxi->vx_tasks) > 1))
23171 +                               sig = SIGINT;
23172 +               }
23173 +               /* fallthrough */
23174 +       default:
23175 +               p = find_task_by_real_pid(pid);
23176 +               if (p) {
23177 +                       if (vx_task_xid(p) == vxi->vx_id)
23178 +                               retval = group_send_sig_info(sig, sip, p);
23179 +               }
23180 +               break;
23181 +       }
23182 +       read_unlock(&tasklist_lock);
23183 +       vxdprintk(VXD_CBIT(misc, 4),
23184 +               "vx_info_kill(%p[#%d],%d,%d,%ld) = %d",
23185 +               vxi, vxi->vx_id, pid, sig, (long)sip, retval);
23186 +       return retval;
23187 +}
23188 +/* vcmd entry: copy a kill request from user space and run vx_info_kill(). */
23189 +int vc_ctx_kill(struct vx_info *vxi, void __user *data)
23190 +{
23191 +       struct vcmd_ctx_kill_v0 vc_data;
23192 +
23193 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
23194 +               return -EFAULT;
23195 +       /* without VXF_STATE_ADMIN: pid > 1 is refused, pid < 1 only if no init */
23196 +       /* special check to allow guest shutdown */
23197 +       if (!vx_info_flags(vxi, VXF_STATE_ADMIN, 0) &&
23198 +               /* forbid killall pid=0 when init is present */
23199 +               (((vc_data.pid < 1) && vxi->vx_initpid) ||
23200 +               (vc_data.pid > 1)))
23201 +               return -EACCES;
23202 +
23203 +       return vx_info_kill(vxi, vc_data.pid, vc_data.sig);
23204 +}
23205 +/* Sleep interruptibly on vxi->vx_wait until the context state shows only */
23206 +/* VXS_SHUTDOWN; returns 0, or -ERESTARTSYS if a signal is pending. */
23207 +static int __wait_exit(struct vx_info *vxi)
23208 +{
23209 +       DECLARE_WAITQUEUE(wait, current);
23210 +       int ret = 0;
23211 +
23212 +       add_wait_queue(&vxi->vx_wait, &wait);
23213 +       /* re-arm the sleep state on every pass, before testing the condition */
23214 +wait:
23215 +       set_current_state(TASK_INTERRUPTIBLE);
23216 +       if (vx_info_state(vxi,
23217 +               VXS_SHUTDOWN | VXS_HASHED | VXS_HELPER) == VXS_SHUTDOWN)
23218 +               goto out;
23219 +       if (signal_pending(current)) {
23220 +               ret = -ERESTARTSYS;
23221 +               goto out;
23222 +       }
23223 +       schedule();
23224 +       goto wait;
23225 +
23226 +out:
23227 +       set_current_state(TASK_RUNNING);
23228 +       remove_wait_queue(&vxi->vx_wait, &wait);
23229 +       return ret;
23230 +}
23231 +/* vcmd entry: wait via __wait_exit(), then copy the context's reboot_cmd and */
23232 +/* exit_code to user space. The copy is attempted even when the wait returned */
23233 +/* an error; a failed copy replaces the result with -EFAULT. */
23234 +int vc_wait_exit(struct vx_info *vxi, void __user *data)
23235 +{
23236 +       struct vcmd_wait_exit_v0 vc_data;
23237 +       int ret;
23238 +
23239 +       ret = __wait_exit(vxi);
23240 +       vc_data.reboot_cmd = vxi->reboot_cmd;
23241 +       vc_data.exit_code = vxi->exit_code;
23242 +
23243 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
23244 +               ret = -EFAULT;
23245 +       return ret;
23246 +}
23247 +
23248 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/space.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/space.c
23249 --- linux-2.6.30.2/kernel/vserver/space.c       1970-01-01 01:00:00.000000000 +0100
23250 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/space.c    2009-07-04 04:52:00.000000000 +0200
23251 @@ -0,0 +1,375 @@
23252 +/*
23253 + *  linux/kernel/vserver/space.c
23254 + *
23255 + *  Virtual Server: Context Space Support
23256 + *
23257 + *  Copyright (C) 2003-2007  Herbert Pötzl
23258 + *
23259 + *  V0.01  broken out from context.c 0.07
23260 + *  V0.02  added task locking for namespace
23261 + *  V0.03  broken out vx_enter_namespace
23262 + *  V0.04  added *space support and commands
23263 + *
23264 + */
23265 +
23266 +#include <linux/utsname.h>
23267 +#include <linux/nsproxy.h>
23268 +#include <linux/err.h>
23269 +#include <linux/fs_struct.h>
23270 +#include <asm/uaccess.h>
23271 +
23272 +#include <linux/vs_context.h>
23273 +#include <linux/vserver/space.h>
23274 +#include <linux/vserver/space_cmd.h>
23275 +/* Global atomics, all starting at 0; presumably usage counters -- TODO confirm. */
23276 +atomic_t vs_global_nsproxy     = ATOMIC_INIT(0);
23277 +atomic_t vs_global_fs          = ATOMIC_INIT(0);
23278 +atomic_t vs_global_mnt_ns      = ATOMIC_INIT(0);
23279 +atomic_t vs_global_uts_ns      = ATOMIC_INIT(0);
23280 +atomic_t vs_global_user_ns     = ATOMIC_INIT(0);
23281 +atomic_t vs_global_pid_ns      = ATOMIC_INIT(0);
23282 +
23283 +
23284 +/* namespace functions */
23285 +
23286 +#include <linux/mnt_namespace.h>
23287 +#include <linux/user_namespace.h>
23288 +#include <linux/pid_namespace.h>
23289 +#include <linux/ipc_namespace.h>
23290 +#include <net/net_namespace.h>
23291 +
23292 +/* v0 mask: FS, mount, UTS, IPC and user spaces; never PID or NET. */
23293 +static const struct vcmd_space_mask_v1 space_mask_v0 = {
23294 +       .mask = CLONE_FS |
23295 +               CLONE_NEWNS |
23296 +               CLONE_NEWUTS |
23297 +               CLONE_NEWIPC |
23298 +               CLONE_NEWUSER |
23299 +               0
23300 +};
23301 +/* Same as the v0 mask plus PID and NET when CONFIG_PID_NS/CONFIG_NET_NS are set. */
23302 +static const struct vcmd_space_mask_v1 space_mask = {
23303 +       .mask = CLONE_FS |
23304 +               CLONE_NEWNS |
23305 +               CLONE_NEWUTS |
23306 +               CLONE_NEWIPC |
23307 +               CLONE_NEWUSER |
23308 +#ifdef CONFIG_PID_NS
23309 +               CLONE_NEWPID |
23310 +#endif
23311 +#ifdef CONFIG_NET_NS
23312 +               CLONE_NEWNET |
23313 +#endif
23314 +               0
23315 +};
23316 +/* Default mask: CLONE_NEWPID appears only as a commented-out line, NET not at all. */
23317 +static const struct vcmd_space_mask_v1 default_space_mask = {
23318 +       .mask = CLONE_FS |
23319 +               CLONE_NEWNS |
23320 +               CLONE_NEWUTS |
23321 +               CLONE_NEWIPC |
23322 +               CLONE_NEWUSER |
23323 +#ifdef CONFIG_PID_NS
23324 +//             CLONE_NEWPID |
23325 +#endif
23326 +               0
23327 +};
23328 +
23329 +/*
23330 + *     build a new nsproxy mix
23331 + *      assumes that both proxies are 'const'
23332 + *     does not touch nsproxy refcounts
23333 + *     will hold a reference on the result.
23334 + */
23335 +/* mask selects (CLONE_NEW* bits) which spaces are taken from new_nsproxy. */
23336 +struct nsproxy *vs_mix_nsproxy(struct nsproxy *old_nsproxy,
23337 +       struct nsproxy *new_nsproxy, unsigned long mask)
23338 +{
23339 +       struct mnt_namespace *old_ns;
23340 +       struct uts_namespace *old_uts;
23341 +       struct ipc_namespace *old_ipc;
23342 +#ifdef CONFIG_PID_NS
23343 +       struct pid_namespace *old_pid;
23344 +#endif
23345 +#ifdef CONFIG_NET_NS
23346 +       struct net *old_net;
23347 +#endif
23348 +       struct nsproxy *nsproxy;
23349 +       /* start from a copy of old_nsproxy; NULL is returned if that fails */
23350 +       nsproxy = copy_nsproxy(old_nsproxy);
23351 +       if (!nsproxy)
23352 +               goto out;
23353 +       /* per selected space: adopt new_nsproxy's, get a ref, remember the old */
23354 +       if (mask & CLONE_NEWNS) {
23355 +               old_ns = nsproxy->mnt_ns;
23356 +               nsproxy->mnt_ns = new_nsproxy->mnt_ns;
23357 +               if (nsproxy->mnt_ns)
23358 +                       get_mnt_ns(nsproxy->mnt_ns);
23359 +       } else
23360 +               old_ns = NULL;
23361 +
23362 +       if (mask & CLONE_NEWUTS) {
23363 +               old_uts = nsproxy->uts_ns;
23364 +               nsproxy->uts_ns = new_nsproxy->uts_ns;
23365 +               if (nsproxy->uts_ns)
23366 +                       get_uts_ns(nsproxy->uts_ns);
23367 +       } else
23368 +               old_uts = NULL;
23369 +
23370 +       if (mask & CLONE_NEWIPC) {
23371 +               old_ipc = nsproxy->ipc_ns;
23372 +               nsproxy->ipc_ns = new_nsproxy->ipc_ns;
23373 +               if (nsproxy->ipc_ns)
23374 +                       get_ipc_ns(nsproxy->ipc_ns);
23375 +       } else
23376 +               old_ipc = NULL;
23377 +
23378 +#ifdef CONFIG_PID_NS
23379 +       if (mask & CLONE_NEWPID) {
23380 +               old_pid = nsproxy->pid_ns;
23381 +               nsproxy->pid_ns = new_nsproxy->pid_ns;
23382 +               if (nsproxy->pid_ns)
23383 +                       get_pid_ns(nsproxy->pid_ns);
23384 +       } else
23385 +               old_pid = NULL;
23386 +#endif
23387 +#ifdef CONFIG_NET_NS
23388 +       if (mask & CLONE_NEWNET) {
23389 +               old_net = nsproxy->net_ns;
23390 +               nsproxy->net_ns = new_nsproxy->net_ns;
23391 +               if (nsproxy->net_ns)
23392 +                       get_net(nsproxy->net_ns);
23393 +       } else
23394 +               old_net = NULL;
23395 +#endif
23396 +       if (old_ns) /* from here on: put every namespace that was replaced */
23397 +               put_mnt_ns(old_ns);
23398 +       if (old_uts)
23399 +               put_uts_ns(old_uts);
23400 +       if (old_ipc)
23401 +               put_ipc_ns(old_ipc);
23402 +#ifdef CONFIG_PID_NS
23403 +       if (old_pid)
23404 +               put_pid_ns(old_pid);
23405 +#endif
23406 +#ifdef CONFIG_NET_NS
23407 +       if (old_net)
23408 +               put_net(old_net);
23409 +#endif
23410 +out:
23411 +       return nsproxy;
23412 +}
23413 +
23414 +
23415 +/*
23416 + *     merge two nsproxy structs into a new one.
23417 + *     will hold a reference on the result.
23418 + *     returns NULL when proxy is NULL (nothing to merge from).
23419 + */
23420 +static inline
23421 +struct nsproxy *__vs_merge_nsproxy(struct nsproxy *old,
23422 +       struct nsproxy *proxy, unsigned long mask)
23423 +{
23424 +       struct nsproxy null_proxy = { .mnt_ns = NULL }; /* stand-in for a NULL old */
23425 +
23426 +       if (!proxy)
23427 +               return NULL;
23428 +
23429 +       if (mask) {
23430 +               /* vs_mix_nsproxy returns with reference */
23431 +               return vs_mix_nsproxy(old ? old : &null_proxy,
23432 +                       proxy, mask);
23433 +       }
23434 +       get_nsproxy(proxy);     /* zero mask: proxy itself is handed back */
23435 +       return proxy;
23436 +}
23437 +
23438 +/* switch current to the fs and namespaces kept in vxi's space #index */
23439 +int vx_enter_space(struct vx_info *vxi, unsigned long mask, unsigned index)
23440 +{
23441 +       struct nsproxy *proxy, *proxy_cur, *proxy_new;
23442 +       struct fs_struct *fs_cur, *fs = NULL;
23443 +       int ret, kill = 0;
23444 +
23445 +       vxdprintk(VXD_CBIT(space, 8), "vx_enter_space(%p[#%u],0x%08lx,%d)",
23446 +               vxi, vxi->vx_id, mask, index);
23447 +
23448 +       if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0))
23449 +               return -EACCES;
23450 +
23451 +       if (!mask)      /* zero mask selects everything the space holds */
23452 +               mask = vxi->vx_nsmask[index];
23453 +
23454 +       if ((mask & vxi->vx_nsmask[index]) != mask)     /* must be a subset */
23455 +               return -EINVAL;
23456 +
23457 +       if (mask & CLONE_FS) {
23458 +               fs = copy_fs_struct(vxi->vx_fs[index]);
23459 +               if (!fs)
23460 +                       return -ENOMEM;
23461 +       }
23462 +       proxy = vxi->vx_nsproxy[index];
23463 +
23464 +       vxdprintk(VXD_CBIT(space, 9),
23465 +               "vx_enter_space(%p[#%u],0x%08lx,%d) -> (%p,%p)",
23466 +               vxi, vxi->vx_id, mask, index, proxy, fs);
23467 +
23468 +       task_lock(current);
23469 +       fs_cur = current->fs;
23470 +
23471 +       if (mask & CLONE_FS) {
23472 +               write_lock(&fs_cur->lock);
23473 +               current->fs = fs;
23474 +               kill = !--fs_cur->users;        /* last user: freed below */
23475 +               write_unlock(&fs_cur->lock);
23476 +       }
23477 +
23478 +       proxy_cur = current->nsproxy;
23479 +       get_nsproxy(proxy_cur);
23480 +       task_unlock(current);
23481 +
23482 +       if (kill)
23483 +               free_fs_struct(fs_cur);
23484 +       /* the merge reports failure as NULL, which must never be installed */
23485 +       proxy_new = __vs_merge_nsproxy(proxy_cur, proxy, mask);
23486 +       if (!proxy_new || IS_ERR(proxy_new)) {
23487 +               ret = proxy_new ? PTR_ERR(proxy_new) : -ENOMEM;
23488 +               goto out_put;
23489 +       }
23490 +
23491 +       proxy_new = xchg(&current->nsproxy, proxy_new);        /* yields the old proxy */
23492 +       ret = 0;
23493 +
23494 +       if (proxy_new)
23495 +               put_nsproxy(proxy_new);
23496 +out_put:
23497 +       if (proxy_cur)
23498 +               put_nsproxy(proxy_cur);
23499 +       return ret;
23500 +}
23501 +
23502 +
23503 +int vx_set_space(struct vx_info *vxi, unsigned long mask, unsigned index)
23504 +{
23505 +       struct nsproxy *proxy_vxi, *proxy_cur, *proxy_new;
23506 +       struct fs_struct *fs_vxi, *fs;
23507 +       int ret, kill = 0;
23508 +
23509 +       vxdprintk(VXD_CBIT(space, 8), "vx_set_space(%p[#%u],0x%08lx,%d)",
23510 +               vxi, vxi->vx_id, mask, index);
23511 +#if 0
23512 +       if (!mask)
23513 +               mask = default_space_mask.mask;
23514 +#endif
23515 +       if ((mask & space_mask.mask) != mask)
23516 +               return -EINVAL;
23517 +
23518 +       proxy_vxi = vxi->vx_nsproxy[index];
23519 +       fs_vxi = vxi->vx_fs[index];
23520 +
23521 +       if (mask & CLONE_FS) {
23522 +               fs = copy_fs_struct(current->fs);
23523 +               if (!fs)
23524 +                       return -ENOMEM;
23525 +       }
23526 +
23527 +       task_lock(current);
23528 +
23529 +       if (mask & CLONE_FS) {
23530 +               write_lock(&fs_vxi->lock);
23531 +               vxi->vx_fs[index] = fs;
23532 +               kill = !--fs_vxi->users;
23533 +               write_unlock(&fs_vxi->lock);
23534 +       }
23535 +
23536 +       proxy_cur = current->nsproxy;
23537 +       get_nsproxy(proxy_cur);
23538 +       task_unlock(current);
23539 +
23540 +       if (kill)
23541 +               free_fs_struct(fs_vxi);
23542 +
23543 +       proxy_new = __vs_merge_nsproxy(proxy_vxi, proxy_cur, mask);
23544 +       if (IS_ERR(proxy_new)) {
23545 +               ret = PTR_ERR(proxy_new);
23546 +               goto out_put;
23547 +       }
23548 +
23549 +       proxy_new = xchg(&vxi->vx_nsproxy[index], proxy_new);
23550 +       vxi->vx_nsmask[index] |= mask;
23551 +       ret = 0;
23552 +
23553 +       if (proxy_new)
23554 +               put_nsproxy(proxy_new);
23555 +out_put:
23556 +       if (proxy_cur)
23557 +               put_nsproxy(proxy_cur);
23558 +       return ret;
23559 +}
23560 +
23561 +
23562 +int vc_enter_space_v1(struct vx_info *vxi, void __user *data)
23563 +{
23564 +       struct vcmd_space_mask_v1 vc_data = { .mask = 0 };      /* kept when data is NULL */
23565 +
23566 +       if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
23567 +               return -EFAULT;
23568 +
23569 +       return vx_enter_space(vxi, vc_data.mask, 0);    /* always space index 0 */
23570 +}
23571 +
23572 +int vc_enter_space(struct vx_info *vxi, void __user *data)
23573 +{
23574 +       struct vcmd_space_mask_v2 vc_data = { .mask = 0 };      /* kept when data is NULL */
23575 +
23576 +       if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
23577 +               return -EFAULT;
23578 +
23579 +       if (vc_data.index >= VX_SPACES) /* index comes from user space */
23580 +               return -EINVAL;
23581 +
23582 +       return vx_enter_space(vxi, vc_data.mask, vc_data.index);
23583 +}
23584 +
23585 +int vc_set_space_v1(struct vx_info *vxi, void __user *data)
23586 +{
23587 +       struct vcmd_space_mask_v1 vc_data = { .mask = 0 };      /* kept when data is NULL */
23588 +
23589 +       if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
23590 +               return -EFAULT;
23591 +
23592 +       return vx_set_space(vxi, vc_data.mask, 0);      /* always space index 0 */
23593 +}
23594 +
23595 +int vc_set_space(struct vx_info *vxi, void __user *data)
23596 +{
23597 +       struct vcmd_space_mask_v2 vc_data = { .mask = 0 };      /* kept when data is NULL */
23598 +
23599 +       if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
23600 +               return -EFAULT;
23601 +
23602 +       if (vc_data.index >= VX_SPACES) /* index comes from user space */
23603 +               return -EINVAL;
23604 +
23605 +       return vx_set_space(vxi, vc_data.mask, vc_data.index);
23606 +}
23607 +
23608 +int vc_get_space_mask(void __user *data, int type)
23609 +{
23610 +       const struct vcmd_space_mask_v1 *mask;
23611 +
23612 +       if (type == 0)
23613 +               mask = &space_mask_v0;
23614 +       else if (type == 1)
23615 +               mask = &space_mask;
23616 +       else    /* every other type value */
23617 +               mask = &default_space_mask;
23618 +
23619 +       vxdprintk(VXD_CBIT(space, 10),
23620 +               "vc_get_space_mask(%d) = %08llx", type, mask->mask);
23621 +
23622 +       if (copy_to_user(data, mask, sizeof(*mask)))    /* hand the chosen mask to user space */
23623 +               return -EFAULT;
23624 +       return 0;
23625 +}
23626 +
23627 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/switch.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/switch.c
23628 --- linux-2.6.30.2/kernel/vserver/switch.c      1970-01-01 01:00:00.000000000 +0100
23629 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/switch.c   2009-07-04 01:11:39.000000000 +0200
23630 @@ -0,0 +1,543 @@
23631 +/*
23632 + *  linux/kernel/vserver/switch.c
23633 + *
23634 + *  Virtual Server: Syscall Switch
23635 + *
23636 + *  Copyright (C) 2003-2007  Herbert Pötzl
23637 + *
23638 + *  V0.01  syscall switch
23639 + *  V0.02  added signal to context
23640 + *  V0.03  added rlimit functions
23641 + *  V0.04  added iattr, task/xid functions
23642 + *  V0.05  added debug/history stuff
23643 + *  V0.06  added compat32 layer
23644 + *  V0.07  vcmd args and perms
23645 + *  V0.08  added status commands
23646 + *  V0.09  added tag commands
23647 + *  V0.10  added oom bias
23648 + *  V0.11  added device commands
23649 + *
23650 + */
23651 +
23652 +#include <linux/vs_context.h>
23653 +#include <linux/vs_network.h>
23654 +#include <linux/vserver/switch.h>
23655 +
23656 +#include "vci_config.h"
23657 +
23658 +
23659 +static inline
23660 +int vc_get_version(uint32_t id)        /* id is not used */
23661 +{
23662 +       return VCI_VERSION;
23663 +}
23664 +
23665 +static inline
23666 +int vc_get_vci(uint32_t id)    /* id is not used */
23667 +{
23668 +       return vci_kernel_config();
23669 +}
23670 +
23671 +#include <linux/vserver/context_cmd.h>
23672 +#include <linux/vserver/cvirt_cmd.h>
23673 +#include <linux/vserver/cacct_cmd.h>
23674 +#include <linux/vserver/limit_cmd.h>
23675 +#include <linux/vserver/network_cmd.h>
23676 +#include <linux/vserver/sched_cmd.h>
23677 +#include <linux/vserver/debug_cmd.h>
23678 +#include <linux/vserver/inode_cmd.h>
23679 +#include <linux/vserver/dlimit_cmd.h>
23680 +#include <linux/vserver/signal_cmd.h>
23681 +#include <linux/vserver/space_cmd.h>
23682 +#include <linux/vserver/tag_cmd.h>
23683 +#include <linux/vserver/device_cmd.h>
23684 +
23685 +#include <linux/vserver/inode.h>
23686 +#include <linux/vserver/dlimit.h>
23687 +
23688 +/* call name_x32() when compat is set, name() otherwise (always name() w/o CONFIG_COMPAT) */
23689 +#ifdef CONFIG_COMPAT
23690 +#define __COMPAT(name, id, data, compat)       \
23691 +       ((compat) ? name ## _x32(id, data) : name(id, data))
23692 +#define __COMPAT_NO_ID(name, data, compat)     \
23693 +       ((compat) ? name ## _x32(data) : name(data))
23694 +#else
23695 +#define __COMPAT(name, id, data, compat)       \
23696 +       name(id, data)
23697 +#define __COMPAT_NO_ID(name, data, compat)     \
23698 +       name(data)
23699 +#endif
23700 +
23701 +/* dispatch cmd to its vc_*() handler; unknown commands yield -ENOSYS */
23702 +static inline
23703 +long do_vcmd(uint32_t cmd, uint32_t id,
23704 +       struct vx_info *vxi, struct nx_info *nxi,
23705 +       void __user *data, int compat)
23706 +{
23707 +       switch (cmd) {
23708 +
23709 +       case VCMD_get_version:
23710 +               return vc_get_version(id);
23711 +       case VCMD_get_vci:
23712 +               return vc_get_vci(id);
23713 +
23714 +       case VCMD_task_xid:
23715 +               return vc_task_xid(id);
23716 +       case VCMD_vx_info:
23717 +               return vc_vx_info(vxi, data);
23718 +
23719 +       case VCMD_task_nid:
23720 +               return vc_task_nid(id);
23721 +       case VCMD_nx_info:
23722 +               return vc_nx_info(nxi, data);
23723 +
23724 +       case VCMD_task_tag:
23725 +               return vc_task_tag(id);
23726 +
23727 +       case VCMD_set_space_v1:
23728 +               return vc_set_space_v1(vxi, data);
23729 +       /* this is version 2 */
23730 +       case VCMD_set_space:
23731 +               return vc_set_space(vxi, data);
23732 +
23733 +       case VCMD_get_space_mask_v0:
23734 +               return vc_get_space_mask(data, 0);
23735 +       /* this is version 1 */
23736 +       case VCMD_get_space_mask:
23737 +               return vc_get_space_mask(data, 1);
23738 +
23739 +       case VCMD_get_space_default:
23740 +               return vc_get_space_mask(data, -1);
23741 +
23742 +#ifdef CONFIG_IA32_EMULATION
23743 +       case VCMD_get_rlimit:
23744 +               return __COMPAT(vc_get_rlimit, vxi, data, compat);
23745 +       case VCMD_set_rlimit:
23746 +               return __COMPAT(vc_set_rlimit, vxi, data, compat);
23747 +#else
23748 +       case VCMD_get_rlimit:
23749 +               return vc_get_rlimit(vxi, data);
23750 +       case VCMD_set_rlimit:
23751 +               return vc_set_rlimit(vxi, data);
23752 +#endif
23753 +       case VCMD_get_rlimit_mask:
23754 +               return vc_get_rlimit_mask(id, data);
23755 +       case VCMD_reset_minmax:
23756 +               return vc_reset_minmax(vxi, data);
23757 +
23758 +       case VCMD_get_vhi_name:
23759 +               return vc_get_vhi_name(vxi, data);
23760 +       case VCMD_set_vhi_name:
23761 +               return vc_set_vhi_name(vxi, data);
23762 +
23763 +       case VCMD_ctx_stat:
23764 +               return vc_ctx_stat(vxi, data);
23765 +       case VCMD_virt_stat:
23766 +               return vc_virt_stat(vxi, data);
23767 +       case VCMD_sock_stat:
23768 +               return vc_sock_stat(vxi, data);
23769 +       case VCMD_rlimit_stat:
23770 +               return vc_rlimit_stat(vxi, data);
23771 +
23772 +       case VCMD_set_cflags:
23773 +               return vc_set_cflags(vxi, data);
23774 +       case VCMD_get_cflags:
23775 +               return vc_get_cflags(vxi, data);
23776 +
23777 +       /* this is version 1 */
23778 +       case VCMD_set_ccaps:
23779 +               return vc_set_ccaps(vxi, data);
23780 +       /* this is version 1 */
23781 +       case VCMD_get_ccaps:
23782 +               return vc_get_ccaps(vxi, data);
23783 +       case VCMD_set_bcaps:
23784 +               return vc_set_bcaps(vxi, data);
23785 +       case VCMD_get_bcaps:
23786 +               return vc_get_bcaps(vxi, data);
23787 +
23788 +       case VCMD_set_badness:
23789 +               return vc_set_badness(vxi, data);
23790 +       case VCMD_get_badness:
23791 +               return vc_get_badness(vxi, data);
23792 +
23793 +       case VCMD_set_nflags:
23794 +               return vc_set_nflags(nxi, data);
23795 +       case VCMD_get_nflags:
23796 +               return vc_get_nflags(nxi, data);
23797 +
23798 +       case VCMD_set_ncaps:
23799 +               return vc_set_ncaps(nxi, data);
23800 +       case VCMD_get_ncaps:
23801 +               return vc_get_ncaps(nxi, data);
23802 +
23803 +       case VCMD_set_sched_v4:
23804 +               return vc_set_sched_v4(vxi, data);
23805 +       /* this is version 5 */
23806 +       case VCMD_set_sched:
23807 +               return vc_set_sched(vxi, data);
23808 +       case VCMD_get_sched:
23809 +               return vc_get_sched(vxi, data);
23810 +       case VCMD_sched_info:
23811 +               return vc_sched_info(vxi, data);
23812 +
23813 +       case VCMD_add_dlimit:
23814 +               return __COMPAT(vc_add_dlimit, id, data, compat);
23815 +       case VCMD_rem_dlimit:
23816 +               return __COMPAT(vc_rem_dlimit, id, data, compat);
23817 +       case VCMD_set_dlimit:
23818 +               return __COMPAT(vc_set_dlimit, id, data, compat);
23819 +       case VCMD_get_dlimit:
23820 +               return __COMPAT(vc_get_dlimit, id, data, compat);
23821 +
23822 +       case VCMD_ctx_kill:
23823 +               return vc_ctx_kill(vxi, data);
23824 +
23825 +       case VCMD_wait_exit:
23826 +               return vc_wait_exit(vxi, data);
23827 +
23828 +       case VCMD_get_iattr:
23829 +               return __COMPAT_NO_ID(vc_get_iattr, data, compat);
23830 +       case VCMD_set_iattr:
23831 +               return __COMPAT_NO_ID(vc_set_iattr, data, compat);
23832 +
23833 +       case VCMD_fget_iattr:
23834 +               return vc_fget_iattr(id, data);
23835 +       case VCMD_fset_iattr:
23836 +               return vc_fset_iattr(id, data);
23837 +
23838 +       case VCMD_enter_space_v0:
23839 +               return vc_enter_space_v1(vxi, NULL);    /* v0 carries no data */
23840 +       case VCMD_enter_space_v1:
23841 +               return vc_enter_space_v1(vxi, data);
23842 +       /* this is version 2 */
23843 +       case VCMD_enter_space:
23844 +               return vc_enter_space(vxi, data);
23845 +
23846 +       case VCMD_ctx_create_v0:
23847 +               return vc_ctx_create(id, NULL);
23848 +       case VCMD_ctx_create:
23849 +               return vc_ctx_create(id, data);
23850 +       case VCMD_ctx_migrate_v0:
23851 +               return vc_ctx_migrate(vxi, NULL);
23852 +       case VCMD_ctx_migrate:
23853 +               return vc_ctx_migrate(vxi, data);
23854 +
23855 +       case VCMD_net_create_v0:
23856 +               return vc_net_create(id, NULL);
23857 +       case VCMD_net_create:
23858 +               return vc_net_create(id, data);
23859 +       case VCMD_net_migrate:
23860 +               return vc_net_migrate(nxi, data);
23861 +
23862 +       case VCMD_tag_migrate:
23863 +               return vc_tag_migrate(id);
23864 +
23865 +       case VCMD_net_add:
23866 +               return vc_net_add(nxi, data);
23867 +       case VCMD_net_remove:
23868 +               return vc_net_remove(nxi, data);
23869 +
23870 +       case VCMD_net_add_ipv4:
23871 +               return vc_net_add_ipv4(nxi, data);
23872 +       case VCMD_net_remove_ipv4:
23873 +               return vc_net_remove_ipv4(nxi, data);
23874 +#ifdef CONFIG_IPV6
23875 +       case VCMD_net_add_ipv6:
23876 +               return vc_net_add_ipv6(nxi, data);
23877 +       case VCMD_net_remove_ipv6:
23878 +               return vc_net_remove_ipv6(nxi, data);
23879 +#endif
23880 +/*     case VCMD_add_match_ipv4:
23881 +               return vc_add_match_ipv4(nxi, data);
23882 +       case VCMD_get_match_ipv4:
23883 +               return vc_get_match_ipv4(nxi, data);
23884 +#ifdef CONFIG_IPV6
23885 +       case VCMD_add_match_ipv6:
23886 +               return vc_add_match_ipv6(nxi, data);
23887 +       case VCMD_get_match_ipv6:
23888 +               return vc_get_match_ipv6(nxi, data);
23889 +#endif */
23890 +
23891 +#ifdef CONFIG_VSERVER_DEVICE
23892 +       case VCMD_set_mapping:
23893 +               return __COMPAT(vc_set_mapping, vxi, data, compat);
23894 +       case VCMD_unset_mapping:
23895 +               return __COMPAT(vc_unset_mapping, vxi, data, compat);
23896 +#endif
23897 +#ifdef CONFIG_VSERVER_HISTORY
23898 +       case VCMD_dump_history:
23899 +               return vc_dump_history(id);
23900 +       case VCMD_read_history:
23901 +               return __COMPAT(vc_read_history, id, data, compat);
23902 +#endif
23903 +#ifdef CONFIG_VSERVER_MONITOR
23904 +       case VCMD_read_monitor:
23905 +               return __COMPAT(vc_read_monitor, id, data, compat);
23906 +#endif
23907 +       default:        /* the only path that leaves the switch without returning */
23908 +               vxwprintk_task(1, "unimplemented VCMD_%02d_%d[%d]",
23909 +                       VC_CATEGORY(cmd), VC_COMMAND(cmd), VC_VERSION(cmd));
23910 +       }
23911 +       return -ENOSYS;
23912 +}
23913 +
23914 +/* one case of do_vserver's command table: records perm, args and flags for vcmd */
23915 +#define        __VCMD(vcmd, _perm, _args, _flags)              \
23916 +       case VCMD_ ## vcmd: perm = _perm;               \
23917 +               args = _args; flags = _flags; break
23918 +
23919 +
23920 +#define VCA_NONE       0x00
23921 +#define VCA_VXI                0x01    /* do_vserver looks up a vx_info for id */
23922 +#define VCA_NXI                0x02    /* do_vserver looks up an nx_info for id */
23923 +
23924 +#define VCF_NONE       0x00
23925 +#define VCF_INFO       0x01    /* not evaluated by do_vserver */
23926 +#define VCF_ADMIN      0x02    /* needs vx_check(0, VS_ADMIN) in do_vserver */
23927 +#define VCF_ARES       0x06    /* includes admin; also needs CAP_SYS_RESOURCE */
23928 +#define VCF_SETUP      0x08    /* permitted while VXF_STATE_SETUP is set */
23929 +
23930 +#define VCF_ZIDOK      0x10    /* zero id okay */
23931 +
23932 +/* common syscall path: classify cmd, check permissions, look up contexts, dispatch */
23933 +static inline
23934 +long do_vserver(uint32_t cmd, uint32_t id, void __user *data, int compat)
23935 +{
23936 +       long ret;
23937 +       int permit = -1, state = 0;     /* state: last stage reached, only used for tracing */
23938 +       int perm = -1, args = 0, flags = 0;
23939 +       struct vx_info *vxi = NULL;
23940 +       struct nx_info *nxi = NULL;
23941 +
23942 +       switch (cmd) {
23943 +       /* unprivileged commands */
23944 +       __VCMD(get_version,      0, VCA_NONE,   0);
23945 +       __VCMD(get_vci,          0, VCA_NONE,   0);
23946 +       __VCMD(get_rlimit_mask,  0, VCA_NONE,   0);
23947 +       __VCMD(get_space_mask_v0,0, VCA_NONE,   0);
23948 +       __VCMD(get_space_mask,   0, VCA_NONE,   0);
23949 +       __VCMD(get_space_default,0, VCA_NONE,   0);
23950 +
23951 +       /* info commands */
23952 +       __VCMD(task_xid,         2, VCA_NONE,   0);
23953 +       __VCMD(reset_minmax,     2, VCA_VXI,    0);
23954 +       __VCMD(vx_info,          3, VCA_VXI,    VCF_INFO);
23955 +       __VCMD(get_bcaps,        3, VCA_VXI,    VCF_INFO);
23956 +       __VCMD(get_ccaps,        3, VCA_VXI,    VCF_INFO);
23957 +       __VCMD(get_cflags,       3, VCA_VXI,    VCF_INFO);
23958 +       __VCMD(get_badness,      3, VCA_VXI,    VCF_INFO);
23959 +       __VCMD(get_vhi_name,     3, VCA_VXI,    VCF_INFO);
23960 +       __VCMD(get_rlimit,       3, VCA_VXI,    VCF_INFO);
23961 +
23962 +       __VCMD(ctx_stat,         3, VCA_VXI,    VCF_INFO);
23963 +       __VCMD(virt_stat,        3, VCA_VXI,    VCF_INFO);
23964 +       __VCMD(sock_stat,        3, VCA_VXI,    VCF_INFO);
23965 +       __VCMD(rlimit_stat,      3, VCA_VXI,    VCF_INFO);
23966 +
23967 +       __VCMD(task_nid,         2, VCA_NONE,   0);
23968 +       __VCMD(nx_info,          3, VCA_NXI,    VCF_INFO);
23969 +       __VCMD(get_ncaps,        3, VCA_NXI,    VCF_INFO);
23970 +       __VCMD(get_nflags,       3, VCA_NXI,    VCF_INFO);
23971 +
23972 +       __VCMD(task_tag,         2, VCA_NONE,   0);
23973 +
23974 +       __VCMD(get_iattr,        2, VCA_NONE,   0);
23975 +       __VCMD(fget_iattr,       2, VCA_NONE,   0);
23976 +       __VCMD(get_dlimit,       3, VCA_NONE,   VCF_INFO);
23977 +       __VCMD(get_sched,        3, VCA_VXI,    VCF_INFO);
23978 +       __VCMD(sched_info,       3, VCA_VXI,    VCF_INFO | VCF_ZIDOK);
23979 +
23980 +       /* lower admin commands */
23981 +       __VCMD(wait_exit,        4, VCA_VXI,    VCF_INFO);
23982 +       __VCMD(ctx_create_v0,    5, VCA_NONE,   0);
23983 +       __VCMD(ctx_create,       5, VCA_NONE,   0);
23984 +       __VCMD(ctx_migrate_v0,   5, VCA_VXI,    VCF_ADMIN);
23985 +       __VCMD(ctx_migrate,      5, VCA_VXI,    VCF_ADMIN);
23986 +       __VCMD(enter_space_v0,   5, VCA_VXI,    VCF_ADMIN);
23987 +       __VCMD(enter_space_v1,   5, VCA_VXI,    VCF_ADMIN);
23988 +       __VCMD(enter_space,      5, VCA_VXI,    VCF_ADMIN);
23989 +
23990 +       __VCMD(net_create_v0,    5, VCA_NONE,   0);
23991 +       __VCMD(net_create,       5, VCA_NONE,   0);
23992 +       __VCMD(net_migrate,      5, VCA_NXI,    VCF_ADMIN);
23993 +
23994 +       __VCMD(tag_migrate,      5, VCA_NONE,   VCF_ADMIN);
23995 +
23996 +       /* higher admin commands */
23997 +       __VCMD(ctx_kill,         6, VCA_VXI,    VCF_ARES);
23998 +       __VCMD(set_space_v1,     7, VCA_VXI,    VCF_ARES | VCF_SETUP);
23999 +       __VCMD(set_space,        7, VCA_VXI,    VCF_ARES | VCF_SETUP);
24000 +
24001 +       __VCMD(set_ccaps,        7, VCA_VXI,    VCF_ARES | VCF_SETUP);
24002 +       __VCMD(set_bcaps,        7, VCA_VXI,    VCF_ARES | VCF_SETUP);
24003 +       __VCMD(set_cflags,       7, VCA_VXI,    VCF_ARES | VCF_SETUP);
24004 +       __VCMD(set_badness,      7, VCA_VXI,    VCF_ARES | VCF_SETUP);
24005 +
24006 +       __VCMD(set_vhi_name,     7, VCA_VXI,    VCF_ARES | VCF_SETUP);
24007 +       __VCMD(set_rlimit,       7, VCA_VXI,    VCF_ARES | VCF_SETUP);
24008 +       __VCMD(set_sched,        7, VCA_VXI,    VCF_ARES | VCF_SETUP);
24009 +       __VCMD(set_sched_v4,     7, VCA_VXI,    VCF_ARES | VCF_SETUP);
24010 +
24011 +       __VCMD(set_ncaps,        7, VCA_NXI,    VCF_ARES | VCF_SETUP);
24012 +       __VCMD(set_nflags,       7, VCA_NXI,    VCF_ARES | VCF_SETUP);
24013 +       __VCMD(net_add,          8, VCA_NXI,    VCF_ARES | VCF_SETUP);
24014 +       __VCMD(net_remove,       8, VCA_NXI,    VCF_ARES | VCF_SETUP);
24015 +       __VCMD(net_add_ipv4,     8, VCA_NXI,    VCF_ARES | VCF_SETUP);
24016 +       __VCMD(net_remove_ipv4,  8, VCA_NXI,    VCF_ARES | VCF_SETUP);
24017 +#ifdef CONFIG_IPV6
24018 +       __VCMD(net_add_ipv6,     8, VCA_NXI,    VCF_ARES | VCF_SETUP);
24019 +       __VCMD(net_remove_ipv6,  8, VCA_NXI,    VCF_ARES | VCF_SETUP);
24020 +#endif
24021 +       __VCMD(set_iattr,        7, VCA_NONE,   0);
24022 +       __VCMD(fset_iattr,       7, VCA_NONE,   0);
24023 +       __VCMD(set_dlimit,       7, VCA_NONE,   VCF_ARES);
24024 +       __VCMD(add_dlimit,       8, VCA_NONE,   VCF_ARES);
24025 +       __VCMD(rem_dlimit,       8, VCA_NONE,   VCF_ARES);
24026 +
24027 +#ifdef CONFIG_VSERVER_DEVICE
24028 +       __VCMD(set_mapping,      8, VCA_VXI,    VCF_ARES|VCF_ZIDOK);
24029 +       __VCMD(unset_mapping,    8, VCA_VXI,    VCF_ARES|VCF_ZIDOK);
24030 +#endif
24031 +       /* debug level admin commands */
24032 +#ifdef CONFIG_VSERVER_HISTORY
24033 +       __VCMD(dump_history,     9, VCA_NONE,   0);
24034 +       __VCMD(read_history,     9, VCA_NONE,   0);
24035 +#endif
24036 +#ifdef CONFIG_VSERVER_MONITOR
24037 +       __VCMD(read_monitor,     9, VCA_NONE,   0);
24038 +#endif
24039 +
24040 +       default:
24041 +               perm = -1;      /* command not in the table */
24042 +       }
24043 +
24044 +       vxdprintk(VXD_CBIT(switch, 0),
24045 +               "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]",
24046 +               VC_CATEGORY(cmd), VC_COMMAND(cmd),
24047 +               VC_VERSION(cmd), id, data, compat,
24048 +               perm, args, flags);
24049 +
24050 +       ret = -ENOSYS;
24051 +       if (perm < 0)
24052 +               goto out;
24053 +
24054 +       state = 1;
24055 +       if (!capable(CAP_CONTEXT))      /* ret is still -ENOSYS here */
24056 +               goto out;
24057 +
24058 +       state = 2;
24059 +       /* moved here from the individual commands */
24060 +       ret = -EPERM;
24061 +       if ((perm > 1) && !capable(CAP_SYS_ADMIN))
24062 +               goto out;
24063 +
24064 +       state = 3;
24065 +       /* vcmd involves resource management  */
24066 +       ret = -EPERM;
24067 +       if ((flags & VCF_ARES) && !capable(CAP_SYS_RESOURCE))
24068 +               goto out;
24069 +
24070 +       state = 4;
24071 +       /* various legacy exceptions */
24072 +       switch (cmd) {
24073 +       /* will go away when spectator is a cap */
24074 +       case VCMD_ctx_migrate_v0:
24075 +       case VCMD_ctx_migrate:
24076 +               if (id == 1) {
24077 +                       current->xid = 1;
24078 +                       ret = 1;
24079 +                       goto out;
24080 +               }
24081 +               break;
24082 +
24083 +       /* will go away when spectator is a cap */
24084 +       case VCMD_net_migrate:
24085 +               if (id == 1) {
24086 +                       current->nid = 1;
24087 +                       ret = 1;
24088 +                       goto out;
24089 +               }
24090 +               break;
24091 +       }
24092 +
24093 +       /* vcmds are fine by default */
24094 +       permit = 1;
24095 +
24096 +       /* admin type vcmds require admin ... */
24097 +       if (flags & VCF_ADMIN)
24098 +               permit = vx_check(0, VS_ADMIN) ? 1 : 0;
24099 +
24100 +       /* ... but setup type vcmds override that */
24101 +       if (!permit && (flags & VCF_SETUP))
24102 +               permit = vx_flags(VXF_STATE_SETUP, 0) ? 2 : 0;
24103 +
24104 +       state = 5;
24105 +       ret = -EPERM;
24106 +       if (!permit)
24107 +               goto out;
24108 +
24109 +       state = 6;
24110 +       if (!id && (flags & VCF_ZIDOK)) /* no lookups: vxi and nxi stay NULL */
24111 +               goto skip_id;
24112 +
24113 +       ret = -ESRCH;
24114 +       if (args & VCA_VXI) {
24115 +               vxi = lookup_vx_info(id);
24116 +               if (!vxi)
24117 +                       goto out;
24118 +
24119 +               if ((flags & VCF_ADMIN) &&
24120 +                       /* special case kill for shutdown */
24121 +                       (cmd != VCMD_ctx_kill) &&
24122 +                       /* can context be administrated? */
24123 +                       !vx_info_flags(vxi, VXF_STATE_ADMIN, 0)) {
24124 +                       ret = -EACCES;
24125 +                       goto out_vxi;
24126 +               }
24127 +       }
24128 +       state = 7;
24129 +       if (args & VCA_NXI) {
24130 +               nxi = lookup_nx_info(id);
24131 +               if (!nxi)
24132 +                       goto out_vxi;
24133 +
24134 +               if ((flags & VCF_ADMIN) &&
24135 +                       /* can context be administrated? */
24136 +                       !nx_info_flags(nxi, NXF_STATE_ADMIN, 0)) {
24137 +                       ret = -EACCES;
24138 +                       goto out_nxi;
24139 +               }
24140 +       }
24141 +skip_id:
24142 +       state = 8;
24143 +       ret = do_vcmd(cmd, id, vxi, nxi, data, compat);
24144 +
24145 +out_nxi:
24146 +       if ((args & VCA_NXI) && nxi)
24147 +               put_nx_info(nxi);
24148 +out_vxi:
24149 +       if ((args & VCA_VXI) && vxi)
24150 +               put_vx_info(vxi);
24151 +out:
24152 +       vxdprintk(VXD_CBIT(switch, 1),
24153 +               "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]",
24154 +               VC_CATEGORY(cmd), VC_COMMAND(cmd),
24155 +               VC_VERSION(cmd), ret, ret, state, permit);
24156 +       return ret;
24157 +}
24158 +
24159 +asmlinkage long
24160 +sys_vserver(uint32_t cmd, uint32_t id, void __user *data)
24161 +{
24162 +       return do_vserver(cmd, id, data, 0);    /* compat = 0 */
24163 +}
24164 +
24165 +#ifdef CONFIG_COMPAT
24166 +/* like sys_vserver, but passes compat = 1 to do_vserver */
24167 +asmlinkage long
24168 +sys32_vserver(uint32_t cmd, uint32_t id, void __user *data)
24169 +{
24170 +       return do_vserver(cmd, id, data, 1);
24171 +}
24172 +
24173 +#endif /* CONFIG_COMPAT */
24174 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/sysctl.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/sysctl.c
24175 --- linux-2.6.30.2/kernel/vserver/sysctl.c      1970-01-01 01:00:00.000000000 +0100
24176 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/sysctl.c   2009-07-04 01:11:39.000000000 +0200
24177 @@ -0,0 +1,244 @@
24178 +/*
24179 + *  kernel/vserver/sysctl.c
24180 + *
24181 + *  Virtual Context Support
24182 + *
24183 + *  Copyright (C) 2004-2007  Herbert Pötzl
24184 + *
24185 + *  V0.01  basic structure
24186 + *
24187 + */
24188 +
24189 +#include <linux/module.h>
24190 +#include <linux/ctype.h>
24191 +#include <linux/sysctl.h>
24192 +#include <linux/parser.h>
24193 +#include <asm/uaccess.h>
24194 +
24195 +/* CTL_DEBUG_XID and the members after it continue implicitly from 2 */
24196 +enum {
24197 +       CTL_DEBUG_ERROR         = 0,
24198 +       CTL_DEBUG_SWITCH        = 1,
24199 +       CTL_DEBUG_XID,
24200 +       CTL_DEBUG_NID,
24201 +       CTL_DEBUG_TAG,
24202 +       CTL_DEBUG_NET,
24203 +       CTL_DEBUG_LIMIT,
24204 +       CTL_DEBUG_CRES,
24205 +       CTL_DEBUG_DLIM,
24206 +       CTL_DEBUG_QUOTA,
24207 +       CTL_DEBUG_CVIRT,
24208 +       CTL_DEBUG_SPACE,
24209 +       CTL_DEBUG_MISC,
24210 +};
24211 +
24212 +/* debug settings, all initially 0; NOTE(review): readers/writers are outside this view */
24213 +unsigned int vx_debug_switch   = 0;
24214 +unsigned int vx_debug_xid      = 0;
24215 +unsigned int vx_debug_nid      = 0;
24216 +unsigned int vx_debug_tag      = 0;
24217 +unsigned int vx_debug_net      = 0;
24218 +unsigned int vx_debug_limit    = 0;
24219 +unsigned int vx_debug_cres     = 0;
24220 +unsigned int vx_debug_dlim     = 0;
24221 +unsigned int vx_debug_quota    = 0;
24222 +unsigned int vx_debug_cvirt    = 0;
24223 +unsigned int vx_debug_space    = 0;
24224 +unsigned int vx_debug_misc     = 0;
24225 +
24226 +
24227 +static struct ctl_table_header *vserver_table_header;  /* non-NULL while registered */
24228 +static ctl_table vserver_root_table[]; /* declaration only; definition is not in this view */
24229 +
24230 +
24231 +void vserver_register_sysctl(void)
24232 +{
24233 +       if (!vserver_table_header) {    /* skip if a header is already held */
24234 +               vserver_table_header = register_sysctl_table(vserver_root_table);
24235 +       }
24236 +
24237 +}
24238 +
24239 +void vserver_unregister_sysctl(void)
24240 +{
24241 +       if (vserver_table_header) {
24242 +               unregister_sysctl_table(vserver_table_header);
24243 +               vserver_table_header = NULL;    /* allows a later re-register */
24244 +       }
24245 +}
24246 +
24247 +/* sysctl handler: read or write the unsigned int at table->data as decimal text */
24248 +static int proc_dodebug(ctl_table *table, int write,
24249 +       struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
24250 +{
24251 +       char            tmpbuf[20], *p, c;
24252 +       unsigned int    value;
24253 +       size_t          left, len;
24254 +
24255 +       if ((*ppos && !write) || !*lenp) {      /* reads past offset 0 return nothing */
24256 +               *lenp = 0;
24257 +               return 0;
24258 +       }
24259 +
24260 +       left = *lenp;
24261 +
24262 +       if (write) {
24263 +               if (!access_ok(VERIFY_READ, buffer, left))
24264 +                       return -EFAULT;
24265 +               p = (char *)buffer;
24266 +               while (left && __get_user(c, p) >= 0 && isspace(c))     /* skip leading blanks */
24267 +                       left--, p++;
24268 +               if (!left)
24269 +                       goto done;
24270 +
24271 +               if (left > sizeof(tmpbuf) - 1)  /* keep room for the terminator */
24272 +                       return -EINVAL;
24273 +               if (copy_from_user(tmpbuf, p, left))
24274 +                       return -EFAULT;
24275 +               tmpbuf[left] = '\0';
24276 +
24277 +               for (p = tmpbuf, value = 0; '0' <= *p && *p <= '9'; p++, left--)
24278 +                       value = 10 * value + (*p - '0');
24279 +               if (*p && !isspace(*p)) /* only whitespace may follow the digits */
24280 +                       return -EINVAL;
24281 +               while (left && isspace(*p))
24282 +                       left--, p++;
24283 +               *(unsigned int *)table->data = value;
24284 +       } else {
24285 +               if (!access_ok(VERIFY_WRITE, buffer, left))
24286 +                       return -EFAULT;
24287 +               len = sprintf(tmpbuf, "%u", *(unsigned int *)table->data);     /* unsigned, as parsed above */
24288 +               if (len > left)
24289 +                       len = left;
24290 +               if (__copy_to_user(buffer, tmpbuf, len))
24291 +                       return -EFAULT;
24292 +               if ((left -= len) > 0) {        /* append a newline if it fits */
24293 +                       if (put_user('\n', (char *)buffer + len))
24294 +                               return -EFAULT;
24295 +                       left--;
24296 +               }
24297 +       }
24298 +
24299 +done:
24300 +       *lenp -= left;  /* bytes consumed or produced */
24301 +       *ppos += *lenp;
24302 +       return 0;
24303 +}
24304 +
24305 +static int zero;
24306 +
24307 +#define        CTL_ENTRY(ctl, name)                            \
24308 +       {                                               \
24309 +               .ctl_name       = ctl,                  \
24310 +               .procname       = #name,                \
24311 +               .data           = &vx_ ## name,         \
24312 +               .maxlen         = sizeof(int),          \
24313 +               .mode           = 0644,                 \
24314 +               .proc_handler   = &proc_dodebug,        \
24315 +               .strategy       = &sysctl_intvec,       \
24316 +               .extra1         = &zero,                \
24317 +       }
24318 +
24319 +static ctl_table vserver_debug_table[] = {
24320 +       CTL_ENTRY(CTL_DEBUG_SWITCH,     debug_switch),
24321 +       CTL_ENTRY(CTL_DEBUG_XID,        debug_xid),
24322 +       CTL_ENTRY(CTL_DEBUG_NID,        debug_nid),
24323 +       CTL_ENTRY(CTL_DEBUG_TAG,        debug_tag),
24324 +       CTL_ENTRY(CTL_DEBUG_NET,        debug_net),
24325 +       CTL_ENTRY(CTL_DEBUG_LIMIT,      debug_limit),
24326 +       CTL_ENTRY(CTL_DEBUG_CRES,       debug_cres),
24327 +       CTL_ENTRY(CTL_DEBUG_DLIM,       debug_dlim),
24328 +       CTL_ENTRY(CTL_DEBUG_QUOTA,      debug_quota),
24329 +       CTL_ENTRY(CTL_DEBUG_CVIRT,      debug_cvirt),
24330 +       CTL_ENTRY(CTL_DEBUG_SPACE,      debug_space),
24331 +       CTL_ENTRY(CTL_DEBUG_MISC,       debug_misc),
24332 +       { .ctl_name = 0 }
24333 +};
24334 +
24335 +static ctl_table vserver_root_table[] = {
24336 +       {
24337 +               .ctl_name       = CTL_VSERVER,
24338 +               .procname       = "vserver",
24339 +               .mode           = 0555,
24340 +               .child          = vserver_debug_table
24341 +       },
24342 +       { .ctl_name = 0 }
24343 +};
24344 +
24345 +
24346 +static match_table_t tokens = {
24347 +       { CTL_DEBUG_SWITCH,     "switch=%x"     },
24348 +       { CTL_DEBUG_XID,        "xid=%x"        },
24349 +       { CTL_DEBUG_NID,        "nid=%x"        },
24350 +       { CTL_DEBUG_TAG,        "tag=%x"        },
24351 +       { CTL_DEBUG_NET,        "net=%x"        },
24352 +       { CTL_DEBUG_LIMIT,      "limit=%x"      },
24353 +       { CTL_DEBUG_CRES,       "cres=%x"       },
24354 +       { CTL_DEBUG_DLIM,       "dlim=%x"       },
24355 +       { CTL_DEBUG_QUOTA,      "quota=%x"      },
24356 +       { CTL_DEBUG_CVIRT,      "cvirt=%x"      },
24357 +       { CTL_DEBUG_SPACE,      "space=%x"      },
24358 +       { CTL_DEBUG_MISC,       "misc=%x"       },
24359 +       { CTL_DEBUG_ERROR,      NULL            }
24360 +};
24361 +
24362 +#define        HANDLE_CASE(id, name, val)                              \
24363 +       case CTL_DEBUG_ ## id:                                  \
24364 +               vx_debug_ ## name = val;                        \
24365 +               printk("vs_debug_" #name "=0x%x\n", val);       \
24366 +               break
24367 +
24368 +/* Handle the "vsdebug=" boot option: comma-separated name=value items, each setting one vx_debug_* mask. */
24369 +static int __init vs_debug_setup(char *str)
24370 +{
24371 +       char *p;
24372 +       int token;
24373 +
24374 +       printk("vs_debug_setup(%s)\n", str);
24375 +       while ((p = strsep(&str, ",")) != NULL) {
24376 +               substring_t args[MAX_OPT_ARGS];
24377 +               unsigned int value;
24378 +
24379 +               if (!*p)        /* skip empty items such as ",," */
24380 +                       continue;
24381 +
24382 +               token = match_token(p, tokens, args);
24383 +               value = (token > 0) ? simple_strtoul(args[0].from, NULL, 0) : 0;
24384 +
24385 +               switch (token) {
24386 +               HANDLE_CASE(SWITCH, switch, value);
24387 +               HANDLE_CASE(XID,    xid,    value);
24388 +               HANDLE_CASE(NID,    nid,    value);
24389 +               HANDLE_CASE(TAG,    tag,    value);
24390 +               HANDLE_CASE(NET,    net,    value);
24391 +               HANDLE_CASE(LIMIT,  limit,  value);
24392 +               HANDLE_CASE(CRES,   cres,   value);
24393 +               HANDLE_CASE(DLIM,   dlim,   value);
24394 +               HANDLE_CASE(QUOTA,  quota,  value);
24395 +               HANDLE_CASE(CVIRT,  cvirt,  value);
24396 +               HANDLE_CASE(SPACE,  space,  value);
24397 +               HANDLE_CASE(MISC,   misc,   value);
24398 +               default:        /* CTL_DEBUG_ERROR: item matched no token; remaining items are not processed */
24399 +                       return -EINVAL;
24400 +                       break;
24401 +               }
24402 +       }
24403 +       return 1;
24404 +}
24405 +
24406 +__setup("vsdebug=", vs_debug_setup);
24407 +
24408 +
24409 +EXPORT_SYMBOL_GPL(vx_debug_switch);
24410 +EXPORT_SYMBOL_GPL(vx_debug_xid);
24411 +EXPORT_SYMBOL_GPL(vx_debug_nid);
24412 +EXPORT_SYMBOL_GPL(vx_debug_tag);
24413 +EXPORT_SYMBOL_GPL(vx_debug_net);
24414 +EXPORT_SYMBOL_GPL(vx_debug_limit);
24415 +EXPORT_SYMBOL_GPL(vx_debug_cres);
24416 +EXPORT_SYMBOL_GPL(vx_debug_dlim);
24417 +EXPORT_SYMBOL_GPL(vx_debug_quota);
24418 +EXPORT_SYMBOL_GPL(vx_debug_cvirt);
24419 +EXPORT_SYMBOL_GPL(vx_debug_space);
24420 +EXPORT_SYMBOL_GPL(vx_debug_misc);
24421 +
24422 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/tag.c linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/tag.c
24423 --- linux-2.6.30.2/kernel/vserver/tag.c 1970-01-01 01:00:00.000000000 +0100
24424 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/tag.c      2009-07-04 01:11:39.000000000 +0200
24425 @@ -0,0 +1,63 @@
24426 +/*
24427 + *  linux/kernel/vserver/tag.c
24428 + *
24429 + *  Virtual Server: Shallow Tag Space
24430 + *
24431 + *  Copyright (C) 2007  Herbert Pötzl
24432 + *
24433 + *  V0.01  basic implementation
24434 + *
24435 + */
24436 +
24437 +#include <linux/sched.h>
24438 +#include <linux/vserver/debug.h>
24439 +#include <linux/vs_pid.h>
24440 +#include <linux/vs_tag.h>
24441 +
24442 +#include <linux/vserver/tag_cmd.h>
24443 +
24444 +/* Set p->tag to tag under task_lock(); BUG()s on a NULL task and otherwise always returns 0. */
24445 +int dx_migrate_task(struct task_struct *p, tag_t tag)
24446 +{
24447 +       if (!p)
24448 +               BUG();
24449 +
24450 +       vxdprintk(VXD_CBIT(tag, 5),
24451 +               "dx_migrate_task(%p[#%d],#%d)", p, p->tag, tag);
24452 +
24453 +       task_lock(p);
24454 +       p->tag = tag;
24455 +       task_unlock(p);
24456 +
24457 +       vxdprintk(VXD_CBIT(tag, 5),
24458 +               "moved task %p into [#%d]", p, tag);
24459 +       return 0;
24460 +}
24461 +
24462 +/* vserver syscall commands below here */
24463 +
24464 +/* task tag functions */
24465 +
24466 +/* Return the tag of the task with real pid id (-ESRCH if not found); for id 0 return dx_current_tag(). */
24467 +int vc_task_tag(uint32_t id)
24468 +{
24469 +       tag_t tag;
24470 +
24471 +       if (id) {
24472 +               struct task_struct *tsk;
24473 +               read_lock(&tasklist_lock);      /* held across lookup and the tsk->tag read */
24474 +               tsk = find_task_by_real_pid(id);
24475 +               tag = (tsk) ? tsk->tag : -ESRCH;
24476 +               read_unlock(&tasklist_lock);
24477 +       } else
24478 +               tag = dx_current_tag();
24479 +       return tag;
24480 +}
24481 +
24482 +/* Migrate the current task to the given tag; only the low 16 bits of tag are used. */
24483 +int vc_tag_migrate(uint32_t tag)
24484 +{
24485 +       return dx_migrate_task(current, tag & 0xFFFF);
24486 +}
24487 +
24488 +
24489 diff -NurpP --minimal linux-2.6.30.2/kernel/vserver/vci_config.h linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/vci_config.h
24490 --- linux-2.6.30.2/kernel/vserver/vci_config.h  1970-01-01 01:00:00.000000000 +0100
24491 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/kernel/vserver/vci_config.h       2009-07-04 01:11:39.000000000 +0200
24492 @@ -0,0 +1,81 @@
24493 +
24494 +/*  interface version */
24495 +
24496 +#define VCI_VERSION            0x00020304
24497 +
24498 +
24499 +enum {
24500 +       VCI_KCBIT_NO_DYNAMIC = 0,
24501 +
24502 +       VCI_KCBIT_PROC_SECURE = 4,
24503 +       VCI_KCBIT_HARDCPU = 5,
24504 +       VCI_KCBIT_IDLELIMIT = 6,
24505 +       VCI_KCBIT_IDLETIME = 7,
24506 +
24507 +       VCI_KCBIT_COWBL = 8,
24508 +       VCI_KCBIT_FULLCOWBL = 9,
24509 +       VCI_KCBIT_SPACES = 10,
24510 +       VCI_KCBIT_NETV2 = 11,
24511 +
24512 +       VCI_KCBIT_DEBUG = 16,
24513 +       VCI_KCBIT_HISTORY = 20,
24514 +       VCI_KCBIT_TAGGED = 24,
24515 +       VCI_KCBIT_PPTAG = 28,
24516 +
24517 +       VCI_KCBIT_MORE = 31,
24518 +};
24519 +
24520 +/* Build the feature bitmask: one VCI_KCBIT_* bit per compiled-in option, tagging mode as a value at VCI_KCBIT_TAGGED. */
24521 +static inline uint32_t vci_kernel_config(void)
24522 +{
24523 +       return
24524 +       (1 << VCI_KCBIT_NO_DYNAMIC) |
24525 +
24526 +       /* configured features */
24527 +#ifdef CONFIG_VSERVER_PROC_SECURE
24528 +       (1 << VCI_KCBIT_PROC_SECURE) |
24529 +#endif
24530 +#ifdef CONFIG_VSERVER_HARDCPU
24531 +       (1 << VCI_KCBIT_HARDCPU) |
24532 +#endif
24533 +#ifdef CONFIG_VSERVER_IDLELIMIT
24534 +       (1 << VCI_KCBIT_IDLELIMIT) |
24535 +#endif
24536 +#ifdef CONFIG_VSERVER_IDLETIME
24537 +       (1 << VCI_KCBIT_IDLETIME) |
24538 +#endif
24539 +#ifdef CONFIG_VSERVER_COWBL
24540 +       (1 << VCI_KCBIT_COWBL) |
24541 +       (1 << VCI_KCBIT_FULLCOWBL) |
24542 +#endif
24543 +       (1 << VCI_KCBIT_SPACES) |
24544 +       (1 << VCI_KCBIT_NETV2) |
24545 +
24546 +       /* debug options */
24547 +#ifdef CONFIG_VSERVER_DEBUG
24548 +       (1 << VCI_KCBIT_DEBUG) |
24549 +#endif
24550 +#ifdef CONFIG_VSERVER_HISTORY
24551 +       (1 << VCI_KCBIT_HISTORY) |
24552 +#endif
24553 +
24554 +       /* inode context tagging */
24555 +#if    defined(CONFIG_TAGGING_NONE)
24556 +       (0 << VCI_KCBIT_TAGGED) |
24557 +#elif  defined(CONFIG_TAGGING_UID16)
24558 +       (1 << VCI_KCBIT_TAGGED) |
24559 +#elif  defined(CONFIG_TAGGING_GID16)
24560 +       (2 << VCI_KCBIT_TAGGED) |
24561 +#elif  defined(CONFIG_TAGGING_ID24)
24562 +       (3 << VCI_KCBIT_TAGGED) |
24563 +#elif  defined(CONFIG_TAGGING_INTERN)
24564 +       (4 << VCI_KCBIT_TAGGED) |
24565 +#elif  defined(CONFIG_TAGGING_RUNTIME)
24566 +       (5 << VCI_KCBIT_TAGGED) |
24567 +#else
24568 +       (7 << VCI_KCBIT_TAGGED) |
24569 +#endif
24570 +       (1 << VCI_KCBIT_PPTAG) |
24571 +       0;
24572 +}
24573 +
24574 diff -NurpP --minimal linux-2.6.30.2/mm/filemap_xip.c linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/filemap_xip.c
24575 --- linux-2.6.30.2/mm/filemap_xip.c     2009-06-11 17:13:27.000000000 +0200
24576 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/filemap_xip.c  2009-07-04 01:11:39.000000000 +0200
24577 @@ -17,6 +17,7 @@
24578  #include <linux/sched.h>
24579  #include <linux/seqlock.h>
24580  #include <linux/mutex.h>
24581 +#include <linux/vs_memory.h>
24582  #include <asm/tlbflush.h>
24583  #include <asm/io.h>
24584  
24585 diff -NurpP --minimal linux-2.6.30.2/mm/fremap.c linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/fremap.c
24586 --- linux-2.6.30.2/mm/fremap.c  2009-03-24 14:22:45.000000000 +0100
24587 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/fremap.c       2009-07-04 01:11:39.000000000 +0200
24588 @@ -16,6 +16,7 @@
24589  #include <linux/module.h>
24590  #include <linux/syscalls.h>
24591  #include <linux/mmu_notifier.h>
24592 +#include <linux/vs_memory.h>
24593  
24594  #include <asm/mmu_context.h>
24595  #include <asm/cacheflush.h>
24596 diff -NurpP --minimal linux-2.6.30.2/mm/hugetlb.c linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/hugetlb.c
24597 --- linux-2.6.30.2/mm/hugetlb.c 2009-06-11 17:13:27.000000000 +0200
24598 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/hugetlb.c      2009-07-04 01:11:39.000000000 +0200
24599 @@ -24,6 +24,7 @@
24600  #include <asm/io.h>
24601  
24602  #include <linux/hugetlb.h>
24603 +#include <linux/vs_memory.h>
24604  #include "internal.h"
24605  
24606  const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
24607 diff -NurpP --minimal linux-2.6.30.2/mm/memory.c linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/memory.c
24608 --- linux-2.6.30.2/mm/memory.c  2009-06-11 17:13:27.000000000 +0200
24609 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/memory.c       2009-07-04 01:11:39.000000000 +0200
24610 @@ -55,6 +55,7 @@
24611  #include <linux/kallsyms.h>
24612  #include <linux/swapops.h>
24613  #include <linux/elf.h>
24614 +// #include <linux/vs_memory.h>
24615  
24616  #include <asm/pgalloc.h>
24617  #include <asm/uaccess.h>
24618 @@ -612,6 +613,9 @@ static int copy_pte_range(struct mm_stru
24619         int progress = 0;
24620         int rss[2];
24621  
24622 +       if (!vx_rss_avail(dst_mm, ((end - addr)/PAGE_SIZE + 1)))
24623 +               return -ENOMEM;
24624 +
24625  again:
24626         rss[1] = rss[0] = 0;
24627         dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
24628 @@ -2575,6 +2579,8 @@ static int do_anonymous_page(struct mm_s
24629         /* Allocate our own private page. */
24630         pte_unmap(page_table);
24631  
24632 +       if (!vx_rss_avail(mm, 1))
24633 +               goto oom;
24634         if (unlikely(anon_vma_prepare(vma)))
24635                 goto oom;
24636         page = alloc_zeroed_user_highpage_movable(vma, address);
24637 @@ -2858,6 +2864,7 @@ static inline int handle_pte_fault(struc
24638  {
24639         pte_t entry;
24640         spinlock_t *ptl;
24641 +       int ret = 0, type = VXPT_UNKNOWN;
24642  
24643         entry = *pte;
24644         if (!pte_present(entry)) {
24645 @@ -2882,9 +2889,12 @@ static inline int handle_pte_fault(struc
24646         if (unlikely(!pte_same(*pte, entry)))
24647                 goto unlock;
24648         if (write_access) {
24649 -               if (!pte_write(entry))
24650 -                       return do_wp_page(mm, vma, address,
24651 +               if (!pte_write(entry)) {
24652 +                       ret = do_wp_page(mm, vma, address,
24653                                         pte, pmd, ptl, entry);
24654 +                       type = VXPT_WRITE;
24655 +                       goto out;
24656 +               }
24657                 entry = pte_mkdirty(entry);
24658         }
24659         entry = pte_mkyoung(entry);
24660 @@ -2902,7 +2912,10 @@ static inline int handle_pte_fault(struc
24661         }
24662  unlock:
24663         pte_unmap_unlock(pte, ptl);
24664 -       return 0;
24665 +       ret = 0;
24666 +out:
24667 +       vx_page_fault(mm, vma, type, ret);
24668 +       return ret;
24669  }
24670  
24671  /*
24672 diff -NurpP --minimal linux-2.6.30.2/mm/mlock.c linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/mlock.c
24673 --- linux-2.6.30.2/mm/mlock.c   2009-03-24 14:22:45.000000000 +0100
24674 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/mlock.c        2009-07-04 01:11:39.000000000 +0200
24675 @@ -18,6 +18,7 @@
24676  #include <linux/rmap.h>
24677  #include <linux/mmzone.h>
24678  #include <linux/hugetlb.h>
24679 +#include <linux/vs_memory.h>
24680  
24681  #include "internal.h"
24682  
24683 @@ -415,7 +416,7 @@ success:
24684         nr_pages = (end - start) >> PAGE_SHIFT;
24685         if (!lock)
24686                 nr_pages = -nr_pages;
24687 -       mm->locked_vm += nr_pages;
24688 +       vx_vmlocked_add(mm, nr_pages);
24689  
24690         /*
24691          * vm_flags is protected by the mmap_sem held in write mode.
24692 @@ -492,7 +493,7 @@ static int do_mlock(unsigned long start,
24693  
24694  SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
24695  {
24696 -       unsigned long locked;
24697 +       unsigned long locked, grow;
24698         unsigned long lock_limit;
24699         int error = -ENOMEM;
24700  
24701 @@ -505,8 +506,10 @@ SYSCALL_DEFINE2(mlock, unsigned long, st
24702         len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
24703         start &= PAGE_MASK;
24704  
24705 -       locked = len >> PAGE_SHIFT;
24706 -       locked += current->mm->locked_vm;
24707 +       grow = len >> PAGE_SHIFT;
24708 +       if (!vx_vmlocked_avail(current->mm, grow))
24709 +               goto out;
24710 +       locked = current->mm->locked_vm + grow;
24711  
24712         lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
24713         lock_limit >>= PAGE_SHIFT;
24714 @@ -514,6 +517,7 @@ SYSCALL_DEFINE2(mlock, unsigned long, st
24715         /* check against resource limits */
24716         if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
24717                 error = do_mlock(start, len, 1);
24718 +out:
24719         up_write(&current->mm->mmap_sem);
24720         return error;
24721  }
24722 @@ -575,6 +579,8 @@ SYSCALL_DEFINE1(mlockall, int, flags)
24723         lock_limit >>= PAGE_SHIFT;
24724  
24725         ret = -ENOMEM;
24726 +       if (!vx_vmlocked_avail(current->mm, current->mm->total_vm))
24727 +               goto out;
24728         if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
24729             capable(CAP_IPC_LOCK))
24730                 ret = do_mlockall(flags);
24731 @@ -652,8 +658,10 @@ void *alloc_locked_buffer(size_t size)
24732         if (!buffer)
24733                 goto out;
24734  
24735 -       current->mm->total_vm  += pgsz;
24736 -       current->mm->locked_vm += pgsz;
24737 +       // current->mm->total_vm  += pgsz;
24738 +       vx_vmpages_add(current->mm, pgsz);
24739 +       // current->mm->locked_vm += pgsz;
24740 +       vx_vmlocked_add(current->mm, pgsz);
24741  
24742   out:
24743         up_write(&current->mm->mmap_sem);
24744 @@ -666,8 +674,10 @@ void release_locked_buffer(void *buffer,
24745  
24746         down_write(&current->mm->mmap_sem);
24747  
24748 -       current->mm->total_vm  -= pgsz;
24749 -       current->mm->locked_vm -= pgsz;
24750 +       // current->mm->total_vm  -= pgsz;
24751 +       vx_vmpages_sub(current->mm, pgsz);
24752 +       // current->mm->locked_vm -= pgsz;
24753 +       vx_vmlocked_sub(current->mm, pgsz);
24754  
24755         up_write(&current->mm->mmap_sem);
24756  }
24757 diff -NurpP --minimal linux-2.6.30.2/mm/mmap.c linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/mmap.c
24758 --- linux-2.6.30.2/mm/mmap.c    2009-07-23 13:28:48.000000000 +0200
24759 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/mmap.c 2009-07-23 13:50:49.000000000 +0200
24760 @@ -1222,7 +1222,8 @@ munmap_back:
24761         if (correct_wcount)
24762                 atomic_inc(&inode->i_writecount);
24763  out:
24764 -       mm->total_vm += len >> PAGE_SHIFT;
24765 +       // mm->total_vm += len >> PAGE_SHIFT;
24766 +       vx_vmpages_add(mm, len >> PAGE_SHIFT);
24767         vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
24768         if (vm_flags & VM_LOCKED) {
24769                 /*
24770 @@ -1231,7 +1232,8 @@ out:
24771                 long nr_pages = mlock_vma_pages_range(vma, addr, addr + len);
24772                 if (nr_pages < 0)
24773                         return nr_pages;        /* vma gone! */
24774 -               mm->locked_vm += (len >> PAGE_SHIFT) - nr_pages;
24775 +               // mm->locked_vm += (len >> PAGE_SHIFT) - nr_pages;
24776 +               vx_vmlocked_add(mm, (len >> PAGE_SHIFT) - nr_pages);
24777         } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
24778                 make_pages_present(addr, addr + len);
24779         return addr;
24780 @@ -1578,9 +1580,9 @@ static int acct_stack_growth(struct vm_a
24781                 return -ENOMEM;
24782  
24783         /* Ok, everything looks good - let it rip */
24784 -       mm->total_vm += grow;
24785 +       vx_vmpages_add(mm, grow);
24786         if (vma->vm_flags & VM_LOCKED)
24787 -               mm->locked_vm += grow;
24788 +               vx_vmlocked_add(mm, grow);
24789         vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
24790         return 0;
24791  }
24792 @@ -1755,7 +1757,8 @@ static void remove_vma_list(struct mm_st
24793         do {
24794                 long nrpages = vma_pages(vma);
24795  
24796 -               mm->total_vm -= nrpages;
24797 +               // mm->total_vm -= nrpages;
24798 +               vx_vmpages_sub(mm, nrpages);
24799                 vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
24800                 vma = remove_vma(vma);
24801         } while (vma);
24802 @@ -1927,7 +1930,8 @@ int do_munmap(struct mm_struct *mm, unsi
24803                 struct vm_area_struct *tmp = vma;
24804                 while (tmp && tmp->vm_start < end) {
24805                         if (tmp->vm_flags & VM_LOCKED) {
24806 -                               mm->locked_vm -= vma_pages(tmp);
24807 +                               // mm->locked_vm -= vma_pages(tmp);
24808 +                               vx_vmlocked_sub(mm, vma_pages(tmp));
24809                                 munlock_vma_pages_all(tmp);
24810                         }
24811                         tmp = tmp->vm_next;
24812 @@ -2016,6 +2020,8 @@ unsigned long do_brk(unsigned long addr,
24813                 lock_limit >>= PAGE_SHIFT;
24814                 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
24815                         return -EAGAIN;
24816 +               if (!vx_vmlocked_avail(mm, len >> PAGE_SHIFT))
24817 +                       return -ENOMEM;
24818         }
24819  
24820         /*
24821 @@ -2042,7 +2048,8 @@ unsigned long do_brk(unsigned long addr,
24822         if (mm->map_count > sysctl_max_map_count)
24823                 return -ENOMEM;
24824  
24825 -       if (security_vm_enough_memory(len >> PAGE_SHIFT))
24826 +       if (security_vm_enough_memory(len >> PAGE_SHIFT) ||
24827 +               !vx_vmpages_avail(mm, len >> PAGE_SHIFT))
24828                 return -ENOMEM;
24829  
24830         /* Can we just expand an old private anonymous mapping? */
24831 @@ -2068,10 +2075,13 @@ unsigned long do_brk(unsigned long addr,
24832         vma->vm_page_prot = vm_get_page_prot(flags);
24833         vma_link(mm, vma, prev, rb_link, rb_parent);
24834  out:
24835 -       mm->total_vm += len >> PAGE_SHIFT;
24836 +       // mm->total_vm += len >> PAGE_SHIFT;
24837 +       vx_vmpages_add(mm, len >> PAGE_SHIFT);
24838 +
24839         if (flags & VM_LOCKED) {
24840                 if (!mlock_vma_pages_range(vma, addr, addr + len))
24841 -                       mm->locked_vm += (len >> PAGE_SHIFT);
24842 +                       // mm->locked_vm += (len >> PAGE_SHIFT);
24843 +                       vx_vmlocked_add(mm, len >> PAGE_SHIFT);
24844         }
24845         return addr;
24846  }
24847 @@ -2114,6 +2124,11 @@ void exit_mmap(struct mm_struct *mm)
24848         free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
24849         tlb_finish_mmu(tlb, 0, end);
24850  
24851 +       set_mm_counter(mm, file_rss, 0);
24852 +       set_mm_counter(mm, anon_rss, 0);
24853 +       vx_vmpages_sub(mm, mm->total_vm);
24854 +       vx_vmlocked_sub(mm, mm->locked_vm);
24855 +
24856         /*
24857          * Walk the list again, actually closing and freeing it,
24858          * with preemption enabled, without holding any MM locks.
24859 @@ -2153,7 +2168,8 @@ int insert_vm_struct(struct mm_struct * 
24860         if (__vma && __vma->vm_start < vma->vm_end)
24861                 return -ENOMEM;
24862         if ((vma->vm_flags & VM_ACCOUNT) &&
24863 -            security_vm_enough_memory_mm(mm, vma_pages(vma)))
24864 +               (security_vm_enough_memory_mm(mm, vma_pages(vma)) ||
24865 +               !vx_vmpages_avail(mm, vma_pages(vma))))
24866                 return -ENOMEM;
24867         vma_link(mm, vma, prev, rb_link, rb_parent);
24868         return 0;
24869 @@ -2229,6 +2245,8 @@ int may_expand_vm(struct mm_struct *mm, 
24870  
24871         if (cur + npages > lim)
24872                 return 0;
24873 +       if (!vx_vmpages_avail(mm, npages))
24874 +               return 0;
24875         return 1;
24876  }
24877  
24878 @@ -2306,8 +2324,7 @@ int install_special_mapping(struct mm_st
24879                 return -ENOMEM;
24880         }
24881  
24882 -       mm->total_vm += len >> PAGE_SHIFT;
24883 -
24884 +       vx_vmpages_add(mm, len >> PAGE_SHIFT);
24885         return 0;
24886  }
24887  
24888 diff -NurpP --minimal linux-2.6.30.2/mm/mremap.c linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/mremap.c
24889 --- linux-2.6.30.2/mm/mremap.c  2009-03-24 14:22:45.000000000 +0100
24890 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/mremap.c       2009-07-04 01:11:39.000000000 +0200
24891 @@ -19,6 +19,7 @@
24892  #include <linux/security.h>
24893  #include <linux/syscalls.h>
24894  #include <linux/mmu_notifier.h>
24895 +#include <linux/vs_memory.h>
24896  
24897  #include <asm/uaccess.h>
24898  #include <asm/cacheflush.h>
24899 @@ -220,7 +221,7 @@ static unsigned long move_vma(struct vm_
24900          * If this were a serious issue, we'd add a flag to do_munmap().
24901          */
24902         hiwater_vm = mm->hiwater_vm;
24903 -       mm->total_vm += new_len >> PAGE_SHIFT;
24904 +       vx_vmpages_add(mm, new_len >> PAGE_SHIFT);
24905         vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT);
24906  
24907         if (do_munmap(mm, old_addr, old_len) < 0) {
24908 @@ -238,7 +239,7 @@ static unsigned long move_vma(struct vm_
24909         }
24910  
24911         if (vm_flags & VM_LOCKED) {
24912 -               mm->locked_vm += new_len >> PAGE_SHIFT;
24913 +               vx_vmlocked_add(mm, new_len >> PAGE_SHIFT);
24914                 if (new_len > old_len)
24915                         mlock_vma_pages_range(new_vma, new_addr + old_len,
24916                                                        new_addr + new_len);
24917 @@ -349,6 +350,9 @@ unsigned long do_mremap(unsigned long ad
24918                 ret = -EAGAIN;
24919                 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
24920                         goto out;
24921 +               if (!vx_vmlocked_avail(current->mm,
24922 +                       (new_len - old_len) >> PAGE_SHIFT))
24923 +                       goto out;
24924         }
24925         if (!may_expand_vm(mm, (new_len - old_len) >> PAGE_SHIFT)) {
24926                 ret = -ENOMEM;
24927 @@ -377,10 +381,12 @@ unsigned long do_mremap(unsigned long ad
24928                         vma_adjust(vma, vma->vm_start,
24929                                 addr + new_len, vma->vm_pgoff, NULL);
24930  
24931 -                       mm->total_vm += pages;
24932 +                       // mm->total_vm += pages;
24933 +                       vx_vmpages_add(mm, pages);
24934                         vm_stat_account(mm, vma->vm_flags, vma->vm_file, pages);
24935                         if (vma->vm_flags & VM_LOCKED) {
24936 -                               mm->locked_vm += pages;
24937 +                               // mm->locked_vm += pages;
24938 +                               vx_vmlocked_add(mm, pages);
24939                                 mlock_vma_pages_range(vma, addr + old_len,
24940                                                    addr + new_len);
24941                         }
24942 diff -NurpP --minimal linux-2.6.30.2/mm/nommu.c linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/nommu.c
24943 --- linux-2.6.30.2/mm/nommu.c   2009-06-11 17:13:27.000000000 +0200
24944 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/nommu.c        2009-07-04 01:11:39.000000000 +0200
24945 @@ -1343,7 +1343,7 @@ unsigned long do_mmap_pgoff(struct file 
24946         /* okay... we have a mapping; now we have to register it */
24947         result = vma->vm_start;
24948  
24949 -       current->mm->total_vm += len >> PAGE_SHIFT;
24950 +       vx_vmpages_add(current->mm, len >> PAGE_SHIFT);
24951  
24952  share:
24953         add_vma_to_mm(current->mm, vma);
24954 @@ -1614,7 +1614,7 @@ void exit_mmap(struct mm_struct *mm)
24955  
24956         kenter("");
24957  
24958 -       mm->total_vm = 0;
24959 +       vx_vmpages_sub(mm, mm->total_vm);
24960  
24961         while ((vma = mm->mmap)) {
24962                 mm->mmap = vma->vm_next;
24963 diff -NurpP --minimal linux-2.6.30.2/mm/oom_kill.c linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/oom_kill.c
24964 --- linux-2.6.30.2/mm/oom_kill.c        2009-06-11 17:13:27.000000000 +0200
24965 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/oom_kill.c     2009-07-04 01:11:39.000000000 +0200
24966 @@ -27,6 +27,7 @@
24967  #include <linux/notifier.h>
24968  #include <linux/memcontrol.h>
24969  #include <linux/security.h>
24970 +#include <linux/vs_memory.h>
24971  
24972  int sysctl_panic_on_oom;
24973  int sysctl_oom_kill_allocating_task;
24974 @@ -72,6 +73,12 @@ unsigned long badness(struct task_struct
24975         points = mm->total_vm;
24976  
24977         /*
24978 +        * add points for context badness
24979 +        */
24980 +
24981 +       points += vx_badness(p, mm);
24982 +
24983 +       /*
24984          * After this unlock we can no longer dereference local variable `mm'
24985          */
24986         task_unlock(p);
24987 @@ -160,8 +167,8 @@ unsigned long badness(struct task_struct
24988         }
24989  
24990  #ifdef DEBUG
24991 -       printk(KERN_DEBUG "OOMkill: task %d (%s) got %lu points\n",
24992 -       p->pid, p->comm, points);
24993 +       printk(KERN_DEBUG "OOMkill: task %d:#%u (%s) got %lu points\n",
24994 +               task_pid_nr(p), p->xid, p->comm, points);
24995  #endif
24996         return points;
24997  }
24998 @@ -330,8 +337,8 @@ static void __oom_kill_task(struct task_
24999         }
25000  
25001         if (verbose)
25002 -               printk(KERN_ERR "Killed process %d (%s)\n",
25003 -                               task_pid_nr(p), p->comm);
25004 +               printk(KERN_ERR "Killed process %d:#%u (%s)\n",
25005 +                               task_pid_nr(p), p->xid, p->comm);
25006  
25007         /*
25008          * We give our sacrificial lamb high priority and access to
25009 @@ -415,8 +422,8 @@ static int oom_kill_process(struct task_
25010                 return 0;
25011         }
25012  
25013 -       printk(KERN_ERR "%s: kill process %d (%s) score %li or a child\n",
25014 -                                       message, task_pid_nr(p), p->comm, points);
25015 +       printk(KERN_ERR "%s: kill process %d:#%u (%s) score %li or a child\n",
25016 +                               message, task_pid_nr(p), p->xid, p->comm, points);
25017  
25018         /* Try to kill a child first */
25019         list_for_each_entry(c, &p->children, sibling) {
25020 diff -NurpP --minimal linux-2.6.30.2/mm/page_alloc.c linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/page_alloc.c
25021 --- linux-2.6.30.2/mm/page_alloc.c      2009-07-23 13:28:48.000000000 +0200
25022 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/page_alloc.c   2009-07-04 01:11:39.000000000 +0200
25023 @@ -46,6 +46,8 @@
25024  #include <linux/page-isolation.h>
25025  #include <linux/page_cgroup.h>
25026  #include <linux/debugobjects.h>
25027 +#include <linux/vs_base.h>
25028 +#include <linux/vs_limit.h>
25029  
25030  #include <asm/tlbflush.h>
25031  #include <asm/div64.h>
25032 @@ -1842,6 +1844,9 @@ void si_meminfo(struct sysinfo *val)
25033         val->totalhigh = totalhigh_pages;
25034         val->freehigh = nr_free_highpages();
25035         val->mem_unit = PAGE_SIZE;
25036 +
25037 +       if (vx_flags(VXF_VIRT_MEM, 0))
25038 +               vx_vsi_meminfo(val);
25039  }
25040  
25041  EXPORT_SYMBOL(si_meminfo);
25042 @@ -1862,6 +1867,9 @@ void si_meminfo_node(struct sysinfo *val
25043         val->freehigh = 0;
25044  #endif
25045         val->mem_unit = PAGE_SIZE;
25046 +
25047 +       if (vx_flags(VXF_VIRT_MEM, 0))
25048 +               vx_vsi_meminfo(val);
25049  }
25050  #endif
25051  
25052 diff -NurpP --minimal linux-2.6.30.2/mm/rmap.c linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/rmap.c
25053 --- linux-2.6.30.2/mm/rmap.c    2009-06-11 17:13:27.000000000 +0200
25054 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/rmap.c 2009-07-04 01:11:39.000000000 +0200
25055 @@ -50,6 +50,7 @@
25056  #include <linux/memcontrol.h>
25057  #include <linux/mmu_notifier.h>
25058  #include <linux/migrate.h>
25059 +#include <linux/vs_memory.h>
25060  
25061  #include <asm/tlbflush.h>
25062  
25063 diff -NurpP --minimal linux-2.6.30.2/mm/shmem.c linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/shmem.c
25064 --- linux-2.6.30.2/mm/shmem.c   2009-06-11 17:13:27.000000000 +0200
25065 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/shmem.c        2009-07-04 01:11:39.000000000 +0200
25066 @@ -1776,7 +1776,7 @@ static int shmem_statfs(struct dentry *d
25067  {
25068         struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
25069  
25070 -       buf->f_type = TMPFS_MAGIC;
25071 +       buf->f_type = TMPFS_SUPER_MAGIC;
25072         buf->f_bsize = PAGE_CACHE_SIZE;
25073         buf->f_namelen = NAME_MAX;
25074         spin_lock(&sbinfo->stat_lock);
25075 @@ -2345,7 +2345,7 @@ static int shmem_fill_super(struct super
25076         sb->s_maxbytes = SHMEM_MAX_BYTES;
25077         sb->s_blocksize = PAGE_CACHE_SIZE;
25078         sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
25079 -       sb->s_magic = TMPFS_MAGIC;
25080 +       sb->s_magic = TMPFS_SUPER_MAGIC;
25081         sb->s_op = &shmem_ops;
25082         sb->s_time_gran = 1;
25083  #ifdef CONFIG_TMPFS_POSIX_ACL
25084 diff -NurpP --minimal linux-2.6.30.2/mm/slab.c linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/slab.c
25085 --- linux-2.6.30.2/mm/slab.c    2009-06-11 17:13:27.000000000 +0200
25086 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/slab.c 2009-07-04 01:11:39.000000000 +0200
25087 @@ -510,6 +510,8 @@ struct kmem_cache {
25088  #define STATS_INC_FREEMISS(x)  do { } while (0)
25089  #endif
25090  
25091 +#include "slab_vs.h"
25092 +
25093  #if DEBUG
25094  
25095  /*
25096 @@ -3284,6 +3286,7 @@ retry:
25097  
25098         obj = slab_get_obj(cachep, slabp, nodeid);
25099         check_slabp(cachep, slabp);
25100 +       vx_slab_alloc(cachep, flags);
25101         l3->free_objects--;
25102         /* move slabp to correct slabp list: */
25103         list_del(&slabp->list);
25104 @@ -3358,6 +3361,7 @@ __cache_alloc_node(struct kmem_cache *ca
25105         /* ___cache_alloc_node can fall back to other nodes */
25106         ptr = ____cache_alloc_node(cachep, flags, nodeid);
25107    out:
25108 +       vx_slab_alloc(cachep, flags);
25109         local_irq_restore(save_flags);
25110         ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
25111  
25112 @@ -3531,6 +3535,7 @@ static inline void __cache_free(struct k
25113  
25114         check_irq_off();
25115         objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
25116 +       vx_slab_free(cachep);
25117  
25118         /*
25119          * Skip calling cache_free_alien() when the platform is not numa.
25120 diff -NurpP --minimal linux-2.6.30.2/mm/slab_vs.h linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/slab_vs.h
25121 --- linux-2.6.30.2/mm/slab_vs.h 1970-01-01 01:00:00.000000000 +0100
25122 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/slab_vs.h      2009-07-04 01:11:39.000000000 +0200
25123 @@ -0,0 +1,27 @@
25124 +
25125 +#include <linux/vserver/context.h>
25126 +
25127 +#include <linux/vs_context.h>
25128 +
25129 +/* Add @cachep's buffer_size to the current task's vx_info slab counter. */
25130 +static inline void vx_slab_alloc(struct kmem_cache *cachep, gfp_t flags)
25131 +{
25132 +       /* counter slot comes from the cache's own gfpflags, not from @flags */
25133 +       int zone = gfp_zone(cachep->gfpflags);
25134 +
25135 +       if (current->vx_info)   /* no vx_info attached: nothing to charge */
25136 +               atomic_add(cachep->buffer_size,
25137 +                          &current->vx_info->cacct.slab[zone]);
25138 +}
25139 +
25140 +/* Take @cachep's buffer_size back off the current task's slab counter. */
25141 +static inline void vx_slab_free(struct kmem_cache *cachep)
25142 +{
25143 +       /* same slot selection as on allocation: the cache's own gfpflags */
25144 +       int zone = gfp_zone(cachep->gfpflags);
25145 +
25146 +       if (current->vx_info)   /* no vx_info attached: nothing to uncharge */
25147 +               atomic_sub(cachep->buffer_size,
25148 +                          &current->vx_info->cacct.slab[zone]);
25149 +}
25150 +
25151 diff -NurpP --minimal linux-2.6.30.2/mm/swapfile.c linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/swapfile.c
25152 --- linux-2.6.30.2/mm/swapfile.c        2009-03-24 14:22:45.000000000 +0100
25153 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/mm/swapfile.c     2009-07-04 01:11:39.000000000 +0200
25154 @@ -34,6 +34,8 @@
25155  #include <asm/tlbflush.h>
25156  #include <linux/swapops.h>
25157  #include <linux/page_cgroup.h>
25158 +#include <linux/vs_base.h>
25159 +#include <linux/vs_memory.h>
25160  
25161  static DEFINE_SPINLOCK(swap_lock);
25162  static unsigned int nr_swapfiles;
25163 @@ -1935,6 +1937,8 @@ void si_swapinfo(struct sysinfo *val)
25164         val->freeswap = nr_swap_pages + nr_to_be_unused;
25165         val->totalswap = total_swap_pages + nr_to_be_unused;
25166         spin_unlock(&swap_lock);
25167 +       if (vx_flags(VXF_VIRT_MEM, 0))
25168 +               vx_vsi_swapinfo(val);
25169  }
25170  
25171  /*
25172 diff -NurpP --minimal linux-2.6.30.2/net/core/dev.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/core/dev.c
25173 --- linux-2.6.30.2/net/core/dev.c       2009-06-11 17:13:28.000000000 +0200
25174 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/core/dev.c    2009-07-04 01:11:39.000000000 +0200
25175 @@ -126,6 +126,7 @@
25176  #include <linux/in.h>
25177  #include <linux/jhash.h>
25178  #include <linux/random.h>
25179 +#include <linux/vs_inet.h>
25180  
25181  #include "net-sysfs.h"
25182  
25183 @@ -2904,6 +2905,8 @@ static int dev_ifconf(struct net *net, c
25184  
25185         total = 0;
25186         for_each_netdev(net, dev) {
25187 +               if (!nx_dev_visible(current->nx_info, dev))
25188 +                       continue;
25189                 for (i = 0; i < NPROTO; i++) {
25190                         if (gifconf_list[i]) {
25191                                 int done;
25192 @@ -2972,6 +2975,9 @@ static void dev_seq_printf_stats(struct 
25193  {
25194         const struct net_device_stats *stats = dev_get_stats(dev);
25195  
25196 +       if (!nx_dev_visible(current->nx_info, dev))
25197 +               return;
25198 +
25199         seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
25200                    "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
25201                    dev->name, stats->rx_bytes, stats->rx_packets,
25202 @@ -4915,6 +4921,15 @@ int dev_change_net_namespace(struct net_
25203                 goto out;
25204  #endif
25205  
25206 +#ifdef CONFIG_SYSFS
25207 +       /* Don't allow real devices to be moved when sysfs
25208 +        * is enabled.
25209 +        */
25210 +       err = -EINVAL;
25211 +       if (dev->dev.parent)
25212 +               goto out;
25213 +#endif
25214 +
25215         /* Ensure the device has been registrered */
25216         err = -EINVAL;
25217         if (dev->reg_state != NETREG_REGISTERED)
25227 diff -NurpP --minimal linux-2.6.30.2/net/core/net-sysfs.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/core/net-sysfs.c
25228 --- linux-2.6.30.2/net/core/net-sysfs.c 2009-06-11 17:13:29.000000000 +0200
25229 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/core/net-sysfs.c      2009-07-04 01:11:39.000000000 +0200
25230 @@ -512,6 +512,9 @@ int netdev_register_kobject(struct net_d
25231         if (dev_net(net) != &init_net)
25232                 return 0;
25233  
25234 +       if (dev_net(net) != &init_net)
25235 +               return 0;
25236 +
25237         return device_add(dev);
25238  }
25239  
25240 diff -NurpP --minimal linux-2.6.30.2/net/core/rtnetlink.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/core/rtnetlink.c
25241 --- linux-2.6.30.2/net/core/rtnetlink.c 2009-06-11 17:13:29.000000000 +0200
25242 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/core/rtnetlink.c      2009-07-04 01:11:39.000000000 +0200
25243 @@ -690,6 +690,8 @@ static int rtnl_dump_ifinfo(struct sk_bu
25244  
25245         idx = 0;
25246         for_each_netdev(net, dev) {
25247 +               if (!nx_dev_visible(skb->sk->sk_nx_info, dev))
25248 +                       continue;
25249                 if (idx < s_idx)
25250                         goto cont;
25251                 if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
25252 @@ -1235,6 +1237,9 @@ void rtmsg_ifinfo(int type, struct net_d
25253         struct sk_buff *skb;
25254         int err = -ENOBUFS;
25255  
25256 +       if (!nx_dev_visible(current->nx_info, dev))
25257 +               return;
25258 +
25259         skb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL);
25260         if (skb == NULL)
25261                 goto errout;
25262 diff -NurpP --minimal linux-2.6.30.2/net/core/sock.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/core/sock.c
25263 --- linux-2.6.30.2/net/core/sock.c      2009-06-11 17:13:29.000000000 +0200
25264 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/core/sock.c   2009-07-04 01:11:39.000000000 +0200
25265 @@ -125,6 +125,10 @@
25266  #include <linux/ipsec.h>
25267  
25268  #include <linux/filter.h>
25269 +#include <linux/vs_socket.h>
25270 +#include <linux/vs_limit.h>
25271 +#include <linux/vs_context.h>
25272 +#include <linux/vs_network.h>
25273  
25274  #ifdef CONFIG_INET
25275  #include <net/tcp.h>
25276 @@ -947,6 +951,8 @@ static struct sock *sk_prot_alloc(struct
25277                 if (!try_module_get(prot->owner))
25278                         goto out_free_sec;
25279 +               sock_vx_init(sk);
25280 +               sock_nx_init(sk);
25281         }
25282  
25283         return sk;
25284  
25285 @@ -1024,6 +1030,11 @@ void sk_free(struct sock *sk)
25286                        __func__, atomic_read(&sk->sk_omem_alloc));
25287  
25288         put_net(sock_net(sk));
25289 +       vx_sock_dec(sk);
25290 +       clr_vx_info(&sk->sk_vx_info);
25291 +       sk->sk_xid = -1;
25292 +       clr_nx_info(&sk->sk_nx_info);
25293 +       sk->sk_nid = -1;
25294         sk_prot_free(sk->sk_prot_creator, sk);
25295  }
25296  
25297 @@ -1059,6 +1070,8 @@ struct sock *sk_clone(const struct sock 
25298  
25299                 /* SANITY */
25300                 get_net(sock_net(newsk));
25301 +               sock_vx_init(newsk);
25302 +               sock_nx_init(newsk);
25303                 sk_node_init(&newsk->sk_node);
25304                 sock_lock_init(newsk);
25305                 bh_lock_sock(newsk);
25306 @@ -1105,6 +1118,12 @@ struct sock *sk_clone(const struct sock 
25307                 newsk->sk_priority = 0;
25308                 atomic_set(&newsk->sk_refcnt, 2);
25309  
25310 +               set_vx_info(&newsk->sk_vx_info, sk->sk_vx_info);
25311 +               newsk->sk_xid = sk->sk_xid;
25312 +               vx_sock_inc(newsk);
25313 +               set_nx_info(&newsk->sk_nx_info, sk->sk_nx_info);
25314 +               newsk->sk_nid = sk->sk_nid;
25315 +
25316                 /*
25317                  * Increment the counter in the same struct proto as the master
25318                  * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
25319 @@ -1794,6 +1813,11 @@ void sock_init_data(struct socket *sock,
25320  
25321         sk->sk_stamp = ktime_set(-1L, 0);
25322  
25323 +       set_vx_info(&sk->sk_vx_info, current->vx_info);
25324 +       sk->sk_xid = vx_current_xid();
25325 +       vx_sock_inc(sk);
25326 +       set_nx_info(&sk->sk_nx_info, current->nx_info);
25327 +       sk->sk_nid = nx_current_nid();
25328         atomic_set(&sk->sk_refcnt, 1);
25329         atomic_set(&sk->sk_drops, 0);
25330  }
25331 diff -NurpP --minimal linux-2.6.30.2/net/ipv4/af_inet.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/af_inet.c
25332 --- linux-2.6.30.2/net/ipv4/af_inet.c   2009-06-11 17:13:29.000000000 +0200
25333 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/af_inet.c        2009-07-04 01:11:39.000000000 +0200
25334 @@ -115,6 +115,7 @@
25335  #ifdef CONFIG_IP_MROUTE
25336  #include <linux/mroute.h>
25337  #endif
25338 +#include <linux/vs_limit.h>
25339  
25340  extern void ip_mc_drop_socket(struct sock *sk);
25341  
25342 @@ -325,9 +326,12 @@ lookup_protocol:
25343         }
25344  
25345         err = -EPERM;
25346 +       if ((protocol == IPPROTO_ICMP) &&
25347 +               nx_capable(answer->capability, NXC_RAW_ICMP))
25348 +               goto override;
25349         if (answer->capability > 0 && !capable(answer->capability))
25350                 goto out_rcu_unlock;
25351 -
25352 +override:
25353         err = -EAFNOSUPPORT;
25354         if (!inet_netns_ok(net, protocol))
25355                 goto out_rcu_unlock;
25356 @@ -444,6 +448,7 @@ int inet_bind(struct socket *sock, struc
25357         struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
25358         struct sock *sk = sock->sk;
25359         struct inet_sock *inet = inet_sk(sk);
25360 +       struct nx_v4_sock_addr nsa;
25361         unsigned short snum;
25362         int chk_addr_ret;
25363         int err;
25364 @@ -457,7 +462,11 @@ int inet_bind(struct socket *sock, struc
25365         if (addr_len < sizeof(struct sockaddr_in))
25366                 goto out;
25367  
25368 -       chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
25369 +       err = v4_map_sock_addr(inet, addr, &nsa);
25370 +       if (err)
25371 +               goto out;
25372 +
25373 +       chk_addr_ret = inet_addr_type(sock_net(sk), nsa.saddr);
25374  
25375         /* Not specified by any standard per-se, however it breaks too
25376          * many applications when removed.  It is unfortunate since
25377 @@ -469,7 +478,7 @@ int inet_bind(struct socket *sock, struc
25378         err = -EADDRNOTAVAIL;
25379         if (!sysctl_ip_nonlocal_bind &&
25380             !(inet->freebind || inet->transparent) &&
25381 -           addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
25382 +           nsa.saddr != htonl(INADDR_ANY) &&
25383             chk_addr_ret != RTN_LOCAL &&
25384             chk_addr_ret != RTN_MULTICAST &&
25385             chk_addr_ret != RTN_BROADCAST)
25386 @@ -494,7 +503,7 @@ int inet_bind(struct socket *sock, struc
25387         if (sk->sk_state != TCP_CLOSE || inet->num)
25388                 goto out_release_sock;
25389  
25390 -       inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr;
25391 +       v4_set_sock_addr(inet, &nsa);
25392         if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
25393                 inet->saddr = 0;  /* Use device */
25394  
25395 @@ -687,11 +696,13 @@ int inet_getname(struct socket *sock, st
25396                      peer == 1))
25397                         return -ENOTCONN;
25398                 sin->sin_port = inet->dport;
25399 -               sin->sin_addr.s_addr = inet->daddr;
25400 +               sin->sin_addr.s_addr =
25401 +                       nx_map_sock_lback(sk->sk_nx_info, inet->daddr);
25402         } else {
25403                 __be32 addr = inet->rcv_saddr;
25404                 if (!addr)
25405                         addr = inet->saddr;
25406 +               addr = nx_map_sock_lback(sk->sk_nx_info, addr);
25407                 sin->sin_port = inet->sport;
25408                 sin->sin_addr.s_addr = addr;
25409         }
25410 diff -NurpP --minimal linux-2.6.30.2/net/ipv4/devinet.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/devinet.c
25411 --- linux-2.6.30.2/net/ipv4/devinet.c   2009-06-11 17:13:29.000000000 +0200
25412 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/devinet.c        2009-07-04 01:11:39.000000000 +0200
25413 @@ -413,6 +413,7 @@ struct in_device *inetdev_by_index(struc
25414         return in_dev;
25415  }
25416  
25417 +
25418  /* Called only from RTNL semaphored context. No locks. */
25419  
25420  struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
25421 @@ -653,6 +654,8 @@ int devinet_ioctl(struct net *net, unsig
25422                 *colon = ':';
25423  
25424         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
25425 +               struct nx_info *nxi = current->nx_info;
25426 +
25427                 if (tryaddrmatch) {
25428                         /* Matthias Andree */
25429                         /* compare label and address (4.4BSD style) */
25430 @@ -661,6 +664,8 @@ int devinet_ioctl(struct net *net, unsig
25431                            This is checked above. */
25432                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
25433                              ifap = &ifa->ifa_next) {
25434 +                               if (!nx_v4_ifa_visible(nxi, ifa))
25435 +                                       continue;
25436                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
25437                                     sin_orig.sin_addr.s_addr ==
25438                                                         ifa->ifa_address) {
25439 @@ -673,9 +678,12 @@ int devinet_ioctl(struct net *net, unsig
25440                    comparing just the label */
25441                 if (!ifa) {
25442                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
25443 -                            ifap = &ifa->ifa_next)
25444 +                            ifap = &ifa->ifa_next) {
25445 +                               if (!nx_v4_ifa_visible(nxi, ifa))
25446 +                                       continue;
25447                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
25448                                         break;
25449 +                       }
25450                 }
25451         }
25452  
25453 @@ -826,6 +834,8 @@ static int inet_gifconf(struct net_devic
25454                 goto out;
25455  
25456         for (; ifa; ifa = ifa->ifa_next) {
25457 +               if (!nx_v4_ifa_visible(current->nx_info, ifa))
25458 +                       continue;
25459                 if (!buf) {
25460                         done += sizeof(ifr);
25461                         continue;
25462 @@ -1164,6 +1174,7 @@ static int inet_dump_ifaddr(struct sk_bu
25463         struct net_device *dev;
25464         struct in_device *in_dev;
25465         struct in_ifaddr *ifa;
25466 +       struct sock *sk = skb->sk;
25467         int s_ip_idx, s_idx = cb->args[0];
25468  
25469         s_ip_idx = ip_idx = cb->args[1];
25470 @@ -1178,6 +1189,8 @@ static int inet_dump_ifaddr(struct sk_bu
25471  
25472                 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
25473                      ifa = ifa->ifa_next, ip_idx++) {
25474 +                       if (sk && !nx_v4_ifa_visible(sk->sk_nx_info, ifa))
25475 +                               continue;
25476                         if (ip_idx < s_ip_idx)
25477                                 continue;
25478                         if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
25479 diff -NurpP --minimal linux-2.6.30.2/net/ipv4/fib_hash.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/fib_hash.c
25480 --- linux-2.6.30.2/net/ipv4/fib_hash.c  2009-03-24 14:22:46.000000000 +0100
25481 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/fib_hash.c       2009-07-04 01:11:39.000000000 +0200
25482 @@ -1022,7 +1022,7 @@ static int fib_seq_show(struct seq_file 
25483         prefix  = f->fn_key;
25484         mask    = FZ_MASK(iter->zone);
25485         flags   = fib_flag_trans(fa->fa_type, mask, fi);
25486 -       if (fi)
25487 +       if (fi && nx_dev_visible(current->nx_info, fi->fib_dev))
25488                 seq_printf(seq,
25489                          "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u%n",
25490                          fi->fib_dev ? fi->fib_dev->name : "*", prefix,
25491 diff -NurpP --minimal linux-2.6.30.2/net/ipv4/inet_connection_sock.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/inet_connection_sock.c
25492 --- linux-2.6.30.2/net/ipv4/inet_connection_sock.c      2009-06-11 17:13:29.000000000 +0200
25493 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/inet_connection_sock.c   2009-07-04 01:11:39.000000000 +0200
25494 @@ -49,10 +49,40 @@ void inet_get_local_port_range(int *low,
25495  }
25496  EXPORT_SYMBOL(inet_get_local_port_range);
25497  
25498 +/*
25499 + * ipv4_rcv_saddr_equal - do two sockets' IPv4 receive addresses clash?
25500 + *
25501 + * Returns 1 on a clash and 0 otherwise.  A socket that has no receive
25502 + * address is judged through its sk_nx_info instead.
25503 + */
25504 +int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
25505 +{
25506 +       __be32 a1 = inet_rcv_saddr(sk1);
25507 +       __be32 a2 = inet_rcv_saddr(sk2);
25508 +
25509 +       if (inet_v6_ipv6only(sk2))
25510 +               return 0;
25511 +
25512 +       if (a1 && a2)           /* both carry an address: plain compare */
25513 +               return a1 == a2;
25514 +
25515 +       if (a1)                 /* only sk1 has one: test against sk2's nx_info */
25516 +               return v4_addr_in_nx_info(sk2->sk_nx_info, a1,
25517 +                                         NXA_MASK_BIND) != 0;
25518 +
25519 +       if (a2)                 /* only sk2 has one: test against sk1's nx_info */
25520 +               return v4_addr_in_nx_info(sk1->sk_nx_info, a2,
25521 +                                         NXA_MASK_BIND) != 0;
25522 +
25523 +       /* neither has one: defer to the nx_info conflict check */
25524 +       if (nx_v4_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info))
25525 +               return 1;
25526 +       return 0;
25527 +}
25528 +
25529  int inet_csk_bind_conflict(const struct sock *sk,
25530                            const struct inet_bind_bucket *tb)
25531  {
25532 -       const __be32 sk_rcv_saddr = inet_rcv_saddr(sk);
25533         struct sock *sk2;
25534         struct hlist_node *node;
25535         int reuse = sk->sk_reuse;
25536 @@ -72,9 +102,7 @@ int inet_csk_bind_conflict(const struct 
25537                      sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
25538                         if (!reuse || !sk2->sk_reuse ||
25539                             sk2->sk_state == TCP_LISTEN) {
25540 -                               const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
25541 -                               if (!sk2_rcv_saddr || !sk_rcv_saddr ||
25542 -                                   sk2_rcv_saddr == sk_rcv_saddr)
25543 +                               if (ipv4_rcv_saddr_equal(sk, sk2))
25544                                         break;
25545                         }
25546                 }
25547 diff -NurpP --minimal linux-2.6.30.2/net/ipv4/inet_diag.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/inet_diag.c
25548 --- linux-2.6.30.2/net/ipv4/inet_diag.c 2009-03-24 14:22:46.000000000 +0100
25549 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/inet_diag.c      2009-07-04 01:11:39.000000000 +0200
25550 @@ -32,6 +32,8 @@
25551  #include <linux/stddef.h>
25552  
25553  #include <linux/inet_diag.h>
25554 +#include <linux/vs_network.h>
25555 +#include <linux/vs_inet.h>
25556  
25557  static const struct inet_diag_handler **inet_diag_table;
25558  
25559 @@ -118,8 +120,8 @@ static int inet_csk_diag_fill(struct soc
25560  
25561         r->id.idiag_sport = inet->sport;
25562         r->id.idiag_dport = inet->dport;
25563 -       r->id.idiag_src[0] = inet->rcv_saddr;
25564 -       r->id.idiag_dst[0] = inet->daddr;
25565 +       r->id.idiag_src[0] = nx_map_sock_lback(sk->sk_nx_info, inet->rcv_saddr);
25566 +       r->id.idiag_dst[0] = nx_map_sock_lback(sk->sk_nx_info, inet->daddr);
25567  
25568  #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
25569         if (r->idiag_family == AF_INET6) {
25570 @@ -206,8 +208,8 @@ static int inet_twsk_diag_fill(struct in
25571         r->id.idiag_cookie[1] = (u32)(((unsigned long)tw >> 31) >> 1);
25572         r->id.idiag_sport     = tw->tw_sport;
25573         r->id.idiag_dport     = tw->tw_dport;
25574 -       r->id.idiag_src[0]    = tw->tw_rcv_saddr;
25575 -       r->id.idiag_dst[0]    = tw->tw_daddr;
25576 +       r->id.idiag_src[0]    = nx_map_sock_lback(tw->tw_nx_info, tw->tw_rcv_saddr);
25577 +       r->id.idiag_dst[0]    = nx_map_sock_lback(tw->tw_nx_info, tw->tw_daddr);
25578         r->idiag_state        = tw->tw_substate;
25579         r->idiag_timer        = 3;
25580         r->idiag_expires      = DIV_ROUND_UP(tmo * 1000, HZ);
25581 @@ -264,6 +266,7 @@ static int inet_diag_get_exact(struct sk
25582         err = -EINVAL;
25583  
25584         if (req->idiag_family == AF_INET) {
25585 +               /* TODO: lback */
25586                 sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0],
25587                                  req->id.idiag_dport, req->id.idiag_src[0],
25588                                  req->id.idiag_sport, req->id.idiag_if);
25589 @@ -506,6 +509,7 @@ static int inet_csk_diag_dump(struct soc
25590                 } else
25591  #endif
25592                 {
25593 +                       /* TODO: lback */
25594                         entry.saddr = &inet->rcv_saddr;
25595                         entry.daddr = &inet->daddr;
25596                 }
25597 @@ -542,6 +546,7 @@ static int inet_twsk_diag_dump(struct in
25598                 } else
25599  #endif
25600                 {
25601 +                       /* TODO: lback */
25602                         entry.saddr = &tw->tw_rcv_saddr;
25603                         entry.daddr = &tw->tw_daddr;
25604                 }
25605 @@ -588,8 +593,8 @@ static int inet_diag_fill_req(struct sk_
25606  
25607         r->id.idiag_sport = inet->sport;
25608         r->id.idiag_dport = ireq->rmt_port;
25609 -       r->id.idiag_src[0] = ireq->loc_addr;
25610 -       r->id.idiag_dst[0] = ireq->rmt_addr;
25611 +       r->id.idiag_src[0] = nx_map_sock_lback(sk->sk_nx_info, ireq->loc_addr);
25612 +       r->id.idiag_dst[0] = nx_map_sock_lback(sk->sk_nx_info, ireq->rmt_addr);
25613         r->idiag_expires = jiffies_to_msecs(tmo);
25614         r->idiag_rqueue = 0;
25615         r->idiag_wqueue = 0;
25616 @@ -659,6 +664,7 @@ static int inet_diag_dump_reqs(struct sk
25617                                 continue;
25618  
25619                         if (bc) {
25620 +                               /* TODO: lback */
25621                                 entry.saddr =
25622  #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
25623                                         (entry.family == AF_INET6) ?
25624 @@ -729,6 +735,8 @@ static int inet_diag_dump(struct sk_buff
25625                         sk_nulls_for_each(sk, node, &ilb->head) {
25626                                 struct inet_sock *inet = inet_sk(sk);
25627  
25628 +                               if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
25629 +                                       continue;
25630                                 if (num < s_num) {
25631                                         num++;
25632                                         continue;
25633 @@ -795,6 +803,8 @@ skip_listen_ht:
25634                 sk_nulls_for_each(sk, node, &head->chain) {
25635                         struct inet_sock *inet = inet_sk(sk);
25636  
25637 +                       if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
25638 +                               continue;
25639                         if (num < s_num)
25640                                 goto next_normal;
25641                         if (!(r->idiag_states & (1 << sk->sk_state)))
25642 @@ -819,6 +829,8 @@ next_normal:
25643                         inet_twsk_for_each(tw, node,
25644                                     &head->twchain) {
25645  
25646 +                               if (!nx_check(tw->tw_nid, VS_WATCH_P | VS_IDENT))
25647 +                                       continue;
25648                                 if (num < s_num)
25649                                         goto next_dying;
25650                                 if (r->id.idiag_sport != tw->tw_sport &&
25651 diff -NurpP --minimal linux-2.6.30.2/net/ipv4/inet_hashtables.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/inet_hashtables.c
25652 --- linux-2.6.30.2/net/ipv4/inet_hashtables.c   2009-06-11 17:13:29.000000000 +0200
25653 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/inet_hashtables.c        2009-07-04 01:11:39.000000000 +0200
25654 @@ -21,6 +21,7 @@
25655  
25656  #include <net/inet_connection_sock.h>
25657  #include <net/inet_hashtables.h>
25658 +#include <net/route.h>
25659  #include <net/ip.h>
25660  
25661  /*
25662 @@ -134,6 +135,11 @@ static inline int compute_score(struct s
25663                         if (rcv_saddr != daddr)
25664                                 return -1;
25665                         score += 2;
25666 +               } else {
25667 +                       /* block non nx_info ips */
25668 +                       if (!v4_addr_in_nx_info(sk->sk_nx_info,
25669 +                               daddr, NXA_MASK_BIND))
25670 +                               return -1;
25671                 }
25672                 if (sk->sk_bound_dev_if) {
25673                         if (sk->sk_bound_dev_if != dif)
25674 @@ -151,7 +157,6 @@ static inline int compute_score(struct s
25675   * wildcarded during the search since they can never be otherwise.
25676   */
25677  
25678 -
25679  struct sock *__inet_lookup_listener(struct net *net,
25680                                     struct inet_hashinfo *hashinfo,
25681                                     const __be32 daddr, const unsigned short hnum,
25682 @@ -174,6 +179,7 @@ begin:
25683                         hiscore = score;
25684                 }
25685         }
25686 +
25687         /*
25688          * if the nulls value we got at the end of this lookup is
25689          * not the expected one, we must restart lookup.
25690 diff -NurpP --minimal linux-2.6.30.2/net/ipv4/netfilter/nf_nat_helper.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/netfilter/nf_nat_helper.c
25691 --- linux-2.6.30.2/net/ipv4/netfilter/nf_nat_helper.c   2008-12-25 00:26:37.000000000 +0100
25692 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/netfilter/nf_nat_helper.c        2009-07-04 01:11:39.000000000 +0200
25693 @@ -19,6 +19,7 @@
25694  #include <net/route.h>
25695  
25696  #include <linux/netfilter_ipv4.h>
25697 +#include <net/route.h>
25698  #include <net/netfilter/nf_conntrack.h>
25699  #include <net/netfilter/nf_conntrack_helper.h>
25700  #include <net/netfilter/nf_conntrack_ecache.h>
25701 diff -NurpP --minimal linux-2.6.30.2/net/ipv4/netfilter.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/netfilter.c
25702 --- linux-2.6.30.2/net/ipv4/netfilter.c 2009-03-24 14:22:46.000000000 +0100
25703 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/netfilter.c      2009-07-04 01:11:39.000000000 +0200
25704 @@ -4,7 +4,7 @@
25705  #include <linux/netfilter_ipv4.h>
25706  #include <linux/ip.h>
25707  #include <linux/skbuff.h>
25708 -#include <net/route.h>
25709 +// #include <net/route.h>
25710  #include <net/xfrm.h>
25711  #include <net/ip.h>
25712  #include <net/netfilter/nf_queue.h>
25713 diff -NurpP --minimal linux-2.6.30.2/net/ipv4/raw.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/raw.c
25714 --- linux-2.6.30.2/net/ipv4/raw.c       2009-06-11 17:13:30.000000000 +0200
25715 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/raw.c    2009-07-04 01:11:39.000000000 +0200
25716 @@ -117,7 +117,7 @@ static struct sock *__raw_v4_lookup(stru
25717  
25718                 if (net_eq(sock_net(sk), net) && inet->num == num       &&
25719                     !(inet->daddr && inet->daddr != raddr)              &&
25720 -                   !(inet->rcv_saddr && inet->rcv_saddr != laddr)      &&
25721 +                   v4_sock_addr_match(sk->sk_nx_info, inet, laddr)     &&
25722                     !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
25723                         goto found; /* gotcha */
25724         }
25725 @@ -372,6 +372,12 @@ static int raw_send_hdrinc(struct sock *
25726                 icmp_out_count(net, ((struct icmphdr *)
25727                         skb_transport_header(skb))->type);
25728  
25729 +       err = -EPERM;
25730 +       if (!nx_check(0, VS_ADMIN) && !capable(CAP_NET_RAW) &&
25731 +               sk->sk_nx_info &&
25732 +               !v4_addr_in_nx_info(sk->sk_nx_info, iph->saddr, NXA_MASK_BIND))
25733 +               goto error_free;
25734 +
25735         err = NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
25736                       dst_output);
25737         if (err > 0)
25738 @@ -383,6 +389,7 @@ out:
25739  
25740  error_fault:
25741         err = -EFAULT;
25742 +error_free:
25743         kfree_skb(skb);
25744  error:
25745         IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
25746 @@ -551,6 +558,13 @@ static int raw_sendmsg(struct kiocb *ioc
25747                 }
25748  
25749                 security_sk_classify_flow(sk, &fl);
25750 +               if (sk->sk_nx_info) {
25751 +                       err = ip_v4_find_src(sock_net(sk),
25752 +                               sk->sk_nx_info, &rt, &fl);
25753 +
25754 +                       if (err)
25755 +                               goto done;
25756 +               }
25757                 err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1);
25758         }
25759         if (err)
25760 @@ -620,17 +634,19 @@ static int raw_bind(struct sock *sk, str
25761  {
25762         struct inet_sock *inet = inet_sk(sk);
25763         struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
25764 +       struct nx_v4_sock_addr nsa = { 0 };
25765         int ret = -EINVAL;
25766         int chk_addr_ret;
25767  
25768         if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
25769                 goto out;
25770 -       chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
25771 +       v4_map_sock_addr(inet, addr, &nsa);
25772 +       chk_addr_ret = inet_addr_type(sock_net(sk), nsa.saddr);
25773         ret = -EADDRNOTAVAIL;
25774 -       if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
25775 +       if (nsa.saddr && chk_addr_ret != RTN_LOCAL &&
25776             chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
25777                 goto out;
25778 -       inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr;
25779 +       v4_set_sock_addr(inet, &nsa);
25780         if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
25781                 inet->saddr = 0;  /* Use device */
25782         sk_dst_reset(sk);
25783 @@ -682,7 +698,8 @@ static int raw_recvmsg(struct kiocb *ioc
25784         /* Copy the address. */
25785         if (sin) {
25786                 sin->sin_family = AF_INET;
25787 -               sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
25788 +               sin->sin_addr.s_addr =
25789 +                       nx_map_sock_lback(sk->sk_nx_info, ip_hdr(skb)->saddr);
25790                 sin->sin_port = 0;
25791                 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
25792         }
25793 @@ -859,7 +876,8 @@ static struct sock *raw_get_first(struct
25794                 struct hlist_node *node;
25795  
25796                 sk_for_each(sk, node, &state->h->ht[state->bucket])
25797 -                       if (sock_net(sk) == seq_file_net(seq))
25798 +                       if ((sock_net(sk) == seq_file_net(seq)) &&
25799 +                               nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
25800                                 goto found;
25801         }
25802         sk = NULL;
25803 @@ -875,7 +893,8 @@ static struct sock *raw_get_next(struct 
25804                 sk = sk_next(sk);
25805  try_again:
25806                 ;
25807 -       } while (sk && sock_net(sk) != seq_file_net(seq));
25808 +       } while (sk && ((sock_net(sk) != seq_file_net(seq)) ||
25809 +               !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)));
25810  
25811         if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
25812                 sk = sk_head(&state->h->ht[state->bucket]);
25813 @@ -934,7 +953,10 @@ static void raw_sock_seq_show(struct seq
25814  
25815         seq_printf(seq, "%4d: %08X:%04X %08X:%04X"
25816                 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
25817 -               i, src, srcp, dest, destp, sp->sk_state,
25818 +               i,
25819 +               nx_map_sock_lback(current_nx_info(), src), srcp,
25820 +               nx_map_sock_lback(current_nx_info(), dest), destp,
25821 +               sp->sk_state,
25822                 atomic_read(&sp->sk_wmem_alloc),
25823                 atomic_read(&sp->sk_rmem_alloc),
25824                 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
25825 diff -NurpP --minimal linux-2.6.30.2/net/ipv4/tcp.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/tcp.c
25826 --- linux-2.6.30.2/net/ipv4/tcp.c       2009-06-11 17:13:30.000000000 +0200
25827 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/tcp.c    2009-07-04 01:11:39.000000000 +0200
25828 @@ -264,6 +264,7 @@
25829  #include <linux/cache.h>
25830  #include <linux/err.h>
25831  #include <linux/crypto.h>
25832 +#include <linux/in.h>
25833  
25834  #include <net/icmp.h>
25835  #include <net/tcp.h>
25836 diff -NurpP --minimal linux-2.6.30.2/net/ipv4/tcp_ipv4.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/tcp_ipv4.c
25837 --- linux-2.6.30.2/net/ipv4/tcp_ipv4.c  2009-06-11 17:13:30.000000000 +0200
25838 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/tcp_ipv4.c       2009-07-04 01:11:39.000000000 +0200
25839 @@ -1886,6 +1886,12 @@ static void *listening_get_next(struct s
25840                 req = req->dl_next;
25841                 while (1) {
25842                         while (req) {
25843 +                               vxdprintk(VXD_CBIT(net, 6), "sk,req: %p [#%d] (from %d)",
25844 +                                       req->sk, (req->sk)?req->sk->sk_nid:0, nx_current_nid());
25845 +                               if (req->sk && !nx_check(req->sk->sk_nid, VS_WATCH_P | VS_IDENT)) {
25846 +                                       req = req->dl_next; /* advance first, else we spin */
25847 +                                       continue;
25848 +                               }
25849                                 if (req->rsk_ops->family == st->family) {
25850                                         cur = req;
25851                                         goto out;
25852 @@ -1910,6 +1916,10 @@ get_req:
25853         }
25854  get_sk:
25855         sk_nulls_for_each_from(sk, node) {
25856 +               vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)",
25857 +                       sk, sk->sk_nid, nx_current_nid());
25858 +               if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
25859 +                       continue;
25860                 if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
25861                         cur = sk;
25862                         goto out;
25863 @@ -1973,6 +1983,11 @@ static void *established_get_first(struc
25864  
25865                 spin_lock_bh(lock);
25866                 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
25867 +                       vxdprintk(VXD_CBIT(net, 6),
25868 +                               "sk,egf: %p [#%d] (from %d)",
25869 +                               sk, sk->sk_nid, nx_current_nid());
25870 +                       if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
25871 +                               continue;
25872                         if (sk->sk_family != st->family ||
25873                             !net_eq(sock_net(sk), net)) {
25874                                 continue;
25875 @@ -1983,6 +1998,11 @@ static void *established_get_first(struc
25876                 st->state = TCP_SEQ_STATE_TIME_WAIT;
25877                 inet_twsk_for_each(tw, node,
25878                                    &tcp_hashinfo.ehash[st->bucket].twchain) {
25879 +                       vxdprintk(VXD_CBIT(net, 6),
25880 +                               "tw: %p [#%d] (from %d)",
25881 +                               tw, tw->tw_nid, nx_current_nid());
25882 +                       if (!nx_check(tw->tw_nid, VS_WATCH_P | VS_IDENT))
25883 +                               continue;
25884                         if (tw->tw_family != st->family ||
25885                             !net_eq(twsk_net(tw), net)) {
25886                                 continue;
25887 @@ -2011,7 +2031,9 @@ static void *established_get_next(struct
25888                 tw = cur;
25889                 tw = tw_next(tw);
25890  get_tw:
25891 -               while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
25892 +               while (tw && (tw->tw_family != st->family ||
25893 +                       !net_eq(twsk_net(tw), net) ||
25894 +                       !nx_check(tw->tw_nid, VS_WATCH_P | VS_IDENT))) {
25895                         tw = tw_next(tw);
25896                 }
25897                 if (tw) {
25898 @@ -2034,6 +2056,11 @@ get_tw:
25899                 sk = sk_nulls_next(sk);
25900  
25901         sk_nulls_for_each_from(sk, node) {
25902 +               vxdprintk(VXD_CBIT(net, 6),
25903 +                       "sk,egn: %p [#%d] (from %d)",
25904 +                       sk, sk->sk_nid, nx_current_nid());
25905 +               if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
25906 +                       continue;
25907                 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
25908                         goto found;
25909         }
25910 @@ -2185,9 +2212,9 @@ static void get_openreq4(struct sock *sk
25911         seq_printf(f, "%4d: %08X:%04X %08X:%04X"
25912                 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
25913                 i,
25914 -               ireq->loc_addr,
25915 +               nx_map_sock_lback(current_nx_info(), ireq->loc_addr),
25916                 ntohs(inet_sk(sk)->sport),
25917 -               ireq->rmt_addr,
25918 +               nx_map_sock_lback(current_nx_info(), ireq->rmt_addr),
25919                 ntohs(ireq->rmt_port),
25920                 TCP_SYN_RECV,
25921                 0, 0, /* could print option size, but that is af dependent. */
25922 @@ -2230,7 +2257,10 @@ static void get_tcp4_sock(struct sock *s
25923  
25924         seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
25925                         "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
25926 -               i, src, srcp, dest, destp, sk->sk_state,
25927 +               i,
25928 +               nx_map_sock_lback(current_nx_info(), src), srcp,
25929 +               nx_map_sock_lback(current_nx_info(), dest), destp,
25930 +               sk->sk_state,
25931                 tp->write_seq - tp->snd_una,
25932                 sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
25933                                              (tp->rcv_nxt - tp->copied_seq),
25934 @@ -2266,7 +2296,10 @@ static void get_timewait4_sock(struct in
25935  
25936         seq_printf(f, "%4d: %08X:%04X %08X:%04X"
25937                 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n",
25938 -               i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
25939 +               i,
25940 +               nx_map_sock_lback(current_nx_info(), src), srcp,
25941 +               nx_map_sock_lback(current_nx_info(), dest), destp,
25942 +               tw->tw_substate, 0, 0,
25943                 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
25944                 atomic_read(&tw->tw_refcnt), tw, len);
25945  }
25946 diff -NurpP --minimal linux-2.6.30.2/net/ipv4/tcp_minisocks.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/tcp_minisocks.c
25947 --- linux-2.6.30.2/net/ipv4/tcp_minisocks.c     2009-06-11 17:13:30.000000000 +0200
25948 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/tcp_minisocks.c  2009-07-04 01:11:39.000000000 +0200
25949 @@ -26,6 +26,10 @@
25950  #include <net/inet_common.h>
25951  #include <net/xfrm.h>
25952  
25953 +#include <linux/vs_limit.h>
25954 +#include <linux/vs_socket.h>
25955 +#include <linux/vs_context.h>
25956 +
25957  #ifdef CONFIG_SYSCTL
25958  #define SYNC_INIT 0 /* let the user enable it */
25959  #else
25960 @@ -293,6 +297,11 @@ void tcp_time_wait(struct sock *sk, int 
25961                 tcptw->tw_ts_recent     = tp->rx_opt.ts_recent;
25962                 tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
25963  
25964 +               tw->tw_xid              = sk->sk_xid;
25965 +               tw->tw_vx_info          = NULL;
25966 +               tw->tw_nid              = sk->sk_nid;
25967 +               tw->tw_nx_info          = NULL;
25968 +
25969  #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
25970                 if (tw->tw_family == PF_INET6) {
25971                         struct ipv6_pinfo *np = inet6_sk(sk);
25972 diff -NurpP --minimal linux-2.6.30.2/net/ipv4/udp.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/udp.c
25973 --- linux-2.6.30.2/net/ipv4/udp.c       2009-06-11 17:13:30.000000000 +0200
25974 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv4/udp.c    2009-07-04 01:11:39.000000000 +0200
25975 @@ -222,14 +222,7 @@ fail:
25976         return error;
25977  }
25978  
25979 -static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
25980 -{
25981 -       struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
25982 -
25983 -       return  ( !ipv6_only_sock(sk2)  &&
25984 -                 (!inet1->rcv_saddr || !inet2->rcv_saddr ||
25985 -                  inet1->rcv_saddr == inet2->rcv_saddr      ));
25986 -}
25987 +extern int ipv4_rcv_saddr_equal(const struct sock *, const struct sock *);
25988  
25989  int udp_v4_get_port(struct sock *sk, unsigned short snum)
25990  {
25991 @@ -251,6 +244,11 @@ static inline int compute_score(struct s
25992                         if (inet->rcv_saddr != daddr)
25993                                 return -1;
25994                         score += 2;
25995 +               } else {
25996 +                       /* block non nx_info ips */
25997 +                       if (!v4_addr_in_nx_info(sk->sk_nx_info,
25998 +                               daddr, NXA_MASK_BIND))
25999 +                               return -1;
26000                 }
26001                 if (inet->daddr) {
26002                         if (inet->daddr != saddr)
26003 @@ -271,6 +269,7 @@ static inline int compute_score(struct s
26004         return score;
26005  }
26006  
26007 +
26008  /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
26009   * harder than this. -DaveM
26010   */
26011 @@ -292,6 +291,11 @@ begin:
26012         sk_nulls_for_each_rcu(sk, node, &hslot->head) {
26013                 score = compute_score(sk, net, saddr, hnum, sport,
26014                                       daddr, dport, dif);
26015 +               /* FIXME: disabled?
26016 +               if (score == 9) {
26017 +                       result = sk;
26018 +                       break;
26019 +               } else */
26020                 if (score > badness) {
26021                         result = sk;
26022                         badness = score;
26023 @@ -305,6 +309,7 @@ begin:
26024         if (get_nulls_value(node) != hash)
26025                 goto begin;
26026  
26027 +
26028         if (result) {
26029                 if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
26030                         result = NULL;
26031 @@ -314,6 +319,7 @@ begin:
26032                         goto begin;
26033                 }
26034         }
26035 +
26036         rcu_read_unlock();
26037         return result;
26038  }
26039 @@ -356,7 +362,7 @@ static inline struct sock *udp_v4_mcast_
26040                     s->sk_hash != hnum                                  ||
26041                     (inet->daddr && inet->daddr != rmt_addr)            ||
26042                     (inet->dport != rmt_port && inet->dport)            ||
26043 -                   (inet->rcv_saddr && inet->rcv_saddr != loc_addr)    ||
26044 +                   !v4_sock_addr_match(s->sk_nx_info, inet, loc_addr)  ||
26045                     ipv6_only_sock(s)                                   ||
26046                     (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
26047                         continue;
26048 @@ -698,8 +704,13 @@ int udp_sendmsg(struct kiocb *iocb, stru
26049                                                { .sport = inet->sport,
26050                                                  .dport = dport } } };
26051                 struct net *net = sock_net(sk);
26052 +               struct nx_info *nxi = sk->sk_nx_info;
26053  
26054                 security_sk_classify_flow(sk, &fl);
26055 +               err = ip_v4_find_src(net, nxi, &rt, &fl);
26056 +               if (err)
26057 +                       goto out;
26058 +
26059                 err = ip_route_output_flow(net, &rt, &fl, sk, 1);
26060                 if (err) {
26061                         if (err == -ENETUNREACH)
26062 @@ -944,7 +955,8 @@ try_again:
26063         {
26064                 sin->sin_family = AF_INET;
26065                 sin->sin_port = udp_hdr(skb)->source;
26066 -               sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
26067 +               sin->sin_addr.s_addr = nx_map_sock_lback(
26068 +                       skb->sk->sk_nx_info, ip_hdr(skb)->saddr);
26069                 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
26070         }
26071         if (inet->cmsg_flags)
26072 @@ -1598,6 +1610,8 @@ static struct sock *udp_get_first(struct
26073                 sk_nulls_for_each(sk, node, &hslot->head) {
26074                         if (!net_eq(sock_net(sk), net))
26075                                 continue;
26076 +                       if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
26077 +                               continue;
26078                         if (sk->sk_family == state->family)
26079                                 goto found;
26080                 }
26081 @@ -1615,7 +1629,9 @@ static struct sock *udp_get_next(struct 
26082  
26083         do {
26084                 sk = sk_nulls_next(sk);
26085 -       } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
26086 +       } while (sk && (!net_eq(sock_net(sk), net) ||
26087 +               sk->sk_family != state->family ||
26088 +               !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)));
26089  
26090         if (!sk) {
26091                 if (state->bucket < UDP_HTABLE_SIZE)
26092 @@ -1720,7 +1736,10 @@ static void udp4_format_sock(struct sock
26093  
26094         seq_printf(f, "%4d: %08X:%04X %08X:%04X"
26095                 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n",
26096 -               bucket, src, srcp, dest, destp, sp->sk_state,
26097 +               bucket,
26098 +               nx_map_sock_lback(current_nx_info(), src), srcp,
26099 +               nx_map_sock_lback(current_nx_info(), dest), destp,
26100 +               sp->sk_state,
26101                 atomic_read(&sp->sk_wmem_alloc),
26102                 atomic_read(&sp->sk_rmem_alloc),
26103                 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
26104 diff -NurpP --minimal linux-2.6.30.2/net/ipv6/addrconf.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv6/addrconf.c
26105 --- linux-2.6.30.2/net/ipv6/addrconf.c  2009-06-11 17:13:30.000000000 +0200
26106 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv6/addrconf.c       2009-07-04 01:11:39.000000000 +0200
26107 @@ -86,6 +86,8 @@
26108  
26109  #include <linux/proc_fs.h>
26110  #include <linux/seq_file.h>
26111 +#include <linux/vs_network.h>
26112 +#include <linux/vs_inet6.h>
26113  
26114  /* Set to 3 to get tracing... */
26115  #define ACONF_DEBUG 2
26116 @@ -1118,7 +1120,7 @@ out:
26117  
26118  int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev,
26119                        const struct in6_addr *daddr, unsigned int prefs,
26120 -                      struct in6_addr *saddr)
26121 +                      struct in6_addr *saddr, struct nx_info *nxi)
26122  {
26123         struct ipv6_saddr_score scores[2],
26124                                 *score = &scores[0], *hiscore = &scores[1];
26125 @@ -1191,6 +1193,8 @@ int ipv6_dev_get_saddr(struct net *net, 
26126                                                dev->name);
26127                                 continue;
26128                         }
26129 +                       if (!v6_addr_in_nx_info(nxi, &score->ifa->addr, -1))
26130 +                               continue;
26131  
26132                         score->rule = -1;
26133                         bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX);
26134 @@ -2954,7 +2958,10 @@ static void if6_seq_stop(struct seq_file
26135  static int if6_seq_show(struct seq_file *seq, void *v)
26136  {
26137         struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v;
26138 -       seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n",
26139 +
26140 +       if (nx_check(0, VS_ADMIN|VS_WATCH) ||
26141 +           v6_addr_in_nx_info(current_nx_info(), &ifp->addr, -1))
26142 +               seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n",
26143                    &ifp->addr,
26144                    ifp->idev->dev->ifindex,
26145                    ifp->prefix_len,
26146 @@ -3448,6 +3455,12 @@ static int inet6_dump_addr(struct sk_buf
26147         struct ifmcaddr6 *ifmca;
26148         struct ifacaddr6 *ifaca;
26149         struct net *net = sock_net(skb->sk);
26150 +       struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL;
26151 +
26152 +       /* disable ipv6 on non v6 guests */
26153 +       if (nxi && !nx_info_has_v6(nxi))
26154 +               return skb->len;
26155 +
26156  
26157         s_idx = cb->args[0];
26158         s_ip_idx = ip_idx = cb->args[1];
26159 @@ -3469,6 +3482,8 @@ static int inet6_dump_addr(struct sk_buf
26160                              ifa = ifa->if_next, ip_idx++) {
26161                                 if (ip_idx < s_ip_idx)
26162                                         continue;
26163 +                               if (!v6_addr_in_nx_info(nxi, &ifa->addr, -1))
26164 +                                       continue;
26165                                 err = inet6_fill_ifaddr(skb, ifa,
26166                                                         NETLINK_CB(cb->skb).pid,
26167                                                         cb->nlh->nlmsg_seq,
26168 @@ -3482,6 +3497,8 @@ static int inet6_dump_addr(struct sk_buf
26169                              ifmca = ifmca->next, ip_idx++) {
26170                                 if (ip_idx < s_ip_idx)
26171                                         continue;
26172 +                               if (!v6_addr_in_nx_info(nxi, &ifmca->mca_addr, -1))
26173 +                                       continue;
26174                                 err = inet6_fill_ifmcaddr(skb, ifmca,
26175                                                           NETLINK_CB(cb->skb).pid,
26176                                                           cb->nlh->nlmsg_seq,
26177 @@ -3495,6 +3512,8 @@ static int inet6_dump_addr(struct sk_buf
26178                              ifaca = ifaca->aca_next, ip_idx++) {
26179                                 if (ip_idx < s_ip_idx)
26180                                         continue;
26181 +                               if (!v6_addr_in_nx_info(nxi, &ifaca->aca_addr, -1))
26182 +                                       continue;
26183                                 err = inet6_fill_ifacaddr(skb, ifaca,
26184                                                           NETLINK_CB(cb->skb).pid,
26185                                                           cb->nlh->nlmsg_seq,
26186 @@ -3781,12 +3800,19 @@ static int inet6_dump_ifinfo(struct sk_b
26187         int s_idx = cb->args[0];
26188         struct net_device *dev;
26189         struct inet6_dev *idev;
26190 +       struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL;
26191 +
26192 +       /* FIXME: maybe disable ipv6 on non v6 guests?
26193 +       if (skb->sk && skb->sk->sk_vx_info)
26194 +               return skb->len; */
26195  
26196         read_lock(&dev_base_lock);
26197         idx = 0;
26198         for_each_netdev(net, dev) {
26199                 if (idx < s_idx)
26200                         goto cont;
26201 +               if (!v6_dev_in_nx_info(dev, nxi))
26202 +                       goto cont;
26203                 if ((idev = in6_dev_get(dev)) == NULL)
26204                         goto cont;
26205                 err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid,
26206 diff -NurpP --minimal linux-2.6.30.2/net/ipv6/af_inet6.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv6/af_inet6.c
26207 --- linux-2.6.30.2/net/ipv6/af_inet6.c  2009-06-11 17:13:30.000000000 +0200
26208 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv6/af_inet6.c       2009-07-04 01:11:39.000000000 +0200
26209 @@ -41,6 +41,8 @@
26210  #include <linux/netdevice.h>
26211  #include <linux/icmpv6.h>
26212  #include <linux/netfilter_ipv6.h>
26213 +#include <linux/vs_inet.h>
26214 +#include <linux/vs_inet6.h>
26215  
26216  #include <net/ip.h>
26217  #include <net/ipv6.h>
26218 @@ -49,6 +51,7 @@
26219  #include <net/tcp.h>
26220  #include <net/ipip.h>
26221  #include <net/protocol.h>
26222 +#include <net/route.h>
26223  #include <net/inet_common.h>
26224  #include <net/route.h>
26225  #include <net/transp_v6.h>
26226 @@ -146,9 +149,12 @@ lookup_protocol:
26227         }
26228  
26229         err = -EPERM;
26230 +       if ((protocol == IPPROTO_ICMPV6) &&
26231 +               nx_capable(answer->capability, NXC_RAW_ICMP))
26232 +               goto override;
26233         if (answer->capability > 0 && !capable(answer->capability))
26234                 goto out_rcu_unlock;
26235 -
26236 +override:
26237         sock->ops = answer->ops;
26238         answer_prot = answer->prot;
26239         answer_no_check = answer->no_check;
26240 @@ -247,6 +253,7 @@ int inet6_bind(struct socket *sock, stru
26241         struct inet_sock *inet = inet_sk(sk);
26242         struct ipv6_pinfo *np = inet6_sk(sk);
26243         struct net *net = sock_net(sk);
26244 +       struct nx_v6_sock_addr nsa;
26245         __be32 v4addr = 0;
26246         unsigned short snum;
26247         int addr_type = 0;
26248 @@ -258,6 +265,11 @@ int inet6_bind(struct socket *sock, stru
26249  
26250         if (addr_len < SIN6_LEN_RFC2133)
26251                 return -EINVAL;
26252 +
26253 +       err = v6_map_sock_addr(inet, addr, &nsa);
26254 +       if (err)
26255 +               return err;
26256 +
26257         addr_type = ipv6_addr_type(&addr->sin6_addr);
26258         if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
26259                 return -EINVAL;
26260 @@ -321,6 +333,11 @@ int inet6_bind(struct socket *sock, stru
26261                                 }
26262                         }
26263  
26264 +                       if (!v6_addr_in_nx_info(sk->sk_nx_info, &addr->sin6_addr, -1)) {
26265 +                               err = -EADDRNOTAVAIL;
26266 +                               goto out;
26267 +                       }
26268 +
26269                         /* ipv4 addr of the socket is invalid.  Only the
26270                          * unspecified and mapped address have a v4 equivalent.
26271                          */
26272 @@ -339,6 +356,8 @@ int inet6_bind(struct socket *sock, stru
26273                 }
26274         }
26275  
26276 +       v6_set_sock_addr(inet, &nsa);
26277 +
26278         inet->rcv_saddr = v4addr;
26279         inet->saddr = v4addr;
26280  
26281 @@ -434,9 +453,11 @@ int inet6_getname(struct socket *sock, s
26282                         return -ENOTCONN;
26283                 sin->sin6_port = inet->dport;
26284                 ipv6_addr_copy(&sin->sin6_addr, &np->daddr);
26285 +               /* FIXME: remap lback? */
26286                 if (np->sndflow)
26287                         sin->sin6_flowinfo = np->flow_label;
26288         } else {
26289 +               /* FIXME: remap lback? */
26290                 if (ipv6_addr_any(&np->rcv_saddr))
26291                         ipv6_addr_copy(&sin->sin6_addr, &np->saddr);
26292                 else
26293 diff -NurpP --minimal linux-2.6.30.2/net/ipv6/fib6_rules.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv6/fib6_rules.c
26294 --- linux-2.6.30.2/net/ipv6/fib6_rules.c        2008-12-25 00:26:37.000000000 +0100
26295 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv6/fib6_rules.c     2009-07-04 01:11:39.000000000 +0200
26296 @@ -96,7 +96,7 @@ static int fib6_rule_action(struct fib_r
26297                         if (ipv6_dev_get_saddr(net,
26298                                                ip6_dst_idev(&rt->u.dst)->dev,
26299                                                &flp->fl6_dst, srcprefs,
26300 -                                              &saddr))
26301 +                                              &saddr, NULL))
26302                                 goto again;
26303                         if (!ipv6_prefix_equal(&saddr, &r->src.addr,
26304                                                r->src.plen))
26305 diff -NurpP --minimal linux-2.6.30.2/net/ipv6/inet6_hashtables.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv6/inet6_hashtables.c
26306 --- linux-2.6.30.2/net/ipv6/inet6_hashtables.c  2009-03-24 14:22:46.000000000 +0100
26307 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv6/inet6_hashtables.c       2009-07-04 01:11:39.000000000 +0200
26308 @@ -16,6 +16,7 @@
26309  
26310  #include <linux/module.h>
26311  #include <linux/random.h>
26312 +#include <linux/vs_inet6.h>
26313  
26314  #include <net/inet_connection_sock.h>
26315  #include <net/inet_hashtables.h>
26316 @@ -76,7 +77,6 @@ struct sock *__inet6_lookup_established(
26317         unsigned int slot = hash & (hashinfo->ehash_size - 1);
26318         struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
26319  
26320 -
26321         rcu_read_lock();
26322  begin:
26323         sk_nulls_for_each_rcu(sk, node, &head->chain) {
26324 @@ -88,7 +88,7 @@ begin:
26325                                 sock_put(sk);
26326                                 goto begin;
26327                         }
26328 -               goto out;
26329 +                       goto out;
26330                 }
26331         }
26332         if (get_nulls_value(node) != slot)
26333 @@ -134,6 +134,9 @@ static int inline compute_score(struct s
26334                         if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
26335                                 return -1;
26336                         score++;
26337 +               } else {
26338 +                       if (!v6_addr_in_nx_info(sk->sk_nx_info, daddr, -1))
26339 +                               return -1;
26340                 }
26341                 if (sk->sk_bound_dev_if) {
26342                         if (sk->sk_bound_dev_if != dif)
26343 diff -NurpP --minimal linux-2.6.30.2/net/ipv6/ip6_output.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv6/ip6_output.c
26344 --- linux-2.6.30.2/net/ipv6/ip6_output.c        2009-03-24 14:22:47.000000000 +0100
26345 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv6/ip6_output.c     2009-07-04 01:11:39.000000000 +0200
26346 @@ -951,7 +951,7 @@ static int ip6_dst_lookup_tail(struct so
26347                 err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
26348                                          &fl->fl6_dst,
26349                                          sk ? inet6_sk(sk)->srcprefs : 0,
26350 -                                        &fl->fl6_src);
26351 +                                        &fl->fl6_src, sk ? sk->sk_nx_info : NULL);
26352                 if (err)
26353                         goto out_err_release;
26354         }
26355 diff -NurpP --minimal linux-2.6.30.2/net/ipv6/Kconfig linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv6/Kconfig
26356 --- linux-2.6.30.2/net/ipv6/Kconfig     2009-06-11 17:13:30.000000000 +0200
26357 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv6/Kconfig  2009-07-04 01:11:39.000000000 +0200
26358 @@ -4,8 +4,8 @@
26359  
26360  #   IPv6 as module will cause a CRASH if you try to unload it
26361  menuconfig IPV6
26362 -       tristate "The IPv6 protocol"
26363 -       default m
26364 +       bool "The IPv6 protocol"
26365 +       default n
26366         ---help---
26367           This is complemental support for the IP version 6.
26368           You will still be able to do traditional IPv4 networking as well.
26369 diff -NurpP --minimal linux-2.6.30.2/net/ipv6/ndisc.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv6/ndisc.c
26370 --- linux-2.6.30.2/net/ipv6/ndisc.c     2009-06-11 17:13:30.000000000 +0200
26371 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv6/ndisc.c  2009-07-04 01:11:39.000000000 +0200
26372 @@ -589,7 +589,7 @@ static void ndisc_send_na(struct net_dev
26373         } else {
26374                 if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
26375                                        inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
26376 -                                      &tmpaddr))
26377 +                                      &tmpaddr, NULL /* FIXME: ? */ ))
26378                         return;
26379                 src_addr = &tmpaddr;
26380         }
26381 diff -NurpP --minimal linux-2.6.30.2/net/ipv6/raw.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv6/raw.c
26382 --- linux-2.6.30.2/net/ipv6/raw.c       2009-03-24 14:22:47.000000000 +0100
26383 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv6/raw.c    2009-07-04 01:11:39.000000000 +0200
26384 @@ -29,6 +29,7 @@
26385  #include <linux/icmpv6.h>
26386  #include <linux/netfilter.h>
26387  #include <linux/netfilter_ipv6.h>
26388 +#include <linux/vs_inet6.h>
26389  #include <linux/skbuff.h>
26390  #include <asm/uaccess.h>
26391  #include <asm/ioctls.h>
26392 @@ -281,6 +282,13 @@ static int rawv6_bind(struct sock *sk, s
26393                         }
26394                 }
26395  
26396 +               if (!v6_addr_in_nx_info(sk->sk_nx_info, &addr->sin6_addr, -1)) {
26397 +                       err = -EADDRNOTAVAIL;
26398 +                       if (dev)
26399 +                               dev_put(dev);
26400 +                       goto out;
26401 +               }
26402 +
26403                 /* ipv4 addr of the socket is invalid.  Only the
26404                  * unspecified and mapped address have a v4 equivalent.
26405                  */
26406 diff -NurpP --minimal linux-2.6.30.2/net/ipv6/route.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv6/route.c
26407 --- linux-2.6.30.2/net/ipv6/route.c     2009-06-11 17:13:30.000000000 +0200
26408 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv6/route.c  2009-07-04 01:11:39.000000000 +0200
26409 @@ -2257,7 +2257,8 @@ static int rt6_fill_node(struct net *net
26410                 struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
26411                 struct in6_addr saddr_buf;
26412                 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
26413 -                                      dst, 0, &saddr_buf) == 0)
26414 +                       dst, 0, &saddr_buf,
26415 +                       (skb->sk ? skb->sk->sk_nx_info : NULL)) == 0)
26416                         NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
26417         }
26418  
26419 diff -NurpP --minimal linux-2.6.30.2/net/ipv6/tcp_ipv6.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv6/tcp_ipv6.c
26420 --- linux-2.6.30.2/net/ipv6/tcp_ipv6.c  2009-06-11 17:13:30.000000000 +0200
26421 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv6/tcp_ipv6.c       2009-07-04 01:11:39.000000000 +0200
26422 @@ -68,6 +68,7 @@
26423  
26424  #include <linux/crypto.h>
26425  #include <linux/scatterlist.h>
26426 +#include <linux/vs_inet6.h>
26427  
26428  static void    tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
26429  static void    tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
26430 @@ -156,8 +157,15 @@ static int tcp_v6_connect(struct sock *s
26431          *      connect() to INADDR_ANY means loopback (BSD'ism).
26432          */
26433  
26434 -       if(ipv6_addr_any(&usin->sin6_addr))
26435 -               usin->sin6_addr.s6_addr[15] = 0x1;
26436 +       if(ipv6_addr_any(&usin->sin6_addr)) {
26437 +               struct nx_info *nxi =  sk->sk_nx_info;
26438 +
26439 +               if (nxi && nx_info_has_v6(nxi))
26440 +                       /* FIXME: remap lback? */
26441 +                       usin->sin6_addr = nxi->v6.ip;
26442 +               else
26443 +                       usin->sin6_addr.s6_addr[15] = 0x1;
26444 +       }
26445  
26446         addr_type = ipv6_addr_type(&usin->sin6_addr);
26447  
26448 diff -NurpP --minimal linux-2.6.30.2/net/ipv6/udp.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv6/udp.c
26449 --- linux-2.6.30.2/net/ipv6/udp.c       2009-06-11 17:13:30.000000000 +0200
26450 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv6/udp.c    2009-07-04 01:11:39.000000000 +0200
26451 @@ -47,6 +47,7 @@
26452  
26453  #include <linux/proc_fs.h>
26454  #include <linux/seq_file.h>
26455 +#include <linux/vs_inet6.h>
26456  #include "udp_impl.h"
26457  
26458  int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
26459 @@ -109,6 +110,10 @@ static inline int compute_score(struct s
26460                         if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
26461                                 return -1;
26462                         score++;
26463 +               } else {
26464 +                       /* block non nx_info ips */
26465 +                       if (!v6_addr_in_nx_info(sk->sk_nx_info, daddr, -1))
26466 +                               return -1;
26467                 }
26468                 if (!ipv6_addr_any(&np->daddr)) {
26469                         if (!ipv6_addr_equal(&np->daddr, saddr))
26470 diff -NurpP --minimal linux-2.6.30.2/net/ipv6/xfrm6_policy.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv6/xfrm6_policy.c
26471 --- linux-2.6.30.2/net/ipv6/xfrm6_policy.c      2009-06-11 17:13:30.000000000 +0200
26472 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/ipv6/xfrm6_policy.c   2009-07-04 01:11:39.000000000 +0200
26473 @@ -63,7 +63,7 @@ static int xfrm6_get_saddr(struct net *n
26474         dev = ip6_dst_idev(dst)->dev;
26475         ipv6_dev_get_saddr(dev_net(dev), dev,
26476                            (struct in6_addr *)&daddr->a6, 0,
26477 -                          (struct in6_addr *)&saddr->a6);
26478 +                          (struct in6_addr *)&saddr->a6, NULL);
26479         dst_release(dst);
26480         return 0;
26481  }
26482 diff -NurpP --minimal linux-2.6.30.2/net/netlink/af_netlink.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/netlink/af_netlink.c
26483 --- linux-2.6.30.2/net/netlink/af_netlink.c     2009-06-11 17:13:31.000000000 +0200
26484 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/netlink/af_netlink.c  2009-07-04 01:11:39.000000000 +0200
26485 @@ -55,6 +55,9 @@
26486  #include <linux/types.h>
26487  #include <linux/audit.h>
26488  #include <linux/mutex.h>
26489 +#include <linux/vs_context.h>
26490 +#include <linux/vs_network.h>
26491 +#include <linux/vs_limit.h>
26492  
26493  #include <net/net_namespace.h>
26494  #include <net/sock.h>
26495 @@ -1831,6 +1834,8 @@ static struct sock *netlink_seq_socket_i
26496                         sk_for_each(s, node, &hash->table[j]) {
26497                                 if (sock_net(s) != seq_file_net(seq))
26498                                         continue;
26499 +                               if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT))
26500 +                                       continue;
26501                                 if (off == pos) {
26502                                         iter->link = i;
26503                                         iter->hash_idx = j;
26504 @@ -1865,7 +1870,8 @@ static void *netlink_seq_next(struct seq
26505         s = v;
26506         do {
26507                 s = sk_next(s);
26508 -       } while (s && sock_net(s) != seq_file_net(seq));
26509 +       } while (s && (sock_net(s) != seq_file_net(seq) ||
26510 +               !nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT)));
26511         if (s)
26512                 return s;
26513  
26514 @@ -1877,7 +1883,8 @@ static void *netlink_seq_next(struct seq
26515  
26516                 for (; j <= hash->mask; j++) {
26517                         s = sk_head(&hash->table[j]);
26518 -                       while (s && sock_net(s) != seq_file_net(seq))
26519 +                       while (s && (sock_net(s) != seq_file_net(seq) ||
26520 +                               !nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT)))
26521                                 s = sk_next(s);
26522                         if (s) {
26523                                 iter->link = i;
26524 diff -NurpP --minimal linux-2.6.30.2/net/sctp/ipv6.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/sctp/ipv6.c
26525 --- linux-2.6.30.2/net/sctp/ipv6.c      2009-06-11 17:13:32.000000000 +0200
26526 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/sctp/ipv6.c   2009-07-04 01:11:39.000000000 +0200
26527 @@ -316,7 +316,8 @@ static void sctp_v6_get_saddr(struct sct
26528                                    dst ? ip6_dst_idev(dst)->dev : NULL,
26529                                    &daddr->v6.sin6_addr,
26530                                    inet6_sk(&sk->inet.sk)->srcprefs,
26531 -                                  &saddr->v6.sin6_addr);
26532 +                                  &saddr->v6.sin6_addr,
26533 +                                  sk->inet.sk.sk_nx_info);
26534                 SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: %pI6\n",
26535                                   &saddr->v6.sin6_addr);
26536                 return;
26537 diff -NurpP --minimal linux-2.6.30.2/net/socket.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/socket.c
26538 --- linux-2.6.30.2/net/socket.c 2009-06-11 17:13:32.000000000 +0200
26539 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/socket.c      2009-07-04 01:11:39.000000000 +0200
26540 @@ -95,6 +95,10 @@
26541  
26542  #include <net/sock.h>
26543  #include <linux/netfilter.h>
26544 +#include <linux/vs_base.h>
26545 +#include <linux/vs_socket.h>
26546 +#include <linux/vs_inet.h>
26547 +#include <linux/vs_inet6.h>
26548  
26549  static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
26550  static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
26551 @@ -559,7 +563,7 @@ static inline int __sock_sendmsg(struct 
26552                                  struct msghdr *msg, size_t size)
26553  {
26554         struct sock_iocb *si = kiocb_to_siocb(iocb);
26555 -       int err;
26556 +       int err, len;
26557  
26558         si->sock = sock;
26559         si->scm = NULL;
26560 @@ -570,7 +574,22 @@ static inline int __sock_sendmsg(struct 
26561         if (err)
26562                 return err;
26563  
26564 -       return sock->ops->sendmsg(iocb, sock, msg, size);
26565 +       len = sock->ops->sendmsg(iocb, sock, msg, size);
26566 +       if (sock->sk) {
26567 +               if (len == size)
26568 +                       vx_sock_send(sock->sk, size);
26569 +               else
26570 +                       vx_sock_fail(sock->sk, size);
26571 +       }
26572 +       vxdprintk(VXD_CBIT(net, 7),
26573 +               "__sock_sendmsg: %p[%p,%p,%p;%d/%d]:%d/%d",
26574 +               sock, sock->sk,
26575 +               (sock->sk)?sock->sk->sk_nx_info:0,
26576 +               (sock->sk)?sock->sk->sk_vx_info:0,
26577 +               (sock->sk)?sock->sk->sk_xid:0,
26578 +               (sock->sk)?sock->sk->sk_nid:0,
26579 +               (unsigned int)size, len);
26580 +       return len;
26581  }
26582  
26583  int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
26584 @@ -671,7 +690,7 @@ EXPORT_SYMBOL_GPL(__sock_recv_timestamp)
26585  static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
26586                                  struct msghdr *msg, size_t size, int flags)
26587  {
26588 -       int err;
26589 +       int err, len;
26590         struct sock_iocb *si = kiocb_to_siocb(iocb);
26591  
26592         si->sock = sock;
26593 @@ -684,7 +703,18 @@ static inline int __sock_recvmsg(struct 
26594         if (err)
26595                 return err;
26596  
26597 -       return sock->ops->recvmsg(iocb, sock, msg, size, flags);
26598 +       len = sock->ops->recvmsg(iocb, sock, msg, size, flags);
26599 +       if ((len >= 0) && sock->sk)
26600 +               vx_sock_recv(sock->sk, len);
26601 +       vxdprintk(VXD_CBIT(net, 7),
26602 +               "__sock_recvmsg: %p[%p,%p,%p;%d/%d]:%d/%d",
26603 +               sock, sock->sk,
26604 +               (sock->sk)?sock->sk->sk_nx_info:0,
26605 +               (sock->sk)?sock->sk->sk_vx_info:0,
26606 +               (sock->sk)?sock->sk->sk_xid:0,
26607 +               (sock->sk)?sock->sk->sk_nid:0,
26608 +               (unsigned int)size, len);
26609 +       return len;
26610  }
26611  
26612  int sock_recvmsg(struct socket *sock, struct msghdr *msg,
26613 @@ -1155,6 +1185,13 @@ static int __sock_create(struct net *net
26614         if (type < 0 || type >= SOCK_MAX)
26615                 return -EINVAL;
26616  
26617 +       if (!nx_check(0, VS_ADMIN)) {
26618 +               if (family == PF_INET && !current_nx_info_has_v4())
26619 +                       return -EAFNOSUPPORT;
26620 +               if (family == PF_INET6 && !current_nx_info_has_v6())
26621 +                       return -EAFNOSUPPORT;
26622 +       }
26623 +
26624         /* Compatibility.
26625  
26626            This uglymoron is moved from INET layer to here to avoid
26627 @@ -1287,6 +1324,7 @@ SYSCALL_DEFINE3(socket, int, family, int
26628         if (retval < 0)
26629                 goto out;
26630  
26631 +       set_bit(SOCK_USER_SOCKET, &sock->flags);
26632         retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
26633         if (retval < 0)
26634                 goto out_release;
26635 @@ -1328,10 +1366,12 @@ SYSCALL_DEFINE4(socketpair, int, family,
26636         err = sock_create(family, type, protocol, &sock1);
26637         if (err < 0)
26638                 goto out;
26639 +       set_bit(SOCK_USER_SOCKET, &sock1->flags);
26640  
26641         err = sock_create(family, type, protocol, &sock2);
26642         if (err < 0)
26643                 goto out_release_1;
26644 +       set_bit(SOCK_USER_SOCKET, &sock2->flags);
26645  
26646         err = sock1->ops->socketpair(sock1, sock2);
26647         if (err < 0)
26648 diff -NurpP --minimal linux-2.6.30.2/net/sunrpc/auth.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/sunrpc/auth.c
26649 --- linux-2.6.30.2/net/sunrpc/auth.c    2009-03-24 14:22:48.000000000 +0100
26650 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/sunrpc/auth.c 2009-07-04 01:11:39.000000000 +0200
26651 @@ -14,6 +14,7 @@
26652  #include <linux/hash.h>
26653  #include <linux/sunrpc/clnt.h>
26654  #include <linux/spinlock.h>
26655 +#include <linux/vs_tag.h>
26656  
26657  #ifdef RPC_DEBUG
26658  # define RPCDBG_FACILITY       RPCDBG_AUTH
26659 @@ -360,6 +361,7 @@ rpcauth_lookupcred(struct rpc_auth *auth
26660         memset(&acred, 0, sizeof(acred));
26661         acred.uid = cred->fsuid;
26662         acred.gid = cred->fsgid;
26663 +       acred.tag = dx_current_tag();
26664         acred.group_info = get_group_info(((struct cred *)cred)->group_info);
26665  
26666         ret = auth->au_ops->lookup_cred(auth, &acred, flags);
26667 @@ -400,6 +402,7 @@ rpcauth_bind_root_cred(struct rpc_task *
26668         struct auth_cred acred = {
26669                 .uid = 0,
26670                 .gid = 0,
26671 +               .tag = dx_current_tag(),
26672         };
26673         struct rpc_cred *ret;
26674  
26675 diff -NurpP --minimal linux-2.6.30.2/net/sunrpc/auth_unix.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/sunrpc/auth_unix.c
26676 --- linux-2.6.30.2/net/sunrpc/auth_unix.c       2008-12-25 00:26:37.000000000 +0100
26677 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/sunrpc/auth_unix.c    2009-07-04 01:11:39.000000000 +0200
26678 @@ -11,12 +11,14 @@
26679  #include <linux/module.h>
26680  #include <linux/sunrpc/clnt.h>
26681  #include <linux/sunrpc/auth.h>
26682 +#include <linux/vs_tag.h>
26683  
26684  #define NFS_NGROUPS    16
26685  
26686  struct unx_cred {
26687         struct rpc_cred         uc_base;
26688         gid_t                   uc_gid;
26689 +       tag_t                   uc_tag;
26690         gid_t                   uc_gids[NFS_NGROUPS];
26691  };
26692  #define uc_uid                 uc_base.cr_uid
26693 @@ -78,6 +80,7 @@ unx_create_cred(struct rpc_auth *auth, s
26694                 groups = NFS_NGROUPS;
26695  
26696         cred->uc_gid = acred->gid;
26697 +       cred->uc_tag = acred->tag;
26698         for (i = 0; i < groups; i++)
26699                 cred->uc_gids[i] = GROUP_AT(acred->group_info, i);
26700         if (i < NFS_NGROUPS)
26701 @@ -119,7 +122,9 @@ unx_match(struct auth_cred *acred, struc
26702         unsigned int i;
26703  
26704  
26705 -       if (cred->uc_uid != acred->uid || cred->uc_gid != acred->gid)
26706 +       if (cred->uc_uid != acred->uid ||
26707 +               cred->uc_gid != acred->gid ||
26708 +               cred->uc_tag != acred->tag)
26709                 return 0;
26710  
26711         if (acred->group_info != NULL)
26712 @@ -142,7 +147,7 @@ unx_marshal(struct rpc_task *task, __be3
26713         struct rpc_clnt *clnt = task->tk_client;
26714         struct unx_cred *cred = container_of(task->tk_msg.rpc_cred, struct unx_cred, uc_base);
26715         __be32          *base, *hold;
26716 -       int             i;
26717 +       int             i, tag;
26718  
26719         *p++ = htonl(RPC_AUTH_UNIX);
26720         base = p++;
26721 @@ -152,9 +157,12 @@ unx_marshal(struct rpc_task *task, __be3
26722          * Copy the UTS nodename captured when the client was created.
26723          */
26724         p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
26725 +       tag = task->tk_client->cl_tag;
26726  
26727 -       *p++ = htonl((u32) cred->uc_uid);
26728 -       *p++ = htonl((u32) cred->uc_gid);
26729 +       *p++ = htonl((u32) TAGINO_UID(tag,
26730 +               cred->uc_uid, cred->uc_tag));
26731 +       *p++ = htonl((u32) TAGINO_GID(tag,
26732 +               cred->uc_gid, cred->uc_tag));
26733         hold = p++;
26734         for (i = 0; i < 16 && cred->uc_gids[i] != (gid_t) NOGROUP; i++)
26735                 *p++ = htonl((u32) cred->uc_gids[i]);
26736 diff -NurpP --minimal linux-2.6.30.2/net/sunrpc/clnt.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/sunrpc/clnt.c
26737 --- linux-2.6.30.2/net/sunrpc/clnt.c    2009-06-11 17:13:32.000000000 +0200
26738 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/sunrpc/clnt.c 2009-07-04 01:11:39.000000000 +0200
26739 @@ -32,6 +32,7 @@
26740  #include <linux/utsname.h>
26741  #include <linux/workqueue.h>
26742  #include <linux/in6.h>
26743 +#include <linux/vs_cvirt.h>
26744  
26745  #include <linux/sunrpc/clnt.h>
26746  #include <linux/sunrpc/rpc_pipe_fs.h>
26747 @@ -335,6 +336,9 @@ struct rpc_clnt *rpc_create(struct rpc_c
26748         if (!(args->flags & RPC_CLNT_CREATE_QUIET))
26749                 clnt->cl_chatty = 1;
26750  
26751 +       /* TODO: handle RPC_CLNT_CREATE_TAGGED
26752 +       if (args->flags & RPC_CLNT_CREATE_TAGGED)
26753 +               clnt->cl_tag = 1; */
26754         return clnt;
26755  }
26756  EXPORT_SYMBOL_GPL(rpc_create);
26757 diff -NurpP --minimal linux-2.6.30.2/net/unix/af_unix.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/unix/af_unix.c
26758 --- linux-2.6.30.2/net/unix/af_unix.c   2009-06-11 17:13:32.000000000 +0200
26759 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/unix/af_unix.c        2009-07-04 01:11:39.000000000 +0200
26760 @@ -114,6 +114,8 @@
26761  #include <linux/mount.h>
26762  #include <net/checksum.h>
26763  #include <linux/security.h>
26764 +#include <linux/vs_context.h>
26765 +#include <linux/vs_limit.h>
26766  
26767  static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
26768  static DEFINE_SPINLOCK(unix_table_lock);
26769 @@ -258,6 +260,8 @@ static struct sock *__unix_find_socket_b
26770                 if (!net_eq(sock_net(s), net))
26771                         continue;
26772  
26773 +               if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT))
26774 +                       continue;
26775                 if (u->addr->len == len &&
26776                     !memcmp(u->addr->name, sunname, len))
26777                         goto found;
26778 @@ -2109,6 +2113,8 @@ static struct sock *unix_seq_idx(struct 
26779         for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
26780                 if (sock_net(s) != seq_file_net(seq))
26781                         continue;
26782 +               if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT))
26783 +                       continue;
26784                 if (off == pos)
26785                         return s;
26786                 ++off;
26787 @@ -2133,7 +2139,8 @@ static void *unix_seq_next(struct seq_fi
26788                 sk = first_unix_socket(&iter->i);
26789         else
26790                 sk = next_unix_socket(&iter->i, sk);
26791 -       while (sk && (sock_net(sk) != seq_file_net(seq)))
26792 +       while (sk && (sock_net(sk) != seq_file_net(seq) ||
26793 +               !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)))
26794                 sk = next_unix_socket(&iter->i, sk);
26795         return sk;
26796  }
26797 diff -NurpP --minimal linux-2.6.30.2/net/x25/af_x25.c linux-2.6.30.2-vs2.3.0.36.14-pre4/net/x25/af_x25.c
26798 --- linux-2.6.30.2/net/x25/af_x25.c     2009-07-23 13:28:48.000000000 +0200
26799 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/net/x25/af_x25.c  2009-07-04 01:11:39.000000000 +0200
26800 @@ -519,7 +519,10 @@ static int x25_create(struct net *net, s
26801  
26802         x25 = x25_sk(sk);
26803  
26804 -       sock_init_data(sock, sk);
26805 +       sk->sk_socket = sock;
26806 +       sk->sk_type = sock->type;
26807 +       sk->sk_sleep = &sock->wait;
26808 +       sock->sk = sk;
26809  
26810         x25_init_timers(sk);
26811  
26812 diff -NurpP --minimal linux-2.6.30.2/scripts/checksyscalls.sh linux-2.6.30.2-vs2.3.0.36.14-pre4/scripts/checksyscalls.sh
26813 --- linux-2.6.30.2/scripts/checksyscalls.sh     2008-12-25 00:26:37.000000000 +0100
26814 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/scripts/checksyscalls.sh  2009-07-04 01:11:39.000000000 +0200
26815 @@ -108,7 +108,6 @@ cat << EOF
26816  #define __IGNORE_afs_syscall
26817  #define __IGNORE_getpmsg
26818  #define __IGNORE_putpmsg
26819 -#define __IGNORE_vserver
26820  EOF
26821  }
26822  
26823 diff -NurpP --minimal linux-2.6.30.2/security/commoncap.c linux-2.6.30.2-vs2.3.0.36.14-pre4/security/commoncap.c
26824 --- linux-2.6.30.2/security/commoncap.c 2009-06-11 17:13:33.000000000 +0200
26825 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/security/commoncap.c      2009-07-04 01:11:39.000000000 +0200
26826 @@ -27,10 +27,11 @@
26827  #include <linux/sched.h>
26828  #include <linux/prctl.h>
26829  #include <linux/securebits.h>
26830 +#include <linux/vs_context.h>
26831  
26832  int cap_netlink_send(struct sock *sk, struct sk_buff *skb)
26833  {
26834 -       NETLINK_CB(skb).eff_cap = current_cap();
26835 +       NETLINK_CB(skb).eff_cap = vx_mbcaps(current_cap());
26836         return 0;
26837  }
26838  
26839 @@ -40,6 +41,7 @@ int cap_netlink_recv(struct sk_buff *skb
26840                 return -EPERM;
26841         return 0;
26842  }
26843 +
26844  EXPORT_SYMBOL(cap_netlink_recv);
26845  
26846  /**
26847 @@ -60,7 +62,22 @@ EXPORT_SYMBOL(cap_netlink_recv);
26848  int cap_capable(struct task_struct *tsk, const struct cred *cred, int cap,
26849                 int audit)
26850  {
26851 -       return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
26852 +       struct vx_info *vxi = tsk->vx_info;
26853 +
26854 +#if 0
26855 +       printk("cap_capable() VXF_STATE_SETUP = %llx, raised = %x, eff = %08x:%08x\n",
26856 +               vx_info_flags(vxi, VXF_STATE_SETUP, 0),
26857 +               cap_raised(tsk->cap_effective, cap),
26858 +               tsk->cap_effective.cap[1], tsk->cap_effective.cap[0]);
26859 +#endif
26860 +
26861 +       /* special case SETUP */
26862 +       if (vx_info_flags(vxi, VXF_STATE_SETUP, 0) &&
26863 +               /* FIXME: maybe use cred instead? */
26864 +               cap_raised(tsk->cred->cap_effective, cap))
26865 +               return 0;
26866 +
26867 +       return vx_cap_raised(vxi, cred->cap_effective, cap) ? 0 : -EPERM;
26868  }
26869  
26870  /**
26871 @@ -586,7 +603,7 @@ int cap_inode_setxattr(struct dentry *de
26872  
26873         if (!strncmp(name, XATTR_SECURITY_PREFIX,
26874                      sizeof(XATTR_SECURITY_PREFIX) - 1)  &&
26875 -           !capable(CAP_SYS_ADMIN))
26876 +               !vx_capable(CAP_SYS_ADMIN, VXC_FS_SECURITY))
26877                 return -EPERM;
26878         return 0;
26879  }
26880 @@ -930,7 +947,8 @@ error:
26881   */
26882  int cap_syslog(int type)
26883  {
26884 -       if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN))
26885 +       if ((type != 3 && type != 10) &&
26886 +               !vx_capable(CAP_SYS_ADMIN, VXC_SYSLOG))
26887                 return -EPERM;
26888         return 0;
26889  }
26890 @@ -952,3 +970,4 @@ int cap_vm_enough_memory(struct mm_struc
26891                 cap_sys_admin = 1;
26892         return __vm_enough_memory(mm, pages, cap_sys_admin);
26893  }
26894 +
26895 diff -NurpP --minimal linux-2.6.30.2/security/selinux/hooks.c linux-2.6.30.2-vs2.3.0.36.14-pre4/security/selinux/hooks.c
26896 --- linux-2.6.30.2/security/selinux/hooks.c     2009-06-11 17:13:33.000000000 +0200
26897 +++ linux-2.6.30.2-vs2.3.0.36.14-pre4/security/selinux/hooks.c  2009-07-04 01:11:39.000000000 +0200
26898 @@ -64,7 +64,6 @@
26899  #include <linux/dccp.h>
26900  #include <linux/quota.h>
26901  #include <linux/un.h>          /* for Unix socket types */
26902 -#include <net/af_unix.h>       /* for Unix socket types */
26903  #include <linux/parser.h>
26904  #include <linux/nfs_mount.h>
26905  #include <net/ipv6.h>
This page took 2.65046 seconds and 3 git commands to generate.