git.pld-linux.org Git - packages/kernel.git/blob - linux-2.6-vs2.1.patch
- updated to 2.6.16.53
[packages/kernel.git] / linux-2.6-vs2.1.patch
1 diff -NurpP --minimal linux-2.6.16.20/Documentation/vserver/debug.txt linux-2.6.16.20-vs2.1.1-rc22/Documentation/vserver/debug.txt
2 --- linux-2.6.16.20/Documentation/vserver/debug.txt     1970-01-01 01:00:00 +0100
3 +++ linux-2.6.16.20-vs2.1.1-rc22/Documentation/vserver/debug.txt        2006-04-26 19:06:59 +0200
4 @@ -0,0 +1,108 @@
5 +
6 +debug_cvirt:
7 +
8 + 2   4 "vx_map_tgid: %p/%llx: %d -> %d"
9 +       "vx_rmap_tgid: %p/%llx: %d -> %d"
10 +
11 +debug_dlim:
12 +
13 + 0   1 "ALLOC (%p,#%d)%c inode (%d)"
14 +       "FREE  (%p,#%d)%c inode"
15 + 1   2 "ALLOC (%p,#%d)%c %lld bytes (%d)"
16 +       "FREE  (%p,#%d)%c %lld bytes"
17 + 2   4 "ADJUST: %lld,%lld on %d,%d [mult=%d]"
18 + 3   8 "ext3_has_free_blocks(%p): %u<%u+1, %c, %u!=%u r=%d"
19 +       "ext3_has_free_blocks(%p): free=%u, root=%u"
20 +       "rcu_free_dl_info(%p)"
21 + 4  10 "alloc_dl_info(%p,%d) = %p"
22 +       "dealloc_dl_info(%p)"
23 +       "get_dl_info(%p[#%d.%d])"
24 +       "put_dl_info(%p[#%d.%d])"
25 + 5  20 "alloc_dl_info(%p,%d)*"
26 + 6  40 "__hash_dl_info: %p[#%d]"
27 +       "__unhash_dl_info: %p[#%d]"
28 + 7  80 "locate_dl_info(%p,#%d) = %p"
29 +
30 +debug_net:
31 +
32 + 2   4 "nx_addr_conflict(%p,%p) %d.%d,%d.%d"
33 + 3   8 "inet_bind(%p) %d.%d.%d.%d, %d.%d.%d.%d, %d.%d.%d.%d"
34 +       "inet_bind(%p)* %p,%p;%lx %d.%d.%d.%d"
35 + 4  10 "ip_route_connect(%p) %p,%p;%lx"
36 + 5  20 "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx"
37 + 6  40 "sk,egf: %p [#%d] (from %d)"
38 +       "sk,egn: %p [#%d] (from %d)"
39 +       "sk,req: %p [#%d] (from %d)"
40 +       "sk: %p [#%d] (from %d)"
41 +       "tw: %p [#%d] (from %d)"
42 + 7  80 "__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d"
43 +       "__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d"
44 +
45 +debug_nid:
46 +
47 + 0   1 "__lookup_nx_info(#%u): %p[#%u]"
48 +       "alloc_nx_info(%d) = %p"
49 +       "create_nx_info(%d) (dynamic rejected)"
50 +       "create_nx_info(%d) = %p (already there)"
51 +       "create_nx_info(%d) = %p (new)"
52 +       "dealloc_nx_info(%p)"
53 + 1   2 "alloc_nx_info(%d)*"
54 +       "create_nx_info(%d)*"
55 + 2   4 "get_nx_info(%p[#%d.%d])"
56 +       "put_nx_info(%p[#%d.%d])"
57 + 3   8 "claim_nx_info(%p[#%d.%d.%d]) %p"
58 +       "clr_nx_info(%p[#%d.%d])"
59 +       "init_nx_info(%p[#%d.%d])"
60 +       "release_nx_info(%p[#%d.%d.%d]) %p"
61 +       "set_nx_info(%p[#%d.%d])"
62 + 4  10 "__hash_nx_info: %p[#%d]"
63 +       "__nx_dynamic_id: [#%d]"
64 +       "__unhash_nx_info: %p[#%d]"
65 + 5  20 "moved task %p into nxi:%p[#%d]"
66 +       "nx_migrate_task(%p,%p[#%d.%d.%d])"
67 +       "task_get_nx_info(%p)"
68 +
69 +debug_switch:
70 +
71 + 0   1 "vc: VCMD_%02d_%d[%d], %d,%p,%d"
72 + 1   2 "vc: VCMD_%02d_%d[%d] = %08lx(%ld)"
73 + 4  10 "%s: (%s %s) returned %s with %d"
74 +
75 +debug_xid:
76 +
77 + 0   1 "__lookup_vx_info(#%u): %p[#%u]"
78 +       "alloc_vx_info(%d) = %p"
79 +       "alloc_vx_info(%d)*"
80 +       "create_vx_info(%d) (dynamic rejected)"
81 +       "create_vx_info(%d) = %p (already there)"
82 +       "create_vx_info(%d) = %p (new)"
83 +       "dealloc_vx_info(%p)"
84 + 1   2 "create_vx_info(%d)*"
85 + 2   4 "get_vx_info(%p[#%d.%d])"
86 +       "put_vx_info(%p[#%d.%d])"
87 + 3   8 "claim_vx_info(%p[#%d.%d.%d]) %p"
88 +       "clr_vx_info(%p[#%d.%d])"
89 +       "init_vx_info(%p[#%d.%d])"
90 +       "release_vx_info(%p[#%d.%d.%d]) %p"
91 +       "set_vx_info(%p[#%d.%d])"
92 + 4  10 "__hash_vx_info: %p[#%d]"
93 +       "__unhash_vx_info: %p[#%d]"
94 +       "__vx_dynamic_id: [#%d]"
95 + 5  20 "moved task %p into vxi:%p[#%d]"
96 +       "task_get_vx_info(%p)"
97 +       "vx_migrate_task(%p,%p[#%d.%d])"
98 + 6  40 "vx_set_init(%p[#%d],%p[#%d,%d,%d])"
99 +       "vx_exit_init(%p[#%d],%p[#%d,%d,%d])"
100 +       "vx_set_reaper(%p[#%d],%p[#%d,%d])"
101 + 7  80 "vx_parse_xid(»%s«): %d:#%d"
102 +       "vx_propagate_xid(%p[#%lu.%d]): %d,%d"
103 +
104 +
105 +debug_limit:
106 +
107 + n 2^n "vx_acc_cres[%5d,%s,%2d]: %5d%s"
108 +       "vx_cres_avail[%5d,%s,%2d]: %5ld > %5d + %5d"
109 +
110 + m 2^m "vx_acc_page[%5d,%s,%2d]: %5d%s"
111 +       "vx_acc_pages[%5d,%s,%2d]: %5d += %5d"
112 +       "vx_pages_avail[%5d,%s,%2d]: %5ld > %5d + %5d"
113 diff -NurpP --minimal linux-2.6.16.20/arch/alpha/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/Kconfig
114 --- linux-2.6.16.20/arch/alpha/Kconfig  2006-02-18 14:39:40 +0100
115 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/Kconfig     2006-04-26 19:06:59 +0200
116 @@ -619,6 +619,8 @@ source "arch/alpha/oprofile/Kconfig"
117  
118  source "arch/alpha/Kconfig.debug"
119  
120 +source "kernel/vserver/Kconfig"
121 +
122  source "security/Kconfig"
123  
124  source "crypto/Kconfig"
125 diff -NurpP --minimal linux-2.6.16.20/arch/alpha/kernel/asm-offsets.c linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/kernel/asm-offsets.c
126 --- linux-2.6.16.20/arch/alpha/kernel/asm-offsets.c     2006-02-15 13:54:10 +0100
127 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/kernel/asm-offsets.c        2006-04-26 19:06:59 +0200
128 @@ -36,6 +36,7 @@ void foo(void)
129         DEFINE(PT_PTRACED, PT_PTRACED);
130         DEFINE(CLONE_VM, CLONE_VM);
131         DEFINE(CLONE_UNTRACED, CLONE_UNTRACED);
132 +       DEFINE(CLONE_KTHREAD, CLONE_KTHREAD);
133         DEFINE(SIGCHLD, SIGCHLD);
134         BLANK();
135  
136 diff -NurpP --minimal linux-2.6.16.20/arch/alpha/kernel/entry.S linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/kernel/entry.S
137 --- linux-2.6.16.20/arch/alpha/kernel/entry.S   2006-04-09 13:49:39 +0200
138 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/kernel/entry.S      2006-04-26 19:06:59 +0200
139 @@ -645,7 +645,7 @@ kernel_thread:
140         stq     $2, 152($sp)            /* HAE */
141  
142         /* Shuffle FLAGS to the front; add CLONE_VM.  */
143 -       ldi     $1, CLONE_VM|CLONE_UNTRACED
144 +       ldi     $1, CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD;
145         or      $18, $1, $16
146         bsr     $26, sys_clone
147  
148 @@ -874,24 +874,15 @@ sys_getxgid:
149         .globl  sys_getxpid
150         .ent    sys_getxpid
151  sys_getxpid:
152 +       lda     $sp, -16($sp)
153 +       stq     $26, 0($sp)
154         .prologue 0
155 -       ldq     $2, TI_TASK($8)
156  
157 -       /* See linux/kernel/timer.c sys_getppid for discussion
158 -          about this loop.  */
159 -       ldq     $3, TASK_GROUP_LEADER($2)
160 -       ldq     $4, TASK_REAL_PARENT($3)
161 -       ldl     $0, TASK_TGID($2)
162 -1:     ldl     $1, TASK_TGID($4)
163 -#ifdef CONFIG_SMP
164 -       mov     $4, $5
165 -       mb
166 -       ldq     $3, TASK_GROUP_LEADER($2)
167 -       ldq     $4, TASK_REAL_PARENT($3)
168 -       cmpeq   $4, $5, $5
169 -       beq     $5, 1b
170 -#endif
171 -       stq     $1, 80($sp)
172 +       lda     $16, 96($sp)
173 +       jsr     $26, do_getxpid
174 +       ldq     $26, 0($sp)
175 +
176 +       lda     $sp, 16($sp)
177         ret
178  .end sys_getxpid
179  
180 diff -NurpP --minimal linux-2.6.16.20/arch/alpha/kernel/osf_sys.c linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/kernel/osf_sys.c
181 --- linux-2.6.16.20/arch/alpha/kernel/osf_sys.c 2006-02-15 13:54:10 +0100
182 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/kernel/osf_sys.c    2006-05-29 16:49:23 +0200
183 @@ -38,6 +38,7 @@
184  #include <linux/uio.h>
185  #include <linux/vfs.h>
186  #include <linux/rcupdate.h>
187 +#include <linux/vs_cvirt.h>
188  
189  #include <asm/fpu.h>
190  #include <asm/io.h>
191 @@ -399,18 +400,20 @@ asmlinkage int
192  osf_utsname(char __user *name)
193  {
194         int error;
195 +       struct new_utsname *ptr;
196  
197         down_read(&uts_sem);
198 +       ptr = vx_new_utsname();
199         error = -EFAULT;
200 -       if (copy_to_user(name + 0, system_utsname.sysname, 32))
201 +       if (copy_to_user(name + 0, ptr->sysname, 32))
202                 goto out;
203 -       if (copy_to_user(name + 32, system_utsname.nodename, 32))
204 +       if (copy_to_user(name + 32, ptr->nodename, 32))
205                 goto out;
206 -       if (copy_to_user(name + 64, system_utsname.release, 32))
207 +       if (copy_to_user(name + 64, ptr->release, 32))
208                 goto out;
209 -       if (copy_to_user(name + 96, system_utsname.version, 32))
210 +       if (copy_to_user(name + 96, ptr->version, 32))
211                 goto out;
212 -       if (copy_to_user(name + 128, system_utsname.machine, 32))
213 +       if (copy_to_user(name + 128, ptr->machine, 32))
214                 goto out;
215  
216         error = 0;
217 @@ -439,6 +442,7 @@ osf_getdomainname(char __user *name, int
218  {
219         unsigned len;
220         int i;
221 +       char *domainname;
222  
223         if (!access_ok(VERIFY_WRITE, name, namelen))
224                 return -EFAULT;
225 @@ -448,9 +452,10 @@ osf_getdomainname(char __user *name, int
226                 len = 32;
227  
228         down_read(&uts_sem);
229 +       domainname = vx_new_uts(domainname);
230         for (i = 0; i < len; ++i) {
231 -               __put_user(system_utsname.domainname[i], name + i);
232 -               if (system_utsname.domainname[i] == '\0')
233 +               __put_user(domainname[i], name + i);
234 +               if (domainname[i] == '\0')
235                         break;
236         }
237         up_read(&uts_sem);
238 @@ -607,30 +612,30 @@ osf_sigstack(struct sigstack __user *uss
239  asmlinkage long
240  osf_sysinfo(int command, char __user *buf, long count)
241  {
242 -       static char * sysinfo_table[] = {
243 -               system_utsname.sysname,
244 -               system_utsname.nodename,
245 -               system_utsname.release,
246 -               system_utsname.version,
247 -               system_utsname.machine,
248 -               "alpha",        /* instruction set architecture */
249 -               "dummy",        /* hardware serial number */
250 -               "dummy",        /* hardware manufacturer */
251 -               "dummy",        /* secure RPC domain */
252 -       };
253         unsigned long offset;
254         char *res;
255         long len, err = -EINVAL;
256  
257         offset = command-1;
258 -       if (offset >= sizeof(sysinfo_table)/sizeof(char *)) {
259 +       if (offset >= 9) {
260                 /* Digital UNIX has a few unpublished interfaces here */
261                 printk("sysinfo(%d)", command);
262                 goto out;
263         }
264         
265         down_read(&uts_sem);
266 -       res = sysinfo_table[offset];
267 +       switch (offset)
268 +       {
269 +       case 0: res = vx_new_uts(sysname);  break;
270 +       case 1: res = vx_new_uts(nodename); break;
271 +       case 2: res = vx_new_uts(release);  break;
272 +       case 3: res = vx_new_uts(version);  break;
273 +       case 4: res = vx_new_uts(machine);  break;
274 +       case 5: res = "alpha";              break;
275 +       default:
276 +               res = "dummy";
277 +               break;
278 +       }
279         len = strlen(res)+1;
280         if (len > count)
281                 len = count;
282 @@ -882,7 +887,7 @@ osf_gettimeofday(struct timeval32 __user
283  {
284         if (tv) {
285                 struct timeval ktv;
286 -               do_gettimeofday(&ktv);
287 +               vx_gettimeofday(&ktv);
288                 if (put_tv32(tv, &ktv))
289                         return -EFAULT;
290         }
291 diff -NurpP --minimal linux-2.6.16.20/arch/alpha/kernel/ptrace.c linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/kernel/ptrace.c
292 --- linux-2.6.16.20/arch/alpha/kernel/ptrace.c  2006-04-09 13:49:39 +0200
293 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/kernel/ptrace.c     2006-04-28 05:07:10 +0200
294 @@ -283,6 +283,11 @@ do_sys_ptrace(long request, long pid, lo
295                 goto out_notsk;
296         }
297  
298 +       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) {
299 +               ret = -EPERM;
300 +               goto out;
301 +       }
302 +
303         if (request == PTRACE_ATTACH) {
304                 ret = ptrace_attach(child);
305                 goto out;
306 diff -NurpP --minimal linux-2.6.16.20/arch/alpha/kernel/systbls.S linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/kernel/systbls.S
307 --- linux-2.6.16.20/arch/alpha/kernel/systbls.S 2005-08-29 22:24:49 +0200
308 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/kernel/systbls.S    2006-04-26 19:06:59 +0200
309 @@ -447,7 +447,7 @@ sys_call_table:
310         .quad sys_stat64                        /* 425 */
311         .quad sys_lstat64
312         .quad sys_fstat64
313 -       .quad sys_ni_syscall                    /* sys_vserver */
314 +       .quad sys_vserver                       /* sys_vserver */
315         .quad sys_ni_syscall                    /* sys_mbind */
316         .quad sys_ni_syscall                    /* sys_get_mempolicy */
317         .quad sys_ni_syscall                    /* sys_set_mempolicy */
318 diff -NurpP --minimal linux-2.6.16.20/arch/alpha/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/kernel/traps.c
319 --- linux-2.6.16.20/arch/alpha/kernel/traps.c   2005-10-28 20:49:08 +0200
320 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/kernel/traps.c      2006-04-26 19:06:59 +0200
321 @@ -183,7 +183,8 @@ die_if_kernel(char * str, struct pt_regs
322  #ifdef CONFIG_SMP
323         printk("CPU %d ", hard_smp_processor_id());
324  #endif
325 -       printk("%s(%d): %s %ld\n", current->comm, current->pid, str, err);
326 +       printk("%s(%d[#%u]): %s %ld\n", current->comm,
327 +               current->pid, current->xid, str, err);
328         dik_show_regs(regs, r9_15);
329         dik_show_trace((unsigned long *)(regs+1));
330         dik_show_code((unsigned int *)regs->pc);
331 diff -NurpP --minimal linux-2.6.16.20/arch/alpha/mm/init.c linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/mm/init.c
332 --- linux-2.6.16.20/arch/alpha/mm/init.c        2006-02-18 14:39:40 +0100
333 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/mm/init.c   2006-04-26 19:06:59 +0200
334 @@ -21,6 +21,7 @@
335  #include <linux/init.h>
336  #include <linux/bootmem.h> /* max_low_pfn */
337  #include <linux/vmalloc.h>
338 +#include <linux/pagemap.h>
339  
340  #include <asm/system.h>
341  #include <asm/uaccess.h>
342 diff -NurpP --minimal linux-2.6.16.20/arch/arm/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/arm/Kconfig
343 --- linux-2.6.16.20/arch/arm/Kconfig    2006-04-09 13:49:39 +0200
344 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/arm/Kconfig       2006-04-26 19:06:59 +0200
345 @@ -827,6 +827,8 @@ source "arch/arm/oprofile/Kconfig"
346  
347  source "arch/arm/Kconfig.debug"
348  
349 +source "kernel/vserver/Kconfig"
350 +
351  source "security/Kconfig"
352  
353  source "crypto/Kconfig"
354 diff -NurpP --minimal linux-2.6.16.20/arch/arm/kernel/calls.S linux-2.6.16.20-vs2.1.1-rc22/arch/arm/kernel/calls.S
355 --- linux-2.6.16.20/arch/arm/kernel/calls.S     2006-02-18 14:39:40 +0100
356 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/arm/kernel/calls.S        2006-04-26 19:06:59 +0200
357 @@ -322,7 +322,7 @@
358  /* 310 */      CALL(sys_request_key)
359                 CALL(sys_keyctl)
360                 CALL(ABI(sys_semtimedop, sys_oabi_semtimedop))
361 -/* vserver */  CALL(sys_ni_syscall)
362 +               CALL(sys_vserver)
363                 CALL(sys_ioprio_set)
364  /* 315 */      CALL(sys_ioprio_get)
365                 CALL(sys_inotify_init)
366 diff -NurpP --minimal linux-2.6.16.20/arch/arm/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/arm/kernel/process.c
367 --- linux-2.6.16.20/arch/arm/kernel/process.c   2006-04-09 13:49:40 +0200
368 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/arm/kernel/process.c      2006-04-26 19:06:59 +0200
369 @@ -227,7 +227,8 @@ void __show_regs(struct pt_regs *regs)
370  void show_regs(struct pt_regs * regs)
371  {
372         printk("\n");
373 -       printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
374 +       printk("Pid: %d[#%u], comm: %20s\n",
375 +               current->pid, current->xid, current->comm);
376         __show_regs(regs);
377         __backtrace();
378  }
379 @@ -448,7 +449,8 @@ pid_t kernel_thread(int (*fn)(void *), v
380         regs.ARM_pc = (unsigned long)kernel_thread_helper;
381         regs.ARM_cpsr = SVC_MODE;
382  
383 -       return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
384 +       return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD,
385 +               0, &regs, 0, NULL, NULL);
386  }
387  EXPORT_SYMBOL(kernel_thread);
388  
389 diff -NurpP --minimal linux-2.6.16.20/arch/arm26/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/arm26/Kconfig
390 --- linux-2.6.16.20/arch/arm26/Kconfig  2006-02-18 14:39:41 +0100
391 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/arm26/Kconfig     2006-04-26 19:06:59 +0200
392 @@ -230,6 +230,8 @@ source "drivers/usb/Kconfig"
393  
394  source "arch/arm26/Kconfig.debug"
395  
396 +source "kernel/vserver/Kconfig"
397 +
398  source "security/Kconfig"
399  
400  source "crypto/Kconfig"
401 diff -NurpP --minimal linux-2.6.16.20/arch/arm26/kernel/calls.S linux-2.6.16.20-vs2.1.1-rc22/arch/arm26/kernel/calls.S
402 --- linux-2.6.16.20/arch/arm26/kernel/calls.S   2005-03-02 12:38:19 +0100
403 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/arm26/kernel/calls.S      2006-04-26 19:06:59 +0200
404 @@ -257,6 +257,11 @@ __syscall_start:
405                 .long   sys_lremovexattr
406                 .long   sys_fremovexattr
407                 .long   sys_tkill
408 +
409 +               .rept   313 - (. - __syscall_start) / 4
410 +                       .long   sys_ni_syscall
411 +               .endr
412 +               .long   sys_vserver     /* 313 */
413  __syscall_end:
414  
415                 .rept   NR_syscalls - (__syscall_end - __syscall_start) / 4
416 diff -NurpP --minimal linux-2.6.16.20/arch/arm26/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/arm26/kernel/process.c
417 --- linux-2.6.16.20/arch/arm26/kernel/process.c 2006-01-18 06:07:51 +0100
418 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/arm26/kernel/process.c    2006-04-26 19:06:59 +0200
419 @@ -366,7 +366,8 @@ pid_t kernel_thread(int (*fn)(void *), v
420          regs.ARM_r3 = (unsigned long)do_exit;
421          regs.ARM_pc = (unsigned long)kernel_thread_helper | MODE_SVC26;
422  
423 -        return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
424 +       return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD,
425 +               0, &regs, 0, NULL, NULL);
426  }
427  EXPORT_SYMBOL(kernel_thread);
428  
429 diff -NurpP --minimal linux-2.6.16.20/arch/arm26/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/arm26/kernel/traps.c
430 --- linux-2.6.16.20/arch/arm26/kernel/traps.c   2006-01-18 06:07:51 +0100
431 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/arm26/kernel/traps.c      2006-04-26 19:06:59 +0200
432 @@ -186,8 +186,9 @@ NORET_TYPE void die(const char *str, str
433         printk("Internal error: %s: %x\n", str, err);
434         printk("CPU: %d\n", smp_processor_id());
435         show_regs(regs);
436 -       printk("Process %s (pid: %d, stack limit = 0x%p)\n",
437 -               current->comm, current->pid, end_of_stack(tsk));
438 +       printk("Process %s (pid: %d[#%u], stack limit = 0x%p)\n",
439 +               current->comm, current->pid,
440 +               current->xid, end_of_stack(tsk));
441  
442         if (!user_mode(regs) || in_interrupt()) {
443                 __dump_stack(tsk, (unsigned long)(regs + 1));
444 diff -NurpP --minimal linux-2.6.16.20/arch/cris/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/cris/Kconfig
445 --- linux-2.6.16.20/arch/cris/Kconfig   2006-02-18 14:39:42 +0100
446 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/cris/Kconfig      2006-04-26 19:06:59 +0200
447 @@ -173,6 +173,8 @@ source "drivers/usb/Kconfig"
448  
449  source "arch/cris/Kconfig.debug"
450  
451 +source "kernel/vserver/Kconfig"
452 +
453  source "security/Kconfig"
454  
455  source "crypto/Kconfig"
456 diff -NurpP --minimal linux-2.6.16.20/arch/cris/arch-v10/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/cris/arch-v10/kernel/process.c
457 --- linux-2.6.16.20/arch/cris/arch-v10/kernel/process.c 2006-01-18 06:07:51 +0100
458 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/cris/arch-v10/kernel/process.c    2006-04-26 19:06:59 +0200
459 @@ -104,7 +104,8 @@ int kernel_thread(int (*fn)(void *), voi
460         regs.dccr = 1 << I_DCCR_BITNR;
461  
462         /* Ok, create the new process.. */
463 -        return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
464 +       return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD,
465 +               0, &regs, 0, NULL, NULL);
466  }
467  
468  /* setup the child's kernel stack with a pt_regs and switch_stack on it.
469 diff -NurpP --minimal linux-2.6.16.20/arch/cris/arch-v32/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/cris/arch-v32/kernel/process.c
470 --- linux-2.6.16.20/arch/cris/arch-v32/kernel/process.c 2006-01-18 06:07:51 +0100
471 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/cris/arch-v32/kernel/process.c    2006-04-26 19:06:59 +0200
472 @@ -121,7 +121,8 @@ kernel_thread(int (*fn)(void *), void * 
473         regs.ccs = 1 << (I_CCS_BITNR + CCS_SHIFT);
474  
475         /* Create the new process. */
476 -        return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
477 +       return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD,
478 +               0, &regs, 0, NULL, NULL);
479  }
480  
481  /*
482 diff -NurpP --minimal linux-2.6.16.20/arch/frv/kernel/kernel_thread.S linux-2.6.16.20-vs2.1.1-rc22/arch/frv/kernel/kernel_thread.S
483 --- linux-2.6.16.20/arch/frv/kernel/kernel_thread.S     2005-03-02 12:38:20 +0100
484 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/frv/kernel/kernel_thread.S        2006-04-26 19:06:59 +0200
485 @@ -13,6 +13,8 @@
486  #include <asm/unistd.h>
487  
488  #define CLONE_VM       0x00000100      /* set if VM shared between processes */
489 +#define CLONE_KTHREAD  0x10000000      /* kernel thread */
490 +#define CLONE_KT       (CLONE_VM | CLONE_KTHREAD)      /* kernel thread flags */
491  #define        KERN_ERR        "<3>"
492  
493         .section .rodata
494 @@ -37,7 +39,7 @@ kernel_thread:
495  
496         # start by forking the current process, but with shared VM
497         setlos.p        #__NR_clone,gr7         ; syscall number
498 -       ori             gr10,#CLONE_VM,gr8      ; first syscall arg     [clone_flags]
499 +       ori             gr10,#CLONE_KT,gr8      ; first syscall arg     [clone_flags]
500         sethi.p         #0xe4e4,gr9             ; second syscall arg    [newsp]
501         setlo           #0xe4e4,gr9
502         setlos.p        #0,gr10                 ; third syscall arg     [parent_tidptr]
503 diff -NurpP --minimal linux-2.6.16.20/arch/frv/mm/mmu-context.c linux-2.6.16.20-vs2.1.1-rc22/arch/frv/mm/mmu-context.c
504 --- linux-2.6.16.20/arch/frv/mm/mmu-context.c   2005-03-02 12:38:20 +0100
505 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/frv/mm/mmu-context.c      2006-04-26 19:06:59 +0200
506 @@ -11,6 +11,7 @@
507  
508  #include <linux/sched.h>
509  #include <linux/mm.h>
510 +#include <linux/vs_pid.h>
511  #include <asm/tlbflush.h>
512  
513  #define NR_CXN 4096
514 diff -NurpP --minimal linux-2.6.16.20/arch/h8300/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/h8300/Kconfig
515 --- linux-2.6.16.20/arch/h8300/Kconfig  2006-02-18 14:39:42 +0100
516 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/h8300/Kconfig     2006-04-26 19:06:59 +0200
517 @@ -191,6 +191,8 @@ source "fs/Kconfig"
518  
519  source "arch/h8300/Kconfig.debug"
520  
521 +source "kernel/vserver/Kconfig"
522 +
523  source "security/Kconfig"
524  
525  source "crypto/Kconfig"
526 diff -NurpP --minimal linux-2.6.16.20/arch/h8300/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/h8300/kernel/process.c
527 --- linux-2.6.16.20/arch/h8300/kernel/process.c 2006-04-09 13:49:41 +0200
528 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/h8300/kernel/process.c    2006-04-26 19:06:59 +0200
529 @@ -135,7 +135,7 @@ int kernel_thread(int (*fn)(void *), voi
530  
531         fs = get_fs();
532         set_fs (KERNEL_DS);
533 -       clone_arg = flags | CLONE_VM;
534 +       clone_arg = flags | CLONE_VM | CLONE_KTHREAD;
535         __asm__("mov.l sp,er3\n\t"
536                 "sub.l er2,er2\n\t"
537                 "mov.l %2,er1\n\t"
538 diff -NurpP --minimal linux-2.6.16.20/arch/i386/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/i386/Kconfig
539 --- linux-2.6.16.20/arch/i386/Kconfig   2006-04-09 13:49:41 +0200
540 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/i386/Kconfig      2006-04-26 19:06:59 +0200
541 @@ -466,23 +466,43 @@ choice
542           will also likely make your kernel incompatible with binary-only
543           kernel modules.
544  
545 -         If you are not absolutely sure what you are doing, leave this
546 -         option alone!
547 -
548         config VMSPLIT_3G
549 -               bool "3G/1G user/kernel split"
550 -       config VMSPLIT_3G_OPT
551 -               bool "3G/1G user/kernel split (for full 1G low memory)"
552 +               bool "3G/1G user/kernel split (Default)"
553 +               help
554 +                 This is the default split of 3GB userspace to 1GB kernel
555 +                 space, which will result in about 860MB of lowmem.
556 +
557 +       config VMSPLIT_25G
558 +               bool "2.5G/1.5G user/kernel split"
559 +               help
560 +                 This split provides 2.5GB userspace and 1.5GB kernel
561 +                 space, which will result in about 1370MB of lowmem.
562 +
563         config VMSPLIT_2G
564                 bool "2G/2G user/kernel split"
565 +               help
566 +                 This split provides 2GB userspace and 2GB kernel
567 +                 space, which will result in about 1880MB of lowmem.
568 +
569 +       config VMSPLIT_15G
570 +               bool "1.5G/2.5G user/kernel split"
571 +               help
572 +                 This split provides 1.5GB userspace and 2.5GB kernel
573 +                 space, which will result in about 2390MB of lowmem.
574 +
575         config VMSPLIT_1G
576                 bool "1G/3G user/kernel split"
577 +               help
578 +                 This split provides 1GB userspace and 3GB kernel
579 +                 space, which will result in about 2900MB of lowmem.
580 +
581  endchoice
582  
583  config PAGE_OFFSET
584         hex
585 -       default 0xB0000000 if VMSPLIT_3G_OPT
586 -       default 0x78000000 if VMSPLIT_2G
587 +       default 0xA0000000 if VMSPLIT_25G
588 +       default 0x80000000 if VMSPLIT_2G
589 +       default 0x60000000 if VMSPLIT_15G
590         default 0x40000000 if VMSPLIT_1G
591         default 0xC0000000
592  
593 @@ -1071,6 +1091,8 @@ endmenu
594  
595  source "arch/i386/Kconfig.debug"
596  
597 +source "kernel/vserver/Kconfig"
598 +
599  source "security/Kconfig"
600  
601  source "crypto/Kconfig"
602 diff -NurpP --minimal linux-2.6.16.20/arch/i386/boot/compressed/misc.c linux-2.6.16.20-vs2.1.1-rc22/arch/i386/boot/compressed/misc.c
603 --- linux-2.6.16.20/arch/i386/boot/compressed/misc.c    2006-04-09 13:49:42 +0200
604 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/i386/boot/compressed/misc.c       2006-04-26 19:06:59 +0200
605 @@ -309,7 +309,7 @@ static void setup_normal_output_buffer(v
606  #else
607         if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
608  #endif
609 -       output_data = (char *)__PHYSICAL_START; /* Normally Points to 1M */
610 +       output_data = (char *)PHYSICAL_START; /* Normally Points to 1M */
611         free_mem_end_ptr = (long)real_mode;
612  }
613  
614 @@ -334,8 +334,8 @@ static void setup_output_buffer_if_we_ru
615         low_buffer_size = low_buffer_end - LOW_BUFFER_START;
616         high_loaded = 1;
617         free_mem_end_ptr = (long)high_buffer_start;
618 -       if ( (__PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) {
619 -               high_buffer_start = (uch *)(__PHYSICAL_START + low_buffer_size);
620 +       if ((PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) {
621 +               high_buffer_start = (uch *)(PHYSICAL_START + low_buffer_size);
622                 mv->hcount = 0; /* say: we need not to move high_buffer */
623         }
624         else mv->hcount = -1;
625 diff -NurpP --minimal linux-2.6.16.20/arch/i386/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/i386/kernel/process.c
626 --- linux-2.6.16.20/arch/i386/kernel/process.c  2006-02-15 13:54:10 +0100
627 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/i386/kernel/process.c     2006-04-26 19:06:59 +0200
628 @@ -291,8 +291,10 @@ void show_regs(struct pt_regs * regs)
629         unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
630  
631         printk("\n");
632 -       printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
633 -       printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
634 +       printk("Pid: %d[#%u], comm: %20s\n",
635 +               current->pid, current->xid, current->comm);
636 +       printk("EIP: %04x:[<%08lx>] CPU: %d\n",
637 +               0xffff & regs->xcs,regs->eip, smp_processor_id());
638         print_symbol("EIP is at %s\n", regs->eip);
639  
640         if (user_mode(regs))
641 @@ -352,7 +354,8 @@ int kernel_thread(int (*fn)(void *), voi
642         regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
643  
644         /* Ok, create the new process.. */
645 -       return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
646 +       return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD,
647 +               0, &regs, 0, NULL, NULL);
648  }
649  EXPORT_SYMBOL(kernel_thread);
650  
651 diff -NurpP --minimal linux-2.6.16.20/arch/i386/kernel/setup.c linux-2.6.16.20-vs2.1.1-rc22/arch/i386/kernel/setup.c
652 --- linux-2.6.16.20/arch/i386/kernel/setup.c    2006-04-09 13:49:42 +0200
653 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/i386/kernel/setup.c       2006-04-26 19:06:59 +0200
654 @@ -1192,8 +1192,8 @@ void __init setup_bootmem_allocator(void
655          * the (very unlikely) case of us accidentally initializing the
656          * bootmem allocator with an invalid RAM area.
657          */
658 -       reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
659 -                        bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
660 +       reserve_bootmem(PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
661 +                        bootmap_size + PAGE_SIZE-1) - (PHYSICAL_START));
662  
663         /*
664          * reserve physical page 0 - it's a special BIOS page on many boxes,
665 diff -NurpP --minimal linux-2.6.16.20/arch/i386/kernel/sys_i386.c linux-2.6.16.20-vs2.1.1-rc22/arch/i386/kernel/sys_i386.c
666 --- linux-2.6.16.20/arch/i386/kernel/sys_i386.c 2004-08-14 12:56:23 +0200
667 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/i386/kernel/sys_i386.c    2006-04-26 19:06:59 +0200
668 @@ -19,6 +19,7 @@
669  #include <linux/mman.h>
670  #include <linux/file.h>
671  #include <linux/utsname.h>
672 +#include <linux/vs_cvirt.h>
673  
674  #include <asm/uaccess.h>
675  #include <asm/ipc.h>
676 @@ -217,7 +218,7 @@ asmlinkage int sys_uname(struct old_utsn
677         if (!name)
678                 return -EFAULT;
679         down_read(&uts_sem);
680 -       err=copy_to_user(name, &system_utsname, sizeof (*name));
681 +       err=copy_to_user(name, vx_new_utsname(), sizeof (*name));
682         up_read(&uts_sem);
683         return err?-EFAULT:0;
684  }
685 @@ -225,6 +226,7 @@ asmlinkage int sys_uname(struct old_utsn
686  asmlinkage int sys_olduname(struct oldold_utsname __user * name)
687  {
688         int error;
689 +       struct new_utsname *ptr;
690  
691         if (!name)
692                 return -EFAULT;
693 @@ -233,15 +235,16 @@ asmlinkage int sys_olduname(struct oldol
694    
695         down_read(&uts_sem);
696         
697 -       error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
698 +       ptr = vx_new_utsname();
699 +       error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN);
700         error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
701 -       error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
702 +       error |= __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN);
703         error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
704 -       error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
705 +       error |= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN);
706         error |= __put_user(0,name->release+__OLD_UTS_LEN);
707 -       error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
708 +       error |= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN);
709         error |= __put_user(0,name->version+__OLD_UTS_LEN);
710 -       error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
711 +       error |= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN);
712         error |= __put_user(0,name->machine+__OLD_UTS_LEN);
713         
714         up_read(&uts_sem);
715 diff -NurpP --minimal linux-2.6.16.20/arch/i386/kernel/syscall_table.S linux-2.6.16.20-vs2.1.1-rc22/arch/i386/kernel/syscall_table.S
716 --- linux-2.6.16.20/arch/i386/kernel/syscall_table.S    2006-02-18 14:39:43 +0100
717 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/i386/kernel/syscall_table.S       2006-04-26 19:06:59 +0200
718 @@ -272,7 +272,7 @@ ENTRY(sys_call_table)
719         .long sys_tgkill        /* 270 */
720         .long sys_utimes
721         .long sys_fadvise64_64
722 -       .long sys_ni_syscall    /* sys_vserver */
723 +       .long sys_vserver
724         .long sys_mbind
725         .long sys_get_mempolicy
726         .long sys_set_mempolicy
727 diff -NurpP --minimal linux-2.6.16.20/arch/i386/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/i386/kernel/traps.c
728 --- linux-2.6.16.20/arch/i386/kernel/traps.c    2006-02-18 14:39:43 +0100
729 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/i386/kernel/traps.c       2006-04-26 19:06:59 +0200
730 @@ -53,6 +53,8 @@
731  #include <asm/kdebug.h>
732  
733  #include <linux/module.h>
734 +#include <linux/vserver/debug.h>
735 +#include <linux/vserver/history.h>
736  
737  #include "mach_traps.h"
738  
739 @@ -252,8 +254,9 @@ void show_registers(struct pt_regs *regs
740                 regs->esi, regs->edi, regs->ebp, esp);
741         printk(KERN_EMERG "ds: %04x   es: %04x   ss: %04x\n",
742                 regs->xds & 0xffff, regs->xes & 0xffff, ss);
743 -       printk(KERN_EMERG "Process %s (pid: %d, threadinfo=%p task=%p)",
744 -               current->comm, current->pid, current_thread_info(), current);
745 +       printk(KERN_EMERG "Process %s (pid: %d[#%u], threadinfo=%p task=%p)",
746 +               current->comm, current->pid, current->xid,
747 +               current_thread_info(), current);
748         /*
749          * When in-kernel, we also print out the stack and code at the
750          * time of the fault..
751 @@ -333,6 +336,8 @@ void die(const char * str, struct pt_reg
752         static int die_counter;
753         unsigned long flags;
754  
755 +       vxh_throw_oops();
756 +
757         if (die.lock_owner != raw_smp_processor_id()) {
758                 console_verbose();
759                 spin_lock_irqsave(&die.lock, flags);
760 @@ -365,8 +370,9 @@ void die(const char * str, struct pt_reg
761  #endif
762                 if (nl)
763                         printk("\n");
764 -       notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
765 +               notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
766                 show_registers(regs);
767 +               vxh_dump_history();
768         } else
769                 printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
770  
771 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/Kconfig
772 --- linux-2.6.16.20/arch/ia64/Kconfig   2006-04-09 13:49:42 +0200
773 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/Kconfig      2006-04-26 19:06:59 +0200
774 @@ -464,6 +464,8 @@ endmenu
775  
776  source "arch/ia64/Kconfig.debug"
777  
778 +source "kernel/vserver/Kconfig"
779 +
780  source "security/Kconfig"
781  
782  source "crypto/Kconfig"
783 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/ia32/binfmt_elf32.c linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/ia32/binfmt_elf32.c
784 --- linux-2.6.16.20/arch/ia64/ia32/binfmt_elf32.c       2006-01-03 17:29:09 +0100
785 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/ia32/binfmt_elf32.c  2006-04-26 19:06:59 +0200
786 @@ -236,7 +236,8 @@ ia32_setup_arg_pages (struct linux_binpr
787                         kmem_cache_free(vm_area_cachep, mpnt);
788                         return ret;
789                 }
790 -               current->mm->stack_vm = current->mm->total_vm = vma_pages(mpnt);
791 +               vx_vmpages_sub(current->mm, current->mm->total_vm - vma_pages(mpnt));
792 +               current->mm->stack_vm = current->mm->total_vm;
793         }
794  
795         for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
796 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/ia32/ia32_entry.S linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/ia32/ia32_entry.S
797 --- linux-2.6.16.20/arch/ia64/ia32/ia32_entry.S 2006-04-09 13:49:42 +0200
798 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/ia32/ia32_entry.S    2006-04-26 19:06:59 +0200
799 @@ -483,7 +483,7 @@ ia32_syscall_table:
800         data8 sys_tgkill        /* 270 */
801         data8 compat_sys_utimes
802         data8 sys32_fadvise64_64
803 -       data8 sys_ni_syscall
804 +       data8 sys32_vserver
805         data8 sys_ni_syscall
806         data8 sys_ni_syscall    /* 275 */
807         data8 sys_ni_syscall
808 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/ia32/sys_ia32.c linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/ia32/sys_ia32.c
809 --- linux-2.6.16.20/arch/ia64/ia32/sys_ia32.c   2006-02-18 14:39:43 +0100
810 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/ia32/sys_ia32.c      2006-05-29 16:49:39 +0200
811 @@ -1191,7 +1191,7 @@ sys32_gettimeofday (struct compat_timeva
812  {
813         if (tv) {
814                 struct timeval ktv;
815 -               do_gettimeofday(&ktv);
816 +               vx_gettimeofday(&ktv);
817                 if (put_tv32(tv, &ktv))
818                         return -EFAULT;
819         }
820 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/kernel/asm-offsets.c linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/asm-offsets.c
821 --- linux-2.6.16.20/arch/ia64/kernel/asm-offsets.c      2005-10-28 20:49:10 +0200
822 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/asm-offsets.c 2006-04-26 19:06:59 +0200
823 @@ -192,6 +192,7 @@ void foo(void)
824      /* for assembly files which can't include sched.h: */
825         DEFINE(IA64_CLONE_VFORK, CLONE_VFORK);
826         DEFINE(IA64_CLONE_VM, CLONE_VM);
827 +       DEFINE(IA64_CLONE_KTHREAD, CLONE_KTHREAD);
828  
829         BLANK();
830         DEFINE(IA64_CPUINFO_NSEC_PER_CYC_OFFSET,
831 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/kernel/entry.S linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/entry.S
832 --- linux-2.6.16.20/arch/ia64/kernel/entry.S    2006-02-18 14:39:43 +0100
833 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/entry.S       2006-04-26 19:06:59 +0200
834 @@ -1591,7 +1591,7 @@ sys_call_table:
835         data8 sys_mq_notify
836         data8 sys_mq_getsetattr
837         data8 sys_ni_syscall                    // reserved for kexec_load
838 -       data8 sys_ni_syscall                    // reserved for vserver
839 +       data8 sys_vserver
840         data8 sys_waitid                        // 1270
841         data8 sys_add_key
842         data8 sys_request_key
843 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/kernel/perfmon.c linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/perfmon.c
844 --- linux-2.6.16.20/arch/ia64/kernel/perfmon.c  2006-02-18 14:39:43 +0100
845 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/perfmon.c     2006-04-26 19:06:59 +0200
846 @@ -41,6 +41,8 @@
847  #include <linux/capability.h>
848  #include <linux/rcupdate.h>
849  #include <linux/completion.h>
850 +#include <linux/vs_memory.h>
851 +#include <linux/vs_pid.h>
852  
853  #include <asm/errno.h>
854  #include <asm/intrinsics.h>
855 @@ -2355,7 +2357,7 @@ pfm_smpl_buffer_alloc(struct task_struct
856          */
857         insert_vm_struct(mm, vma);
858  
859 -       mm->total_vm  += size >> PAGE_SHIFT;
860 +       vx_vmpages_add(mm, size >> PAGE_SHIFT);
861         vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
862                                                         vma_pages(vma));
863         up_write(&task->mm->mmap_sem);
864 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/process.c
865 --- linux-2.6.16.20/arch/ia64/kernel/process.c  2006-01-18 06:07:53 +0100
866 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/process.c     2006-04-26 19:06:59 +0200
867 @@ -109,7 +109,8 @@ show_regs (struct pt_regs *regs)
868         unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
869  
870         print_modules();
871 -       printk("\nPid: %d, CPU %d, comm: %20s\n", current->pid, smp_processor_id(), current->comm);
872 +       printk("\nPid: %d[#%u], CPU %d, comm: %20s\n",
873 +               current->pid, current->xid, smp_processor_id(), current->comm);
874         printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]    %s\n",
875                regs->cr_ipsr, regs->cr_ifs, ip, print_tainted());
876         print_symbol("ip is at %s\n", ip);
877 @@ -692,7 +693,8 @@ kernel_thread (int (*fn)(void *), void *
878         regs.sw.ar_fpsr = regs.pt.ar_fpsr = ia64_getreg(_IA64_REG_AR_FPSR);
879         regs.sw.ar_bspstore = (unsigned long) current + IA64_RBS_OFFSET;
880         regs.sw.pr = (1 << PRED_KERNEL_STACK);
881 -       return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs.pt, 0, NULL, NULL);
882 +       return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD,
883 +               0, &regs.pt, 0, NULL, NULL);
884  }
885  EXPORT_SYMBOL(kernel_thread);
886  
887 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/kernel/ptrace.c linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/ptrace.c
888 --- linux-2.6.16.20/arch/ia64/kernel/ptrace.c   2006-02-18 14:39:43 +0100
889 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/ptrace.c      2006-04-26 19:06:59 +0200
890 @@ -18,6 +18,7 @@
891  #include <linux/security.h>
892  #include <linux/audit.h>
893  #include <linux/signal.h>
894 +#include <linux/vs_pid.h>
895  
896  #include <asm/pgtable.h>
897  #include <asm/processor.h>
898 @@ -1443,6 +1444,9 @@ sys_ptrace (long request, pid_t pid, uns
899         read_unlock(&tasklist_lock);
900         if (!child)
901                 goto out;
902 +       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
903 +               goto out_tsk;
904 +
905         ret = -EPERM;
906         if (pid == 1)           /* no messing around with init! */
907                 goto out_tsk;
908 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/kernel/signal.c linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/signal.c
909 --- linux-2.6.16.20/arch/ia64/kernel/signal.c   2006-01-18 06:07:53 +0100
910 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/signal.c      2006-04-26 19:06:59 +0200
911 @@ -21,6 +21,7 @@
912  #include <linux/binfmts.h>
913  #include <linux/unistd.h>
914  #include <linux/wait.h>
915 +#include <linux/vs_pid.h>
916  
917  #include <asm/ia32.h>
918  #include <asm/intrinsics.h>
919 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/traps.c
920 --- linux-2.6.16.20/arch/ia64/kernel/traps.c    2006-02-18 14:39:43 +0100
921 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/traps.c       2006-04-26 19:06:59 +0200
922 @@ -107,8 +107,9 @@ die (const char *str, struct pt_regs *re
923         put_cpu();
924  
925         if (++die.lock_owner_depth < 3) {
926 -               printk("%s[%d]: %s %ld [%d]\n",
927 -                       current->comm, current->pid, str, err, ++die_counter);
928 +               printk("%s[%d[#%u]]: %s %ld [%d]\n",
929 +                       current->comm, current->pid, current->xid,
930 +                       str, err, ++die_counter);
931                 (void) notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
932                 show_regs(regs);
933         } else
934 @@ -335,8 +336,9 @@ handle_fpu_swa (int fp_fault, struct pt_
935                 last_time = jiffies;
936                 ++fpu_swa_count;
937                 printk(KERN_WARNING
938 -                      "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n",
939 -                      current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, isr);
940 +                      "%s(%d[#%u]): floating-point assist fault at ip %016lx, isr %016lx\n",
941 +                      current->comm, current->pid, current->xid,
942 +                      regs->cr_iip + ia64_psr(regs)->ri, isr);
943         }
944  
945         exception = fp_emulate(fp_fault, bundle, &regs->cr_ipsr, &regs->ar_fpsr, &isr, &regs->pr,
946 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/mm/fault.c linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/mm/fault.c
947 --- linux-2.6.16.20/arch/ia64/mm/fault.c        2006-01-03 17:29:09 +0100
948 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/mm/fault.c   2006-04-26 19:06:59 +0200
949 @@ -10,6 +10,7 @@
950  #include <linux/smp_lock.h>
951  #include <linux/interrupt.h>
952  #include <linux/kprobes.h>
953 +#include <linux/vs_memory.h>
954  
955  #include <asm/pgtable.h>
956  #include <asm/processor.h>
957 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/sn/kernel/xpc_main.c linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/sn/kernel/xpc_main.c
958 --- linux-2.6.16.20/arch/ia64/sn/kernel/xpc_main.c      2006-02-18 14:39:44 +0100
959 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/sn/kernel/xpc_main.c 2006-04-26 19:06:59 +0200
960 @@ -109,6 +109,7 @@ static ctl_table xpc_sys_xpc_hb_dir[] = 
961                 0644,
962                 NULL,
963                 &proc_dointvec_minmax,
964 +               NULL,
965                 &sysctl_intvec,
966                 NULL,
967                 &xpc_hb_min_interval,
968 @@ -122,6 +123,7 @@ static ctl_table xpc_sys_xpc_hb_dir[] = 
969                 0644,
970                 NULL,
971                 &proc_dointvec_minmax,
972 +               NULL,
973                 &sysctl_intvec,
974                 NULL,
975                 &xpc_hb_check_min_interval,
976 @@ -146,6 +148,7 @@ static ctl_table xpc_sys_xpc_dir[] = {
977                 0644,
978                 NULL,
979                 &proc_dointvec_minmax,
980 +               NULL,
981                 &sysctl_intvec,
982                 NULL,
983                 &xpc_disengage_request_min_timelimit,
984 diff -NurpP --minimal linux-2.6.16.20/arch/m32r/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/m32r/kernel/process.c
985 --- linux-2.6.16.20/arch/m32r/kernel/process.c  2006-01-18 06:07:53 +0100
986 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/m32r/kernel/process.c     2006-04-26 19:06:59 +0200
987 @@ -208,8 +208,8 @@ int kernel_thread(int (*fn)(void *), voi
988         regs.psw = M32R_PSW_BIE;
989  
990         /* Ok, create the new process. */
991 -       return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL,
992 -               NULL);
993 +       return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD,
994 +               0, &regs, 0, NULL, NULL);
995  }
996  
997  /*
998 diff -NurpP --minimal linux-2.6.16.20/arch/m32r/kernel/sys_m32r.c linux-2.6.16.20-vs2.1.1-rc22/arch/m32r/kernel/sys_m32r.c
999 --- linux-2.6.16.20/arch/m32r/kernel/sys_m32r.c 2006-04-09 13:49:43 +0200
1000 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/m32r/kernel/sys_m32r.c    2006-05-11 16:06:22 +0200
1001 @@ -21,6 +21,7 @@
1002  #include <linux/mman.h>
1003  #include <linux/file.h>
1004  #include <linux/utsname.h>
1005 +#include <linux/vs_cvirt.h>
1006  
1007  #include <asm/uaccess.h>
1008  #include <asm/cachectl.h>
1009 @@ -206,7 +207,7 @@ asmlinkage int sys_uname(struct old_utsn
1010         if (!name)
1011                 return -EFAULT;
1012         down_read(&uts_sem);
1013 -       err=copy_to_user(name, &system_utsname, sizeof (*name));
1014 +       err=copy_to_user(name, vx_new_utsname(), sizeof (*name));
1015         up_read(&uts_sem);
1016         return err?-EFAULT:0;
1017  }
1018 diff -NurpP --minimal linux-2.6.16.20/arch/m32r/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/m32r/kernel/traps.c
1019 --- linux-2.6.16.20/arch/m32r/kernel/traps.c    2005-10-28 20:49:11 +0200
1020 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/m32r/kernel/traps.c       2006-04-26 19:06:59 +0200
1021 @@ -196,8 +196,9 @@ static void show_registers(struct pt_reg
1022         } else {
1023                 printk("SPI: %08lx\n", sp);
1024         }
1025 -       printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)",
1026 -               current->comm, current->pid, 0xffff & i, 4096+(unsigned long)current);
1027 +       printk("Process %s (pid: %d[#%u], process nr: %d, stackpage=%08lx)",
1028 +               current->comm, current->pid, current->xid,
1029 +               0xffff & i, 4096+(unsigned long)current);
1030  
1031         /*
1032          * When in-kernel, we also print out the stack and code at the
1033 diff -NurpP --minimal linux-2.6.16.20/arch/m68k/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/m68k/Kconfig
1034 --- linux-2.6.16.20/arch/m68k/Kconfig   2006-02-18 14:39:44 +0100
1035 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/m68k/Kconfig      2006-04-26 19:06:59 +0200
1036 @@ -650,6 +650,8 @@ source "fs/Kconfig"
1037  
1038  source "arch/m68k/Kconfig.debug"
1039  
1040 +source "kernel/vserver/Kconfig"
1041 +
1042  source "security/Kconfig"
1043  
1044  source "crypto/Kconfig"
1045 diff -NurpP --minimal linux-2.6.16.20/arch/m68k/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/m68k/kernel/process.c
1046 --- linux-2.6.16.20/arch/m68k/kernel/process.c  2006-04-09 13:49:43 +0200
1047 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/m68k/kernel/process.c     2006-04-26 19:06:59 +0200
1048 @@ -160,7 +160,8 @@ int kernel_thread(int (*fn)(void *), voi
1049  
1050         {
1051         register long retval __asm__ ("d0");
1052 -       register long clone_arg __asm__ ("d1") = flags | CLONE_VM | CLONE_UNTRACED;
1053 +       register long clone_arg __asm__ ("d1") =
1054 +               flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD;
1055  
1056         retval = __NR_clone;
1057         __asm__ __volatile__
1058 diff -NurpP --minimal linux-2.6.16.20/arch/m68k/kernel/ptrace.c linux-2.6.16.20-vs2.1.1-rc22/arch/m68k/kernel/ptrace.c
1059 --- linux-2.6.16.20/arch/m68k/kernel/ptrace.c   2006-01-03 17:29:10 +0100
1060 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/m68k/kernel/ptrace.c      2006-04-26 19:06:59 +0200
1061 @@ -280,6 +280,8 @@ long arch_ptrace(struct task_struct *chi
1062                 ret = ptrace_request(child, request, addr, data);
1063                 break;
1064         }
1065 +       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
1066 +               goto out_tsk;
1067  
1068         return ret;
1069  out_eio:
1070 diff -NurpP --minimal linux-2.6.16.20/arch/m68k/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/m68k/kernel/traps.c
1071 --- linux-2.6.16.20/arch/m68k/kernel/traps.c    2006-01-18 06:07:53 +0100
1072 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/m68k/kernel/traps.c       2006-04-26 19:06:59 +0200
1073 @@ -1198,8 +1198,9 @@ void die_if_kernel (char *str, struct pt
1074         printk("d4: %08lx    d5: %08lx    a0: %08lx    a1: %08lx\n",
1075                fp->d4, fp->d5, fp->a0, fp->a1);
1076  
1077 -       printk("Process %s (pid: %d, stackpage=%08lx)\n",
1078 -               current->comm, current->pid, PAGE_SIZE+(unsigned long)current);
1079 +       printk("Process %s (pid: %d[#%u], stackpage=%08lx)\n",
1080 +               current->comm, current->pid, current->xid,
1081 +               PAGE_SIZE+(unsigned long)current);
1082         show_stack(NULL, (unsigned long *)fp);
1083         do_exit(SIGSEGV);
1084  }
1085 diff -NurpP --minimal linux-2.6.16.20/arch/m68knommu/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/m68knommu/Kconfig
1086 --- linux-2.6.16.20/arch/m68knommu/Kconfig      2006-02-18 14:39:44 +0100
1087 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/m68knommu/Kconfig 2006-04-26 19:06:59 +0200
1088 @@ -646,6 +646,8 @@ source "fs/Kconfig"
1089  
1090  source "arch/m68knommu/Kconfig.debug"
1091  
1092 +source "kernel/vserver/Kconfig"
1093 +
1094  source "security/Kconfig"
1095  
1096  source "crypto/Kconfig"
1097 diff -NurpP --minimal linux-2.6.16.20/arch/m68knommu/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/m68knommu/kernel/process.c
1098 --- linux-2.6.16.20/arch/m68knommu/kernel/process.c     2006-02-15 13:54:11 +0100
1099 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/m68knommu/kernel/process.c        2006-04-26 19:06:59 +0200
1100 @@ -123,7 +123,7 @@ void show_regs(struct pt_regs * regs)
1101  int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
1102  {
1103         int retval;
1104 -       long clone_arg = flags | CLONE_VM;
1105 +       long clone_arg = flags | CLONE_VM | CLONE_KTHREAD;
1106         mm_segment_t fs;
1107  
1108         fs = get_fs();
1109 diff -NurpP --minimal linux-2.6.16.20/arch/m68knommu/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/m68knommu/kernel/traps.c
1110 --- linux-2.6.16.20/arch/m68knommu/kernel/traps.c       2005-10-28 20:49:11 +0200
1111 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/m68knommu/kernel/traps.c  2006-04-26 19:06:59 +0200
1112 @@ -81,8 +81,9 @@ void die_if_kernel(char *str, struct pt_
1113         printk(KERN_EMERG "d4: %08lx    d5: %08lx    a0: %08lx    a1: %08lx\n",
1114                fp->d4, fp->d5, fp->a0, fp->a1);
1115  
1116 -       printk(KERN_EMERG "Process %s (pid: %d, stackpage=%08lx)\n",
1117 -               current->comm, current->pid, PAGE_SIZE+(unsigned long)current);
1118 +       printk(KERN_EMERG "Process %s (pid: %d[#%u], stackpage=%08lx)\n",
1119 +               current->comm, current->pid, current->xid,
1120 +               PAGE_SIZE+(unsigned long)current);
1121         show_stack(NULL, (unsigned long *)fp);
1122         do_exit(SIGSEGV);
1123  }
1124 diff -NurpP --minimal linux-2.6.16.20/arch/mips/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/mips/Kconfig
1125 --- linux-2.6.16.20/arch/mips/Kconfig   2006-04-09 13:49:43 +0200
1126 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/mips/Kconfig      2006-04-26 19:06:59 +0200
1127 @@ -1814,6 +1814,8 @@ source "arch/mips/oprofile/Kconfig"
1128  
1129  source "arch/mips/Kconfig.debug"
1130  
1131 +source "kernel/vserver/Kconfig"
1132 +
1133  source "security/Kconfig"
1134  
1135  source "crypto/Kconfig"
1136 diff -NurpP --minimal linux-2.6.16.20/arch/mips/kernel/linux32.c linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/linux32.c
1137 --- linux-2.6.16.20/arch/mips/kernel/linux32.c  2006-04-09 13:49:43 +0200
1138 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/linux32.c     2006-05-29 16:49:44 +0200
1139 @@ -37,6 +37,7 @@
1140  #include <linux/security.h>
1141  #include <linux/compat.h>
1142  #include <linux/vfs.h>
1143 +#include <linux/vs_cvirt.h>
1144  
1145  #include <net/sock.h>
1146  #include <net/scm.h>
1147 @@ -299,7 +300,7 @@ sys32_gettimeofday(struct compat_timeval
1148  {
1149         if (tv) {
1150                 struct timeval ktv;
1151 -               do_gettimeofday(&ktv);
1152 +               vx_gettimeofday(&ktv);
1153                 if (put_tv32(tv, &ktv))
1154                         return -EFAULT;
1155         }
1156 @@ -1099,7 +1100,7 @@ asmlinkage long sys32_newuname(struct ne
1157         int ret = 0;
1158  
1159         down_read(&uts_sem);
1160 -       if (copy_to_user(name,&system_utsname,sizeof *name))
1161 +       if (copy_to_user(name, vx_new_utsname(), sizeof *name))
1162                 ret = -EFAULT;
1163         up_read(&uts_sem);
1164  
1165 diff -NurpP --minimal linux-2.6.16.20/arch/mips/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/process.c
1166 --- linux-2.6.16.20/arch/mips/kernel/process.c  2006-02-18 14:39:45 +0100
1167 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/process.c     2006-04-26 19:06:59 +0200
1168 @@ -268,7 +268,8 @@ long kernel_thread(int (*fn)(void *), vo
1169  #endif
1170  
1171         /* Ok, create the new process.. */
1172 -       return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
1173 +       return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD,
1174 +               0, &regs, 0, NULL, NULL);
1175  }
1176  
1177  static struct mips_frame_info {
1178 diff -NurpP --minimal linux-2.6.16.20/arch/mips/kernel/ptrace.c linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/ptrace.c
1179 --- linux-2.6.16.20/arch/mips/kernel/ptrace.c   2006-04-09 13:49:43 +0200
1180 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/ptrace.c      2006-04-26 19:06:59 +0200
1181 @@ -476,6 +476,8 @@ asmlinkage void do_syscall_trace(struct 
1182                 goto out;
1183         if (!test_thread_flag(TIF_SYSCALL_TRACE))
1184                 goto out;
1185 +       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
1186 +               goto out_tsk;
1187  
1188         /* The 0x80 provides a way for the tracing parent to distinguish
1189            between a syscall stop and SIGTRAP delivery */
1190 diff -NurpP --minimal linux-2.6.16.20/arch/mips/kernel/scall32-o32.S linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/scall32-o32.S
1191 --- linux-2.6.16.20/arch/mips/kernel/scall32-o32.S      2006-04-09 13:49:43 +0200
1192 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/scall32-o32.S 2006-04-26 19:06:59 +0200
1193 @@ -607,7 +607,7 @@ einval:     li      v0, -EINVAL
1194         sys     sys_mq_timedreceive     5
1195         sys     sys_mq_notify           2       /* 4275 */
1196         sys     sys_mq_getsetattr       3
1197 -       sys     sys_ni_syscall          0       /* sys_vserver */
1198 +       sys     sys_vserver             3
1199         sys     sys_waitid              5
1200         sys     sys_ni_syscall          0       /* available, was setaltroot */
1201         sys     sys_add_key             5       /* 4280 */
1202 diff -NurpP --minimal linux-2.6.16.20/arch/mips/kernel/scall64-64.S linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/scall64-64.S
1203 --- linux-2.6.16.20/arch/mips/kernel/scall64-64.S       2006-02-18 14:39:45 +0100
1204 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/scall64-64.S  2006-04-26 19:06:59 +0200
1205 @@ -433,7 +433,7 @@ sys_call_table:
1206         PTR     sys_mq_timedreceive
1207         PTR     sys_mq_notify
1208         PTR     sys_mq_getsetattr               /* 5235 */
1209 -       PTR     sys_ni_syscall                  /* sys_vserver */
1210 +       PTR     sys_vserver
1211         PTR     sys_waitid
1212         PTR     sys_ni_syscall                  /* available, was setaltroot */
1213         PTR     sys_add_key
1214 diff -NurpP --minimal linux-2.6.16.20/arch/mips/kernel/scall64-n32.S linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/scall64-n32.S
1215 --- linux-2.6.16.20/arch/mips/kernel/scall64-n32.S      2006-04-09 13:49:43 +0200
1216 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/scall64-n32.S 2006-04-26 19:06:59 +0200
1217 @@ -359,7 +359,7 @@ EXPORT(sysn32_call_table)
1218         PTR     compat_sys_mq_timedreceive
1219         PTR     compat_sys_mq_notify
1220         PTR     compat_sys_mq_getsetattr
1221 -       PTR     sys_ni_syscall                  /* 6240, sys_vserver */
1222 +       PTR     sys32_vserver                   /* 6240 */
1223         PTR     sysn32_waitid
1224         PTR     sys_ni_syscall                  /* available, was setaltroot */
1225         PTR     sys_add_key
1226 diff -NurpP --minimal linux-2.6.16.20/arch/mips/kernel/scall64-o32.S linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/scall64-o32.S
1227 --- linux-2.6.16.20/arch/mips/kernel/scall64-o32.S      2006-04-09 13:49:43 +0200
1228 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/scall64-o32.S 2006-04-26 19:06:59 +0200
1229 @@ -481,7 +481,7 @@ sys_call_table:
1230         PTR     compat_sys_mq_timedreceive
1231         PTR     compat_sys_mq_notify            /* 4275 */
1232         PTR     compat_sys_mq_getsetattr
1233 -       PTR     sys_ni_syscall                  /* sys_vserver */
1234 +       PTR     sys32_vserver
1235         PTR     sys32_waitid
1236         PTR     sys_ni_syscall                  /* available, was setaltroot */
1237         PTR     sys_add_key                     /* 4280 */
1238 diff -NurpP --minimal linux-2.6.16.20/arch/mips/kernel/syscall.c linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/syscall.c
1239 --- linux-2.6.16.20/arch/mips/kernel/syscall.c  2006-02-18 14:39:45 +0100
1240 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/syscall.c     2006-04-26 19:06:59 +0200
1241 @@ -29,6 +29,7 @@
1242  #include <linux/shm.h>
1243  #include <linux/compiler.h>
1244  #include <linux/module.h>
1245 +#include <linux/vs_cvirt.h>
1246  
1247  #include <asm/branch.h>
1248  #include <asm/cachectl.h>
1249 @@ -229,7 +230,7 @@ out:
1250   */
1251  asmlinkage int sys_uname(struct old_utsname __user * name)
1252  {
1253 -       if (name && !copy_to_user(name, &system_utsname, sizeof (*name)))
1254 +       if (name && !copy_to_user(name, vx_new_utsname(), sizeof (*name)))
1255                 return 0;
1256         return -EFAULT;
1257  }
1258 @@ -240,21 +241,23 @@ asmlinkage int sys_uname(struct old_utsn
1259  asmlinkage int sys_olduname(struct oldold_utsname __user * name)
1260  {
1261         int error;
1262 +       struct new_utsname *ptr;
1263  
1264         if (!name)
1265                 return -EFAULT;
1266         if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname)))
1267                 return -EFAULT;
1268  
1269 -       error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
1270 +       ptr = vx_new_utsname();
1271 +       error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN);
1272         error -= __put_user(0,name->sysname+__OLD_UTS_LEN);
1273 -       error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
1274 +       error -= __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN);
1275         error -= __put_user(0,name->nodename+__OLD_UTS_LEN);
1276 -       error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
1277 +       error -= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN);
1278         error -= __put_user(0,name->release+__OLD_UTS_LEN);
1279 -       error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
1280 +       error -= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN);
1281         error -= __put_user(0,name->version+__OLD_UTS_LEN);
1282 -       error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
1283 +       error -= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN);
1284         error = __put_user(0,name->machine+__OLD_UTS_LEN);
1285         error = error ? -EFAULT : 0;
1286  
1287 @@ -290,10 +293,10 @@ asmlinkage int _sys_sysmips(int cmd, lon
1288                         return -EFAULT;
1289  
1290                 down_write(&uts_sem);
1291 -               strncpy(system_utsname.nodename, nodename, len);
1292 +               strncpy(vx_new_uts(nodename), nodename, len);
1293                 nodename[__NEW_UTS_LEN] = '\0';
1294 -               strlcpy(system_utsname.nodename, nodename,
1295 -                       sizeof(system_utsname.nodename));
1296 +               strlcpy(vx_new_uts(nodename), nodename,
1297 +                       sizeof(vx_new_uts(nodename)));
1298                 up_write(&uts_sem);
1299                 return 0;
1300         }
1301 diff -NurpP --minimal linux-2.6.16.20/arch/mips/kernel/sysirix.c linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/sysirix.c
1302 --- linux-2.6.16.20/arch/mips/kernel/sysirix.c  2006-02-18 14:39:45 +0100
1303 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/sysirix.c     2006-05-11 16:06:22 +0200
1304 @@ -31,6 +31,8 @@
1305  #include <linux/socket.h>
1306  #include <linux/security.h>
1307  #include <linux/syscalls.h>
1308 +#include <linux/vs_pid.h>
1309 +#include <linux/vs_cvirt.h>
1310  
1311  #include <asm/ptrace.h>
1312  #include <asm/page.h>
1313 @@ -904,7 +906,7 @@ asmlinkage int irix_getdomainname(char _
1314         down_read(&uts_sem);
1315         if (len > __NEW_UTS_LEN)
1316                 len = __NEW_UTS_LEN;
1317 -       err = copy_to_user(name, system_utsname.domainname, len) ? -EFAULT : 0;
1318 +       err = copy_to_user(name, vx_new_uts(domainname), len) ? -EFAULT : 0;
1319         up_read(&uts_sem);
1320  
1321         return err;
1322 @@ -1147,11 +1149,11 @@ struct iuname {
1323  asmlinkage int irix_uname(struct iuname __user *buf)
1324  {
1325         down_read(&uts_sem);
1326 -       if (copy_from_user(system_utsname.sysname, buf->sysname, 65)
1327 -           || copy_from_user(system_utsname.nodename, buf->nodename, 65)
1328 -           || copy_from_user(system_utsname.release, buf->release, 65)
1329 -           || copy_from_user(system_utsname.version, buf->version, 65)
1330 -           || copy_from_user(system_utsname.machine, buf->machine, 65)) {
1331 +       if (copy_from_user(vx_new_uts(sysname), buf->sysname, 65)
1332 +           || copy_from_user(vx_new_uts(nodename), buf->nodename, 65)
1333 +           || copy_from_user(vx_new_uts(release), buf->release, 65)
1334 +           || copy_from_user(vx_new_uts(version), buf->version, 65)
1335 +           || copy_from_user(vx_new_uts(machine), buf->machine, 65)) {
1336                 return -EFAULT;
1337         }
1338         up_read(&uts_sem);
1339 diff -NurpP --minimal linux-2.6.16.20/arch/mips/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/traps.c
1340 --- linux-2.6.16.20/arch/mips/kernel/traps.c    2006-04-09 13:49:43 +0200
1341 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/traps.c       2006-04-26 19:06:59 +0200
1342 @@ -266,8 +266,9 @@ void show_registers(struct pt_regs *regs
1343  {
1344         show_regs(regs);
1345         print_modules();
1346 -       printk("Process %s (pid: %d, threadinfo=%p, task=%p)\n",
1347 -               current->comm, current->pid, current_thread_info(), current);
1348 +       printk("Process %s (pid: %d[#%u], threadinfo=%p, task=%p)\n",
1349 +               current->comm, current->pid, current->xid,
1350 +               current_thread_info(), current);
1351         show_stack(current, (long *) regs->regs[29]);
1352         show_trace(current, (long *) regs->regs[29]);
1353         show_code((unsigned int *) regs->cp0_epc);
1354 diff -NurpP --minimal linux-2.6.16.20/arch/parisc/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/Kconfig
1355 --- linux-2.6.16.20/arch/parisc/Kconfig 2006-02-18 14:39:45 +0100
1356 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/Kconfig    2006-04-26 19:06:59 +0200
1357 @@ -213,6 +213,8 @@ source "arch/parisc/oprofile/Kconfig"
1358  
1359  source "arch/parisc/Kconfig.debug"
1360  
1361 +source "kernel/vserver/Kconfig"
1362 +
1363  source "security/Kconfig"
1364  
1365  source "crypto/Kconfig"
1366 diff -NurpP --minimal linux-2.6.16.20/arch/parisc/hpux/sys_hpux.c linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/hpux/sys_hpux.c
1367 --- linux-2.6.16.20/arch/parisc/hpux/sys_hpux.c 2006-02-15 13:54:11 +0100
1368 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/hpux/sys_hpux.c    2006-05-11 16:06:22 +0200
1369 @@ -33,6 +33,7 @@
1370  #include <linux/utsname.h>
1371  #include <linux/vfs.h>
1372  #include <linux/vmalloc.h>
1373 +#include <linux/vs_cvirt.h>
1374  
1375  #include <asm/errno.h>
1376  #include <asm/pgalloc.h>
1377 @@ -266,15 +267,15 @@ static int hpux_uname(struct hpux_utsnam
1378  
1379         down_read(&uts_sem);
1380  
1381 -       error = __copy_to_user(&name->sysname,&system_utsname.sysname,HPUX_UTSLEN-1);
1382 +       error = __copy_to_user(&name->sysname,vx_new_uts(sysname),HPUX_UTSLEN-1);
1383         error |= __put_user(0,name->sysname+HPUX_UTSLEN-1);
1384 -       error |= __copy_to_user(&name->nodename,&system_utsname.nodename,HPUX_UTSLEN-1);
1385 +       error |= __copy_to_user(&name->nodename,vx_new_uts(nodename),HPUX_UTSLEN-1);
1386         error |= __put_user(0,name->nodename+HPUX_UTSLEN-1);
1387 -       error |= __copy_to_user(&name->release,&system_utsname.release,HPUX_UTSLEN-1);
1388 +       error |= __copy_to_user(&name->release,vx_new_uts(release),HPUX_UTSLEN-1);
1389         error |= __put_user(0,name->release+HPUX_UTSLEN-1);
1390 -       error |= __copy_to_user(&name->version,&system_utsname.version,HPUX_UTSLEN-1);
1391 +       error |= __copy_to_user(&name->version,vx_new_uts(version),HPUX_UTSLEN-1);
1392         error |= __put_user(0,name->version+HPUX_UTSLEN-1);
1393 -       error |= __copy_to_user(&name->machine,&system_utsname.machine,HPUX_UTSLEN-1);
1394 +       error |= __copy_to_user(&name->machine,vx_new_uts(machine),HPUX_UTSLEN-1);
1395         error |= __put_user(0,name->machine+HPUX_UTSLEN-1);
1396  
1397         up_read(&uts_sem);
1398 @@ -373,8 +374,8 @@ int hpux_utssys(char *ubuf, int n, int t
1399                 /*  TODO:  print a warning about using this?  */
1400                 down_write(&uts_sem);
1401                 error = -EFAULT;
1402 -               if (!copy_from_user(system_utsname.sysname, ubuf, len)) {
1403 -                       system_utsname.sysname[len] = 0;
1404 +               if (!copy_from_user(vx_new_uts(sysname), ubuf, len)) {
1405 +                       vx_new_uts(sysname)[len] = 0;
1406                         error = 0;
1407                 }
1408                 up_write(&uts_sem);
1409 @@ -400,8 +401,8 @@ int hpux_utssys(char *ubuf, int n, int t
1410                 /*  TODO:  print a warning about this?  */
1411                 down_write(&uts_sem);
1412                 error = -EFAULT;
1413 -               if (!copy_from_user(system_utsname.release, ubuf, len)) {
1414 -                       system_utsname.release[len] = 0;
1415 +               if (!copy_from_user(vx_new_uts(release), ubuf, len)) {
1416 +                       vx_new_uts(release)[len] = 0;
1417                         error = 0;
1418                 }
1419                 up_write(&uts_sem);
1420 @@ -422,13 +423,13 @@ int hpux_getdomainname(char *name, int l
1421         
1422         down_read(&uts_sem);
1423         
1424 -       nlen = strlen(system_utsname.domainname) + 1;
1425 +       nlen = strlen(vx_new_uts(domainname)) + 1;
1426  
1427         if (nlen < len)
1428                 len = nlen;
1429         if(len > __NEW_UTS_LEN)
1430                 goto done;
1431 -       if(copy_to_user(name, system_utsname.domainname, len))
1432 +       if(copy_to_user(name, vx_new_uts(domainname), len))
1433                 goto done;
1434         err = 0;
1435  done:
1436 diff -NurpP --minimal linux-2.6.16.20/arch/parisc/kernel/entry.S linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/kernel/entry.S
1437 --- linux-2.6.16.20/arch/parisc/kernel/entry.S  2006-01-03 17:29:13 +0100
1438 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/kernel/entry.S     2006-04-26 19:06:59 +0200
1439 @@ -756,6 +756,7 @@ fault_vector_11:
1440  
1441  #define CLONE_VM 0x100 /* Must agree with <linux/sched.h> */
1442  #define CLONE_UNTRACED 0x00800000
1443 +#define CLONE_KTHREAD 0x10000000
1444  
1445         .export __kernel_thread, code
1446         .import do_fork
1447 diff -NurpP --minimal linux-2.6.16.20/arch/parisc/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/kernel/process.c
1448 --- linux-2.6.16.20/arch/parisc/kernel/process.c        2006-02-15 13:54:11 +0100
1449 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/kernel/process.c   2006-04-26 19:06:59 +0200
1450 @@ -178,7 +178,7 @@ pid_t kernel_thread(int (*fn)(void *), v
1451          *        kernel_thread can become a #define.
1452          */
1453  
1454 -       return __kernel_thread(fn, arg, flags);
1455 +       return __kernel_thread(fn, arg, flags | CLONE_KTHREAD);
1456  }
1457  EXPORT_SYMBOL(kernel_thread);
1458  
1459 diff -NurpP --minimal linux-2.6.16.20/arch/parisc/kernel/sys_parisc32.c linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/kernel/sys_parisc32.c
1460 --- linux-2.6.16.20/arch/parisc/kernel/sys_parisc32.c   2005-06-22 02:37:56 +0200
1461 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/kernel/sys_parisc32.c      2006-05-29 16:50:03 +0200
1462 @@ -203,11 +203,11 @@ static inline long get_ts32(struct times
1463  asmlinkage int
1464  sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
1465  {
1466 -    extern void do_gettimeofday(struct timeval *tv);
1467 +    extern void vx_gettimeofday(struct timeval *tv);
1468  
1469      if (tv) {
1470             struct timeval ktv;
1471 -           do_gettimeofday(&ktv);
1472 +           vx_gettimeofday(&ktv);
1473             if (put_compat_timeval(tv, &ktv))
1474                     return -EFAULT;
1475      }
1476 @@ -657,6 +657,7 @@ asmlinkage int sys32_sysinfo(struct sysi
1477  
1478         do {
1479                 seq = read_seqbegin(&xtime_lock);
1480 +               /* FIXME: requires vx virtualization */
1481                 val.uptime = jiffies / HZ;
1482  
1483                 val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
1484 diff -NurpP --minimal linux-2.6.16.20/arch/parisc/kernel/syscall_table.S linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/kernel/syscall_table.S
1485 --- linux-2.6.16.20/arch/parisc/kernel/syscall_table.S  2006-02-18 14:39:46 +0100
1486 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/kernel/syscall_table.S     2006-04-26 19:06:59 +0200
1487 @@ -368,7 +368,7 @@
1488         ENTRY_COMP(mbind)               /* 260 */
1489         ENTRY_COMP(get_mempolicy)
1490         ENTRY_COMP(set_mempolicy)
1491 -       ENTRY_SAME(ni_syscall)  /* 263: reserved for vserver */
1492 +       ENTRY_DIFF(vserver)
1493         ENTRY_SAME(add_key)
1494         ENTRY_SAME(request_key)         /* 265 */
1495         ENTRY_SAME(keyctl)
1496 diff -NurpP --minimal linux-2.6.16.20/arch/parisc/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/kernel/traps.c
1497 --- linux-2.6.16.20/arch/parisc/kernel/traps.c  2006-02-15 13:54:11 +0100
1498 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/kernel/traps.c     2006-04-26 19:06:59 +0200
1499 @@ -214,8 +214,9 @@ void die_if_kernel(char *str, struct pt_
1500                 if (err == 0)
1501                         return; /* STFU */
1502  
1503 -               printk(KERN_CRIT "%s (pid %d): %s (code %ld) at " RFMT "\n",
1504 -                       current->comm, current->pid, str, err, regs->iaoq[0]);
1505 +               printk(KERN_CRIT "%s (pid %d[#%u]): %s (code %ld) at " RFMT "\n",
1506 +                       current->comm, current->pid, current->xid,
1507 +                       str, err, regs->iaoq[0]);
1508  #ifdef PRINT_USER_FAULTS
1509                 /* XXX for debugging only */
1510                 show_regs(regs);
1511 @@ -246,8 +247,8 @@ void die_if_kernel(char *str, struct pt_
1512         if (!console_drivers)
1513                 pdc_console_restart();
1514         
1515 -       printk(KERN_CRIT "%s (pid %d): %s (code %ld)\n",
1516 -               current->comm, current->pid, str, err);
1517 +       printk(KERN_CRIT "%s (pid %d[#%u]): %s (code %ld)\n",
1518 +               current->comm, current->pid, current->xid, str, err);
1519         show_regs(regs);
1520  
1521         /* Wot's wrong wif bein' racy? */
1522 diff -NurpP --minimal linux-2.6.16.20/arch/powerpc/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/Kconfig
1523 --- linux-2.6.16.20/arch/powerpc/Kconfig        2006-04-09 13:49:43 +0200
1524 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/Kconfig   2006-04-26 19:06:59 +0200
1525 @@ -974,6 +974,8 @@ endmenu
1526  
1527  source "arch/powerpc/Kconfig.debug"
1528  
1529 +source "kernel/vserver/Kconfig"
1530 +
1531  source "security/Kconfig"
1532  
1533  config KEYS_COMPAT
1534 diff -NurpP --minimal linux-2.6.16.20/arch/powerpc/kernel/asm-offsets.c linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/asm-offsets.c
1535 --- linux-2.6.16.20/arch/powerpc/kernel/asm-offsets.c   2006-04-09 13:49:43 +0200
1536 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/asm-offsets.c      2006-04-26 19:06:59 +0200
1537 @@ -229,6 +229,7 @@ int main(void)
1538  
1539         DEFINE(CLONE_VM, CLONE_VM);
1540         DEFINE(CLONE_UNTRACED, CLONE_UNTRACED);
1541 +       DEFINE(CLONE_KTHREAD, CLONE_KTHREAD);
1542  
1543  #ifndef CONFIG_PPC64
1544         DEFINE(MM_PGD, offsetof(struct mm_struct, pgd));
1545 diff -NurpP --minimal linux-2.6.16.20/arch/powerpc/kernel/misc_32.S linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/misc_32.S
1546 --- linux-2.6.16.20/arch/powerpc/kernel/misc_32.S       2006-01-18 06:07:55 +0100
1547 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/misc_32.S  2006-04-26 19:06:59 +0200
1548 @@ -980,7 +980,7 @@ _GLOBAL(kernel_thread)
1549         mr      r30,r3          /* function */
1550         mr      r31,r4          /* argument */
1551         ori     r3,r5,CLONE_VM  /* flags */
1552 -       oris    r3,r3,CLONE_UNTRACED>>16
1553 +       oris    r3,r3,(CLONE_UNTRACED|CLONE_KTHREAD)>>16
1554         li      r4,0            /* new sp (unused) */
1555         li      r0,__NR_clone
1556         sc
1557 diff -NurpP --minimal linux-2.6.16.20/arch/powerpc/kernel/misc_64.S linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/misc_64.S
1558 --- linux-2.6.16.20/arch/powerpc/kernel/misc_64.S       2006-01-18 06:07:55 +0100
1559 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/misc_64.S  2006-04-26 19:06:59 +0200
1560 @@ -684,7 +684,7 @@ _GLOBAL(kernel_thread)
1561         mr      r29,r3
1562         mr      r30,r4
1563         ori     r3,r5,CLONE_VM  /* flags */
1564 -       oris    r3,r3,(CLONE_UNTRACED>>16)
1565 +       oris    r3,r3,(CLONE_UNTRACED|CLONE_KTHREAD)>>16
1566         li      r4,0            /* new sp (unused) */
1567         li      r0,__NR_clone
1568         sc
1569 diff -NurpP --minimal linux-2.6.16.20/arch/powerpc/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/process.c
1570 --- linux-2.6.16.20/arch/powerpc/kernel/process.c       2006-04-09 13:49:43 +0200
1571 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/process.c  2006-04-26 19:06:59 +0200
1572 @@ -425,8 +425,9 @@ void show_regs(struct pt_regs * regs)
1573         trap = TRAP(regs);
1574         if (trap == 0x300 || trap == 0x600)
1575                 printk("DAR: "REG", DSISR: "REG"\n", regs->dar, regs->dsisr);
1576 -       printk("TASK = %p[%d] '%s' THREAD: %p",
1577 -              current, current->pid, current->comm, task_thread_info(current));
1578 +       printk("TASK = %p[%d,#%u] '%s' THREAD: %p",
1579 +              current, current->pid, current->xid,
1580 +              current->comm, task_thread_info(current));
1581  
1582  #ifdef CONFIG_SMP
1583         printk(" CPU: %d", smp_processor_id());
1584 diff -NurpP --minimal linux-2.6.16.20/arch/powerpc/kernel/sys_ppc32.c linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/sys_ppc32.c
1585 --- linux-2.6.16.20/arch/powerpc/kernel/sys_ppc32.c     2006-04-09 13:49:43 +0200
1586 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/sys_ppc32.c        2006-05-29 16:50:23 +0200
1587 @@ -340,7 +340,7 @@ asmlinkage long compat_sys_gettimeofday(
1588  {
1589         if (tv) {
1590                 struct timeval ktv;
1591 -               do_gettimeofday(&ktv);
1592 +               vx_gettimeofday(&ktv);
1593                 if (put_tv32(tv, &ktv))
1594                         return -EFAULT;
1595         }
1596 diff -NurpP --minimal linux-2.6.16.20/arch/powerpc/kernel/syscalls.c linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/syscalls.c
1597 --- linux-2.6.16.20/arch/powerpc/kernel/syscalls.c      2006-02-18 14:39:46 +0100
1598 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/syscalls.c 2006-04-26 19:06:59 +0200
1599 @@ -36,6 +36,7 @@
1600  #include <linux/file.h>
1601  #include <linux/init.h>
1602  #include <linux/personality.h>
1603 +#include <linux/vs_cvirt.h>
1604  
1605  #include <asm/uaccess.h>
1606  #include <asm/ipc.h>
1607 @@ -259,7 +260,7 @@ long ppc_newuname(struct new_utsname __u
1608         int err = 0;
1609  
1610         down_read(&uts_sem);
1611 -       if (copy_to_user(name, &system_utsname, sizeof(*name)))
1612 +       if (copy_to_user(name, vx_new_utsname(), sizeof(*name)))
1613                 err = -EFAULT;
1614         up_read(&uts_sem);
1615         if (!err)
1616 @@ -272,7 +273,7 @@ int sys_uname(struct old_utsname __user 
1617         int err = 0;
1618         
1619         down_read(&uts_sem);
1620 -       if (copy_to_user(name, &system_utsname, sizeof(*name)))
1621 +       if (copy_to_user(name, vx_new_utsname(), sizeof(*name)))
1622                 err = -EFAULT;
1623         up_read(&uts_sem);
1624         if (!err)
1625 @@ -283,25 +284,22 @@ int sys_uname(struct old_utsname __user 
1626  int sys_olduname(struct oldold_utsname __user *name)
1627  {
1628         int error;
1629 +       struct new_utsname *ptr;
1630  
1631         if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
1632                 return -EFAULT;
1633    
1634         down_read(&uts_sem);
1635 -       error = __copy_to_user(&name->sysname, &system_utsname.sysname,
1636 -                              __OLD_UTS_LEN);
1637 +       ptr = vx_new_utsname();
1638 +       error = __copy_to_user(&name->sysname, ptr->sysname, __OLD_UTS_LEN);
1639         error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
1640 -       error |= __copy_to_user(&name->nodename, &system_utsname.nodename,
1641 -                               __OLD_UTS_LEN);
1642 +       error |= __copy_to_user(&name->nodename, ptr->nodename, __OLD_UTS_LEN);
1643         error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
1644 -       error |= __copy_to_user(&name->release, &system_utsname.release,
1645 -                               __OLD_UTS_LEN);
1646 +       error |= __copy_to_user(&name->release, ptr->release, __OLD_UTS_LEN);
1647         error |= __put_user(0, name->release + __OLD_UTS_LEN);
1648 -       error |= __copy_to_user(&name->version, &system_utsname.version,
1649 -                               __OLD_UTS_LEN);
1650 +       error |= __copy_to_user(&name->version, ptr->version, __OLD_UTS_LEN);
1651         error |= __put_user(0, name->version + __OLD_UTS_LEN);
1652 -       error |= __copy_to_user(&name->machine, &system_utsname.machine,
1653 -                               __OLD_UTS_LEN);
1654 +       error |= __copy_to_user(&name->machine, ptr->machine, __OLD_UTS_LEN);
1655         error |= override_machine(name->machine);
1656         up_read(&uts_sem);
1657  
1658 diff -NurpP --minimal linux-2.6.16.20/arch/powerpc/kernel/systbl.S linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/systbl.S
1659 --- linux-2.6.16.20/arch/powerpc/kernel/systbl.S        2006-04-09 13:49:43 +0200
1660 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/systbl.S   2006-04-26 19:06:59 +0200
1661 @@ -296,7 +296,7 @@ COMPAT_SYS(fstatfs64)
1662  SYSX(sys_ni_syscall, ppc_fadvise64_64, ppc_fadvise64_64)
1663  PPC_SYS(rtas)
1664  OLDSYS(debug_setcontext)
1665 -SYSCALL(ni_syscall)
1666 +SYSX(sys_vserver, sys32_vserver, sys_vserver)
1667  SYSCALL(ni_syscall)
1668  COMPAT_SYS(mbind)
1669  COMPAT_SYS(get_mempolicy)
1670 diff -NurpP --minimal linux-2.6.16.20/arch/powerpc/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/traps.c
1671 --- linux-2.6.16.20/arch/powerpc/kernel/traps.c 2006-04-09 13:49:43 +0200
1672 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/traps.c    2006-04-26 19:06:59 +0200
1673 @@ -878,8 +878,9 @@ void nonrecoverable_exception(struct pt_
1674  
1675  void trace_syscall(struct pt_regs *regs)
1676  {
1677 -       printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld    %s\n",
1678 -              current, current->pid, regs->nip, regs->link, regs->gpr[0],
1679 +       printk("Task: %p(%d[#%u]), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld    %s\n",
1680 +              current, current->pid, current->xid,
1681 +              regs->nip, regs->link, regs->gpr[0],
1682                regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted());
1683  }
1684  
1685 diff -NurpP --minimal linux-2.6.16.20/arch/powerpc/kernel/vdso.c linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/vdso.c
1686 --- linux-2.6.16.20/arch/powerpc/kernel/vdso.c  2006-04-09 13:49:43 +0200
1687 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/vdso.c     2006-04-26 19:06:59 +0200
1688 @@ -25,6 +25,7 @@
1689  #include <linux/elf.h>
1690  #include <linux/security.h>
1691  #include <linux/bootmem.h>
1692 +#include <linux/vs_memory.h>
1693  
1694  #include <asm/pgtable.h>
1695  #include <asm/system.h>
1696 @@ -294,7 +295,7 @@ int arch_setup_additional_pages(struct l
1697                 kmem_cache_free(vm_area_cachep, vma);
1698                 return -ENOMEM;
1699         }
1700 -       mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
1701 +       vx_vmpages_add(mm, (vma->vm_end - vma->vm_start) >> PAGE_SHIFT);
1702         up_write(&mm->mmap_sem);
1703  
1704         return 0;
1705 diff -NurpP --minimal linux-2.6.16.20/arch/ppc/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/ppc/Kconfig
1706 --- linux-2.6.16.20/arch/ppc/Kconfig    2006-02-18 14:39:47 +0100
1707 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ppc/Kconfig       2006-04-26 19:06:59 +0200
1708 @@ -1394,6 +1394,8 @@ source "arch/powerpc/oprofile/Kconfig"
1709  
1710  source "arch/ppc/Kconfig.debug"
1711  
1712 +source "kernel/vserver/Kconfig"
1713 +
1714  source "security/Kconfig"
1715  
1716  source "crypto/Kconfig"
1717 diff -NurpP --minimal linux-2.6.16.20/arch/ppc/kernel/asm-offsets.c linux-2.6.16.20-vs2.1.1-rc22/arch/ppc/kernel/asm-offsets.c
1718 --- linux-2.6.16.20/arch/ppc/kernel/asm-offsets.c       2006-04-09 13:49:44 +0200
1719 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ppc/kernel/asm-offsets.c  2006-04-26 19:06:59 +0200
1720 @@ -122,6 +122,7 @@ main(void)
1721         DEFINE(TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap));
1722         DEFINE(CLONE_VM, CLONE_VM);
1723         DEFINE(CLONE_UNTRACED, CLONE_UNTRACED);
1724 +       DEFINE(CLONE_KTHREAD, CLONE_KTHREAD);
1725         DEFINE(MM_PGD, offsetof(struct mm_struct, pgd));
1726  
1727         /* About the CPU features table */
1728 diff -NurpP --minimal linux-2.6.16.20/arch/ppc/kernel/misc.S linux-2.6.16.20-vs2.1.1-rc22/arch/ppc/kernel/misc.S
1729 --- linux-2.6.16.20/arch/ppc/kernel/misc.S      2006-02-17 22:18:50 +0100
1730 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ppc/kernel/misc.S 2006-04-26 19:06:59 +0200
1731 @@ -1011,7 +1011,7 @@ _GLOBAL(kernel_thread)
1732         mr      r30,r3          /* function */
1733         mr      r31,r4          /* argument */
1734         ori     r3,r5,CLONE_VM  /* flags */
1735 -       oris    r3,r3,CLONE_UNTRACED>>16
1736 +       oris    r3,r3,(CLONE_UNTRACED|CLONE_KTHREAD)>>16
1737         li      r4,0            /* new sp (unused) */
1738         li      r0,__NR_clone
1739         sc
1740 diff -NurpP --minimal linux-2.6.16.20/arch/ppc/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/ppc/kernel/traps.c
1741 --- linux-2.6.16.20/arch/ppc/kernel/traps.c     2006-01-18 06:07:56 +0100
1742 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ppc/kernel/traps.c        2006-04-26 19:06:59 +0200
1743 @@ -749,8 +749,9 @@ void nonrecoverable_exception(struct pt_
1744  
1745  void trace_syscall(struct pt_regs *regs)
1746  {
1747 -       printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld    %s\n",
1748 -              current, current->pid, regs->nip, regs->link, regs->gpr[0],
1749 +       printk("Task: %p(%d[#%u]), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld    %s\n",
1750 +              current, current->pid, current->xid,
1751 +              regs->nip, regs->link, regs->gpr[0],
1752                regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted());
1753  }
1754  
1755 diff -NurpP --minimal linux-2.6.16.20/arch/s390/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/s390/Kconfig
1756 --- linux-2.6.16.20/arch/s390/Kconfig   2006-02-18 14:39:47 +0100
1757 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/s390/Kconfig      2006-04-26 19:06:59 +0200
1758 @@ -472,6 +472,8 @@ source "arch/s390/oprofile/Kconfig"
1759  
1760  source "arch/s390/Kconfig.debug"
1761  
1762 +source "kernel/vserver/Kconfig"
1763 +
1764  source "security/Kconfig"
1765  
1766  source "crypto/Kconfig"
1767 diff -NurpP --minimal linux-2.6.16.20/arch/s390/kernel/compat_linux.c linux-2.6.16.20-vs2.1.1-rc22/arch/s390/kernel/compat_linux.c
1768 --- linux-2.6.16.20/arch/s390/kernel/compat_linux.c     2006-02-18 14:39:48 +0100
1769 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/s390/kernel/compat_linux.c        2006-05-29 16:53:03 +0200
1770 @@ -595,7 +595,7 @@ asmlinkage long sys32_gettimeofday(struc
1771  {
1772         if (tv) {
1773                 struct timeval ktv;
1774 -               do_gettimeofday(&ktv);
1775 +               vx_gettimeofday(&ktv);
1776                 if (put_tv32(tv, &ktv))
1777                         return -EFAULT;
1778         }
1779 diff -NurpP --minimal linux-2.6.16.20/arch/s390/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/s390/kernel/process.c
1780 --- linux-2.6.16.20/arch/s390/kernel/process.c  2006-02-18 14:39:48 +0100
1781 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/s390/kernel/process.c     2006-04-26 19:06:59 +0200
1782 @@ -164,9 +164,9 @@ void show_regs(struct pt_regs *regs)
1783         struct task_struct *tsk = current;
1784  
1785          printk("CPU:    %d    %s\n", task_thread_info(tsk)->cpu, print_tainted());
1786 -        printk("Process %s (pid: %d, task: %p, ksp: %p)\n",
1787 -              current->comm, current->pid, (void *) tsk,
1788 -              (void *) tsk->thread.ksp);
1789 +       printk("Process %s (pid: %d[#%u], task: %p, ksp: %p)\n",
1790 +              current->comm, current->pid, current->xid,
1791 +              (void *) tsk, (void *) tsk->thread.ksp);
1792  
1793         show_registers(regs);
1794         /* Show stack backtrace if pt_regs is from kernel mode */
1795 @@ -196,7 +196,7 @@ int kernel_thread(int (*fn)(void *), voi
1796         regs.orig_gpr2 = -1;
1797  
1798         /* Ok, create the new process.. */
1799 -       return do_fork(flags | CLONE_VM | CLONE_UNTRACED,
1800 +       return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD,
1801                        0, &regs, 0, NULL, NULL);
1802  }
1803  
1804 diff -NurpP --minimal linux-2.6.16.20/arch/s390/kernel/ptrace.c linux-2.6.16.20-vs2.1.1-rc22/arch/s390/kernel/ptrace.c
1805 --- linux-2.6.16.20/arch/s390/kernel/ptrace.c   2006-04-09 13:49:44 +0200
1806 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/s390/kernel/ptrace.c      2006-04-26 19:06:59 +0200
1807 @@ -723,7 +723,13 @@ sys_ptrace(long request, long pid, long 
1808                 goto out;
1809         }
1810  
1811 +       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) {
1812 +               ret = -EPERM;
1813 +               goto out_tsk;
1814 +       }
1815 +
1816         ret = do_ptrace(child, request, addr, data);
1817 +out_tsk:
1818         put_task_struct(child);
1819  out:
1820         unlock_kernel();
1821 diff -NurpP --minimal linux-2.6.16.20/arch/s390/kernel/syscalls.S linux-2.6.16.20-vs2.1.1-rc22/arch/s390/kernel/syscalls.S
1822 --- linux-2.6.16.20/arch/s390/kernel/syscalls.S 2006-02-18 14:39:48 +0100
1823 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/s390/kernel/syscalls.S    2006-04-26 19:06:59 +0200
1824 @@ -271,7 +271,7 @@ SYSCALL(sys_clock_settime,sys_clock_sett
1825  SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper)       /* 260 */
1826  SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper)
1827  SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper)
1828 -NI_SYSCALL                                                     /* reserved for vserver */
1829 +SYSCALL(sys_vserver,sys_vserver,sys32_vserver)
1830  SYSCALL(s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper)
1831  SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper)
1832  SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper)
1833 diff -NurpP --minimal linux-2.6.16.20/arch/sh/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/sh/Kconfig
1834 --- linux-2.6.16.20/arch/sh/Kconfig     2006-04-09 13:49:44 +0200
1835 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sh/Kconfig        2006-04-26 19:06:59 +0200
1836 @@ -633,6 +633,8 @@ source "arch/sh/oprofile/Kconfig"
1837  
1838  source "arch/sh/Kconfig.debug"
1839  
1840 +source "kernel/vserver/Kconfig"
1841 +
1842  source "security/Kconfig"
1843  
1844  source "crypto/Kconfig"
1845 diff -NurpP --minimal linux-2.6.16.20/arch/sh/kernel/kgdb_stub.c linux-2.6.16.20-vs2.1.1-rc22/arch/sh/kernel/kgdb_stub.c
1846 --- linux-2.6.16.20/arch/sh/kernel/kgdb_stub.c  2004-08-14 12:54:51 +0200
1847 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sh/kernel/kgdb_stub.c     2006-04-26 19:06:59 +0200
1848 @@ -412,7 +412,7 @@ static struct task_struct *get_thread(in
1849         if (pid == PID_MAX) pid = 0;
1850  
1851         /* First check via PID */
1852 -       thread = find_task_by_pid(pid);
1853 +       thread = find_task_by_real_pid(pid);
1854  
1855         if (thread)
1856                 return thread;
1857 diff -NurpP --minimal linux-2.6.16.20/arch/sh/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/sh/kernel/process.c
1858 --- linux-2.6.16.20/arch/sh/kernel/process.c    2006-02-15 13:54:12 +0100
1859 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sh/kernel/process.c       2006-04-26 19:06:59 +0200
1860 @@ -113,7 +113,8 @@ void machine_power_off(void)
1861  void show_regs(struct pt_regs * regs)
1862  {
1863         printk("\n");
1864 -       printk("Pid : %d, Comm: %20s\n", current->pid, current->comm);
1865 +       printk("Pid : %d[#%u], Comm: %20s\n",
1866 +               current->pid, current->xid, current->comm);
1867         print_symbol("PC is at %s\n", regs->pc);
1868         printk("PC  : %08lx SP  : %08lx SR  : %08lx ",
1869                regs->pc, regs->regs[15], regs->sr);
1870 @@ -181,7 +182,8 @@ int kernel_thread(int (*fn)(void *), voi
1871         regs.sr = (1 << 30);
1872  
1873         /* Ok, create the new process.. */
1874 -       return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
1875 +       return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD,
1876 +               0, &regs, 0, NULL, NULL);
1877  }
1878  
1879  /*
1880 diff -NurpP --minimal linux-2.6.16.20/arch/sh/kernel/setup.c linux-2.6.16.20-vs2.1.1-rc22/arch/sh/kernel/setup.c
1881 --- linux-2.6.16.20/arch/sh/kernel/setup.c      2006-02-15 13:54:12 +0100
1882 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sh/kernel/setup.c 2006-05-11 16:06:22 +0200
1883 @@ -20,6 +20,7 @@
1884  #include <linux/root_dev.h>
1885  #include <linux/utsname.h>
1886  #include <linux/cpu.h>
1887 +#include <linux/vs_cvirt.h>
1888  #include <asm/uaccess.h>
1889  #include <asm/io.h>
1890  #include <asm/sections.h>
1891 @@ -485,7 +486,7 @@ static int show_cpuinfo(struct seq_file 
1892                 seq_printf(m, "machine\t\t: %s\n", get_system_type());
1893  
1894         seq_printf(m, "processor\t: %d\n", cpu);
1895 -       seq_printf(m, "cpu family\t: %s\n", system_utsname.machine);
1896 +       seq_printf(m, "cpu family\t: %s\n", vx_new_uts(machine));
1897         seq_printf(m, "cpu type\t: %s\n", get_cpu_subtype());
1898  
1899         show_cpuflags(m);
1900 diff -NurpP --minimal linux-2.6.16.20/arch/sh/kernel/sys_sh.c linux-2.6.16.20-vs2.1.1-rc22/arch/sh/kernel/sys_sh.c
1901 --- linux-2.6.16.20/arch/sh/kernel/sys_sh.c     2005-08-29 22:24:55 +0200
1902 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sh/kernel/sys_sh.c        2006-05-11 16:06:22 +0200
1903 @@ -21,6 +21,7 @@
1904  #include <linux/mman.h>
1905  #include <linux/file.h>
1906  #include <linux/utsname.h>
1907 +#include <linux/vs_cvirt.h>
1908  
1909  #include <asm/uaccess.h>
1910  #include <asm/ipc.h>
1911 @@ -267,7 +268,7 @@ asmlinkage int sys_uname(struct old_utsn
1912         if (!name)
1913                 return -EFAULT;
1914         down_read(&uts_sem);
1915 -       err=copy_to_user(name, &system_utsname, sizeof (*name));
1916 +       err=copy_to_user(name, vx_new_utsname(), sizeof (*name));
1917         up_read(&uts_sem);
1918         return err?-EFAULT:0;
1919  }
1920 diff -NurpP --minimal linux-2.6.16.20/arch/sh64/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/sh64/kernel/process.c
1921 --- linux-2.6.16.20/arch/sh64/kernel/process.c  2006-01-18 06:07:57 +0100
1922 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sh64/kernel/process.c     2006-04-26 19:06:59 +0200
1923 @@ -637,7 +637,7 @@ int kernel_thread(int (*fn)(void *), voi
1924  static __inline__ _syscall2(int,clone,unsigned long,flags,unsigned long,newsp)
1925  static __inline__ _syscall1(int,exit,int,ret)
1926  
1927 -       reply = clone(flags | CLONE_VM, 0);
1928 +       reply = clone(flags | CLONE_VM | CLONE_KTHREAD, 0);
1929         if (!reply) {
1930                 /* Child */
1931                 reply = exit(fn(arg));
1932 diff -NurpP --minimal linux-2.6.16.20/arch/sh64/kernel/sys_sh64.c linux-2.6.16.20-vs2.1.1-rc22/arch/sh64/kernel/sys_sh64.c
1933 --- linux-2.6.16.20/arch/sh64/kernel/sys_sh64.c 2005-06-22 02:37:59 +0200
1934 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sh64/kernel/sys_sh64.c    2006-05-11 16:06:22 +0200
1935 @@ -29,6 +29,7 @@
1936  #include <linux/file.h>
1937  #include <linux/utsname.h>
1938  #include <linux/syscalls.h>
1939 +#include <linux/vs_cvirt.h>
1940  #include <asm/uaccess.h>
1941  #include <asm/ipc.h>
1942  #include <asm/ptrace.h>
1943 @@ -279,7 +280,7 @@ asmlinkage int sys_uname(struct old_utsn
1944         if (!name)
1945                 return -EFAULT;
1946         down_read(&uts_sem);
1947 -       err=copy_to_user(name, &system_utsname, sizeof (*name));
1948 +       err=copy_to_user(name, vx_new_utsname(), sizeof (*name));
1949         up_read(&uts_sem);
1950         return err?-EFAULT:0;
1951  }
1952 diff -NurpP --minimal linux-2.6.16.20/arch/sparc/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/Kconfig
1953 --- linux-2.6.16.20/arch/sparc/Kconfig  2006-02-18 14:39:49 +0100
1954 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/Kconfig     2006-04-26 19:06:59 +0200
1955 @@ -284,6 +284,8 @@ source "fs/Kconfig"
1956  
1957  source "arch/sparc/Kconfig.debug"
1958  
1959 +source "kernel/vserver/Kconfig"
1960 +
1961  source "security/Kconfig"
1962  
1963  source "crypto/Kconfig"
1964 diff -NurpP --minimal linux-2.6.16.20/arch/sparc/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/kernel/process.c
1965 --- linux-2.6.16.20/arch/sparc/kernel/process.c 2006-02-15 13:54:13 +0100
1966 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/kernel/process.c    2006-04-26 19:07:00 +0200
1967 @@ -706,7 +706,8 @@ pid_t kernel_thread(int (*fn)(void *), v
1968                              /* Notreached by child. */
1969                              "1: mov %%o0, %0\n\t" :
1970                              "=r" (retval) :
1971 -                            "i" (__NR_clone), "r" (flags | CLONE_VM | CLONE_UNTRACED),
1972 +                            "i" (__NR_clone), "r" (flags |
1973 +                                       CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD),
1974                              "i" (__NR_exit),  "r" (fn), "r" (arg) :
1975                              "g1", "g2", "g3", "o0", "o1", "memory", "cc");
1976         return retval;
1977 diff -NurpP --minimal linux-2.6.16.20/arch/sparc/kernel/ptrace.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/kernel/ptrace.c
1978 --- linux-2.6.16.20/arch/sparc/kernel/ptrace.c  2006-04-09 13:49:44 +0200
1979 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/kernel/ptrace.c     2006-04-28 05:07:10 +0200
1980 @@ -299,6 +299,10 @@ asmlinkage void do_ptrace(struct pt_regs
1981                 pt_error_return(regs, -ret);
1982                 goto out;
1983         }
1984 +       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) {
1985 +               pt_error_return(regs, ESRCH);
1986 +               goto out_tsk;
1987 +       }
1988  
1989         if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH)
1990             || (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) {
1991 diff -NurpP --minimal linux-2.6.16.20/arch/sparc/kernel/sys_sparc.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/kernel/sys_sparc.c
1992 --- linux-2.6.16.20/arch/sparc/kernel/sys_sparc.c       2005-06-22 02:37:59 +0200
1993 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/kernel/sys_sparc.c  2006-04-26 19:07:00 +0200
1994 @@ -21,6 +21,7 @@
1995  #include <linux/utsname.h>
1996  #include <linux/smp.h>
1997  #include <linux/smp_lock.h>
1998 +#include <linux/vs_cvirt.h>
1999  
2000  #include <asm/uaccess.h>
2001  #include <asm/ipc.h>
2002 @@ -470,13 +471,13 @@ asmlinkage int sys_getdomainname(char __
2003         
2004         down_read(&uts_sem);
2005         
2006 -       nlen = strlen(system_utsname.domainname) + 1;
2007 +       nlen = strlen(vx_new_uts(domainname)) + 1;
2008  
2009         if (nlen < len)
2010                 len = nlen;
2011         if (len > __NEW_UTS_LEN)
2012                 goto done;
2013 -       if (copy_to_user(name, system_utsname.domainname, len))
2014 +       if (copy_to_user(name, vx_new_uts(domainname), len))
2015                 goto done;
2016         err = 0;
2017  done:
2018 diff -NurpP --minimal linux-2.6.16.20/arch/sparc/kernel/sys_sunos.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/kernel/sys_sunos.c
2019 --- linux-2.6.16.20/arch/sparc/kernel/sys_sunos.c       2006-02-15 13:54:13 +0100
2020 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/kernel/sys_sunos.c  2006-05-11 16:06:22 +0200
2021 @@ -35,6 +35,7 @@
2022  #include <linux/smp.h>
2023  #include <linux/smp_lock.h>
2024  #include <linux/syscalls.h>
2025 +#include <linux/vs_cvirt.h>
2026  
2027  #include <net/sock.h>
2028  
2029 @@ -482,14 +483,16 @@ struct sunos_utsname {
2030  asmlinkage int sunos_uname(struct sunos_utsname __user *name)
2031  {
2032         int ret;
2033 +       struct new_utsname *ptr;
2034         down_read(&uts_sem);
2035 -       ret = copy_to_user(&name->sname[0], &system_utsname.sysname[0], sizeof(name->sname) - 1);
2036 +       ptr = vx_new_utsname();
2037 +       ret = copy_to_user(&name->sname[0], ptr->sysname, sizeof(name->sname) - 1);
2038         if (!ret) {
2039 -               ret |= __copy_to_user(&name->nname[0], &system_utsname.nodename[0], sizeof(name->nname) - 1);
2040 +               ret |= __copy_to_user(&name->nname[0], ptr->nodename, sizeof(name->nname) - 1);
2041                 ret |= __put_user('\0', &name->nname[8]);
2042 -               ret |= __copy_to_user(&name->rel[0], &system_utsname.release[0], sizeof(name->rel) - 1);
2043 -               ret |= __copy_to_user(&name->ver[0], &system_utsname.version[0], sizeof(name->ver) - 1);
2044 -               ret |= __copy_to_user(&name->mach[0], &system_utsname.machine[0], sizeof(name->mach) - 1);
2045 +               ret |= __copy_to_user(&name->rel[0], ptr->release, sizeof(name->rel) - 1);
2046 +               ret |= __copy_to_user(&name->ver[0], ptr->version, sizeof(name->ver) - 1);
2047 +               ret |= __copy_to_user(&name->mach[0], ptr->machine, sizeof(name->mach) - 1);
2048         }
2049         up_read(&uts_sem);
2050         return ret ? -EFAULT : 0;
2051 diff -NurpP --minimal linux-2.6.16.20/arch/sparc/kernel/systbls.S linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/kernel/systbls.S
2052 --- linux-2.6.16.20/arch/sparc/kernel/systbls.S 2006-02-18 14:39:49 +0100
2053 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/kernel/systbls.S    2006-04-26 19:07:00 +0200
2054 @@ -72,7 +72,7 @@ sys_call_table:
2055  /*250*/        .long sparc_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl
2056  /*255*/        .long sys_nis_syscall, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
2057  /*260*/        .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
2058 -/*265*/        .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy
2059 +/*265*/        .long sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy
2060  /*270*/        .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
2061  /*275*/        .long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
2062  /*280*/        .long sys_ni_syscall, sys_add_key, sys_request_key, sys_keyctl, sys_openat
2063 diff -NurpP --minimal linux-2.6.16.20/arch/sparc/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/kernel/traps.c
2064 --- linux-2.6.16.20/arch/sparc/kernel/traps.c   2006-01-18 06:07:57 +0100
2065 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/kernel/traps.c      2006-04-26 19:07:00 +0200
2066 @@ -100,7 +100,8 @@ void die_if_kernel(char *str, struct pt_
2067  "              /_| \\__/ |_\\\n"
2068  "                 \\__U_/\n");
2069  
2070 -       printk("%s(%d): %s [#%d]\n", current->comm, current->pid, str, ++die_counter);
2071 +       printk("%s(%d[#%u]): %s [#%d]\n", current->comm,
2072 +               current->pid, current->xid, str, ++die_counter);
2073         show_regs(regs);
2074  
2075         __SAVE; __SAVE; __SAVE; __SAVE;
2076 diff -NurpP --minimal linux-2.6.16.20/arch/sparc64/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/Kconfig
2077 --- linux-2.6.16.20/arch/sparc64/Kconfig        2006-04-09 13:49:44 +0200
2078 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/Kconfig   2006-04-26 19:07:00 +0200
2079 @@ -394,6 +394,8 @@ endmenu
2080  
2081  source "arch/sparc64/Kconfig.debug"
2082  
2083 +source "kernel/vserver/Kconfig"
2084 +
2085  source "security/Kconfig"
2086  
2087  source "crypto/Kconfig"
2088 diff -NurpP --minimal linux-2.6.16.20/arch/sparc64/kernel/binfmt_aout32.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/binfmt_aout32.c
2089 --- linux-2.6.16.20/arch/sparc64/kernel/binfmt_aout32.c 2006-02-18 14:39:49 +0100
2090 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/binfmt_aout32.c    2006-04-26 19:07:00 +0200
2091 @@ -27,6 +27,7 @@
2092  #include <linux/binfmts.h>
2093  #include <linux/personality.h>
2094  #include <linux/init.h>
2095 +#include <linux/vs_memory.h>
2096  
2097  #include <asm/system.h>
2098  #include <asm/uaccess.h>
2099 diff -NurpP --minimal linux-2.6.16.20/arch/sparc64/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/process.c
2100 --- linux-2.6.16.20/arch/sparc64/kernel/process.c       2006-02-15 13:54:13 +0100
2101 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/process.c  2006-04-26 19:07:00 +0200
2102 @@ -717,7 +717,8 @@ pid_t kernel_thread(int (*fn)(void *), v
2103                              /* Notreached by child. */
2104                              "1:" :
2105                              "=r" (retval) :
2106 -                            "i" (__NR_clone), "r" (flags | CLONE_VM | CLONE_UNTRACED),
2107 +                            "i" (__NR_clone), "r" (flags |
2108 +                               CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD),
2109                              "i" (__NR_exit),  "r" (fn), "r" (arg) :
2110                              "g1", "g2", "g3", "o0", "o1", "memory", "cc");
2111         return retval;
2112 diff -NurpP --minimal linux-2.6.16.20/arch/sparc64/kernel/ptrace.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/ptrace.c
2113 --- linux-2.6.16.20/arch/sparc64/kernel/ptrace.c        2006-02-18 14:39:49 +0100
2114 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/ptrace.c   2006-04-28 05:07:10 +0200
2115 @@ -209,6 +209,10 @@ asmlinkage void do_ptrace(struct pt_regs
2116                 pt_error_return(regs, -ret);
2117                 goto out;
2118         }
2119 +       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) {
2120 +               pt_error_return(regs, ESRCH);
2121 +               goto out_tsk;
2122 +       }
2123  
2124         if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH)
2125             || (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) {
2126 diff -NurpP --minimal linux-2.6.16.20/arch/sparc64/kernel/sys_sparc.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/sys_sparc.c
2127 --- linux-2.6.16.20/arch/sparc64/kernel/sys_sparc.c     2005-08-29 22:24:56 +0200
2128 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/sys_sparc.c        2006-04-26 19:07:00 +0200
2129 @@ -25,6 +25,7 @@
2130  #include <linux/syscalls.h>
2131  #include <linux/ipc.h>
2132  #include <linux/personality.h>
2133 +#include <linux/vs_cvirt.h>
2134  
2135  #include <asm/uaccess.h>
2136  #include <asm/ipc.h>
2137 @@ -476,13 +477,13 @@ asmlinkage long sys_getdomainname(char _
2138  
2139         down_read(&uts_sem);
2140         
2141 -       nlen = strlen(system_utsname.domainname) + 1;
2142 +       nlen = strlen(vx_new_uts(domainname)) + 1;
2143  
2144          if (nlen < len)
2145                  len = nlen;
2146         if (len > __NEW_UTS_LEN)
2147                 goto done;
2148 -       if (copy_to_user(name, system_utsname.domainname, len))
2149 +       if (copy_to_user(name, vx_new_uts(domainname), len))
2150                 goto done;
2151         err = 0;
2152  done:
2153 diff -NurpP --minimal linux-2.6.16.20/arch/sparc64/kernel/sys_sparc32.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/sys_sparc32.c
2154 --- linux-2.6.16.20/arch/sparc64/kernel/sys_sparc32.c   2006-02-18 14:39:49 +0100
2155 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/sys_sparc32.c      2006-05-29 16:53:09 +0200
2156 @@ -788,7 +788,7 @@ asmlinkage long sys32_gettimeofday(struc
2157  {
2158         if (tv) {
2159                 struct timeval ktv;
2160 -               do_gettimeofday(&ktv);
2161 +               vx_gettimeofday(&ktv);
2162                 if (put_tv32(tv, &ktv))
2163                         return -EFAULT;
2164         }
2165 diff -NurpP --minimal linux-2.6.16.20/arch/sparc64/kernel/sys_sunos32.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/sys_sunos32.c
2166 --- linux-2.6.16.20/arch/sparc64/kernel/sys_sunos32.c   2006-02-15 13:54:13 +0100
2167 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/sys_sunos32.c      2006-05-11 16:06:22 +0200
2168 @@ -35,6 +35,7 @@
2169  #include <linux/smp.h>
2170  #include <linux/smp_lock.h>
2171  #include <linux/syscalls.h>
2172 +#include <linux/vs_cvirt.h>
2173  
2174  #include <asm/uaccess.h>
2175  #include <asm/page.h>
2176 @@ -437,18 +438,20 @@ struct sunos_utsname {
2177  asmlinkage int sunos_uname(struct sunos_utsname __user *name)
2178  {
2179         int ret;
2180 +       struct new_utsname *ptr;
2181  
2182         down_read(&uts_sem);
2183 -       ret = copy_to_user(&name->sname[0], &system_utsname.sysname[0],
2184 +       ptr = vx_new_utsname();
2185 +       ret = copy_to_user(&name->sname[0], ptr->sysname,
2186                            sizeof(name->sname) - 1);
2187 -       ret |= copy_to_user(&name->nname[0], &system_utsname.nodename[0],
2188 +       ret |= copy_to_user(&name->nname[0], ptr->nodename,
2189                             sizeof(name->nname) - 1);
2190         ret |= put_user('\0', &name->nname[8]);
2191 -       ret |= copy_to_user(&name->rel[0], &system_utsname.release[0],
2192 +       ret |= copy_to_user(&name->rel[0], ptr->release,
2193                             sizeof(name->rel) - 1);
2194 -       ret |= copy_to_user(&name->ver[0], &system_utsname.version[0],
2195 +       ret |= copy_to_user(&name->ver[0], ptr->version,
2196                             sizeof(name->ver) - 1);
2197 -       ret |= copy_to_user(&name->mach[0], &system_utsname.machine[0],
2198 +       ret |= copy_to_user(&name->mach[0], ptr->machine,
2199                             sizeof(name->mach) - 1);
2200         up_read(&uts_sem);
2201         return (ret ? -EFAULT : 0);
2202 diff -NurpP --minimal linux-2.6.16.20/arch/sparc64/kernel/systbls.S linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/systbls.S
2203 --- linux-2.6.16.20/arch/sparc64/kernel/systbls.S       2006-02-18 14:39:49 +0100
2204 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/systbls.S  2006-04-26 19:07:00 +0200
2205 @@ -73,7 +73,7 @@ sys_call_table32:
2206  /*250*/        .word sys32_mremap, sys32_sysctl, sys32_getsid, sys_fdatasync, sys32_nfsservctl
2207         .word sys_ni_syscall, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep
2208  /*260*/        .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun
2209 -       .word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy
2210 +       .word sys_timer_delete, compat_sys_timer_create, sys32_vserver, compat_sys_io_setup, sys_io_destroy
2211  /*270*/        .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink
2212         .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid
2213  /*280*/        .word sys_ni_syscall, sys_add_key, sys_request_key, sys_keyctl, compat_sys_openat
2214 @@ -142,7 +142,7 @@ sys_call_table:
2215  /*250*/        .word sys64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl
2216         .word sys_ni_syscall, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
2217  /*260*/        .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
2218 -       .word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy
2219 +       .word sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy
2220  /*270*/        .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
2221         .word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
2222  /*280*/        .word sys_nis_syscall, sys_add_key, sys_request_key, sys_keyctl, sys_openat
2223 diff -NurpP --minimal linux-2.6.16.20/arch/sparc64/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/traps.c
2224 --- linux-2.6.16.20/arch/sparc64/kernel/traps.c 2006-01-18 06:07:57 +0100
2225 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/traps.c    2006-04-26 19:07:00 +0200
2226 @@ -1896,7 +1896,8 @@ void die_if_kernel(char *str, struct pt_
2227  "              /_| \\__/ |_\\\n"
2228  "                 \\__U_/\n");
2229  
2230 -       printk("%s(%d): %s [#%d]\n", current->comm, current->pid, str, ++die_counter);
2231 +       printk("%s(%d[#%u]): %s [#%d]\n", current->comm,
2232 +               current->pid, current->xid, str, ++die_counter);
2233         notify_die(DIE_OOPS, str, regs, 0, 255, SIGSEGV);
2234         __asm__ __volatile__("flushw");
2235         __show_regs(regs);
2236 diff -NurpP --minimal linux-2.6.16.20/arch/sparc64/solaris/fs.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/solaris/fs.c
2237 --- linux-2.6.16.20/arch/sparc64/solaris/fs.c   2006-04-09 13:49:44 +0200
2238 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/solaris/fs.c      2006-04-26 19:07:00 +0200
2239 @@ -363,7 +363,7 @@ static int report_statvfs(struct vfsmoun
2240                 int j = strlen (p);
2241                 
2242                 if (j > 15) j = 15;
2243 -               if (IS_RDONLY(inode)) i = 1;
2244 +               if (IS_RDONLY(inode) || MNT_IS_RDONLY(mnt)) i = 1;
2245                 if (mnt->mnt_flags & MNT_NOSUID) i |= 2;
2246                 if (!sysv_valid_dev(inode->i_sb->s_dev))
2247                         return -EOVERFLOW;
2248 @@ -399,7 +399,7 @@ static int report_statvfs64(struct vfsmo
2249                 int j = strlen (p);
2250                 
2251                 if (j > 15) j = 15;
2252 -               if (IS_RDONLY(inode)) i = 1;
2253 +               if (IS_RDONLY(inode) || MNT_IS_RDONLY(mnt)) i = 1;
2254                 if (mnt->mnt_flags & MNT_NOSUID) i |= 2;
2255                 if (!sysv_valid_dev(inode->i_sb->s_dev))
2256                         return -EOVERFLOW;
2257 diff -NurpP --minimal linux-2.6.16.20/arch/sparc64/solaris/misc.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/solaris/misc.c
2258 --- linux-2.6.16.20/arch/sparc64/solaris/misc.c 2006-01-03 17:29:19 +0100
2259 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/solaris/misc.c    2006-05-11 16:06:22 +0200
2260 @@ -17,6 +17,7 @@
2261  #include <linux/timex.h>
2262  #include <linux/major.h>
2263  #include <linux/compat.h>
2264 +#include <linux/vs_cvirt.h>
2265  
2266  #include <asm/uaccess.h>
2267  #include <asm/string.h>
2268 @@ -239,7 +240,7 @@ asmlinkage int solaris_utssys(u32 buf, u
2269                 /* Let's cheat */
2270                 err  = set_utsfield(v->sysname, "SunOS", 1, 0);
2271                 down_read(&uts_sem);
2272 -               err |= set_utsfield(v->nodename, system_utsname.nodename,
2273 +               err |= set_utsfield(v->nodename, vx_new_uts(nodename),
2274                                     1, 1);
2275                 up_read(&uts_sem);
2276                 err |= set_utsfield(v->release, "2.6", 0, 0);
2277 @@ -263,7 +264,7 @@ asmlinkage int solaris_utsname(u32 buf)
2278         /* Why should we not lie a bit? */
2279         down_read(&uts_sem);
2280         err  = set_utsfield(v->sysname, "SunOS", 0, 0);
2281 -       err |= set_utsfield(v->nodename, system_utsname.nodename, 1, 1);
2282 +       err |= set_utsfield(v->nodename, vx_new_uts(nodename), 1, 1);
2283         err |= set_utsfield(v->release, "5.6", 0, 0);
2284         err |= set_utsfield(v->version, "Generic", 0, 0);
2285         err |= set_utsfield(v->machine, machine(), 0, 0);
2286 @@ -295,7 +296,7 @@ asmlinkage int solaris_sysinfo(int cmd, 
2287         case SI_HOSTNAME:
2288                 r = buffer + 256;
2289                 down_read(&uts_sem);
2290 -               for (p = system_utsname.nodename, q = buffer; 
2291 +               for (p = vx_new_uts(nodename), q = buffer;
2292                      q < r && *p && *p != '.'; *q++ = *p++);
2293                 up_read(&uts_sem);
2294                 *q = 0;
2295 diff -NurpP --minimal linux-2.6.16.20/arch/um/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/um/Kconfig
2296 --- linux-2.6.16.20/arch/um/Kconfig     2006-02-18 14:39:49 +0100
2297 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/um/Kconfig        2006-04-26 19:07:00 +0200
2298 @@ -290,6 +290,8 @@ source "drivers/connector/Kconfig"
2299  
2300  source "fs/Kconfig"
2301  
2302 +source "kernel/vserver/Kconfig"
2303 +
2304  source "security/Kconfig"
2305  
2306  source "crypto/Kconfig"
2307 diff -NurpP --minimal linux-2.6.16.20/arch/um/drivers/mconsole_kern.c linux-2.6.16.20-vs2.1.1-rc22/arch/um/drivers/mconsole_kern.c
2308 --- linux-2.6.16.20/arch/um/drivers/mconsole_kern.c     2006-04-09 13:49:44 +0200
2309 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/um/drivers/mconsole_kern.c        2006-04-26 19:07:00 +0200
2310 @@ -21,6 +21,7 @@
2311  #include "linux/proc_fs.h"
2312  #include "linux/syscalls.h"
2313  #include "linux/console.h"
2314 +#include "linux/vs_pid.h"
2315  #include "asm/irq.h"
2316  #include "asm/uaccess.h"
2317  #include "user_util.h"
2318 diff -NurpP --minimal linux-2.6.16.20/arch/um/kernel/process_kern.c linux-2.6.16.20-vs2.1.1-rc22/arch/um/kernel/process_kern.c
2319 --- linux-2.6.16.20/arch/um/kernel/process_kern.c       2006-02-18 14:39:49 +0100
2320 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/um/kernel/process_kern.c  2006-04-28 05:07:10 +0200
2321 @@ -23,6 +23,7 @@
2322  #include "linux/proc_fs.h"
2323  #include "linux/ptrace.h"
2324  #include "linux/random.h"
2325 +
2326  #include "asm/unistd.h"
2327  #include "asm/mman.h"
2328  #include "asm/segment.h"
2329 @@ -95,7 +96,7 @@ int kernel_thread(int (*fn)(void *), voi
2330  
2331         current->thread.request.u.thread.proc = fn;
2332         current->thread.request.u.thread.arg = arg;
2333 -       pid = do_fork(CLONE_VM | CLONE_UNTRACED | flags, 0,
2334 +       pid = do_fork(CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD | flags, 0,
2335                       &current->thread.regs, 0, NULL, NULL);
2336         if(pid < 0)
2337                 panic("do_fork failed in kernel_thread, errno = %d", pid);
2338 diff -NurpP --minimal linux-2.6.16.20/arch/um/kernel/syscall_kern.c linux-2.6.16.20-vs2.1.1-rc22/arch/um/kernel/syscall_kern.c
2339 --- linux-2.6.16.20/arch/um/kernel/syscall_kern.c       2005-08-29 22:24:56 +0200
2340 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/um/kernel/syscall_kern.c  2006-04-26 19:07:00 +0200
2341 @@ -15,6 +15,8 @@
2342  #include "linux/unistd.h"
2343  #include "linux/slab.h"
2344  #include "linux/utime.h"
2345 +#include "linux/vs_cvirt.h"
2346 +
2347  #include "asm/mman.h"
2348  #include "asm/uaccess.h"
2349  #include "kern_util.h"
2350 @@ -110,7 +112,7 @@ long sys_uname(struct old_utsname * name
2351         if (!name)
2352                 return -EFAULT;
2353         down_read(&uts_sem);
2354 -       err=copy_to_user(name, &system_utsname, sizeof (*name));
2355 +       err=copy_to_user(name, vx_new_utsname(), sizeof (*name));
2356         up_read(&uts_sem);
2357         return err?-EFAULT:0;
2358  }
2359 @@ -118,6 +120,7 @@ long sys_uname(struct old_utsname * name
2360  long sys_olduname(struct oldold_utsname * name)
2361  {
2362         long error;
2363 +       struct new_utsname *ptr;
2364  
2365         if (!name)
2366                 return -EFAULT;
2367 @@ -126,19 +129,20 @@ long sys_olduname(struct oldold_utsname 
2368    
2369         down_read(&uts_sem);
2370         
2371 -       error = __copy_to_user(&name->sysname,&system_utsname.sysname,
2372 +       ptr = vx_new_utsname();
2373 +       error = __copy_to_user(&name->sysname,ptr->sysname,
2374                                __OLD_UTS_LEN);
2375         error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
2376 -       error |= __copy_to_user(&name->nodename,&system_utsname.nodename,
2377 +       error |= __copy_to_user(&name->nodename,ptr->nodename,
2378                                 __OLD_UTS_LEN);
2379         error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
2380 -       error |= __copy_to_user(&name->release,&system_utsname.release,
2381 +       error |= __copy_to_user(&name->release,ptr->release,
2382                                 __OLD_UTS_LEN);
2383         error |= __put_user(0,name->release+__OLD_UTS_LEN);
2384 -       error |= __copy_to_user(&name->version,&system_utsname.version,
2385 +       error |= __copy_to_user(&name->version,ptr->version,
2386                                 __OLD_UTS_LEN);
2387         error |= __put_user(0,name->version+__OLD_UTS_LEN);
2388 -       error |= __copy_to_user(&name->machine,&system_utsname.machine,
2389 +       error |= __copy_to_user(&name->machine,ptr->machine,
2390                                 __OLD_UTS_LEN);
2391         error |= __put_user(0,name->machine+__OLD_UTS_LEN);
2392         
2393 diff -NurpP --minimal linux-2.6.16.20/arch/um/sys-x86_64/syscalls.c linux-2.6.16.20-vs2.1.1-rc22/arch/um/sys-x86_64/syscalls.c
2394 --- linux-2.6.16.20/arch/um/sys-x86_64/syscalls.c       2006-01-03 17:29:20 +0100
2395 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/um/sys-x86_64/syscalls.c  2006-05-11 16:06:22 +0200
2396 @@ -9,6 +9,7 @@
2397  #include "linux/shm.h"
2398  #include "linux/utsname.h"
2399  #include "linux/personality.h"
2400 +#include "linux/vs_cvirt.h"
2401  #include "asm/uaccess.h"
2402  #define __FRAME_OFFSETS
2403  #include "asm/ptrace.h"
2404 @@ -21,7 +22,7 @@ asmlinkage long sys_uname64(struct new_u
2405  {
2406         int err;
2407         down_read(&uts_sem);
2408 -       err = copy_to_user(name, &system_utsname, sizeof (*name));
2409 +       err = copy_to_user(name, vx_new_utsname(), sizeof (*name));
2410         up_read(&uts_sem);
2411         if (personality(current->personality) == PER_LINUX32)
2412                 err |= copy_to_user(&name->machine, "i686", 5);
2413 diff -NurpP --minimal linux-2.6.16.20/arch/v850/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/v850/Kconfig
2414 --- linux-2.6.16.20/arch/v850/Kconfig   2006-02-18 14:39:50 +0100
2415 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/v850/Kconfig      2006-04-26 19:07:00 +0200
2416 @@ -320,6 +320,8 @@ source "drivers/usb/Kconfig"
2417  
2418  source "arch/v850/Kconfig.debug"
2419  
2420 +source "kernel/vserver/Kconfig"
2421 +
2422  source "security/Kconfig"
2423  
2424  source "crypto/Kconfig"
2425 diff -NurpP --minimal linux-2.6.16.20/arch/v850/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/v850/kernel/process.c
2426 --- linux-2.6.16.20/arch/v850/kernel/process.c  2006-04-09 13:49:44 +0200
2427 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/v850/kernel/process.c     2006-04-26 19:07:00 +0200
2428 @@ -84,7 +84,7 @@ int kernel_thread (int (*fn)(void *), vo
2429         /* Clone this thread.  Note that we don't pass the clone syscall's
2430            second argument -- it's ignored for calls from kernel mode (the
2431            child's SP is always set to the top of the kernel stack).  */
2432 -       arg0 = flags | CLONE_VM;
2433 +       arg0 = flags | CLONE_VM | CLONE_KTHREAD;
2434         syscall = __NR_clone;
2435         asm volatile ("trap " SYSCALL_SHORT_TRAP
2436                       : "=r" (ret), "=r" (syscall)
2437 diff -NurpP --minimal linux-2.6.16.20/arch/v850/kernel/ptrace.c linux-2.6.16.20-vs2.1.1-rc22/arch/v850/kernel/ptrace.c
2438 --- linux-2.6.16.20/arch/v850/kernel/ptrace.c   2006-04-09 13:49:44 +0200
2439 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/v850/kernel/ptrace.c      2006-04-26 19:07:00 +0200
2440 @@ -137,6 +137,8 @@ long arch_ptrace(struct task_struct *chi
2441                         break;
2442                 rval = -EIO;
2443                 goto out;
2444 +       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) /* NOTE(review): unreachable, follows 'goto out' */
2445 +               goto out;       /* exit label already used by this function */
2446  
2447         /* Read/write the word at location ADDR in the registers.  */
2448         case PTRACE_PEEKUSR:
2449 diff -NurpP --minimal linux-2.6.16.20/arch/x86_64/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/Kconfig
2450 --- linux-2.6.16.20/arch/x86_64/Kconfig 2006-04-09 13:49:44 +0200
2451 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/Kconfig    2006-04-26 19:07:00 +0200
2452 @@ -588,6 +588,8 @@ endmenu
2453  
2454  source "arch/x86_64/Kconfig.debug"
2455  
2456 +source "kernel/vserver/Kconfig"
2457 +
2458  source "security/Kconfig"
2459  
2460  source "crypto/Kconfig"
2461 diff -NurpP --minimal linux-2.6.16.20/arch/x86_64/ia32/ia32_aout.c linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/ia32/ia32_aout.c
2462 --- linux-2.6.16.20/arch/x86_64/ia32/ia32_aout.c        2006-01-03 17:29:20 +0100
2463 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/ia32/ia32_aout.c   2006-04-26 19:07:00 +0200
2464 @@ -25,6 +25,7 @@
2465  #include <linux/binfmts.h>
2466  #include <linux/personality.h>
2467  #include <linux/init.h>
2468 +#include <linux/vs_memory.h>
2469  
2470  #include <asm/system.h>
2471  #include <asm/uaccess.h>
2472 diff -NurpP --minimal linux-2.6.16.20/arch/x86_64/ia32/ia32_binfmt.c linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/ia32/ia32_binfmt.c
2473 --- linux-2.6.16.20/arch/x86_64/ia32/ia32_binfmt.c      2006-02-18 14:39:50 +0100
2474 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/ia32/ia32_binfmt.c 2006-04-26 19:07:00 +0200
2475 @@ -371,7 +371,8 @@ int ia32_setup_arg_pages(struct linux_bi
2476                         kmem_cache_free(vm_area_cachep, mpnt);
2477                         return ret;
2478                 }
2479 -               mm->stack_vm = mm->total_vm = vma_pages(mpnt);
2480 +               vx_vmpages_sub(mm, mm->total_vm - vma_pages(mpnt));
2481 +               mm->stack_vm = mm->total_vm;
2482         } 
2483  
2484         for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
2485 diff -NurpP --minimal linux-2.6.16.20/arch/x86_64/ia32/ia32entry.S linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/ia32/ia32entry.S
2486 --- linux-2.6.16.20/arch/x86_64/ia32/ia32entry.S        2006-02-18 14:39:50 +0100
2487 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/ia32/ia32entry.S   2006-04-26 19:07:00 +0200
2488 @@ -650,7 +650,7 @@ ia32_sys_call_table:
2489         .quad sys_tgkill                /* 270 */
2490         .quad compat_sys_utimes
2491         .quad sys32_fadvise64_64
2492 -       .quad quiet_ni_syscall  /* sys_vserver */
2493 +       .quad sys32_vserver
2494         .quad sys_mbind
2495         .quad compat_sys_get_mempolicy  /* 275 */
2496         .quad sys_set_mempolicy
2497 diff -NurpP --minimal linux-2.6.16.20/arch/x86_64/ia32/sys_ia32.c linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/ia32/sys_ia32.c
2498 --- linux-2.6.16.20/arch/x86_64/ia32/sys_ia32.c 2006-02-18 14:39:50 +0100
2499 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/ia32/sys_ia32.c    2006-05-29 16:53:21 +0200
2500 @@ -62,6 +62,7 @@
2501  #include <linux/highuid.h>
2502  #include <linux/vmalloc.h>
2503  #include <linux/fsnotify.h>
2504 +#include <linux/vs_cvirt.h>
2505  #include <asm/mman.h>
2506  #include <asm/types.h>
2507  #include <asm/uaccess.h>
2508 @@ -460,7 +461,7 @@ sys32_gettimeofday(struct compat_timeval
2509  {
2510         if (tv) {
2511                 struct timeval ktv;
2512 -               do_gettimeofday(&ktv);
2513 +               vx_gettimeofday(&ktv);
2514                 if (put_tv32(tv, &ktv))
2515                         return -EFAULT;
2516         }
2517 @@ -882,6 +883,7 @@ asmlinkage long sys32_mmap2(unsigned lon
2518  asmlinkage long sys32_olduname(struct oldold_utsname __user * name)
2519  {
2520         int error;
2521 +       struct new_utsname *ptr;
2522  
2523         if (!name)
2524                 return -EFAULT;
2525 @@ -890,13 +892,14 @@ asmlinkage long sys32_olduname(struct ol
2526    
2527         down_read(&uts_sem);
2528         
2529 -       error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
2530 +       ptr = vx_new_utsname();
2531 +       error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN);
2532          __put_user(0,name->sysname+__OLD_UTS_LEN);
2533 -        __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
2534 +        __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN);
2535          __put_user(0,name->nodename+__OLD_UTS_LEN);
2536 -        __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
2537 +        __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN);
2538          __put_user(0,name->release+__OLD_UTS_LEN);
2539 -        __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
2540 +        __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN);
2541          __put_user(0,name->version+__OLD_UTS_LEN);
2542          { 
2543                  char *arch = "x86_64";
2544 @@ -919,7 +922,7 @@ long sys32_uname(struct old_utsname __us
2545         if (!name)
2546                 return -EFAULT;
2547         down_read(&uts_sem);
2548 -       err=copy_to_user(name, &system_utsname, sizeof (*name));
2549 +       err=copy_to_user(name, vx_new_utsname(), sizeof (*name));
2550         up_read(&uts_sem);
2551         if (personality(current->personality) == PER_LINUX32) 
2552                 err |= copy_to_user(&name->machine, "i686", 5);
2553 diff -NurpP --minimal linux-2.6.16.20/arch/x86_64/ia32/syscall32.c linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/ia32/syscall32.c
2554 --- linux-2.6.16.20/arch/x86_64/ia32/syscall32.c        2005-10-28 20:49:18 +0200
2555 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/ia32/syscall32.c   2006-04-26 19:07:00 +0200
2556 @@ -10,6 +10,7 @@
2557  #include <linux/init.h>
2558  #include <linux/stringify.h>
2559  #include <linux/security.h>
2560 +#include <linux/vs_memory.h>
2561  #include <asm/proto.h>
2562  #include <asm/tlbflush.h>
2563  #include <asm/ia32_unistd.h>
2564 @@ -70,7 +71,7 @@ int syscall32_setup_pages(struct linux_b
2565                 kmem_cache_free(vm_area_cachep, vma);
2566                 return ret;
2567         }
2568 -       mm->total_vm += npages;
2569 +       vx_vmpages_add(mm, npages);
2570         up_write(&mm->mmap_sem);
2571         return 0;
2572  }
2573 diff -NurpP --minimal linux-2.6.16.20/arch/x86_64/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/kernel/process.c
2574 --- linux-2.6.16.20/arch/x86_64/kernel/process.c        2006-05-11 21:25:35 +0200
2575 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/kernel/process.c   2006-04-26 19:07:00 +0200
2576 @@ -55,7 +55,8 @@
2577  
2578  asmlinkage extern void ret_from_fork(void);
2579  
2580 -unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
2581 +unsigned long kernel_thread_flags =
2582 +       CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD;
2583  
2584  unsigned long boot_option_idle_override = 0;
2585  EXPORT_SYMBOL(boot_option_idle_override);
2586 @@ -297,8 +298,8 @@ void __show_regs(struct pt_regs * regs)
2587  
2588         printk("\n");
2589         print_modules();
2590 -       printk("Pid: %d, comm: %.20s %s %s %.*s\n",
2591 -               current->pid, current->comm, print_tainted(),
2592 +       printk("Pid: %d[#%u], comm: %.20s %s %s %.*s\n",
2593 +               current->pid, current->xid, current->comm, print_tainted(),
2594                 system_utsname.release,
2595                 (int)strcspn(system_utsname.version, " "),
2596                 system_utsname.version);
2597 diff -NurpP --minimal linux-2.6.16.20/arch/x86_64/kernel/sys_x86_64.c linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/kernel/sys_x86_64.c
2598 --- linux-2.6.16.20/arch/x86_64/kernel/sys_x86_64.c     2006-01-03 17:29:20 +0100
2599 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/kernel/sys_x86_64.c        2006-04-26 19:07:00 +0200
2600 @@ -16,6 +16,7 @@
2601  #include <linux/file.h>
2602  #include <linux/utsname.h>
2603  #include <linux/personality.h>
2604 +#include <linux/vs_cvirt.h>
2605  
2606  #include <asm/uaccess.h>
2607  #include <asm/ia32.h>
2608 @@ -148,7 +149,7 @@ asmlinkage long sys_uname(struct new_uts
2609  {
2610         int err;
2611         down_read(&uts_sem);
2612 -       err = copy_to_user(name, &system_utsname, sizeof (*name));
2613 +       err = copy_to_user(name, vx_new_utsname(), sizeof (*name));
2614         up_read(&uts_sem);
2615         if (personality(current->personality) == PER_LINUX32) 
2616                 err |= copy_to_user(&name->machine, "i686", 5);                 
2617 diff -NurpP --minimal linux-2.6.16.20/arch/x86_64/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/kernel/traps.c
2618 --- linux-2.6.16.20/arch/x86_64/kernel/traps.c  2006-06-06 15:37:20 +0200
2619 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/kernel/traps.c     2006-06-06 15:31:32 +0200
2620 @@ -322,8 +322,9 @@ void show_registers(struct pt_regs *regs
2621  
2622         printk("CPU %d ", cpu);
2623         __show_regs(regs);
2624 -       printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
2625 -               cur->comm, cur->pid, task_thread_info(cur), cur);
2626 +       printk("Process %s (pid: %d[#%u], threadinfo %p, task %p)\n",
2627 +               cur->comm, cur->pid, cur->xid,
2628 +               task_thread_info(cur), cur);
2629  
2630         /*
2631          * When in-kernel, we also print out the stack and code at the
2632 diff -NurpP --minimal linux-2.6.16.20/arch/xtensa/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/xtensa/kernel/process.c
2633 --- linux-2.6.16.20/arch/xtensa/kernel/process.c        2006-04-09 13:49:44 +0200
2634 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/xtensa/kernel/process.c   2006-04-26 19:07:00 +0200
2635 @@ -207,7 +207,7 @@ int kernel_thread(int (*fn)(void *), voi
2636                  :"=r" (retval)
2637                  :"i" (__NR_clone), "i" (__NR_exit),
2638                  "r" (arg), "r" (fn),
2639 -                "r" (flags | CLONE_VM)
2640 +                "r" (flags | CLONE_VM | CLONE_KTHREAD)
2641                  : "a2", "a3", "a4", "a5", "a6" );
2642         return retval;
2643  }
2644 diff -NurpP --minimal linux-2.6.16.20/arch/xtensa/kernel/syscalls.c linux-2.6.16.20-vs2.1.1-rc22/arch/xtensa/kernel/syscalls.c
2645 --- linux-2.6.16.20/arch/xtensa/kernel/syscalls.c       2005-08-29 22:24:57 +0200
2646 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/xtensa/kernel/syscalls.c  2006-05-11 16:06:22 +0200
2647 @@ -35,6 +35,7 @@
2648  #include <linux/msg.h>
2649  #include <linux/shm.h>
2650  #include <linux/errno.h>
2651 +#include <linux/vs_cvirt.h>
2652  #include <asm/ptrace.h>
2653  #include <asm/signal.h>
2654  #include <asm/uaccess.h>
2655 @@ -129,7 +130,7 @@ out:
2656  
2657  int sys_uname(struct old_utsname * name)
2658  {
2659 -       if (name && !copy_to_user(name, &system_utsname, sizeof (*name)))
2660 +       if (name && !copy_to_user(name, vx_new_utsname(), sizeof (*name)))
2661                 return 0;
2662         return -EFAULT;
2663  }
2664 diff -NurpP --minimal linux-2.6.16.20/block/cfq-iosched.c linux-2.6.16.20-vs2.1.1-rc22/block/cfq-iosched.c
2665 --- linux-2.6.16.20/block/cfq-iosched.c 2006-03-20 17:32:30 +0100
2666 +++ linux-2.6.16.20-vs2.1.1-rc22/block/cfq-iosched.c    2006-05-02 03:32:45 +0200
2667 @@ -1756,6 +1756,8 @@ static void cfq_prio_boost(struct cfq_qu
2668  
2669  static inline pid_t cfq_queue_pid(struct task_struct *task, int rw)
2670  {
2671 +       if (task->xid)
2672 +               return task->xid + (1 << 16);
2673         if (rw == READ || process_sync(task))
2674                 return task->pid;
2675  
2676 diff -NurpP --minimal linux-2.6.16.20/drivers/block/Kconfig linux-2.6.16.20-vs2.1.1-rc22/drivers/block/Kconfig
2677 --- linux-2.6.16.20/drivers/block/Kconfig       2006-02-18 14:39:52 +0100
2678 +++ linux-2.6.16.20-vs2.1.1-rc22/drivers/block/Kconfig  2006-04-26 19:07:00 +0200
2679 @@ -315,6 +315,13 @@ config BLK_DEV_CRYPTOLOOP
2680           instead, which can be configured to be on-disk compatible with the
2681           cryptoloop device.
2682  
2683 +config BLK_DEV_VROOT
2684 +       tristate "Virtual Root device support"
2685 +       depends on QUOTACTL
2686 +       ---help---
2687 +         Saying Y here will allow you to use quota/fs ioctls on a shared
2688 +         partition within a virtual server without compromising security.
2689 +
2690  config BLK_DEV_NBD
2691         tristate "Network block device support"
2692         depends on NET
2693 diff -NurpP --minimal linux-2.6.16.20/drivers/block/Makefile linux-2.6.16.20-vs2.1.1-rc22/drivers/block/Makefile
2694 --- linux-2.6.16.20/drivers/block/Makefile      2006-01-03 17:29:21 +0100
2695 +++ linux-2.6.16.20-vs2.1.1-rc22/drivers/block/Makefile 2006-04-26 19:07:00 +0200
2696 @@ -30,4 +30,5 @@ obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryp
2697  obj-$(CONFIG_VIODASD)          += viodasd.o
2698  obj-$(CONFIG_BLK_DEV_SX8)      += sx8.o
2699  obj-$(CONFIG_BLK_DEV_UB)       += ub.o
2700 +obj-$(CONFIG_BLK_DEV_VROOT)    += vroot.o
2701  
2702 diff -NurpP --minimal linux-2.6.16.20/drivers/block/loop.c linux-2.6.16.20-vs2.1.1-rc22/drivers/block/loop.c
2703 --- linux-2.6.16.20/drivers/block/loop.c        2006-01-18 06:07:59 +0100
2704 +++ linux-2.6.16.20-vs2.1.1-rc22/drivers/block/loop.c   2006-04-26 19:07:00 +0200
2705 @@ -74,6 +74,7 @@
2706  #include <linux/completion.h>
2707  #include <linux/highmem.h>
2708  #include <linux/gfp.h>
2709 +#include <linux/vs_context.h>
2710  
2711  #include <asm/uaccess.h>
2712  
2713 @@ -743,10 +744,12 @@ static int loop_set_fd(struct loop_devic
2714         struct file     *file, *f;
2715         struct inode    *inode;
2716         struct address_space *mapping;
2717 +       struct vx_info_save vxis;
2718         unsigned lo_blocksize;
2719         int             lo_flags = 0;
2720         int             error;
2721         loff_t          size;
2722 +       pid_t           pid;
2723  
2724         /* This is safe, since we have a reference from open(). */
2725         __module_get(THIS_MODULE);
2726 @@ -839,10 +842,16 @@ static int loop_set_fd(struct loop_devic
2727  
2728         set_blocksize(bdev, lo_blocksize);
2729  
2730 -       kernel_thread(loop_thread, lo, CLONE_KERNEL);
2731 +       __enter_vx_admin(&vxis);
2732 +       pid = kernel_thread(loop_thread, lo, CLONE_KERNEL);
2733 +       __leave_vx_admin(&vxis);
2734 +       if (pid < 0)
2735 +               goto out_err;
2736         wait_for_completion(&lo->lo_done);
2737         return 0;
2738  
2739 + out_err:
2740 +       error = (int)pid;
2741   out_putf:
2742         fput(file);
2743   out:
2744 diff -NurpP --minimal linux-2.6.16.20/drivers/block/vroot.c linux-2.6.16.20-vs2.1.1-rc22/drivers/block/vroot.c
2745 --- linux-2.6.16.20/drivers/block/vroot.c       1970-01-01 01:00:00 +0100
2746 +++ linux-2.6.16.20-vs2.1.1-rc22/drivers/block/vroot.c  2006-04-26 19:07:00 +0200
2747 @@ -0,0 +1,288 @@
2748 +/*
2749 + *  linux/drivers/block/vroot.c
2750 + *
2751 + *  written by Herbert Pötzl, 9/11/2002
2752 + *  ported to 2.6.10 by Herbert Pötzl, 30/12/2004
2753 + *
2754 + *  based on the loop.c code by Theodore Ts'o.
2755 + *
2756 + * Copyright (C) 2002-2005 by Herbert Pötzl.
2757 + * Redistribution of this file is permitted under the
2758 + * GNU General Public License.
2759 + *
2760 + */
2761 +
2762 +#include <linux/module.h>
2763 +#include <linux/moduleparam.h>
2764 +#include <linux/file.h>
2765 +#include <linux/major.h>
2766 +#include <linux/blkdev.h>
2767 +#include <linux/devfs_fs_kernel.h>
2768 +
2769 +#include <linux/vroot.h>
2770 +#include <linux/vserver/debug.h>
2771 +
2772 +
2773 +static int max_vroot = 8;
2774 +
2775 +static struct vroot_device *vroot_dev;
2776 +static struct gendisk **disks;
2777 +
2778 +
2779 +static int vroot_set_dev(
2780 +       struct vroot_device *vr,
2781 +       struct file *vr_file,
2782 +       struct block_device *bdev,
2783 +       unsigned int arg)
2784 +{
2785 +       /*
2786 +        * Bind the block device open on descriptor 'arg' to 'vr'.
2787 +        * Returns 0, -EBUSY (vr is not unbound), -EBADF (fget() failed)
2788 +        * or -EINVAL (the descriptor is not a block device).
2789 +        */
2790 +       struct block_device *target;
2791 +       struct inode *node;
2792 +       struct file *src;
2793 +       int ret;
2794 +
2795 +       if (vr->vr_state != Vr_unbound)
2796 +               return -EBUSY;
2797 +
2798 +       src = fget(arg);
2799 +       if (!src)
2800 +               return -EBADF;
2801 +
2802 +       ret = -EINVAL;
2803 +       node = src->f_dentry->d_inode;
2804 +       if (!S_ISBLK(node->i_mode))
2805 +               goto drop_file;
2806 +
2807 +       /* remember the real device and take an inode reference for the binding */
2808 +       target = node->i_bdev;
2809 +       vr->vr_device = target;
2810 +       __iget(target->bd_inode);
2811 +
2812 +       vxdprintk(VXD_CBIT(misc, 0),
2813 +               "vroot[%d]_set_dev: dev=" VXF_DEV,
2814 +               vr->vr_number, VXD_DEV(target));
2815 +
2816 +       vr->vr_state = Vr_bound;
2817 +       ret = 0;
2818 + drop_file:
2819 +       fput(src);
2820 +       return ret;
2821 +}
2822 +
2823 +static int vroot_clr_dev(
2824 +       struct vroot_device *vr,
2825 +       struct file *vr_file,
2826 +       struct block_device *bdev)
2827 +{
2828 +       /* Unbind 'vr': 0 on success, -ENXIO if not bound, -EBUSY if in use. */
2829 +       struct block_device *bound;
2830 +
2831 +       if (vr->vr_state != Vr_bound)
2832 +               return -ENXIO;
2833 +       /* the ioctl caller itself accounts for one open of the device */
2834 +       if (vr->vr_refcnt > 1)
2835 +               return -EBUSY;
2836 +
2837 +       bound = vr->vr_device;
2838 +       vxdprintk(VXD_CBIT(misc, 0),
2839 +               "vroot[%d]_clr_dev: dev=" VXF_DEV,
2840 +               vr->vr_number, VXD_DEV(bound));
2841 +       /* release the device, then mark the slot free for the next bind */
2842 +       bdput(bound);
2843 +       vr->vr_state = Vr_unbound;
2844 +       vr->vr_device = NULL;
2845 +       return 0;
2846 +}
2846 +
2847 +
2848 +static int vr_ioctl(struct inode * inode, struct file * file,
2849 +       unsigned int cmd, unsigned long arg)
2850 +{
2851 +       /*
2852 +        * ioctl entry point: VROOT_SET_DEV binds the device given by the
2853 +        * descriptor in 'arg', VROOT_CLR_DEV unbinds it again; every other
2854 +        * command yields -EINVAL.  vr_ctl_mutex is held around the call.
2855 +        */
2856 +       struct block_device *self = inode->i_bdev;
2857 +       struct vroot_device *vr = self->bd_disk->private_data;
2858 +       int ret = -EINVAL;
2859 +
2860 +       down(&vr->vr_ctl_mutex);
2861 +       if (cmd == VROOT_SET_DEV)
2862 +               ret = vroot_set_dev(vr, file, self, arg);
2863 +       else if (cmd == VROOT_CLR_DEV)
2864 +               ret = vroot_clr_dev(vr, file, self);
2865 +       up(&vr->vr_ctl_mutex);
2866 +
2867 +       return ret;
2868 +}
2869 +
2870 +static int vr_open(struct inode *inode, struct file *file)
2871 +{
2872 +       struct vroot_device *dev = inode->i_bdev->bd_disk->private_data;
2873 +       /* count this opener under the control mutex; open never fails */
2874 +       down(&dev->vr_ctl_mutex);
2875 +       dev->vr_refcnt += 1;
2876 +       up(&dev->vr_ctl_mutex);
2877 +       return 0;
2878 +}
2879 +
2880 +static int vr_release(struct inode *inode, struct file *file)
2881 +{
2882 +       struct vroot_device *dev = inode->i_bdev->bd_disk->private_data;
2883 +       /* give back the count taken by vr_open(), under the control mutex */
2884 +       down(&dev->vr_ctl_mutex);
2885 +       dev->vr_refcnt -= 1;
2886 +       up(&dev->vr_ctl_mutex);
2887 +       return 0;
2888 +}
2889 +
2890 +static struct block_device_operations vr_fops = {  /* set as disk->fops in vroot_init() */
2891 +       .owner =        THIS_MODULE,
2892 +       .open =         vr_open,  /* increments vr_refcnt */
2893 +       .release =      vr_release,  /* decrements vr_refcnt */
2894 +       .ioctl =        vr_ioctl,  /* VROOT_SET_DEV / VROOT_CLR_DEV */
2895 +};
2896 +
2897 +/* Resolve a vroot bdev to its bound real bdev (inode ref taken) or ERR_PTR. */
2898 +struct block_device *__vroot_get_real_bdev(struct block_device *bdev)
2899 +{
2900 +       unsigned int minor = iminor(bdev->bd_inode);
2901 +       struct block_device *real_bdev;
2902 +       struct vroot_device *vr;
2903 +
2904 +       /* a device node can carry any minor: never index past the table */
2905 +       if (minor >= (unsigned int)max_vroot)
2906 +               return ERR_PTR(-ENXIO);
2907 +       vr = &vroot_dev[minor];
2908 +       if (vr->vr_state != Vr_bound)  /* unbound: vr_device is NULL, keep it from VXD_DEV() */
2909 +               return ERR_PTR(-ENXIO);
2910 +       real_bdev = vr->vr_device;
2911 +
2912 +       vxdprintk(VXD_CBIT(misc, 0), "vroot[%d]_get_real_bdev: dev=" VXF_DEV,
2913 +               vr->vr_number, VXD_DEV(real_bdev));
2914 +       __iget(real_bdev->bd_inode);
2915 +       return real_bdev;
2916 +}
2917 +
2918 +/*
2919 + * And now the modules code and kernel interface.
2920 + */
2921 +
2922 +module_param(max_vroot, int, 0);
2923 +
2924 +MODULE_PARM_DESC(max_vroot, "Maximum number of vroot devices (1-256)");
2925 +MODULE_LICENSE("GPL");
2926 +MODULE_ALIAS_BLOCKDEV_MAJOR(VROOT_MAJOR);
2927 +
2928 +MODULE_AUTHOR ("Herbert Pötzl");
2929 +MODULE_DESCRIPTION ("Virtual Root Device Mapper");
2930 +
2931 +/* Module init: claim VROOT_MAJOR and set up max_vroot disks; 0 or -errno. */
2932 +int __init vroot_init(void)
2933 +{
2934 +       int err, i;
2935 +
2936 +       if (max_vroot < 1 || max_vroot > 256) {
2937 +               max_vroot = MAX_VROOT_DEFAULT;
2938 +               printk(KERN_WARNING "vroot: invalid max_vroot "
2939 +                       "(must be between 1 and 256), "
2940 +                       "using default (%d)\n", max_vroot);
2941 +       }
2942 +
2943 +       if (register_blkdev(VROOT_MAJOR, "vroot"))
2944 +               return -EIO;
2945 +
2946 +       err = -ENOMEM;
2947 +       vroot_dev = kmalloc(max_vroot * sizeof(struct vroot_device), GFP_KERNEL);
2948 +       if (!vroot_dev)
2949 +               goto out_mem1;
2950 +       memset(vroot_dev, 0, max_vroot * sizeof(struct vroot_device));
2951 +
2952 +       disks = kmalloc(max_vroot * sizeof(struct gendisk *), GFP_KERNEL);
2953 +       if (!disks)
2954 +               goto out_mem2;
2955 +
2956 +       for (i = 0; i < max_vroot; i++) {
2957 +               disks[i] = alloc_disk(1);
2958 +               if (!disks[i])
2959 +                       goto out_mem3;  /* i disks exist so far */
2960 +       }
2961 +
2962 +       devfs_mk_dir("vroot");
2963 +
2964 +       for (i = 0; i < max_vroot; i++) {
2965 +               struct vroot_device *vr = &vroot_dev[i];
2966 +               struct gendisk *disk = disks[i];
2967 +               /* no per-entry memset: the table was zeroed after kmalloc() */
2968 +               init_MUTEX(&vr->vr_ctl_mutex);
2969 +               vr->vr_number = i;
2970 +               disk->major = VROOT_MAJOR;
2971 +               disk->first_minor = i;
2972 +               disk->fops = &vr_fops;
2973 +               sprintf(disk->disk_name, "vroot%d", i);
2974 +               sprintf(disk->devfs_name, "vroot/%d", i);
2975 +               disk->private_data = vr;
2976 +       }
2977 +
2978 +       err = register_vroot_grb(&__vroot_get_real_bdev);
2979 +       if (err)
2980 +               goto out_reg;  /* i == max_vroot here: every disk is put back */
2981 +
2982 +       for (i = 0; i < max_vroot; i++)
2983 +               add_disk(disks[i]);
2984 +       printk(KERN_INFO "vroot: loaded (max %d devices)\n", max_vroot);
2985 +       return 0;
2986 +
2987 +out_reg:
2988 +       devfs_remove("vroot");
2989 +out_mem3:
2990 +       while (i--)
2991 +               put_disk(disks[i]);
2992 +       kfree(disks);
2993 +out_mem2:
2994 +       kfree(vroot_dev);
2995 +out_mem1:
2996 +       unregister_blkdev(VROOT_MAJOR, "vroot");
2997 +       printk(KERN_ERR "vroot: %s\n", (err == -ENOMEM) ?  /* name the real cause */
2998 +               "ran out of memory" : "cannot register grb");
2999 +       return err;
3000 +}
3001 +
3002 +void vroot_exit(void)
3003 +{
3004 +       int idx = 0;
3005 +
3006 +       /* undo vroot_init(); a failing unregister is only reported */
3007 +       if (unregister_vroot_grb(&__vroot_get_real_bdev) != 0)
3008 +               printk(KERN_WARNING "vroot: cannot unregister grb\n");
3009 +       while (idx < max_vroot) {
3010 +               struct gendisk *disk = disks[idx++];
3011 +               del_gendisk(disk);
3012 +               put_disk(disk);
3013 +       }
3014 +       devfs_remove("vroot");
3015 +       if (unregister_blkdev(VROOT_MAJOR, "vroot") != 0)
3016 +               printk(KERN_WARNING "vroot: cannot unregister blkdev\n");
3017 +       kfree(disks);
3018 +       kfree(vroot_dev);
3019 +}
3020 +
3021 +module_init(vroot_init);
3022 +module_exit(vroot_exit);
3023 +
3024 +#ifndef MODULE
3025 +
3026 +static int __init max_vroot_setup(char *str)  /* handler for the "max_vroot=" boot option */
3027 +{
3028 +       max_vroot = simple_strtol(str, NULL, 0);  /* base 0; range is checked in vroot_init() */
3029 +       return 1;
3030 +}
3031 +
3032 +__setup("max_vroot=", max_vroot_setup);
3033 +
3034 +#endif
3035 +
3036 diff -NurpP --minimal linux-2.6.16.20/drivers/char/random.c linux-2.6.16.20-vs2.1.1-rc22/drivers/char/random.c
3037 --- linux-2.6.16.20/drivers/char/random.c       2006-04-09 13:49:45 +0200
3038 +++ linux-2.6.16.20-vs2.1.1-rc22/drivers/char/random.c  2006-04-26 19:07:00 +0200
3039 @@ -1174,7 +1174,7 @@ static char sysctl_bootid[16];
3040  static int proc_do_uuid(ctl_table *table, int write, struct file *filp,
3041                         void __user *buffer, size_t *lenp, loff_t *ppos)
3042  {
3043 -       ctl_table fake_table;
3044 +       ctl_table fake_table = {0};
3045         unsigned char buf[64], tmp_uuid[16], *uuid;
3046  
3047         uuid = table->data;
3048 diff -NurpP --minimal linux-2.6.16.20/drivers/char/sysrq.c linux-2.6.16.20-vs2.1.1-rc22/drivers/char/sysrq.c
3049 --- linux-2.6.16.20/drivers/char/sysrq.c        2006-04-09 13:49:45 +0200
3050 +++ linux-2.6.16.20-vs2.1.1-rc22/drivers/char/sysrq.c   2006-04-26 19:07:00 +0200
3051 @@ -36,6 +36,7 @@
3052  #include <linux/vt_kern.h>
3053  #include <linux/workqueue.h>
3054  #include <linux/kexec.h>
3055 +#include <linux/vserver/debug.h>
3056  
3057  #include <asm/ptrace.h>
3058  
3059 @@ -286,6 +287,21 @@ static struct sysrq_key_op sysrq_unrt_op
3060         .enable_mask    = SYSRQ_ENABLE_RTNICE,
3061  };
3062  
3063 +
3064 +#ifdef CONFIG_VSERVER_DEBUG
3065 +static void sysrq_handle_vxinfo(int key, struct pt_regs *pt_regs,
3066 +                                struct tty_struct *tty)
3067 +{
3068 +       dump_vx_info_inactive(key != 'x');  /* 0 for lowercase 'x', 1 for any other key */
3069 +}
3070 +static struct sysrq_key_op sysrq_showvxinfo_op = {  /* SysRq 'x' slot, CONFIG_VSERVER_DEBUG only */
3071 +       .handler        = sysrq_handle_vxinfo,  /* calls dump_vx_info_inactive() */
3072 +       .help_msg       = "conteXt",
3073 +       .action_msg     = "Show Context Info",
3074 +       .enable_mask    = SYSRQ_ENABLE_DUMP,
3075 +};
3076 +#endif
3077 +
3078  /* Key Operations table and lock */
3079  static DEFINE_SPINLOCK(sysrq_key_table_lock);
3080  #define SYSRQ_KEY_TABLE_LENGTH 36
3081 @@ -342,7 +358,11 @@ static struct sysrq_key_op *sysrq_key_ta
3082  /* u */        &sysrq_mountro_op,
3083  /* v */        NULL, /* May be assigned at init time by SMP VOYAGER */
3084  /* w */        NULL,
3085 +#ifdef CONFIG_VSERVER_DEBUG
3086 +/* x */        &sysrq_showvxinfo_op,
3087 +#else
3088  /* x */        NULL,
3089 +#endif
3090  /* y */        NULL,
3091  /* z */        NULL
3092  };
3093 @@ -354,6 +374,8 @@ static int sysrq_key_table_key2index(int
3094                 retval = key - '0';
3095         } else if ((key >= 'a') && (key <= 'z')) {
3096                 retval = key + 10 - 'a';
3097 +       } else if ((key >= 'A') && (key <= 'Z')) {
3098 +               retval = key + 10 - 'A';
3099         } else {
3100                 retval = -1;
3101         }
3102 diff -NurpP --minimal linux-2.6.16.20/drivers/char/tty_io.c linux-2.6.16.20-vs2.1.1-rc22/drivers/char/tty_io.c
3103 --- linux-2.6.16.20/drivers/char/tty_io.c       2006-05-11 21:25:35 +0200
3104 +++ linux-2.6.16.20-vs2.1.1-rc22/drivers/char/tty_io.c  2006-04-26 19:07:00 +0200
3105 @@ -103,6 +103,7 @@
3106  #include <linux/vt_kern.h>
3107  #include <linux/selection.h>
3108  #include <linux/devfs_fs_kernel.h>
3109 +#include <linux/vs_pid.h>
3110  
3111  #include <linux/kmod.h>
3112  
3113 @@ -2381,13 +2382,16 @@ static int tiocsctty(struct tty_struct *
3114  
3115  static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
3116  {
3117 +       pid_t pgrp;
3118         /*
3119          * (tty == real_tty) is a cheap way of
3120          * testing if the tty is NOT a master pty.
3121          */
3122         if (tty == real_tty && current->signal->tty != real_tty)
3123                 return -ENOTTY;
3124 -       return put_user(real_tty->pgrp, p);
3125 +
3126 +       pgrp = vx_map_pid(real_tty->pgrp);
3127 +       return put_user(pgrp, p);
3128  }
3129  
3130  static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
3131 @@ -2405,6 +2409,8 @@ static int tiocspgrp(struct tty_struct *
3132                 return -ENOTTY;
3133         if (get_user(pgrp, p))
3134                 return -EFAULT;
3135 +
3136 +       pgrp = vx_rmap_pid(pgrp);
3137         if (pgrp < 0)
3138                 return -EINVAL;
3139         if (session_of_pgrp(pgrp) != current->signal->session)
3140 diff -NurpP --minimal linux-2.6.16.20/drivers/infiniband/core/uverbs_mem.c linux-2.6.16.20-vs2.1.1-rc22/drivers/infiniband/core/uverbs_mem.c
3141 --- linux-2.6.16.20/drivers/infiniband/core/uverbs_mem.c        2005-10-28 20:49:23 +0200
3142 +++ linux-2.6.16.20-vs2.1.1-rc22/drivers/infiniband/core/uverbs_mem.c   2006-04-26 19:07:00 +0200
3143 @@ -36,6 +36,7 @@
3144  
3145  #include <linux/mm.h>
3146  #include <linux/dma-mapping.h>
3147 +#include <linux/vs_memory.h>
3148  
3149  #include "uverbs.h"
3150  
3151 @@ -161,7 +162,7 @@ out:
3152         if (ret < 0)
3153                 __ib_umem_release(dev, mem, 0);
3154         else
3155 -               current->mm->locked_vm = locked;
3156 +               vx_vmlocked_sub(current->mm, current->mm->locked_vm - locked);
3157  
3158         up_write(&current->mm->mmap_sem);
3159         free_page((unsigned long) page_list);
3160 @@ -174,8 +175,8 @@ void ib_umem_release(struct ib_device *d
3161         __ib_umem_release(dev, umem, 1);
3162  
3163         down_write(&current->mm->mmap_sem);
3164 -       current->mm->locked_vm -=
3165 -               PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
3166 +       vx_vmlocked_sub(current->mm,
3167 +               PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT);
3168         up_write(&current->mm->mmap_sem);
3169  }
3170  
3171 @@ -184,7 +185,7 @@ static void ib_umem_account(void *work_p
3172         struct ib_umem_account_work *work = work_ptr;
3173  
3174         down_write(&work->mm->mmap_sem);
3175 -       work->mm->locked_vm -= work->diff;
3176 +       vx_vmlocked_sub(work->mm, work->diff);
3177         up_write(&work->mm->mmap_sem);
3178         mmput(work->mm);
3179         kfree(work);
3180 diff -NurpP --minimal linux-2.6.16.20/drivers/mtd/devices/blkmtd.c linux-2.6.16.20-vs2.1.1-rc22/drivers/mtd/devices/blkmtd.c
3181 --- linux-2.6.16.20/drivers/mtd/devices/blkmtd.c        2006-01-03 17:29:35 +0100
3182 +++ linux-2.6.16.20-vs2.1.1-rc22/drivers/mtd/devices/blkmtd.c   2006-04-26 19:07:00 +0200
3183 @@ -614,7 +614,7 @@ static struct mtd_erase_region_info *cal
3184  }
3185  
3186  
3187 -extern dev_t __init name_to_dev_t(const char *line);
3188 +extern dev_t __init name_to_dev_t(char *line);
3189  
3190  static struct blkmtd_dev *add_device(char *devname, int readonly, int erase_size)
3191  {
3192 diff -NurpP --minimal linux-2.6.16.20/fs/attr.c linux-2.6.16.20-vs2.1.1-rc22/fs/attr.c
3193 --- linux-2.6.16.20/fs/attr.c   2006-04-09 13:49:53 +0200
3194 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/attr.c      2006-04-26 19:07:00 +0200
3195 @@ -15,6 +15,9 @@
3196  #include <linux/fcntl.h>
3197  #include <linux/quotaops.h>
3198  #include <linux/security.h>
3199 +#include <linux/proc_fs.h>
3200 +#include <linux/devpts_fs.h>
3201 +#include <linux/vserver/debug.h>
3202  
3203  /* Taken over from the old code... */
3204  
3205 @@ -56,6 +59,30 @@ int inode_change_ok(struct inode *inode,
3206                 if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
3207                         goto error;
3208         }
3209 +
3210 +       /* Check for evil vserver activity */
3211 +       if (vx_check(0, VX_ADMIN))
3212 +               goto fine;
3213 +
3214 +       if (IS_BARRIER(inode)) {
3215 +               vxwprintk(1, "xid=%d messing with the barrier.",
3216 +                       vx_current_xid());
3217 +               goto error;
3218 +       }
3219 +       switch (inode->i_sb->s_magic) {
3220 +               case PROC_SUPER_MAGIC:
3221 +                       /* maybe allow that in the future? */
3222 +                       vxwprintk(1, "xid=%d messing with the procfs.",
3223 +                               vx_current_xid());
3224 +                       goto error;
3225 +               case DEVPTS_SUPER_MAGIC:
3226 +                       /* devpts is xid tagged */
3227 +                       if (vx_check((xid_t)inode->i_tag, VX_IDENT))
3228 +                               goto fine;
3229 +                       vxwprintk(1, "xid=%d messing with the devpts.",
3230 +                               vx_current_xid());
3231 +                       goto error;
3232 +       }
3233  fine:
3234         retval = 0;
3235  error:
3236 @@ -79,6 +106,8 @@ int inode_setattr(struct inode * inode, 
3237                 inode->i_uid = attr->ia_uid;
3238         if (ia_valid & ATTR_GID)
3239                 inode->i_gid = attr->ia_gid;
3240 +       if ((ia_valid & ATTR_TAG) && IS_TAGGED(inode))
3241 +               inode->i_tag = attr->ia_tag;
3242         if (ia_valid & ATTR_ATIME)
3243                 inode->i_atime = timespec_trunc(attr->ia_atime,
3244                                                 inode->i_sb->s_time_gran);
3245 @@ -153,7 +182,8 @@ int notify_change(struct dentry * dentry
3246                         error = security_inode_setattr(dentry, attr);
3247                 if (!error) {
3248                         if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
3249 -                           (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid))
3250 +                           (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) ||
3251 +                           (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag))
3252                                 error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
3253                         if (!error)
3254                                 error = inode_setattr(inode, attr);
3255 diff -NurpP --minimal linux-2.6.16.20/fs/binfmt_aout.c linux-2.6.16.20-vs2.1.1-rc22/fs/binfmt_aout.c
3256 --- linux-2.6.16.20/fs/binfmt_aout.c    2006-04-09 13:49:53 +0200
3257 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/binfmt_aout.c       2006-04-26 19:07:00 +0200
3258 @@ -24,6 +24,7 @@
3259  #include <linux/binfmts.h>
3260  #include <linux/personality.h>
3261  #include <linux/init.h>
3262 +#include <linux/vs_memory.h>
3263  
3264  #include <asm/system.h>
3265  #include <asm/uaccess.h>
3266 diff -NurpP --minimal linux-2.6.16.20/fs/binfmt_elf.c linux-2.6.16.20-vs2.1.1-rc22/fs/binfmt_elf.c
3267 --- linux-2.6.16.20/fs/binfmt_elf.c     2006-04-09 13:49:53 +0200
3268 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/binfmt_elf.c        2006-05-11 16:06:22 +0200
3269 @@ -38,6 +38,8 @@
3270  #include <linux/security.h>
3271  #include <linux/syscalls.h>
3272  #include <linux/random.h>
3273 +#include <linux/vs_memory.h>
3274 +#include <linux/vs_cvirt.h>
3275  
3276  #include <asm/uaccess.h>
3277  #include <asm/param.h>
3278 diff -NurpP --minimal linux-2.6.16.20/fs/binfmt_elf_fdpic.c linux-2.6.16.20-vs2.1.1-rc22/fs/binfmt_elf_fdpic.c
3279 --- linux-2.6.16.20/fs/binfmt_elf_fdpic.c       2006-01-18 06:08:29 +0100
3280 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/binfmt_elf_fdpic.c  2006-05-11 16:06:22 +0200
3281 @@ -32,6 +32,7 @@
3282  #include <linux/elf.h>
3283  #include <linux/elf-fdpic.h>
3284  #include <linux/elfcore.h>
3285 +#include <linux/vs_cvirt.h>
3286  
3287  #include <asm/uaccess.h>
3288  #include <asm/param.h>
3289 diff -NurpP --minimal linux-2.6.16.20/fs/binfmt_flat.c linux-2.6.16.20-vs2.1.1-rc22/fs/binfmt_flat.c
3290 --- linux-2.6.16.20/fs/binfmt_flat.c    2006-02-18 14:40:21 +0100
3291 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/binfmt_flat.c       2006-04-26 19:07:00 +0200
3292 @@ -36,6 +36,7 @@
3293  #include <linux/personality.h>
3294  #include <linux/init.h>
3295  #include <linux/flat.h>
3296 +#include <linux/vs_memory.h>
3297  
3298  #include <asm/byteorder.h>
3299  #include <asm/system.h>
3300 diff -NurpP --minimal linux-2.6.16.20/fs/binfmt_som.c linux-2.6.16.20-vs2.1.1-rc22/fs/binfmt_som.c
3301 --- linux-2.6.16.20/fs/binfmt_som.c     2006-01-03 17:29:55 +0100
3302 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/binfmt_som.c        2006-04-26 19:07:00 +0200
3303 @@ -28,6 +28,7 @@
3304  #include <linux/shm.h>
3305  #include <linux/personality.h>
3306  #include <linux/init.h>
3307 +#include <linux/vs_memory.h>
3308  
3309  #include <asm/uaccess.h>
3310  #include <asm/pgtable.h>
3311 diff -NurpP --minimal linux-2.6.16.20/fs/buffer.c linux-2.6.16.20-vs2.1.1-rc22/fs/buffer.c
3312 --- linux-2.6.16.20/fs/buffer.c 2006-04-09 13:49:53 +0200
3313 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/buffer.c    2006-04-26 19:07:00 +0200
3314 @@ -168,7 +168,7 @@ EXPORT_SYMBOL(sync_blockdev);
3315  int fsync_super(struct super_block *sb)
3316  {
3317         sync_inodes_sb(sb, 0);
3318 -       DQUOT_SYNC(sb);
3319 +       DQUOT_SYNC(sb->s_dqh);
3320         lock_super(sb);
3321         if (sb->s_dirt && sb->s_op->write_super)
3322                 sb->s_op->write_super(sb);
3323 @@ -217,7 +217,7 @@ struct super_block *freeze_bdev(struct b
3324                 smp_wmb();
3325  
3326                 sync_inodes_sb(sb, 0);
3327 -               DQUOT_SYNC(sb);
3328 +               DQUOT_SYNC(sb->s_dqh);
3329  
3330                 lock_super(sb);
3331                 if (sb->s_dirt && sb->s_op->write_super)
3332 diff -NurpP --minimal linux-2.6.16.20/fs/dcache.c linux-2.6.16.20-vs2.1.1-rc22/fs/dcache.c
3333 --- linux-2.6.16.20/fs/dcache.c 2006-03-20 17:33:10 +0100
3334 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/dcache.c    2006-05-02 04:33:23 +0200
3335 @@ -33,6 +33,7 @@
3336  #include <linux/seqlock.h>
3337  #include <linux/swap.h>
3338  #include <linux/bootmem.h>
3339 +#include <linux/vs_limit.h>
3340  
3341  /* #define DCACHE_DEBUG 1 */
3342  
3343 @@ -148,6 +149,7 @@ void dput(struct dentry *dentry)
3344         if (!dentry)
3345                 return;
3346  
3347 +       vx_dentry_dec(dentry);
3348  repeat:
3349         if (atomic_read(&dentry->d_count) == 1)
3350                 might_sleep();
3351 @@ -161,6 +163,8 @@ repeat:
3352                 return;
3353         }
3354  
3355 +       vx_dentry_dec(dentry);
3356 +
3357         /*
3358          * AV: ->d_delete() is _NOT_ allowed to block now.
3359          */
3360 @@ -271,6 +275,7 @@ static inline struct dentry * __dget_loc
3361         if (!list_empty(&dentry->d_lru)) {
3362                 dentry_stat.nr_unused--;
3363                 list_del_init(&dentry->d_lru);
3364 +               vx_dentry_inc(dentry);
3365         }
3366         return dentry;
3367  }
3368 @@ -714,6 +719,9 @@ struct dentry *d_alloc(struct dentry * p
3369         struct dentry *dentry;
3370         char *dname;
3371  
3372 +       if (!vx_dentry_avail(1))
3373 +               return NULL;
3374 +
3375         dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); 
3376         if (!dentry)
3377                 return NULL;
3378 @@ -762,6 +770,7 @@ struct dentry *d_alloc(struct dentry * p
3379         if (parent)
3380                 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
3381         dentry_stat.nr_dentry++;
3382 +       vx_dentry_inc(dentry);
3383         spin_unlock(&dcache_lock);
3384  
3385         return dentry;
3386 @@ -1089,6 +1098,7 @@ struct dentry * __d_lookup(struct dentry
3387  
3388                 if (!d_unhashed(dentry)) {
3389                         atomic_inc(&dentry->d_count);
3390 +                       vx_dentry_inc(dentry);
3391                         found = dentry;
3392                 }
3393                 spin_unlock(&dentry->d_lock);
3394 diff -NurpP --minimal linux-2.6.16.20/fs/devpts/inode.c linux-2.6.16.20-vs2.1.1-rc22/fs/devpts/inode.c
3395 --- linux-2.6.16.20/fs/devpts/inode.c   2006-02-18 14:40:21 +0100
3396 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/devpts/inode.c      2006-04-26 19:07:00 +0200
3397 @@ -19,7 +19,20 @@
3398  #include <linux/tty.h>
3399  #include <linux/devpts_fs.h>
3400  
3401 -#define DEVPTS_SUPER_MAGIC 0x1cd1
3402 +
3403 +static int devpts_permission(struct inode *inode, int mask, struct nameidata *nd)
3404 +{
3405 +       int ret = -EACCES;
3406 +
3407 +       /* devpts is xid tagged */
3408 +       if (vx_check((xid_t)inode->i_tag, VX_IDENT))
3409 +               ret = generic_permission(inode, mask, NULL);
3410 +       return ret;
3411 +}
3412 +
3413 +static struct inode_operations devpts_file_inode_operations = {
3414 +       .permission     = devpts_permission,
3415 +};
3416  
3417  static struct vfsmount *devpts_mnt;
3418  static struct dentry *devpts_root;
3419 @@ -69,6 +82,25 @@ static int devpts_remount(struct super_b
3420         return 0;
3421  }
3422  
3423 +static int devpts_filter(struct dentry *de)
3424 +{
3425 +       /* devpts is xid tagged */
3426 +       return vx_check((xid_t)de->d_inode->i_tag, VX_IDENT);
3427 +}
3428 +
3429 +static int devpts_readdir(struct file * filp, void * dirent, filldir_t filldir)
3430 +{
3431 +       return dcache_readdir_filter(filp, dirent, filldir, devpts_filter);
3432 +}
3433 +
3434 +static struct file_operations devpts_dir_operations = {
3435 +       .open           = dcache_dir_open,
3436 +       .release        = dcache_dir_close,
3437 +       .llseek         = dcache_dir_lseek,
3438 +       .read           = generic_read_dir,
3439 +       .readdir        = devpts_readdir,
3440 +};
3441 +
3442  static struct super_operations devpts_sops = {
3443         .statfs         = simple_statfs,
3444         .remount_fs     = devpts_remount,
3445 @@ -95,8 +127,10 @@ devpts_fill_super(struct super_block *s,
3446         inode->i_uid = inode->i_gid = 0;
3447         inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
3448         inode->i_op = &simple_dir_inode_operations;
3449 -       inode->i_fop = &simple_dir_operations;
3450 +       inode->i_fop = &devpts_dir_operations;
3451         inode->i_nlink = 2;
3452 +       /* devpts is xid tagged */
3453 +       inode->i_tag = (tag_t)vx_current_xid();
3454  
3455         devpts_root = s->s_root = d_alloc_root(inode);
3456         if (s->s_root)
3457 @@ -155,6 +189,9 @@ int devpts_pty_new(struct tty_struct *tt
3458         inode->i_gid = config.setgid ? config.gid : current->fsgid;
3459         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
3460         init_special_inode(inode, S_IFCHR|config.mode, device);
3461 +       /* devpts is xid tagged */
3462 +       inode->i_tag = (tag_t)vx_current_xid();
3463 +       inode->i_op = &devpts_file_inode_operations;
3464         inode->u.generic_ip = tty;
3465  
3466         dentry = get_node(number);
3467 diff -NurpP --minimal linux-2.6.16.20/fs/dquot.c linux-2.6.16.20-vs2.1.1-rc22/fs/dquot.c
3468 --- linux-2.6.16.20/fs/dquot.c  2006-04-09 13:49:53 +0200
3469 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/dquot.c     2006-04-26 19:07:00 +0200
3470 @@ -79,6 +79,7 @@
3471  #include <linux/buffer_head.h>
3472  #include <linux/capability.h>
3473  #include <linux/quotaops.h>
3474 +#include <linux/vserver/debug.h>
3475  
3476  #include <asm/uaccess.h>
3477  
3478 @@ -185,7 +186,7 @@ static void put_quota_format(struct quot
3479  /*
3480   * Dquot List Management:
3481   * The quota code uses three lists for dquot management: the inuse_list,
3482 - * free_dquots, and dquot_hash[] array. A single dquot structure may be
3483 + * free_dquots, and hash->dqh_hash[] array. A single dquot structure may be
3484   * on all three lists, depending on its current state.
3485   *
3486   * All dquots are placed to the end of inuse_list when first created, and this
3487 @@ -198,7 +199,7 @@ static void put_quota_format(struct quot
3488   * dquot is invalidated it's completely released from memory.
3489   *
3490   * Dquots with a specific identity (device, type and id) are placed on
3491 - * one of the dquot_hash[] hash chains. The provides an efficient search
3492 + * one of the hash->dqh_hash[] hash chains. This provides an efficient search
3493   * mechanism to locate a specific dquot.
3494   */
3495  
3496 @@ -212,36 +213,44 @@ struct dqstats dqstats;
3497  static void dqput(struct dquot *dquot);
3498  
3499  static inline unsigned int
3500 -hashfn(const struct super_block *sb, unsigned int id, int type)
3501 +hashfn(struct dqhash *hash, unsigned int id, int type)
3502  {
3503         unsigned long tmp;
3504  
3505 -       tmp = (((unsigned long)sb>>L1_CACHE_SHIFT) ^ id) * (MAXQUOTAS - type);
3506 +       tmp = (((unsigned long)hash >> L1_CACHE_SHIFT) ^ id) * (MAXQUOTAS - type);
3507         return (tmp + (tmp >> dq_hash_bits)) & dq_hash_mask;
3508  }
3509  
3510  /*
3511   * Following list functions expect dq_list_lock to be held
3512   */
3513 -static inline void insert_dquot_hash(struct dquot *dquot)
3514 +static inline void insert_dquot_hash(struct dqhash *hash, struct dquot *dquot)
3515  {
3516 -       struct hlist_head *head = dquot_hash + hashfn(dquot->dq_sb, dquot->dq_id, dquot->dq_type);
3517 +       struct hlist_head *head = dquot_hash +
3518 +               hashfn(hash, dquot->dq_id, dquot->dq_type);
3519 +       /* struct hlist_head *head = hash->dqh_hash +
3520 +               hashfn(dquot->dq_dqh, dquot->dq_id, dquot->dq_type); */
3521         hlist_add_head(&dquot->dq_hash, head);
3522 +       dquot->dq_dqh = dqhget(hash);
3523  }
3524  
3525  static inline void remove_dquot_hash(struct dquot *dquot)
3526  {
3527         hlist_del_init(&dquot->dq_hash);
3528 +       dqhput(dquot->dq_dqh);
3529 +       dquot->dq_dqh = NULL;
3530  }
3531  
3532 -static inline struct dquot *find_dquot(unsigned int hashent, struct super_block *sb, unsigned int id, int type)
3533 +static inline struct dquot *find_dquot(struct dqhash *hash,
3534 +       unsigned int hashent, unsigned int id, int type)
3535  {
3536         struct hlist_node *node;
3537         struct dquot *dquot;
3538  
3539 -       hlist_for_each (node, dquot_hash+hashent) {
3540 +       /* hlist_for_each (node, hash->dqh_hash + hashent) { */
3541 +       hlist_for_each (node, dquot_hash + hashent) {
3542                 dquot = hlist_entry(node, struct dquot, dq_hash);
3543 -               if (dquot->dq_sb == sb && dquot->dq_id == id && dquot->dq_type == type)
3544 +               if (dquot->dq_dqh == hash && dquot->dq_id == id && dquot->dq_type == type)
3545                         return dquot;
3546         }
3547         return NODQUOT;
3548 @@ -285,13 +294,13 @@ static void wait_on_dquot(struct dquot *
3549         up(&dquot->dq_lock);
3550  }
3551  
3552 -#define mark_dquot_dirty(dquot) ((dquot)->dq_sb->dq_op->mark_dirty(dquot))
3553 +#define mark_dquot_dirty(dquot) ((dquot)->dq_dqh->dqh_qop->mark_dirty(dquot))
3554  
3555  int dquot_mark_dquot_dirty(struct dquot *dquot)
3556  {
3557         spin_lock(&dq_list_lock);
3558         if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags))
3559 -               list_add(&dquot->dq_dirty, &sb_dqopt(dquot->dq_sb)->
3560 +               list_add(&dquot->dq_dirty, &dqh_dqopt(dquot->dq_dqh)->
3561                                 info[dquot->dq_type].dqi_dirty_list);
3562         spin_unlock(&dq_list_lock);
3563         return 0;
3564 @@ -306,9 +315,9 @@ static inline int clear_dquot_dirty(stru
3565         return 1;
3566  }
3567  
3568 -void mark_info_dirty(struct super_block *sb, int type)
3569 +void mark_info_dirty(struct dqhash *hash, int type)
3570  {
3571 -       set_bit(DQF_INFO_DIRTY_B, &sb_dqopt(sb)->info[type].dqi_flags);
3572 +       set_bit(DQF_INFO_DIRTY_B, &dqh_dqopt(hash)->info[type].dqi_flags);
3573  }
3574  EXPORT_SYMBOL(mark_info_dirty);
3575  
3576 @@ -319,7 +328,7 @@ EXPORT_SYMBOL(mark_info_dirty);
3577  int dquot_acquire(struct dquot *dquot)
3578  {
3579         int ret = 0, ret2 = 0;
3580 -       struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
3581 +       struct quota_info *dqopt = dqh_dqopt(dquot->dq_dqh);
3582  
3583         down(&dquot->dq_lock);
3584         down(&dqopt->dqio_sem);
3585 @@ -333,7 +342,7 @@ int dquot_acquire(struct dquot *dquot)
3586                 ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot);
3587                 /* Write the info if needed */
3588                 if (info_dirty(&dqopt->info[dquot->dq_type]))
3589 -                       ret2 = dqopt->ops[dquot->dq_type]->write_file_info(dquot->dq_sb, dquot->dq_type);
3590 +                       ret2 = dqopt->ops[dquot->dq_type]->write_file_info(dquot->dq_dqh, dquot->dq_type);
3591                 if (ret < 0)
3592                         goto out_iolock;
3593                 if (ret2 < 0) {
3594 @@ -354,7 +363,7 @@ out_iolock:
3595  int dquot_commit(struct dquot *dquot)
3596  {
3597         int ret = 0, ret2 = 0;
3598 -       struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
3599 +       struct quota_info *dqopt = dqh_dqopt(dquot->dq_dqh);
3600  
3601         down(&dqopt->dqio_sem);
3602         spin_lock(&dq_list_lock);
3603 @@ -368,7 +377,7 @@ int dquot_commit(struct dquot *dquot)
3604         if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
3605                 ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot);
3606                 if (info_dirty(&dqopt->info[dquot->dq_type]))
3607 -                       ret2 = dqopt->ops[dquot->dq_type]->write_file_info(dquot->dq_sb, dquot->dq_type);
3608 +                       ret2 = dqopt->ops[dquot->dq_type]->write_file_info(dquot->dq_dqh, dquot->dq_type);
3609                 if (ret >= 0)
3610                         ret = ret2;
3611         }
3612 @@ -383,7 +392,7 @@ out_sem:
3613  int dquot_release(struct dquot *dquot)
3614  {
3615         int ret = 0, ret2 = 0;
3616 -       struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
3617 +       struct quota_info *dqopt = dqh_dqopt(dquot->dq_dqh);
3618  
3619         down(&dquot->dq_lock);
3620         /* Check whether we are not racing with some other dqget() */
3621 @@ -394,7 +403,7 @@ int dquot_release(struct dquot *dquot)
3622                 ret = dqopt->ops[dquot->dq_type]->release_dqblk(dquot);
3623                 /* Write the info */
3624                 if (info_dirty(&dqopt->info[dquot->dq_type]))
3625 -                       ret2 = dqopt->ops[dquot->dq_type]->write_file_info(dquot->dq_sb, dquot->dq_type);
3626 +                       ret2 = dqopt->ops[dquot->dq_type]->write_file_info(dquot->dq_dqh, dquot->dq_type);
3627                 if (ret >= 0)
3628                         ret = ret2;
3629         }
3630 @@ -409,13 +418,13 @@ out_dqlock:
3631   * quota is disabled and pointers from inodes removed so there cannot be new
3632   * quota users. Also because we hold dqonoff_sem there can be no quota users
3633   * for this sb+type at all. */
3634 -static void invalidate_dquots(struct super_block *sb, int type)
3635 +static void invalidate_dquots(struct dqhash *hash, int type)
3636  {
3637         struct dquot *dquot, *tmp;
3638  
3639         spin_lock(&dq_list_lock);
3640         list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) {
3641 -               if (dquot->dq_sb != sb)
3642 +               if (dquot->dq_dqh != hash)
3643                         continue;
3644                 if (dquot->dq_type != type)
3645                         continue;
3646 @@ -432,18 +441,94 @@ static void invalidate_dquots(struct sup
3647         spin_unlock(&dq_list_lock);
3648  }
3649  
3650 -int vfs_quota_sync(struct super_block *sb, int type)
3651 +
3652 +/* Dquot Hash Management Functions */
3653 +
3654 +static LIST_HEAD(dqhash_list);
3655 +
3656 +struct dqhash *new_dqhash(struct super_block *sb, unsigned int id)
3657 +{
3658 +       struct dqhash *hash;
3659 +       int err;
3660 +
3661 +       err = -ENOMEM;
3662 +       hash = kmalloc(sizeof(struct dqhash),  GFP_USER);
3663 +       if (!hash)
3664 +               goto out;
3665 +
3666 +       memset(hash, 0, sizeof(struct dqhash));
3667 +       hash->dqh_id = id;
3668 +       atomic_set(&hash->dqh_count, 1);
3669 +
3670 +       INIT_LIST_HEAD(&hash->dqh_list);
3671 +
3672 +       sema_init(&hash->dqh_dqopt.dqio_sem, 1);
3673 +       sema_init(&hash->dqh_dqopt.dqonoff_sem, 1);
3674 +       init_rwsem(&hash->dqh_dqopt.dqptr_sem);
3675 +       hash->dqh_qop = sb->s_qop;
3676 +       hash->dqh_qcop = sb->s_qcop;
3677 +       hash->dqh_sb = sb;
3678 +
3679 +       lock_kernel();
3680 +       list_add(&hash->dqh_list, &dqhash_list);
3681 +       unlock_kernel();
3682 +       vxdprintk(VXD_CBIT(misc, 0),
3683 +               "new_dqhash: %p [#0x%08x]", hash, hash->dqh_id);
3684 +       return hash;
3685 +
3686 +       // kfree(hash);
3687 +out:
3688 +       return ERR_PTR(err);
3689 +}
3690 +
3691 +void destroy_dqhash(struct dqhash *hash)
3692 +{
3693 +       int cnt;
3694 +
3695 +       vxdprintk(VXD_CBIT(misc, 0),
3696 +               "destroy_dqhash: %p [#0x%08x] c=%d",
3697 +               hash, hash->dqh_id, atomic_read(&hash->dqh_count));
3698 +       lock_kernel();
3699 +       list_del_init(&hash->dqh_list);
3700 +       unlock_kernel();
3701 +       for (cnt = 0; cnt < MAXQUOTAS; cnt++)   /* should not be required anymore! */
3702 +               invalidate_dquots(hash, cnt);
3703 +       kfree(hash);
3704 +}
3705 +
3706 +
3707 +struct dqhash *find_dqhash(unsigned int id)
3708 +{
3709 +       struct list_head *head;
3710 +       struct dqhash *hash;
3711 +
3712 +       lock_kernel();
3713 +       list_for_each(head, &dqhash_list) {
3714 +               hash = list_entry(head, struct dqhash, dqh_list);
3715 +               if (hash->dqh_id == id)
3716 +                       goto dqh_found;
3717 +       }
3718 +       unlock_kernel();
3719 +       return NULL;
3720 +
3721 +dqh_found:
3722 +       unlock_kernel();
3723 +       return dqhget(hash);
3724 +}
3725 +
3726 +
3727 +int vfs_quota_sync(struct dqhash *hash, int type)
3728  {
3729         struct list_head *dirty;
3730         struct dquot *dquot;
3731 -       struct quota_info *dqopt = sb_dqopt(sb);
3732 +       struct quota_info *dqopt = dqh_dqopt(hash);
3733         int cnt;
3734  
3735         down(&dqopt->dqonoff_sem);
3736         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
3737                 if (type != -1 && cnt != type)
3738                         continue;
3739 -               if (!sb_has_quota_enabled(sb, cnt))
3740 +               if (!dqh_has_quota_enabled(hash, cnt))
3741                         continue;
3742                 spin_lock(&dq_list_lock);
3743                 dirty = &dqopt->info[cnt].dqi_dirty_list;
3744 @@ -460,7 +545,7 @@ int vfs_quota_sync(struct super_block *s
3745                         atomic_inc(&dquot->dq_count);
3746                         dqstats.lookups++;
3747                         spin_unlock(&dq_list_lock);
3748 -                       sb->dq_op->write_dquot(dquot);
3749 +                       hash->dqh_qop->write_dquot(dquot);
3750                         dqput(dquot);
3751                         spin_lock(&dq_list_lock);
3752                 }
3753 @@ -468,9 +553,10 @@ int vfs_quota_sync(struct super_block *s
3754         }
3755  
3756         for (cnt = 0; cnt < MAXQUOTAS; cnt++)
3757 -               if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt)
3758 +               if ((cnt == type || type == -1)
3759 +                       && dqh_has_quota_enabled(hash, cnt)
3760                         && info_dirty(&dqopt->info[cnt]))
3761 -                       sb->dq_op->write_info(sb, cnt);
3762 +                       hash->dqh_qop->write_info(hash, cnt);
3763         spin_lock(&dq_list_lock);
3764         dqstats.syncs++;
3765         spin_unlock(&dq_list_lock);
3766 @@ -525,7 +611,7 @@ static void dqput(struct dquot *dquot)
3767         if (!atomic_read(&dquot->dq_count)) {
3768                 printk("VFS: dqput: trying to free free dquot\n");
3769                 printk("VFS: device %s, dquot of %s %d\n",
3770 -                       dquot->dq_sb->s_id,
3771 +                       dquot->dq_dqh->dqh_sb->s_id,
3772                         quotatypes[dquot->dq_type],
3773                         dquot->dq_id);
3774                 BUG();
3775 @@ -547,14 +633,14 @@ we_slept:
3776         if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) {
3777                 spin_unlock(&dq_list_lock);
3778                 /* Commit dquot before releasing */
3779 -               dquot->dq_sb->dq_op->write_dquot(dquot);
3780 +               dquot->dq_dqh->dqh_qop->write_dquot(dquot);
3781                 goto we_slept;
3782         }
3783         /* Clear flag in case dquot was inactive (something bad happened) */
3784         clear_dquot_dirty(dquot);
3785         if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
3786                 spin_unlock(&dq_list_lock);
3787 -               dquot->dq_sb->dq_op->release_dquot(dquot);
3788 +               dquot->dq_dqh->dqh_qop->release_dquot(dquot);
3789                 goto we_slept;
3790         }
3791         atomic_dec(&dquot->dq_count);
3792 @@ -567,7 +653,7 @@ we_slept:
3793         spin_unlock(&dq_list_lock);
3794  }
3795  
3796 -static struct dquot *get_empty_dquot(struct super_block *sb, int type)
3797 +static struct dquot *get_empty_dquot(int type)
3798  {
3799         struct dquot *dquot;
3800  
3801 @@ -581,7 +667,7 @@ static struct dquot *get_empty_dquot(str
3802         INIT_LIST_HEAD(&dquot->dq_inuse);
3803         INIT_HLIST_NODE(&dquot->dq_hash);
3804         INIT_LIST_HEAD(&dquot->dq_dirty);
3805 -       dquot->dq_sb = sb;
3806 +       dquot->dq_dqh = NULL;
3807         dquot->dq_type = type;
3808         atomic_set(&dquot->dq_count, 1);
3809  
3810 @@ -592,19 +678,19 @@ static struct dquot *get_empty_dquot(str
3811   * Get reference to dquot
3812   * MUST be called with either dqptr_sem or dqonoff_sem held
3813   */
3814 -static struct dquot *dqget(struct super_block *sb, unsigned int id, int type)
3815 +static struct dquot *dqget(struct dqhash *hash, unsigned int id, int type)
3816  {
3817 -       unsigned int hashent = hashfn(sb, id, type);
3818 +       unsigned int hashent = hashfn(hash, id, type);
3819         struct dquot *dquot, *empty = NODQUOT;
3820  
3821 -        if (!sb_has_quota_enabled(sb, type))
3822 +       if (!dqh_has_quota_enabled(hash, type))
3823                 return NODQUOT;
3824  we_slept:
3825         spin_lock(&dq_list_lock);
3826 -       if ((dquot = find_dquot(hashent, sb, id, type)) == NODQUOT) {
3827 +       if ((dquot = find_dquot(hash, hashent, id, type)) == NODQUOT) {
3828                 if (empty == NODQUOT) {
3829                         spin_unlock(&dq_list_lock);
3830 -                       if ((empty = get_empty_dquot(sb, type)) == NODQUOT)
3831 +                       if ((empty = get_empty_dquot(type)) == NODQUOT)
3832                                 schedule();     /* Try to wait for a moment... */
3833                         goto we_slept;
3834                 }
3835 @@ -613,7 +699,7 @@ we_slept:
3836                 /* all dquots go on the inuse_list */
3837                 put_inuse(dquot);
3838                 /* hash it first so it can be found */
3839 -               insert_dquot_hash(dquot);
3840 +               insert_dquot_hash(hash, dquot);
3841                 dqstats.lookups++;
3842                 spin_unlock(&dq_list_lock);
3843         } else {
3844 @@ -630,12 +716,13 @@ we_slept:
3845          * finished or it will be canceled due to dq_count > 1 test */
3846         wait_on_dquot(dquot);
3847         /* Read the dquot and instantiate it (everything done only if needed) */
3848 -       if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && sb->dq_op->acquire_dquot(dquot) < 0) {
3849 +       if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) &&
3850 +               hash->dqh_qop->acquire_dquot(dquot) < 0) {
3851                 dqput(dquot);
3852                 return NODQUOT;
3853         }
3854  #ifdef __DQUOT_PARANOIA
3855 -       if (!dquot->dq_sb)      /* Has somebody invalidated entry under us? */
3856 +       if (!dquot->dq_dqh)     /* Has somebody invalidated entry under us? */
3857                 BUG();
3858  #endif
3859  
3860 @@ -657,9 +744,10 @@ static int dqinit_needed(struct inode *i
3861  }
3862  
3863  /* This routine is guarded by dqonoff_sem semaphore */
3864 -static void add_dquot_ref(struct super_block *sb, int type)
3865 +static void add_dquot_ref(struct dqhash *hash, int type)
3866  {
3867         struct list_head *p;
3868 +       struct super_block *sb = hash->dqh_sb;
3869  
3870  restart:
3871         file_list_lock();
3872 @@ -669,7 +757,7 @@ restart:
3873                 if (filp->f_mode & FMODE_WRITE && dqinit_needed(inode, type)) {
3874                         struct dentry *dentry = dget(filp->f_dentry);
3875                         file_list_unlock();
3876 -                       sb->dq_op->initialize(inode, type);
3877 +                       hash->dqh_qop->initialize(inode, type);
3878                         dput(dentry);
3879                         /* As we may have blocked we had better restart... */
3880                         goto restart;
3881 @@ -728,16 +816,16 @@ static void put_dquot_list(struct list_h
3882  }
3883  
3884  /* Gather all references from inodes and drop them */
3885 -static void drop_dquot_ref(struct super_block *sb, int type)
3886 +static void drop_dquot_ref(struct dqhash *hash, int type)
3887  {
3888         LIST_HEAD(tofree_head);
3889  
3890         /* We need to be guarded against prune_icache to reach all the
3891          * inodes - otherwise some can be on the local list of prune_icache */
3892         down(&iprune_sem);
3893 -       down_write(&sb_dqopt(sb)->dqptr_sem);
3894 -       remove_dquot_ref(sb, type, &tofree_head);
3895 -       up_write(&sb_dqopt(sb)->dqptr_sem);
3896 +       down_write(&dqh_dqopt(hash)->dqptr_sem);
3897 +       remove_dquot_ref(hash, type, &tofree_head);
3898 +       up_write(&dqh_dqopt(hash)->dqptr_sem);
3899         up(&iprune_sem);
3900         put_dquot_list(&tofree_head);
3901  }
3902 @@ -809,7 +897,7 @@ static void print_warning(struct dquot *
3903         if (!need_print_warning(dquot) || (flag && test_and_set_bit(flag, &dquot->dq_flags)))
3904                 return;
3905  
3906 -       tty_write_message(current->signal->tty, dquot->dq_sb->s_id);
3907 +       tty_write_message(current->signal->tty, dquot->dq_dqh->dqh_sb->s_id);
3908         if (warntype == ISOFTWARN || warntype == BSOFTWARN)
3909                 tty_write_message(current->signal->tty, ": warning, ");
3910         else
3911 @@ -849,7 +937,7 @@ static inline void flush_warnings(struct
3912  
3913  static inline char ignore_hardlimit(struct dquot *dquot)
3914  {
3915 -       struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type];
3916 +       struct mem_dqinfo *info = &dqh_dqopt(dquot->dq_dqh)->info[dquot->dq_type];
3917  
3918         return capable(CAP_SYS_RESOURCE) &&
3919             (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD || !(info->dqi_flags & V1_DQF_RSQUASH));
3920 @@ -881,7 +969,7 @@ static int check_idq(struct dquot *dquot
3921            (dquot->dq_dqb.dqb_curinodes + inodes) > dquot->dq_dqb.dqb_isoftlimit &&
3922             dquot->dq_dqb.dqb_itime == 0) {
3923                 *warntype = ISOFTWARN;
3924 -               dquot->dq_dqb.dqb_itime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace;
3925 +               dquot->dq_dqb.dqb_itime = get_seconds() + dqh_dqopt(dquot->dq_dqh)->info[dquot->dq_type].dqi_igrace;
3926         }
3927  
3928         return QUOTA_OK;
3929 @@ -916,7 +1004,7 @@ static int check_bdq(struct dquot *dquot
3930             dquot->dq_dqb.dqb_btime == 0) {
3931                 if (!prealloc) {
3932                         *warntype = BSOFTWARN;
3933 -                       dquot->dq_dqb.dqb_btime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_bgrace;
3934 +                       dquot->dq_dqb.dqb_btime = get_seconds() + dqh_dqopt(dquot->dq_dqh)->info[dquot->dq_type].dqi_bgrace;
3935                 }
3936                 else
3937                         /*
3938 @@ -942,7 +1030,7 @@ int dquot_initialize(struct inode *inode
3939           * re-enter the quota code and are already holding the semaphore */
3940         if (IS_NOQUOTA(inode))
3941                 return 0;
3942 -       down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
3943 +       down_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
3944         /* Having dqptr_sem we know NOQUOTA flags can't be altered... */
3945         if (IS_NOQUOTA(inode))
3946                 goto out_err;
3947 @@ -958,11 +1046,11 @@ int dquot_initialize(struct inode *inode
3948                                         id = inode->i_gid;
3949                                         break;
3950                         }
3951 -                       inode->i_dquot[cnt] = dqget(inode->i_sb, id, cnt);
3952 +                       inode->i_dquot[cnt] = dqget(inode->i_dqh, id, cnt);
3953                 }
3954         }
3955  out_err:
3956 -       up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
3957 +       up_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
3958         return ret;
3959  }
3960  
3961 @@ -974,14 +1062,14 @@ int dquot_drop(struct inode *inode)
3962  {
3963         int cnt;
3964  
3965 -       down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
3966 +       down_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
3967         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
3968                 if (inode->i_dquot[cnt] != NODQUOT) {
3969                         dqput(inode->i_dquot[cnt]);
3970                         inode->i_dquot[cnt] = NODQUOT;
3971                 }
3972         }
3973 -       up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
3974 +       up_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
3975         return 0;
3976  }
3977  
3978 @@ -1012,9 +1100,9 @@ out_add:
3979         for (cnt = 0; cnt < MAXQUOTAS; cnt++)
3980                 warntype[cnt] = NOWARN;
3981  
3982 -       down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
3983 +       down_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
3984         if (IS_NOQUOTA(inode)) {        /* Now we can do reliable test... */
3985 -               up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
3986 +               up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
3987                 goto out_add;
3988         }
3989         spin_lock(&dq_data_lock);
3990 @@ -1039,7 +1127,7 @@ warn_put_all:
3991                         if (inode->i_dquot[cnt])
3992                                 mark_dquot_dirty(inode->i_dquot[cnt]);
3993         flush_warnings(inode->i_dquot, warntype);
3994 -       up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
3995 +       up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
3996         return ret;
3997  }
3998  
3999 @@ -1057,9 +1145,9 @@ int dquot_alloc_inode(const struct inode
4000                 return QUOTA_OK;
4001         for (cnt = 0; cnt < MAXQUOTAS; cnt++)
4002                 warntype[cnt] = NOWARN;
4003 -       down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
4004 +       down_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
4005         if (IS_NOQUOTA(inode)) {
4006 -               up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
4007 +               up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
4008                 return QUOTA_OK;
4009         }
4010         spin_lock(&dq_data_lock);
4011 @@ -1084,7 +1172,7 @@ warn_put_all:
4012                         if (inode->i_dquot[cnt])
4013                                 mark_dquot_dirty(inode->i_dquot[cnt]);
4014         flush_warnings((struct dquot **)inode->i_dquot, warntype);
4015 -       up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
4016 +       up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
4017         return ret;
4018  }
4019  
4020 @@ -1102,10 +1190,10 @@ out_sub:
4021                 inode_sub_bytes(inode, number);
4022                 return QUOTA_OK;
4023         }
4024 -       down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
4025 +       down_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
4026         /* Now recheck reliably when holding dqptr_sem */
4027         if (IS_NOQUOTA(inode)) {
4028 -               up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
4029 +               up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
4030                 goto out_sub;
4031         }
4032         spin_lock(&dq_data_lock);
4033 @@ -1120,7 +1208,7 @@ out_sub:
4034         for (cnt = 0; cnt < MAXQUOTAS; cnt++)
4035                 if (inode->i_dquot[cnt])
4036                         mark_dquot_dirty(inode->i_dquot[cnt]);
4037 -       up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
4038 +       up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
4039         return QUOTA_OK;
4040  }
4041  
4042 @@ -1135,10 +1223,10 @@ int dquot_free_inode(const struct inode 
4043           * re-enter the quota code and are already holding the semaphore */
4044         if (IS_NOQUOTA(inode))
4045                 return QUOTA_OK;
4046 -       down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
4047 +       down_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
4048         /* Now recheck reliably when holding dqptr_sem */
4049         if (IS_NOQUOTA(inode)) {
4050 -               up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
4051 +               up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
4052                 return QUOTA_OK;
4053         }
4054         spin_lock(&dq_data_lock);
4055 @@ -1152,7 +1240,7 @@ int dquot_free_inode(const struct inode 
4056         for (cnt = 0; cnt < MAXQUOTAS; cnt++)
4057                 if (inode->i_dquot[cnt])
4058                         mark_dquot_dirty(inode->i_dquot[cnt]);
4059 -       up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
4060 +       up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
4061         return QUOTA_OK;
4062  }
4063  
4064 @@ -1167,6 +1255,7 @@ int dquot_transfer(struct inode *inode, 
4065         qsize_t space;
4066         struct dquot *transfer_from[MAXQUOTAS];
4067         struct dquot *transfer_to[MAXQUOTAS];
4068 +       struct dqhash *dqh = inode->i_sb->s_dqh;
4069         int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid,
4070             chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid;
4071         char warntype[MAXQUOTAS];
4072 @@ -1180,10 +1269,10 @@ int dquot_transfer(struct inode *inode, 
4073                 transfer_to[cnt] = transfer_from[cnt] = NODQUOT;
4074                 warntype[cnt] = NOWARN;
4075         }
4076 -       down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
4077 +       down_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
4078         /* Now recheck reliably when holding dqptr_sem */
4079         if (IS_NOQUOTA(inode)) {        /* File without quota accounting? */
4080 -               up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
4081 +               up_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
4082                 return QUOTA_OK;
4083         }
4084         /* First build the transfer_to list - here we can block on
4085 @@ -1194,12 +1283,12 @@ int dquot_transfer(struct inode *inode, 
4086                         case USRQUOTA:
4087                                 if (!chuid)
4088                                         continue;
4089 -                               transfer_to[cnt] = dqget(inode->i_sb, iattr->ia_uid, cnt);
4090 +                               transfer_to[cnt] = dqget(dqh, iattr->ia_uid, cnt);
4091                                 break;
4092                         case GRPQUOTA:
4093                                 if (!chgid)
4094                                         continue;
4095 -                               transfer_to[cnt] = dqget(inode->i_sb, iattr->ia_gid, cnt);
4096 +                               transfer_to[cnt] = dqget(dqh, iattr->ia_gid, cnt);
4097                                 break;
4098                 }
4099         }
4100 @@ -1254,20 +1343,20 @@ warn_put_all:
4101                 if (ret == NO_QUOTA && transfer_to[cnt] != NODQUOT)
4102                         dqput(transfer_to[cnt]);
4103         }
4104 -       up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
4105 +       up_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
4106         return ret;
4107  }
4108  
4109  /*
4110   * Write info of quota file to disk
4111   */
4112 -int dquot_commit_info(struct super_block *sb, int type)
4113 +int dquot_commit_info(struct dqhash *hash, int type) /* hash: selects the quota_info (via dqh_dqopt) to write; type: index into its ops[] */
4114  {
4115         int ret;
4116 -       struct quota_info *dqopt = sb_dqopt(sb);
4117 +       struct quota_info *dqopt = dqh_dqopt(hash);
4118  
4119         down(&dqopt->dqio_sem);
4120 -       ret = dqopt->ops[type]->write_file_info(sb, type);
4121 +       ret = dqopt->ops[type]->write_file_info(hash, type); /* runs with dqio_sem held; its result is returned unchanged */
4122         up(&dqopt->dqio_sem);
4123         return ret;
4124  }
4125 @@ -1317,10 +1406,10 @@ static inline void reset_enable_flags(st
4126  /*
4127   * Turn quota off on a device. type == -1 ==> quotaoff for all types (umount)
4128   */
4129 -int vfs_quota_off(struct super_block *sb, int type)
4130 +int vfs_quota_off(struct dqhash *hash, int type)
4131  {
4132         int cnt;
4133 -       struct quota_info *dqopt = sb_dqopt(sb);
4134 +       struct quota_info *dqopt = dqh_dqopt(hash);
4135         struct inode *toputinode[MAXQUOTAS];
4136  
4137         /* We need to serialize quota_off() for device */
4138 @@ -1329,21 +1418,21 @@ int vfs_quota_off(struct super_block *sb
4139                 toputinode[cnt] = NULL;
4140                 if (type != -1 && cnt != type)
4141                         continue;
4142 -               if (!sb_has_quota_enabled(sb, cnt))
4143 +               if (!dqh_has_quota_enabled(hash, cnt))
4144                         continue;
4145                 reset_enable_flags(dqopt, cnt);
4146  
4147                 /* Note: these are blocking operations */
4148 -               drop_dquot_ref(sb, cnt);
4149 -               invalidate_dquots(sb, cnt);
4150 +               drop_dquot_ref(hash, cnt);
4151 +               invalidate_dquots(hash, cnt);
4152                 /*
4153                  * Now all dquots should be invalidated, all writes done so we should be only
4154                  * users of the info. No locks needed.
4155                  */
4156                 if (info_dirty(&dqopt->info[cnt]))
4157 -                       sb->dq_op->write_info(sb, cnt);
4158 +                       hash->dqh_qop->write_info(hash, cnt);
4159                 if (dqopt->ops[cnt]->free_file_info)
4160 -                       dqopt->ops[cnt]->free_file_info(sb, cnt);
4161 +                       dqopt->ops[cnt]->free_file_info(hash, cnt);
4162                 put_quota_format(dqopt->info[cnt].dqi_format);
4163  
4164                 toputinode[cnt] = dqopt->files[cnt];
4165 @@ -1356,9 +1445,9 @@ int vfs_quota_off(struct super_block *sb
4166         up(&dqopt->dqonoff_sem);
4167         /* Sync the superblock so that buffers with quota data are written to
4168          * disk (and so userspace sees correct data afterwards). */
4169 -       if (sb->s_op->sync_fs)
4170 -               sb->s_op->sync_fs(sb, 1);
4171 -       sync_blockdev(sb->s_bdev);
4172 +       if (hash->dqh_sb->s_op->sync_fs)
4173 +               hash->dqh_sb->s_op->sync_fs(hash->dqh_sb, 1);
4174 +       sync_blockdev(hash->dqh_sb->s_bdev);
4175         /* Now the quota files are just ordinary files and we can set the
4176          * inode flags back. Moreover we discard the pagecache so that
4177          * userspace sees the writes we did bypassing the pagecache. We
4178 @@ -1369,7 +1458,7 @@ int vfs_quota_off(struct super_block *sb
4179                         down(&dqopt->dqonoff_sem);
4180                         /* If quota was reenabled in the meantime, we have
4181                          * nothing to do */
4182 -                       if (!sb_has_quota_enabled(sb, cnt)) {
4183 +                       if (!dqh_has_quota_enabled(hash, cnt)) {
4184                                 mutex_lock(&toputinode[cnt]->i_mutex);
4185                                 toputinode[cnt]->i_flags &= ~(S_IMMUTABLE |
4186                                   S_NOATIME | S_NOQUOTA);
4187 @@ -1380,8 +1469,8 @@ int vfs_quota_off(struct super_block *sb
4188                         }
4189                         up(&dqopt->dqonoff_sem);
4190                 }
4191 -       if (sb->s_bdev)
4192 -               invalidate_bdev(sb->s_bdev, 0);
4193 +       if (hash->dqh_sb->s_bdev)
4194 +               invalidate_bdev(hash->dqh_sb->s_bdev, 0);
4195         return 0;
4196  }
4197  
4198 @@ -1394,7 +1483,8 @@ static int vfs_quota_on_inode(struct ino
4199  {
4200         struct quota_format_type *fmt = find_quota_format(format_id);
4201         struct super_block *sb = inode->i_sb;
4202 -       struct quota_info *dqopt = sb_dqopt(sb);
4203 +       struct dqhash *hash = inode->i_dqh;
4204 +       struct quota_info *dqopt = dqh_dqopt(hash);
4205         int error;
4206         int oldflags = -1;
4207  
4208 @@ -1420,7 +1510,7 @@ static int vfs_quota_on_inode(struct ino
4209         invalidate_bdev(sb->s_bdev, 0);
4210         mutex_lock(&inode->i_mutex);
4211         down(&dqopt->dqonoff_sem);
4212 -       if (sb_has_quota_enabled(sb, type)) {
4213 +       if (dqh_has_quota_enabled(hash, type)) {
4214                 error = -EBUSY;
4215                 goto out_lock;
4216         }
4217 @@ -1431,21 +1521,21 @@ static int vfs_quota_on_inode(struct ino
4218         oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | S_NOQUOTA);
4219         inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
4220         up_write(&dqopt->dqptr_sem);
4221 -       sb->dq_op->drop(inode);
4222 +       hash->dqh_qop->drop(inode);
4223  
4224         error = -EIO;
4225         dqopt->files[type] = igrab(inode);
4226         if (!dqopt->files[type])
4227                 goto out_lock;
4228         error = -EINVAL;
4229 -       if (!fmt->qf_ops->check_quota_file(sb, type))
4230 +       if (!fmt->qf_ops->check_quota_file(hash, type))
4231                 goto out_file_init;
4232  
4233         dqopt->ops[type] = fmt->qf_ops;
4234         dqopt->info[type].dqi_format = fmt;
4235         INIT_LIST_HEAD(&dqopt->info[type].dqi_dirty_list);
4236         down(&dqopt->dqio_sem);
4237 -       if ((error = dqopt->ops[type]->read_file_info(sb, type)) < 0) {
4238 +       if ((error = dqopt->ops[type]->read_file_info(hash, type)) < 0) {
4239                 up(&dqopt->dqio_sem);
4240                 goto out_file_init;
4241         }
4242 @@ -1453,7 +1543,7 @@ static int vfs_quota_on_inode(struct ino
4243         mutex_unlock(&inode->i_mutex);
4244         set_enable_flags(dqopt, type);
4245  
4246 -       add_dquot_ref(sb, type);
4247 +       add_dquot_ref(hash, type);
4248         up(&dqopt->dqonoff_sem);
4249  
4250         return 0;
4251 @@ -1479,7 +1569,7 @@ out_fmt:
4252  }
4253  
4254  /* Actual function called from quotactl() */
4255 -int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path)
4256 +int vfs_quota_on(struct dqhash *hash, int type, int format_id, char *path)
4257  {
4258         struct nameidata nd;
4259         int error;
4260 @@ -1491,7 +1581,7 @@ int vfs_quota_on(struct super_block *sb,
4261         if (error)
4262                 goto out_path;
4263         /* Quota file not on the same filesystem? */
4264 -       if (nd.mnt->mnt_sb != sb)
4265 +       if (nd.mnt->mnt_sb != hash->dqh_sb)
4266                 error = -EXDEV;
4267         else
4268                 error = vfs_quota_on_inode(nd.dentry->d_inode, type, format_id);
4269 @@ -1504,13 +1594,13 @@ out_path:
4270   * This function is used when filesystem needs to initialize quotas
4271   * during mount time.
4272   */
4273 -int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
4274 +int vfs_quota_on_mount(struct dqhash *hash, char *qf_name,
4275                 int format_id, int type)
4276  {
4277         struct dentry *dentry;
4278         int error;
4279  
4280 -       dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name));
4281 +       dentry = lookup_one_len(qf_name, hash->dqh_sb->s_root, strlen(qf_name));
4282         if (IS_ERR(dentry))
4283                 return PTR_ERR(dentry);
4284  
4285 @@ -1546,18 +1636,18 @@ static void do_get_dqblk(struct dquot *d
4286         spin_unlock(&dq_data_lock);
4287  }
4288  
4289 -int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di)
4290 +int vfs_get_dqblk(struct dqhash *hash, int type, qid_t id, struct if_dqblk *di) /* fills *di via do_get_dqblk(); returns 0, or -ESRCH if dqget() yields nothing */
4291  {
4292         struct dquot *dquot;
4293  
4294 -       down(&sb_dqopt(sb)->dqonoff_sem);
4295 -       if (!(dquot = dqget(sb, id, type))) {
4296 -               up(&sb_dqopt(sb)->dqonoff_sem);
4297 +       down(&dqh_dqopt(hash)->dqonoff_sem); /* held across the dqget()/do_get_dqblk()/dqput() sequence */
4298 +       if (!(dquot = dqget(hash, id, type))) { /* no dquot obtained for (hash, id, type) */
4299 +               up(&dqh_dqopt(hash)->dqonoff_sem); /* release before the early return */
4300                 return -ESRCH;
4301         }
4302         do_get_dqblk(dquot, di);
4303         dqput(dquot);
4304 -       up(&sb_dqopt(sb)->dqonoff_sem);
4305 +       up(&dqh_dqopt(hash)->dqonoff_sem);
4306         return 0;
4307  }
4308  
4309 @@ -1597,7 +1687,7 @@ static void do_set_dqblk(struct dquot *d
4310                         clear_bit(DQ_BLKS_B, &dquot->dq_flags);
4311                 }
4312                 else if (!(di->dqb_valid & QIF_BTIME))  /* Set grace only if user hasn't provided his own... */
4313 -                       dm->dqb_btime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_bgrace;
4314 +                       dm->dqb_btime = get_seconds() + dqh_dqopt(dquot->dq_dqh)->info[dquot->dq_type].dqi_bgrace;
4315         }
4316         if (check_ilim) {
4317                 if (!dm->dqb_isoftlimit || dm->dqb_curinodes < dm->dqb_isoftlimit) {
4318 @@ -1605,7 +1695,7 @@ static void do_set_dqblk(struct dquot *d
4319                         clear_bit(DQ_INODES_B, &dquot->dq_flags);
4320                 }
4321                 else if (!(di->dqb_valid & QIF_ITIME))  /* Set grace only if user hasn't provided his own... */
4322 -                       dm->dqb_itime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace;
4323 +                       dm->dqb_itime = get_seconds() + dqh_dqopt(dquot->dq_dqh)->info[dquot->dq_type].dqi_igrace;
4324         }
4325         if (dm->dqb_bhardlimit || dm->dqb_bsoftlimit || dm->dqb_ihardlimit || dm->dqb_isoftlimit)
4326                 clear_bit(DQ_FAKE_B, &dquot->dq_flags);
4327 @@ -1615,53 +1705,53 @@ static void do_set_dqblk(struct dquot *d
4328         mark_dquot_dirty(dquot);
4329  }
4330  
4331 -int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di)
4332 +int vfs_set_dqblk(struct dqhash *hash, int type, qid_t id, struct if_dqblk *di) /* applies *di via do_set_dqblk(); returns 0, or -ESRCH if dqget() yields nothing */
4333  {
4334         struct dquot *dquot;
4335  
4336 -       down(&sb_dqopt(sb)->dqonoff_sem);
4337 -       if (!(dquot = dqget(sb, id, type))) {
4338 -               up(&sb_dqopt(sb)->dqonoff_sem);
4339 +       down(&dqh_dqopt(hash)->dqonoff_sem); /* held across the dqget()/do_set_dqblk()/dqput() sequence */
4340 +       if (!(dquot = dqget(hash, id, type))) { /* no dquot obtained for (hash, id, type) */
4341 +               up(&dqh_dqopt(hash)->dqonoff_sem); /* release before the early return */
4342                 return -ESRCH;
4343         }
4344         do_set_dqblk(dquot, di);
4345         dqput(dquot);
4346 -       up(&sb_dqopt(sb)->dqonoff_sem);
4347 +       up(&dqh_dqopt(hash)->dqonoff_sem);
4348         return 0;
4349  }
4350  
4351  /* Generic routine for getting common part of quota file information */
4352 -int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
4353 +int vfs_get_dqinfo(struct dqhash *hash, int type, struct if_dqinfo *ii) /* copies grace times and masked flags into *ii; returns 0, or -ESRCH if quota of this type is not enabled */
4354  {
4355         struct mem_dqinfo *mi;
4356    
4357 -       down(&sb_dqopt(sb)->dqonoff_sem);
4358 -       if (!sb_has_quota_enabled(sb, type)) {
4359 -               up(&sb_dqopt(sb)->dqonoff_sem);
4360 +       down(&dqh_dqopt(hash)->dqonoff_sem); /* held for the enabled check and the copy below */
4361 +       if (!dqh_has_quota_enabled(hash, type)) {
4362 +               up(&dqh_dqopt(hash)->dqonoff_sem); /* release before the early return */
4363                 return -ESRCH;
4364         }
4365 -       mi = sb_dqopt(sb)->info + type;
4366 +       mi = dqh_dqopt(hash)->info + type; /* per-type entry of the info[] array */
4367         spin_lock(&dq_data_lock);
4368         ii->dqi_bgrace = mi->dqi_bgrace;
4369         ii->dqi_igrace = mi->dqi_igrace;
4370         ii->dqi_flags = mi->dqi_flags & DQF_MASK;
4371         ii->dqi_valid = IIF_ALL;
4372         spin_unlock(&dq_data_lock);
4373 -       up(&sb_dqopt(sb)->dqonoff_sem);
4374 +       up(&dqh_dqopt(hash)->dqonoff_sem);
4375         return 0;
4376  }
4377  
4378  /* Generic routine for setting common part of quota file information */
4379 -int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
4380 +int vfs_set_dqinfo(struct dqhash *hash, int type, struct if_dqinfo *ii)
4381  {
4382         struct mem_dqinfo *mi;
4383  
4384 -       down(&sb_dqopt(sb)->dqonoff_sem);
4385 -       if (!sb_has_quota_enabled(sb, type)) {
4386 -               up(&sb_dqopt(sb)->dqonoff_sem);
4387 +       down(&dqh_dqopt(hash)->dqonoff_sem);
4388 +       if (!dqh_has_quota_enabled(hash, type)) {
4389 +               up(&dqh_dqopt(hash)->dqonoff_sem);
4390                 return -ESRCH;
4391         }
4392 -       mi = sb_dqopt(sb)->info + type;
4393 +       mi = dqh_dqopt(hash)->info + type;
4394         spin_lock(&dq_data_lock);
4395         if (ii->dqi_valid & IIF_BGRACE)
4396                 mi->dqi_bgrace = ii->dqi_bgrace;
4397 @@ -1670,10 +1760,10 @@ int vfs_set_dqinfo(struct super_block *s
4398         if (ii->dqi_valid & IIF_FLAGS)
4399                 mi->dqi_flags = (mi->dqi_flags & ~DQF_MASK) | (ii->dqi_flags & DQF_MASK);
4400         spin_unlock(&dq_data_lock);
4401 -       mark_info_dirty(sb, type);
4402 +       mark_info_dirty(hash, type);
4403         /* Force write to disk */
4404 -       sb->dq_op->write_info(sb, type);
4405 -       up(&sb_dqopt(sb)->dqonoff_sem);
4406 +       hash->dqh_qop->write_info(hash, type);
4407 +       up(&dqh_dqopt(hash)->dqonoff_sem);
4408         return 0;
4409  }
4410  
4411 diff -NurpP --minimal linux-2.6.16.20/fs/exec.c linux-2.6.16.20-vs2.1.1-rc22/fs/exec.c
4412 --- linux-2.6.16.20/fs/exec.c   2006-04-09 13:49:53 +0200
4413 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/exec.c      2006-05-29 16:54:23 +0200
4414 @@ -49,6 +49,8 @@
4415  #include <linux/rmap.h>
4416  #include <linux/acct.h>
4417  #include <linux/cn_proc.h>
4418 +#include <linux/vs_cvirt.h>
4419 +#include <linux/vs_memory.h>
4420  
4421  #include <asm/uaccess.h>
4422  #include <asm/mmu_context.h>
4423 @@ -436,7 +438,8 @@ int setup_arg_pages(struct linux_binprm 
4424                         kmem_cache_free(vm_area_cachep, mpnt);
4425                         return ret;
4426                 }
4427 -               mm->stack_vm = mm->total_vm = vma_pages(mpnt);
4428 +               vx_vmpages_sub(mm, mm->total_vm - vma_pages(mpnt));
4429 +               mm->stack_vm = mm->total_vm;
4430         }
4431  
4432         for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
4433 @@ -1320,7 +1323,7 @@ static void format_corename(char *corena
4434                         /* UNIX time of coredump */
4435                         case 't': {
4436                                 struct timeval tv;
4437 -                               do_gettimeofday(&tv);
4438 +                               vx_gettimeofday(&tv);
4439                                 rc = snprintf(out_ptr, out_end - out_ptr,
4440                                               "%lu", tv.tv_sec);
4441                                 if (rc > out_end - out_ptr)
4442 @@ -1332,7 +1335,7 @@ static void format_corename(char *corena
4443                         case 'h':
4444                                 down_read(&uts_sem);
4445                                 rc = snprintf(out_ptr, out_end - out_ptr,
4446 -                                             "%s", system_utsname.nodename);
4447 +                                             "%s", vx_new_uts(nodename));
4448                                 up_read(&uts_sem);
4449                                 if (rc > out_end - out_ptr)
4450                                         goto out;
4451 diff -NurpP --minimal linux-2.6.16.20/fs/ext2/balloc.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/balloc.c
4452 --- linux-2.6.16.20/fs/ext2/balloc.c    2006-04-09 13:49:53 +0200
4453 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/balloc.c       2006-04-26 19:07:00 +0200
4454 @@ -17,6 +17,8 @@
4455  #include <linux/sched.h>
4456  #include <linux/buffer_head.h>
4457  #include <linux/capability.h>
4458 +#include <linux/vs_dlimit.h>
4459 +#include <linux/vs_tag.h>
4460  
4461  /*
4462   * balloc.c contains the blocks allocation and deallocation routines
4463 @@ -109,6 +111,8 @@ static int reserve_blocks(struct super_b
4464         free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
4465         root_blocks = le32_to_cpu(es->s_r_blocks_count);
4466  
4467 +       DLIMIT_ADJUST_BLOCK(sb, dx_current_tag(), &free_blocks, &root_blocks);
4468 +
4469         if (free_blocks < count)
4470                 count = free_blocks;
4471  
4472 @@ -259,6 +263,7 @@ do_more:
4473         }
4474  error_return:
4475         brelse(bitmap_bh);
4476 +       DLIMIT_FREE_BLOCK(inode, freed);
4477         release_blocks(sb, freed);
4478         DQUOT_FREE_BLOCK(inode, freed);
4479  }
4480 @@ -362,6 +367,10 @@ int ext2_new_block(struct inode *inode, 
4481                 *err = -ENOSPC;
4482                 goto out_dquot;
4483         }
4484 +       if (DLIMIT_ALLOC_BLOCK(inode, es_alloc)) {
4485 +               *err = -ENOSPC;
4486 +               goto out_dlimit;
4487 +       }
4488  
4489         ext2_debug ("goal=%lu.\n", goal);
4490  
4491 @@ -509,6 +518,8 @@ got_block:
4492         *err = 0;
4493  out_release:
4494         group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
4495 +       DLIMIT_FREE_BLOCK(inode, es_alloc);
4496 +out_dlimit:
4497         release_blocks(sb, es_alloc);
4498  out_dquot:
4499         DQUOT_FREE_BLOCK(inode, dq_alloc);
4500 diff -NurpP --minimal linux-2.6.16.20/fs/ext2/ext2.h linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/ext2.h
4501 --- linux-2.6.16.20/fs/ext2/ext2.h      2006-02-18 14:40:21 +0100
4502 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/ext2.h 2006-04-26 19:07:00 +0200
4503 @@ -162,6 +162,7 @@ extern struct file_operations ext2_xip_f
4504  extern struct address_space_operations ext2_aops;
4505  extern struct address_space_operations ext2_aops_xip;
4506  extern struct address_space_operations ext2_nobh_aops;
4507 +extern int ext2_sync_flags(struct inode *inode);
4508  
4509  /* namei.c */
4510  extern struct inode_operations ext2_dir_inode_operations;
4511 diff -NurpP --minimal linux-2.6.16.20/fs/ext2/file.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/file.c
4512 --- linux-2.6.16.20/fs/ext2/file.c      2005-08-29 22:25:30 +0200
4513 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/file.c 2006-04-26 19:07:00 +0200
4514 @@ -53,6 +53,7 @@ struct file_operations ext2_file_operati
4515         .readv          = generic_file_readv,
4516         .writev         = generic_file_writev,
4517         .sendfile       = generic_file_sendfile,
4518 +       .sendpage       = generic_file_sendpage,
4519  };
4520  
4521  #ifdef CONFIG_EXT2_FS_XIP
4522 @@ -79,4 +80,5 @@ struct inode_operations ext2_file_inode_
4523  #endif
4524         .setattr        = ext2_setattr,
4525         .permission     = ext2_permission,
4526 +       .sync_flags     = ext2_sync_flags,
4527  };
4528 diff -NurpP --minimal linux-2.6.16.20/fs/ext2/ialloc.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/ialloc.c
4529 --- linux-2.6.16.20/fs/ext2/ialloc.c    2006-02-18 14:40:21 +0100
4530 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/ialloc.c       2006-04-26 19:07:00 +0200
4531 @@ -18,6 +18,8 @@
4532  #include <linux/backing-dev.h>
4533  #include <linux/buffer_head.h>
4534  #include <linux/random.h>
4535 +#include <linux/vs_dlimit.h>
4536 +#include <linux/vs_tag.h>
4537  #include "ext2.h"
4538  #include "xattr.h"
4539  #include "acl.h"
4540 @@ -126,6 +128,7 @@ void ext2_free_inode (struct inode * ino
4541                 ext2_xattr_delete_inode(inode);
4542                 DQUOT_FREE_INODE(inode);
4543                 DQUOT_DROP(inode);
4544 +               DLIMIT_FREE_INODE(inode);
4545         }
4546  
4547         es = EXT2_SB(sb)->s_es;
4548 @@ -465,6 +468,11 @@ struct inode *ext2_new_inode(struct inod
4549         if (!inode)
4550                 return ERR_PTR(-ENOMEM);
4551  
4552 +       inode->i_tag = dx_current_fstag(sb);
4553 +       if (DLIMIT_ALLOC_INODE(inode)) {
4554 +               err = -ENOSPC;
4555 +               goto fail_dlim;
4556 +       }
4557         ei = EXT2_I(inode);
4558         sbi = EXT2_SB(sb);
4559         es = sbi->s_es;
4560 @@ -579,7 +587,8 @@ got:
4561         inode->i_blocks = 0;
4562         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
4563         memset(ei->i_data, 0, sizeof(ei->i_data));
4564 -       ei->i_flags = EXT2_I(dir)->i_flags & ~EXT2_BTREE_FL;
4565 +       ei->i_flags = EXT2_I(dir)->i_flags &
4566 +               ~(EXT2_BTREE_FL|EXT2_IUNLINK_FL|EXT2_BARRIER_FL);
4567         if (S_ISLNK(mode))
4568                 ei->i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL);
4569         /* dirsync is only applied to directories */
4570 @@ -627,12 +636,15 @@ fail_free_drop:
4571  
4572  fail_drop:
4573         DQUOT_DROP(inode);
4574 +       DLIMIT_FREE_INODE(inode);
4575         inode->i_flags |= S_NOQUOTA;
4576         inode->i_nlink = 0;
4577         iput(inode);
4578         return ERR_PTR(err);
4579  
4580  fail:
4581 +       DLIMIT_FREE_INODE(inode);
4582 +fail_dlim:
4583         make_bad_inode(inode);
4584         iput(inode);
4585         return ERR_PTR(err);
4586 diff -NurpP --minimal linux-2.6.16.20/fs/ext2/inode.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/inode.c
4587 --- linux-2.6.16.20/fs/ext2/inode.c     2006-02-18 14:40:21 +0100
4588 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/inode.c        2006-04-26 19:07:00 +0200
4589 @@ -31,6 +31,7 @@
4590  #include <linux/writeback.h>
4591  #include <linux/buffer_head.h>
4592  #include <linux/mpage.h>
4593 +#include <linux/vserver/tag.h>
4594  #include "ext2.h"
4595  #include "acl.h"
4596  #include "xip.h"
4597 @@ -1054,25 +1055,70 @@ void ext2_set_inode_flags(struct inode *
4598  {
4599         unsigned int flags = EXT2_I(inode)->i_flags;
4600  
4601 -       inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
4602 +       inode->i_flags &= ~(S_IMMUTABLE | S_IUNLINK | S_BARRIER |
4603 +               S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
4604 +
4605 +       if (flags & EXT2_IMMUTABLE_FL)
4606 +               inode->i_flags |= S_IMMUTABLE;
4607 +       if (flags & EXT2_IUNLINK_FL)
4608 +               inode->i_flags |= S_IUNLINK;
4609 +       if (flags & EXT2_BARRIER_FL)
4610 +               inode->i_flags |= S_BARRIER;
4611 +
4612         if (flags & EXT2_SYNC_FL)
4613                 inode->i_flags |= S_SYNC;
4614         if (flags & EXT2_APPEND_FL)
4615                 inode->i_flags |= S_APPEND;
4616 -       if (flags & EXT2_IMMUTABLE_FL)
4617 -               inode->i_flags |= S_IMMUTABLE;
4618         if (flags & EXT2_NOATIME_FL)
4619                 inode->i_flags |= S_NOATIME;
4620         if (flags & EXT2_DIRSYNC_FL)
4621                 inode->i_flags |= S_DIRSYNC;
4622  }
4623  
4624 +int ext2_sync_flags(struct inode *inode)
4625 +{
4626 +       unsigned int oldflags, newflags;
4627 +       /* Rebuild the ext2 flag word in EXT2_I(inode)->i_flags from the inode's current VFS flags. */
4628 +       oldflags = EXT2_I(inode)->i_flags;
4629 +       newflags = oldflags & ~(EXT2_APPEND_FL |
4630 +               EXT2_IMMUTABLE_FL | EXT2_IUNLINK_FL |
4631 +               EXT2_BARRIER_FL | EXT2_NOATIME_FL |
4632 +               EXT2_SYNC_FL | EXT2_DIRSYNC_FL);
4633 +       /* Start from the old word with the seven flags above cleared, then set each one back if the inode has it. */
4634 +       if (IS_APPEND(inode))
4635 +               newflags |= EXT2_APPEND_FL;
4636 +       if (IS_IMMUTABLE(inode))
4637 +               newflags |= EXT2_IMMUTABLE_FL;
4638 +       if (IS_IUNLINK(inode))
4639 +               newflags |= EXT2_IUNLINK_FL;
4640 +       if (IS_BARRIER(inode))
4641 +               newflags |= EXT2_BARRIER_FL;
4642 +
4643 +       /* we do not want to copy superblock flags */
4644 +       if (inode->i_flags & S_NOATIME)
4645 +               newflags |= EXT2_NOATIME_FL;
4646 +       if (inode->i_flags & S_SYNC)
4647 +               newflags |= EXT2_SYNC_FL;
4648 +       if (inode->i_flags & S_DIRSYNC)
4649 +               newflags |= EXT2_DIRSYNC_FL;
4650 +       /* Store the new word, update ctime and dirty the inode only if some bit actually changed. */
4651 +       if (oldflags ^ newflags) {
4652 +               EXT2_I(inode)->i_flags = newflags;
4653 +               inode->i_ctime = CURRENT_TIME;
4654 +               mark_inode_dirty(inode);
4655 +       }
4656 +       /* No failure path: the function always returns 0. */
4657 +       return 0;
4658 +}
4659 +
4660  void ext2_read_inode (struct inode * inode)
4661  {
4662         struct ext2_inode_info *ei = EXT2_I(inode);
4663         ino_t ino = inode->i_ino;
4664         struct buffer_head * bh;
4665         struct ext2_inode * raw_inode = ext2_get_inode(inode->i_sb, ino, &bh);
4666 +       uid_t uid;
4667 +       gid_t gid;
4668         int n;
4669  
4670  #ifdef CONFIG_EXT2_FS_POSIX_ACL
4671 @@ -1083,12 +1129,17 @@ void ext2_read_inode (struct inode * ino
4672                 goto bad_inode;
4673  
4674         inode->i_mode = le16_to_cpu(raw_inode->i_mode);
4675 -       inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
4676 -       inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
4677 +       uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
4678 +       gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
4679         if (!(test_opt (inode->i_sb, NO_UID32))) {
4680 -               inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
4681 -               inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
4682 +               uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
4683 +               gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
4684         }
4685 +       inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
4686 +       inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
4687 +       inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid,
4688 +               le16_to_cpu(raw_inode->i_raw_tag));
4689 +
4690         inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
4691         inode->i_size = le32_to_cpu(raw_inode->i_size);
4692         inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime);
4693 @@ -1186,8 +1237,8 @@ static int ext2_update_inode(struct inod
4694         struct ext2_inode_info *ei = EXT2_I(inode);
4695         struct super_block *sb = inode->i_sb;
4696         ino_t ino = inode->i_ino;
4697 -       uid_t uid = inode->i_uid;
4698 -       gid_t gid = inode->i_gid;
4699 +       uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag);
4700 +       gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag);
4701         struct buffer_head * bh;
4702         struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh);
4703         int n;
4704 @@ -1222,6 +1273,9 @@ static int ext2_update_inode(struct inod
4705                 raw_inode->i_uid_high = 0;
4706                 raw_inode->i_gid_high = 0;
4707         }
4708 +#ifdef CONFIG_TAGGING_INTERN
4709 +       raw_inode->i_raw_tag = cpu_to_le16(inode->i_tag);
4710 +#endif
4711         raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
4712         raw_inode->i_size = cpu_to_le32(inode->i_size);
4713         raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
4714 @@ -1308,7 +1362,8 @@ int ext2_setattr(struct dentry *dentry, 
4715         if (error)
4716                 return error;
4717         if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
4718 -           (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
4719 +           (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) ||
4720 +           (iattr->ia_valid & ATTR_TAG && iattr->ia_tag != inode->i_tag)) {
4721                 error = DQUOT_TRANSFER(inode, iattr) ? -EDQUOT : 0;
4722                 if (error)
4723                         return error;
4724 diff -NurpP --minimal linux-2.6.16.20/fs/ext2/ioctl.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/ioctl.c
4725 --- linux-2.6.16.20/fs/ext2/ioctl.c     2006-04-09 13:49:53 +0200
4726 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/ioctl.c        2006-04-26 19:07:00 +0200
4727 @@ -11,6 +11,7 @@
4728  #include <linux/capability.h>
4729  #include <linux/time.h>
4730  #include <linux/sched.h>
4731 +#include <linux/mount.h>
4732  #include <asm/current.h>
4733  #include <asm/uaccess.h>
4734  
4735 @@ -30,7 +31,8 @@ int ext2_ioctl (struct inode * inode, st
4736         case EXT2_IOC_SETFLAGS: {
4737                 unsigned int oldflags;
4738  
4739 -               if (IS_RDONLY(inode))
4740 +               if (IS_RDONLY(inode) ||
4741 +                       (filp && MNT_IS_RDONLY(filp->f_vfsmnt)))
4742                         return -EROFS;
4743  
4744                 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
4745 @@ -50,7 +52,9 @@ int ext2_ioctl (struct inode * inode, st
4746                  *
4747                  * This test looks nicer. Thanks to Pauline Middelink
4748                  */
4749 -               if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
4750 +               if ((oldflags & EXT2_IMMUTABLE_FL) ||
4751 +                       ((flags ^ oldflags) & (EXT2_APPEND_FL |
4752 +                       EXT2_IMMUTABLE_FL | EXT2_IUNLINK_FL))) {
4753                         if (!capable(CAP_LINUX_IMMUTABLE))
4754                                 return -EPERM;
4755                 }
4756 @@ -69,7 +73,8 @@ int ext2_ioctl (struct inode * inode, st
4757         case EXT2_IOC_SETVERSION:
4758                 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
4759                         return -EPERM;
4760 -               if (IS_RDONLY(inode))
4761 +               if (IS_RDONLY(inode) ||
4762 +                       (filp && MNT_IS_RDONLY(filp->f_vfsmnt)))
4763                         return -EROFS;
4764                 if (get_user(inode->i_generation, (int __user *) arg))
4765                         return -EFAULT; 
4766 diff -NurpP --minimal linux-2.6.16.20/fs/ext2/namei.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/namei.c
4767 --- linux-2.6.16.20/fs/ext2/namei.c     2006-02-18 14:40:21 +0100
4768 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/namei.c        2006-04-26 19:07:00 +0200
4769 @@ -31,6 +31,7 @@
4770   */
4771  
4772  #include <linux/pagemap.h>
4773 +#include <linux/vserver/tag.h>
4774  #include "ext2.h"
4775  #include "xattr.h"
4776  #include "acl.h"
4777 @@ -82,6 +83,7 @@ static struct dentry *ext2_lookup(struct
4778                 inode = iget(dir->i_sb, ino);
4779                 if (!inode)
4780                         return ERR_PTR(-EACCES);
4781 +               dx_propagate_tag(nd, inode);
4782         }
4783         return d_splice_alias(inode, dentry);
4784  }
4785 @@ -407,6 +409,7 @@ struct inode_operations ext2_dir_inode_o
4786  #endif
4787         .setattr        = ext2_setattr,
4788         .permission     = ext2_permission,
4789 +       .sync_flags     = ext2_sync_flags,
4790  };
4791  
4792  struct inode_operations ext2_special_inode_operations = {
4793 @@ -418,4 +421,5 @@ struct inode_operations ext2_special_ino
4794  #endif
4795         .setattr        = ext2_setattr,
4796         .permission     = ext2_permission,
4797 +       .sync_flags     = ext2_sync_flags,
4798  };
4799 diff -NurpP --minimal linux-2.6.16.20/fs/ext2/super.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/super.c
4800 --- linux-2.6.16.20/fs/ext2/super.c     2006-02-18 14:40:21 +0100
4801 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/super.c        2006-04-26 19:07:00 +0200
4802 @@ -230,8 +230,8 @@ static int ext2_show_options(struct seq_
4803  }
4804  
4805  #ifdef CONFIG_QUOTA
4806 -static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off);
4807 -static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off);
4808 +static ssize_t ext2_quota_read(struct dqhash *hash, int type, char *data, size_t len, loff_t off);
4809 +static ssize_t ext2_quota_write(struct dqhash *hash, int type, const char *data, size_t len, loff_t off);
4810  #endif
4811  
4812  static struct super_operations ext2_sops = {
4813 @@ -289,7 +289,7 @@ enum {
4814         Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug,
4815         Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr,
4816         Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota,
4817 -       Opt_usrquota, Opt_grpquota
4818 +       Opt_usrquota, Opt_grpquota, Opt_tag, Opt_notag, Opt_tagid
4819  };
4820  
4821  static match_table_t tokens = {
4822 @@ -317,6 +317,10 @@ static match_table_t tokens = {
4823         {Opt_acl, "acl"},
4824         {Opt_noacl, "noacl"},
4825         {Opt_xip, "xip"},
4826 +       {Opt_tag, "tag"},
4827 +       {Opt_notag, "notag"},
4828 +       {Opt_tagid, "tagid=%u"},
4829 +       {Opt_tag, "tagxid"},
4830         {Opt_grpquota, "grpquota"},
4831         {Opt_ignore, "noquota"},
4832         {Opt_quota, "quota"},
4833 @@ -380,6 +384,20 @@ static int parse_options (char * options
4834                 case Opt_nouid32:
4835                         set_opt (sbi->s_mount_opt, NO_UID32);
4836                         break;
4837 +#ifndef CONFIG_TAGGING_NONE
4838 +               case Opt_tag:
4839 +                       set_opt (sbi->s_mount_opt, TAGGED);
4840 +                       break;
4841 +               case Opt_notag:
4842 +                       clear_opt (sbi->s_mount_opt, TAGGED);
4843 +                       break;
4844 +#endif
4845 +#ifdef CONFIG_PROPAGATE
4846 +               case Opt_tagid:
4847 +                       /* use args[0] */
4848 +                       set_opt (sbi->s_mount_opt, TAGGED);
4849 +                       break;
4850 +#endif
4851                 case Opt_nocheck:
4852                         clear_opt (sbi->s_mount_opt, CHECK);
4853                         break;
4854 @@ -681,6 +699,8 @@ static int ext2_fill_super(struct super_
4855         if (!parse_options ((char *) data, sbi))
4856                 goto failed_mount;
4857  
4858 +       if (EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_TAGGED)
4859 +               sb->s_flags |= MS_TAGGED;
4860         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
4861                 ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
4862                  MS_POSIXACL : 0);
4863 @@ -990,6 +1010,13 @@ static int ext2_remount (struct super_bl
4864                 goto restore_opts;
4865         }
4866  
4867 +       if ((sbi->s_mount_opt & EXT2_MOUNT_TAGGED) &&
4868 +               !(sb->s_flags & MS_TAGGED)) {
4869 +               printk("EXT2-fs: %s: tagging not permitted on remount.\n",
4870 +                      sb->s_id);
4871 +               err = -EINVAL;
4872 +               goto restore_opts;
4873 +       }
4874         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
4875                 ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
4876  
4877 @@ -1101,10 +1128,11 @@ static struct super_block *ext2_get_sb(s
4878   * acquiring the locks... As quota files are never truncated and quota code
4879   * itself serializes the operations (and noone else should touch the files)
4880   * we don't have to be afraid of races */
4881 -static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data,
4882 +static ssize_t ext2_quota_read(struct dqhash *hash, int type, char *data,
4883                                size_t len, loff_t off)
4884  {
4885 -       struct inode *inode = sb_dqopt(sb)->files[type];
4886 +       struct inode *inode = dqh_dqopt(hash)->files[type];
4887 +       struct super_block *sb = hash->dqh_sb;
4888         sector_t blk = off >> EXT2_BLOCK_SIZE_BITS(sb);
4889         int err = 0;
4890         int offset = off & (sb->s_blocksize - 1);
4891 @@ -1145,10 +1173,11 @@ static ssize_t ext2_quota_read(struct su
4892  }
4893  
4894  /* Write to quotafile */
4895 -static ssize_t ext2_quota_write(struct super_block *sb, int type,
4896 +static ssize_t ext2_quota_write(struct dqhash *hash, int type,
4897                                 const char *data, size_t len, loff_t off)
4898  {
4899 -       struct inode *inode = sb_dqopt(sb)->files[type];
4900 +       struct inode *inode = dqh_dqopt(hash)->files[type];
4901 +       struct super_block *sb = hash->dqh_sb;
4902         sector_t blk = off >> EXT2_BLOCK_SIZE_BITS(sb);
4903         int err = 0;
4904         int offset = off & (sb->s_blocksize - 1);
4905 diff -NurpP --minimal linux-2.6.16.20/fs/ext2/symlink.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/symlink.c
4906 --- linux-2.6.16.20/fs/ext2/symlink.c   2005-08-29 22:25:30 +0200
4907 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/symlink.c      2006-04-26 19:07:00 +0200
4908 @@ -38,6 +38,7 @@ struct inode_operations ext2_symlink_ino
4909         .listxattr      = ext2_listxattr,
4910         .removexattr    = generic_removexattr,
4911  #endif
4912 +       .sync_flags     = ext2_sync_flags,
4913  };
4914   
4915  struct inode_operations ext2_fast_symlink_inode_operations = {
4916 @@ -49,4 +50,5 @@ struct inode_operations ext2_fast_symlin
4917         .listxattr      = ext2_listxattr,
4918         .removexattr    = generic_removexattr,
4919  #endif
4920 +       .sync_flags     = ext2_sync_flags,
4921  };
4922 diff -NurpP --minimal linux-2.6.16.20/fs/ext2/xattr.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/xattr.c
4923 --- linux-2.6.16.20/fs/ext2/xattr.c     2006-02-18 14:40:21 +0100
4924 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/xattr.c        2006-04-26 19:07:00 +0200
4925 @@ -60,6 +60,7 @@
4926  #include <linux/mbcache.h>
4927  #include <linux/quotaops.h>
4928  #include <linux/rwsem.h>
4929 +#include <linux/vs_dlimit.h>
4930  #include "ext2.h"
4931  #include "xattr.h"
4932  #include "acl.h"
4933 @@ -645,8 +646,15 @@ ext2_xattr_set2(struct inode *inode, str
4934                                    the inode.  */
4935                                 ea_bdebug(new_bh, "reusing block");
4936  
4937 +                               error = -ENOSPC;
4938 +                               if (DLIMIT_ALLOC_BLOCK(inode, 1)) {
4939 +                                       /* new_bh is still locked */
4940 +                                       unlock_buffer(new_bh);
4941 +                                       goto cleanup;
4942 +                               }
4943                                 error = -EDQUOT;
4944                                 if (DQUOT_ALLOC_BLOCK(inode, 1)) {
4945 +                                       DLIMIT_FREE_BLOCK(inode, 1);
4946                                         unlock_buffer(new_bh);
4947                                         goto cleanup;
4948                                 }
4946 @@ -740,6 +745,7 @@ ext2_xattr_set2(struct inode *inode, str
4947                                 le32_to_cpu(HDR(old_bh)->h_refcount) - 1);
4948                         if (ce)
4949                                 mb_cache_entry_release(ce);
4950 +                       DLIMIT_FREE_BLOCK(inode, 1);
4951                         DQUOT_FREE_BLOCK(inode, 1);
4952                         mark_buffer_dirty(old_bh);
4953                         ea_bdebug(old_bh, "refcount now=%d",
4954 @@ -804,6 +810,7 @@ ext2_xattr_delete_inode(struct inode *in
4955                 mark_buffer_dirty(bh);
4956                 if (IS_SYNC(inode))
4957                         sync_dirty_buffer(bh);
4958 +               DLIMIT_FREE_BLOCK(inode, 1);
4959                 DQUOT_FREE_BLOCK(inode, 1);
4960         }
4961         EXT2_I(inode)->i_file_acl = 0;
4962 diff -NurpP --minimal linux-2.6.16.20/fs/ext3/balloc.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/balloc.c
4963 --- linux-2.6.16.20/fs/ext3/balloc.c    2006-02-18 14:40:21 +0100
4964 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/balloc.c       2006-04-26 19:07:00 +0200
4965 @@ -20,6 +20,8 @@
4966  #include <linux/ext3_jbd.h>
4967  #include <linux/quotaops.h>
4968  #include <linux/buffer_head.h>
4969 +#include <linux/vs_dlimit.h>
4970 +#include <linux/vs_tag.h>
4971  
4972  /*
4973   * balloc.c contains the blocks allocation and deallocation routines
4974 @@ -504,8 +506,10 @@ void ext3_free_blocks(handle_t *handle, 
4975                 return;
4976         }
4977         ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
4978 -       if (dquot_freed_blocks)
4979 +       if (dquot_freed_blocks) {
4980 +               DLIMIT_FREE_BLOCK(inode, dquot_freed_blocks);
4981                 DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
4982 +       }
4983         return;
4984  }
4985  
4986 @@ -1116,18 +1120,32 @@ out:
4987         return ret;
4988  }
4989  
4990 -static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
4991 +static int ext3_has_free_blocks(struct super_block *sb)
4992  {
4993 -       int free_blocks, root_blocks;
4994 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
4995 +       int free_blocks, root_blocks, cond;
4996  
4997         free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
4998         root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
4999 -       if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
5000 +
5001 +       vxdprintk(VXD_CBIT(dlim, 3),
5002 +               "ext3_has_free_blocks(%p): free=%u, root=%u",
5003 +               sb, free_blocks, root_blocks);
5004 +
5005 +       DLIMIT_ADJUST_BLOCK(sb, dx_current_tag(), &free_blocks, &root_blocks);
5006 +
5007 +       cond = (free_blocks < root_blocks + 1 &&
5008 +               !capable(CAP_SYS_RESOURCE) &&
5009                 sbi->s_resuid != current->fsuid &&
5010 -               (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
5011 -               return 0;
5012 -       }
5013 -       return 1;
5014 +               (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid)));
5015 +
5016 +       vxdprintk(VXD_CBIT(dlim, 3),
5017 +               "ext3_has_free_blocks(%p): %u<%u+1, %c, %u!=%u r=%d",
5018 +               sb, free_blocks, root_blocks,
5019 +               !capable(CAP_SYS_RESOURCE)?'1':'0',
5020 +               sbi->s_resuid, current->fsuid, cond?0:1);
5021 +
5022 +       return (cond ? 0 : 1);
5023  }
5024  
5025  /*
5026 @@ -1138,7 +1156,7 @@ static int ext3_has_free_blocks(struct e
5027   */
5028  int ext3_should_retry_alloc(struct super_block *sb, int *retries)
5029  {
5030 -       if (!ext3_has_free_blocks(EXT3_SB(sb)) || (*retries)++ > 3)
5031 +       if (!ext3_has_free_blocks(sb) || (*retries)++ > 3)
5032                 return 0;
5033  
5034         jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
5035 @@ -1193,6 +1211,8 @@ int ext3_new_block(handle_t *handle, str
5036                 *errp = -EDQUOT;
5037                 return 0;
5038         }
5039 +       if (DLIMIT_ALLOC_BLOCK(inode, 1))
5040 +           goto out_dlimit;
5041  
5042         sbi = EXT3_SB(sb);
5043         es = EXT3_SB(sb)->s_es;
5044 @@ -1209,7 +1229,7 @@ int ext3_new_block(handle_t *handle, str
5045         if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
5046                 my_rsv = &block_i->rsv_window_node;
5047  
5048 -       if (!ext3_has_free_blocks(sbi)) {
5049 +       if (!ext3_has_free_blocks(sb)) {
5050                 *errp = -ENOSPC;
5051                 goto out;
5052         }
5053 @@ -1393,6 +1413,9 @@ allocated:
5054  io_error:
5055         *errp = -EIO;
5056  out:
5057 +       if (!performed_allocation)
5058 +               DLIMIT_FREE_BLOCK(inode, 1);
5059 +out_dlimit:
5060         if (fatal) {
5061                 *errp = fatal;
5062                 ext3_std_error(sb, fatal);
5063 diff -NurpP --minimal linux-2.6.16.20/fs/ext3/file.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/file.c
5064 --- linux-2.6.16.20/fs/ext3/file.c      2005-08-29 22:25:30 +0200
5065 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/file.c 2006-04-26 19:07:00 +0200
5066 @@ -119,6 +119,7 @@ struct file_operations ext3_file_operati
5067         .release        = ext3_release_file,
5068         .fsync          = ext3_sync_file,
5069         .sendfile       = generic_file_sendfile,
5070 +       .sendpage       = generic_file_sendpage,
5071  };
5072  
5073  struct inode_operations ext3_file_inode_operations = {
5074 @@ -131,5 +132,6 @@ struct inode_operations ext3_file_inode_
5075         .removexattr    = generic_removexattr,
5076  #endif
5077         .permission     = ext3_permission,
5078 +       .sync_flags     = ext3_sync_flags,
5079  };
5080  
5081 diff -NurpP --minimal linux-2.6.16.20/fs/ext3/ialloc.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/ialloc.c
5082 --- linux-2.6.16.20/fs/ext3/ialloc.c    2006-04-09 13:49:53 +0200
5083 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/ialloc.c       2006-04-26 19:07:00 +0200
5084 @@ -23,6 +23,8 @@
5085  #include <linux/buffer_head.h>
5086  #include <linux/random.h>
5087  #include <linux/bitops.h>
5088 +#include <linux/vs_dlimit.h>
5089 +#include <linux/vs_tag.h>
5090  
5091  #include <asm/byteorder.h>
5092  
5093 @@ -127,6 +129,7 @@ void ext3_free_inode (handle_t *handle, 
5094         ext3_xattr_delete_inode(handle, inode);
5095         DQUOT_FREE_INODE(inode);
5096         DQUOT_DROP(inode);
5097 +       DLIMIT_FREE_INODE(inode);
5098  
5099         is_directory = S_ISDIR(inode->i_mode);
5100  
5101 @@ -443,6 +446,12 @@ struct inode *ext3_new_inode(handle_t *h
5102         inode = new_inode(sb);
5103         if (!inode)
5104                 return ERR_PTR(-ENOMEM);
5105 +
5106 +       inode->i_tag = dx_current_fstag(sb);
5107 +       if (DLIMIT_ALLOC_INODE(inode)) {
5108 +               err = -ENOSPC;
5109 +               goto out_dlimit;
5110 +       }
5111         ei = EXT3_I(inode);
5112  
5113         sbi = EXT3_SB(sb);
5114 @@ -565,7 +574,8 @@ got:
5115         ei->i_dir_start_lookup = 0;
5116         ei->i_disksize = 0;
5117  
5118 -       ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL;
5119 +       ei->i_flags = EXT3_I(dir)->i_flags &
5120 +               ~(EXT3_INDEX_FL|EXT3_IUNLINK_FL|EXT3_BARRIER_FL);
5121         if (S_ISLNK(mode))
5122                 ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL);
5123         /* dirsync only applies to directories */
5124 @@ -620,6 +630,8 @@ got:
5125  fail:
5126         ext3_std_error(sb, err);
5127  out:
5128 +       DLIMIT_FREE_INODE(inode);
5129 +out_dlimit:
5130         iput(inode);
5131         ret = ERR_PTR(err);
5132  really_out:
5133 @@ -631,6 +643,7 @@ fail_free_drop:
5134  
5135  fail_drop:
5136         DQUOT_DROP(inode);
5137 +       DLIMIT_FREE_INODE(inode);
5138         inode->i_flags |= S_NOQUOTA;
5139         inode->i_nlink = 0;
5140         iput(inode);
5141 diff -NurpP --minimal linux-2.6.16.20/fs/ext3/inode.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/inode.c
5142 --- linux-2.6.16.20/fs/ext3/inode.c     2006-04-09 13:49:53 +0200
5143 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/inode.c        2006-04-26 19:07:00 +0200
5144 @@ -36,6 +36,7 @@
5145  #include <linux/writeback.h>
5146  #include <linux/mpage.h>
5147  #include <linux/uio.h>
5148 +#include <linux/vserver/tag.h>
5149  #include "xattr.h"
5150  #include "acl.h"
5151  
5152 @@ -2422,19 +2423,77 @@ void ext3_set_inode_flags(struct inode *
5153  {
5154         unsigned int flags = EXT3_I(inode)->i_flags;
5155  
5156 -       inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
5157 +       inode->i_flags &= ~(S_IMMUTABLE | S_IUNLINK | S_BARRIER |
5158 +               S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
5159 +
5160 +       if (flags & EXT3_IMMUTABLE_FL)
5161 +               inode->i_flags |= S_IMMUTABLE;
5162 +       if (flags & EXT3_IUNLINK_FL)
5163 +               inode->i_flags |= S_IUNLINK;
5164 +       if (flags & EXT3_BARRIER_FL)
5165 +               inode->i_flags |= S_BARRIER;
5166 +
5167         if (flags & EXT3_SYNC_FL)
5168                 inode->i_flags |= S_SYNC;
5169         if (flags & EXT3_APPEND_FL)
5170                 inode->i_flags |= S_APPEND;
5171 -       if (flags & EXT3_IMMUTABLE_FL)
5172 -               inode->i_flags |= S_IMMUTABLE;
5173         if (flags & EXT3_NOATIME_FL)
5174                 inode->i_flags |= S_NOATIME;
5175         if (flags & EXT3_DIRSYNC_FL)
5176                 inode->i_flags |= S_DIRSYNC;
5177  }
5178  
5179 +int ext3_sync_flags(struct inode *inode)
5180 +{
5181 +       unsigned int oldflags, newflags;
5182 +       int err = 0;
5183 +
5184 +       oldflags = EXT3_I(inode)->i_flags;
5185 +       newflags = oldflags & ~(EXT3_APPEND_FL |
5186 +               EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL |
5187 +               EXT3_BARRIER_FL | EXT3_NOATIME_FL |
5188 +               EXT3_SYNC_FL | EXT3_DIRSYNC_FL);
5189 +       /* re-add each flag that is set on the in-core inode right now */
5190 +       if (IS_APPEND(inode))
5191 +               newflags |= EXT3_APPEND_FL;
5192 +       if (IS_IMMUTABLE(inode))
5193 +               newflags |= EXT3_IMMUTABLE_FL;
5194 +       if (IS_IUNLINK(inode))
5195 +               newflags |= EXT3_IUNLINK_FL;
5196 +       if (IS_BARRIER(inode))
5197 +               newflags |= EXT3_BARRIER_FL;
5198 +
5199 +       /* we do not want to copy superblock flags */
5200 +       if (inode->i_flags & S_NOATIME)
5201 +               newflags |= EXT3_NOATIME_FL;
5202 +       if (inode->i_flags & S_SYNC)
5203 +               newflags |= EXT3_SYNC_FL;
5204 +       if (inode->i_flags & S_DIRSYNC)
5205 +               newflags |= EXT3_DIRSYNC_FL;
5206 +       /* journal the inode only when the ext3 flag word actually changed */
5207 +       if (oldflags ^ newflags) {
5208 +               handle_t *handle;
5209 +               struct ext3_iloc iloc;
5210 +
5211 +               handle = ext3_journal_start(inode, 1);
5212 +               if (IS_ERR(handle))
5213 +                       return PTR_ERR(handle);
5214 +               if (IS_SYNC(inode))
5215 +                       handle->h_sync = 1;
5216 +               err = ext3_reserve_inode_write(handle, inode, &iloc);
5217 +               if (err)
5218 +                       goto flags_err;
5219 +
5220 +               EXT3_I(inode)->i_flags = newflags;
5221 +               inode->i_ctime = CURRENT_TIME;
5222 +
5223 +               err = ext3_mark_iloc_dirty(handle, inode, &iloc);
5224 +       flags_err:
5225 +               ext3_journal_stop(handle);
5226 +       }
5227 +       return err;
5228 +}
5229 +
5230  void ext3_read_inode(struct inode * inode)
5231  {
5232         struct ext3_iloc iloc;
5233 @@ -2442,6 +2501,8 @@ void ext3_read_inode(struct inode * inod
5234         struct ext3_inode_info *ei = EXT3_I(inode);
5235         struct buffer_head *bh;
5236         int block;
5237 +       uid_t uid;
5238 +       gid_t gid;
5239  
5240  #ifdef CONFIG_EXT3_FS_POSIX_ACL
5241         ei->i_acl = EXT3_ACL_NOT_CACHED;
5242 @@ -2454,12 +2515,17 @@ void ext3_read_inode(struct inode * inod
5243         bh = iloc.bh;
5244         raw_inode = ext3_raw_inode(&iloc);
5245         inode->i_mode = le16_to_cpu(raw_inode->i_mode);
5246 -       inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
5247 -       inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
5248 +       uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
5249 +       gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
5250         if(!(test_opt (inode->i_sb, NO_UID32))) {
5251 -               inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
5252 -               inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
5253 +               uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
5254 +               gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
5255         }
5256 +       inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
5257 +       inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
5258 +       inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid,
5259 +               le16_to_cpu(raw_inode->i_raw_tag));
5260 +
5261         inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
5262         inode->i_size = le32_to_cpu(raw_inode->i_size);
5263         inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime);
5264 @@ -2586,6 +2652,8 @@ static int ext3_do_update_inode(handle_t
5265         struct ext3_inode *raw_inode = ext3_raw_inode(iloc);
5266         struct ext3_inode_info *ei = EXT3_I(inode);
5267         struct buffer_head *bh = iloc->bh;
5268 +       uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag);
5269 +       gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag);
5270         int err = 0, rc, block;
5271  
5272         /* For fields not not tracking in the in-memory inode,
5273 @@ -2595,29 +2663,32 @@ static int ext3_do_update_inode(handle_t
5274  
5275         raw_inode->i_mode = cpu_to_le16(inode->i_mode);
5276         if(!(test_opt(inode->i_sb, NO_UID32))) {
5277 -               raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
5278 -               raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
5279 +               raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid));
5280 +               raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid));
5281  /*
5282   * Fix up interoperability with old kernels. Otherwise, old inodes get
5283   * re-used with the upper 16 bits of the uid/gid intact
5284   */
5285                 if(!ei->i_dtime) {
5286                         raw_inode->i_uid_high =
5287 -                               cpu_to_le16(high_16_bits(inode->i_uid));
5288 +                               cpu_to_le16(high_16_bits(uid));
5289                         raw_inode->i_gid_high =
5290 -                               cpu_to_le16(high_16_bits(inode->i_gid));
5291 +                               cpu_to_le16(high_16_bits(gid));
5292                 } else {
5293                         raw_inode->i_uid_high = 0;
5294                         raw_inode->i_gid_high = 0;
5295                 }
5296         } else {
5297                 raw_inode->i_uid_low =
5298 -                       cpu_to_le16(fs_high2lowuid(inode->i_uid));
5299 +                       cpu_to_le16(fs_high2lowuid(uid));
5300                 raw_inode->i_gid_low =
5301 -                       cpu_to_le16(fs_high2lowgid(inode->i_gid));
5302 +                       cpu_to_le16(fs_high2lowgid(gid));
5303                 raw_inode->i_uid_high = 0;
5304                 raw_inode->i_gid_high = 0;
5305         }
5306 +#ifdef CONFIG_TAGGING_INTERN
5307 +       raw_inode->i_raw_tag = cpu_to_le16(inode->i_tag);
5308 +#endif
5309         raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
5310         raw_inode->i_size = cpu_to_le32(ei->i_disksize);
5311         raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
5312 @@ -2770,7 +2841,8 @@ int ext3_setattr(struct dentry *dentry, 
5313                 return error;
5314  
5315         if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
5316 -               (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
5317 +               (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) ||
5318 +               (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) {
5319                 handle_t *handle;
5320  
5321                 /* (user+group)*(old+new) structure, inode write (sb,
5322 @@ -2792,6 +2864,8 @@ int ext3_setattr(struct dentry *dentry, 
5323                         inode->i_uid = attr->ia_uid;
5324                 if (attr->ia_valid & ATTR_GID)
5325                         inode->i_gid = attr->ia_gid;
5326 +               if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode))
5327 +                       inode->i_tag = attr->ia_tag;
5328                 error = ext3_mark_inode_dirty(handle, inode);
5329                 ext3_journal_stop(handle);
5330         }
5331 diff -NurpP --minimal linux-2.6.16.20/fs/ext3/ioctl.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/ioctl.c
5332 --- linux-2.6.16.20/fs/ext3/ioctl.c     2006-02-18 14:40:22 +0100
5333 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/ioctl.c        2006-04-29 02:58:07 +0200
5334 @@ -8,11 +8,13 @@
5335   */
5336  
5337  #include <linux/fs.h>
5338 +#include <linux/mount.h>
5339  #include <linux/jbd.h>
5340  #include <linux/capability.h>
5341  #include <linux/ext3_fs.h>
5342  #include <linux/ext3_jbd.h>
5343  #include <linux/time.h>
5344 +#include <linux/vserver/tag.h>
5345  #include <asm/uaccess.h>
5346  
5347  
5348 @@ -36,7 +38,8 @@ int ext3_ioctl (struct inode * inode, st
5349                 unsigned int oldflags;
5350                 unsigned int jflag;
5351  
5352 -               if (IS_RDONLY(inode))
5353 +               if (IS_RDONLY(inode) ||
5354 +                       (filp && MNT_IS_RDONLY(filp->f_vfsmnt)))
5355                         return -EROFS;
5356  
5357                 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
5358 @@ -59,7 +62,9 @@ int ext3_ioctl (struct inode * inode, st
5359                  *
5360                  * This test looks nicer. Thanks to Pauline Middelink
5361                  */
5362 -               if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) {
5363 +               if ((oldflags & EXT3_IMMUTABLE_FL) ||
5364 +                       ((flags ^ oldflags) & (EXT3_APPEND_FL |
5365 +                       EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL))) {
5366                         if (!capable(CAP_LINUX_IMMUTABLE))
5367                                 return -EPERM;
5368                 }
5369 @@ -112,7 +117,8 @@ flags_err:
5370  
5371                 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
5372                         return -EPERM;
5373 -               if (IS_RDONLY(inode))
5374 +               if (IS_RDONLY(inode) ||
5375 +                       (filp && MNT_IS_RDONLY(filp->f_vfsmnt)))
5376                         return -EROFS;
5377                 if (get_user(generation, (int __user *) arg))
5378                         return -EFAULT;
5379 @@ -166,7 +172,8 @@ flags_err:
5380                 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
5381                         return -ENOTTY;
5382  
5383 -               if (IS_RDONLY(inode))
5384 +               if (IS_RDONLY(inode) ||
5385 +                       (filp && MNT_IS_RDONLY(filp->f_vfsmnt)))
5386                         return -EROFS;
5387  
5388                 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
5389 @@ -201,7 +208,8 @@ flags_err:
5390                 if (!capable(CAP_SYS_RESOURCE))
5391                         return -EPERM;
5392  
5393 -               if (IS_RDONLY(inode))
5394 +               if (IS_RDONLY(inode) ||
5395 +                       (filp && MNT_IS_RDONLY(filp->f_vfsmnt)))
5396                         return -EROFS;
5397  
5398                 if (get_user(n_blocks_count, (__u32 __user *)arg))
5399 @@ -222,7 +230,8 @@ flags_err:
5400                 if (!capable(CAP_SYS_RESOURCE))
5401                         return -EPERM;
5402  
5403 -               if (IS_RDONLY(inode))
5404 +               if (IS_RDONLY(inode) ||
5405 +                       (filp && MNT_IS_RDONLY(filp->f_vfsmnt)))
5406                         return -EROFS;
5407  
5408                 if (copy_from_user(&input, (struct ext3_new_group_input __user *)arg,
5409 @@ -237,6 +246,38 @@ flags_err:
5410                 return err;
5411         }
5412  
5413 +#if defined(CONFIG_VSERVER_LEGACY) && !defined(CONFIG_TAGGING_NONE)
5414 +       case EXT3_IOC_SETTAG: {
5415 +               handle_t *handle;
5416 +               struct ext3_iloc iloc;
5417 +               int tag;
5418 +               int err;
5419 +
5420 +               /* fixme: if stealth, return -ENOTTY */
5421 +               if (!capable(CAP_CONTEXT))
5422 +                       return -EPERM;
5423 +               if (IS_RDONLY(inode) ||
5424 +                       (filp && MNT_IS_RDONLY(filp->f_vfsmnt)))
5425 +                       return -EROFS;
5426 +               if (!(inode->i_sb->s_flags & MS_TAGGED))
5427 +                       return -ENOSYS;
5428 +               if (get_user(tag, (int __user *) arg))
5429 +                       return -EFAULT;
5430 +
5431 +               handle = ext3_journal_start(inode, 1);
5432 +               if (IS_ERR(handle))
5433 +                       return PTR_ERR(handle);
5434 +               err = ext3_reserve_inode_write(handle, inode, &iloc);
5435 +               if (!err) {
5436 +                       inode->i_tag = (tag & 0xFFFF);
5437 +                       inode->i_ctime = CURRENT_TIME;
5438 +                       err = ext3_mark_iloc_dirty(handle, inode, &iloc);
5439 +               }
5440 +               /* stop the handle on every path, reserve failure included */
5441 +               ext3_journal_stop(handle);
5442 +               return err;
5443 +       }
5444 +#endif
5445  
5446         default:
5447                 return -ENOTTY;
5448 diff -NurpP --minimal linux-2.6.16.20/fs/ext3/namei.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/namei.c
5449 --- linux-2.6.16.20/fs/ext3/namei.c     2006-04-09 13:49:53 +0200
5450 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/namei.c        2006-04-26 19:07:00 +0200
5451 @@ -36,6 +36,7 @@
5452  #include <linux/quotaops.h>
5453  #include <linux/buffer_head.h>
5454  #include <linux/smp_lock.h>
5455 +#include <linux/vserver/tag.h>
5456  
5457  #include "namei.h"
5458  #include "xattr.h"
5459 @@ -1004,6 +1005,7 @@ static struct dentry *ext3_lookup(struct
5460  
5461                 if (!inode)
5462                         return ERR_PTR(-EACCES);
5463 +               dx_propagate_tag(nd, inode);
5464         }
5465         return d_splice_alias(inode, dentry);
5466  }
5467 @@ -2373,6 +2375,7 @@ struct inode_operations ext3_dir_inode_o
5468         .removexattr    = generic_removexattr,
5469  #endif
5470         .permission     = ext3_permission,
5471 +       .sync_flags     = ext3_sync_flags,
5472  };
5473  
5474  struct inode_operations ext3_special_inode_operations = {
5475 @@ -2384,4 +2387,5 @@ struct inode_operations ext3_special_ino
5476         .removexattr    = generic_removexattr,
5477  #endif
5478         .permission     = ext3_permission,
5479 +       .sync_flags     = ext3_sync_flags,
5480  }; 
5481 diff -NurpP --minimal linux-2.6.16.20/fs/ext3/super.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/super.c
5482 --- linux-2.6.16.20/fs/ext3/super.c     2006-04-09 13:49:53 +0200
5483 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/super.c        2006-04-26 19:07:00 +0200
5484 @@ -563,12 +563,12 @@ static int ext3_write_dquot(struct dquot
5485  static int ext3_acquire_dquot(struct dquot *dquot);
5486  static int ext3_release_dquot(struct dquot *dquot);
5487  static int ext3_mark_dquot_dirty(struct dquot *dquot);
5488 -static int ext3_write_info(struct super_block *sb, int type);
5489 -static int ext3_quota_on(struct super_block *sb, int type, int format_id, char *path);
5490 -static int ext3_quota_on_mount(struct super_block *sb, int type);
5491 -static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
5492 +static int ext3_write_info(struct dqhash *hash, int type);
5493 +static int ext3_quota_on(struct dqhash *hash, int type, int format_id, char *path);
5494 +static int ext3_quota_on_mount(struct dqhash *hash, int type);
5495 +static ssize_t ext3_quota_read(struct dqhash *hash, int type, char *data,
5496                                size_t len, loff_t off);
5497 -static ssize_t ext3_quota_write(struct super_block *sb, int type,
5498 +static ssize_t ext3_quota_write(struct dqhash *hash, int type,
5499                                 const char *data, size_t len, loff_t off);
5500  
5501  static struct dquot_operations ext3_quota_operations = {
5502 @@ -634,7 +634,7 @@ enum {
5503         Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
5504         Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
5505         Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
5506 -       Opt_grpquota
5507 +       Opt_grpquota, Opt_tag, Opt_notag, Opt_tagid
5508  };
5509  
5510  static match_table_t tokens = {
5511 @@ -683,6 +683,10 @@ static match_table_t tokens = {
5512         {Opt_quota, "quota"},
5513         {Opt_usrquota, "usrquota"},
5514         {Opt_barrier, "barrier=%u"},
5515 +       {Opt_tag, "tag"},
5516 +       {Opt_notag, "notag"},
5517 +       {Opt_tagid, "tagid=%u"},
5518 +       {Opt_tag, "tagxid"},
5519         {Opt_err, NULL},
5520         {Opt_resize, "resize"},
5521  };
5522 @@ -775,6 +779,20 @@ static int parse_options (char *options,
5523                 case Opt_nouid32:
5524                         set_opt (sbi->s_mount_opt, NO_UID32);
5525                         break;
5526 +#ifndef CONFIG_TAGGING_NONE
5527 +               case Opt_tag:   /* "tag" and "tagxid" both map here */
5528 +                       set_opt (sbi->s_mount_opt, TAGGED);
5529 +                       break;
5530 +               case Opt_notag:
5531 +                       clear_opt (sbi->s_mount_opt, TAGGED);
5532 +                       break;
5533 +#endif
5534 +#ifdef CONFIG_PROPAGATE
5535 +               case Opt_tagid:
5536 +                       /* fixme: args[0] (the tagid value) is not used yet */
5537 +                       set_opt (sbi->s_mount_opt, TAGGED);
5538 +                       break;
5539 +#endif
5540                 case Opt_nocheck:
5541                         clear_opt (sbi->s_mount_opt, CHECK);
5542                         break;
5543 @@ -893,7 +911,7 @@ static int parse_options (char *options,
5544                 case Opt_grpjquota:
5545                         qtype = GRPQUOTA;
5546  set_qf_name:
5547 -                       if (sb_any_quota_enabled(sb)) {
5548 +                       if (dqh_any_quota_enabled(sb->s_dqh)) {
5549                                 printk(KERN_ERR
5550                                         "EXT3-fs: Cannot change journalled "
5551                                         "quota options when quota turned on.\n");
5552 @@ -931,7 +949,7 @@ set_qf_name:
5553                 case Opt_offgrpjquota:
5554                         qtype = GRPQUOTA;
5555  clear_qf_name:
5556 -                       if (sb_any_quota_enabled(sb)) {
5557 +                       if (dqh_any_quota_enabled(sb->s_dqh)) {
5558                                 printk(KERN_ERR "EXT3-fs: Cannot change "
5559                                         "journalled quota options when "
5560                                         "quota turned on.\n");
5561 @@ -959,7 +977,7 @@ clear_qf_name:
5562                         set_opt(sbi->s_mount_opt, GRPQUOTA);
5563                         break;
5564                 case Opt_noquota:
5565 -                       if (sb_any_quota_enabled(sb)) {
5566 +                       if (dqh_any_quota_enabled(sb->s_dqh)) {
5567                                 printk(KERN_ERR "EXT3-fs: Cannot change quota "
5568                                         "options when quota turned on.\n");
5569                                 return 0;
5570 @@ -1231,7 +1249,7 @@ static void ext3_orphan_cleanup (struct 
5571         /* Turn on quotas so that they are updated correctly */
5572         for (i = 0; i < MAXQUOTAS; i++) {
5573                 if (EXT3_SB(sb)->s_qf_names[i]) {
5574 -                       int ret = ext3_quota_on_mount(sb, i);
5575 +                       int ret = ext3_quota_on_mount(sb->s_dqh, i);
5576                         if (ret < 0)
5577                                 printk(KERN_ERR
5578                                         "EXT3-fs: Cannot turn on journalled "
5579 @@ -1281,8 +1299,8 @@ static void ext3_orphan_cleanup (struct 
5580  #ifdef CONFIG_QUOTA
5581         /* Turn quotas off */
5582         for (i = 0; i < MAXQUOTAS; i++) {
5583 -               if (sb_dqopt(sb)->files[i])
5584 -                       vfs_quota_off(sb, i);
5585 +               if (dqh_dqopt(sb->s_dqh)->files[i])
5586 +                       vfs_quota_off(sb->s_dqh, i);
5587         }
5588  #endif
5589         sb->s_flags = s_flags; /* Restore MS_RDONLY status */
5590 @@ -1429,6 +1447,9 @@ static int ext3_fill_super (struct super
5591                             NULL, 0))
5592                 goto failed_mount;
5593  
5594 +       if (EXT3_SB(sb)->s_mount_opt & EXT3_MOUNT_TAGGED)
5595 +               sb->s_flags |= MS_TAGGED;
5596 +
5597         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
5598                 ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
5599  
5600 @@ -1620,8 +1641,8 @@ static int ext3_fill_super (struct super
5601         sb->s_export_op = &ext3_export_ops;
5602         sb->s_xattr = ext3_xattr_handlers;
5603  #ifdef CONFIG_QUOTA
5604 -       sb->s_qcop = &ext3_qctl_operations;
5605 -       sb->dq_op = &ext3_quota_operations;
5606 +       sb->s_dqh->dqh_qop = &ext3_quota_operations;
5607 +       sb->s_dqh->dqh_qcop = &ext3_qctl_operations;
5608  #endif
5609         INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
5610  
5611 @@ -2238,6 +2259,12 @@ static int ext3_remount (struct super_bl
5612  
5613         if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
5614                 ext3_abort(sb, __FUNCTION__, "Abort forced by user");
5615 +       if ((sbi->s_mount_opt & EXT3_MOUNT_TAGGED) &&
5616 +               !(sb->s_flags & MS_TAGGED)) {
5617 +               printk("EXT3-fs: %s: tagging not permitted on remount.\n",
5618 +                       sb->s_id);
5619 +               return -EINVAL;
5620 +       }
5621  
5622         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
5623                 ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
5624 @@ -2391,7 +2418,7 @@ static int ext3_statfs (struct super_blo
5625  
5626  static inline struct inode *dquot_to_inode(struct dquot *dquot)
5627  {
5628 -       return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
5629 +       return dqh_dqopt(dquot->dq_dqh)->files[dquot->dq_type];
5630  }
5631  
5632  static int ext3_dquot_initialize(struct inode *inode, int type)
5633 @@ -2434,7 +2461,7 @@ static int ext3_write_dquot(struct dquot
5634  
5635         inode = dquot_to_inode(dquot);
5636         handle = ext3_journal_start(inode,
5637 -                                       EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
5638 +               EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_dqh->dqh_sb));
5639         if (IS_ERR(handle))
5640                 return PTR_ERR(handle);
5641         ret = dquot_commit(dquot);
5642 @@ -2450,7 +2477,7 @@ static int ext3_acquire_dquot(struct dqu
5643         handle_t *handle;
5644  
5645         handle = ext3_journal_start(dquot_to_inode(dquot),
5646 -                                       EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb));
5647 +               EXT3_QUOTA_INIT_BLOCKS(dquot->dq_dqh->dqh_sb));
5648         if (IS_ERR(handle))
5649                 return PTR_ERR(handle);
5650         ret = dquot_acquire(dquot);
5651 @@ -2466,7 +2493,7 @@ static int ext3_release_dquot(struct dqu
5652         handle_t *handle;
5653  
5654         handle = ext3_journal_start(dquot_to_inode(dquot),
5655 -                                       EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb));
5656 +               EXT3_QUOTA_DEL_BLOCKS(dquot->dq_dqh->dqh_sb));
5657         if (IS_ERR(handle))
5658                 return PTR_ERR(handle);
5659         ret = dquot_release(dquot);
5660 @@ -2479,8 +2506,8 @@ static int ext3_release_dquot(struct dqu
5661  static int ext3_mark_dquot_dirty(struct dquot *dquot)
5662  {
5663         /* Are we journalling quotas? */
5664 -       if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
5665 -           EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
5666 +       if (EXT3_SB(dquot->dq_dqh->dqh_sb)->s_qf_names[USRQUOTA] ||
5667 +           EXT3_SB(dquot->dq_dqh->dqh_sb)->s_qf_names[GRPQUOTA]) {
5668                 dquot_mark_dquot_dirty(dquot);
5669                 return ext3_write_dquot(dquot);
5670         } else {
5671 @@ -2488,8 +2515,9 @@ static int ext3_mark_dquot_dirty(struct 
5672         }
5673  }
5674  
5675 -static int ext3_write_info(struct super_block *sb, int type)
5676 +static int ext3_write_info(struct dqhash *hash, int type)
5677  {
5678 +       struct super_block *sb = hash->dqh_sb;
5679         int ret, err;
5680         handle_t *handle;
5681  
5682 @@ -2497,7 +2525,7 @@ static int ext3_write_info(struct super_
5683         handle = ext3_journal_start(sb->s_root->d_inode, 2);
5684         if (IS_ERR(handle))
5685                 return PTR_ERR(handle);
5686 -       ret = dquot_commit_info(sb, type);
5687 +       ret = dquot_commit_info(hash, type);
5688         err = ext3_journal_stop(handle);
5689         if (!ret)
5690                 ret = err;
5691 @@ -2508,18 +2536,20 @@ static int ext3_write_info(struct super_
5692   * Turn on quotas during mount time - we need to find
5693   * the quota file and such...
5694   */
5695 -static int ext3_quota_on_mount(struct super_block *sb, int type)
5696 +static int ext3_quota_on_mount(struct dqhash *hash, int type)
5697  {
5698 -       return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type],
5699 -                       EXT3_SB(sb)->s_jquota_fmt, type);
5700 +       return vfs_quota_on_mount(hash,
5701 +               EXT3_SB(hash->dqh_sb)->s_qf_names[type],
5702 +               EXT3_SB(hash->dqh_sb)->s_jquota_fmt, type);
5703  }
5704  
5705  /*
5706   * Standard function to be called on quota_on
5707   */
5708 -static int ext3_quota_on(struct super_block *sb, int type, int format_id,
5709 +static int ext3_quota_on(struct dqhash *hash, int type, int format_id,
5710                          char *path)
5711  {
5712 +       struct super_block *sb = hash->dqh_sb;
5713         int err;
5714         struct nameidata nd;
5715  
5716 @@ -2528,7 +2558,7 @@ static int ext3_quota_on(struct super_bl
5717         /* Not journalling quota? */
5718         if (!EXT3_SB(sb)->s_qf_names[USRQUOTA] &&
5719             !EXT3_SB(sb)->s_qf_names[GRPQUOTA])
5720 -               return vfs_quota_on(sb, type, format_id, path);
5721 +               return vfs_quota_on(hash, type, format_id, path);
5722         err = path_lookup(path, LOOKUP_FOLLOW, &nd);
5723         if (err)
5724                 return err;
5725 @@ -2543,17 +2573,18 @@ static int ext3_quota_on(struct super_bl
5726                         "EXT3-fs: Quota file not on filesystem root. "
5727                         "Journalled quota will not work.\n");
5728         path_release(&nd);
5729 -       return vfs_quota_on(sb, type, format_id, path);
5730 +       return vfs_quota_on(hash, type, format_id, path);
5731  }
5732  
5733  /* Read data from quotafile - avoid pagecache and such because we cannot afford
5734   * acquiring the locks... As quota files are never truncated and quota code
5735   * itself serializes the operations (and noone else should touch the files)
5736   * we don't have to be afraid of races */
5737 -static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
5738 +static ssize_t ext3_quota_read(struct dqhash *hash, int type, char *data,
5739                                size_t len, loff_t off)
5740  {
5741 -       struct inode *inode = sb_dqopt(sb)->files[type];
5742 +       struct inode *inode = dqh_dqopt(hash)->files[type];
5743 +       struct super_block *sb = hash->dqh_sb;
5744         sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
5745         int err = 0;
5746         int offset = off & (sb->s_blocksize - 1);
5747 @@ -2588,10 +2619,11 @@ static ssize_t ext3_quota_read(struct su
5748  
5749  /* Write to quotafile (we know the transaction is already started and has
5750   * enough credits) */
5751 -static ssize_t ext3_quota_write(struct super_block *sb, int type,
5752 +static ssize_t ext3_quota_write(struct dqhash *hash, int type,
5753                                 const char *data, size_t len, loff_t off)
5754  {
5755 -       struct inode *inode = sb_dqopt(sb)->files[type];
5756 +       struct inode *inode = dqh_dqopt(hash)->files[type];
5757 +       struct super_block *sb = hash->dqh_sb;
5758         sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
5759         int err = 0;
5760         int offset = off & (sb->s_blocksize - 1);
5761 diff -NurpP --minimal linux-2.6.16.20/fs/ext3/symlink.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/symlink.c
5762 --- linux-2.6.16.20/fs/ext3/symlink.c   2005-08-29 22:25:30 +0200
5763 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/symlink.c      2006-04-26 19:07:00 +0200
5764 @@ -40,6 +40,7 @@ struct inode_operations ext3_symlink_ino
5765         .listxattr      = ext3_listxattr,
5766         .removexattr    = generic_removexattr,
5767  #endif
5768 +       .sync_flags     = ext3_sync_flags,
5769  };
5770  
5771  struct inode_operations ext3_fast_symlink_inode_operations = {
5772 @@ -51,4 +52,5 @@ struct inode_operations ext3_fast_symlin
5773         .listxattr      = ext3_listxattr,
5774         .removexattr    = generic_removexattr,
5775  #endif
5776 +       .sync_flags     = ext3_sync_flags,
5777  };
5778 diff -NurpP --minimal linux-2.6.16.20/fs/ext3/xattr.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/xattr.c
5779 --- linux-2.6.16.20/fs/ext3/xattr.c     2006-04-09 13:49:53 +0200
5780 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/xattr.c        2006-04-26 19:07:00 +0200
5781 @@ -58,6 +58,7 @@
5782  #include <linux/mbcache.h>
5783  #include <linux/quotaops.h>
5784  #include <linux/rwsem.h>
5785 +#include <linux/vs_dlimit.h>
5786  #include "xattr.h"
5787  #include "acl.h"
5788  
5789 @@ -495,6 +496,7 @@ ext3_xattr_release_block(handle_t *handl
5790                         ext3_journal_dirty_metadata(handle, bh);
5791                         if (IS_SYNC(inode))
5792                                 handle->h_sync = 1;
5793 +                       DLIMIT_FREE_BLOCK(inode, 1);
5794                         DQUOT_FREE_BLOCK(inode, 1);
5795                         unlock_buffer(bh);
5796                         ea_bdebug(bh, "refcount now=%d; releasing",
5797 @@ -763,11 +765,14 @@ inserted:
5798                         if (new_bh == bs->bh)
5799                                 ea_bdebug(new_bh, "keeping");
5800                         else {
5801 +                               error = -ENOSPC;
5802 +                               if (DLIMIT_ALLOC_BLOCK(inode, 1))
5803 +                                       goto cleanup;
5804                                 /* The old block is released after updating
5805                                    the inode. */
5806                                 error = -EDQUOT;
5807                                 if (DQUOT_ALLOC_BLOCK(inode, 1))
5808 -                                       goto cleanup;
5809 +                                       goto cleanup_dlimit;
5810                                 error = ext3_journal_get_write_access(handle,
5811                                                                       new_bh);
5812                                 if (error)
5813 @@ -843,6 +848,8 @@ cleanup:
5814  
5815  cleanup_dquot:
5816         DQUOT_FREE_BLOCK(inode, 1);
5817 +cleanup_dlimit:
5818 +       DLIMIT_FREE_BLOCK(inode, 1);
5819         goto cleanup;
5820  
5821  bad_block:
5822 diff -NurpP --minimal linux-2.6.16.20/fs/fcntl.c linux-2.6.16.20-vs2.1.1-rc22/fs/fcntl.c
5823 --- linux-2.6.16.20/fs/fcntl.c  2006-02-18 14:40:22 +0100
5824 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/fcntl.c     2006-04-26 19:07:00 +0200
5825 @@ -18,6 +18,7 @@
5826  #include <linux/ptrace.h>
5827  #include <linux/signal.h>
5828  #include <linux/rcupdate.h>
5829 +#include <linux/vs_limit.h>
5830  
5831  #include <asm/poll.h>
5832  #include <asm/siginfo.h>
5833 @@ -85,6 +86,8 @@ repeat:
5834         error = -EMFILE;
5835         if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
5836                 goto out;
5837 +       if (!vx_files_avail(1))
5838 +               goto out;
5839  
5840         error = expand_files(files, newfd);
5841         if (error < 0)
5842 @@ -126,6 +129,7 @@ static int dupfd(struct file *file, unsi
5843                 FD_SET(fd, fdt->open_fds);
5844                 FD_CLR(fd, fdt->close_on_exec);
5845                 spin_unlock(&files->file_lock);
5846 +               vx_openfd_inc(fd);
5847                 fd_install(fd, file);
5848         } else {
5849                 spin_unlock(&files->file_lock);
5850 @@ -178,6 +182,9 @@ asmlinkage long sys_dup2(unsigned int ol
5851  
5852         if (tofree)
5853                 filp_close(tofree, files);
5854 +       else
5855 +               vx_openfd_inc(newfd);   /* fd was unused */
5856 +
5857         err = newfd;
5858  out:
5859         return err;
5860 @@ -481,7 +488,7 @@ void send_sigio(struct fown_struct *fown
5861         
5862         read_lock(&tasklist_lock);
5863         if (pid > 0) {
5864 -               p = find_task_by_pid(pid);
5865 +               p = find_task_by_real_pid(pid);
5866                 if (p) {
5867                         send_sigio_to_task(p, fown, fd, band);
5868                 }
5869 @@ -516,7 +523,7 @@ int send_sigurg(struct fown_struct *fown
5870         
5871         read_lock(&tasklist_lock);
5872         if (pid > 0) {
5873 -               p = find_task_by_pid(pid);
5874 +               p = find_task_by_real_pid(pid);
5875                 if (p) {
5876                         send_sigurg_to_task(p, fown);
5877                 }
5878 diff -NurpP --minimal linux-2.6.16.20/fs/file_table.c linux-2.6.16.20-vs2.1.1-rc22/fs/file_table.c
5879 --- linux-2.6.16.20/fs/file_table.c     2006-04-09 13:49:53 +0200
5880 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/file_table.c        2006-04-26 19:07:00 +0200
5881 @@ -22,6 +22,8 @@
5882  #include <linux/fsnotify.h>
5883  #include <linux/sysctl.h>
5884  #include <linux/percpu_counter.h>
5885 +#include <linux/vs_limit.h>
5886 +#include <linux/vs_context.h>
5887  
5888  #include <asm/atomic.h>
5889  
5890 @@ -119,6 +121,8 @@ struct file *get_empty_filp(void)
5891         rwlock_init(&f->f_owner.lock);
5892         /* f->f_version: 0 */
5893         INIT_LIST_HEAD(&f->f_u.fu_list);
5894 +       f->f_xid = vx_current_xid();
5895 +       vx_files_inc(f);
5896         return f;
5897  
5898  over:
5899 @@ -173,6 +177,8 @@ void fastcall __fput(struct file *file)
5900         fops_put(file->f_op);
5901         if (file->f_mode & FMODE_WRITE)
5902                 put_write_access(inode);
5903 +       vx_files_dec(file);
5904 +       file->f_xid = 0;
5905         file_kill(file);
5906         file->f_dentry = NULL;
5907         file->f_vfsmnt = NULL;
5908 @@ -238,6 +244,8 @@ void put_filp(struct file *file)
5909  {
5910         if (atomic_dec_and_test(&file->f_count)) {
5911                 security_file_free(file);
5912 +               vx_files_dec(file);
5913 +               file->f_xid = 0;
5914                 file_kill(file);
5915                 file_free(file);
5916         }
5917 diff -NurpP --minimal linux-2.6.16.20/fs/hfsplus/ioctl.c linux-2.6.16.20-vs2.1.1-rc22/fs/hfsplus/ioctl.c
5918 --- linux-2.6.16.20/fs/hfsplus/ioctl.c  2006-04-09 13:49:53 +0200
5919 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/hfsplus/ioctl.c     2006-04-26 19:07:00 +0200
5920 @@ -16,6 +16,7 @@
5921  #include <linux/fs.h>
5922  #include <linux/sched.h>
5923  #include <linux/xattr.h>
5924 +#include <linux/mount.h>
5925  #include <asm/uaccess.h>
5926  #include "hfsplus_fs.h"
5927  
5928 @@ -35,7 +36,8 @@ int hfsplus_ioctl(struct inode *inode, s
5929                         flags |= EXT2_FLAG_NODUMP; /* EXT2_NODUMP_FL */
5930                 return put_user(flags, (int __user *)arg);
5931         case HFSPLUS_IOC_EXT2_SETFLAGS: {
5932 -               if (IS_RDONLY(inode))
5933 +               if (IS_RDONLY(inode) ||
5934 +                       (filp && MNT_IS_RDONLY(filp->f_vfsmnt)))
5935                         return -EROFS;
5936  
5937                 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
5938 diff -NurpP --minimal linux-2.6.16.20/fs/inode.c linux-2.6.16.20-vs2.1.1-rc22/fs/inode.c
5939 --- linux-2.6.16.20/fs/inode.c  2006-02-18 14:40:22 +0100
5940 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/inode.c     2006-04-26 19:07:00 +0200
5941 @@ -116,6 +116,9 @@ static struct inode *alloc_inode(struct 
5942                 struct address_space * const mapping = &inode->i_data;
5943  
5944                 inode->i_sb = sb;
5945 +
5946 +               /* essential because of inode slab reuse */
5947 +               inode->i_tag = 0;
5948                 inode->i_blkbits = sb->s_blocksize_bits;
5949                 inode->i_flags = 0;
5950                 atomic_set(&inode->i_count, 1);
5951 @@ -128,6 +131,7 @@ static struct inode *alloc_inode(struct 
5952                 inode->i_bytes = 0;
5953                 inode->i_generation = 0;
5954  #ifdef CONFIG_QUOTA
5955 +               inode->i_dqh = dqhget(sb->s_dqh);
5956                 memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
5957  #endif
5958                 inode->i_pipe = NULL;
5959 @@ -175,6 +179,8 @@ void destroy_inode(struct inode *inode) 
5960         if (inode_has_buffers(inode))
5961                 BUG();
5962         security_inode_free(inode);
5963 +       if (dqhash_valid(inode->i_dqh))
5964 +               dqhput(inode->i_dqh);
5965         if (inode->i_sb->s_op->destroy_inode)
5966                 inode->i_sb->s_op->destroy_inode(inode);
5967         else
5968 @@ -236,6 +242,8 @@ void __iget(struct inode * inode)
5969         inodes_stat.nr_unused--;
5970  }
5971  
5972 +EXPORT_SYMBOL_GPL(__iget);
5973 +
5974  /**
5975   * clear_inode - clear an inode
5976   * @inode: inode to clear
5977 @@ -1272,12 +1280,13 @@ EXPORT_SYMBOL(inode_needs_sync);
5978  /* Function back in dquot.c */
5979  int remove_inode_dquot_ref(struct inode *, int, struct list_head *);
5980  
5981 -void remove_dquot_ref(struct super_block *sb, int type,
5982 +void remove_dquot_ref(struct dqhash *hash, int type,
5983                         struct list_head *tofree_head)
5984  {
5985         struct inode *inode;
5986 +       struct super_block *sb = hash->dqh_sb;
5987  
5988 -       if (!sb->dq_op)
5989 +       if (!hash->dqh_qop)
5990                 return; /* nothing to do */
5991         spin_lock(&inode_lock); /* This lock is for inodes code */
5992  
5993 diff -NurpP --minimal linux-2.6.16.20/fs/ioctl.c linux-2.6.16.20-vs2.1.1-rc22/fs/ioctl.c
5994 --- linux-2.6.16.20/fs/ioctl.c  2006-04-09 13:49:53 +0200
5995 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ioctl.c     2006-04-29 02:58:07 +0200
5996 @@ -13,10 +13,19 @@
5997  #include <linux/fs.h>
5998  #include <linux/security.h>
5999  #include <linux/module.h>
6000 +#include <linux/proc_fs.h>
6001 +#include <linux/vserver/inode.h>
6002 +#include <linux/vserver/tag.h>
6003  
6004  #include <asm/uaccess.h>
6005  #include <asm/ioctls.h>
6006  
6007 +
6008 +#ifdef CONFIG_VSERVER_LEGACY
6009 +extern int vx_proc_ioctl(struct inode *, struct file *,
6010 +       unsigned int, unsigned long);
6011 +#endif
6012 +
6013  static long do_ioctl(struct file *filp, unsigned int cmd,
6014                 unsigned long arg)
6015  {
6016 @@ -147,6 +156,48 @@ int vfs_ioctl(struct file *filp, unsigne
6017                         else
6018                                 error = -ENOTTY;
6019                         break;
6020 +#ifdef CONFIG_VSERVER_LEGACY
6021 +#ifndef CONFIG_TAGGING_NONE
6022 +               case FIOC_GETTAG: {     /* copy inode->i_tag to user *arg */
6023 +                       struct inode *inode = filp->f_dentry->d_inode;
6024 +
6025 +                       /* fixme: if stealth, return -ENOTTY */
6026 +                       error = -EPERM; /* kept unless CAP_CONTEXT is held */
6027 +                       if (capable(CAP_CONTEXT))
6028 +                               error = put_user(inode->i_tag, (int __user *) arg);
6029 +                       break;
6030 +               }
6031 +               case FIOC_SETTAG: {     /* set inode->i_tag from user *arg */
6032 +                       struct inode *inode = filp->f_dentry->d_inode;
6033 +                       int tag;
6034 +                       /* error is preset before each check, so a failure just breaks */
6035 +                       /* fixme: if stealth, return -ENOTTY */
6036 +                       error = -EPERM;
6037 +                       if (!capable(CAP_CONTEXT))
6038 +                               break;
6039 +                       error = -EROFS;
6040 +                       if (IS_RDONLY(inode))
6041 +                               break;
6042 +                       error = -ENOSYS;        /* superblock lacks MS_TAGGED */
6043 +                       if (!(inode->i_sb->s_flags & MS_TAGGED))
6044 +                               break;
6045 +                       error = -EFAULT;
6046 +                       if (get_user(tag, (int __user *) arg))
6047 +                               break;
6048 +                       error = 0;
6049 +                       inode->i_tag = (tag & 0xFFFF);  /* low 16 bits only */
6050 +                       inode->i_ctime = CURRENT_TIME;
6051 +                       mark_inode_dirty(inode);
6052 +                       break;
6053 +               }
6054 +#endif
6055 +               case FIOC_GETXFLG:
6056 +               case FIOC_SETXFLG:
6057 +                       error = -ENOTTY;
6058 +                       if (filp->f_dentry->d_inode->i_sb->s_magic == PROC_SUPER_MAGIC)
6059 +                               error = vx_proc_ioctl(filp->f_dentry->d_inode, filp, cmd, arg);
6060 +                       break;
6061 +#endif
6062                 default:
6063                         if (S_ISREG(filp->f_dentry->d_inode->i_mode))
6064                                 error = file_ioctl(filp, cmd, arg);
6065 diff -NurpP --minimal linux-2.6.16.20/fs/ioprio.c linux-2.6.16.20-vs2.1.1-rc22/fs/ioprio.c
6066 --- linux-2.6.16.20/fs/ioprio.c 2006-04-09 13:49:53 +0200
6067 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ioprio.c    2006-04-26 19:07:00 +0200
6068 @@ -24,6 +24,7 @@
6069  #include <linux/blkdev.h>
6070  #include <linux/capability.h>
6071  #include <linux/syscalls.h>
6072 +#include <linux/vs_pid.h>
6073  
6074  static int set_task_ioprio(struct task_struct *task, int ioprio)
6075  {
6076 @@ -95,7 +96,7 @@ asmlinkage long sys_ioprio_set(int which
6077                         if (!who)
6078                                 user = current->user;
6079                         else
6080 -                               user = find_user(who);
6081 +                               user = find_user(vx_current_xid(), who);
6082  
6083                         if (!user)
6084                                 break;
6085 @@ -149,7 +150,7 @@ asmlinkage long sys_ioprio_get(int which
6086                         if (!who)
6087                                 user = current->user;
6088                         else
6089 -                               user = find_user(who);
6090 +                               user = find_user(vx_current_xid(), who);
6091  
6092                         if (!user)
6093                                 break;
6094 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/Makefile linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/Makefile
6095 --- linux-2.6.16.20/fs/jfs/Makefile     2004-08-14 12:56:09 +0200
6096 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/Makefile        2006-04-26 19:07:00 +0200
6097 @@ -8,7 +8,8 @@ jfs-y    := super.o file.o inode.o namei
6098             jfs_xtree.o jfs_imap.o jfs_debug.o jfs_dmap.o \
6099             jfs_unicode.o jfs_dtree.o jfs_inode.o \
6100             jfs_extent.o symlink.o jfs_metapage.o \
6101 -           jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o resize.o xattr.o
6102 +           jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o \
6103 +           resize.o xattr.o ioctl.o
6104  
6105  jfs-$(CONFIG_JFS_POSIX_ACL) += acl.o
6106  
6107 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/acl.c linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/acl.c
6108 --- linux-2.6.16.20/fs/jfs/acl.c        2005-10-28 20:49:44 +0200
6109 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/acl.c   2006-04-26 19:07:00 +0200
6110 @@ -229,7 +229,8 @@ int jfs_setattr(struct dentry *dentry, s
6111                 return rc;
6112  
6113         if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
6114 -           (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
6115 +           (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) ||
6116 +           (iattr->ia_valid & ATTR_TAG && iattr->ia_tag != inode->i_tag)) {
6117                 if (DQUOT_TRANSFER(inode, iattr))
6118                         return -EDQUOT;
6119         }
6120 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/file.c linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/file.c
6121 --- linux-2.6.16.20/fs/jfs/file.c       2005-08-29 22:25:31 +0200
6122 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/file.c  2006-04-26 19:07:00 +0200
6123 @@ -98,6 +98,7 @@ struct inode_operations jfs_file_inode_o
6124         .setattr        = jfs_setattr,
6125         .permission     = jfs_permission,
6126  #endif
6127 +       .sync_flags     = jfs_sync_flags,
6128  };
6129  
6130  struct file_operations jfs_file_operations = {
6131 @@ -111,6 +112,8 @@ struct file_operations jfs_file_operatio
6132         .readv          = generic_file_readv,
6133         .writev         = generic_file_writev,
6134         .sendfile       = generic_file_sendfile,
6135 +       .sendpage       = generic_file_sendpage,
6136         .fsync          = jfs_fsync,
6137         .release        = jfs_release,
6138 +       .ioctl          = jfs_ioctl,
6139  };
6140 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/inode.c linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/inode.c
6141 --- linux-2.6.16.20/fs/jfs/inode.c      2005-10-28 20:49:44 +0200
6142 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/inode.c 2006-04-26 19:07:00 +0200
6143 @@ -22,6 +22,7 @@
6144  #include <linux/buffer_head.h>
6145  #include <linux/pagemap.h>
6146  #include <linux/quotaops.h>
6147 +#include <linux/vs_dlimit.h>
6148  #include "jfs_incore.h"
6149  #include "jfs_inode.h"
6150  #include "jfs_filsys.h"
6151 @@ -55,6 +56,7 @@ void jfs_read_inode(struct inode *inode)
6152                 inode->i_op = &jfs_file_inode_operations;
6153                 init_special_inode(inode, inode->i_mode, inode->i_rdev);
6154         }
6155 +       jfs_set_inode_flags(inode);
6156  }
6157  
6158  /*
6159 @@ -143,6 +145,7 @@ void jfs_delete_inode(struct inode *inod
6160                 DQUOT_INIT(inode);
6161                 DQUOT_FREE_INODE(inode);
6162                 DQUOT_DROP(inode);
6163 +               DLIMIT_FREE_INODE(inode);
6164         }
6165  
6166         clear_inode(inode);
6167 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/ioctl.c linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/ioctl.c
6168 --- linux-2.6.16.20/fs/jfs/ioctl.c      1970-01-01 01:00:00 +0100
6169 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/ioctl.c 2006-04-26 19:07:00 +0200
6170 @@ -0,0 +1,111 @@
6171 +/*
6172 + * linux/fs/jfs/ioctl.c
6173 + *
6174 + * Copyright (C) 2006 Herbert Poetzl
6175 + * adapted from Remy Card's ext2/ioctl.c
6176 + */
6177 +
6178 +#include <linux/fs.h>
6179 +#include <linux/ext2_fs.h>
6180 +#include <linux/ctype.h>
6181 +#include <linux/capability.h>
6182 +#include <linux/time.h>
6183 +#include <linux/mount.h>
6184 +#include <asm/current.h>
6185 +#include <asm/uaccess.h>
6186 +
6187 +#include "jfs_incore.h"
6188 +#include "jfs_dinode.h"
6189 +#include "jfs_inode.h"
6190 +
6191 +/* Pairs each JFS inode flag bit with its ext2 counterpart; the all-zero entry ends the table. */
6192 +static struct {
6193 +       long jfs_flag;
6194 +       long ext2_flag;
6195 +} jfs_map[] = {
6196 +       {JFS_NOATIME_FL, EXT2_NOATIME_FL},
6197 +       {JFS_DIRSYNC_FL, EXT2_DIRSYNC_FL},
6198 +       {JFS_SYNC_FL, EXT2_SYNC_FL},
6199 +       {JFS_SECRM_FL, EXT2_SECRM_FL},
6200 +       {JFS_UNRM_FL, EXT2_UNRM_FL},
6201 +       {JFS_APPEND_FL, EXT2_APPEND_FL},
6202 +       {JFS_IMMUTABLE_FL, EXT2_IMMUTABLE_FL},
6203 +       {0, 0}, /* terminator: jfs_map_ext2() loops until jfs_flag is zero */
6204 +};
6205 +/* Convert a flag word via jfs_map: from != 0 maps ext2 bits to JFS bits, from == 0 maps JFS bits to ext2 bits. */
6206 +static long jfs_map_ext2(unsigned long flags, int from)
6207 +{
6208 +       int index=0;
6209 +       long mapped=0;
6210 +
6211 +       while (jfs_map[index].jfs_flag) {       /* stop at the {0, 0} entry */
6212 +               if (from) {
6213 +                       if (jfs_map[index].ext2_flag & flags)
6214 +                               mapped |= jfs_map[index].jfs_flag;
6215 +               } else {
6216 +                       if (jfs_map[index].jfs_flag & flags)
6217 +                               mapped |= jfs_map[index].ext2_flag;
6218 +               }
6219 +               index++;
6220 +       }
6221 +       return mapped;  /* input bits with no jfs_map entry are dropped */
6222 +}
6223 +
6224 +/* ioctl handler: JFS_IOC_GETFLAGS reports and JFS_IOC_SETFLAGS updates the mode2 flag bits, both in ext2 encoding; other commands return -ENOTTY. */
6225 +int jfs_ioctl(struct inode * inode, struct file * filp, unsigned int cmd,
6226 +               unsigned long arg)
6227 +{
6228 +       struct jfs_inode_info *jfs_inode = JFS_IP(inode);
6229 +       unsigned int flags;
6230 +
6231 +       switch (cmd) {
6232 +       case JFS_IOC_GETFLAGS:
6233 +               flags = jfs_inode->mode2 & JFS_FL_USER_VISIBLE; /* drop bits outside the user-visible mask */
6234 +               flags = jfs_map_ext2(flags, 0); /* JFS -> ext2 encoding */
6235 +               return put_user(flags, (int __user *) arg);
6236 +       case JFS_IOC_SETFLAGS: {
6237 +               unsigned int oldflags;
6238 +
6239 +               if (IS_RDONLY(inode) ||
6240 +                       (filp && MNT_IS_RDONLY(filp->f_vfsmnt)))
6241 +                       return -EROFS;
6242 +
6243 +               if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))   /* owner or CAP_FOWNER only */
6244 +                       return -EACCES;
6245 +
6246 +               if (get_user(flags, (int __user *) arg))
6247 +                       return -EFAULT;
6248 +
6249 +               flags = jfs_map_ext2(flags, 1); /* ext2 -> JFS encoding */
6250 +               if (!S_ISDIR(inode->i_mode))
6251 +                       flags &= ~JFS_DIRSYNC_FL;       /* DIRSYNC is kept only for directories */
6252 +
6253 +               oldflags = jfs_inode->mode2;
6254 +
6255 +               /*
6256 +                * The IMMUTABLE and APPEND_ONLY flags can only be changed by
6257 +                * the relevant capability.
6258 +                *
6259 +                * This test looks nicer. Thanks to Pauline Middelink
6260 +                */
6261 +               if ((oldflags & JFS_IMMUTABLE_FL) ||
6262 +                       ((flags ^ oldflags) & (JFS_APPEND_FL |
6263 +                       JFS_IMMUTABLE_FL | JFS_IUNLINK_FL))) {
6264 +                       if (!capable(CAP_LINUX_IMMUTABLE))
6265 +                               return -EPERM;
6266 +               }
6267 +
6268 +               flags = flags & JFS_FL_USER_MODIFIABLE; /* take only modifiable bits from the caller */
6269 +               flags |= oldflags & ~JFS_FL_USER_MODIFIABLE;    /* keep every other bit as it was */
6270 +               jfs_inode->mode2 = flags;
6271 +
6272 +               jfs_set_inode_flags(inode);     /* refresh inode->i_flags from the new mode2 */
6273 +               inode->i_ctime = CURRENT_TIME_SEC;
6274 +               mark_inode_dirty(inode);
6275 +               return 0;
6276 +       }
6277 +       default:
6278 +               return -ENOTTY;
6279 +       }
6280 +}
6281 +
6282 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/jfs_dinode.h linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_dinode.h
6283 --- linux-2.6.16.20/fs/jfs/jfs_dinode.h 2004-12-25 01:55:20 +0100
6284 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_dinode.h    2006-04-26 19:07:00 +0200
6285 @@ -139,13 +139,39 @@ struct dinode {
6286  
6287  /* more extended mode bits: attributes for OS/2 */
6288  #define IREADONLY      0x02000000      /* no write access to file */
6289 -#define IARCHIVE       0x40000000      /* file archive bit */
6290 -#define ISYSTEM                0x08000000      /* system file */
6291  #define IHIDDEN                0x04000000      /* hidden file */
6292 -#define IRASH          0x4E000000      /* mask for changeable attributes */
6293 -#define INEWNAME       0x80000000      /* non-8.3 filename format */
6294 +#define ISYSTEM                0x08000000      /* system file */
6295 +
6296  #define IDIRECTORY     0x20000000      /* directory (shadow of real bit) */
6297 +#define IARCHIVE       0x40000000      /* file archive bit */
6298 +#define INEWNAME       0x80000000      /* non-8.3 filename format */
6299 +
6300 +#define IRASH          0x4E000000      /* mask for changeable attributes */
6301  #define ATTRSHIFT      25      /* bits to shift to move attribute
6302                                    specification to mode position */
6303  
6304 +/* extended attributes for Linux */
6305 +
6306 +#define JFS_NOATIME_FL         0x00080000 /* do not update atime */
6307 +
6308 +#define JFS_DIRSYNC_FL         0x00100000 /* dirsync behaviour */
6309 +#define JFS_SYNC_FL            0x00200000 /* Synchronous updates */
6310 +#define JFS_SECRM_FL           0x00400000 /* Secure deletion */
6311 +#define JFS_UNRM_FL            0x00800000 /* allow for undelete */
6312 +
6313 +#define        JFS_APPEND_FL           0x01000000 /* writes to file may only append */
6314 +#define        JFS_IMMUTABLE_FL        0x02000000 /* Immutable file */
6315 +
6316 +#define        JFS_BARRIER_FL          0x04000000 /* Barrier for chroot() */
6317 +#define        JFS_IUNLINK_FL          0x08000000 /* Immutable unlink */
6318 +
6319 +#define JFS_FL_USER_VISIBLE    0x0FF80000
6320 +#define JFS_FL_USER_MODIFIABLE 0x03F80000
6321 +#define JFS_FL_INHERIT         0x0BC80000
6322 +
6323 +
6324 +#define JFS_IOC_GETFLAGS       _IOR('f', 1, long)
6325 +#define JFS_IOC_SETFLAGS       _IOW('f', 2, long)
6326 +
6327 +
6328  #endif /*_H_JFS_DINODE */
6329 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/jfs_dtree.c linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_dtree.c
6330 --- linux-2.6.16.20/fs/jfs/jfs_dtree.c  2005-08-29 22:25:31 +0200
6331 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_dtree.c     2006-04-26 19:07:00 +0200
6332 @@ -102,6 +102,7 @@
6333  
6334  #include <linux/fs.h>
6335  #include <linux/quotaops.h>
6336 +#include <linux/vs_dlimit.h>
6337  #include "jfs_incore.h"
6338  #include "jfs_superblock.h"
6339  #include "jfs_filsys.h"
6340 @@ -383,10 +384,10 @@ static u32 add_index(tid_t tid, struct i
6341                  */
6342                 if (DQUOT_ALLOC_BLOCK(ip, sbi->nbperpage))
6343                         goto clean_up;
6344 -               if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) {
6345 -                       DQUOT_FREE_BLOCK(ip, sbi->nbperpage);
6346 -                       goto clean_up;
6347 -               }
6348 +               if (DLIMIT_ALLOC_BLOCK(ip, sbi->nbperpage))
6349 +                       goto clean_up_dquot;
6350 +               if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr))
6351 +                       goto clean_up_dlimit;
6352  
6353                 /*
6354                  * Save the table, we're going to overwrite it with the
6355 @@ -479,6 +480,12 @@ static u32 add_index(tid_t tid, struct i
6356  
6357         return index;
6358  
6359 +      clean_up_dlimit:
6360 +       DLIMIT_FREE_BLOCK(ip, sbi->nbperpage);
6361 +
6362 +      clean_up_dquot:
6363 +       DQUOT_FREE_BLOCK(ip, sbi->nbperpage);
6364 +
6365        clean_up:
6366  
6367         jfs_ip->next_index--;
6368 @@ -930,7 +937,8 @@ int dtInsert(tid_t tid, struct inode *ip
6369  static int dtSplitUp(tid_t tid,
6370           struct inode *ip, struct dtsplit * split, struct btstack * btstack)
6371  {
6372 -       struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
6373 +       struct super_block *sb = ip->i_sb;
6374 +       struct jfs_sb_info *sbi = JFS_SBI(sb);
6375         int rc = 0;
6376         struct metapage *smp;
6377         dtpage_t *sp;           /* split page */
6378 @@ -952,6 +960,7 @@ static int dtSplitUp(tid_t tid,
6379         struct tlock *tlck;
6380         struct lv *lv;
6381         int quota_allocation = 0;
6382 +       int dlimit_allocation = 0;
6383  
6384         /* get split page */
6385         smp = split->mp;
6386 @@ -1033,6 +1042,12 @@ static int dtSplitUp(tid_t tid,
6387                 }
6388                 quota_allocation += n;
6389  
6390 +               if (DLIMIT_ALLOC_BLOCK(ip, n)) {
6391 +                       rc = -ENOSPC;
6392 +                       goto extendOut;
6393 +               }
6394 +               dlimit_allocation += n;
6395 +
6396                 if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen,
6397                                     (s64) n, &nxaddr)))
6398                         goto extendOut;
6399 @@ -1301,6 +1316,9 @@ static int dtSplitUp(tid_t tid,
6400        freeKeyName:
6401         kfree(key.name);
6402  
6403 +       /* Rollback dlimit allocation */
6404 +       if (rc && dlimit_allocation)
6405 +               DLIMIT_FREE_BLOCK(ip, dlimit_allocation);
6406         /* Rollback quota allocation */
6407         if (rc && quota_allocation)
6408                 DQUOT_FREE_BLOCK(ip, quota_allocation);
6409 @@ -1368,6 +1386,12 @@ static int dtSplitPage(tid_t tid, struct
6410                 release_metapage(rmp);
6411                 return -EDQUOT;
6412         }
6413 +       /* Allocate blocks to dlimit. */
6414 +       if (DLIMIT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
6415 +               DQUOT_FREE_BLOCK(ip, lengthPXD(pxd));
6416 +               release_metapage(rmp);
6417 +               return -ENOSPC;
6418 +       }
6419  
6420         jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
6421  
6422 @@ -1918,6 +1942,12 @@ static int dtSplitRoot(tid_t tid,
6423                 release_metapage(rmp);
6424                 return -EDQUOT;
6425         }
6426 +       /* Allocate blocks to dlimit. */
6427 +       if (DLIMIT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
6428 +               DQUOT_FREE_BLOCK(ip, lengthPXD(pxd));
6429 +               release_metapage(rmp);
6430 +               return -ENOSPC;
6431 +       }
6432  
6433         BT_MARK_DIRTY(rmp, ip);
6434         /*
6435 @@ -2284,6 +2314,8 @@ static int dtDeleteUp(tid_t tid, struct 
6436  
6437         xlen = lengthPXD(&fp->header.self);
6438  
6439 +       /* Free dlimit allocation. */
6440 +       DLIMIT_FREE_BLOCK(ip, xlen);
6441         /* Free quota allocation. */
6442         DQUOT_FREE_BLOCK(ip, xlen);
6443  
6444 @@ -2360,6 +2392,8 @@ static int dtDeleteUp(tid_t tid, struct 
6445  
6446                                 xlen = lengthPXD(&p->header.self);
6447  
6448 +                               /* Free dlimit allocation */
6449 +                               DLIMIT_FREE_BLOCK(ip, xlen);
6450                                 /* Free quota allocation */
6451                                 DQUOT_FREE_BLOCK(ip, xlen);
6452  
6453 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/jfs_extent.c linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_extent.c
6454 --- linux-2.6.16.20/fs/jfs/jfs_extent.c 2005-08-29 22:25:32 +0200
6455 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_extent.c    2006-04-26 19:07:00 +0200
6456 @@ -18,6 +18,7 @@
6457  
6458  #include <linux/fs.h>
6459  #include <linux/quotaops.h>
6460 +#include <linux/vs_dlimit.h>
6461  #include "jfs_incore.h"
6462  #include "jfs_inode.h"
6463  #include "jfs_superblock.h"
6464 @@ -146,6 +147,13 @@ extAlloc(struct inode *ip, s64 xlen, s64
6465                 up(&JFS_IP(ip)->commit_sem);
6466                 return -EDQUOT;
6467         }
6468 +       /* Allocate blocks to dlimit. */
6469 +       if (DLIMIT_ALLOC_BLOCK(ip, nxlen)) {
6470 +               DQUOT_FREE_BLOCK(ip, nxlen);
6471 +               dbFree(ip, nxaddr, (s64) nxlen);
6472 +               up(&JFS_IP(ip)->commit_sem);
6473 +               return -ENOSPC;
6474 +       }
6475  
6476         /* determine the value of the extent flag */
6477         xflag = (abnr == TRUE) ? XAD_NOTRECORDED : 0;
6478 @@ -164,6 +172,7 @@ extAlloc(struct inode *ip, s64 xlen, s64
6479          */
6480         if (rc) {
6481                 dbFree(ip, nxaddr, nxlen);
6482 +               DLIMIT_FREE_BLOCK(ip, nxlen);
6483                 DQUOT_FREE_BLOCK(ip, nxlen);
6484                 up(&JFS_IP(ip)->commit_sem);
6485                 return (rc);
6486 @@ -261,6 +270,13 @@ int extRealloc(struct inode *ip, s64 nxl
6487                 up(&JFS_IP(ip)->commit_sem);
6488                 return -EDQUOT;
6489         }
6490 +       /* Allocate blocks to dlimit. */
6491 +       if (DLIMIT_ALLOC_BLOCK(ip, nxlen)) {
6492 +               DQUOT_FREE_BLOCK(ip, nxlen);
6493 +               dbFree(ip, nxaddr, (s64) nxlen);
6494 +               up(&JFS_IP(ip)->commit_sem);
6495 +               return -ENOSPC;
6496 +       }
6497  
6498         delta = nxlen - xlen;
6499  
6500 @@ -297,6 +313,7 @@ int extRealloc(struct inode *ip, s64 nxl
6501                 /* extend the extent */
6502                 if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) {
6503                         dbFree(ip, xaddr + xlen, delta);
6504 +                       DLIMIT_FREE_BLOCK(ip, nxlen);
6505                         DQUOT_FREE_BLOCK(ip, nxlen);
6506                         goto exit;
6507                 }
6508 @@ -308,6 +325,7 @@ int extRealloc(struct inode *ip, s64 nxl
6509                  */
6510                 if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) {
6511                         dbFree(ip, nxaddr, nxlen);
6512 +                       DLIMIT_FREE_BLOCK(ip, nxlen);
6513                         DQUOT_FREE_BLOCK(ip, nxlen);
6514                         goto exit;
6515                 }
6516 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/jfs_filsys.h linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_filsys.h
6517 --- linux-2.6.16.20/fs/jfs/jfs_filsys.h 2005-10-28 20:49:44 +0200
6518 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_filsys.h    2006-04-26 19:07:00 +0200
6519 @@ -84,6 +84,7 @@
6520  #define JFS_DIR_INDEX          0x00200000      /* Persistant index for */
6521                                                 /* directory entries    */
6522  
6523 +#define JFS_TAGGED             0x00800000      /* Context Tagging */
6524  
6525  /*
6526   *     buffer cache configuration
6527 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/jfs_imap.c linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_imap.c
6528 --- linux-2.6.16.20/fs/jfs/jfs_imap.c   2006-04-09 13:49:53 +0200
6529 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_imap.c      2006-04-26 19:07:00 +0200
6530 @@ -45,6 +45,7 @@
6531  #include <linux/buffer_head.h>
6532  #include <linux/pagemap.h>
6533  #include <linux/quotaops.h>
6534 +#include <linux/vserver/tag.h>
6535  
6536  #include "jfs_incore.h"
6537  #include "jfs_inode.h"
6538 @@ -3074,14 +3075,21 @@ static void duplicateIXtree(struct super
6539  static int copy_from_dinode(struct dinode * dip, struct inode *ip)
6540  {
6541         struct jfs_inode_info *jfs_ip = JFS_IP(ip);
6542 +       uid_t uid;
6543 +       gid_t gid;
6544  
6545         jfs_ip->fileset = le32_to_cpu(dip->di_fileset);
6546         jfs_ip->mode2 = le32_to_cpu(dip->di_mode);
6547  
6548         ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff;
6549         ip->i_nlink = le32_to_cpu(dip->di_nlink);
6550 -       ip->i_uid = le32_to_cpu(dip->di_uid);
6551 -       ip->i_gid = le32_to_cpu(dip->di_gid);
6552 +
6553 +       uid = le32_to_cpu(dip->di_uid);
6554 +       gid = le32_to_cpu(dip->di_gid);
6555 +       ip->i_uid = INOTAG_UID(DX_TAG(ip), uid, gid);
6556 +       ip->i_gid = INOTAG_GID(DX_TAG(ip), uid, gid);
6557 +       ip->i_tag = INOTAG_TAG(DX_TAG(ip), uid, gid, 0);
6558 +
6559         ip->i_size = le64_to_cpu(dip->di_size);
6560         ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec);
6561         ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec);
6562 @@ -3132,6 +3140,8 @@ static int copy_from_dinode(struct dinod
6563  static void copy_to_dinode(struct dinode * dip, struct inode *ip)
6564  {
6565         struct jfs_inode_info *jfs_ip = JFS_IP(ip);
6566 +       uid_t uid;
6567 +       gid_t gid;
6568  
6569         dip->di_fileset = cpu_to_le32(jfs_ip->fileset);
6570         dip->di_inostamp = cpu_to_le32(JFS_SBI(ip->i_sb)->inostamp);
6571 @@ -3140,8 +3150,11 @@ static void copy_to_dinode(struct dinode
6572         dip->di_size = cpu_to_le64(ip->i_size);
6573         dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks));
6574         dip->di_nlink = cpu_to_le32(ip->i_nlink);
6575 -       dip->di_uid = cpu_to_le32(ip->i_uid);
6576 -       dip->di_gid = cpu_to_le32(ip->i_gid);
6577 +
6578 +       uid = TAGINO_UID(DX_TAG(ip), ip->i_uid, ip->i_tag);
6579 +       gid = TAGINO_GID(DX_TAG(ip), ip->i_gid, ip->i_tag);
6580 +       dip->di_uid = cpu_to_le32(uid);
6581 +       dip->di_gid = cpu_to_le32(gid);
6582         /*
6583          * mode2 is only needed for storing the higher order bits.
6584          * Trust i_mode for the lower order ones
6585 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/jfs_inode.c linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_inode.c
6586 --- linux-2.6.16.20/fs/jfs/jfs_inode.c  2005-08-29 22:25:32 +0200
6587 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_inode.c     2006-04-26 19:07:00 +0200
6588 @@ -18,6 +18,8 @@
6589  
6590  #include <linux/fs.h>
6591  #include <linux/quotaops.h>
6592 +#include <linux/vs_dlimit.h>
6593 +#include <linux/vs_tag.h>
6594  #include "jfs_incore.h"
6595  #include "jfs_inode.h"
6596  #include "jfs_filsys.h"
6597 @@ -25,6 +27,66 @@
6598  #include "jfs_dinode.h"
6599  #include "jfs_debug.h"
6600  
6601 +/* Rebuild the bits of inode->i_flags that mirror the JFS_*_FL bits held in the in-core mode2 word. */
6602 +void jfs_set_inode_flags(struct inode *inode)
6603 +{
6604 +       unsigned int flags = JFS_IP(inode)->mode2;
6605 +
6606 +       inode->i_flags &= ~(S_IMMUTABLE | S_IUNLINK | S_BARRIER |
6607 +               S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);     /* clear all seven, then set from mode2 */
6608 +
6609 +       if (flags & JFS_IMMUTABLE_FL)
6610 +               inode->i_flags |= S_IMMUTABLE;
6611 +       if (flags & JFS_IUNLINK_FL)
6612 +               inode->i_flags |= S_IUNLINK;
6613 +       if (flags & JFS_BARRIER_FL)
6614 +               inode->i_flags |= S_BARRIER;
6615 +
6616 +       if (flags & JFS_SYNC_FL)
6617 +               inode->i_flags |= S_SYNC;
6618 +       if (flags & JFS_APPEND_FL)
6619 +               inode->i_flags |= S_APPEND;
6620 +       if (flags & JFS_NOATIME_FL)
6621 +               inode->i_flags |= S_NOATIME;
6622 +       if (flags & JFS_DIRSYNC_FL)
6623 +               inode->i_flags |= S_DIRSYNC;
6624 +}
6625 +/* Copy the inode's current flag state back into mode2; on any change update ctime and mark the inode dirty. Always returns 0. */
6626 +int jfs_sync_flags(struct inode *inode)
6627 +{
6628 +       unsigned int oldflags, newflags;
6629 +
6630 +       oldflags = JFS_IP(inode)->mode2;
6631 +       newflags = oldflags & ~(JFS_APPEND_FL |
6632 +               JFS_IMMUTABLE_FL | JFS_IUNLINK_FL |
6633 +               JFS_BARRIER_FL | JFS_NOATIME_FL |
6634 +               JFS_SYNC_FL | JFS_DIRSYNC_FL);  /* start from mode2 with the mirrored bits cleared */
6635 +
6636 +       if (IS_APPEND(inode))
6637 +               newflags |= JFS_APPEND_FL;
6638 +       if (IS_IMMUTABLE(inode))
6639 +               newflags |= JFS_IMMUTABLE_FL;
6640 +       if (IS_IUNLINK(inode))
6641 +               newflags |= JFS_IUNLINK_FL;
6642 +       if (IS_BARRIER(inode))
6643 +               newflags |= JFS_BARRIER_FL;
6644 +
6645 +       /* we do not want to copy superblock flags */
6646 +       if (inode->i_flags & S_NOATIME)
6647 +               newflags |= JFS_NOATIME_FL;
6648 +       if (inode->i_flags & S_SYNC)
6649 +               newflags |= JFS_SYNC_FL;
6650 +       if (inode->i_flags & S_DIRSYNC)
6651 +               newflags |= JFS_DIRSYNC_FL;
6652 +
6653 +       if (oldflags ^ newflags) {      /* write back only when at least one bit differs */
6654 +               JFS_IP(inode)->mode2 = newflags;
6655 +               inode->i_ctime = CURRENT_TIME;
6656 +               mark_inode_dirty(inode);
6657 +       }
6658 +       return 0;
6659 +}
6660 +
6661  /*
6662   * NAME:       ialloc()
6663   *
6664 @@ -62,10 +124,17 @@ struct inode *ialloc(struct inode *paren
6665         } else
6666                 inode->i_gid = current->fsgid;
6667  
6668 +       inode->i_tag = dx_current_fstag(sb);
6669 +       if (DLIMIT_ALLOC_INODE(inode)) {
6670 +               iput(inode);
6671 +               return NULL;
6672 +       }
6673 +
6674         /*
6675          * Allocate inode to quota.
6676          */
6677         if (DQUOT_ALLOC_INODE(inode)) {
6678 +               DLIMIT_FREE_INODE(inode);
6679                 DQUOT_DROP(inode);
6680                 inode->i_flags |= S_NOQUOTA;
6681                 inode->i_nlink = 0;
6682 @@ -74,10 +143,20 @@ struct inode *ialloc(struct inode *paren
6683         }
6684  
6685         inode->i_mode = mode;
6686 -       if (S_ISDIR(mode))
6687 -               jfs_inode->mode2 = IDIRECTORY | mode;
6688 +       /* inherit flags from parent */
6689 +       jfs_inode->mode2 = JFS_IP(parent)->mode2 & JFS_FL_INHERIT;
6690 +
6691 +       if (S_ISDIR(mode)) {
6692 +               jfs_inode->mode2 |= IDIRECTORY;
6693 +               jfs_inode->mode2 &= ~JFS_DIRSYNC_FL;
6694 +       }
6695 +       else if (S_ISLNK(mode))
6696 +               jfs_inode->mode2 &=
6697 +                       ~(JFS_IMMUTABLE_FL|JFS_APPEND_FL);
6698         else
6699 -               jfs_inode->mode2 = INLINEEA | ISPARSE | mode;
6700 +               jfs_inode->mode2 |= INLINEEA | ISPARSE;
6701 +       jfs_inode->mode2 |= mode;
6702 +
6703         inode->i_blksize = sb->s_blocksize;
6704         inode->i_blocks = 0;
6705         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
6706 @@ -98,6 +177,7 @@ struct inode *ialloc(struct inode *paren
6707         jfs_inode->atlhead = 0;
6708         jfs_inode->atltail = 0;
6709         jfs_inode->xtlid = 0;
6710 +       jfs_set_inode_flags(inode);
6711  
6712         jfs_info("ialloc returns inode = 0x%p\n", inode);
6713  
6714 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/jfs_inode.h linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_inode.h
6715 --- linux-2.6.16.20/fs/jfs/jfs_inode.h  2005-08-29 22:25:32 +0200
6716 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_inode.h     2006-04-26 19:07:00 +0200
6717 @@ -20,6 +20,8 @@
6718  
6719  extern struct inode *ialloc(struct inode *, umode_t);
6720  extern int jfs_fsync(struct file *, struct dentry *, int);
6721 +extern int jfs_ioctl(struct inode *, struct file *,
6722 +                       unsigned int, unsigned long);
6723  extern void jfs_read_inode(struct inode *);
6724  extern int jfs_commit_inode(struct inode *, int);
6725  extern int jfs_write_inode(struct inode*, int);
6726 @@ -29,6 +31,8 @@ extern void jfs_truncate(struct inode *)
6727  extern void jfs_truncate_nolock(struct inode *, loff_t);
6728  extern void jfs_free_zero_link(struct inode *);
6729  extern struct dentry *jfs_get_parent(struct dentry *dentry);
6730 +extern int jfs_sync_flags(struct inode *);
6731 +extern void jfs_set_inode_flags(struct inode *);
6732  
6733  extern struct address_space_operations jfs_aops;
6734  extern struct inode_operations jfs_dir_inode_operations;
6735 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/jfs_xtree.c linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_xtree.c
6736 --- linux-2.6.16.20/fs/jfs/jfs_xtree.c  2006-01-03 17:29:57 +0100
6737 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_xtree.c     2006-04-26 19:07:00 +0200
6738 @@ -21,6 +21,7 @@
6739  
6740  #include <linux/fs.h>
6741  #include <linux/quotaops.h>
6742 +#include <linux/vs_dlimit.h>
6743  #include "jfs_incore.h"
6744  #include "jfs_filsys.h"
6745  #include "jfs_metapage.h"
6746 @@ -841,7 +842,12 @@ int xtInsert(tid_t tid,            /* transaction 
6747                         hint = 0;
6748                 if ((rc = DQUOT_ALLOC_BLOCK(ip, xlen)))
6749                         goto out;
6750 +               if ((rc = DLIMIT_ALLOC_BLOCK(ip, xlen))) {
6751 +                       DQUOT_FREE_BLOCK(ip, xlen);
6752 +                       goto out;
6753 +               }
6754                 if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
6755 +                       DLIMIT_FREE_BLOCK(ip, xlen);
6756                         DQUOT_FREE_BLOCK(ip, xlen);
6757                         goto out;
6758                 }
6759 @@ -871,6 +877,7 @@ int xtInsert(tid_t tid,             /* transaction 
6760                         /* undo data extent allocation */
6761                         if (*xaddrp == 0) {
6762                                 dbFree(ip, xaddr, (s64) xlen);
6763 +                               DLIMIT_FREE_BLOCK(ip, xlen);
6764                                 DQUOT_FREE_BLOCK(ip, xlen);
6765                         }
6766                         return rc;
6767 @@ -1231,6 +1238,7 @@ xtSplitPage(tid_t tid, struct inode *ip,
6768         struct tlock *tlck;
6769         struct xtlock *sxtlck = NULL, *rxtlck = NULL;
6770         int quota_allocation = 0;
6771 +       int dlimit_allocation = 0;
6772  
6773         smp = split->mp;
6774         sp = XT_PAGE(ip, smp);
6775 @@ -1243,13 +1251,20 @@ xtSplitPage(tid_t tid, struct inode *ip,
6776         rbn = addressPXD(pxd);
6777  
6778         /* Allocate blocks to quota. */
6779 -       if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
6780 +       if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
6781                rc = -EDQUOT;
6782                goto clean_up;
6783         }
6784  
6785         quota_allocation += lengthPXD(pxd);
6786  
6787 +       /* Allocate blocks to dlimit. */
6788 +       if (DLIMIT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
6789 +              rc = -ENOSPC;
6790 +              goto clean_up;
6791 +       }
6792 +       dlimit_allocation += lengthPXD(pxd);
6793 +
6794         /*
6795          * allocate the new right page for the split
6796          */
6797 @@ -1451,6 +1466,9 @@ xtSplitPage(tid_t tid, struct inode *ip,
6798  
6799        clean_up:
6800  
6801 +       /* Rollback dlimit allocation. */
6802 +       if (dlimit_allocation)
6803 +               DLIMIT_FREE_BLOCK(ip, dlimit_allocation);
6804         /* Rollback quota allocation. */
6805         if (quota_allocation)
6806                 DQUOT_FREE_BLOCK(ip, quota_allocation);
6807 @@ -1515,6 +1533,12 @@ xtSplitRoot(tid_t tid,
6808                 release_metapage(rmp);
6809                 return -EDQUOT;
6810         }
6811 +       /* Allocate blocks to dlimit. */
6812 +       if (DLIMIT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
6813 +               DQUOT_FREE_BLOCK(ip, lengthPXD(pxd));
6814 +               release_metapage(rmp);
6815 +               return -ENOSPC;
6816 +       }
6817  
6818         jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);
6819  
6820 @@ -3941,6 +3965,8 @@ s64 xtTruncate(tid_t tid, struct inode *
6821         else
6822                 ip->i_size = newsize;
6823  
6824 +       /* update dlimit allocation to reflect freed blocks */
6825 +       DLIMIT_FREE_BLOCK(ip, nfreed);
6826         /* update quota allocation to reflect freed blocks */
6827         DQUOT_FREE_BLOCK(ip, nfreed);
6828  
6829 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/namei.c linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/namei.c
6830 --- linux-2.6.16.20/fs/jfs/namei.c      2006-01-03 17:29:57 +0100
6831 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/namei.c 2006-04-26 19:07:00 +0200
6832 @@ -20,6 +20,7 @@
6833  #include <linux/fs.h>
6834  #include <linux/ctype.h>
6835  #include <linux/quotaops.h>
6836 +#include <linux/vserver/tag.h>
6837  #include "jfs_incore.h"
6838  #include "jfs_superblock.h"
6839  #include "jfs_inode.h"
6840 @@ -1465,6 +1466,7 @@ static struct dentry *jfs_lookup(struct 
6841                 return ERR_PTR(-EACCES);
6842         }
6843  
6844 +       dx_propagate_tag(nd, ip);
6845         dentry = d_splice_alias(ip, dentry);
6846  
6847         if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2))
6848 @@ -1517,12 +1519,14 @@ struct inode_operations jfs_dir_inode_op
6849         .setattr        = jfs_setattr,
6850         .permission     = jfs_permission,
6851  #endif
6852 +       .sync_flags     = jfs_sync_flags,
6853  };
6854  
6855  struct file_operations jfs_dir_operations = {
6856         .read           = generic_read_dir,
6857         .readdir        = jfs_readdir,
6858         .fsync          = jfs_fsync,
6859 +       .ioctl          = jfs_ioctl,
6860  };
6861  
6862  static int jfs_ci_hash(struct dentry *dir, struct qstr *this)
6863 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/super.c linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/super.c
6864 --- linux-2.6.16.20/fs/jfs/super.c      2006-02-18 14:40:22 +0100
6865 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/super.c 2006-04-26 19:07:00 +0200
6866 @@ -195,7 +195,7 @@ static void jfs_put_super(struct super_b
6867  enum {
6868         Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize,
6869         Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota,
6870 -       Opt_usrquota, Opt_grpquota
6871 +       Opt_usrquota, Opt_grpquota, Opt_tag, Opt_notag, Opt_tagid
6872  };
6873  
6874  static match_table_t tokens = {
6875 @@ -205,6 +205,10 @@ static match_table_t tokens = {
6876         {Opt_resize, "resize=%u"},
6877         {Opt_resize_nosize, "resize"},
6878         {Opt_errors, "errors=%s"},
6879 +       {Opt_tag, "tag"},
6880 +       {Opt_notag, "notag"},
6881 +       {Opt_tagid, "tagid=%u"},
6882 +       {Opt_tag, "tagxid"},
6883         {Opt_ignore, "noquota"},
6884         {Opt_ignore, "quota"},
6885         {Opt_usrquota, "usrquota"},
6886 @@ -313,6 +317,20 @@ static int parse_options(char *options, 
6887                                "JFS: quota operations not supported\n");
6888                         break;
6889  #endif
6890 +#ifndef CONFIG_TAGGING_NONE
6891 +               case Opt_tag:
6892 +                       *flag |= JFS_TAGGED;
6893 +                       break;
6894 +               case Opt_notag:
6895 +                       *flag &= ~JFS_TAGGED;   /* clear the tag bit only */
6896 +                       break;
6897 +#endif
6898 +#ifdef CONFIG_PROPAGATE
6899 +               case Opt_tagid:
6900 +                       /* use args[0] */
6901 +                       *flag |= JFS_TAGGED;
6902 +                       break;
6903 +#endif
6904  
6905                 default:
6906                         printk("jfs: Unrecognized mount option \"%s\" "
6907 @@ -344,6 +362,13 @@ static int jfs_remount(struct super_bloc
6908         if (!parse_options(data, sb, &newLVSize, &flag)) {
6909                 return -EINVAL;
6910         }
6911 +
6912 +       if ((flag & JFS_TAGGED) && !(sb->s_flags & MS_TAGGED)) {
6913 +               printk(KERN_ERR "JFS: %s: tagging not permitted on remount.\n",
6914 +                       sb->s_id);
6915 +               return -EINVAL;
6916 +       }
6917 +
6918         if (newLVSize) {
6919                 if (sb->s_flags & MS_RDONLY) {
6920                         printk(KERN_ERR
6921 @@ -415,6 +440,9 @@ static int jfs_fill_super(struct super_b
6922  #ifdef CONFIG_JFS_POSIX_ACL
6923         sb->s_flags |= MS_POSIXACL;
6924  #endif
6925 +       /* map mount option tagxid */
6926 +       if (sbi->flag & JFS_TAGGED)
6927 +               sb->s_flags |= MS_TAGGED;
6928  
6929         if (newLVSize) {
6930                 printk(KERN_ERR "resize option for remount only\n");
6931 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/xattr.c linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/xattr.c
6932 --- linux-2.6.16.20/fs/jfs/xattr.c      2006-02-18 14:40:22 +0100
6933 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/xattr.c 2006-04-26 19:07:00 +0200
6934 @@ -23,6 +23,7 @@
6935  #include <linux/posix_acl_xattr.h>
6936  #include <linux/quotaops.h>
6937  #include <linux/security.h>
6938 +#include <linux/vs_dlimit.h>
6939  #include "jfs_incore.h"
6940  #include "jfs_superblock.h"
6941  #include "jfs_dmap.h"
6942 @@ -263,9 +264,16 @@ static int ea_write(struct inode *ip, st
6943         if (DQUOT_ALLOC_BLOCK(ip, nblocks)) {
6944                 return -EDQUOT;
6945         }
6946 +       /* Allocate new blocks to dlimit. */
6947 +       if (DLIMIT_ALLOC_BLOCK(ip, nblocks)) {
6948 +               DQUOT_FREE_BLOCK(ip, nblocks);
6949 +               return -ENOSPC;
6950 +       }
6951  
6952         rc = dbAlloc(ip, INOHINT(ip), nblocks, &blkno);
6953         if (rc) {
6954 +               /*Rollback dlimit allocation. */
6955 +               DLIMIT_FREE_BLOCK(ip, nblocks);
6956                 /*Rollback quota allocation. */
6957                 DQUOT_FREE_BLOCK(ip, nblocks);
6958                 return rc;
6959 @@ -332,6 +340,8 @@ static int ea_write(struct inode *ip, st
6960  
6961        failed:
6962 +       /* Rollback dlimit allocation. */
6963 +       DLIMIT_FREE_BLOCK(ip, nblocks);
6964         /* Rollback quota allocation. */
6965         DQUOT_FREE_BLOCK(ip, nblocks);
6966  
6967         dbFree(ip, blkno, nblocks);
6968 @@ -468,6 +478,7 @@ static int ea_get(struct inode *inode, s
6969         s64 blkno;
6970         int rc;
6971         int quota_allocation = 0;
6972 +       int dlimit_allocation = 0;
6973  
6974         /* When fsck.jfs clears a bad ea, it doesn't clear the size */
6975         if (ji->ea.flag == 0)
6976 @@ -543,6 +554,12 @@ static int ea_get(struct inode *inode, s
6977  
6978                 quota_allocation = blocks_needed;
6979  
6980 +               /* Allocate new blocks to dlimit. */
6981 +               rc = -ENOSPC;
6982 +               if (DLIMIT_ALLOC_BLOCK(inode, blocks_needed))
6983 +                       goto clean_up;
6984 +               dlimit_allocation = blocks_needed;
6985 +
6986                 rc = dbAlloc(inode, INOHINT(inode), (s64) blocks_needed,
6987                              &blkno);
6988                 if (rc)
6989 @@ -599,6 +616,9 @@ static int ea_get(struct inode *inode, s
6990         return ea_size;
6991  
6992        clean_up:
6993 +       /* Rollback dlimit allocation */
6994 +       if (dlimit_allocation)
6995 +               DLIMIT_FREE_BLOCK(inode, dlimit_allocation);
6996         /* Rollback quota allocation */
6997         if (quota_allocation)
6998                 DQUOT_FREE_BLOCK(inode, quota_allocation);
6999 @@ -675,8 +695,10 @@ static int ea_put(tid_t tid, struct inod
7000         }
7001  
7002         /* If old blocks exist, they must be removed from quota allocation. */
7003 -       if (old_blocks)
7004 +       if (old_blocks) {
7005 +               DLIMIT_FREE_BLOCK(inode, old_blocks);
7006                 DQUOT_FREE_BLOCK(inode, old_blocks);
7007 +       }
7008  
7009         inode->i_ctime = CURRENT_TIME;
7010  
7011 diff -NurpP --minimal linux-2.6.16.20/fs/libfs.c linux-2.6.16.20-vs2.1.1-rc22/fs/libfs.c
7012 --- linux-2.6.16.20/fs/libfs.c  2006-02-18 14:40:22 +0100
7013 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/libfs.c     2006-04-26 19:07:00 +0200
7014 @@ -122,7 +122,8 @@ static inline unsigned char dt_type(stru
7015   * both impossible due to the lock on directory.
7016   */
7017  
7018 -int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
7019 +static inline int do_dcache_readdir_filter(struct file * filp,
7020 +       void * dirent, filldir_t filldir, int (*filter)(struct dentry *dentry))
7021  {
7022         struct dentry *dentry = filp->f_dentry;
7023         struct dentry *cursor = filp->private_data;
7024 @@ -156,6 +157,8 @@ int dcache_readdir(struct file * filp, v
7025                                 next = list_entry(p, struct dentry, d_u.d_child);
7026                                 if (d_unhashed(next) || !next->d_inode)
7027                                         continue;
7028 +                               if (filter && !filter(next))
7029 +                                       continue;
7030  
7031                                 spin_unlock(&dcache_lock);
7032                                 if (filldir(dirent, next->d_name.name, next->d_name.len, filp->f_pos, next->d_inode->i_ino, dt_type(next->d_inode)) < 0)
7033 @@ -172,6 +175,18 @@ int dcache_readdir(struct file * filp, v
7034         return 0;
7035  }
7036  
7037 +int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
7038 +{
7039 +       return do_dcache_readdir_filter(filp, dirent, filldir, NULL);
7040 +}
7041 +
7042 +int dcache_readdir_filter(struct file * filp, void * dirent, filldir_t filldir,
7043 +       int (*filter)(struct dentry *))
7044 +{
7045 +       return do_dcache_readdir_filter(filp, dirent, filldir, filter);
7046 +}
7047 +
7048 +
7049  ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
7050  {
7051         return -EISDIR;
7052 @@ -621,6 +636,7 @@ EXPORT_SYMBOL(dcache_dir_close);
7053  EXPORT_SYMBOL(dcache_dir_lseek);
7054  EXPORT_SYMBOL(dcache_dir_open);
7055  EXPORT_SYMBOL(dcache_readdir);
7056 +EXPORT_SYMBOL(dcache_readdir_filter);
7057  EXPORT_SYMBOL(generic_read_dir);
7058  EXPORT_SYMBOL(get_sb_pseudo);
7059  EXPORT_SYMBOL(simple_commit_write);
7060 diff -NurpP --minimal linux-2.6.16.20/fs/lockd/clntproc.c linux-2.6.16.20-vs2.1.1-rc22/fs/lockd/clntproc.c
7061 --- linux-2.6.16.20/fs/lockd/clntproc.c 2006-04-09 13:49:53 +0200
7062 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/lockd/clntproc.c    2006-04-26 19:07:00 +0200
7063 @@ -14,6 +14,7 @@
7064  #include <linux/nfs_fs.h>
7065  #include <linux/utsname.h>
7066  #include <linux/smp_lock.h>
7067 +#include <linux/vs_cvirt.h>
7068  #include <linux/sunrpc/clnt.h>
7069  #include <linux/sunrpc/svc.h>
7070  #include <linux/lockd/lockd.h>
7071 @@ -130,10 +131,10 @@ static void nlmclnt_setlockargs(struct n
7072         nlmclnt_next_cookie(&argp->cookie);
7073         argp->state   = nsm_local_state;
7074         memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh));
7075 -       lock->caller  = system_utsname.nodename;
7076 +       lock->caller  = vx_new_uts(nodename);
7077         lock->oh.data = req->a_owner;
7078         lock->oh.len  = sprintf(req->a_owner, "%d@%s",
7079 -                               current->pid, system_utsname.nodename);
7080 +                               current->pid, vx_new_uts(nodename));
7081         locks_copy_lock(&lock->fl, fl);
7082  }
7083  
7084 @@ -154,7 +155,7 @@ nlmclnt_setgrantargs(struct nlm_rqst *ca
7085  {
7086         locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
7087         memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
7088 -       call->a_args.lock.caller = system_utsname.nodename;
7089 +       call->a_args.lock.caller = vx_new_uts(nodename);
7090         call->a_args.lock.oh.len = lock->oh.len;
7091  
7092         /* set default data area */
7093 diff -NurpP --minimal linux-2.6.16.20/fs/lockd/mon.c linux-2.6.16.20-vs2.1.1-rc22/fs/lockd/mon.c
7094 --- linux-2.6.16.20/fs/lockd/mon.c      2006-01-18 06:08:30 +0100
7095 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/lockd/mon.c 2006-04-26 19:07:00 +0200
7096 @@ -13,6 +13,7 @@
7097  #include <linux/sunrpc/svc.h>
7098  #include <linux/lockd/lockd.h>
7099  #include <linux/lockd/sm_inter.h>
7100 +#include <linux/vs_cvirt.h>
7101  
7102  
7103  #define NLMDBG_FACILITY                NLMDBG_MONITOR
7104 @@ -147,7 +148,7 @@ xdr_encode_common(struct rpc_rqst *rqstp
7105          */
7106         sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr));
7107         if (!(p = xdr_encode_string(p, buffer))
7108 -        || !(p = xdr_encode_string(p, system_utsname.nodename)))
7109 +        || !(p = xdr_encode_string(p, vx_new_uts(nodename))))
7110                 return ERR_PTR(-EIO);
7111         *p++ = htonl(argp->prog);
7112         *p++ = htonl(argp->vers);
7113 diff -NurpP --minimal linux-2.6.16.20/fs/locks.c linux-2.6.16.20-vs2.1.1-rc22/fs/locks.c
7114 --- linux-2.6.16.20/fs/locks.c  2006-05-22 16:25:40 +0200
7115 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/locks.c     2006-06-02 17:45:59 +0200
7116 @@ -125,6 +125,7 @@
7117  #include <linux/syscalls.h>
7118  #include <linux/time.h>
7119  #include <linux/rcupdate.h>
7120 +#include <linux/vs_limit.h>
7121  
7122  #include <asm/semaphore.h>
7123  #include <asm/uaccess.h>
7124 @@ -150,6 +151,8 @@ static kmem_cache_t *filelock_cache;
7125  /* Allocate an empty lock structure. */
7126  static struct file_lock *locks_alloc_lock(void)
7127  {
7128 +       if (!vx_locks_avail(1))
7129 +               return NULL;
7130         return kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
7131  }
7132  
7133 @@ -160,6 +163,9 @@ static void locks_free_lock(struct file_
7134                 BUG();
7135                 return;
7136         }
7137 +
7138 +       vx_locks_dec(fl);
7139 +
7140         if (waitqueue_active(&fl->fl_wait))
7141                 panic("Attempting to free lock with active wait queue");
7142  
7143 @@ -199,6 +205,7 @@ void locks_init_lock(struct file_lock *f
7144         fl->fl_start = fl->fl_end = 0;
7145         fl->fl_ops = NULL;
7146         fl->fl_lmops = NULL;
7147 +       fl->fl_xid = -1;
7148  }
7149  
7150  EXPORT_SYMBOL(locks_init_lock);
7151 @@ -236,6 +243,8 @@ void locks_copy_lock(struct file_lock *n
7152                 fl->fl_ops->fl_copy_lock(new, fl);
7153         if (fl->fl_lmops && fl->fl_lmops->fl_copy_lock)
7154                 fl->fl_lmops->fl_copy_lock(new, fl);
7155 +
7156 +       new->fl_xid = fl->fl_xid;
7157  }
7158  
7159  EXPORT_SYMBOL(locks_copy_lock);
7160 @@ -272,6 +281,11 @@ static int flock_make_lock(struct file *
7161         fl->fl_flags = FL_FLOCK;
7162         fl->fl_type = type;
7163         fl->fl_end = OFFSET_MAX;
7164 +
7165 +       vxd_assert(filp->f_xid == vx_current_xid(),
7166 +               "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
7167 +       fl->fl_xid = filp->f_xid;
7168 +       vx_locks_inc(fl);
7169         
7170         *lock = fl;
7171         return 0;
7172 @@ -437,6 +451,7 @@ static int lease_init(struct file *filp,
7173  
7174         fl->fl_owner = current->files;
7175         fl->fl_pid = current->tgid;
7176 +       fl->fl_xid = vx_current_xid();
7177  
7178         fl->fl_file = filp;
7179         fl->fl_flags = FL_LEASE;
7180 @@ -456,6 +471,11 @@ static int lease_alloc(struct file *filp
7181         if (fl == NULL)
7182                 goto out;
7183  
7184 +       fl->fl_xid = vx_current_xid();
7185 +       if (filp)
7186 +               vxd_assert(filp->f_xid == fl->fl_xid,
7187 +                       "f_xid(%d) == fl_xid(%d)", filp->f_xid, fl->fl_xid);
7188 +       vx_locks_inc(fl);
7189         error = lease_init(filp, type, fl);
7190         if (error) {
7191                 locks_free_lock(fl);
7192 @@ -765,6 +785,7 @@ static int flock_lock_file(struct file *
7193                 goto out;
7194         }
7195         locks_copy_lock(new_fl, request);
7196 +       vx_locks_inc(new_fl);
7197         locks_insert_lock(&inode->i_flock, new_fl);
7198         new_fl = NULL;
7199  
7200 @@ -777,7 +798,7 @@ out:
7201  
7202  EXPORT_SYMBOL(posix_lock_file);
7203  
7204 -static int __posix_lock_file(struct inode *inode, struct file_lock *request)
7205 +static int __posix_lock_file(struct inode *inode, struct file_lock *request, xid_t xid)
7206  {
7207         struct file_lock *fl;
7208         struct file_lock *new_fl, *new_fl2;
7209 @@ -786,12 +807,23 @@ static int __posix_lock_file(struct inod
7210         struct file_lock **before;
7211         int error, added = 0;
7212  
7213 +       vxd_assert(xid == vx_current_xid(),
7214 +               "xid(%d) == current(%d)", xid, vx_current_xid());
7215         /*
7216          * We may need two file_lock structures for this operation,
7217          * so we get them in advance to avoid races.
7218          */
7219         new_fl = locks_alloc_lock();
+       /* locks_alloc_lock() yields NULL when vx_locks_avail() refuses */
+       if (new_fl) {
7220 +               new_fl->fl_xid = xid;
7221 +               vx_locks_inc(new_fl);
+       }
7222         new_fl2 = locks_alloc_lock();
+       if (new_fl2) {
7223 +               new_fl2->fl_xid = xid;
7224 +               vx_locks_inc(new_fl2);
+       }
7225  
7226         lock_kernel();
7227         if (request->fl_type != F_UNLCK) {
7228 @@ -969,7 +996,7 @@ static int __posix_lock_file(struct inod
7229   */
7230  int posix_lock_file(struct file *filp, struct file_lock *fl)
7231  {
7232 -       return __posix_lock_file(filp->f_dentry->d_inode, fl);
7233 +       return __posix_lock_file(filp->f_dentry->d_inode, fl, filp->f_xid);
7234  }
7235  
7236  /**
7237 @@ -986,7 +1013,8 @@ int posix_lock_file_wait(struct file *fi
7238         int error;
7239         might_sleep ();
7240         for (;;) {
7241 -               error = __posix_lock_file(filp->f_dentry->d_inode, fl);
7242 +               error = __posix_lock_file(filp->f_dentry->d_inode,
7243 +                       fl, filp->f_xid);
7244                 if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP))
7245                         break;
7246                 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
7247 @@ -1058,7 +1086,7 @@ int locks_mandatory_area(int read_write,
7248         fl.fl_end = offset + count - 1;
7249  
7250         for (;;) {
7251 -               error = __posix_lock_file(inode, &fl);
7252 +               error = __posix_lock_file(inode, &fl, filp->f_xid);
7253                 if (error != -EAGAIN)
7254                         break;
7255                 if (!(fl.fl_flags & FL_SLEEP))
7256 @@ -1618,6 +1646,11 @@ int fcntl_setlk(unsigned int fd, struct 
7257         if (file_lock == NULL)
7258                 return -ENOLCK;
7259  
7260 +       vxd_assert(filp->f_xid == vx_current_xid(),
7261 +               "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
7262 +       file_lock->fl_xid = filp->f_xid;
7263 +       vx_locks_inc(file_lock);
7264 +
7265         /*
7266          * This might block, so we do it before checking the inode.
7267          */
7268 @@ -1670,7 +1703,8 @@ again:
7269                 error = filp->f_op->lock(filp, cmd, file_lock);
7270         else {
7271                 for (;;) {
7272 -                       error = __posix_lock_file(inode, file_lock);
7273 +                       error = __posix_lock_file(inode, file_lock,
7274 +                               filp->f_xid);
7275                         if ((error != -EAGAIN) || (cmd == F_SETLK))
7276                                 break;
7277                         error = wait_event_interruptible(file_lock->fl_wait,
7278 @@ -1761,6 +1795,11 @@ int fcntl_setlk64(unsigned int fd, struc
7279         if (file_lock == NULL)
7280                 return -ENOLCK;
7281  
7282 +       vxd_assert(filp->f_xid == vx_current_xid(),
7283 +               "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
7284 +       file_lock->fl_xid = filp->f_xid;
7285 +       vx_locks_inc(file_lock);
7286 +
7287         /*
7288          * This might block, so we do it before checking the inode.
7289          */
7290 @@ -1813,7 +1852,8 @@ again:
7291                 error = filp->f_op->lock(filp, cmd, file_lock);
7292         else {
7293                 for (;;) {
7294 -                       error = __posix_lock_file(inode, file_lock);
7295 +                       error = __posix_lock_file(inode, file_lock,
7296 +                               filp->f_xid);
7297                         if ((error != -EAGAIN) || (cmd == F_SETLK64))
7298                                 break;
7299                         error = wait_event_interruptible(file_lock->fl_wait,
7300 @@ -2086,6 +2126,10 @@ int get_locks_status(char *buffer, char 
7301         list_for_each(tmp, &file_lock_list) {
7302                 struct list_head *btmp;
7303                 struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link);
7304 +
7305 +               if (!vx_check(fl->fl_xid, VX_IDENT|VX_WATCH))
7306 +                       continue;
7307 +
7308                 lock_get_status(q, fl, ++i, "");
7309                 move_lock_status(&q, &pos, offset);
7310  
7311 diff -NurpP --minimal linux-2.6.16.20/fs/namei.c linux-2.6.16.20-vs2.1.1-rc22/fs/namei.c
7312 --- linux-2.6.16.20/fs/namei.c  2006-04-09 13:49:53 +0200
7313 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/namei.c     2006-05-21 23:34:46 +0200
7314 @@ -32,6 +32,10 @@
7315  #include <linux/file.h>
7316  #include <linux/fcntl.h>
7317  #include <linux/namei.h>
7318 +#include <linux/proc_fs.h>
7319 +#include <linux/vserver/inode.h>
7320 +#include <linux/vs_tag.h>
7321 +#include <linux/vserver/debug.h>
7322  #include <asm/namei.h>
7323  #include <asm/uaccess.h>
7324  
7325 @@ -225,6 +229,24 @@ int generic_permission(struct inode *ino
7326         return -EACCES;
7327  }
7328  
7329 +/* Barrier and inode-tag gate; returns 0 to continue, -EACCES to refuse. */
7330 +static inline int dx_permission(struct inode *inode, int mask, struct nameidata *nd)
7331 +{
7332 +       if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN)) {
7333 +               vxwprintk(1, "xid=%d did hit the barrier.", vx_current_xid());
7334 +               return -EACCES;
7335 +       }
7336 +       /* untagged inodes pass, as does any tag accepted by dx_check() */
7337 +       if (inode->i_tag == 0 ||
7338 +           dx_check(inode->i_tag, DX_ADMIN|DX_WATCH|DX_IDENT))
7339 +               return 0;
7340 +
7341 +       vxwprintk(1, "xid=%d denied access to %p[#%d,%lu] »%s«.",
7342 +               vx_current_xid(), inode, inode->i_tag, inode->i_ino,
7343 +               vxd_cond_path(nd));
7344 +       return -EACCES;
7345 +}
7346 +
7347  int permission(struct inode *inode, int mask, struct nameidata *nd)
7348  {
7349         int retval, submask;
7350 @@ -235,7 +257,7 @@ int permission(struct inode *inode, int 
7351                 /*
7352                  * Nobody gets write access to a read-only fs.
7353                  */
7354 -               if (IS_RDONLY(inode) &&
7355 +               if ((IS_RDONLY(inode) || (nd && MNT_IS_RDONLY(nd->mnt))) &&
7356                     (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
7357                         return -EROFS;
7358  
7359 @@ -249,6 +271,8 @@ int permission(struct inode *inode, int 
7360  
7361         /* Ordinary permission routines do not understand MAY_APPEND. */
7362         submask = mask & ~MAY_APPEND;
7363 +       if ((retval = dx_permission(inode, mask, nd)))
7364 +               return retval;
7365         if (inode->i_op && inode->i_op->permission)
7366                 retval = inode->i_op->permission(inode, submask, nd);
7367         else
7368 @@ -702,7 +726,8 @@ static __always_inline void follow_dotdo
7369                 if (nd->dentry == current->fs->root &&
7370                     nd->mnt == current->fs->rootmnt) {
7371                          read_unlock(&current->fs->lock);
7372 -                       break;
7373 +                       /* for sane '/' avoid follow_mount() */
7374 +                       return;
7375                 }
7376                  read_unlock(&current->fs->lock);
7377                 spin_lock(&dcache_lock);
7378 @@ -739,16 +764,34 @@ static int do_lookup(struct nameidata *n
7379  {
7380         struct vfsmount *mnt = nd->mnt;
7381         struct dentry *dentry = __d_lookup(nd->dentry, name);
7382 +       struct inode *inode;
7383  
7384         if (!dentry)
7385                 goto need_lookup;
7386         if (dentry->d_op && dentry->d_op->d_revalidate)
7387                 goto need_revalidate;
7388 +       inode = dentry->d_inode;
7389 +       if (!inode)
7390 +               goto done;
7391 +       if (inode->i_sb->s_magic == PROC_SUPER_MAGIC) {
7392 +               struct proc_dir_entry *de = PDE(inode);
7393 +
7394 +               if (de && !vx_hide_check(0, de->vx_flags))
7395 +                       goto hidden;
7396 +       }
7397 +       if (!dx_check(inode->i_tag, DX_WATCH|DX_ADMIN|DX_HOSTID|DX_IDENT))
7398 +               goto hidden;
7399  done:
7400         path->mnt = mnt;
7401         path->dentry = dentry;
7402         __follow_mount(path);
7403         return 0;
7404 +hidden:
7405 +       vxwprintk(1, "xid=%d did lookup hidden %p[#%d,%lu] »%s«.",
7406 +               vx_current_xid(), inode, inode->i_tag, inode->i_ino,
7407 +               vxd_path(dentry, mnt));
7408 +       dput(dentry);
7409 +       return -ENOENT;
7410  
7411  need_lookup:
7412         dentry = real_lookup(nd->dentry, name, nd);
7413 @@ -1345,7 +1388,8 @@ static inline int check_sticky(struct in
7414   * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
7415   *     nfs_async_unlink().
7416   */
7417 -static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
7418 +static int may_delete(struct inode *dir, struct dentry *victim,
7419 +       int isdir, struct nameidata *nd)
7420  {
7421         int error;
7422  
7423 @@ -1354,13 +1398,13 @@ static int may_delete(struct inode *dir,
7424  
7425         BUG_ON(victim->d_parent->d_inode != dir);
7426  
7427 -       error = permission(dir,MAY_WRITE | MAY_EXEC, NULL);
7428 +       error = permission(dir,MAY_WRITE | MAY_EXEC, nd);
7429         if (error)
7430                 return error;
7431         if (IS_APPEND(dir))
7432                 return -EPERM;
7433         if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
7434 -           IS_IMMUTABLE(victim->d_inode))
7435 +               IS_IXORUNLINK(victim->d_inode))
7436                 return -EPERM;
7437         if (isdir) {
7438                 if (!S_ISDIR(victim->d_inode->i_mode))
7439 @@ -1491,6 +1535,14 @@ int may_open(struct nameidata *nd, int a
7440         if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
7441                 return -EISDIR;
7442  
7443 +#ifdef CONFIG_VSERVER_COWBL
7444 +       if (IS_COW(inode) && (flag & FMODE_WRITE)) {
7445 +               if (IS_COW_LINK(inode))
7446 +                       return -EMLINK;
7447 +               inode->i_flags &= ~(S_IUNLINK|S_IMMUTABLE);
7448 +               mark_inode_dirty(inode);
7449 +       }
7450 +#endif
7451         error = vfs_permission(nd, acc_mode);
7452         if (error)
7453                 return error;
7454 @@ -1507,7 +1559,8 @@ int may_open(struct nameidata *nd, int a
7455                         return -EACCES;
7456  
7457                 flag &= ~O_TRUNC;
7458 -       } else if (IS_RDONLY(inode) && (flag & FMODE_WRITE))
7459 +       } else if ((IS_RDONLY(inode) || MNT_IS_RDONLY(nd->mnt))
7460 +               && (flag & FMODE_WRITE))
7461                 return -EROFS;
7462         /*
7463          * An append-only file must be opened in append mode for writing.
7464 @@ -1555,6 +1608,8 @@ int may_open(struct nameidata *nd, int a
7465         return 0;
7466  }
7467  
7468 +int cow_break_link(struct dentry *dentry, const char *pathname);
7469 +
7470  /*
7471   *     open_namei()
7472   *
7473 @@ -1577,6 +1632,11 @@ int open_namei(int dfd, const char *path
7474         struct dentry *dir;
7475         int count = 0;
7476  
7477 +#ifdef CONFIG_VSERVER_COWBL
7478 +       int rflag = flag;
7479 +       int rmode = mode;
7480 +restart:
7481 +#endif
7482         acc_mode = ACC_MODE(flag);
7483  
7484         /* O_TRUNC implies we need access checks for write permissions */
7485 @@ -1670,6 +1730,18 @@ do_last:
7486                 goto exit;
7487  ok:
7488         error = may_open(nd, acc_mode, flag);
7489 +#ifdef CONFIG_VSERVER_COWBL
7490 +       if (error == -EMLINK) {
7491 +               error = cow_break_link(path.dentry, pathname);
7492 +               if (error)
7493 +                       goto exit;
7494 +               path_release(nd);
7495 +               vxdprintk(VXD_CBIT(misc, 2), "restarting open_namei() ...");
7496 +               flag = rflag;
7497 +               mode = rmode;
7498 +               goto restart;
7499 +       }
7500 +#endif
7501         if (error)
7502                 goto exit;
7503         return 0;
7504 @@ -1773,9 +1845,10 @@ fail:
7505  }
7506  EXPORT_SYMBOL_GPL(lookup_create);
7507  
7508 -int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
7509 +int vfs_mknod(struct inode *dir, struct dentry *dentry,
7510 +       int mode, dev_t dev, struct nameidata *nd)
7511  {
7512 -       int error = may_create(dir, dentry, NULL);
7513 +       int error = may_create(dir, dentry, nd);
7514  
7515         if (error)
7516                 return error;
7517 @@ -1825,11 +1898,12 @@ asmlinkage long sys_mknodat(int dfd, con
7518                         error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd);
7519                         break;
7520                 case S_IFCHR: case S_IFBLK:
7521 -                       error = vfs_mknod(nd.dentry->d_inode,dentry,mode,
7522 -                                       new_decode_dev(dev));
7523 +                       error = vfs_mknod(nd.dentry->d_inode, dentry, mode,
7524 +                                       new_decode_dev(dev), &nd);
7525                         break;
7526                 case S_IFIFO: case S_IFSOCK:
7527 -                       error = vfs_mknod(nd.dentry->d_inode,dentry,mode,0);
7528 +                       error = vfs_mknod(nd.dentry->d_inode, dentry, mode,
7529 +                                       0, &nd);
7530                         break;
7531                 case S_IFDIR:
7532                         error = -EPERM;
7533 @@ -1852,9 +1926,10 @@ asmlinkage long sys_mknod(const char __u
7534         return sys_mknodat(AT_FDCWD, filename, mode, dev);
7535  }
7536  
7537 -int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
7538 +int vfs_mkdir(struct inode *dir, struct dentry *dentry,
7539 +       int mode, struct nameidata *nd)
7540  {
7541 -       int error = may_create(dir, dentry, NULL);
7542 +       int error = may_create(dir, dentry, nd);
7543  
7544         if (error)
7545                 return error;
7546 @@ -1893,7 +1968,8 @@ asmlinkage long sys_mkdirat(int dfd, con
7547                 if (!IS_ERR(dentry)) {
7548                         if (!IS_POSIXACL(nd.dentry->d_inode))
7549                                 mode &= ~current->fs->umask;
7550 -                       error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
7551 +                       error = vfs_mkdir(nd.dentry->d_inode, dentry,
7552 +                               mode, &nd);
7553                         dput(dentry);
7554                 }
7555                 mutex_unlock(&nd.dentry->d_inode->i_mutex);
7556 @@ -1938,9 +2014,10 @@ void dentry_unhash(struct dentry *dentry
7557         spin_unlock(&dcache_lock);
7558  }
7559  
7560 -int vfs_rmdir(struct inode *dir, struct dentry *dentry)
7561 +int vfs_rmdir(struct inode *dir, struct dentry *dentry,
7562 +       struct nameidata *nd)
7563  {
7564 -       int error = may_delete(dir, dentry, 1);
7565 +       int error = may_delete(dir, dentry, 1, nd);
7566  
7567         if (error)
7568                 return error;
7569 @@ -2001,7 +2078,7 @@ static long do_rmdir(int dfd, const char
7570         dentry = lookup_hash(&nd);
7571         error = PTR_ERR(dentry);
7572         if (!IS_ERR(dentry)) {
7573 -               error = vfs_rmdir(nd.dentry->d_inode, dentry);
7574 +               error = vfs_rmdir(nd.dentry->d_inode, dentry, &nd);
7575                 dput(dentry);
7576         }
7577         mutex_unlock(&nd.dentry->d_inode->i_mutex);
7578 @@ -2017,9 +2094,10 @@ asmlinkage long sys_rmdir(const char __u
7579         return do_rmdir(AT_FDCWD, pathname);
7580  }
7581  
7582 -int vfs_unlink(struct inode *dir, struct dentry *dentry)
7583 +int vfs_unlink(struct inode *dir, struct dentry *dentry,
7584 +       struct nameidata *nd)
7585  {
7586 -       int error = may_delete(dir, dentry, 0);
7587 +       int error = may_delete(dir, dentry, 0, nd);
7588  
7589         if (error)
7590                 return error;
7591 @@ -2081,7 +2159,7 @@ static long do_unlinkat(int dfd, const c
7592                 inode = dentry->d_inode;
7593                 if (inode)
7594                         atomic_inc(&inode->i_count);
7595 -               error = vfs_unlink(nd.dentry->d_inode, dentry);
7596 +               error = vfs_unlink(nd.dentry->d_inode, dentry, &nd);
7597         exit2:
7598                 dput(dentry);
7599         }
7600 @@ -2116,9 +2194,10 @@ asmlinkage long sys_unlink(const char __
7601         return do_unlinkat(AT_FDCWD, pathname);
7602  }
7603  
7604 -int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode)
7605 +int vfs_symlink(struct inode *dir, struct dentry *dentry,
7606 +       const char *oldname, int mode, struct nameidata *nd)
7607  {
7608 -       int error = may_create(dir, dentry, NULL);
7609 +       int error = may_create(dir, dentry, nd);
7610  
7611         if (error)
7612                 return error;
7613 @@ -2159,7 +2238,8 @@ asmlinkage long sys_symlinkat(const char
7614                 dentry = lookup_create(&nd, 0);
7615                 error = PTR_ERR(dentry);
7616                 if (!IS_ERR(dentry)) {
7617 -                       error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO);
7618 +                       error = vfs_symlink(nd.dentry->d_inode, dentry,
7619 +                               from, S_IALLUGO, &nd);
7620                         dput(dentry);
7621                 }
7622                 mutex_unlock(&nd.dentry->d_inode->i_mutex);
7623 @@ -2176,7 +2256,8 @@ asmlinkage long sys_symlink(const char _
7624         return sys_symlinkat(oldname, AT_FDCWD, newname);
7625  }
7626  
7627 -int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
7628 +int vfs_link(struct dentry *old_dentry, struct inode *dir,
7629 +       struct dentry *new_dentry, struct nameidata *nd)
7630  {
7631         struct inode *inode = old_dentry->d_inode;
7632         int error;
7633 @@ -2184,7 +2265,7 @@ int vfs_link(struct dentry *old_dentry, 
7634         if (!inode)
7635                 return -ENOENT;
7636  
7637 -       error = may_create(dir, new_dentry, NULL);
7638 +       error = may_create(dir, new_dentry, nd);
7639         if (error)
7640                 return error;
7641  
7642 @@ -2194,7 +2275,7 @@ int vfs_link(struct dentry *old_dentry, 
7643         /*
7644          * A link to an append-only or immutable file cannot be created.
7645          */
7646 -       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
7647 +       if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
7648                 return -EPERM;
7649         if (!dir->i_op || !dir->i_op->link)
7650                 return -EPERM;
7651 @@ -2251,7 +2332,8 @@ asmlinkage long sys_linkat(int olddfd, c
7652         new_dentry = lookup_create(&nd, 0);
7653         error = PTR_ERR(new_dentry);
7654         if (!IS_ERR(new_dentry)) {
7655 -               error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
7656 +               error = vfs_link(old_nd.dentry, nd.dentry->d_inode,
7657 +                       new_dentry, &nd);
7658                 dput(new_dentry);
7659         }
7660         mutex_unlock(&nd.dentry->d_inode->i_mutex);
7661 @@ -2383,14 +2465,14 @@ int vfs_rename(struct inode *old_dir, st
7662         if (old_dentry->d_inode == new_dentry->d_inode)
7663                 return 0;
7664   
7665 -       error = may_delete(old_dir, old_dentry, is_dir);
7666 +       error = may_delete(old_dir, old_dentry, is_dir, NULL);
7667         if (error)
7668                 return error;
7669  
7670         if (!new_dentry->d_inode)
7671                 error = may_create(new_dir, new_dentry, NULL);
7672         else
7673 -               error = may_delete(new_dir, new_dentry, is_dir);
7674 +               error = may_delete(new_dir, new_dentry, is_dir, NULL);
7675         if (error)
7676                 return error;
7677  
7678 @@ -2468,6 +2550,9 @@ static int do_rename(int olddfd, const c
7679         error = -EINVAL;
7680         if (old_dentry == trap)
7681                 goto exit4;
7682 +       error = -EROFS;
7683 +       if (MNT_IS_RDONLY(newnd.mnt))
7684 +               goto exit4;
7685         new_dentry = lookup_hash(&newnd);
7686         error = PTR_ERR(new_dentry);
7687         if (IS_ERR(new_dentry))
7688 @@ -2561,6 +2646,125 @@ int vfs_follow_link(struct nameidata *nd
7689         return __vfs_follow_link(nd, link);
7690  }
7691  
7692 +
7693 +#ifdef CONFIG_VSERVER_COWBL
7694 +
7695 +#include <linux/file.h>
7696 +
7697 +/*
7698 + * cow_break_link: copy the file at @pathname to a temporary sibling (the
7699 + * same name plus one pad byte) and rename the copy over the original.
7700 + * @dentry is only logged.  Returns 0, or -ENOMEM/-EMLINK on failure.
7701 + */
7702 +int cow_break_link(struct dentry *dentry, const char *pathname)
7703 +{
7704 +       int err = -EMLINK;
7705 +       int ret, mode, pathlen;
7706 +       struct nameidata old_nd, dir_nd;
7707 +       struct dentry *old_dentry, *new_dentry;
7708 +       struct vfsmount *old_mnt;
7709 +       struct file *old_file, *new_file;
7710 +       char *to, *path, pad='\251';
7711 +       loff_t size;
7712 +
7713 +       vxdprintk(VXD_CBIT(misc, 2),
7714 +               "cow_break_link(%p,»%s«)", dentry, pathname);
7715 +       path = kmalloc(PATH_MAX, GFP_KERNEL);
7716 +       if (!path)
7717 +               return -ENOMEM;
7718 +
7719 +       ret = path_lookup(pathname, LOOKUP_FOLLOW, &old_nd);
7720 +       vxdprintk(VXD_CBIT(misc, 2), "path_lookup(old): %d", ret);
7721 +       if (ret)
7722 +               goto out_free;  /* lookup failed: old_nd must not be released */
7723 +       old_dentry = old_nd.dentry;
7724 +       old_mnt = old_nd.mnt;
7725 +       mode = old_dentry->d_inode->i_mode;
7726 +
7727 +       to = d_path(old_dentry, old_mnt, path, PATH_MAX-2);
7728 +       if (IS_ERR(to))
7729 +               goto out_rel_old;
7730 +       pathlen = strlen(to);
7731 +       vxdprintk(VXD_CBIT(misc, 2), "old path »%s«", to);
7732 +       to[pathlen+1] = 0;
7733 +retry:
7734 +       to[pathlen] = pad--;
7735 +       if (pad <= '\240')
7736 +               goto out_rel_old;
7737 +       vxdprintk(VXD_CBIT(misc, 2), "temp copy »%s«", to);
7738 +       ret = path_lookup(to,
7739 +               LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, &dir_nd);
7740 +       if (ret)
7741 +               goto out_rel_old;
7742 +
7743 +       /* takes the parent's i_mutex, even when it returns an error */
7744 +       new_dentry = lookup_create(&dir_nd, 0);
7745 +       vxdprintk(VXD_CBIT(misc, 2), "lookup_create(new): %p", new_dentry);
7746 +       if (IS_ERR(new_dentry)) {
7747 +               mutex_unlock(&dir_nd.dentry->d_inode->i_mutex);
7748 +               path_release(&dir_nd);
7749 +               goto retry;
7750 +       }
7751 +
7752 +       ret = vfs_create(dir_nd.dentry->d_inode, new_dentry, mode, &dir_nd);
7753 +       vxdprintk(VXD_CBIT(misc, 2), "vfs_create(new): %d", ret);
7754 +       if (ret == -EEXIST) {
7755 +               mutex_unlock(&dir_nd.dentry->d_inode->i_mutex);
7756 +               dput(new_dentry);
7757 +               path_release(&dir_nd);
7758 +               goto retry;
7759 +       }
7760 +       if (ret)
7761 +               goto out_rel_both;      /* nothing was created to copy into */
7762 +
7763 +       dget(old_dentry);
7764 +       mntget(old_mnt);
7765 +       /* this one cleans up the dentry in case of failure */
7766 +       old_file = dentry_open(old_dentry, old_mnt, O_RDONLY);
7767 +       vxdprintk(VXD_CBIT(misc, 2), "dentry_open(old): %p", old_file);
7768 +       if (IS_ERR(old_file))
7769 +               goto out_rel_both;
7770 +
7771 +       dget(new_dentry);
7772 +       mntget(dir_nd.mnt);
7773 +       /* this one cleans up the dentry in case of failure */
7774 +       new_file = dentry_open(new_dentry, dir_nd.mnt, O_WRONLY);
7775 +       vxdprintk(VXD_CBIT(misc, 2), "dentry_open(new): %p", new_file);
7776 +       if (IS_ERR(new_file))
7777 +               goto out_fput_old;
7778 +
7779 +       size = i_size_read(old_file->f_dentry->d_inode);
7780 +       ret = vfs_sendfile(new_file, old_file, NULL, size, 0);
7781 +       vxdprintk(VXD_CBIT(misc, 2), "vfs_sendfile: %d", ret);
7782 +       if (ret < 0)
7783 +               goto out_fput_both;
7784 +
7785 +       ret = vfs_rename(dir_nd.dentry->d_inode, new_dentry,
7786 +               old_nd.dentry->d_parent->d_inode, old_dentry);
7787 +       vxdprintk(VXD_CBIT(misc, 2), "vfs_rename: %d", ret);
7788 +       if (!ret)
7789 +               err = 0;
7790 +out_fput_both:
7791 +       vxdprintk(VXD_CBIT(misc, 3), "fput(new_file=%p[#%d])",
7792 +               new_file, atomic_read(&new_file->f_count));
7793 +       fput(new_file);
7794 +out_fput_old:
7795 +       vxdprintk(VXD_CBIT(misc, 3), "fput(old_file=%p[#%d])",
7796 +               old_file, atomic_read(&old_file->f_count));
7797 +       fput(old_file);
7798 +out_rel_both:
7799 +       mutex_unlock(&dir_nd.dentry->d_inode->i_mutex);
7800 +       dput(new_dentry);
7801 +       path_release(&dir_nd);
7802 +out_rel_old:
7803 +       path_release(&old_nd);
7804 +out_free:
7805 +       kfree(path);
7806 +       return err;
7807 +}
7808 +
7809 +#endif
7810 +
7811  /* get the link contents into pagecache */
7812  static char *page_getlink(struct dentry * dentry, struct page **ppage)
7813  {
7814 diff -NurpP --minimal linux-2.6.16.20/fs/namespace.c linux-2.6.16.20-vs2.1.1-rc22/fs/namespace.c
7815 --- linux-2.6.16.20/fs/namespace.c      2006-04-09 13:49:53 +0200
7816 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/namespace.c 2006-05-29 19:20:26 +0200
7817 @@ -23,6 +23,8 @@
7818  #include <linux/namei.h>
7819  #include <linux/security.h>
7820  #include <linux/mount.h>
7821 +#include <linux/vserver/namespace.h>
7822 +#include <linux/vserver/tag.h>
7823  #include <asm/uaccess.h>
7824  #include <asm/unistd.h>
7825  #include "pnode.h"
7826 @@ -241,6 +243,7 @@ static struct vfsmount *clone_mnt(struct
7827                 mnt->mnt_root = dget(root);
7828                 mnt->mnt_mountpoint = mnt->mnt_root;
7829                 mnt->mnt_parent = mnt;
7830 +               mnt->mnt_tag = old->mnt_tag;
7831  
7832                 if (flag & CL_SLAVE) {
7833                         list_add(&mnt->mnt_slave, &old->mnt_slave_list);
7834 @@ -349,43 +352,85 @@ static inline void mangle(struct seq_fil
7835         seq_escape(m, s, " \t\n\\");
7836  }
7837  
7838 +static int mnt_is_reachable(struct vfsmount *mnt)
7839 +{
7840 +       struct vfsmount *root_mnt;
7841 +       struct dentry *root, *point;
7842 +       int ret;
7843 +       /* Returns nonzero if mnt is reachable from current's fs root. */
7844 +       if (mnt == mnt->mnt_namespace->root)
7845 +               return 1;       /* the namespace root always counts */
7846 +
7847 +       spin_lock(&dcache_lock);
7848 +       root_mnt = current->fs->rootmnt;
7849 +       root = current->fs->root;
7850 +       point = root;
7851 +       /* climb the parent chain, keeping the last mountpoint crossed */
7852 +       while ((mnt != mnt->mnt_parent) && (mnt != root_mnt)) {
7853 +               point = mnt->mnt_mountpoint;
7854 +               mnt = mnt->mnt_parent;
7855 +       }
7856 +       /* must stop at root_mnt, with that mountpoint accepted by is_subdir */
7857 +       ret = (mnt == root_mnt) && is_subdir(point, root);
7858 +
7859 +       spin_unlock(&dcache_lock);
7860 +
7861 +       return ret;
7862 +}
7863 +
7864  static int show_vfsmnt(struct seq_file *m, void *v)
7865  {
7866         struct vfsmount *mnt = v;
7867         int err = 0;
7868         static struct proc_fs_info {
7869 -               int flag;
7870 -               char *str;
7871 +               int s_flag;
7872 +               int mnt_flag;
7873 +               char *set_str;
7874 +               char *unset_str;
7875         } fs_info[] = {
7876 -               { MS_SYNCHRONOUS, ",sync" },
7877 -               { MS_DIRSYNC, ",dirsync" },
7878 -               { MS_MANDLOCK, ",mand" },
7879 -               { 0, NULL }
7880 -       };
7881 -       static struct proc_fs_info mnt_info[] = {
7882 -               { MNT_NOSUID, ",nosuid" },
7883 -               { MNT_NODEV, ",nodev" },
7884 -               { MNT_NOEXEC, ",noexec" },
7885 -               { MNT_NOATIME, ",noatime" },
7886 -               { MNT_NODIRATIME, ",nodiratime" },
7887 -               { 0, NULL }
7888 +               { MS_RDONLY, MNT_RDONLY, "ro", "rw" },
7889 +               { MS_SYNCHRONOUS, 0, ",sync", NULL },
7890 +               { MS_DIRSYNC, 0, ",dirsync", NULL },
7891 +               { MS_MANDLOCK, 0, ",mand", NULL },
7892 +               { MS_TAGGED, 0, ",tag", NULL },
7893 +               { MS_NOATIME, MNT_NOATIME, ",noatime", NULL },
7894 +               { MS_NODIRATIME, MNT_NODIRATIME, ",nodiratime", NULL },
7895 +               { 0, MNT_NOSUID, ",nosuid", NULL },
7896 +               { 0, MNT_NODEV, ",nodev", NULL },
7897 +               { 0, MNT_NOEXEC, ",noexec", NULL },
7898 +               { 0, 0, NULL, NULL }
7899         };
7900 -       struct proc_fs_info *fs_infop;
7901 +       struct proc_fs_info *p;
7902 +       unsigned long s_flags = mnt->mnt_sb->s_flags;
7903 +       int mnt_flags = mnt->mnt_flags;
7904  
7905 -       mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
7906 -       seq_putc(m, ' ');
7907 -       seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
7908 -       seq_putc(m, ' ');
7909 -       mangle(m, mnt->mnt_sb->s_type->name);
7910 -       seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw");
7911 -       for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
7912 -               if (mnt->mnt_sb->s_flags & fs_infop->flag)
7913 -                       seq_puts(m, fs_infop->str);
7914 +       if (vx_flags(VXF_HIDE_MOUNT, 0))
7915 +               return 0;
7916 +       if (!mnt_is_reachable(mnt) && !vx_check(0, VX_WATCH))
7917 +               return 0;
7918 +
7919 +       if (!vx_check(0, VX_ADMIN|VX_WATCH) &&
7920 +               mnt == current->fs->rootmnt) {
7921 +               seq_puts(m, "/dev/root / ");
7922 +       } else {
7923 +               mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
7924 +               seq_putc(m, ' ');
7925 +               seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
7926 +               seq_putc(m, ' ');
7927         }
7928 -       for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
7929 -               if (mnt->mnt_flags & fs_infop->flag)
7930 -                       seq_puts(m, fs_infop->str);
7931 +       mangle(m, mnt->mnt_sb->s_type->name);
7932 +       seq_putc(m, ' ');
7933 +       for (p = fs_info; (p->s_flag | p->mnt_flag) ; p++) {
7934 +               if ((s_flags & p->s_flag) || (mnt_flags & p->mnt_flag)) {
7935 +                       if (p->set_str)
7936 +                               seq_puts(m, p->set_str);
7937 +               } else {
7938 +                       if (p->unset_str)
7939 +                               seq_puts(m, p->unset_str);
7940 +               }
7941         }
7942 +       if (mnt->mnt_flags & MNT_TAGID)
7943 +               seq_printf(m, ",tag=%d", mnt->mnt_tag);
7944         if (mnt->mnt_sb->s_op->show_options)
7945                 err = mnt->mnt_sb->s_op->show_options(m, mnt);
7946         seq_puts(m, " 0 0\n");
7947 @@ -475,15 +520,11 @@ void release_mounts(struct list_head *he
7948         }
7949  }
7950  
7951 -void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
7952 +static inline void __umount_list(struct vfsmount *mnt,
7953 +       int propagate, struct list_head *kill)
7954  {
7955         struct vfsmount *p;
7956  
7957 -       for (p = mnt; p; p = next_mnt(p, mnt)) {
7958 -               list_del(&p->mnt_hash);
7959 -               list_add(&p->mnt_hash, kill);
7960 -       }
7961 -
7962         if (propagate)
7963                 propagate_umount(kill);
7964  
7965 @@ -499,6 +540,33 @@ void umount_tree(struct vfsmount *mnt, i
7966         }
7967  }
7968  
7969 +void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
7970 +{
7971 +       struct vfsmount *p;
7972 +       /* queue every mount yielded by next_mnt() on kill, via mnt_hash */
7973 +       for (p = mnt; p; p = next_mnt(p, mnt)) {
7974 +               list_del(&p->mnt_hash);
7975 +               list_add(&p->mnt_hash, kill);
7976 +               // p->mnt_namespace = NULL;
7977 +       }
7978 +       __umount_list(mnt, propagate, kill);    /* shared teardown tail */
7979 +}
7980 +
7981 +void umount_unused(struct vfsmount *mnt, struct fs_struct *fs)
7982 +{
7983 +       struct vfsmount *p;
7984 +       LIST_HEAD(kill);
7985 +       /* NOTE(review): uses mnt_list; umount_tree() uses mnt_hash - confirm */
7986 +       for (p = mnt; p; p = next_mnt(p, mnt)) {
7987 +               if (p == fs->rootmnt || p == fs->pwdmnt)
7988 +                       continue;       /* fs's root and pwd mounts are kept */
7989 +               list_del(&p->mnt_list);
7990 +               list_add(&p->mnt_list, &kill);
7991 +               p->mnt_namespace = NULL;
7992 +       }
7993 +       __umount_list(mnt, 0, &kill);   /* 0: propagate_umount() is skipped */
7994 +}
7995 +
7996  static int do_umount(struct vfsmount *mnt, int flags)
7997  {
7998         struct super_block *sb = mnt->mnt_sb;
7999 @@ -559,7 +627,7 @@ static int do_umount(struct vfsmount *mn
8000                 down_write(&sb->s_umount);
8001                 if (!(sb->s_flags & MS_RDONLY)) {
8002                         lock_kernel();
8003 -                       DQUOT_OFF(sb);
8004 +                       DQUOT_OFF(sb->s_dqh);
8005                         retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
8006                         unlock_kernel();
8007                 }
8008 @@ -608,7 +676,7 @@ asmlinkage long sys_umount(char __user *
8009                 goto dput_and_out;
8010  
8011         retval = -EPERM;
8012 -       if (!capable(CAP_SYS_ADMIN))
8013 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
8014                 goto dput_and_out;
8015  
8016         retval = do_umount(nd.mnt, flags);
8017 @@ -632,7 +700,7 @@ asmlinkage long sys_oldumount(char __use
8018  
8019  static int mount_is_safe(struct nameidata *nd)
8020  {
8021 -       if (capable(CAP_SYS_ADMIN))
8022 +       if (vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
8023                 return 0;
8024         return -EPERM;
8025  #ifdef notyet
8026 @@ -861,11 +929,13 @@ static int do_change_type(struct nameida
8027  /*
8028   * do loopback mount.
8029   */
8030 -static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
8031 +static int do_loopback(struct nameidata *nd, char *old_name, tag_t tag,
8032 +       unsigned long flags, int mnt_flags)
8033  {
8034         struct nameidata old_nd;
8035         struct vfsmount *mnt = NULL;
8036         int err = mount_is_safe(nd);
8037 +       int recurse = flags & MS_REC;
8038         if (err)
8039                 return err;
8040         if (!old_name || !*old_name)
8041 @@ -891,6 +961,12 @@ static int do_loopback(struct nameidata 
8042         if (!mnt)
8043                 goto out;
8044  
8045 +       mnt->mnt_flags = mnt_flags;
8046 +       if (flags & MS_TAGID) {
8047 +               mnt->mnt_tag = tag;
8048 +               mnt->mnt_flags |= MNT_TAGID;
8049 +       }
8050 +
8051         err = graft_tree(mnt, nd);
8052         if (err) {
8053                 LIST_HEAD(umount_list);
8054 @@ -899,6 +975,7 @@ static int do_loopback(struct nameidata 
8055                 spin_unlock(&vfsmount_lock);
8056                 release_mounts(&umount_list);
8057         }
8058 +       mnt->mnt_flags = mnt_flags;
8059  
8060  out:
8061         up_write(&namespace_sem);
8062 @@ -912,12 +989,12 @@ out:
8063   * on it - tough luck.
8064   */
8065  static int do_remount(struct nameidata *nd, int flags, int mnt_flags,
8066 -                     void *data)
8067 +                     void *data, xid_t xid)
8068  {
8069         int err;
8070         struct super_block *sb = nd->mnt->mnt_sb;
8071  
8072 -       if (!capable(CAP_SYS_ADMIN))
8073 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_REMOUNT))
8074                 return -EPERM;
8075  
8076         if (!check_mnt(nd->mnt))
8077 @@ -951,7 +1028,7 @@ static int do_move_mount(struct nameidat
8078         struct nameidata old_nd, parent_nd;
8079         struct vfsmount *p;
8080         int err = 0;
8081 -       if (!capable(CAP_SYS_ADMIN))
8082 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
8083                 return -EPERM;
8084         if (!old_name || !*old_name)
8085                 return -EINVAL;
8086 @@ -1031,7 +1108,7 @@ static int do_new_mount(struct nameidata
8087                 return -EINVAL;
8088  
8089         /* we need capabilities... */
8090 -       if (!capable(CAP_SYS_ADMIN))
8091 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
8092                 return -EPERM;
8093  
8094         mnt = do_kern_mount(type, flags, name, data);
8095 @@ -1269,6 +1346,7 @@ long do_mount(char *dev_name, char *dir_
8096         struct nameidata nd;
8097         int retval = 0;
8098         int mnt_flags = 0;
8099 +       tag_t tag = 0;
8100  
8101         /* Discard magic */
8102         if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
8103 @@ -1284,7 +1362,19 @@ long do_mount(char *dev_name, char *dir_
8104         if (data_page)
8105                 ((char *)data_page)[PAGE_SIZE - 1] = 0;
8106  
8107 +#ifdef CONFIG_PROPAGATE
8108 +       retval = dx_parse_tag(data_page, &tag, 1);
8109 +       if (retval) {
8110 +               mnt_flags |= MNT_TAGID;
8111 +               /* bind and re-mounts get the tag flag */
8112 +               if (flags & (MS_BIND|MS_REMOUNT))
8113 +                       flags |= MS_TAGID;
8114 +       }
8115 +#endif
8116 +
8117         /* Separate the per-mountpoint flags */
8118 +       if (flags & MS_RDONLY)
8119 +               mnt_flags |= MNT_RDONLY;
8120         if (flags & MS_NOSUID)
8121                 mnt_flags |= MNT_NOSUID;
8122         if (flags & MS_NODEV)
8123 @@ -1296,6 +1386,8 @@ long do_mount(char *dev_name, char *dir_
8124         if (flags & MS_NODIRATIME)
8125                 mnt_flags |= MNT_NODIRATIME;
8126  
8127 +       if (vx_ccaps(VXC_SECURE_MOUNT))
8128 +               mnt_flags |= MNT_NODEV;
8129         flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
8130                    MS_NOATIME | MS_NODIRATIME);
8131  
8132 @@ -1310,9 +1402,9 @@ long do_mount(char *dev_name, char *dir_
8133  
8134         if (flags & MS_REMOUNT)
8135                 retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
8136 -                                   data_page);
8137 +                                   data_page, tag);
8138         else if (flags & MS_BIND)
8139 -               retval = do_loopback(&nd, dev_name, flags & MS_REC);
8140 +               retval = do_loopback(&nd, dev_name, tag, flags, mnt_flags);
8141         else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
8142                 retval = do_change_type(&nd, flags);
8143         else if (flags & MS_MOVE)
8144 @@ -1410,7 +1502,7 @@ int copy_namespace(int flags, struct tas
8145         if (!(flags & CLONE_NEWNS))
8146                 return 0;
8147  
8148 -       if (!capable(CAP_SYS_ADMIN)) {
8149 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT)) {
8150                 err = -EPERM;
8151                 goto out;
8152         }
8153 diff -NurpP --minimal linux-2.6.16.20/fs/nfs/dir.c linux-2.6.16.20-vs2.1.1-rc22/fs/nfs/dir.c
8154 --- linux-2.6.16.20/fs/nfs/dir.c        2006-02-18 14:40:23 +0100
8155 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/nfs/dir.c   2006-04-26 19:07:00 +0200
8156 @@ -28,9 +28,11 @@
8157  #include <linux/sunrpc/clnt.h>
8158  #include <linux/nfs_fs.h>
8159  #include <linux/nfs_mount.h>
8160 +#include <linux/mount.h>
8161  #include <linux/pagemap.h>
8162  #include <linux/smp_lock.h>
8163  #include <linux/namei.h>
8164 +#include <linux/vserver/tag.h>
8165  
8166  #include "nfs4_fs.h"
8167  #include "delegation.h"
8168 @@ -869,6 +871,7 @@ static struct dentry *nfs_lookup(struct 
8169         inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
8170         if (!inode)
8171                 goto out_unlock;
8172 +       dx_propagate_tag(nd, inode);
8173  no_entry:
8174         res = d_add_unique(dentry, inode);
8175         if (res != NULL)
8176 @@ -902,7 +905,8 @@ static int is_atomic_open(struct inode *
8177         if (nd->flags & LOOKUP_DIRECTORY)
8178                 return 0;
8179         /* Are we trying to write to a read only partition? */
8180 -       if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
8181 +       if ((IS_RDONLY(dir) || MNT_IS_RDONLY(nd->mnt)) &&
8182 +               (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
8183                 return 0;
8184         return 1;
8185  }
8186 diff -NurpP --minimal linux-2.6.16.20/fs/nfs/inode.c linux-2.6.16.20-vs2.1.1-rc22/fs/nfs/inode.c
8187 --- linux-2.6.16.20/fs/nfs/inode.c      2006-02-18 14:40:23 +0100
8188 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/nfs/inode.c 2006-04-26 19:07:00 +0200
8189 @@ -35,6 +35,7 @@
8190  #include <linux/mount.h>
8191  #include <linux/nfs_idmap.h>
8192  #include <linux/vfs.h>
8193 +#include <linux/vserver/tag.h>
8194  
8195  #include <asm/system.h>
8196  #include <asm/uaccess.h>
8197 @@ -336,12 +337,16 @@ nfs_sb_init(struct super_block *sb, rpc_
8198         }
8199         server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
8200  
8201 +       if (server->flags & NFS_MOUNT_TAGGED)
8202 +               sb->s_flags |= MS_TAGGED;
8203 +
8204         sb->s_maxbytes = fsinfo.maxfilesize;
8205         if (sb->s_maxbytes > MAX_LFS_FILESIZE) 
8206                 sb->s_maxbytes = MAX_LFS_FILESIZE; 
8207  
8208         server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0;
8209         server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0;
8210 +       server->client->cl_tag = (server->flags & NFS_MOUNT_TAGGED) ? 1 : 0;
8211  
8212         /* We're airborne Set socket buffersize */
8213         rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
8214 @@ -413,6 +418,7 @@ nfs_create_client(struct nfs_server *ser
8215  
8216         clnt->cl_intr     = 1;
8217         clnt->cl_softrtry = 1;
8218 +       clnt->cl_tag      = 1;
8219  
8220         return clnt;
8221  
8222 @@ -593,6 +599,7 @@ static int nfs_show_options(struct seq_f
8223                 { NFS_MOUNT_NOAC, ",noac", "" },
8224                 { NFS_MOUNT_NONLM, ",nolock", ",lock" },
8225                 { NFS_MOUNT_NOACL, ",noacl", "" },
8226 +               { NFS_MOUNT_TAGGED, ",tag", "" },
8227                 { 0, NULL, NULL }
8228         };
8229         struct proc_nfs_info *nfs_infop;
8230 @@ -805,8 +812,10 @@ nfs_fhget(struct super_block *sb, struct
8231                         nfsi->change_attr = fattr->change_attr;
8232                 inode->i_size = nfs_size_to_loff_t(fattr->size);
8233                 inode->i_nlink = fattr->nlink;
8234 -               inode->i_uid = fattr->uid;
8235 -               inode->i_gid = fattr->gid;
8236 +               inode->i_uid = INOTAG_UID(DX_TAG(inode), fattr->uid, fattr->gid);
8237 +               inode->i_gid = INOTAG_GID(DX_TAG(inode), fattr->uid, fattr->gid);
8238 +               inode->i_tag = INOTAG_TAG(DX_TAG(inode), fattr->uid, fattr->gid, 0);
8239 +                                        /* maybe fattr->xid someday */
8240                 if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) {
8241                         /*
8242                          * report the blocks in 512byte units
8243 @@ -897,6 +906,8 @@ void nfs_setattr_update_inode(struct ino
8244                         inode->i_uid = attr->ia_uid;
8245                 if ((attr->ia_valid & ATTR_GID) != 0)
8246                         inode->i_gid = attr->ia_gid;
8247 +               if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode))
8248 +                       inode->i_tag = attr->ia_tag;
8249                 spin_lock(&inode->i_lock);
8250                 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
8251                 spin_unlock(&inode->i_lock);
8252 @@ -1294,6 +1305,9 @@ static int nfs_check_inode_attributes(st
8253         struct nfs_inode *nfsi = NFS_I(inode);
8254         loff_t cur_size, new_isize;
8255         int data_unstable;
8256 +       uid_t uid;
8257 +       gid_t gid;
8258 +       tag_t tag;
8259  
8260  
8261         if ((fattr->valid & NFS_ATTR_FATTR) == 0)
8262 @@ -1333,10 +1347,15 @@ static int nfs_check_inode_attributes(st
8263                         nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
8264         }
8265  
8266 +       uid = INOTAG_UID(DX_TAG(inode), fattr->uid, fattr->gid);
8267 +       gid = INOTAG_GID(DX_TAG(inode), fattr->uid, fattr->gid);
8268 +       tag = INOTAG_TAG(DX_TAG(inode), fattr->uid, fattr->gid, 0);
8269 +
8270         /* Have any file permissions changed? */
8271         if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
8272 -                       || inode->i_uid != fattr->uid
8273 -                       || inode->i_gid != fattr->gid)
8274 +                       || inode->i_uid != uid
8275 +                       || inode->i_gid != gid
8276 +                       || inode->i_tag != tag)
8277                 nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
8278  
8279         /* Has the link count changed? */
8280 @@ -1420,6 +1439,9 @@ static int nfs_update_inode(struct inode
8281         loff_t cur_isize, new_isize;
8282         unsigned int    invalid = 0;
8283         int data_stable;
8284 +       uid_t uid;
8285 +       gid_t gid;
8286 +       tag_t tag;
8287  
8288         dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
8289                         __FUNCTION__, inode->i_sb->s_id, inode->i_ino,
8290 @@ -1498,15 +1520,21 @@ static int nfs_update_inode(struct inode
8291         }
8292         memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
8293  
8294 +       uid = INOTAG_UID(DX_TAG(inode), fattr->uid, fattr->gid);
8295 +       gid = INOTAG_GID(DX_TAG(inode), fattr->uid, fattr->gid);
8296 +       tag = INOTAG_TAG(DX_TAG(inode), fattr->uid, fattr->gid, 0);
8297 +
8298         if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ||
8299 -           inode->i_uid != fattr->uid ||
8300 -           inode->i_gid != fattr->gid)
8301 +           inode->i_uid != uid ||
8302 +           inode->i_gid != gid ||
8303 +           inode->i_tag != tag)
8304                 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
8305  
8306         inode->i_mode = fattr->mode;
8307         inode->i_nlink = fattr->nlink;
8308 -       inode->i_uid = fattr->uid;
8309 -       inode->i_gid = fattr->gid;
8310 +       inode->i_uid = uid;
8311 +       inode->i_gid = gid;
8312 +       inode->i_tag = tag;
8313  
8314         if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) {
8315                 /*
8316 diff -NurpP --minimal linux-2.6.16.20/fs/nfs/nfs3xdr.c linux-2.6.16.20-vs2.1.1-rc22/fs/nfs/nfs3xdr.c
8317 --- linux-2.6.16.20/fs/nfs/nfs3xdr.c    2006-02-18 14:40:23 +0100
8318 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/nfs/nfs3xdr.c       2006-04-26 19:07:00 +0200
8319 @@ -22,6 +22,7 @@
8320  #include <linux/nfs3.h>
8321  #include <linux/nfs_fs.h>
8322  #include <linux/nfsacl.h>
8323 +#include <linux/vserver/tag.h>
8324  
8325  #define NFSDBG_FACILITY                NFSDBG_XDR
8326  
8327 @@ -178,7 +179,7 @@ xdr_decode_fattr(u32 *p, struct nfs_fatt
8328  }
8329  
8330  static inline u32 *
8331 -xdr_encode_sattr(u32 *p, struct iattr *attr)
8332 +xdr_encode_sattr(u32 *p, struct iattr *attr, int tag)
8333  {
8334         if (attr->ia_valid & ATTR_MODE) {
8335                 *p++ = xdr_one;
8336 @@ -186,15 +187,17 @@ xdr_encode_sattr(u32 *p, struct iattr *a
8337         } else {
8338                 *p++ = xdr_zero;
8339         }
8340 -       if (attr->ia_valid & ATTR_UID) {
8341 +       if (attr->ia_valid & ATTR_UID ||
8342 +               (tag && (attr->ia_valid & ATTR_TAG))) {
8343                 *p++ = xdr_one;
8344 -               *p++ = htonl(attr->ia_uid);
8345 +               *p++ = htonl(TAGINO_UID(tag, attr->ia_uid, attr->ia_tag));
8346         } else {
8347                 *p++ = xdr_zero;
8348         }
8349 -       if (attr->ia_valid & ATTR_GID) {
8350 +       if (attr->ia_valid & ATTR_GID ||
8351 +               (tag && (attr->ia_valid & ATTR_TAG))) {
8352                 *p++ = xdr_one;
8353 -               *p++ = htonl(attr->ia_gid);
8354 +               *p++ = htonl(TAGINO_GID(tag, attr->ia_gid, attr->ia_tag));
8355         } else {
8356                 *p++ = xdr_zero;
8357         }
8358 @@ -279,7 +282,8 @@ static int
8359  nfs3_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs3_sattrargs *args)
8360  {
8361         p = xdr_encode_fhandle(p, args->fh);
8362 -       p = xdr_encode_sattr(p, args->sattr);
8363 +       p = xdr_encode_sattr(p, args->sattr,
8364 +               req->rq_task->tk_client->cl_tag);
8365         *p++ = htonl(args->guard);
8366         if (args->guard)
8367                 p = xdr_encode_time3(p, &args->guardtime);
8368 @@ -370,7 +374,8 @@ nfs3_xdr_createargs(struct rpc_rqst *req
8369                 *p++ = args->verifier[0];
8370                 *p++ = args->verifier[1];
8371         } else
8372 -               p = xdr_encode_sattr(p, args->sattr);
8373 +               p = xdr_encode_sattr(p, args->sattr,
8374 +                       req->rq_task->tk_client->cl_tag);
8375  
8376         req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
8377         return 0;
8378 @@ -384,7 +389,8 @@ nfs3_xdr_mkdirargs(struct rpc_rqst *req,
8379  {
8380         p = xdr_encode_fhandle(p, args->fh);
8381         p = xdr_encode_array(p, args->name, args->len);
8382 -       p = xdr_encode_sattr(p, args->sattr);
8383 +       p = xdr_encode_sattr(p, args->sattr,
8384 +               req->rq_task->tk_client->cl_tag);
8385         req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
8386         return 0;
8387  }
8388 @@ -397,7 +403,8 @@ nfs3_xdr_symlinkargs(struct rpc_rqst *re
8389  {
8390         p = xdr_encode_fhandle(p, args->fromfh);
8391         p = xdr_encode_array(p, args->fromname, args->fromlen);
8392 -       p = xdr_encode_sattr(p, args->sattr);
8393 +       p = xdr_encode_sattr(p, args->sattr,
8394 +               req->rq_task->tk_client->cl_tag);
8395         p = xdr_encode_array(p, args->topath, args->tolen);
8396         req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
8397         return 0;
8398 @@ -412,7 +419,8 @@ nfs3_xdr_mknodargs(struct rpc_rqst *req,
8399         p = xdr_encode_fhandle(p, args->fh);
8400         p = xdr_encode_array(p, args->name, args->len);
8401         *p++ = htonl(args->type);
8402 -       p = xdr_encode_sattr(p, args->sattr);
8403 +       p = xdr_encode_sattr(p, args->sattr,
8404 +               req->rq_task->tk_client->cl_tag);
8405         if (args->type == NF3CHR || args->type == NF3BLK) {
8406                 *p++ = htonl(MAJOR(args->rdev));
8407                 *p++ = htonl(MINOR(args->rdev));
8408 diff -NurpP --minimal linux-2.6.16.20/fs/nfs/nfsroot.c linux-2.6.16.20-vs2.1.1-rc22/fs/nfs/nfsroot.c
8409 --- linux-2.6.16.20/fs/nfs/nfsroot.c    2006-02-18 14:40:23 +0100
8410 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/nfs/nfsroot.c       2006-04-26 19:07:00 +0200
8411 @@ -87,6 +87,7 @@
8412  #include <linux/root_dev.h>
8413  #include <net/ipconfig.h>
8414  #include <linux/parser.h>
8415 +#include <linux/vs_cvirt.h>
8416  
8417  /* Define this to allow debugging output */
8418  #undef NFSROOT_DEBUG
8419 @@ -119,12 +120,12 @@ static int mount_port __initdata = 0;             /
8420  enum {
8421         /* Options that take integer arguments */
8422         Opt_port, Opt_rsize, Opt_wsize, Opt_timeo, Opt_retrans, Opt_acregmin,
8423 -       Opt_acregmax, Opt_acdirmin, Opt_acdirmax,
8424 +       Opt_acregmax, Opt_acdirmin, Opt_acdirmax, Opt_tagid,
8425         /* Options that take no arguments */
8426         Opt_soft, Opt_hard, Opt_intr,
8427         Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, 
8428         Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp,
8429 -       Opt_acl, Opt_noacl,
8430 +       Opt_acl, Opt_noacl, Opt_tag, Opt_notag,
8431         /* Error token */
8432         Opt_err
8433  };
8434 @@ -161,6 +162,10 @@ static match_table_t __initdata tokens =
8435         {Opt_tcp, "tcp"},
8436         {Opt_acl, "acl"},
8437         {Opt_noacl, "noacl"},
8438 +       {Opt_tag, "tag"},
8439 +       {Opt_notag, "notag"},
8440 +       {Opt_tagid, "tagid=%u"},
8441 +       {Opt_tag, "tagxid"},
8442         {Opt_err, NULL}
8443         
8444  };
8445 @@ -275,6 +280,20 @@ static int __init root_nfs_parse(char *n
8446                         case Opt_noacl:
8447                                 nfs_data.flags |= NFS_MOUNT_NOACL;
8448                                 break;
8449 +#ifndef CONFIG_TAGGING_NONE
8450 +                       case Opt_tag:
8451 +                               nfs_data.flags |= NFS_MOUNT_TAGGED;
8452 +                               break;
8453 +                       case Opt_notag:
8454 +                               nfs_data.flags &= ~NFS_MOUNT_TAGGED;
8455 +                               break;
8456 +#endif
8457 +#ifdef CONFIG_PROPAGATE
8458 +                       case Opt_tagid:
8459 +                               /* use args[0] */
8460 +                               nfs_data.flags |= NFS_MOUNT_TAGGED;
8461 +                               break;
8462 +#endif
8463                         default:
8464                                 printk(KERN_WARNING "Root-NFS: unknown "
8465                                         "option: %s\n", p);
8466 @@ -312,7 +331,7 @@ static int __init root_nfs_name(char *na
8467         /* Override them by options set on kernel command-line */
8468         root_nfs_parse(name, buf);
8469  
8470 -       cp = system_utsname.nodename;
8471 +       cp = vx_new_uts(nodename);
8472         if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
8473                 printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
8474                 return -1;
8475 diff -NurpP --minimal linux-2.6.16.20/fs/nfsd/auth.c linux-2.6.16.20-vs2.1.1-rc22/fs/nfsd/auth.c
8476 --- linux-2.6.16.20/fs/nfsd/auth.c      2004-08-14 12:56:14 +0200
8477 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/nfsd/auth.c 2006-04-26 19:07:00 +0200
8478 @@ -9,6 +9,7 @@
8479  #include <linux/sunrpc/svc.h>
8480  #include <linux/sunrpc/svcauth.h>
8481  #include <linux/nfsd/nfsd.h>
8482 +#include <linux/vserver/tag.h>
8483  
8484  #define        CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE))
8485  
8486 @@ -42,18 +43,21 @@ int nfsd_setuser(struct svc_rqst *rqstp,
8487         }
8488  
8489         if (cred->cr_uid != (uid_t) -1)
8490 -               current->fsuid = cred->cr_uid;
8491 +               current->fsuid = INOTAG_UID(DX_TAG_NFSD, cred->cr_uid, cred->cr_gid);
8492         else
8493                 current->fsuid = exp->ex_anon_uid;
8494         if (cred->cr_gid != (gid_t) -1)
8495 -               current->fsgid = cred->cr_gid;
8496 +               current->fsgid = INOTAG_GID(DX_TAG_NFSD, cred->cr_uid, cred->cr_gid);
8497         else
8498                 current->fsgid = exp->ex_anon_gid;
8499  
8500 +       /* this desperately needs a tag :) */
8501 +       current->xid = (xid_t)INOTAG_TAG(DX_TAG_NFSD, cred->cr_uid, cred->cr_gid, 0);
8502 +
8503         if (!cred->cr_group_info)
8504                 return -ENOMEM;
8505         ret = set_current_groups(cred->cr_group_info);
8506 -       if ((cred->cr_uid)) {
8507 +       if (INOTAG_UID(DX_TAG_NFSD, cred->cr_uid, cred->cr_gid)) {
8508                 cap_t(current->cap_effective) &= ~CAP_NFSD_MASK;
8509         } else {
8510                 cap_t(current->cap_effective) |= (CAP_NFSD_MASK &
8511 diff -NurpP --minimal linux-2.6.16.20/fs/nfsd/nfs3xdr.c linux-2.6.16.20-vs2.1.1-rc22/fs/nfsd/nfs3xdr.c
8512 --- linux-2.6.16.20/fs/nfsd/nfs3xdr.c   2006-04-09 13:49:54 +0200
8513 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/nfsd/nfs3xdr.c      2006-04-26 19:07:00 +0200
8514 @@ -21,6 +21,7 @@
8515  #include <linux/sunrpc/svc.h>
8516  #include <linux/nfsd/nfsd.h>
8517  #include <linux/nfsd/xdr3.h>
8518 +#include <linux/vserver/tag.h>
8519  
8520  #define NFSDDBG_FACILITY               NFSDDBG_XDR
8521  
8522 @@ -111,6 +112,8 @@ static inline u32 *
8523  decode_sattr3(u32 *p, struct iattr *iap)
8524  {
8525         u32     tmp;
8526 +       uid_t   uid = 0;
8527 +       gid_t   gid = 0;
8528  
8529         iap->ia_valid = 0;
8530  
8531 @@ -120,12 +123,15 @@ decode_sattr3(u32 *p, struct iattr *iap)
8532         }
8533         if (*p++) {
8534                 iap->ia_valid |= ATTR_UID;
8535 -               iap->ia_uid = ntohl(*p++);
8536 +               uid = ntohl(*p++);
8537         }
8538         if (*p++) {
8539                 iap->ia_valid |= ATTR_GID;
8540 -               iap->ia_gid = ntohl(*p++);
8541 +               gid = ntohl(*p++);
8542         }
8543 +       iap->ia_uid = INOTAG_UID(DX_TAG_NFSD, uid, gid);
8544 +       iap->ia_gid = INOTAG_GID(DX_TAG_NFSD, uid, gid);
8545 +       iap->ia_tag = INOTAG_TAG(DX_TAG_NFSD, uid, gid, 0);
8546         if (*p++) {
8547                 u64     newsize;
8548  
8549 @@ -163,8 +169,10 @@ encode_fattr3(struct svc_rqst *rqstp, u3
8550         *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
8551         *p++ = htonl((u32) stat->mode);
8552         *p++ = htonl((u32) stat->nlink);
8553 -       *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid));
8554 -       *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid));
8555 +       *p++ = htonl((u32) nfsd_ruid(rqstp,
8556 +               TAGINO_UID(DX_TAG(dentry->d_inode), stat->uid, stat->tag)));
8557 +       *p++ = htonl((u32) nfsd_rgid(rqstp,
8558 +               TAGINO_GID(DX_TAG(dentry->d_inode), stat->gid, stat->tag)));
8559         if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) {
8560                 p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
8561         } else {
8562 diff -NurpP --minimal linux-2.6.16.20/fs/nfsd/nfs4recover.c linux-2.6.16.20-vs2.1.1-rc22/fs/nfsd/nfs4recover.c
8563 --- linux-2.6.16.20/fs/nfsd/nfs4recover.c       2006-02-18 14:40:23 +0100
8564 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/nfsd/nfs4recover.c  2006-04-26 19:07:00 +0200
8565 @@ -155,7 +155,7 @@ nfsd4_create_clid_dir(struct nfs4_client
8566                 dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n");
8567                 goto out_put;
8568         }
8569 -       status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU);
8570 +       status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU, NULL);
8571  out_put:
8572         dput(dentry);
8573  out_unlock:
8574 @@ -259,7 +259,7 @@ nfsd4_remove_clid_file(struct dentry *di
8575                 return -EINVAL;
8576         }
8577         mutex_lock(&dir->d_inode->i_mutex);
8578 -       status = vfs_unlink(dir->d_inode, dentry);
8579 +       status = vfs_unlink(dir->d_inode, dentry, NULL);
8580         mutex_unlock(&dir->d_inode->i_mutex);
8581         return status;
8582  }
8583 @@ -274,7 +274,7 @@ nfsd4_clear_clid_dir(struct dentry *dir,
8584          * a kernel from the future.... */
8585         nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
8586         mutex_lock(&dir->d_inode->i_mutex);
8587 -       status = vfs_rmdir(dir->d_inode, dentry);
8588 +       status = vfs_rmdir(dir->d_inode, dentry, NULL);
8589         mutex_unlock(&dir->d_inode->i_mutex);
8590         return status;
8591  }
8592 diff -NurpP --minimal linux-2.6.16.20/fs/nfsd/nfs4xdr.c linux-2.6.16.20-vs2.1.1-rc22/fs/nfsd/nfs4xdr.c
8593 --- linux-2.6.16.20/fs/nfsd/nfs4xdr.c   2006-02-18 14:40:23 +0100
8594 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/nfsd/nfs4xdr.c      2006-04-26 19:07:00 +0200
8595 @@ -57,6 +57,7 @@
8596  #include <linux/nfsd_idmap.h>
8597  #include <linux/nfs4.h>
8598  #include <linux/nfs4_acl.h>
8599 +#include <linux/vserver/tag.h>
8600  
8601  #define NFSDDBG_FACILITY               NFSDDBG_XDR
8602  
8603 @@ -1561,14 +1562,18 @@ out_acl:
8604                 WRITE32(stat.nlink);
8605         }
8606         if (bmval1 & FATTR4_WORD1_OWNER) {
8607 -               status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen);
8608 +               status = nfsd4_encode_user(rqstp,
8609 +                       TAGINO_UID(DX_TAG(dentry->d_inode),
8610 +                       stat.uid, stat.tag), &p, &buflen);
8611                 if (status == nfserr_resource)
8612                         goto out_resource;
8613                 if (status)
8614                         goto out;
8615         }
8616         if (bmval1 & FATTR4_WORD1_OWNER_GROUP) {
8617 -               status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen);
8618 +               status = nfsd4_encode_group(rqstp,
8619 +                       TAGINO_GID(DX_TAG(dentry->d_inode),
8620 +                       stat.gid, stat.tag), &p, &buflen);
8621                 if (status == nfserr_resource)
8622                         goto out_resource;
8623                 if (status)
8624 diff -NurpP --minimal linux-2.6.16.20/fs/nfsd/nfsxdr.c linux-2.6.16.20-vs2.1.1-rc22/fs/nfsd/nfsxdr.c
8625 --- linux-2.6.16.20/fs/nfsd/nfsxdr.c    2006-04-09 13:49:54 +0200
8626 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/nfsd/nfsxdr.c       2006-04-26 19:07:00 +0200
8627 @@ -15,6 +15,7 @@
8628  #include <linux/nfsd/nfsd.h>
8629  #include <linux/nfsd/xdr.h>
8630  #include <linux/mm.h>
8631 +#include <linux/vserver/tag.h>
8632  
8633  #define NFSDDBG_FACILITY               NFSDDBG_XDR
8634  
8635 @@ -102,6 +103,8 @@ static inline u32 *
8636  decode_sattr(u32 *p, struct iattr *iap)
8637  {
8638         u32     tmp, tmp1;
8639 +       uid_t   uid = 0;
8640 +       gid_t   gid = 0;
8641  
8642         iap->ia_valid = 0;
8643  
8644 @@ -115,12 +118,15 @@ decode_sattr(u32 *p, struct iattr *iap)
8645         }
8646         if ((tmp = ntohl(*p++)) != (u32)-1) {
8647                 iap->ia_valid |= ATTR_UID;
8648 -               iap->ia_uid = tmp;
8649 +               uid = tmp;
8650         }
8651         if ((tmp = ntohl(*p++)) != (u32)-1) {
8652                 iap->ia_valid |= ATTR_GID;
8653 -               iap->ia_gid = tmp;
8654 +               gid = tmp;
8655         }
8656 +       iap->ia_uid = INOTAG_UID(DX_TAG_NFSD, uid, gid);
8657 +       iap->ia_gid = INOTAG_GID(DX_TAG_NFSD, uid, gid);
8658 +       iap->ia_tag = INOTAG_TAG(DX_TAG_NFSD, uid, gid, 0);
8659         if ((tmp = ntohl(*p++)) != (u32)-1) {
8660                 iap->ia_valid |= ATTR_SIZE;
8661                 iap->ia_size = tmp;
8662 @@ -164,8 +170,10 @@ encode_fattr(struct svc_rqst *rqstp, u32
8663         *p++ = htonl(nfs_ftypes[type >> 12]);
8664         *p++ = htonl((u32) stat->mode);
8665         *p++ = htonl((u32) stat->nlink);
8666 -       *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid));
8667 -       *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid));
8668 +       *p++ = htonl((u32) nfsd_ruid(rqstp,
8669 +               TAGINO_UID(DX_TAG(dentry->d_inode), stat->uid, stat->tag)));
8670 +       *p++ = htonl((u32) nfsd_rgid(rqstp,
8671 +               TAGINO_GID(DX_TAG(dentry->d_inode), stat->gid, stat->tag)));
8672  
8673         if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) {
8674                 *p++ = htonl(NFS_MAXPATHLEN);
8675 diff -NurpP --minimal linux-2.6.16.20/fs/nfsd/vfs.c linux-2.6.16.20-vs2.1.1-rc22/fs/nfsd/vfs.c
8676 --- linux-2.6.16.20/fs/nfsd/vfs.c       2006-02-18 14:40:23 +0100
8677 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/nfsd/vfs.c  2006-04-26 19:07:00 +0200
8678 @@ -1160,13 +1160,13 @@ nfsd_create(struct svc_rqst *rqstp, stru
8679                 err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
8680                 break;
8681         case S_IFDIR:
8682 -               err = vfs_mkdir(dirp, dchild, iap->ia_mode);
8683 +               err = vfs_mkdir(dirp, dchild, iap->ia_mode, NULL);
8684                 break;
8685         case S_IFCHR:
8686         case S_IFBLK:
8687         case S_IFIFO:
8688         case S_IFSOCK:
8689 -               err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
8690 +               err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev, NULL);
8691                 break;
8692         default:
8693                 printk("nfsd: bad file type %o in nfsd_create\n", type);
8694 @@ -1446,11 +1446,13 @@ nfsd_symlink(struct svc_rqst *rqstp, str
8695                 else {
8696                         strncpy(path_alloced, path, plen);
8697                         path_alloced[plen] = 0;
8698 -                       err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode);
8699 +                       err = vfs_symlink(dentry->d_inode, dnew,
8700 +                               path_alloced, mode, NULL);
8701                         kfree(path_alloced);
8702                 }
8703         } else
8704 -               err = vfs_symlink(dentry->d_inode, dnew, path, mode);
8705 +               err = vfs_symlink(dentry->d_inode, dnew,
8706 +                       path, mode, NULL);
8707  
8708         if (!err)
8709                 if (EX_ISSYNC(fhp->fh_export))
8710 @@ -1508,7 +1510,7 @@ nfsd_link(struct svc_rqst *rqstp, struct
8711         dold = tfhp->fh_dentry;
8712         dest = dold->d_inode;
8713  
8714 -       err = vfs_link(dold, dirp, dnew);
8715 +       err = vfs_link(dold, dirp, dnew, NULL);
8716         if (!err) {
8717                 if (EX_ISSYNC(ffhp->fh_export)) {
8718                         err = nfserrno(nfsd_sync_dir(ddir));
8719 @@ -1670,9 +1672,9 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
8720                         err = -EPERM;
8721                 } else
8722  #endif
8723 -               err = vfs_unlink(dirp, rdentry);
8724 +               err = vfs_unlink(dirp, rdentry, NULL);
8725         } else { /* It's RMDIR */
8726 -               err = vfs_rmdir(dirp, rdentry);
8727 +               err = vfs_rmdir(dirp, rdentry, NULL);
8728         }
8729  
8730         dput(rdentry);
8731 @@ -1781,7 +1783,8 @@ nfsd_permission(struct svc_export *exp, 
8732          */
8733         if (!(acc & MAY_LOCAL_ACCESS))
8734                 if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) {
8735 -                       if (EX_RDONLY(exp) || IS_RDONLY(inode))
8736 +                       if (EX_RDONLY(exp) || IS_RDONLY(inode)
8737 +                               || MNT_IS_RDONLY(exp->ex_mnt))
8738                                 return nfserr_rofs;
8739                         if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode))
8740                                 return nfserr_perm;
8741 diff -NurpP --minimal linux-2.6.16.20/fs/open.c linux-2.6.16.20-vs2.1.1-rc22/fs/open.c
8742 --- linux-2.6.16.20/fs/open.c   2006-05-11 21:25:36 +0200
8743 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/open.c      2006-04-26 19:07:00 +0200
8744 @@ -27,6 +27,9 @@
8745  #include <linux/pagemap.h>
8746  #include <linux/syscalls.h>
8747  #include <linux/rcupdate.h>
8748 +#include <linux/vs_limit.h>
8749 +#include <linux/vs_dlimit.h>
8750 +#include <linux/vserver/tag.h>
8751  
8752  #include <asm/unistd.h>
8753  
8754 @@ -45,6 +48,8 @@ int vfs_statfs(struct super_block *sb, s
8755                         if (retval == 0 && buf->f_frsize == 0)
8756                                 buf->f_frsize = buf->f_bsize;
8757                 }
8758 +               if (!vx_check(0, VX_ADMIN|VX_WATCH))
8759 +                       vx_vsi_statfs(sb, buf);
8760         }
8761         return retval;
8762  }
8763 @@ -248,7 +253,7 @@ static long do_sys_truncate(const char _
8764                 goto dput_and_out;
8765  
8766         error = -EROFS;
8767 -       if (IS_RDONLY(inode))
8768 +       if (IS_RDONLY(inode) || MNT_IS_RDONLY(nd.mnt))
8769                 goto dput_and_out;
8770  
8771         error = -EPERM;
8772 @@ -378,7 +383,7 @@ asmlinkage long sys_utime(char __user * 
8773         inode = nd.dentry->d_inode;
8774  
8775         error = -EROFS;
8776 -       if (IS_RDONLY(inode))
8777 +       if (IS_RDONLY(inode) || MNT_IS_RDONLY(nd.mnt))
8778                 goto dput_and_out;
8779  
8780         /* Don't worry, the checks are done in inode_change_ok() */
8781 @@ -435,7 +440,7 @@ long do_utimes(int dfd, char __user *fil
8782         inode = nd.dentry->d_inode;
8783  
8784         error = -EROFS;
8785 -       if (IS_RDONLY(inode))
8786 +       if (IS_RDONLY(inode) || MNT_IS_RDONLY(nd.mnt))
8787                 goto dput_and_out;
8788  
8789         /* Don't worry, the checks are done in inode_change_ok() */
8790 @@ -522,7 +527,8 @@ asmlinkage long sys_faccessat(int dfd, c
8791         if (!res) {
8792                 res = vfs_permission(&nd, mode);
8793                 /* SuS v2 requires we report a read only fs too */
8794 -               if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
8795 +               if(!res && (mode & S_IWOTH)
8796 +                  && (IS_RDONLY(nd.dentry->d_inode) || MNT_IS_RDONLY(nd.mnt))
8797                    && !special_file(nd.dentry->d_inode->i_mode))
8798                         res = -EROFS;
8799                 path_release(&nd);
8800 @@ -633,7 +639,7 @@ asmlinkage long sys_fchmod(unsigned int 
8801         inode = dentry->d_inode;
8802  
8803         err = -EROFS;
8804 -       if (IS_RDONLY(inode))
8805 +       if (IS_RDONLY(inode) || MNT_IS_RDONLY(file->f_vfsmnt))
8806                 goto out_putf;
8807         err = -EPERM;
8808         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
8809 @@ -666,7 +672,7 @@ asmlinkage long sys_fchmodat(int dfd, co
8810         inode = nd.dentry->d_inode;
8811  
8812         error = -EROFS;
8813 -       if (IS_RDONLY(inode))
8814 +       if (IS_RDONLY(inode) || MNT_IS_RDONLY(nd.mnt))
8815                 goto dput_and_out;
8816  
8817         error = -EPERM;
8818 @@ -692,7 +698,8 @@ asmlinkage long sys_chmod(const char __u
8819         return sys_fchmodat(AT_FDCWD, filename, mode);
8820  }
8821  
8822 -static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
8823 +static int chown_common(struct dentry *dentry, struct vfsmount *mnt,
8824 +       uid_t user, gid_t group)
8825  {
8826         struct inode * inode;
8827         int error;
8828 @@ -704,7 +711,7 @@ static int chown_common(struct dentry * 
8829                 goto out;
8830         }
8831         error = -EROFS;
8832 -       if (IS_RDONLY(inode))
8833 +       if (IS_RDONLY(inode) || MNT_IS_RDONLY(mnt))
8834                 goto out;
8835         error = -EPERM;
8836         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
8837 @@ -712,11 +719,11 @@ static int chown_common(struct dentry * 
8838         newattrs.ia_valid =  ATTR_CTIME;
8839         if (user != (uid_t) -1) {
8840                 newattrs.ia_valid |= ATTR_UID;
8841 -               newattrs.ia_uid = user;
8842 +               newattrs.ia_uid = dx_map_uid(user);
8843         }
8844         if (group != (gid_t) -1) {
8845                 newattrs.ia_valid |= ATTR_GID;
8846 -               newattrs.ia_gid = group;
8847 +               newattrs.ia_gid = dx_map_gid(group);
8848         }
8849         if (!S_ISDIR(inode->i_mode))
8850                 newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID;
8851 @@ -734,7 +741,7 @@ asmlinkage long sys_chown(const char __u
8852  
8853         error = user_path_walk(filename, &nd);
8854         if (!error) {
8855 -               error = chown_common(nd.dentry, user, group);
8856 +               error = chown_common(nd.dentry, nd.mnt, user, group);
8857                 path_release(&nd);
8858         }
8859         return error;
8860 @@ -753,7 +760,7 @@ asmlinkage long sys_fchownat(int dfd, co
8861         follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
8862         error = __user_walk_fd(dfd, filename, follow, &nd);
8863         if (!error) {
8864 -               error = chown_common(nd.dentry, user, group);
8865 +               error = chown_common(nd.dentry, nd.mnt, user, group);
8866                 path_release(&nd);
8867         }
8868  out:
8869 @@ -767,7 +774,7 @@ asmlinkage long sys_lchown(const char __
8870  
8871         error = user_path_walk_link(filename, &nd);
8872         if (!error) {
8873 -               error = chown_common(nd.dentry, user, group);
8874 +               error = chown_common(nd.dentry, nd.mnt, user, group);
8875                 path_release(&nd);
8876         }
8877         return error;
8878 @@ -781,7 +788,7 @@ asmlinkage long sys_fchown(unsigned int 
8879  
8880         file = fget(fd);
8881         if (file) {
8882 -               error = chown_common(file->f_dentry, user, group);
8883 +               error = chown_common(file->f_dentry, file->f_vfsmnt, user, group);
8884                 fput(file);
8885         }
8886         return error;
8887 @@ -1005,6 +1012,7 @@ repeat:
8888         FD_SET(fd, fdt->open_fds);
8889         FD_CLR(fd, fdt->close_on_exec);
8890         fdt->next_fd = fd + 1;
8891 +       vx_openfd_inc(fd);
8892  #if 1
8893         /* Sanity check */
8894         if (fdt->fd[fd] != NULL) {
8895 @@ -1027,6 +1035,7 @@ static void __put_unused_fd(struct files
8896         __FD_CLR(fd, fdt->open_fds);
8897         if (fd < fdt->next_fd)
8898                 fdt->next_fd = fd;
8899 +       vx_openfd_dec(fd);
8900  }
8901  
8902  void fastcall put_unused_fd(unsigned int fd)
8903 diff -NurpP --minimal linux-2.6.16.20/fs/proc/array.c linux-2.6.16.20-vs2.1.1-rc22/fs/proc/array.c
8904 --- linux-2.6.16.20/fs/proc/array.c     2006-02-18 14:40:26 +0100
8905 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/proc/array.c        2006-04-27 20:29:01 +0200
8906 @@ -75,6 +75,9 @@
8907  #include <linux/times.h>
8908  #include <linux/cpuset.h>
8909  #include <linux/rcupdate.h>
8910 +#include <linux/vs_context.h>
8911 +#include <linux/vs_network.h>
8912 +#include <linux/vs_pid.h>
8913  
8914  #include <asm/uaccess.h>
8915  #include <asm/pgtable.h>
8916 @@ -135,7 +138,9 @@ static const char *task_state_array[] = 
8917         "T (stopped)",          /*  4 */
8918         "T (tracing stop)",     /*  8 */
8919         "Z (zombie)",           /* 16 */
8920 -       "X (dead)"              /* 32 */
8921 +       "X (dead)",             /* 32 */
8922 +       "N (noninteractive)",   /* 64 */
8923 +       "H (on hold)"           /* 128 */
8924  };
8925  
8926  static inline const char * get_task_state(struct task_struct *tsk)
8927 @@ -144,7 +149,8 @@ static inline const char * get_task_stat
8928                                             TASK_INTERRUPTIBLE |
8929                                             TASK_UNINTERRUPTIBLE |
8930                                             TASK_STOPPED |
8931 -                                           TASK_TRACED)) |
8932 +                                          TASK_TRACED |
8933 +                                          TASK_ONHOLD)) |
8934                         (tsk->exit_state & (EXIT_ZOMBIE |
8935                                             EXIT_DEAD));
8936         const char **p = &task_state_array[0];
8937 @@ -161,8 +167,13 @@ static inline char * task_state(struct t
8938         struct group_info *group_info;
8939         int g;
8940         struct fdtable *fdt = NULL;
8941 +       pid_t pid, ptgid, tppid, tgid;
8942  
8943         read_lock(&tasklist_lock);
8944 +       tgid = vx_map_tgid(p->tgid);
8945 +       pid = vx_map_pid(p->pid);
8946 +       ptgid = pid_alive(p) ? vx_map_pid(p->group_leader->real_parent->tgid) : 0; /* parent links are stale if !pid_alive */
8947 +       tppid = pid_alive(p) ? vx_map_pid(p->parent->pid) : 0; /* same guard the unpatched code applied */
8948         buffer += sprintf(buffer,
8949                 "State:\t%s\n"
8950                 "SleepAVG:\t%lu%%\n"
8951 @@ -174,9 +185,8 @@ static inline char * task_state(struct t
8952                 "Gid:\t%d\t%d\t%d\t%d\n",
8953                 get_task_state(p),
8954                 (p->sleep_avg/1024)*100/(1020000000/1024),
8955 -               p->tgid,
8956 -               p->pid, pid_alive(p) ? p->group_leader->real_parent->tgid : 0,
8957 -               pid_alive(p) && p->ptrace ? p->parent->pid : 0,
8958 +               tgid, pid, (pid > 1) ? ptgid : 0,
8959 +               pid_alive(p) && p->ptrace ? tppid : 0,
8960                 p->uid, p->euid, p->suid, p->fsuid,
8961                 p->gid, p->egid, p->sgid, p->fsgid);
8962         read_unlock(&tasklist_lock);
8963 @@ -285,17 +295,26 @@ static inline char * task_sig(struct tas
8964  
8965  static inline char *task_cap(struct task_struct *p, char *buffer)
8966  {
8967 -    return buffer + sprintf(buffer, "CapInh:\t%016x\n"
8968 -                           "CapPrm:\t%016x\n"
8969 -                           "CapEff:\t%016x\n",
8970 -                           cap_t(p->cap_inheritable),
8971 -                           cap_t(p->cap_permitted),
8972 -                           cap_t(p->cap_effective));
8973 +       struct vx_info *vxi = p->vx_info;
8974 +
8975 +       return buffer + sprintf(buffer,
8976 +               "CapInh:\t%016x\n"
8977 +               "CapPrm:\t%016x\n"
8978 +               "CapEff:\t%016x\n",
8979 +               (unsigned)vx_info_mbcap(vxi, p->cap_inheritable),
8980 +               (unsigned)vx_info_mbcap(vxi, p->cap_permitted),
8981 +               (unsigned)vx_info_mbcap(vxi, p->cap_effective));
8982  }
8983  
8984  int proc_pid_status(struct task_struct *task, char * buffer)
8985  {
8986         char * orig = buffer;
8987 +#ifdef CONFIG_VSERVER_LEGACY
8988 +       struct vx_info *vxi;
8989 +#endif
8990 +#ifdef CONFIG_VSERVER_LEGACYNET
8991 +       struct nx_info *nxi;
8992 +#endif
8993         struct mm_struct *mm = get_task_mm(task);
8994  
8995         buffer = task_name(task, buffer);
8996 @@ -308,6 +327,46 @@ int proc_pid_status(struct task_struct *
8997         buffer = task_sig(task, buffer);
8998         buffer = task_cap(task, buffer);
8999         buffer = cpuset_task_status_allowed(task, buffer);
9000 +
9001 +       if (task_vx_flags(task, VXF_INFO_HIDE, 0))
9002 +               goto skip;
9003 +#ifdef CONFIG_VSERVER_LEGACY
9004 +       buffer += sprintf (buffer,"s_context: %d\n", vx_task_xid(task));
9005 +       vxi = task_get_vx_info(task);
9006 +       if (vxi) {
9007 +               buffer += sprintf (buffer,"ctxflags: %08llx\n"
9008 +                       ,(unsigned long long)vxi->vx_flags);
9009 +               buffer += sprintf (buffer,"initpid: %d\n"
9010 +                       ,vxi->vx_initpid);
9011 +       } else {
9012 +               buffer += sprintf (buffer,"ctxflags: none\n");
9013 +               buffer += sprintf (buffer,"initpid: none\n");
9014 +       }
9015 +       put_vx_info(vxi);
9016 +#else
9017 +       buffer += sprintf (buffer,"VxID: %d\n", vx_task_xid(task));
9018 +#endif
9019 +#ifdef CONFIG_VSERVER_LEGACYNET
9020 +       nxi = task_get_nx_info(task);
9021 +       if (nxi) {
9022 +               int i;
9023 +
9024 +               buffer += sprintf (buffer,"ipv4root:");
9025 +               for (i=0; i<nxi->nbipv4; i++){
9026 +                       buffer += sprintf (buffer," %08x/%08x"
9027 +                               ,nxi->ipv4[i]
9028 +                               ,nxi->mask[i]);
9029 +               }
9030 +               *buffer++ = '\n';
9031 +               buffer += sprintf (buffer,"ipv4root_bcast: %08x\n"
9032 +                       ,nxi->v4_bcast);
9033 +       } else {
9034 +               buffer += sprintf (buffer,"ipv4root: 0\n");
9035 +               buffer += sprintf (buffer,"ipv4root_bcast: 0\n");
9036 +       }
9037 +       put_nx_info(nxi);
9038 +#endif
9039 +skip:
9040  #if defined(CONFIG_S390)
9041         buffer = task_show_regs(task, buffer);
9042  #endif
9043 @@ -322,7 +381,7 @@ static int do_task_stat(struct task_stru
9044         sigset_t sigign, sigcatch;
9045         char state;
9046         int res;
9047 -       pid_t ppid, pgid = -1, sid = -1;
9048 +       pid_t pid, ppid, pgid = -1, sid = -1;
9049         int num_threads = 0;
9050         struct mm_struct *mm;
9051         unsigned long long start_time;
9052 @@ -388,7 +447,11 @@ static int do_task_stat(struct task_stru
9053                 }
9054                 it_real_value = task->signal->real_timer.expires;
9055         }
9056 -       ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0;
9057 +       pid = vx_info_map_pid(task->vx_info, pid_alive(task) ? task->pid : 0);
9058 +       ppid = (!(pid > 1)) ? 0 : vx_info_map_tgid(task->vx_info,
9059 +               task->group_leader->real_parent->tgid);
9060 +       pgid = vx_info_map_pid(task->vx_info, pgid);
9061 +
9062         read_unlock(&tasklist_lock);
9063  
9064         if (!whole || num_threads<2)
9065 @@ -412,10 +475,21 @@ static int do_task_stat(struct task_stru
9066         /* convert nsec -> ticks */
9067         start_time = nsec_to_clock_t(start_time);
9068  
9069 +       /* fixup start time for virt uptime */
9070 +       if (vx_flags(VXF_VIRT_UPTIME, 0)) {
9071 +               unsigned long long bias =
9072 +                       current->vx_info->cvirt.bias_clock;
9073 +
9074 +               if (start_time > bias)
9075 +                       start_time -= bias;
9076 +               else
9077 +                       start_time = 0;
9078 +       }
9079 +
9080         res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
9081  %lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \
9082  %lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n",
9083 -               task->pid,
9084 +               pid,
9085                 tcomm,
9086                 state,
9087                 ppid,
9088 diff -NurpP --minimal linux-2.6.16.20/fs/proc/base.c linux-2.6.16.20-vs2.1.1-rc22/fs/proc/base.c
9089 --- linux-2.6.16.20/fs/proc/base.c      2006-05-11 21:25:36 +0200
9090 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/proc/base.c 2006-04-26 19:07:00 +0200
9091 @@ -72,6 +72,8 @@
9092  #include <linux/cpuset.h>
9093  #include <linux/audit.h>
9094  #include <linux/poll.h>
9095 +#include <linux/vs_network.h>
9096 +#include <linux/vs_pid.h>
9097  #include "internal.h"
9098  
9099  /*
9100 @@ -121,6 +123,8 @@ enum pid_directory_inos {
9101         PROC_TGID_ATTR_EXEC,
9102         PROC_TGID_ATTR_FSCREATE,
9103  #endif
9104 +       PROC_TGID_VX_INFO,
9105 +       PROC_TGID_IP_INFO,
9106  #ifdef CONFIG_AUDITSYSCALL
9107         PROC_TGID_LOGINUID,
9108  #endif
9109 @@ -161,6 +165,8 @@ enum pid_directory_inos {
9110         PROC_TID_ATTR_EXEC,
9111         PROC_TID_ATTR_FSCREATE,
9112  #endif
9113 +       PROC_TID_VX_INFO,
9114 +       PROC_TID_IP_INFO,
9115  #ifdef CONFIG_AUDITSYSCALL
9116         PROC_TID_LOGINUID,
9117  #endif
9118 @@ -216,6 +222,8 @@ static struct pid_entry tgid_base_stuff[
9119  #ifdef CONFIG_CPUSETS
9120         E(PROC_TGID_CPUSET,    "cpuset",  S_IFREG|S_IRUGO),
9121  #endif
9122 +       E(PROC_TGID_VX_INFO,   "vinfo",   S_IFREG|S_IRUGO),
9123 +       E(PROC_TGID_IP_INFO,   "ninfo",   S_IFREG|S_IRUGO),
9124         E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO),
9125         E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR),
9126  #ifdef CONFIG_AUDITSYSCALL
9127 @@ -258,6 +266,8 @@ static struct pid_entry tid_base_stuff[]
9128  #ifdef CONFIG_CPUSETS
9129         E(PROC_TID_CPUSET,     "cpuset",  S_IFREG|S_IRUGO),
9130  #endif
9131 +       E(PROC_TID_VX_INFO,    "vinfo",   S_IFREG|S_IRUGO),
9132 +       E(PROC_TID_IP_INFO,    "ninfo",   S_IFREG|S_IRUGO),
9133         E(PROC_TID_OOM_SCORE,  "oom_score",S_IFREG|S_IRUGO),
9134         E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR),
9135  #ifdef CONFIG_AUDITSYSCALL
9136 @@ -541,6 +551,11 @@ static int proc_check_chroot(struct dent
9137         struct dentry *de, *base;
9138         struct vfsmount *our_vfsmnt, *mnt;
9139         int res = 0;
9140 +
9141 +       /* context admin override */
9142 +       if (capable(CAP_CONTEXT))
9143 +               goto override;
9144 +
9145         read_lock(&current->fs->lock);
9146         our_vfsmnt = mntget(current->fs->rootmnt);
9147         base = dget(current->fs->root);
9148 @@ -550,11 +565,11 @@ static int proc_check_chroot(struct dent
9149         de = root;
9150         mnt = vfsmnt;
9151  
9152 -       while (vfsmnt != our_vfsmnt) {
9153 -               if (vfsmnt == vfsmnt->mnt_parent)
9154 +       while (mnt != our_vfsmnt) {
9155 +               if (mnt == mnt->mnt_parent)
9156                         goto out;
9157 -               de = vfsmnt->mnt_mountpoint;
9158 -               vfsmnt = vfsmnt->mnt_parent;
9159 +               de = mnt->mnt_mountpoint;
9160 +               mnt = mnt->mnt_parent;
9161         }
9162  
9163         if (!is_subdir(de, base))
9164 @@ -564,8 +579,9 @@ static int proc_check_chroot(struct dent
9165  exit:
9166         dput(base);
9167         mntput(our_vfsmnt);
9168 +override:
9169         dput(root);
9170 -       mntput(mnt);
9171 +       mntput(vfsmnt);
9172         return res;
9173  out:
9174         spin_unlock(&vfsmount_lock);
9175 @@ -1225,7 +1241,7 @@ static int proc_pident_readdir(struct fi
9176         struct inode *inode = dentry->d_inode;
9177         struct pid_entry *p;
9178         ino_t ino;
9179 -       int ret;
9180 +       int ret, hide;
9181  
9182         ret = -ENOENT;
9183         if (!pid_alive(proc_task(inode)))
9184 @@ -1256,11 +1272,20 @@ static int proc_pident_readdir(struct fi
9185                         goto out;
9186                 }
9187                 p = ents + i;
9188 +               hide = vx_flags(VXF_INFO_HIDE, 0);
9189                 while (p->name) {
9190 +                       if (hide) {
9191 +                               switch (p->type) {
9192 +                               case PROC_TGID_VX_INFO:
9193 +                               case PROC_TGID_IP_INFO:
9194 +                                       goto skip;
9195 +                               }
9196 +                       }
9197                         if (filldir(dirent, p->name, p->len, filp->f_pos,
9198                                     fake_ino(pid, p->type), p->mode >> 12) < 0)
9199                                 goto out;
9200                         filp->f_pos++;
9201 +               skip:
9202                         p++;
9203                 }
9204         }
9205 @@ -1334,6 +1359,8 @@ static struct inode *proc_pid_make_inode
9206                 inode->i_uid = task->euid;
9207                 inode->i_gid = task->egid;
9208         }
9209 +       /* procfs is xid tagged */
9210 +       inode->i_tag = (tag_t)vx_task_xid(task);
9211         security_task_to_inode(task, inode);
9212  
9213  out:
9214 @@ -1359,6 +1386,11 @@ static int pid_revalidate(struct dentry 
9215  {
9216         struct inode *inode = dentry->d_inode;
9217         struct task_struct *task = proc_task(inode);
9218 +
9219 +       if (!vx_check(vx_task_xid(task), VX_IDENT))
9220 +               goto out_drop;
9221 +       /* discard wrong fakeinit */
9222 +
9223         if (pid_alive(task)) {
9224                 if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) {
9225                         inode->i_uid = task->euid;
9226 @@ -1370,6 +1402,7 @@ static int pid_revalidate(struct dentry 
9227                 security_task_to_inode(task, inode);
9228                 return 1;
9229         }
9230 +out_drop:
9231         d_drop(dentry);
9232         return 0;
9233  }
9234 @@ -1609,6 +1642,9 @@ static struct file_operations proc_tgid_
9235  static struct inode_operations proc_tgid_attr_inode_operations;
9236  #endif
9237  
9238 +extern int proc_pid_vx_info(struct task_struct *, char *);
9239 +extern int proc_pid_nx_info(struct task_struct *, char *);
9240 +
9241  static int get_tid_list(int index, unsigned int *tids, struct inode *dir);
9242  
9243  /* SMP-safe */
9244 @@ -1796,15 +1832,33 @@ static struct dentry *proc_pident_lookup
9245                         inode->i_fop = &proc_loginuid_operations;
9246                         break;
9247  #endif
9248 +               case PROC_TID_VX_INFO:
9249 +               case PROC_TGID_VX_INFO:
9250 +                       if (task_vx_flags(task, VXF_INFO_HIDE, 0))
9251 +                               goto out_noent;
9252 +                       inode->i_fop = &proc_info_file_operations;
9253 +                       ei->op.proc_read = proc_pid_vx_info;
9254 +                       break;
9255 +               case PROC_TID_IP_INFO:
9256 +               case PROC_TGID_IP_INFO:
9257 +                       if (task_vx_flags(task, VXF_INFO_HIDE, 0))
9258 +                               goto out_noent;
9259 +                       inode->i_fop = &proc_info_file_operations;
9260 +                       ei->op.proc_read = proc_pid_nx_info;
9261 +                       break;
9262                 default:
9263                         printk("procfs: impossible type (%d)",p->type);
9264 -                       iput(inode);
9265 -                       return ERR_PTR(-EINVAL);
9266 +                       error = -EINVAL;
9267 +                       goto out_put;
9268         }
9269         dentry->d_op = &pid_dentry_operations;
9270         d_add(dentry, inode);
9271         return NULL;
9272  
9273 +out_noent:
9274 +       error=-ENOENT;
9275 +out_put:
9276 +       iput(inode);
9277  out:
9278         return ERR_PTR(error);
9279  }
9280 @@ -1888,14 +1942,14 @@ static int proc_self_readlink(struct den
9281                               int buflen)
9282  {
9283         char tmp[30];
9284 -       sprintf(tmp, "%d", current->tgid);
9285 +       sprintf(tmp, "%d", vx_map_tgid(current->tgid));
9286         return vfs_readlink(dentry,buffer,buflen,tmp);
9287  }
9288  
9289  static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
9290  {
9291         char tmp[30];
9292 -       sprintf(tmp, "%d", current->tgid);
9293 +       sprintf(tmp, "%d", vx_map_tgid(current->tgid));
9294         return ERR_PTR(vfs_follow_link(nd,tmp));
9295  }      
9296  
9297 @@ -1958,6 +2012,20 @@ void proc_pid_flush(struct dentry *proc_
9298         }
9299  }
9300  
9301 +#define VXF_FAKE_INIT  (VXF_INFO_INIT|VXF_STATE_INIT)
9302 +
9303 +static inline int proc_pid_visible(struct task_struct *task, int pid)
9304 +{
9305 +       if ((pid == 1) &&
9306 +               !vx_flags(VXF_FAKE_INIT, VXF_FAKE_INIT))
9307 +               goto visible;
9308 +       if (vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT))
9309 +               goto visible;
9310 +       return 0;
9311 +visible:
9312 +       return 1;
9313 +}
9314 +
9315  /* SMP-safe */
9316  struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
9317  {
9318 @@ -1994,13 +2062,14 @@ struct dentry *proc_pid_lookup(struct in
9319         if (!task)
9320                 goto out;
9321  
9322 -       inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO);
9323 +       /* check for context visibility */
9324 +       if (!proc_pid_visible(task, tgid))
9325 +               goto out_drop_task;
9326  
9327 +       inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO);
9328 +       if (!inode)
9329 +               goto out_drop_task;
9330  
9331 -       if (!inode) {
9332 -               put_task_struct(task);
9333 -               goto out;
9334 -       }
9335         inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
9336         inode->i_op = &proc_tgid_base_inode_operations;
9337         inode->i_fop = &proc_tgid_base_operations;
9338 @@ -2029,6 +2098,8 @@ struct dentry *proc_pid_lookup(struct in
9339                 goto out;
9340         }
9341         return NULL;
9342 +out_drop_task:
9343 +       put_task_struct(task);
9344  out:
9345         return ERR_PTR(-ENOENT);
9346  }
9347 @@ -2044,6 +2115,8 @@ static struct dentry *proc_task_lookup(s
9348         tid = name_to_int(dentry);
9349         if (tid == ~0U)
9350                 goto out;
9351 +       if (vx_current_initpid(tid))
9352 +               goto out;
9353  
9354         read_lock(&tasklist_lock);
9355         task = find_task_by_pid(tid);
9356 @@ -2055,11 +2128,14 @@ static struct dentry *proc_task_lookup(s
9357         if (leader->tgid != task->tgid)
9358                 goto out_drop_task;
9359  
9360 -       inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO);
9361 -
9362 +       /* check for context visibility */
9363 +       if (!proc_pid_visible(task, tid))
9364 +               goto out_drop_task;
9365  
9366 +       inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO);
9367         if (!inode)
9368                 goto out_drop_task;
9369 +
9370         inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
9371         inode->i_op = &proc_tid_base_inode_operations;
9372         inode->i_fop = &proc_tid_base_operations;
9373 @@ -2099,7 +2175,7 @@ static int get_tgid_list(int index, unsi
9374         read_lock(&tasklist_lock);
9375         p = NULL;
9376         if (version) {
9377 -               p = find_task_by_pid(version);
9378 +               p = find_task_by_real_pid(version);
9379                 if (p && !thread_group_leader(p))
9380                         p = NULL;
9381         }
9382 @@ -2111,11 +2187,15 @@ static int get_tgid_list(int index, unsi
9383  
9384         for ( ; p != &init_task; p = next_task(p)) {
9385                 int tgid = p->pid;
9386 +
9387                 if (!pid_alive(p))
9388                         continue;
9389 +               /* check for context visibility */
9390 +               if (!proc_pid_visible(p, tgid))
9391 +                       continue;
9392                 if (--index >= 0)
9393                         continue;
9394 -               tgids[nr_tgids] = tgid;
9395 +               tgids[nr_tgids] = vx_map_tgid(tgid);
9396                 nr_tgids++;
9397                 if (nr_tgids >= PROC_MAXPIDS)
9398                         break;
9399 @@ -2145,10 +2225,13 @@ static int get_tid_list(int index, unsig
9400         if (pid_alive(task)) do {
9401                 int tid = task->pid;
9402  
9403 +               /* check for context visibility */
9404 +               if (!proc_pid_visible(task, tid))
9405 +                       continue;
9406                 if (--index >= 0)
9407                         continue;
9408                 if (tids != NULL)
9409 -                       tids[nr_tids] = tid;
9410 +                       tids[nr_tids] = vx_map_pid(tid);
9411                 nr_tids++;
9412                 if (nr_tids >= PROC_MAXPIDS)
9413                         break;
9414 @@ -2224,11 +2307,14 @@ static int proc_task_readdir(struct file
9415         unsigned int nr_tids, i;
9416         struct dentry *dentry = filp->f_dentry;
9417         struct inode *inode = dentry->d_inode;
9418 +       struct task_struct *task = proc_task(inode);
9419         int retval = -ENOENT;
9420         ino_t ino;
9421         unsigned long pos = filp->f_pos;  /* avoiding "long long" filp->f_pos */
9422  
9423 -       if (!pid_alive(proc_task(inode)))
9424 +       if (!vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT))
9425 +               goto out;
9426 +       if (!pid_alive(task))
9427                 goto out;
9428         retval = 0;
9429  
9430 diff -NurpP --minimal linux-2.6.16.20/fs/proc/generic.c linux-2.6.16.20-vs2.1.1-rc22/fs/proc/generic.c
9431 --- linux-2.6.16.20/fs/proc/generic.c   2006-02-18 14:40:26 +0100
9432 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/proc/generic.c      2006-04-26 19:07:00 +0200
9433 @@ -19,6 +19,7 @@
9434  #include <linux/idr.h>
9435  #include <linux/namei.h>
9436  #include <linux/bitops.h>
9437 +#include <linux/vserver/inode.h>
9438  #include <asm/uaccess.h>
9439  
9440  #include "internal.h"
9441 @@ -385,11 +386,15 @@ struct dentry *proc_lookup(struct inode 
9442                 for (de = de->subdir; de ; de = de->next) {
9443                         if (de->namelen != dentry->d_name.len)
9444                                 continue;
9445 +                       if (!vx_hide_check(0, de->vx_flags))
9446 +                               continue;
9447                         if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
9448                                 unsigned int ino = de->low_ino;
9449  
9450                                 error = -EINVAL;
9451                                 inode = proc_get_inode(dir->i_sb, ino, de);
9452 +                               /* generic proc entries belong to the host */
9453 +                               inode->i_tag = 0;
9454                                 break;
9455                         }
9456                 }
9457 @@ -461,9 +466,12 @@ int proc_readdir(struct file * filp,
9458                         }
9459  
9460                         do {
9461 +                               if (!vx_hide_check(0, de->vx_flags))
9462 +                                       goto skip;
9463                                 if (filldir(dirent, de->name, de->namelen, filp->f_pos,
9464                                             de->low_ino, de->mode >> 12) < 0)
9465                                         goto out;
9466 +                       skip:
9467                                 filp->f_pos++;
9468                                 de = de->next;
9469                         } while (de);
9470 @@ -581,6 +589,7 @@ static struct proc_dir_entry *proc_creat
9471         ent->namelen = len;
9472         ent->mode = mode;
9473         ent->nlink = nlink;
9474 +       ent->vx_flags = IATTR_PROC_DEFAULT;
9475   out:
9476         return ent;
9477  }
9478 @@ -601,7 +610,8 @@ struct proc_dir_entry *proc_symlink(cons
9479                                 kfree(ent->data);
9480                                 kfree(ent);
9481                                 ent = NULL;
9482 -                       }
9483 +                       } else
9484 +                               ent->vx_flags = IATTR_PROC_SYMLINK;
9485                 } else {
9486                         kfree(ent);
9487                         ent = NULL;
9488 diff -NurpP --minimal linux-2.6.16.20/fs/proc/inode.c linux-2.6.16.20-vs2.1.1-rc22/fs/proc/inode.c
9489 --- linux-2.6.16.20/fs/proc/inode.c     2006-04-09 13:49:54 +0200
9490 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/proc/inode.c        2006-04-26 19:07:00 +0200
9491 @@ -170,6 +170,8 @@ struct inode *proc_get_inode(struct supe
9492                         inode->i_uid = de->uid;
9493                         inode->i_gid = de->gid;
9494                 }
9495 +               if (de->vx_flags)
9496 +                       PROC_I(inode)->vx_flags = de->vx_flags;
9497                 if (de->size)
9498                         inode->i_size = de->size;
9499                 if (de->nlink)
9500 diff -NurpP --minimal linux-2.6.16.20/fs/proc/proc_misc.c linux-2.6.16.20-vs2.1.1-rc22/fs/proc/proc_misc.c
9501 --- linux-2.6.16.20/fs/proc/proc_misc.c 2006-05-11 21:25:36 +0200
9502 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/proc/proc_misc.c    2006-05-03 16:06:42 +0200
9503 @@ -53,6 +53,8 @@
9504  #include <asm/div64.h>
9505  #include "internal.h"
9506  
9507 +#include <linux/vs_cvirt.h>
9508 +
9509  #define LOAD_INT(x) ((x) >> FSHIFT)
9510  #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
9511  /*
9512 @@ -82,17 +84,32 @@ static int proc_calc_metrics(char *page,
9513  static int loadavg_read_proc(char *page, char **start, off_t off,
9514                                  int count, int *eof, void *data)
9515  {
9516 +       unsigned int running, threads;
9517         int a, b, c;
9518         int len;
9519  
9520 -       a = avenrun[0] + (FIXED_1/200);
9521 -       b = avenrun[1] + (FIXED_1/200);
9522 -       c = avenrun[2] + (FIXED_1/200);
9523 -       len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
9524 +       if (vx_flags(VXF_VIRT_LOAD, 0)) {
9525 +               struct vx_info *vxi = current->vx_info;
9526 +
9527 +               a = vxi->cvirt.load[0] + (FIXED_1/200);
9528 +               b = vxi->cvirt.load[1] + (FIXED_1/200);
9529 +               c = vxi->cvirt.load[2] + (FIXED_1/200);
9530 +
9531 +               running = atomic_read(&vxi->cvirt.nr_running);
9532 +               threads = atomic_read(&vxi->cvirt.nr_threads);
9533 +       } else {
9534 +               a = avenrun[0] + (FIXED_1/200);
9535 +               b = avenrun[1] + (FIXED_1/200);
9536 +               c = avenrun[2] + (FIXED_1/200);
9537 +
9538 +               running = nr_running();
9539 +               threads = nr_threads;
9540 +       }
9541 +       len = sprintf(page,"%d.%02d %d.%02d %d.%02d %d/%d %d\n",
9542                 LOAD_INT(a), LOAD_FRAC(a),
9543                 LOAD_INT(b), LOAD_FRAC(b),
9544                 LOAD_INT(c), LOAD_FRAC(c),
9545 -               nr_running(), nr_threads, last_pid);
9546 +               running, threads, last_pid);
9547         return proc_calc_metrics(page, start, off, count, eof, len);
9548  }
9549  
9550 @@ -106,6 +123,9 @@ static int uptime_read_proc(char *page, 
9551  
9552         do_posix_clock_monotonic_gettime(&uptime);
9553         cputime_to_timespec(idletime, &idle);
9554 +       if (vx_flags(VXF_VIRT_UPTIME, 0))
9555 +               vx_vsi_uptime(&uptime, &idle);
9556 +
9557         len = sprintf(page,"%lu.%02lu %lu.%02lu\n",
9558                         (unsigned long) uptime.tv_sec,
9559                         (uptime.tv_nsec / (NSEC_PER_SEC / 100)),
9560 @@ -143,7 +163,7 @@ static int meminfo_read_proc(char *page,
9561                 * sysctl_overcommit_ratio / 100) + total_swap_pages;
9562  
9563         cached = get_page_cache_size() - total_swapcache_pages - i.bufferram;
9564 -       if (cached < 0)
9565 +       if (cached < 0 || vx_flags(VXF_VIRT_MEM, 0))
9566                 cached = 0;
9567  
9568         get_vmalloc_info(&vmi);
9569 @@ -238,8 +258,9 @@ static int version_read_proc(char *page,
9570  {
9571         int len;
9572  
9573 -       strcpy(page, linux_banner);
9574 -       len = strlen(page);
9575 +       len = sprintf(page, vx_linux_banner,
9576 +               vx_new_uts(release),
9577 +               vx_new_uts(version));
9578         return proc_calc_metrics(page, start, off, count, eof, len);
9579  }
9580  
9581 diff -NurpP --minimal linux-2.6.16.20/fs/proc/root.c linux-2.6.16.20-vs2.1.1-rc22/fs/proc/root.c
9582 --- linux-2.6.16.20/fs/proc/root.c      2006-04-09 13:49:54 +0200
9583 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/proc/root.c 2006-04-26 19:07:00 +0200
9584 @@ -25,6 +25,9 @@ struct proc_dir_entry *proc_net, *proc_n
9585  #ifdef CONFIG_SYSCTL
9586  struct proc_dir_entry *proc_sys_root;
9587  #endif
9588 +struct proc_dir_entry *proc_virtual;
9589 +
9590 +extern void proc_vx_init(void);
9591  
9592  static struct super_block *proc_get_sb(struct file_system_type *fs_type,
9593         int flags, const char *dev_name, void *data)
9594 @@ -78,6 +81,7 @@ void __init proc_root_init(void)
9595         proc_device_tree_init();
9596  #endif
9597         proc_bus = proc_mkdir("bus", NULL);
9598 +       proc_vx_init();
9599  }
9600  
9601  static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
9602 diff -NurpP --minimal linux-2.6.16.20/fs/quota.c linux-2.6.16.20-vs2.1.1-rc22/fs/quota.c
9603 --- linux-2.6.16.20/fs/quota.c  2006-02-18 14:40:26 +0100
9604 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/quota.c     2006-04-27 21:28:28 +0200
9605 @@ -17,47 +17,50 @@
9606  #include <linux/buffer_head.h>
9607  #include <linux/capability.h>
9608  #include <linux/quotaops.h>
9609 +#include <linux/major.h>
9610 +#include <linux/blkdev.h>
9611 +#include <linux/vserver/debug.h>
9612  
9613  /* Check validity of generic quotactl commands */
9614 -static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id)
9615 +static int generic_quotactl_valid(struct dqhash *hash, int type, int cmd, qid_t id)
9616  {
9617         if (type >= MAXQUOTAS)
9618                 return -EINVAL;
9619 -       if (!sb && cmd != Q_SYNC)
9620 +       if (!hash && cmd != Q_SYNC)
9621                 return -ENODEV;
9622         /* Is operation supported? */
9623 -       if (sb && !sb->s_qcop)
9624 +       if (hash && !hash->dqh_qcop)
9625                 return -ENOSYS;
9626  
9627         switch (cmd) {
9628                 case Q_GETFMT:
9629                         break;
9630                 case Q_QUOTAON:
9631 -                       if (!sb->s_qcop->quota_on)
9632 +                       if (!hash->dqh_qcop->quota_on)
9633                                 return -ENOSYS;
9634                         break;
9635                 case Q_QUOTAOFF:
9636 -                       if (!sb->s_qcop->quota_off)
9637 +                       if (!hash->dqh_qcop->quota_off)
9638                                 return -ENOSYS;
9639                         break;
9640                 case Q_SETINFO:
9641 -                       if (!sb->s_qcop->set_info)
9642 +                       if (!hash->dqh_qcop->set_info)
9643                                 return -ENOSYS;
9644                         break;
9645                 case Q_GETINFO:
9646 -                       if (!sb->s_qcop->get_info)
9647 +                       if (!hash->dqh_qcop->get_info)
9648                                 return -ENOSYS;
9649                         break;
9650                 case Q_SETQUOTA:
9651 -                       if (!sb->s_qcop->set_dqblk)
9652 +                       if (!hash->dqh_qcop->set_dqblk)
9653                                 return -ENOSYS;
9654                         break;
9655                 case Q_GETQUOTA:
9656 -                       if (!sb->s_qcop->get_dqblk)
9657 +                       if (!hash->dqh_qcop->get_dqblk)
9658                                 return -ENOSYS;
9659                         break;
9660                 case Q_SYNC:
9661 -                       if (sb && !sb->s_qcop->quota_sync)
9662 +                       if (hash && !hash->dqh_qcop->quota_sync)
9663                                 return -ENOSYS;
9664                         break;
9665                 default:
9666 @@ -73,7 +76,7 @@ static int generic_quotactl_valid(struct
9667                 case Q_SETQUOTA:
9668                 case Q_GETQUOTA:
9669                         /* This is just informative test so we are satisfied without a lock */
9670 -                       if (!sb_has_quota_enabled(sb, type))
9671 +                       if (!dqh_has_quota_enabled(hash, type))
9672                                 return -ESRCH;
9673         }
9674  
9675 @@ -81,47 +84,47 @@ static int generic_quotactl_valid(struct
9676         if (cmd == Q_GETQUOTA) {
9677                 if (((type == USRQUOTA && current->euid != id) ||
9678                      (type == GRPQUOTA && !in_egroup_p(id))) &&
9679 -                   !capable(CAP_SYS_ADMIN))
9680 +                   !vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
9681                         return -EPERM;
9682         }
9683         else if (cmd != Q_GETFMT && cmd != Q_SYNC && cmd != Q_GETINFO)
9684 -               if (!capable(CAP_SYS_ADMIN))
9685 +               if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
9686                         return -EPERM;
9687  
9688         return 0;
9689  }
9690  
9691  /* Check validity of XFS Quota Manager commands */
9692 -static int xqm_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id)
9693 +static int xqm_quotactl_valid(struct dqhash *hash, int type, int cmd, qid_t id)
9694  {
9695         if (type >= XQM_MAXQUOTAS)
9696                 return -EINVAL;
9697 -       if (!sb)
9698 +       if (!hash)
9699                 return -ENODEV;
9700 -       if (!sb->s_qcop)
9701 +       if (!hash->dqh_qcop)
9702                 return -ENOSYS;
9703  
9704         switch (cmd) {
9705                 case Q_XQUOTAON:
9706                 case Q_XQUOTAOFF:
9707                 case Q_XQUOTARM:
9708 -                       if (!sb->s_qcop->set_xstate)
9709 +                       if (!hash->dqh_qcop->set_xstate)
9710                                 return -ENOSYS;
9711                         break;
9712                 case Q_XGETQSTAT:
9713 -                       if (!sb->s_qcop->get_xstate)
9714 +                       if (!hash->dqh_qcop->get_xstate)
9715                                 return -ENOSYS;
9716                         break;
9717                 case Q_XSETQLIM:
9718 -                       if (!sb->s_qcop->set_xquota)
9719 +                       if (!hash->dqh_qcop->set_xquota)
9720                                 return -ENOSYS;
9721                         break;
9722                 case Q_XGETQUOTA:
9723 -                       if (!sb->s_qcop->get_xquota)
9724 +                       if (!hash->dqh_qcop->get_xquota)
9725                                 return -ENOSYS;
9726                         break;
9727                 case Q_XQUOTASYNC:
9728 -                       if (!sb->s_qcop->quota_sync)
9729 +                       if (!hash->dqh_qcop->quota_sync)
9730                                 return -ENOSYS;
9731                         break;
9732                 default:
9733 @@ -132,57 +135,68 @@ static int xqm_quotactl_valid(struct sup
9734         if (cmd == Q_XGETQUOTA) {
9735                 if (((type == XQM_USRQUOTA && current->euid != id) ||
9736                      (type == XQM_GRPQUOTA && !in_egroup_p(id))) &&
9737 -                    !capable(CAP_SYS_ADMIN))
9738 +                    !vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
9739                         return -EPERM;
9740         } else if (cmd != Q_XGETQSTAT && cmd != Q_XQUOTASYNC) {
9741 -               if (!capable(CAP_SYS_ADMIN))
9742 +               if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
9743                         return -EPERM;
9744         }
9745  
9746         return 0;
9747  }
9748  
9749 -static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id)
9750 +static int check_quotactl_valid(struct dqhash *hash, int type, int cmd, qid_t id)
9751  {
9752         int error;
9753  
9754         if (XQM_COMMAND(cmd))
9755 -               error = xqm_quotactl_valid(sb, type, cmd, id);
9756 +               error = xqm_quotactl_valid(hash, type, cmd, id);
9757         else
9758 -               error = generic_quotactl_valid(sb, type, cmd, id);
9759 +               error = generic_quotactl_valid(hash, type, cmd, id);
9760         if (!error)
9761 -               error = security_quotactl(cmd, type, id, sb);
9762 +               error = security_quotactl(cmd, type, id, hash);
9763         return error;
9764  }
9765  
9766 -static void quota_sync_sb(struct super_block *sb, int type)
9767 +static void quota_sync_sb(struct super_block *sb)
9768  {
9769 -       int cnt;
9770 -       struct inode *discard[MAXQUOTAS];
9771 -
9772 -       sb->s_qcop->quota_sync(sb, type);
9773         /* This is not very clever (and fast) but currently I don't know about
9774          * any other simple way of getting quota data to disk and we must get
9775          * them there for userspace to be visible... */
9776         if (sb->s_op->sync_fs)
9777                 sb->s_op->sync_fs(sb, 1);
9778         sync_blockdev(sb->s_bdev);
9779 +}
9780 +
9781 +static void quota_sync_dqh(struct dqhash *hash, int type)
9782 +{
9783 +       int cnt;
9784 +       struct inode *discard[MAXQUOTAS];
9785 +
9786 +       vxdprintk(VXD_CBIT(quota, 1),
9787 +               "quota_sync_dqh(%p,%d)", hash, type);
9788 +       hash->dqh_qcop->quota_sync(hash, type);
9789 +
9790 +       quota_sync_sb(hash->dqh_sb);
9791  
9792         /* Now when everything is written we can discard the pagecache so
9793          * that userspace sees the changes. We need i_mutex and so we could
9794          * not do it inside dqonoff_sem. Moreover we need to be carefull
9795          * about races with quotaoff() (that is the reason why we have own
9796          * reference to inode). */
9797 -       down(&sb_dqopt(sb)->dqonoff_sem);
9798 +       down(&dqh_dqopt(hash)->dqonoff_sem);
9799         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
9800                 discard[cnt] = NULL;
9801                 if (type != -1 && cnt != type)
9802                         continue;
9803 -               if (!sb_has_quota_enabled(sb, cnt))
9804 +               if (!dqh_has_quota_enabled(hash, cnt))
9805                         continue;
9806 -               discard[cnt] = igrab(sb_dqopt(sb)->files[cnt]);
9807 +               vxdprintk(VXD_CBIT(quota, 0),
9808 +                       "quota_sync_dqh(%p,%d) discard inode %p",
9809 +                       hash, type, dqh_dqopt(hash)->files[cnt]);
9810 +               discard[cnt] = igrab(dqh_dqopt(hash)->files[cnt]);
9811         }
9812 -       up(&sb_dqopt(sb)->dqonoff_sem);
9813 +       up(&dqh_dqopt(hash)->dqonoff_sem);
9814         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
9815                 if (discard[cnt]) {
9816                         mutex_lock(&discard[cnt]->i_mutex);
9817 @@ -193,67 +207,59 @@ static void quota_sync_sb(struct super_b
9818         }
9819  }
9820  
9821 -void sync_dquots(struct super_block *sb, int type)
9822 +void sync_dquots_dqh(struct dqhash *hash, int type)
9823  {
9824 -       int cnt, dirty;
9825 +       vxdprintk(VXD_CBIT(quota, 1),
9826 +               "sync_dquots_dqh(%p,%d)", hash, type);
9827  
9828 -       if (sb) {
9829 -               if (sb->s_qcop->quota_sync)
9830 -                       quota_sync_sb(sb, type);
9831 -               return;
9832 -       }
9833 +       if (hash->dqh_qcop->quota_sync)
9834 +               quota_sync_dqh(hash, type);
9835 +}
9836  
9837 -       spin_lock(&sb_lock);
9838 -restart:
9839 -       list_for_each_entry(sb, &super_blocks, s_list) {
9840 -               /* This test just improves performance so it needn't be reliable... */
9841 -               for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
9842 -                       if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
9843 -                           && info_any_dirty(&sb_dqopt(sb)->info[cnt]))
9844 -                               dirty = 1;
9845 -               if (!dirty)
9846 -                       continue;
9847 -               sb->s_count++;
9848 -               spin_unlock(&sb_lock);
9849 -               down_read(&sb->s_umount);
9850 -               if (sb->s_root && sb->s_qcop->quota_sync)
9851 -                       quota_sync_sb(sb, type);
9852 -               up_read(&sb->s_umount);
9853 -               spin_lock(&sb_lock);
9854 -               if (__put_super_and_need_restart(sb))
9855 -                       goto restart;
9856 +void sync_dquots(struct dqhash *hash, int type)
9857 +
9858 +{
9859 +       vxdprintk(VXD_CBIT(quota, 1),
9860 +               "sync_dquots(%p,%d)", hash, type);
9861 +
9862 +       if (hash) {
9863 +               if (hash->dqh_qcop->quota_sync)
9864 +                       quota_sync_dqh(hash, type);
9865 +               return;
9866         }
9867 -       spin_unlock(&sb_lock);
9868  }
9869  
9870  /* Copy parameters and call proper function */
9871 -static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, void __user *addr)
9872 +static int do_quotactl(struct dqhash *hash, int type, int cmd, qid_t id, void __user *addr)
9873  {
9874         int ret;
9875  
9876 +       vxdprintk(VXD_CBIT(quota, 3),
9877 +               "do_quotactl(%p,%d,cmd=%d,id=%d,%p)", hash, type, cmd, id, addr);
9878 +
9879         switch (cmd) {
9880                 case Q_QUOTAON: {
9881                         char *pathname;
9882  
9883                         if (IS_ERR(pathname = getname(addr)))
9884                                 return PTR_ERR(pathname);
9885 -                       ret = sb->s_qcop->quota_on(sb, type, id, pathname);
9886 +                       ret = hash->dqh_qcop->quota_on(hash, type, id, pathname);
9887                         putname(pathname);
9888                         return ret;
9889                 }
9890                 case Q_QUOTAOFF:
9891 -                       return sb->s_qcop->quota_off(sb, type);
9892 +                       return hash->dqh_qcop->quota_off(hash, type);
9893  
9894                 case Q_GETFMT: {
9895                         __u32 fmt;
9896  
9897 -                       down_read(&sb_dqopt(sb)->dqptr_sem);
9898 -                       if (!sb_has_quota_enabled(sb, type)) {
9899 -                               up_read(&sb_dqopt(sb)->dqptr_sem);
9900 +                       down_read(&dqh_dqopt(hash)->dqptr_sem);
9901 +                       if (!dqh_has_quota_enabled(hash, type)) {
9902 +                               up_read(&dqh_dqopt(hash)->dqptr_sem);
9903                                 return -ESRCH;
9904                         }
9905 -                       fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id;
9906 -                       up_read(&sb_dqopt(sb)->dqptr_sem);
9907 +                       fmt = dqh_dqopt(hash)->info[type].dqi_format->qf_fmt_id;
9908 +                       up_read(&dqh_dqopt(hash)->dqptr_sem);
9909                         if (copy_to_user(addr, &fmt, sizeof(fmt)))
9910                                 return -EFAULT;
9911                         return 0;
9912 @@ -261,7 +267,7 @@ static int do_quotactl(struct super_bloc
9913                 case Q_GETINFO: {
9914                         struct if_dqinfo info;
9915  
9916 -                       if ((ret = sb->s_qcop->get_info(sb, type, &info)))
9917 +                       if ((ret = hash->dqh_qcop->get_info(hash, type, &info)))
9918                                 return ret;
9919                         if (copy_to_user(addr, &info, sizeof(info)))
9920                                 return -EFAULT;
9921 @@ -272,12 +278,12 @@ static int do_quotactl(struct super_bloc
9922  
9923                         if (copy_from_user(&info, addr, sizeof(info)))
9924                                 return -EFAULT;
9925 -                       return sb->s_qcop->set_info(sb, type, &info);
9926 +                       return hash->dqh_qcop->set_info(hash, type, &info);
9927                 }
9928                 case Q_GETQUOTA: {
9929                         struct if_dqblk idq;
9930  
9931 -                       if ((ret = sb->s_qcop->get_dqblk(sb, type, id, &idq)))
9932 +                       if ((ret = hash->dqh_qcop->get_dqblk(hash, type, id, &idq)))
9933                                 return ret;
9934                         if (copy_to_user(addr, &idq, sizeof(idq)))
9935                                 return -EFAULT;
9936 @@ -288,10 +294,10 @@ static int do_quotactl(struct super_bloc
9937  
9938                         if (copy_from_user(&idq, addr, sizeof(idq)))
9939                                 return -EFAULT;
9940 -                       return sb->s_qcop->set_dqblk(sb, type, id, &idq);
9941 +                       return hash->dqh_qcop->set_dqblk(hash, type, id, &idq);
9942                 }
9943                 case Q_SYNC:
9944 -                       sync_dquots(sb, type);
9945 +                       sync_dquots_dqh(hash, type);
9946                         return 0;
9947  
9948                 case Q_XQUOTAON:
9949 @@ -301,12 +307,12 @@ static int do_quotactl(struct super_bloc
9950  
9951                         if (copy_from_user(&flags, addr, sizeof(flags)))
9952                                 return -EFAULT;
9953 -                       return sb->s_qcop->set_xstate(sb, flags, cmd);
9954 +                       return hash->dqh_qcop->set_xstate(hash, flags, cmd);
9955                 }
9956                 case Q_XGETQSTAT: {
9957                         struct fs_quota_stat fqs;
9958                 
9959 -                       if ((ret = sb->s_qcop->get_xstate(sb, &fqs)))
9960 +                       if ((ret = hash->dqh_qcop->get_xstate(hash, &fqs)))
9961                                 return ret;
9962                         if (copy_to_user(addr, &fqs, sizeof(fqs)))
9963                                 return -EFAULT;
9964 @@ -317,19 +323,19 @@ static int do_quotactl(struct super_bloc
9965  
9966                         if (copy_from_user(&fdq, addr, sizeof(fdq)))
9967                                 return -EFAULT;
9968 -                      return sb->s_qcop->set_xquota(sb, type, id, &fdq);
9969 +                      return hash->dqh_qcop->set_xquota(hash, type, id, &fdq);
9970                 }
9971                 case Q_XGETQUOTA: {
9972                         struct fs_disk_quota fdq;
9973  
9974 -                       if ((ret = sb->s_qcop->get_xquota(sb, type, id, &fdq)))
9975 +                       if ((ret = hash->dqh_qcop->get_xquota(hash, type, id, &fdq)))
9976                                 return ret;
9977                         if (copy_to_user(addr, &fdq, sizeof(fdq)))
9978                                 return -EFAULT;
9979                         return 0;
9980                 }
9981                 case Q_XQUOTASYNC:
9982 -                       return sb->s_qcop->quota_sync(sb, type);
9983 +                       return hash->dqh_qcop->quota_sync(hash, type);
9984                 /* We never reach here unless validity check is broken */
9985                 default:
9986                         BUG();
9987 @@ -337,6 +343,43 @@ static int do_quotactl(struct super_bloc
9988         return 0;
9989  }
9990  
9991 +#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE)
9992 +
9993 +#include <linux/vroot.h>
9994 +#include <linux/kallsyms.h>
9995 +
9996 +static vroot_grb_func *vroot_get_real_bdev = NULL;
9997 +
9998 +static spinlock_t vroot_grb_lock = SPIN_LOCK_UNLOCKED;
9999 +
10000 +int register_vroot_grb(vroot_grb_func *func) {
10001 +       int ret = -EBUSY;
10002 +       /* Install func as the resolver under the lock; -EBUSY if one is already set. */
10003 +       spin_lock(&vroot_grb_lock);
10004 +       if (!vroot_get_real_bdev) {
10005 +               vroot_get_real_bdev = func;
10006 +               ret = 0;
10007 +       }
10008 +       spin_unlock(&vroot_grb_lock);
10009 +       return ret;
10010 +}
10011 +EXPORT_SYMBOL(register_vroot_grb);
10012 +
10013 +int unregister_vroot_grb(vroot_grb_func *func) {
10014 +       int ret = -EINVAL;
10015 +       /* Clear the resolver; -EINVAL unless func is the one currently registered. */
10016 +       spin_lock(&vroot_grb_lock);
10017 +       if (func && vroot_get_real_bdev == func) {
10018 +               vroot_get_real_bdev = NULL;
10019 +               ret = 0;
10020 +       }
10021 +       spin_unlock(&vroot_grb_lock);
10022 +       return ret;
10023 +}
10024 +EXPORT_SYMBOL(unregister_vroot_grb);
10025 +
10026 +#endif
10027 +
10028  /*
10029   * This is the system call interface. This communicates with
10030   * the user-level programs. Currently this only supports diskquota
10031 @@ -347,6 +390,7 @@ asmlinkage long sys_quotactl(unsigned in
10032  {
10033         uint cmds, type;
10034         struct super_block *sb = NULL;
10035 +       struct dqhash *dqh = NULL;
10036         struct block_device *bdev;
10037         char *tmp;
10038         int ret;
10039 @@ -362,15 +406,33 @@ asmlinkage long sys_quotactl(unsigned in
10040                 putname(tmp);
10041                 if (IS_ERR(bdev))
10042                         return PTR_ERR(bdev);
10043 +#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE)
10044 +               if (bdev && bdev->bd_inode &&
10045 +                       imajor(bdev->bd_inode) == VROOT_MAJOR) {
10046 +                       struct block_device *bdnew = (void *)-EINVAL;
10047 +
10048 +                       if (vroot_get_real_bdev)
10049 +                               bdnew = vroot_get_real_bdev(bdev);
10050 +                       else
10051 +                               vxdprintk(VXD_CBIT(misc, 0),
10052 +                                       "vroot_get_real_bdev not set");
10053 +
10054 +                       bdput(bdev);
10055 +                       if (IS_ERR(bdnew))
10056 +                               return PTR_ERR(bdnew);
10057 +                       bdev = bdnew;
10058 +               }
10059 +#endif
10060                 sb = get_super(bdev);
10061                 bdput(bdev);
10062                 if (!sb)
10063                         return -ENODEV;
10064         }
10065 -
10066 -       ret = check_quotactl_valid(sb, type, cmds, id);
10067 +       if (sb)
10068 +               dqh = sb->s_dqh;
10069 +       ret = check_quotactl_valid(dqh, type, cmds, id);
10070         if (ret >= 0)
10071 -               ret = do_quotactl(sb, type, cmds, id, addr);
10072 +               ret = do_quotactl(dqh, type, cmds, id, addr);
10073         if (sb)
10074                 drop_super(sb);
10075  
10076 diff -NurpP --minimal linux-2.6.16.20/fs/quota_v1.c linux-2.6.16.20-vs2.1.1-rc22/fs/quota_v1.c
10077 --- linux-2.6.16.20/fs/quota_v1.c       2005-03-02 12:38:45 +0100
10078 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/quota_v1.c  2006-04-26 19:07:00 +0200
10079 @@ -42,12 +42,13 @@ static int v1_read_dqblk(struct dquot *d
10080         int type = dquot->dq_type;
10081         struct v1_disk_dqblk dqblk;
10082  
10083 -       if (!sb_dqopt(dquot->dq_sb)->files[type])
10084 +       if (!dqh_dqopt(dquot->dq_dqh)->files[type])
10085                 return -EINVAL;
10086  
10087         /* Set structure to 0s in case read fails/is after end of file */
10088         memset(&dqblk, 0, sizeof(struct v1_disk_dqblk));
10089 -       dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type, (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id));
10090 +       dquot->dq_dqh->dqh_sb->s_op->quota_read(dquot->dq_dqh, type,
10091 +               (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id));
10092  
10093         v1_disk2mem_dqblk(&dquot->dq_dqb, &dqblk);
10094         if (dquot->dq_dqb.dqb_bhardlimit == 0 && dquot->dq_dqb.dqb_bsoftlimit == 0 &&
10095 @@ -66,16 +67,16 @@ static int v1_commit_dqblk(struct dquot 
10096  
10097         v1_mem2disk_dqblk(&dqblk, &dquot->dq_dqb);
10098         if (dquot->dq_id == 0) {
10099 -               dqblk.dqb_btime = sb_dqopt(dquot->dq_sb)->info[type].dqi_bgrace;
10100 -               dqblk.dqb_itime = sb_dqopt(dquot->dq_sb)->info[type].dqi_igrace;
10101 +               dqblk.dqb_btime = dqh_dqopt(dquot->dq_dqh)->info[type].dqi_bgrace;
10102 +               dqblk.dqb_itime = dqh_dqopt(dquot->dq_dqh)->info[type].dqi_igrace;
10103         }
10104         ret = 0;
10105 -       if (sb_dqopt(dquot->dq_sb)->files[type])
10106 -               ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type, (char *)&dqblk,
10107 -                                       sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id));
10108 +       if (dqh_dqopt(dquot->dq_dqh)->files[type])
10109 +               ret = dquot->dq_dqh->dqh_sb->s_op->quota_write(dquot->dq_dqh, type,
10110 +                       (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id));
10111         if (ret != sizeof(struct v1_disk_dqblk)) {
10112                 printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
10113 -                       dquot->dq_sb->s_id);
10114 +                       dquot->dq_dqh->dqh_sb->s_id);
10115                 if (ret >= 0)
10116                         ret = -EIO;
10117                 goto out;
10118 @@ -100,9 +101,9 @@ struct v2_disk_dqheader {
10119         __le32 dqh_version;      /* File version */
10120  };
10121  
10122 -static int v1_check_quota_file(struct super_block *sb, int type)
10123 +static int v1_check_quota_file(struct dqhash *hash, int type)
10124  {
10125 -       struct inode *inode = sb_dqopt(sb)->files[type];
10126 +       struct inode *inode = dqh_dqopt(hash)->files[type];
10127         ulong blocks;
10128         size_t off; 
10129         struct v2_disk_dqheader dqhead;
10130 @@ -118,22 +119,26 @@ static int v1_check_quota_file(struct su
10131         if ((blocks % sizeof(struct v1_disk_dqblk) * BLOCK_SIZE + off) % sizeof(struct v1_disk_dqblk))
10132                 return 0;
10133         /* Doublecheck whether we didn't get file with new format - with old quotactl() this could happen */
10134 -       size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
10135 +       size = hash->dqh_sb->s_op->quota_read(hash, type,
10136 +               (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
10137         if (size != sizeof(struct v2_disk_dqheader))
10138                 return 1;       /* Probably not new format */
10139         if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type])
10140                 return 1;       /* Definitely not new format */
10141 -       printk(KERN_INFO "VFS: %s: Refusing to turn on old quota format on given file. It probably contains newer quota format.\n", sb->s_id);
10142 +       printk(KERN_INFO "VFS: %s: Refusing to turn on old quota format on given file."
10143 +               " It probably contains newer quota format.\n", hash->dqh_sb->s_id);
10144          return 0;              /* Seems like a new format file -> refuse it */
10145  }
10146  
10147 -static int v1_read_file_info(struct super_block *sb, int type)
10148 +static int v1_read_file_info(struct dqhash *hash, int type)
10149  {
10150 -       struct quota_info *dqopt = sb_dqopt(sb);
10151 +       struct quota_info *dqopt = dqh_dqopt(hash);
10152         struct v1_disk_dqblk dqblk;
10153         int ret;
10154  
10155 -       if ((ret = sb->s_op->quota_read(sb, type, (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(0))) != sizeof(struct v1_disk_dqblk)) {
10156 +       if ((ret = hash->dqh_sb->s_op->quota_read(hash, type,
10157 +               (char *)&dqblk, sizeof(struct v1_disk_dqblk),
10158 +               v1_dqoff(0))) != sizeof(struct v1_disk_dqblk)) {
10159                 if (ret >= 0)
10160                         ret = -EIO;
10161                 goto out;
10162 @@ -145,14 +150,14 @@ out:
10163         return ret;
10164  }
10165  
10166 -static int v1_write_file_info(struct super_block *sb, int type)
10167 +static int v1_write_file_info(struct dqhash *hash, int type)
10168  {
10169 -       struct quota_info *dqopt = sb_dqopt(sb);
10170 +       struct quota_info *dqopt = dqh_dqopt(hash);
10171         struct v1_disk_dqblk dqblk;
10172         int ret;
10173  
10174         dqopt->info[type].dqi_flags &= ~DQF_INFO_DIRTY;
10175 -       if ((ret = sb->s_op->quota_read(sb, type, (char *)&dqblk,
10176 +       if ((ret = hash->dqh_sb->s_op->quota_read(hash, type, (char *)&dqblk,
10177             sizeof(struct v1_disk_dqblk), v1_dqoff(0))) != sizeof(struct v1_disk_dqblk)) {
10178                 if (ret >= 0)
10179                         ret = -EIO;
10180 @@ -160,7 +165,7 @@ static int v1_write_file_info(struct sup
10181         }
10182         dqblk.dqb_itime = dqopt->info[type].dqi_igrace;
10183         dqblk.dqb_btime = dqopt->info[type].dqi_bgrace;
10184 -       ret = sb->s_op->quota_write(sb, type, (char *)&dqblk,
10185 +       ret = hash->dqh_sb->s_op->quota_write(hash, type, (char *)&dqblk,
10186               sizeof(struct v1_disk_dqblk), v1_dqoff(0));
10187         if (ret == sizeof(struct v1_disk_dqblk))
10188                 ret = 0;
10189 diff -NurpP --minimal linux-2.6.16.20/fs/quota_v2.c linux-2.6.16.20-vs2.1.1-rc22/fs/quota_v2.c
10190 --- linux-2.6.16.20/fs/quota_v2.c       2006-04-09 13:49:54 +0200
10191 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/quota_v2.c  2006-04-26 19:07:00 +0200
10192 @@ -26,14 +26,15 @@ typedef char *dqbuf_t;
10193  #define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader)))
10194  
10195  /* Check whether given file is really vfsv0 quotafile */
10196 -static int v2_check_quota_file(struct super_block *sb, int type)
10197 +static int v2_check_quota_file(struct dqhash *hash, int type)
10198  {
10199         struct v2_disk_dqheader dqhead;
10200         ssize_t size;
10201         static const uint quota_magics[] = V2_INITQMAGICS;
10202         static const uint quota_versions[] = V2_INITQVERSIONS;
10203   
10204 -       size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
10205 +       size = hash->dqh_sb->s_op->quota_read(hash, type,
10206 +               (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
10207         if (size != sizeof(struct v2_disk_dqheader)) {
10208                 printk("quota_v2: failed read expected=%zd got=%zd\n",
10209                         sizeof(struct v2_disk_dqheader), size);
10210 @@ -46,17 +47,17 @@ static int v2_check_quota_file(struct su
10211  }
10212  
10213  /* Read information header from quota file */
10214 -static int v2_read_file_info(struct super_block *sb, int type)
10215 +static int v2_read_file_info(struct dqhash *hash, int type)
10216  {
10217         struct v2_disk_dqinfo dinfo;
10218 -       struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
10219 +       struct mem_dqinfo *info = dqh_dqopt(hash)->info+type;
10220         ssize_t size;
10221  
10222 -       size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
10223 -              sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
10224 +       size = hash->dqh_sb->s_op->quota_read(hash, type,
10225 +               (char *)&dinfo, sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
10226         if (size != sizeof(struct v2_disk_dqinfo)) {
10227                 printk(KERN_WARNING "Can't read info structure on device %s.\n",
10228 -                       sb->s_id);
10229 +                       hash->dqh_sb->s_id);
10230                 return -1;
10231         }
10232         info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
10233 @@ -69,10 +70,10 @@ static int v2_read_file_info(struct supe
10234  }
10235  
10236  /* Write information header to quota file */
10237 -static int v2_write_file_info(struct super_block *sb, int type)
10238 +static int v2_write_file_info(struct dqhash *hash, int type)
10239  {
10240         struct v2_disk_dqinfo dinfo;
10241 -       struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
10242 +       struct mem_dqinfo *info = dqh_dqopt(hash)->info+type;
10243         ssize_t size;
10244  
10245         spin_lock(&dq_data_lock);
10246 @@ -84,11 +85,11 @@ static int v2_write_file_info(struct sup
10247         dinfo.dqi_blocks = cpu_to_le32(info->u.v2_i.dqi_blocks);
10248         dinfo.dqi_free_blk = cpu_to_le32(info->u.v2_i.dqi_free_blk);
10249         dinfo.dqi_free_entry = cpu_to_le32(info->u.v2_i.dqi_free_entry);
10250 -       size = sb->s_op->quota_write(sb, type, (char *)&dinfo,
10251 +       size = hash->dqh_sb->s_op->quota_write(hash, type, (char *)&dinfo,
10252                sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
10253         if (size != sizeof(struct v2_disk_dqinfo)) {
10254                 printk(KERN_WARNING "Can't write info structure on device %s.\n",
10255 -                       sb->s_id);
10256 +                       hash->dqh_sb->s_id);
10257                 return -1;
10258         }
10259         return 0;
10260 @@ -132,24 +133,24 @@ static inline void freedqbuf(dqbuf_t buf
10261         kfree(buf);
10262  }
10263  
10264 -static inline ssize_t read_blk(struct super_block *sb, int type, uint blk, dqbuf_t buf)
10265 +static inline ssize_t read_blk(struct dqhash *hash, int type, uint blk, dqbuf_t buf)
10266  {
10267         memset(buf, 0, V2_DQBLKSIZE);
10268 -       return sb->s_op->quota_read(sb, type, (char *)buf,
10269 -              V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS);
10270 +       return hash->dqh_sb->s_op->quota_read(hash, type,
10271 +               (char *)buf, V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS);
10272  }
10273  
10274 -static inline ssize_t write_blk(struct super_block *sb, int type, uint blk, dqbuf_t buf)
10275 +static inline ssize_t write_blk(struct dqhash *hash, int type, uint blk, dqbuf_t buf)
10276  {
10277 -       return sb->s_op->quota_write(sb, type, (char *)buf,
10278 -              V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS);
10279 +       return hash->dqh_sb->s_op->quota_write(hash, type,
10280 +               (char *)buf, V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS);
10281  }
10282  
10283  /* Remove empty block from list and return it */
10284 -static int get_free_dqblk(struct super_block *sb, int type)
10285 +static int get_free_dqblk(struct dqhash *hash, int type)
10286  {
10287         dqbuf_t buf = getdqbuf();
10288 -       struct mem_dqinfo *info = sb_dqinfo(sb, type);
10289 +       struct mem_dqinfo *info = dqh_dqinfo(hash, type);
10290         struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
10291         int ret, blk;
10292  
10293 @@ -157,18 +158,18 @@ static int get_free_dqblk(struct super_b
10294                 return -ENOMEM;
10295         if (info->u.v2_i.dqi_free_blk) {
10296                 blk = info->u.v2_i.dqi_free_blk;
10297 -               if ((ret = read_blk(sb, type, blk, buf)) < 0)
10298 +               if ((ret = read_blk(hash, type, blk, buf)) < 0)
10299                         goto out_buf;
10300                 info->u.v2_i.dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
10301         }
10302         else {
10303                 memset(buf, 0, V2_DQBLKSIZE);
10304                 /* Assure block allocation... */
10305 -               if ((ret = write_blk(sb, type, info->u.v2_i.dqi_blocks, buf)) < 0)
10306 +               if ((ret = write_blk(hash, type, info->u.v2_i.dqi_blocks, buf)) < 0)
10307                         goto out_buf;
10308                 blk = info->u.v2_i.dqi_blocks++;
10309         }
10310 -       mark_info_dirty(sb, type);
10311 +       mark_info_dirty(hash, type);
10312         ret = blk;
10313  out_buf:
10314         freedqbuf(buf);
10315 @@ -176,9 +177,9 @@ out_buf:
10316  }
10317  
10318  /* Insert empty block to the list */
10319 -static int put_free_dqblk(struct super_block *sb, int type, dqbuf_t buf, uint blk)
10320 +static int put_free_dqblk(struct dqhash *hash, int type, dqbuf_t buf, uint blk)
10321  {
10322 -       struct mem_dqinfo *info = sb_dqinfo(sb, type);
10323 +       struct mem_dqinfo *info = dqh_dqinfo(hash, type);
10324         struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
10325         int err;
10326  
10327 @@ -186,18 +187,18 @@ static int put_free_dqblk(struct super_b
10328         dh->dqdh_prev_free = cpu_to_le32(0);
10329         dh->dqdh_entries = cpu_to_le16(0);
10330         info->u.v2_i.dqi_free_blk = blk;
10331 -       mark_info_dirty(sb, type);
10332 +       mark_info_dirty(hash, type);
10333         /* Some strange block. We had better leave it... */
10334 -       if ((err = write_blk(sb, type, blk, buf)) < 0)
10335 +       if ((err = write_blk(hash, type, blk, buf)) < 0)
10336                 return err;
10337         return 0;
10338  }
10339  
10340  /* Remove given block from the list of blocks with free entries */
10341 -static int remove_free_dqentry(struct super_block *sb, int type, dqbuf_t buf, uint blk)
10342 +static int remove_free_dqentry(struct dqhash *hash, int type, dqbuf_t buf, uint blk)
10343  {
10344         dqbuf_t tmpbuf = getdqbuf();
10345 -       struct mem_dqinfo *info = sb_dqinfo(sb, type);
10346 +       struct mem_dqinfo *info = dqh_dqinfo(hash, type);
10347         struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
10348         uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk = le32_to_cpu(dh->dqdh_prev_free);
10349         int err;
10350 @@ -205,27 +206,27 @@ static int remove_free_dqentry(struct su
10351         if (!tmpbuf)
10352                 return -ENOMEM;
10353         if (nextblk) {
10354 -               if ((err = read_blk(sb, type, nextblk, tmpbuf)) < 0)
10355 +               if ((err = read_blk(hash, type, nextblk, tmpbuf)) < 0)
10356                         goto out_buf;
10357                 ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = dh->dqdh_prev_free;
10358 -               if ((err = write_blk(sb, type, nextblk, tmpbuf)) < 0)
10359 +               if ((err = write_blk(hash, type, nextblk, tmpbuf)) < 0)
10360                         goto out_buf;
10361         }
10362         if (prevblk) {
10363 -               if ((err = read_blk(sb, type, prevblk, tmpbuf)) < 0)
10364 +               if ((err = read_blk(hash, type, prevblk, tmpbuf)) < 0)
10365                         goto out_buf;
10366                 ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_next_free = dh->dqdh_next_free;
10367 -               if ((err = write_blk(sb, type, prevblk, tmpbuf)) < 0)
10368 +               if ((err = write_blk(hash, type, prevblk, tmpbuf)) < 0)
10369                         goto out_buf;
10370         }
10371         else {
10372                 info->u.v2_i.dqi_free_entry = nextblk;
10373 -               mark_info_dirty(sb, type);
10374 +               mark_info_dirty(hash, type);
10375         }
10376         freedqbuf(tmpbuf);
10377         dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
10378         /* No matter whether write succeeds block is out of list */
10379 -       if (write_blk(sb, type, blk, buf) < 0)
10380 +       if (write_blk(hash, type, blk, buf) < 0)
10381                 printk(KERN_ERR "VFS: Can't write block (%u) with free entries.\n", blk);
10382         return 0;
10383  out_buf:
10384 @@ -234,10 +235,10 @@ out_buf:
10385  }
10386  
10387  /* Insert given block to the beginning of list with free entries */
10388 -static int insert_free_dqentry(struct super_block *sb, int type, dqbuf_t buf, uint blk)
10389 +static int insert_free_dqentry(struct dqhash *hash, int type, dqbuf_t buf, uint blk)
10390  {
10391         dqbuf_t tmpbuf = getdqbuf();
10392 -       struct mem_dqinfo *info = sb_dqinfo(sb, type);
10393 +       struct mem_dqinfo *info = dqh_dqinfo(hash, type);
10394         struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
10395         int err;
10396  
10397 @@ -245,18 +246,18 @@ static int insert_free_dqentry(struct su
10398                 return -ENOMEM;
10399         dh->dqdh_next_free = cpu_to_le32(info->u.v2_i.dqi_free_entry);
10400         dh->dqdh_prev_free = cpu_to_le32(0);
10401 -       if ((err = write_blk(sb, type, blk, buf)) < 0)
10402 +       if ((err = write_blk(hash, type, blk, buf)) < 0)
10403                 goto out_buf;
10404         if (info->u.v2_i.dqi_free_entry) {
10405 -               if ((err = read_blk(sb, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
10406 +               if ((err = read_blk(hash, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
10407                         goto out_buf;
10408                 ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = cpu_to_le32(blk);
10409 -               if ((err = write_blk(sb, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
10410 +               if ((err = write_blk(hash, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
10411                         goto out_buf;
10412         }
10413         freedqbuf(tmpbuf);
10414         info->u.v2_i.dqi_free_entry = blk;
10415 -       mark_info_dirty(sb, type);
10416 +       mark_info_dirty(hash, type);
10417         return 0;
10418  out_buf:
10419         freedqbuf(tmpbuf);
10420 @@ -266,8 +267,9 @@ out_buf:
10421  /* Find space for dquot */
10422  static uint find_free_dqentry(struct dquot *dquot, int *err)
10423  {
10424 -       struct super_block *sb = dquot->dq_sb;
10425 -       struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type;
10426 +       // struct super_block *sb = dquot->dq_sb;
10427 +       struct dqhash *dqh = dquot->dq_dqh;
10428 +       struct mem_dqinfo *info = dqh_dqopt(dqh)->info+dquot->dq_type;
10429         uint blk, i;
10430         struct v2_disk_dqdbheader *dh;
10431         struct v2_disk_dqblk *ddquot;
10432 @@ -283,11 +285,11 @@ static uint find_free_dqentry(struct dqu
10433         ddquot = GETENTRIES(buf);
10434         if (info->u.v2_i.dqi_free_entry) {
10435                 blk = info->u.v2_i.dqi_free_entry;
10436 -               if ((*err = read_blk(sb, dquot->dq_type, blk, buf)) < 0)
10437 +               if ((*err = read_blk(dqh, dquot->dq_type, blk, buf)) < 0)
10438                         goto out_buf;
10439         }
10440         else {
10441 -               blk = get_free_dqblk(sb, dquot->dq_type);
10442 +               blk = get_free_dqblk(dqh, dquot->dq_type);
10443                 if ((int)blk < 0) {
10444                         *err = blk;
10445                         freedqbuf(buf);
10446 @@ -296,10 +298,10 @@ static uint find_free_dqentry(struct dqu
10447                 memset(buf, 0, V2_DQBLKSIZE);
10448                 /* This is enough as block is already zeroed and entry list is empty... */
10449                 info->u.v2_i.dqi_free_entry = blk;
10450 -               mark_info_dirty(sb, dquot->dq_type);
10451 +               mark_info_dirty(dqh, dquot->dq_type);
10452         }
10453         if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK)   /* Block will be full? */
10454 -               if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
10455 +               if ((*err = remove_free_dqentry(dqh, dquot->dq_type, buf, blk)) < 0) {
10456                         printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
10457                         goto out_buf;
10458                 }
10459 @@ -314,7 +316,7 @@ static uint find_free_dqentry(struct dqu
10460                 goto out_buf;
10461         }
10462  #endif
10463 -       if ((*err = write_blk(sb, dquot->dq_type, blk, buf)) < 0) {
10464 +       if ((*err = write_blk(dqh, dquot->dq_type, blk, buf)) < 0) {
10465                 printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk);
10466                 goto out_buf;
10467         }
10468 @@ -329,7 +331,7 @@ out_buf:
10469  /* Insert reference to structure into the trie */
10470  static int do_insert_tree(struct dquot *dquot, uint *treeblk, int depth)
10471  {
10472 -       struct super_block *sb = dquot->dq_sb;
10473 +       struct dqhash *dqh = dquot->dq_dqh;
10474         dqbuf_t buf;
10475         int ret = 0, newson = 0, newact = 0;
10476         __le32 *ref;
10477 @@ -338,7 +340,7 @@ static int do_insert_tree(struct dquot *
10478         if (!(buf = getdqbuf()))
10479                 return -ENOMEM;
10480         if (!*treeblk) {
10481 -               ret = get_free_dqblk(sb, dquot->dq_type);
10482 +               ret = get_free_dqblk(dqh, dquot->dq_type);
10483                 if (ret < 0)
10484                         goto out_buf;
10485                 *treeblk = ret;
10486 @@ -346,7 +348,7 @@ static int do_insert_tree(struct dquot *
10487                 newact = 1;
10488         }
10489         else {
10490 -               if ((ret = read_blk(sb, dquot->dq_type, *treeblk, buf)) < 0) {
10491 +               if ((ret = read_blk(dqh, dquot->dq_type, *treeblk, buf)) < 0) {
10492                         printk(KERN_ERR "VFS: Can't read tree quota block %u.\n", *treeblk);
10493                         goto out_buf;
10494                 }
10495 @@ -369,10 +371,10 @@ static int do_insert_tree(struct dquot *
10496                 ret = do_insert_tree(dquot, &newblk, depth+1);
10497         if (newson && ret >= 0) {
10498                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
10499 -               ret = write_blk(sb, dquot->dq_type, *treeblk, buf);
10500 +               ret = write_blk(dqh, dquot->dq_type, *treeblk, buf);
10501         }
10502         else if (newact && ret < 0)
10503 -               put_free_dqblk(sb, dquot->dq_type, buf, *treeblk);
10504 +               put_free_dqblk(dqh, dquot->dq_type, buf, *treeblk);
10505  out_buf:
10506         freedqbuf(buf);
10507         return ret;
10508 @@ -409,10 +411,11 @@ static int v2_write_dquot(struct dquot *
10509         if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
10510                 ddquot.dqb_itime = cpu_to_le64(1);
10511         spin_unlock(&dq_data_lock);
10512 -       ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
10513 +       ret = dquot->dq_dqh->dqh_sb->s_op->quota_write(dquot->dq_dqh, type,
10514               (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off);
10515         if (ret != sizeof(struct v2_disk_dqblk)) {
10516 -               printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id);
10517 +               printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
10518 +                       dquot->dq_dqh->dqh_sb->s_id);
10519                 if (ret >= 0)
10520                         ret = -ENOSPC;
10521         }
10522 @@ -426,7 +429,8 @@ static int v2_write_dquot(struct dquot *
10523  /* Free dquot entry in data block */
10524  static int free_dqentry(struct dquot *dquot, uint blk)
10525  {
10526 -       struct super_block *sb = dquot->dq_sb;
10527 +       // struct super_block *sb = dquot->dq_sb;
10528 +       struct dqhash *dqh = dquot->dq_dqh;
10529         int type = dquot->dq_type;
10530         struct v2_disk_dqdbheader *dh;
10531         dqbuf_t buf = getdqbuf();
10532 @@ -440,15 +444,15 @@ static int free_dqentry(struct dquot *dq
10533                   (uint)(dquot->dq_off >> V2_DQBLKSIZE_BITS));
10534                 goto out_buf;
10535         }
10536 -       if ((ret = read_blk(sb, type, blk, buf)) < 0) {
10537 +       if ((ret = read_blk(dqh, type, blk, buf)) < 0) {
10538                 printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk);
10539                 goto out_buf;
10540         }
10541         dh = (struct v2_disk_dqdbheader *)buf;
10542         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)-1);
10543         if (!le16_to_cpu(dh->dqdh_entries)) {   /* Block got free? */
10544 -               if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 ||
10545 -                   (ret = put_free_dqblk(sb, type, buf, blk)) < 0) {
10546 +               if ((ret = remove_free_dqentry(dqh, type, buf, blk)) < 0 ||
10547 +                   (ret = put_free_dqblk(dqh, type, buf, blk)) < 0) {
10548                         printk(KERN_ERR "VFS: Can't move quota data block (%u) "
10549                           "to free list.\n", blk);
10550                         goto out_buf;
10551 @@ -459,13 +463,13 @@ static int free_dqentry(struct dquot *dq
10552                   sizeof(struct v2_disk_dqblk));
10553                 if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) {
10554                         /* Insert will write block itself */
10555 -                       if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) {
10556 +                       if ((ret = insert_free_dqentry(dqh, type, buf, blk)) < 0) {
10557                                 printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk);
10558                                 goto out_buf;
10559                         }
10560                 }
10561                 else
10562 -                       if ((ret = write_blk(sb, type, blk, buf)) < 0) {
10563 +                       if ((ret = write_blk(dqh, type, blk, buf)) < 0) {
10564                                 printk(KERN_ERR "VFS: Can't write quota data "
10565                                   "block %u\n", blk);
10566                                 goto out_buf;
10567 @@ -480,7 +484,7 @@ out_buf:
10568  /* Remove reference to dquot from tree */
10569  static int remove_tree(struct dquot *dquot, uint *blk, int depth)
10570  {
10571 -       struct super_block *sb = dquot->dq_sb;
10572 +       struct dqhash *dqh = dquot->dq_dqh;
10573         int type = dquot->dq_type;
10574         dqbuf_t buf = getdqbuf();
10575         int ret = 0;
10576 @@ -489,7 +493,7 @@ static int remove_tree(struct dquot *dqu
10577         
10578         if (!buf)
10579                 return -ENOMEM;
10580 -       if ((ret = read_blk(sb, type, *blk, buf)) < 0) {
10581 +       if ((ret = read_blk(dqh, type, *blk, buf)) < 0) {
10582                 printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk);
10583                 goto out_buf;
10584         }
10585 @@ -506,11 +510,11 @@ static int remove_tree(struct dquot *dqu
10586                 for (i = 0; i < V2_DQBLKSIZE && !buf[i]; i++);  /* Block got empty? */
10587                 /* Don't put the root block into the free block list */
10588                 if (i == V2_DQBLKSIZE && *blk != V2_DQTREEOFF) {
10589 -                       put_free_dqblk(sb, type, buf, *blk);
10590 +                       put_free_dqblk(dqh, type, buf, *blk);
10591                         *blk = 0;
10592                 }
10593                 else
10594 -                       if ((ret = write_blk(sb, type, *blk, buf)) < 0)
10595 +                       if ((ret = write_blk(dqh, type, *blk, buf)) < 0)
10596                                 printk(KERN_ERR "VFS: Can't write quota tree "
10597                                   "block %u.\n", *blk);
10598         }
10599 @@ -539,7 +543,7 @@ static loff_t find_block_dqentry(struct 
10600  
10601         if (!buf)
10602                 return -ENOMEM;
10603 -       if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
10604 +       if ((ret = read_blk(dquot->dq_dqh, dquot->dq_type, blk, buf)) < 0) {
10605                 printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
10606                 goto out_buf;
10607         }
10608 @@ -578,7 +582,7 @@ static loff_t find_tree_dqentry(struct d
10609  
10610         if (!buf)
10611                 return -ENOMEM;
10612 -       if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
10613 +       if ((ret = read_blk(dquot->dq_dqh, dquot->dq_type, blk, buf)) < 0) {
10614                 printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
10615                 goto out_buf;
10616         }
10617 @@ -610,7 +614,7 @@ static int v2_read_dquot(struct dquot *d
10618  
10619  #ifdef __QUOTA_V2_PARANOIA
10620         /* Invalidated quota? */
10621 -       if (!dquot->dq_sb || !sb_dqopt(dquot->dq_sb)->files[type]) {
10622 +       if (!dquot->dq_dqh || !dqh_dqopt(dquot->dq_dqh)->files[type]) {
10623                 printk(KERN_ERR "VFS: Quota invalidated while reading!\n");
10624                 return -EIO;
10625         }
10626 @@ -627,7 +631,7 @@ static int v2_read_dquot(struct dquot *d
10627         }
10628         else {
10629                 dquot->dq_off = offset;
10630 -               if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
10631 +               if ((ret = dquot->dq_dqh->dqh_sb->s_op->quota_read(dquot->dq_dqh, type,
10632                     (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset))
10633                     != sizeof(struct v2_disk_dqblk)) {
10634                         if (ret >= 0)
10635 diff -NurpP --minimal linux-2.6.16.20/fs/read_write.c linux-2.6.16.20-vs2.1.1-rc22/fs/read_write.c
10636 --- linux-2.6.16.20/fs/read_write.c     2006-04-09 13:49:54 +0200
10637 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/read_write.c        2006-04-26 19:07:00 +0200
10638 @@ -636,12 +636,77 @@ sys_writev(unsigned long fd, const struc
10639         return ret;
10640  }
10641  
10642 +ssize_t vfs_sendfile(struct file *out_file, struct file *in_file, loff_t *ppos,
10643 +                    size_t count, loff_t max)
10644 +{
10645 +       struct inode * in_inode, * out_inode;
10646 +       loff_t pos;
10647 +       ssize_t ret;
10648 +       /* Validate both files, clamp count against max, then call in_file's ->sendfile. */
10649 +       /* verify in_file: it needs an inode and a ->sendfile method */
10650 +       in_inode = in_file->f_dentry->d_inode;
10651 +       if (!in_inode)
10652 +               return -EINVAL;
10653 +       if (!in_file->f_op || !in_file->f_op->sendfile)
10654 +               return -EINVAL;
10655 +       /* NULL ppos: use in_file's own f_pos; otherwise FMODE_PREAD must be set */
10656 +       if (!ppos)
10657 +               ppos = &in_file->f_pos;
10658 +       else
10659 +               if (!(in_file->f_mode & FMODE_PREAD))
10660 +                       return -ESPIPE;
10661 +       /* a negative result is returned as-is; any other result becomes the new count */
10662 +       ret = rw_verify_area(READ, in_file, ppos, count);
10663 +       if (ret < 0)
10664 +               return ret;
10665 +       count = ret;
10666 +
10667 +       /* verify out_file: it needs an inode and a ->sendpage method */
10668 +       out_inode = out_file->f_dentry->d_inode;
10669 +       if (!out_inode)
10670 +               return -EINVAL;
10671 +       if (!out_file->f_op || !out_file->f_op->sendpage)
10672 +               return -EINVAL;
10673 +
10674 +       ret = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
10675 +       if (ret < 0)
10676 +               return ret;
10677 +       count = ret;
10678 +
10679 +       ret = security_file_permission (out_file, MAY_WRITE);
10680 +       if (ret)
10681 +               return ret;
10682 +       /* max == 0: fall back to the smaller s_maxbytes of the two superblocks */
10683 +       if (!max)
10684 +               max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
10685 +       /* reject negative offsets; -EOVERFLOW if already at/after max, else shorten count */
10686 +       pos = *ppos;
10687 +       if (unlikely(pos < 0))
10688 +               return -EINVAL;
10689 +       if (unlikely(pos + count > max)) {
10690 +               if (pos >= max)
10691 +                       return -EOVERFLOW;
10692 +               count = max - pos;
10693 +       }
10694 +
10695 +       ret = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file);
10696 +       /* a positive result is added to both rchar and wchar of current */
10697 +       if (ret > 0) {
10698 +               current->rchar += ret;
10699 +               current->wchar += ret;
10700 +       }
10701 +       /* if the offset ended up beyond max, -EOVERFLOW replaces the ->sendfile result */
10702 +       if (*ppos > max)
10703 +               return -EOVERFLOW;
10704 +       return ret;
10705 +}
10706 +
10707 +EXPORT_SYMBOL(vfs_sendfile);
10708 +
10709  static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
10710                            size_t count, loff_t max)
10711  {
10712         struct file * in_file, * out_file;
10713 -       struct inode * in_inode, * out_inode;
10714 -       loff_t pos;
10715         ssize_t retval;
10716         int fput_needed_in, fput_needed_out;
10717  
10718 @@ -654,22 +719,6 @@ static ssize_t do_sendfile(int out_fd, i
10719                 goto out;
10720         if (!(in_file->f_mode & FMODE_READ))
10721                 goto fput_in;
10722 -       retval = -EINVAL;
10723 -       in_inode = in_file->f_dentry->d_inode;
10724 -       if (!in_inode)
10725 -               goto fput_in;
10726 -       if (!in_file->f_op || !in_file->f_op->sendfile)
10727 -               goto fput_in;
10728 -       retval = -ESPIPE;
10729 -       if (!ppos)
10730 -               ppos = &in_file->f_pos;
10731 -       else
10732 -               if (!(in_file->f_mode & FMODE_PREAD))
10733 -                       goto fput_in;
10734 -       retval = rw_verify_area(READ, in_file, ppos, count);
10735 -       if (retval < 0)
10736 -               goto fput_in;
10737 -       count = retval;
10738  
10739         retval = security_file_permission (in_file, MAY_READ);
10740         if (retval)
10741 @@ -684,45 +733,12 @@ static ssize_t do_sendfile(int out_fd, i
10742                 goto fput_in;
10743         if (!(out_file->f_mode & FMODE_WRITE))
10744                 goto fput_out;
10745 -       retval = -EINVAL;
10746 -       if (!out_file->f_op || !out_file->f_op->sendpage)
10747 -               goto fput_out;
10748 -       out_inode = out_file->f_dentry->d_inode;
10749 -       retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
10750 -       if (retval < 0)
10751 -               goto fput_out;
10752 -       count = retval;
10753 -
10754 -       retval = security_file_permission (out_file, MAY_WRITE);
10755 -       if (retval)
10756 -               goto fput_out;
10757 -
10758 -       if (!max)
10759 -               max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
10760 -
10761 -       pos = *ppos;
10762 -       retval = -EINVAL;
10763 -       if (unlikely(pos < 0))
10764 -               goto fput_out;
10765 -       if (unlikely(pos + count > max)) {
10766 -               retval = -EOVERFLOW;
10767 -               if (pos >= max)
10768 -                       goto fput_out;
10769 -               count = max - pos;
10770 -       }
10771  
10772 -       retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file);
10773 +       retval = vfs_sendfile(out_file, in_file, ppos, count, max);
10774  
10775 -       if (retval > 0) {
10776 -               current->rchar += retval;
10777 -               current->wchar += retval;
10778 -       }
10779         current->syscr++;
10780         current->syscw++;
10781  
10782 -       if (*ppos > max)
10783 -               retval = -EOVERFLOW;
10784 -
10785  fput_out:
10786         fput_light(out_file, fput_needed_out);
10787  fput_in:
10788 diff -NurpP --minimal linux-2.6.16.20/fs/reiserfs/bitmap.c linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/bitmap.c
10789 --- linux-2.6.16.20/fs/reiserfs/bitmap.c        2005-08-29 22:25:33 +0200
10790 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/bitmap.c   2006-04-26 19:07:00 +0200
10791 @@ -13,6 +13,7 @@
10792  #include <linux/reiserfs_fs_sb.h>
10793  #include <linux/reiserfs_fs_i.h>
10794  #include <linux/quotaops.h>
10795 +#include <linux/vs_dlimit.h>
10796  
10797  #define PREALLOCATION_SIZE 9
10798  
10799 @@ -411,8 +412,10 @@ static void _reiserfs_free_block(struct 
10800         set_sb_free_blocks(rs, sb_free_blocks(rs) + 1);
10801  
10802         journal_mark_dirty(th, s, sbh);
10803 -       if (for_unformatted)
10804 +       if (for_unformatted) {
10805 +               DLIMIT_FREE_BLOCK(inode, 1);
10806                 DQUOT_FREE_BLOCK_NODIRTY(inode, 1);
10807 +       }
10808  }
10809  
10810  void reiserfs_free_block(struct reiserfs_transaction_handle *th,
10811 @@ -1021,6 +1024,7 @@ static inline int blocknrs_and_prealloc_
10812         int passno = 0;
10813         int nr_allocated = 0;
10814         int bigalloc = 0;
10815 +       int blocks;
10816  
10817         determine_prealloc_size(hint);
10818         if (!hint->formatted_node) {
10819 @@ -1030,19 +1034,30 @@ static inline int blocknrs_and_prealloc_
10820                                "reiserquota: allocating %d blocks id=%u",
10821                                amount_needed, hint->inode->i_uid);
10822  #endif
10823 -               quota_ret =
10824 -                   DQUOT_ALLOC_BLOCK_NODIRTY(hint->inode, amount_needed);
10825 -               if (quota_ret)  /* Quota exceeded? */
10826 +               quota_ret = DQUOT_ALLOC_BLOCK_NODIRTY(hint->inode,
10827 +                       amount_needed);
10828 +               if (quota_ret)
10829                         return QUOTA_EXCEEDED;
10830 +               if (DLIMIT_ALLOC_BLOCK(hint->inode, amount_needed)) {
10831 +                       DQUOT_FREE_BLOCK_NODIRTY(hint->inode,
10832 +                               amount_needed);
10833 +                       return NO_DISK_SPACE;
10834 +               }
10835 +
10836                 if (hint->preallocate && hint->prealloc_size) {
10837  #ifdef REISERQUOTA_DEBUG
10838                         reiserfs_debug(s, REISERFS_DEBUG_CODE,
10839                                        "reiserquota: allocating (prealloc) %d blocks id=%u",
10840                                        hint->prealloc_size, hint->inode->i_uid);
10841  #endif
10842 -                       quota_ret =
10843 -                           DQUOT_PREALLOC_BLOCK_NODIRTY(hint->inode,
10844 -                                                        hint->prealloc_size);
10845 +                       quota_ret = DQUOT_PREALLOC_BLOCK_NODIRTY(hint->inode,
10846 +                               hint->prealloc_size);
10847 +                       if (!quota_ret &&
10848 +                               DLIMIT_ALLOC_BLOCK(hint->inode, hint->prealloc_size)) {
10849 +                               DQUOT_FREE_BLOCK_NODIRTY(hint->inode,
10850 +                                       hint->prealloc_size);
10851 +                               quota_ret = 1;
10852 +                       }
10853                         if (quota_ret)
10854                                 hint->preallocate = hint->prealloc_size = 0;
10855                 }
10856 @@ -1093,7 +1108,10 @@ static inline int blocknrs_and_prealloc_
10857                                                nr_allocated,
10858                                                hint->inode->i_uid);
10859  #endif
10860 -                               DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + hint->prealloc_size - nr_allocated);      /* Free not allocated blocks */
10861 +                               /* Free not allocated blocks */
10862 +                               blocks = amount_needed + hint->prealloc_size - nr_allocated;
10863 +                               DLIMIT_FREE_BLOCK(hint->inode, blocks);
10864 +                               DQUOT_FREE_BLOCK_NODIRTY(hint->inode, blocks);
10865                         }
10866                         while (nr_allocated--)
10867                                 reiserfs_free_block(hint->th, hint->inode,
10868 @@ -1125,10 +1143,10 @@ static inline int blocknrs_and_prealloc_
10869                                REISERFS_I(hint->inode)->i_prealloc_count,
10870                                hint->inode->i_uid);
10871  #endif
10872 -               DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed +
10873 -                                        hint->prealloc_size - nr_allocated -
10874 -                                        REISERFS_I(hint->inode)->
10875 -                                        i_prealloc_count);
10876 +               blocks = amount_needed + hint->prealloc_size - nr_allocated -
10877 +                       REISERFS_I(hint->inode)->i_prealloc_count;
10878 +               DLIMIT_FREE_BLOCK(hint->inode, blocks);
10879 +               DQUOT_FREE_BLOCK_NODIRTY(hint->inode, blocks);
10880         }
10881  
10882         return CARRY_ON;
10883 diff -NurpP --minimal linux-2.6.16.20/fs/reiserfs/file.c linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/file.c
10884 --- linux-2.6.16.20/fs/reiserfs/file.c  2006-04-09 13:49:54 +0200
10885 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/file.c     2006-04-26 19:07:00 +0200
10886 @@ -1574,6 +1574,7 @@ struct file_operations reiserfs_file_ope
10887         .release = reiserfs_file_release,
10888         .fsync = reiserfs_sync_file,
10889         .sendfile = generic_file_sendfile,
10890 +       .sendpage = generic_file_sendpage,
10891         .aio_read = generic_file_aio_read,
10892         .aio_write = reiserfs_aio_write,
10893  };
10894 @@ -1586,4 +1587,5 @@ struct inode_operations reiserfs_file_in
10895         .listxattr = reiserfs_listxattr,
10896         .removexattr = reiserfs_removexattr,
10897         .permission = reiserfs_permission,
10898 +       .sync_flags = reiserfs_sync_flags,
10899  };
10900 diff -NurpP --minimal linux-2.6.16.20/fs/reiserfs/inode.c linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/inode.c
10901 --- linux-2.6.16.20/fs/reiserfs/inode.c 2006-04-09 13:49:55 +0200
10902 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/inode.c    2006-04-26 19:07:00 +0200
10903 @@ -17,6 +17,8 @@
10904  #include <linux/mpage.h>
10905  #include <linux/writeback.h>
10906  #include <linux/quotaops.h>
10907 +#include <linux/vs_dlimit.h>
10908 +#include <linux/vserver/tag.h>
10909  
10910  extern int reiserfs_default_io_size;   /* default io size devuned in super.c */
10911  
10912 @@ -57,6 +59,7 @@ void reiserfs_delete_inode(struct inode 
10913                  * stat data deletion */
10914                 if (!err) 
10915                         DQUOT_FREE_INODE(inode);
10916 +               DLIMIT_FREE_INODE(inode);
10917  
10918                 if (journal_end(&th, inode->i_sb, jbegin_count)) {
10919                         mutex_unlock(&inode->i_mutex);
10920 @@ -1126,6 +1129,8 @@ static void init_inode(struct inode *ino
10921         struct buffer_head *bh;
10922         struct item_head *ih;
10923         __u32 rdev;
10924 +       uid_t uid;
10925 +       gid_t gid;
10926         //int version = ITEM_VERSION_1;
10927  
10928         bh = PATH_PLAST_BUFFER(path);
10929 @@ -1149,12 +1154,13 @@ static void init_inode(struct inode *ino
10930                     (struct stat_data_v1 *)B_I_PITEM(bh, ih);
10931                 unsigned long blocks;
10932  
10933 +               uid = sd_v1_uid(sd);
10934 +               gid = sd_v1_gid(sd);
10935 +
10936                 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
10937                 set_inode_sd_version(inode, STAT_DATA_V1);
10938                 inode->i_mode = sd_v1_mode(sd);
10939                 inode->i_nlink = sd_v1_nlink(sd);
10940 -               inode->i_uid = sd_v1_uid(sd);
10941 -               inode->i_gid = sd_v1_gid(sd);
10942                 inode->i_size = sd_v1_size(sd);
10943                 inode->i_atime.tv_sec = sd_v1_atime(sd);
10944                 inode->i_mtime.tv_sec = sd_v1_mtime(sd);
10945 @@ -1196,11 +1202,12 @@ static void init_inode(struct inode *ino
10946                 // (directories and symlinks)
10947                 struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih);
10948  
10949 +               uid    = sd_v2_uid(sd);
10950 +               gid    = sd_v2_gid(sd);
10951 +
10952                 inode->i_mode = sd_v2_mode(sd);
10953                 inode->i_nlink = sd_v2_nlink(sd);
10954 -               inode->i_uid = sd_v2_uid(sd);
10955                 inode->i_size = sd_v2_size(sd);
10956 -               inode->i_gid = sd_v2_gid(sd);
10957                 inode->i_mtime.tv_sec = sd_v2_mtime(sd);
10958                 inode->i_atime.tv_sec = sd_v2_atime(sd);
10959                 inode->i_ctime.tv_sec = sd_v2_ctime(sd);
10960 @@ -1230,6 +1237,10 @@ static void init_inode(struct inode *ino
10961                 sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode);
10962         }
10963  
10964 +       inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
10965 +       inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
10966 +       inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, 0);
10967 +
10968         pathrelse(path);
10969         if (S_ISREG(inode->i_mode)) {
10970                 inode->i_op = &reiserfs_file_inode_operations;
10971 @@ -1252,13 +1263,15 @@ static void init_inode(struct inode *ino
10972  static void inode2sd(void *sd, struct inode *inode, loff_t size)
10973  {
10974         struct stat_data *sd_v2 = (struct stat_data *)sd;
10975 +       uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag);
10976 +       gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag);
10977         __u16 flags;
10978  
10979 +       set_sd_v2_uid(sd_v2, uid);
10980 +       set_sd_v2_gid(sd_v2, gid);
10981         set_sd_v2_mode(sd_v2, inode->i_mode);
10982         set_sd_v2_nlink(sd_v2, inode->i_nlink);
10983 -       set_sd_v2_uid(sd_v2, inode->i_uid);
10984         set_sd_v2_size(sd_v2, size);
10985 -       set_sd_v2_gid(sd_v2, inode->i_gid);
10986         set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec);
10987         set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec);
10988         set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec);
10989 @@ -1789,6 +1802,10 @@ int reiserfs_new_inode(struct reiserfs_t
10990  
10991         BUG_ON(!th->t_trans_id);
10992  
10993 +       if (DLIMIT_ALLOC_INODE(inode)) {
10994 +               err = -ENOSPC;
10995 +               goto out_bad_dlimit;
10996 +       }
10997         if (DQUOT_ALLOC_INODE(inode)) {
10998                 err = -EDQUOT;
10999                 goto out_end_trans;
11000 @@ -1974,6 +1991,9 @@ int reiserfs_new_inode(struct reiserfs_t
11001         DQUOT_FREE_INODE(inode);
11002  
11003        out_end_trans:
11004 +       DLIMIT_FREE_INODE(inode);
11005 +
11006 +      out_bad_dlimit:
11007         journal_end(th, th->t_super, th->t_blocks_allocated);
11008         /* Drop can be outside and it needs more credits so it's better to have it outside */
11009         DQUOT_DROP(inode);
11010 @@ -2701,6 +2721,14 @@ void sd_attrs_to_i_attrs(__u16 sd_attrs,
11011                         inode->i_flags |= S_IMMUTABLE;
11012                 else
11013                         inode->i_flags &= ~S_IMMUTABLE;
11014 +               if (sd_attrs & REISERFS_IUNLINK_FL)
11015 +                       inode->i_flags |= S_IUNLINK;
11016 +               else
11017 +                       inode->i_flags &= ~S_IUNLINK;
11018 +               if (sd_attrs & REISERFS_BARRIER_FL)
11019 +                       inode->i_flags |= S_BARRIER;
11020 +               else
11021 +                       inode->i_flags &= ~S_BARRIER;
11022                 if (sd_attrs & REISERFS_APPEND_FL)
11023                         inode->i_flags |= S_APPEND;
11024                 else
11025 @@ -2723,6 +2751,14 @@ void i_attrs_to_sd_attrs(struct inode *i
11026                         *sd_attrs |= REISERFS_IMMUTABLE_FL;
11027                 else
11028                         *sd_attrs &= ~REISERFS_IMMUTABLE_FL;
11029 +               if (inode->i_flags & S_IUNLINK)
11030 +                       *sd_attrs |= REISERFS_IUNLINK_FL;
11031 +               else
11032 +                       *sd_attrs &= ~REISERFS_IUNLINK_FL;
11033 +               if (inode->i_flags & S_BARRIER)
11034 +                       *sd_attrs |= REISERFS_BARRIER_FL;
11035 +               else
11036 +                       *sd_attrs &= ~REISERFS_BARRIER_FL;
11037                 if (inode->i_flags & S_SYNC)
11038                         *sd_attrs |= REISERFS_SYNC_FL;
11039                 else
11040 @@ -2900,6 +2936,22 @@ static ssize_t reiserfs_direct_IO(int rw
11041                                   reiserfs_get_blocks_direct_io, NULL);
11042  }
11043  
11044 +int reiserfs_sync_flags(struct inode *inode)
11045 +{
11046 +       u16 oldflags, newflags;
11047 +       /* Start from the cached i_attrs and let i_attrs_to_sd_attrs() update the copy. */
11048 +       oldflags = REISERFS_I(inode)->i_attrs;
11049 +       newflags = oldflags;
11050 +       i_attrs_to_sd_attrs(inode, &newflags);
11051 +       /* store, bump ctime and dirty the inode only if some bit differs; always returns 0 */
11052 +       if (oldflags ^ newflags) {
11053 +               REISERFS_I(inode)->i_attrs = newflags;
11054 +               inode->i_ctime = CURRENT_TIME_SEC;
11055 +               mark_inode_dirty(inode);
11056 +       }
11057 +       return 0;
11058 +}
11059 +
11060  int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
11061  {
11062         struct inode *inode = dentry->d_inode;
11063 @@ -2944,9 +2996,11 @@ int reiserfs_setattr(struct dentry *dent
11064         }
11065  
11066         error = inode_change_ok(inode, attr);
11067 +
11068         if (!error) {
11069                 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
11070 -                   (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
11071 +                   (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) ||
11072 +                   (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) {
11073                         error = reiserfs_chown_xattrs(inode, attr);
11074  
11075                         if (!error) {
11076 @@ -2976,6 +3030,9 @@ int reiserfs_setattr(struct dentry *dent
11077                                         inode->i_uid = attr->ia_uid;
11078                                 if (attr->ia_valid & ATTR_GID)
11079                                         inode->i_gid = attr->ia_gid;
11080 +                               if ((attr->ia_valid & ATTR_TAG) &&
11081 +                                       IS_TAGGED(inode))
11082 +                                       inode->i_tag = attr->ia_tag;
11083                                 mark_inode_dirty(inode);
11084                                 error =
11085                                     journal_end(&th, inode->i_sb, jbegin_count);
11086 diff -NurpP --minimal linux-2.6.16.20/fs/reiserfs/ioctl.c linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/ioctl.c
11087 --- linux-2.6.16.20/fs/reiserfs/ioctl.c 2006-04-09 13:49:55 +0200
11088 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/ioctl.c    2006-04-26 19:07:00 +0200
11089 @@ -4,6 +4,7 @@
11090  
11091  #include <linux/capability.h>
11092  #include <linux/fs.h>
11093 +#include <linux/mount.h>
11094  #include <linux/reiserfs_fs.h>
11095  #include <linux/time.h>
11096  #include <asm/uaccess.h>
11097 @@ -23,7 +24,7 @@ static int reiserfs_unpack(struct inode 
11098  int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
11099                    unsigned long arg)
11100  {
11101 -       unsigned int flags;
11102 +       unsigned int flags, oldflags;
11103  
11104         switch (cmd) {
11105         case REISERFS_IOC_UNPACK:
11106 @@ -42,12 +43,14 @@ int reiserfs_ioctl(struct inode *inode, 
11107  
11108                 flags = REISERFS_I(inode)->i_attrs;
11109                 i_attrs_to_sd_attrs(inode, (__u16 *) & flags);
11110 +               flags &= REISERFS_FL_USER_VISIBLE;
11111                 return put_user(flags, (int __user *)arg);
11112         case REISERFS_IOC_SETFLAGS:{
11113                         if (!reiserfs_attrs(inode->i_sb))
11114                                 return -ENOTTY;
11115  
11116 -                       if (IS_RDONLY(inode))
11117 +                       if (IS_RDONLY(inode) ||
11118 +                               (filp && MNT_IS_RDONLY(filp->f_vfsmnt)))
11119                                 return -EROFS;
11120  
11121                         if ((current->fsuid != inode->i_uid)
11122 @@ -57,10 +60,12 @@ int reiserfs_ioctl(struct inode *inode, 
11123                         if (get_user(flags, (int __user *)arg))
11124                                 return -EFAULT;
11125  
11126 -                       if (((flags ^ REISERFS_I(inode)->
11127 -                             i_attrs) & (REISERFS_IMMUTABLE_FL |
11128 -                                         REISERFS_APPEND_FL))
11129 -                           && !capable(CAP_LINUX_IMMUTABLE))
11130 +                       oldflags = REISERFS_I(inode) -> i_attrs;
11131 +                       if (((oldflags & REISERFS_IMMUTABLE_FL) ||
11132 +                               ((flags ^ oldflags) &
11133 +                               (REISERFS_IMMUTABLE_FL | REISERFS_IUNLINK_FL |
11134 +                                REISERFS_APPEND_FL))) &&
11135 +                               !capable(CAP_LINUX_IMMUTABLE))
11136                                 return -EPERM;
11137  
11138                         if ((flags & REISERFS_NOTAIL_FL) &&
11139 @@ -71,6 +76,9 @@ int reiserfs_ioctl(struct inode *inode, 
11140                                 if (result)
11141                                         return result;
11142                         }
11143 +
11144 +                       flags = flags & REISERFS_FL_USER_MODIFIABLE;
11145 +                       flags |= oldflags & ~REISERFS_FL_USER_MODIFIABLE;
11146                         sd_attrs_to_i_attrs(flags, inode);
11147                         REISERFS_I(inode)->i_attrs = flags;
11148                         inode->i_ctime = CURRENT_TIME_SEC;
11149 @@ -82,7 +90,8 @@ int reiserfs_ioctl(struct inode *inode, 
11150         case REISERFS_IOC_SETVERSION:
11151                 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
11152                         return -EPERM;
11153 -               if (IS_RDONLY(inode))
11154 +               if (IS_RDONLY(inode) ||
11155 +                       (filp && MNT_IS_RDONLY(filp->f_vfsmnt)))
11156                         return -EROFS;
11157                 if (get_user(inode->i_generation, (int __user *)arg))
11158                         return -EFAULT;
11159 diff -NurpP --minimal linux-2.6.16.20/fs/reiserfs/namei.c linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/namei.c
11160 --- linux-2.6.16.20/fs/reiserfs/namei.c 2006-04-09 13:49:55 +0200
11161 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/namei.c    2006-04-26 19:07:00 +0200
11162 @@ -19,6 +19,7 @@
11163  #include <linux/reiserfs_xattr.h>
11164  #include <linux/smp_lock.h>
11165  #include <linux/quotaops.h>
11166 +#include <linux/vs_tag.h>
11167  
11168  #define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { i->i_nlink++; if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; }
11169  #define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) i->i_nlink--;
11170 @@ -365,6 +366,7 @@ static struct dentry *reiserfs_lookup(st
11171                         reiserfs_write_unlock(dir->i_sb);
11172                         return ERR_PTR(-EACCES);
11173                 }
11174 +               dx_propagate_tag(nd, inode);
11175  
11176                 /* Propogate the priv_object flag so we know we're in the priv tree */
11177                 if (is_reiserfs_priv_object(dir))
11178 @@ -600,6 +602,7 @@ static int new_inode_init(struct inode *
11179         } else {
11180                 inode->i_gid = current->fsgid;
11181         }
11182 +       inode->i_tag = dx_current_fstag(inode->i_sb);
11183         DQUOT_INIT(inode);
11184         return 0;
11185  }
11186 @@ -1546,6 +1549,7 @@ struct inode_operations reiserfs_dir_ino
11187         .listxattr = reiserfs_listxattr,
11188         .removexattr = reiserfs_removexattr,
11189         .permission = reiserfs_permission,
11190 +       .sync_flags = reiserfs_sync_flags,
11191  };
11192  
11193  /*
11194 @@ -1562,6 +1566,7 @@ struct inode_operations reiserfs_symlink
11195         .listxattr = reiserfs_listxattr,
11196         .removexattr = reiserfs_removexattr,
11197         .permission = reiserfs_permission,
11198 +       .sync_flags = reiserfs_sync_flags,
11199  
11200  };
11201  
11202 @@ -1575,5 +1580,6 @@ struct inode_operations reiserfs_special
11203         .listxattr = reiserfs_listxattr,
11204         .removexattr = reiserfs_removexattr,
11205         .permission = reiserfs_permission,
11206 +       .sync_flags = reiserfs_sync_flags,
11207  
11208  };
11209 diff -NurpP --minimal linux-2.6.16.20/fs/reiserfs/stree.c linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/stree.c
11210 --- linux-2.6.16.20/fs/reiserfs/stree.c 2005-08-29 22:25:33 +0200
11211 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/stree.c    2006-04-26 19:07:00 +0200
11212 @@ -57,6 +57,7 @@
11213  #include <linux/smp_lock.h>
11214  #include <linux/buffer_head.h>
11215  #include <linux/quotaops.h>
11216 +#include <linux/vs_dlimit.h>
11217  
11218  /* Does the buffer contain a disk block which is in the tree. */
11219  inline int B_IS_IN_TREE(const struct buffer_head *p_s_bh)
11220 @@ -1365,6 +1366,7 @@ int reiserfs_delete_item(struct reiserfs
11221                        "reiserquota delete_item(): freeing %u, id=%u type=%c",
11222                        quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih));
11223  #endif
11224 +       DLIMIT_FREE_SPACE(p_s_inode, quota_cut_bytes);
11225         DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
11226  
11227         /* Return deleted body length */
11228 @@ -1453,6 +1455,7 @@ void reiserfs_delete_solid_item(struct r
11229  #endif
11230                                 DQUOT_FREE_SPACE_NODIRTY(inode,
11231                                                          quota_cut_bytes);
11232 +                               DLIMIT_FREE_SPACE(inode, quota_cut_bytes);
11233                         }
11234                         break;
11235                 }
11236 @@ -1808,6 +1811,7 @@ int reiserfs_cut_from_item(struct reiser
11237                        "reiserquota cut_from_item(): freeing %u id=%u type=%c",
11238                        quota_cut_bytes, p_s_inode->i_uid, '?');
11239  #endif
11240 +       DLIMIT_FREE_SPACE(p_s_inode, quota_cut_bytes);
11241         DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
11242         return n_ret_value;
11243  }
11244 @@ -2048,6 +2052,11 @@ int reiserfs_paste_into_item(struct reis
11245                 pathrelse(p_s_search_path);
11246                 return -EDQUOT;
11247         }
11248 +       if (DLIMIT_ALLOC_SPACE(inode, n_pasted_size)) {
11249 +               DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size);
11250 +               pathrelse(p_s_search_path);
11251 +               return -ENOSPC;
11252 +       }
11253         init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path,
11254                        n_pasted_size);
11255  #ifdef DISPLACE_NEW_PACKING_LOCALITIES
11256 @@ -2100,6 +2109,7 @@ int reiserfs_paste_into_item(struct reis
11257                        n_pasted_size, inode->i_uid,
11258                        key2type(&(p_s_key->on_disk_key)));
11259  #endif
11260 +       DLIMIT_FREE_SPACE(inode, n_pasted_size);
11261         DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size);
11262         return retval;
11263  }
11264 @@ -2137,6 +2147,11 @@ int reiserfs_insert_item(struct reiserfs
11265                         pathrelse(p_s_path);
11266                         return -EDQUOT;
11267                 }
11268 +               if (DLIMIT_ALLOC_SPACE(inode, quota_bytes)) {
11269 +                       DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes);
11270 +                       pathrelse(p_s_path);
11271 +                       return -ENOSPC;
11272 +               }
11273         }
11274         init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path,
11275                        IH_SIZE + ih_item_len(p_s_ih));
11276 @@ -2184,7 +2199,9 @@ int reiserfs_insert_item(struct reiserfs
11277                        "reiserquota insert_item(): freeing %u id=%u type=%c",
11278                        quota_bytes, inode->i_uid, head2type(p_s_ih));
11279  #endif
11280 -       if (inode)
11281 +       if (inode) {
11282 +               DLIMIT_FREE_SPACE(inode, quota_bytes);
11283                 DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes);
11284 +       }
11285         return retval;
11286  }
11287 diff -NurpP --minimal linux-2.6.16.20/fs/reiserfs/super.c linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/super.c
11288 --- linux-2.6.16.20/fs/reiserfs/super.c 2006-02-18 14:40:26 +0100
11289 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/super.c    2006-04-26 19:07:00 +0200
11290 @@ -138,7 +138,7 @@ static int remove_save_link_only(struct 
11291  }
11292  
11293  #ifdef CONFIG_QUOTA
11294 -static int reiserfs_quota_on_mount(struct super_block *, int);
11295 +static int reiserfs_quota_on_mount(struct dqhash *, int);
11296  #endif
11297  
11298  /* look for uncompleted unlinks and truncates and complete them */
11299 @@ -178,7 +178,7 @@ static int finish_unfinished(struct supe
11300         /* Turn on quotas so that they are updated correctly */
11301         for (i = 0; i < MAXQUOTAS; i++) {
11302                 if (REISERFS_SB(s)->s_qf_names[i]) {
11303 -                       int ret = reiserfs_quota_on_mount(s, i);
11304 +                       int ret = reiserfs_quota_on_mount(s->s_dqh, i);
11305                         if (ret < 0)
11306                                 reiserfs_warning(s,
11307                                                  "reiserfs: cannot turn on journalled quota: error %d",
11308 @@ -292,8 +292,8 @@ static int finish_unfinished(struct supe
11309  #ifdef CONFIG_QUOTA
11310         /* Turn quotas off */
11311         for (i = 0; i < MAXQUOTAS; i++) {
11312 -               if (sb_dqopt(s)->files[i])
11313 -                       vfs_quota_off_mount(s, i);
11314 +               if (dqh_dqopt(s->s_dqh)->files[i])
11315 +                       vfs_quota_off_mount(s->s_dqh, i);
11316         }
11317         if (ms_active_set)
11318                 /* Restore the flag back */
11319 @@ -578,9 +578,9 @@ static void reiserfs_clear_inode(struct 
11320  }
11321  
11322  #ifdef CONFIG_QUOTA
11323 -static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
11324 +static ssize_t reiserfs_quota_write(struct dqhash *, int, const char *,
11325                                     size_t, loff_t);
11326 -static ssize_t reiserfs_quota_read(struct super_block *, int, char *, size_t,
11327 +static ssize_t reiserfs_quota_read(struct dqhash *, int, char *, size_t,
11328                                    loff_t);
11329  #endif
11330  
11331 @@ -613,8 +613,8 @@ static int reiserfs_write_dquot(struct d
11332  static int reiserfs_acquire_dquot(struct dquot *);
11333  static int reiserfs_release_dquot(struct dquot *);
11334  static int reiserfs_mark_dquot_dirty(struct dquot *);
11335 -static int reiserfs_write_info(struct super_block *, int);
11336 -static int reiserfs_quota_on(struct super_block *, int, int, char *);
11337 +static int reiserfs_write_info(struct dqhash *, int);
11338 +static int reiserfs_quota_on(struct dqhash *, int, int, char *);
11339  
11340  static struct dquot_operations reiserfs_quota_operations = {
11341         .initialize = reiserfs_dquot_initialize,
11342 @@ -882,6 +882,14 @@ static int reiserfs_parse_options(struct
11343                 {"user_xattr",.setmask = 1 << REISERFS_UNSUPPORTED_OPT},
11344                 {"nouser_xattr",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT},
11345  #endif
11346 +#ifndef CONFIG_TAGGING_NONE
11347 +               {"tagxid",.setmask = 1 << REISERFS_TAGGED},
11348 +               {"tag",.setmask = 1 << REISERFS_TAGGED},
11349 +               {"notag",.clrmask = 1 << REISERFS_TAGGED},
11350 +#endif
11351 +#ifdef CONFIG_PROPAGATE
11352 +               {"tag",.arg_required = 'T',.values = NULL},
11353 +#endif
11354  #ifdef CONFIG_REISERFS_FS_POSIX_ACL
11355                 {"acl",.setmask = 1 << REISERFS_POSIXACL},
11356                 {"noacl",.clrmask = 1 << REISERFS_POSIXACL},
11357 @@ -989,7 +997,7 @@ static int reiserfs_parse_options(struct
11358                 if (c == 'u' || c == 'g') {
11359                         int qtype = c == 'u' ? USRQUOTA : GRPQUOTA;
11360  
11361 -                       if (sb_any_quota_enabled(s)) {
11362 +                       if (dqh_any_quota_enabled(s->s_dqh)) {
11363                                 reiserfs_warning(s,
11364                                                  "reiserfs_parse_options: cannot change journalled quota options when quota turned on.");
11365                                 return 0;
11366 @@ -1052,7 +1060,7 @@ static int reiserfs_parse_options(struct
11367         }
11368         /* This checking is not precise wrt the quota type but for our purposes it is sufficient */
11369         if (!(*mount_options & (1 << REISERFS_QUOTA))
11370 -           && sb_any_quota_enabled(s)) {
11371 +           && dqh_any_quota_enabled(s->s_dqh)) {
11372                 reiserfs_warning(s,
11373                                  "reiserfs_parse_options: quota options must be present when quota is turned on.");
11374                 return 0;
11375 @@ -1154,6 +1162,12 @@ static int reiserfs_remount(struct super
11376                 return -EINVAL;
11377         }
11378  
11379 +       if ((mount_options & (1 << REISERFS_TAGGED)) &&
11380 +               !(s->s_flags & MS_TAGGED)) {
11381 +               reiserfs_warning(s, "reiserfs: tagging not permitted on remount.");
11382 +               return -EINVAL;
11383 +       }
11384 +
11385         handle_attrs(s);
11386  
11387         /* Add options that are safe here */
11388 @@ -1456,7 +1470,7 @@ static int read_super_block(struct super
11389         s->s_export_op = &reiserfs_export_ops;
11390  #ifdef CONFIG_QUOTA
11391         s->s_qcop = &reiserfs_qctl_operations;
11392 -       s->dq_op = &reiserfs_quota_operations;
11393 +       s->s_qop = &reiserfs_quota_operations;
11394  #endif
11395  
11396         /* new format is limited by the 32 bit wide i_blocks field, want to
11397 @@ -1729,6 +1743,10 @@ static int reiserfs_fill_super(struct su
11398                 goto error;
11399         }
11400  
11401 +       /* map mount option tagxid */
11402 +       if (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TAGGED))
11403 +               s->s_flags |= MS_TAGGED;
11404 +
11405         rs = SB_DISK_SUPER_BLOCK(s);
11406         /* Let's do basic sanity check to verify that underlying device is not
11407            smaller than the filesystem. If the check fails then abort and scream,
11408 @@ -2003,16 +2021,16 @@ static int reiserfs_write_dquot(struct d
11409         struct reiserfs_transaction_handle th;
11410         int ret, err;
11411  
11412 -       reiserfs_write_lock(dquot->dq_sb);
11413 +       reiserfs_write_lock(dquot->dq_dqh->dqh_sb);
11414         ret =
11415 -           journal_begin(&th, dquot->dq_sb,
11416 -                         REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
11417 +           journal_begin(&th, dquot->dq_dqh->dqh_sb,
11418 +               REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_dqh->dqh_sb));
11419         if (ret)
11420                 goto out;
11421         ret = dquot_commit(dquot);
11422         err =
11423 -           journal_end(&th, dquot->dq_sb,
11424 -                       REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
11425 +           journal_end(&th, dquot->dq_dqh->dqh_sb,
11426 +               REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_dqh->dqh_sb));
11427         if (!ret && err)
11428                 ret = err;
11429        out:
11430 @@ -2025,20 +2043,20 @@ static int reiserfs_acquire_dquot(struct
11431         struct reiserfs_transaction_handle th;
11432         int ret, err;
11433  
11434 -       reiserfs_write_lock(dquot->dq_sb);
11435 +       reiserfs_write_lock(dquot->dq_dqh->dqh_sb);
11436         ret =
11437 -           journal_begin(&th, dquot->dq_sb,
11438 -                         REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
11439 +           journal_begin(&th, dquot->dq_dqh->dqh_sb,
11440 +               REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_dqh->dqh_sb));
11441         if (ret)
11442                 goto out;
11443         ret = dquot_acquire(dquot);
11444         err =
11445 -           journal_end(&th, dquot->dq_sb,
11446 -                       REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
11447 +           journal_end(&th, dquot->dq_dqh->dqh_sb,
11448 +               REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_dqh->dqh_sb));
11449         if (!ret && err)
11450                 ret = err;
11451        out:
11452 -       reiserfs_write_unlock(dquot->dq_sb);
11453 +       reiserfs_write_unlock(dquot->dq_dqh->dqh_sb);
11454         return ret;
11455  }
11456  
11457 @@ -2047,37 +2065,38 @@ static int reiserfs_release_dquot(struct
11458         struct reiserfs_transaction_handle th;
11459         int ret, err;
11460  
11461 -       reiserfs_write_lock(dquot->dq_sb);
11462 +       reiserfs_write_lock(dquot->dq_dqh->dqh_sb);
11463         ret =
11464 -           journal_begin(&th, dquot->dq_sb,
11465 -                         REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
11466 +           journal_begin(&th, dquot->dq_dqh->dqh_sb,
11467 +               REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_dqh->dqh_sb));
11468         if (ret)
11469                 goto out;
11470         ret = dquot_release(dquot);
11471         err =
11472 -           journal_end(&th, dquot->dq_sb,
11473 -                       REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
11474 +           journal_end(&th, dquot->dq_dqh->dqh_sb,
11475 +               REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_dqh->dqh_sb));
11476         if (!ret && err)
11477                 ret = err;
11478        out:
11479 -       reiserfs_write_unlock(dquot->dq_sb);
11480 +       reiserfs_write_unlock(dquot->dq_dqh->dqh_sb);
11481         return ret;
11482  }
11483  
11484  static int reiserfs_mark_dquot_dirty(struct dquot *dquot)
11485  {
11486         /* Are we journalling quotas? */
11487 -       if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
11488 -           REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
11489 +       if (REISERFS_SB(dquot->dq_dqh->dqh_sb)->s_qf_names[USRQUOTA] ||
11490 +           REISERFS_SB(dquot->dq_dqh->dqh_sb)->s_qf_names[GRPQUOTA]) {
11491                 dquot_mark_dquot_dirty(dquot);
11492                 return reiserfs_write_dquot(dquot);
11493         } else
11494                 return dquot_mark_dquot_dirty(dquot);
11495  }
11496  
11497 -static int reiserfs_write_info(struct super_block *sb, int type)
11498 +static int reiserfs_write_info(struct dqhash *hash, int type)
11499  {
11500         struct reiserfs_transaction_handle th;
11501 +       struct super_block *sb = hash->dqh_sb;
11502         int ret, err;
11503  
11504         /* Data block + inode block */
11505 @@ -2085,7 +2104,7 @@ static int reiserfs_write_info(struct su
11506         ret = journal_begin(&th, sb, 2);
11507         if (ret)
11508                 goto out;
11509 -       ret = dquot_commit_info(sb, type);
11510 +       ret = dquot_commit_info(hash, type);
11511         err = journal_end(&th, sb, 2);
11512         if (!ret && err)
11513                 ret = err;
11514 @@ -2097,18 +2116,21 @@ static int reiserfs_write_info(struct su
11515  /*
11516   * Turn on quotas during mount time - we need to find the quota file and such...
11517   */
11518 -static int reiserfs_quota_on_mount(struct super_block *sb, int type)
11519 +static int reiserfs_quota_on_mount(struct dqhash *hash, int type)
11520  {
11521 -       return vfs_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type],
11522 +       struct super_block *sb = hash->dqh_sb;
11523 +
11524 +       return vfs_quota_on_mount(hash, REISERFS_SB(sb)->s_qf_names[type],
11525                                   REISERFS_SB(sb)->s_jquota_fmt, type);
11526  }
11527  
11528  /*
11529   * Standard function to be called on quota_on
11530   */
11531 -static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
11532 +static int reiserfs_quota_on(struct dqhash *hash, int type, int format_id,
11533                              char *path)
11534  {
11535 +       struct super_block *sb = hash->dqh_sb;
11536         int err;
11537         struct nameidata nd;
11538  
11539 @@ -2133,7 +2155,7 @@ static int reiserfs_quota_on(struct supe
11540         if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] &&
11541             !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) {
11542                 path_release(&nd);
11543 -               return vfs_quota_on(sb, type, format_id, path);
11544 +               return vfs_quota_on(hash, type, format_id, path);
11545         }
11546         /* Quotafile not of fs root? */
11547         if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode)
11548 @@ -2141,17 +2163,18 @@ static int reiserfs_quota_on(struct supe
11549                                  "reiserfs: Quota file not on filesystem root. "
11550                                  "Journalled quota will not work.");
11551         path_release(&nd);
11552 -       return vfs_quota_on(sb, type, format_id, path);
11553 +       return vfs_quota_on(hash, type, format_id, path);
11554  }
11555  
11556  /* Read data from quotafile - avoid pagecache and such because we cannot afford
11557   * acquiring the locks... As quota files are never truncated and quota code
11558   * itself serializes the operations (and noone else should touch the files)
11559   * we don't have to be afraid of races */
11560 -static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data,
11561 +static ssize_t reiserfs_quota_read(struct dqhash *hash, int type, char *data,
11562                                    size_t len, loff_t off)
11563  {
11564 -       struct inode *inode = sb_dqopt(sb)->files[type];
11565 +       struct inode *inode = dqh_dqopt(hash)->files[type];
11566 +       struct super_block *sb = hash->dqh_sb;
11567         unsigned long blk = off >> sb->s_blocksize_bits;
11568         int err = 0, offset = off & (sb->s_blocksize - 1), tocopy;
11569         size_t toread;
11570 @@ -2193,10 +2216,11 @@ static ssize_t reiserfs_quota_read(struc
11571  
11572  /* Write to quotafile (we know the transaction is already started and has
11573   * enough credits) */
11574 -static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
11575 +static ssize_t reiserfs_quota_write(struct dqhash *hash, int type,
11576                                     const char *data, size_t len, loff_t off)
11577  {
11578 -       struct inode *inode = sb_dqopt(sb)->files[type];
11579 +       struct inode *inode = dqh_dqopt(hash)->files[type];
11580 +       struct super_block *sb = hash->dqh_sb;
11581         unsigned long blk = off >> sb->s_blocksize_bits;
11582         int err = 0, offset = off & (sb->s_blocksize - 1), tocopy;
11583         int journal_quota = REISERFS_SB(sb)->s_qf_names[type] != NULL;
11584 diff -NurpP --minimal linux-2.6.16.20/fs/reiserfs/xattr.c linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/xattr.c
11585 --- linux-2.6.16.20/fs/reiserfs/xattr.c 2006-02-18 14:40:26 +0100
11586 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/xattr.c    2006-04-26 19:07:00 +0200
11587 @@ -35,6 +35,7 @@
11588  #include <linux/namei.h>
11589  #include <linux/errno.h>
11590  #include <linux/fs.h>
11591 +#include <linux/mount.h>
11592  #include <linux/file.h>
11593  #include <linux/pagemap.h>
11594  #include <linux/xattr.h>
11595 @@ -824,7 +825,7 @@ int reiserfs_delete_xattrs(struct inode 
11596         if (dir->d_inode->i_nlink <= 2) {
11597                 root = get_xa_root(inode->i_sb);
11598                 reiserfs_write_lock_xattrs(inode->i_sb);
11599 -               err = vfs_rmdir(root->d_inode, dir);
11600 +               err = vfs_rmdir(root->d_inode, dir, NULL);
11601                 reiserfs_write_unlock_xattrs(inode->i_sb);
11602                 dput(root);
11603         } else {
11604 diff -NurpP --minimal linux-2.6.16.20/fs/stat.c linux-2.6.16.20-vs2.1.1-rc22/fs/stat.c
11605 --- linux-2.6.16.20/fs/stat.c   2006-02-18 14:40:26 +0100
11606 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/stat.c      2006-04-26 19:07:00 +0200
11607 @@ -27,6 +27,7 @@ void generic_fillattr(struct inode *inod
11608         stat->nlink = inode->i_nlink;
11609         stat->uid = inode->i_uid;
11610         stat->gid = inode->i_gid;
11611 +       stat->tag = inode->i_tag;
11612         stat->rdev = inode->i_rdev;
11613         stat->atime = inode->i_atime;
11614         stat->mtime = inode->i_mtime;
11615 diff -NurpP --minimal linux-2.6.16.20/fs/super.c linux-2.6.16.20-vs2.1.1-rc22/fs/super.c
11616 --- linux-2.6.16.20/fs/super.c  2006-04-09 13:49:55 +0200
11617 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/super.c     2006-04-27 21:28:48 +0200
11618 @@ -37,6 +37,8 @@
11619  #include <linux/writeback.h>           /* for the emergency remount stuff */
11620  #include <linux/idr.h>
11621  #include <linux/kobject.h>
11622 +#include <linux/devpts_fs.h>
11623 +#include <linux/proc_fs.h>
11624  #include <asm/uaccess.h>
11625  
11626  
11627 @@ -77,15 +79,14 @@ static struct super_block *alloc_super(v
11628                 s->s_count = S_BIAS;
11629                 atomic_set(&s->s_active, 1);
11630                 sema_init(&s->s_vfs_rename_sem,1);
11631 -               sema_init(&s->s_dquot.dqio_sem, 1);
11632 -               sema_init(&s->s_dquot.dqonoff_sem, 1);
11633 -               init_rwsem(&s->s_dquot.dqptr_sem);
11634                 init_waitqueue_head(&s->s_wait_unfrozen);
11635                 s->s_maxbytes = MAX_NON_LFS;
11636 -               s->dq_op = sb_dquot_ops;
11637 +               s->s_qop = sb_dquot_ops;
11638                 s->s_qcop = sb_quotactl_ops;
11639                 s->s_op = &default_op;
11640                 s->s_time_gran = 1000000000;
11641 +               /* quick hack to make dqhash id unique, sufficient for now */
11642 +               s->s_dqh = new_dqhash(s, (unsigned long)s);
11643         }
11644  out:
11645         return s;
11646 @@ -100,6 +101,7 @@ out:
11647  static inline void destroy_super(struct super_block *s)
11648  {
11649         security_sb_free(s);
11650 +       dqhput(s->s_dqh);
11651         kfree(s);
11652  }
11653  
11654 @@ -171,7 +173,7 @@ void deactivate_super(struct super_block
11655         if (atomic_dec_and_lock(&s->s_active, &sb_lock)) {
11656                 s->s_count -= S_BIAS-1;
11657                 spin_unlock(&sb_lock);
11658 -               DQUOT_OFF(s);
11659 +               DQUOT_OFF(s->s_dqh);
11660                 down_write(&s->s_umount);
11661                 fs->kill_sb(s);
11662                 put_filesystem(fs);
11663 @@ -803,7 +805,7 @@ struct vfsmount *
11664  do_kern_mount(const char *fstype, int flags, const char *name, void *data)
11665  {
11666         struct file_system_type *type = get_fs_type(fstype);
11667 -       struct super_block *sb = ERR_PTR(-ENOMEM);
11668 +       struct super_block *sb;
11669         struct vfsmount *mnt;
11670         int error;
11671         char *secdata = NULL;
11672 @@ -811,6 +813,12 @@ do_kern_mount(const char *fstype, int fl
11673         if (!type)
11674                 return ERR_PTR(-ENODEV);
11675  
11676 +       sb = ERR_PTR(-EPERM);
11677 +       if ((type->fs_flags & FS_BINARY_MOUNTDATA) &&
11678 +               !vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT))
11679 +               goto out;
11680 +
11681 +       sb = ERR_PTR(-ENOMEM);
11682         mnt = alloc_vfsmnt(name);
11683         if (!mnt)
11684                 goto out;
11685 @@ -832,6 +840,13 @@ do_kern_mount(const char *fstype, int fl
11686         sb = type->get_sb(type, flags, name, data);
11687         if (IS_ERR(sb))
11688                 goto out_free_secdata;
11689 +
11690 +       error = -EPERM;
11691 +       if (!capable(CAP_SYS_ADMIN) && !sb->s_bdev &&
11692 +               (sb->s_magic != PROC_SUPER_MAGIC) &&
11693 +               (sb->s_magic != DEVPTS_SUPER_MAGIC))
11694 +               goto out_sb;
11695 +
11696         error = security_sb_kern_mount(sb, secdata);
11697         if (error)
11698                 goto out_sb;
11699 diff -NurpP --minimal linux-2.6.16.20/fs/sysfs/mount.c linux-2.6.16.20-vs2.1.1-rc22/fs/sysfs/mount.c
11700 --- linux-2.6.16.20/fs/sysfs/mount.c    2005-08-29 22:25:33 +0200
11701 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/sysfs/mount.c       2006-04-26 19:07:00 +0200
11702 @@ -11,8 +11,6 @@
11703  
11704  #include "sysfs.h"
11705  
11706 -/* Random magic number */
11707 -#define SYSFS_MAGIC 0x62656572
11708  
11709  struct vfsmount *sysfs_mount;
11710  struct super_block * sysfs_sb = NULL;
11711 @@ -38,7 +36,7 @@ static int sysfs_fill_super(struct super
11712  
11713         sb->s_blocksize = PAGE_CACHE_SIZE;
11714         sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
11715 -       sb->s_magic = SYSFS_MAGIC;
11716 +       sb->s_magic = SYSFS_SUPER_MAGIC;
11717         sb->s_op = &sysfs_ops;
11718         sb->s_time_gran = 1;
11719         sysfs_sb = sb;
11720 diff -NurpP --minimal linux-2.6.16.20/fs/udf/super.c linux-2.6.16.20-vs2.1.1-rc22/fs/udf/super.c
11721 --- linux-2.6.16.20/fs/udf/super.c      2006-04-09 13:49:55 +0200
11722 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/udf/super.c 2006-04-26 19:07:00 +0200
11723 @@ -1575,7 +1575,7 @@ static int udf_fill_super(struct super_b
11724  
11725         /* Fill in the rest of the superblock */
11726         sb->s_op = &udf_sb_ops;
11727 -       sb->dq_op = NULL;
11728 +       sb->s_qop = NULL;
11729         sb->s_dirt = 0;
11730         sb->s_magic = UDF_SUPER_MAGIC;
11731         sb->s_time_gran = 1000;
11732 diff -NurpP --minimal linux-2.6.16.20/fs/ufs/super.c linux-2.6.16.20-vs2.1.1-rc22/fs/ufs/super.c
11733 --- linux-2.6.16.20/fs/ufs/super.c      2006-04-09 13:49:55 +0200
11734 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ufs/super.c 2006-04-26 19:07:00 +0200
11735 @@ -873,7 +873,7 @@ magic_found:
11736          * Read ufs_super_block into internal data structures
11737          */
11738         sb->s_op = &ufs_super_ops;
11739 -       sb->dq_op = NULL; /***/
11740 +       sb->s_qop = NULL; /***/
11741         sb->s_magic = fs32_to_cpu(sb, usb3->fs_magic);
11742  
11743         uspi->s_sblkno = fs32_to_cpu(sb, usb1->fs_sblkno);
11744 @@ -1198,8 +1198,8 @@ static void destroy_inodecache(void)
11745  }
11746  
11747  #ifdef CONFIG_QUOTA
11748 -static ssize_t ufs_quota_read(struct super_block *, int, char *,size_t, loff_t);
11749 -static ssize_t ufs_quota_write(struct super_block *, int, const char *, size_t, loff_t);
11750 +static ssize_t ufs_quota_read(struct dqhash *, int, char *,size_t, loff_t);
11751 +static ssize_t ufs_quota_write(struct dqhash *, int, const char *, size_t, loff_t);
11752  #endif
11753  
11754  static struct super_operations ufs_super_ops = {
11755 @@ -1224,10 +1224,11 @@ static struct super_operations ufs_super
11756   * acquiring the locks... As quota files are never truncated and quota code
11757   * itself serializes the operations (and noone else should touch the files)
11758   * we don't have to be afraid of races */
11759 -static ssize_t ufs_quota_read(struct super_block *sb, int type, char *data,
11760 +static ssize_t ufs_quota_read(struct dqhash *hash, int type, char *data,
11761                                size_t len, loff_t off)
11762  {
11763 -       struct inode *inode = sb_dqopt(sb)->files[type];
11764 +       struct inode *inode = dqh_dqopt(hash)->files[type];
11765 +       struct super_block *sb = hash->dqh_sb;
11766         sector_t blk = off >> sb->s_blocksize_bits;
11767         int err = 0;
11768         int offset = off & (sb->s_blocksize - 1);
11769 @@ -1263,10 +1264,11 @@ static ssize_t ufs_quota_read(struct sup
11770  }
11771  
11772  /* Write to quotafile */
11773 -static ssize_t ufs_quota_write(struct super_block *sb, int type,
11774 +static ssize_t ufs_quota_write(struct dqhash *hash, int type,
11775                                 const char *data, size_t len, loff_t off)
11776  {
11777 -       struct inode *inode = sb_dqopt(sb)->files[type];
11778 +       struct inode *inode = dqh_dqopt(hash)->files[type];
11779 +       struct super_block *sb = hash->dqh_sb;
11780         sector_t blk = off >> sb->s_blocksize_bits;
11781         int err = 0;
11782         int offset = off & (sb->s_blocksize - 1);
11783 diff -NurpP --minimal linux-2.6.16.20/fs/xattr.c linux-2.6.16.20-vs2.1.1-rc22/fs/xattr.c
11784 --- linux-2.6.16.20/fs/xattr.c  2006-02-18 14:40:27 +0100
11785 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xattr.c     2006-04-26 19:07:00 +0200
11786 @@ -17,6 +17,7 @@
11787  #include <linux/syscalls.h>
11788  #include <linux/module.h>
11789  #include <linux/fsnotify.h>
11790 +#include <linux/mount.h>
11791  #include <asm/uaccess.h>
11792  
11793  
11794 @@ -167,7 +168,7 @@ EXPORT_SYMBOL_GPL(vfs_removexattr);
11795   */
11796  static long
11797  setxattr(struct dentry *d, char __user *name, void __user *value,
11798 -        size_t size, int flags)
11799 +        size_t size, int flags, struct vfsmount *mnt)
11800  {
11801         int error;
11802         void *kvalue = NULL;
11803 @@ -194,6 +195,8 @@ setxattr(struct dentry *d, char __user *
11804                 }
11805         }
11806  
11807 -       error = vfs_setxattr(d, kname, kvalue, size, flags);
11808 +       error = -EROFS; /* read-only mount: no early return, kvalue is freed below */
11809 +       if (!MNT_IS_RDONLY(mnt))
11810 +               error = vfs_setxattr(d, kname, kvalue, size, flags);
11811         kfree(kvalue);
11812         return error;
11813 @@ -209,7 +213,7 @@ sys_setxattr(char __user *path, char __u
11814         error = user_path_walk(path, &nd);
11815         if (error)
11816                 return error;
11817 -       error = setxattr(nd.dentry, name, value, size, flags);
11818 +       error = setxattr(nd.dentry, name, value, size, flags, nd.mnt);
11819         path_release(&nd);
11820         return error;
11821  }
11822 @@ -224,7 +228,7 @@ sys_lsetxattr(char __user *path, char __
11823         error = user_path_walk_link(path, &nd);
11824         if (error)
11825                 return error;
11826 -       error = setxattr(nd.dentry, name, value, size, flags);
11827 +       error = setxattr(nd.dentry, name, value, size, flags, nd.mnt);
11828         path_release(&nd);
11829         return error;
11830  }
11831 @@ -239,7 +243,7 @@ sys_fsetxattr(int fd, char __user *name,
11832         f = fget(fd);
11833         if (!f)
11834                 return error;
11835 -       error = setxattr(f->f_dentry, name, value, size, flags);
11836 +       error = setxattr(f->f_dentry, name, value, size, flags, f->f_vfsmnt);
11837         fput(f);
11838         return error;
11839  }
11840 @@ -412,7 +416,7 @@ sys_flistxattr(int fd, char __user *list
11841   * Extended attribute REMOVE operations
11842   */
11843  static long
11844 -removexattr(struct dentry *d, char __user *name)
11845 +removexattr(struct dentry *d, char __user *name, struct vfsmount *mnt)
11846  {
11847         int error;
11848         char kname[XATTR_NAME_MAX + 1];
11849 @@ -423,6 +427,9 @@ removexattr(struct dentry *d, char __use
11850         if (error < 0)
11851                 return error;
11852  
11853 +       if (MNT_IS_RDONLY(mnt))
11854 +               return -EROFS;
11855 +
11856         return vfs_removexattr(d, kname);
11857  }
11858  
11859 @@ -435,7 +442,7 @@ sys_removexattr(char __user *path, char 
11860         error = user_path_walk(path, &nd);
11861         if (error)
11862                 return error;
11863 -       error = removexattr(nd.dentry, name);
11864 +       error = removexattr(nd.dentry, name, nd.mnt);
11865         path_release(&nd);
11866         return error;
11867  }
11868 @@ -449,7 +456,7 @@ sys_lremovexattr(char __user *path, char
11869         error = user_path_walk_link(path, &nd);
11870         if (error)
11871                 return error;
11872 -       error = removexattr(nd.dentry, name);
11873 +       error = removexattr(nd.dentry, name, nd.mnt);
11874         path_release(&nd);
11875         return error;
11876  }
11877 @@ -463,7 +470,7 @@ sys_fremovexattr(int fd, char __user *na
11878         f = fget(fd);
11879         if (!f)
11880                 return error;
11881 -       error = removexattr(f->f_dentry, name);
11882 +       error = removexattr(f->f_dentry, name, f->f_vfsmnt);
11883         fput(f);
11884         return error;
11885  }
11886 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/linux-2.6/xfs_file.c linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_file.c
11887 --- linux-2.6.16.20/fs/xfs/linux-2.6/xfs_file.c 2006-04-09 13:49:55 +0200
11888 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_file.c    2006-04-26 19:07:00 +0200
11889 @@ -536,6 +536,7 @@ struct file_operations linvfs_file_opera
11890         .aio_read       = linvfs_aio_read,
11891         .aio_write      = linvfs_aio_write,
11892         .sendfile       = linvfs_sendfile,
11893 +       .sendpage       = generic_file_sendpage,
11894         .unlocked_ioctl = linvfs_ioctl,
11895  #ifdef CONFIG_COMPAT
11896         .compat_ioctl   = linvfs_compat_ioctl,
11897 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/linux-2.6/xfs_ioctl.c linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_ioctl.c
11898 --- linux-2.6.16.20/fs/xfs/linux-2.6/xfs_ioctl.c        2006-02-18 14:40:27 +0100
11899 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_ioctl.c   2006-04-26 19:07:00 +0200
11900 @@ -1100,6 +1100,8 @@ xfs_ioc_fsgeometry(
11901  #define LINUX_XFLAG_APPEND     0x00000020 /* writes to file may only append */
11902  #define LINUX_XFLAG_NODUMP     0x00000040 /* do not dump file */
11903  #define LINUX_XFLAG_NOATIME    0x00000080 /* do not update atime */
11904 +#define LINUX_XFLAG_BARRIER    0x04000000 /* chroot() barrier */
11905 +#define LINUX_XFLAG_IUNLINK    0x08000000 /* immutable unlink */
11906  
11907  STATIC unsigned int
11908  xfs_merge_ioc_xflags(
11909 @@ -1140,6 +1142,10 @@ xfs_di2lxflags(
11910  
11911         if (di_flags & XFS_DIFLAG_IMMUTABLE)
11912                 flags |= LINUX_XFLAG_IMMUTABLE;
11913 +       if (di_flags & XFS_DIFLAG_IUNLINK)
11914 +               flags |= LINUX_XFLAG_IUNLINK;
11915 +       if (di_flags & XFS_DIFLAG_BARRIER)
11916 +               flags |= LINUX_XFLAG_BARRIER;
11917         if (di_flags & XFS_DIFLAG_APPEND)
11918                 flags |= LINUX_XFLAG_APPEND;
11919         if (di_flags & XFS_DIFLAG_SYNC)
11920 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/linux-2.6/xfs_iops.c linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_iops.c
11921 --- linux-2.6.16.20/fs/xfs/linux-2.6/xfs_iops.c 2006-05-11 21:25:36 +0200
11922 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_iops.c    2006-04-26 19:07:00 +0200
11923 @@ -55,6 +55,7 @@
11924  #include <linux/xattr.h>
11925  #include <linux/namei.h>
11926  #include <linux/security.h>
11927 +#include <linux/vserver/tag.h>
11928  
11929  /*
11930   * Get a XFS inode from a given vnode.
11931 @@ -410,6 +411,7 @@ linvfs_lookup(
11932                 d_add(dentry, NULL);
11933                 return NULL;
11934         }
11935 +       dx_propagate_tag(nd, LINVFS_GET_IP(cvp));
11936  
11937         return d_splice_alias(LINVFS_GET_IP(cvp), dentry);
11938  }
11939 @@ -646,6 +648,41 @@ linvfs_getattr(
11940  }
11941  
11942  STATIC int
11943 +linvfs_sync_flags(struct inode *inode)
11944 +{
11945 +       unsigned int oldflags, newflags;
11946 +       vattr_t         vattr;
11947 +       int             flags = 0;
11948 +       int             error;
11949 +       vnode_t         *vp = LINVFS_GET_VP(inode);
11950 +
11951 +       memset(&vattr, 0, sizeof(vattr_t));
11952 +       /* copy the inode's immutable/iunlink/barrier state into XFS xflags */
11953 +       vattr.va_mask = XFS_AT_XFLAGS;
11954 +       VOP_GETATTR(vp, &vattr, 0, NULL, error);
11955 +       if (error)
11956 +               return error;
11957 +       oldflags = vattr.va_xflags;     /* xflags as currently stored */
11958 +       newflags = oldflags & ~(XFS_XFLAG_IMMUTABLE |
11959 +               XFS_XFLAG_IUNLINK | XFS_XFLAG_BARRIER);
11960 +       /* then set each bit again according to the inode's IS_*() test */
11961 +       if (IS_IMMUTABLE(inode))
11962 +               newflags |= XFS_XFLAG_IMMUTABLE;
11963 +       if (IS_IUNLINK(inode))
11964 +               newflags |= XFS_XFLAG_IUNLINK;
11965 +       if (IS_BARRIER(inode))
11966 +               newflags |= XFS_XFLAG_BARRIER;
11967 +       /* issue VOP_SETATTR only when the flag word actually changed */
11968 +       if (oldflags ^ newflags) {
11969 +               vattr.va_xflags = newflags;
11970 +               vattr.va_mask |= XFS_AT_XFLAGS;
11971 +               VOP_SETATTR(vp, &vattr, flags, NULL, error);
11972 +       }
11973 +       vn_revalidate(vp);      /* called on both paths */
11974 +       return error;   /* NOTE(review): raw VOP error code; confirm sign */
11975 +}
11976 +
11977 +STATIC int
11978  linvfs_setattr(
11979         struct dentry   *dentry,
11980         struct iattr    *attr)
11981 @@ -657,6 +694,10 @@ linvfs_setattr(
11982         int             flags = 0;
11983         int             error;
11984  
11985 +       error = inode_change_ok(inode, attr);
11986 +       if (error)
11987 +               return error;
11988 +
11989         memset(&vattr, 0, sizeof(vattr_t));
11990         if (ia_valid & ATTR_UID) {
11991                 vattr.va_mask |= XFS_AT_UID;
11992 @@ -666,6 +707,10 @@ linvfs_setattr(
11993                 vattr.va_mask |= XFS_AT_GID;
11994                 vattr.va_gid = attr->ia_gid;
11995         }
11996 +       if ((ia_valid & ATTR_TAG) && IS_TAGGED(inode)) {
11997 +               vattr.va_mask |= XFS_AT_TAG;
11998 +               vattr.va_tag = attr->ia_tag;
11999 +       }
12000         if (ia_valid & ATTR_SIZE) {
12001                 vattr.va_mask |= XFS_AT_SIZE;
12002                 vattr.va_size = attr->ia_size;
12003 @@ -824,6 +869,7 @@ struct inode_operations linvfs_file_inod
12004         .getxattr               = linvfs_getxattr,
12005         .listxattr              = linvfs_listxattr,
12006         .removexattr            = linvfs_removexattr,
12007 +       .sync_flags             = linvfs_sync_flags,
12008  };
12009  
12010  struct inode_operations linvfs_dir_inode_operations = {
12011 @@ -843,6 +889,7 @@ struct inode_operations linvfs_dir_inode
12012         .getxattr               = linvfs_getxattr,
12013         .listxattr              = linvfs_listxattr,
12014         .removexattr            = linvfs_removexattr,
12015 +       .sync_flags             = linvfs_sync_flags,
12016  };
12017  
12018  struct inode_operations linvfs_symlink_inode_operations = {
12019 @@ -856,4 +903,5 @@ struct inode_operations linvfs_symlink_i
12020         .getxattr               = linvfs_getxattr,
12021         .listxattr              = linvfs_listxattr,
12022         .removexattr            = linvfs_removexattr,
12023 +       .sync_flags             = linvfs_sync_flags,
12024  };
12025 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/linux-2.6/xfs_linux.h linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_linux.h
12026 --- linux-2.6.16.20/fs/xfs/linux-2.6/xfs_linux.h        2006-02-18 14:40:27 +0100
12027 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_linux.h   2006-04-26 19:07:00 +0200
12028 @@ -133,6 +133,7 @@ BUFFER_FNS(PrivateStart, unwritten);
12029  #define current_pid()          (current->pid)
12030  #define current_fsuid(cred)    (current->fsuid)
12031  #define current_fsgid(cred)    (current->fsgid)
12032 +#define current_fstag(cred,vp) (dx_current_fstag(LINVFS_GET_IP(vp)->i_sb))
12033  
12034  #define NBPP           PAGE_SIZE
12035  #define DPPSHFT                (PAGE_SHIFT - 9)
12036 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/linux-2.6/xfs_super.c linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_super.c
12037 --- linux-2.6.16.20/fs/xfs/linux-2.6/xfs_super.c        2006-04-09 13:49:55 +0200
12038 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_super.c   2006-04-26 19:07:00 +0200
12039 @@ -161,6 +161,7 @@ xfs_revalidate_inode(
12040         inode->i_nlink  = ip->i_d.di_nlink;
12041         inode->i_uid    = ip->i_d.di_uid;
12042         inode->i_gid    = ip->i_d.di_gid;
12043 +       inode->i_tag    = ip->i_d.di_tag;
12044  
12045         switch (inode->i_mode & S_IFMT) {
12046         case S_IFBLK:
12047 @@ -189,6 +190,14 @@ xfs_revalidate_inode(
12048                 inode->i_flags |= S_IMMUTABLE;
12049         else
12050                 inode->i_flags &= ~S_IMMUTABLE;
12051 +       if (ip->i_d.di_flags & XFS_DIFLAG_IUNLINK)
12052 +               inode->i_flags |= S_IUNLINK;
12053 +       else
12054 +               inode->i_flags &= ~S_IUNLINK;
12055 +       if (ip->i_d.di_flags & XFS_DIFLAG_BARRIER)
12056 +               inode->i_flags |= S_BARRIER;
12057 +       else
12058 +               inode->i_flags &= ~S_BARRIER;
12059         if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
12060                 inode->i_flags |= S_APPEND;
12061         else
12062 @@ -729,6 +738,12 @@ linvfs_remount(
12063         int                     error;
12064  
12065         VFS_PARSEARGS(vfsp, options, args, 1, error);
12066 +       if ((args->flags2 & XFSMNT2_TAGGED) &&
12067 +               !(sb->s_flags & MS_TAGGED)) {
12068 +               printk("XFS: %s: tagging not permitted on remount.\n",
12069 +                       sb->s_id);
12070 +               error = EINVAL;
12071 +       }
12072         if (!error)
12073                 VFS_MNTUPDATE(vfsp, flags, args, error);
12074         kmem_free(args, sizeof(*args));
12075 @@ -756,9 +771,10 @@ linvfs_show_options(
12076  
12077  STATIC int
12078  linvfs_quotasync(
12079 -       struct super_block      *sb,
12080 +       struct dqhash           *hash,
12081         int                     type)
12082  {
12083 +       struct super_block      *sb = hash->dqh_sb;
12084         struct vfs              *vfsp = LINVFS_GET_VFS(sb);
12085         int                     error;
12086  
12087 @@ -768,10 +784,10 @@ linvfs_quotasync(
12088  
12089  STATIC int
12090  linvfs_getxstate(
12091 -       struct super_block      *sb,
12092 +       struct dqhash           *hash,
12093         struct fs_quota_stat    *fqs)
12094  {
12095 -       struct vfs              *vfsp = LINVFS_GET_VFS(sb);
12096 +       struct vfs              *vfsp = LINVFS_GET_VFS(hash->dqh_sb);
12097         int                     error;
12098  
12099         VFS_QUOTACTL(vfsp, Q_XGETQSTAT, 0, (caddr_t)fqs, error);
12100 @@ -780,11 +796,11 @@ linvfs_getxstate(
12101  
12102  STATIC int
12103  linvfs_setxstate(
12104 -       struct super_block      *sb,
12105 +       struct dqhash           *hash,
12106         unsigned int            flags,
12107         int                     op)
12108  {
12109 -       struct vfs              *vfsp = LINVFS_GET_VFS(sb);
12110 +       struct vfs              *vfsp = LINVFS_GET_VFS(hash->dqh_sb);
12111         int                     error;
12112  
12113         VFS_QUOTACTL(vfsp, op, 0, (caddr_t)&flags, error);
12114 @@ -793,12 +809,12 @@ linvfs_setxstate(
12115  
12116  STATIC int
12117  linvfs_getxquota(
12118 -       struct super_block      *sb,
12119 +       struct dqhash           *hash,
12120         int                     type,
12121         qid_t                   id,
12122         struct fs_disk_quota    *fdq)
12123  {
12124 -       struct vfs              *vfsp = LINVFS_GET_VFS(sb);
12125 +       struct vfs              *vfsp = LINVFS_GET_VFS(hash->dqh_sb);
12126         int                     error, getmode;
12127  
12128         getmode = (type == USRQUOTA) ? Q_XGETQUOTA :
12129 @@ -809,12 +825,12 @@ linvfs_getxquota(
12130  
12131  STATIC int
12132  linvfs_setxquota(
12133 -       struct super_block      *sb,
12134 +       struct dqhash           *hash,
12135         int                     type,
12136         qid_t                   id,
12137         struct fs_disk_quota    *fdq)
12138  {
12139 -       struct vfs              *vfsp = LINVFS_GET_VFS(sb);
12140 +       struct vfs              *vfsp = LINVFS_GET_VFS(hash->dqh_sb);
12141         int                     error, setmode;
12142  
12143         setmode = (type == USRQUOTA) ? Q_XSETQLIM :
12144 @@ -852,6 +868,9 @@ linvfs_fill_super(
12145         sb->s_export_op = &linvfs_export_ops;
12146  #endif
12147         sb->s_qcop = &linvfs_qops;
12148 +#ifdef CONFIG_QUOTA
12149 +       sb->s_dqh->dqh_qcop = &linvfs_qops;
12150 +#endif
12151         sb->s_op = &linvfs_sops;
12152  
12153         VFS_MOUNT(vfsp, args, NULL, error);
12154 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/linux-2.6/xfs_sysctl.c linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_sysctl.c
12155 --- linux-2.6.16.20/fs/xfs/linux-2.6/xfs_sysctl.c       2006-01-03 17:29:59 +0100
12156 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_sysctl.c  2006-04-26 19:07:00 +0200
12157 @@ -58,74 +58,74 @@ xfs_stats_clear_proc_handler(
12158  STATIC ctl_table xfs_table[] = {
12159         {XFS_RESTRICT_CHOWN, "restrict_chown", &xfs_params.restrict_chown.val,
12160         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12161 -       &sysctl_intvec, NULL,
12162 +       NULL, &sysctl_intvec, NULL,
12163         &xfs_params.restrict_chown.min, &xfs_params.restrict_chown.max},
12164  
12165         {XFS_SGID_INHERIT, "irix_sgid_inherit", &xfs_params.sgid_inherit.val,
12166         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12167 -       &sysctl_intvec, NULL,
12168 +       NULL, &sysctl_intvec, NULL,
12169         &xfs_params.sgid_inherit.min, &xfs_params.sgid_inherit.max},
12170  
12171         {XFS_SYMLINK_MODE, "irix_symlink_mode", &xfs_params.symlink_mode.val,
12172         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12173 -       &sysctl_intvec, NULL,
12174 +       NULL, &sysctl_intvec, NULL,
12175         &xfs_params.symlink_mode.min, &xfs_params.symlink_mode.max},
12176  
12177         {XFS_PANIC_MASK, "panic_mask", &xfs_params.panic_mask.val,
12178         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12179 -       &sysctl_intvec, NULL,
12180 +       NULL, &sysctl_intvec, NULL,
12181         &xfs_params.panic_mask.min, &xfs_params.panic_mask.max},
12182  
12183         {XFS_ERRLEVEL, "error_level", &xfs_params.error_level.val,
12184         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12185 -       &sysctl_intvec, NULL,
12186 +       NULL, &sysctl_intvec, NULL,
12187         &xfs_params.error_level.min, &xfs_params.error_level.max},
12188  
12189         {XFS_SYNCD_TIMER, "xfssyncd_centisecs", &xfs_params.syncd_timer.val,
12190         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12191 -       &sysctl_intvec, NULL,
12192 +       NULL, &sysctl_intvec, NULL,
12193         &xfs_params.syncd_timer.min, &xfs_params.syncd_timer.max},
12194  
12195         {XFS_INHERIT_SYNC, "inherit_sync", &xfs_params.inherit_sync.val,
12196         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12197 -       &sysctl_intvec, NULL,
12198 +       NULL, &sysctl_intvec, NULL,
12199         &xfs_params.inherit_sync.min, &xfs_params.inherit_sync.max},
12200  
12201         {XFS_INHERIT_NODUMP, "inherit_nodump", &xfs_params.inherit_nodump.val,
12202         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12203 -       &sysctl_intvec, NULL,
12204 +       NULL, &sysctl_intvec, NULL,
12205         &xfs_params.inherit_nodump.min, &xfs_params.inherit_nodump.max},
12206  
12207         {XFS_INHERIT_NOATIME, "inherit_noatime", &xfs_params.inherit_noatim.val,
12208         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12209 -       &sysctl_intvec, NULL,
12210 +       NULL, &sysctl_intvec, NULL,
12211         &xfs_params.inherit_noatim.min, &xfs_params.inherit_noatim.max},
12212  
12213         {XFS_BUF_TIMER, "xfsbufd_centisecs", &xfs_params.xfs_buf_timer.val,
12214         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12215 -       &sysctl_intvec, NULL,
12216 +       NULL, &sysctl_intvec, NULL,
12217         &xfs_params.xfs_buf_timer.min, &xfs_params.xfs_buf_timer.max},
12218  
12219         {XFS_BUF_AGE, "age_buffer_centisecs", &xfs_params.xfs_buf_age.val,
12220         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12221 -       &sysctl_intvec, NULL,
12222 +       NULL, &sysctl_intvec, NULL,
12223         &xfs_params.xfs_buf_age.min, &xfs_params.xfs_buf_age.max},
12224  
12225         {XFS_INHERIT_NOSYM, "inherit_nosymlinks", &xfs_params.inherit_nosym.val,
12226         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12227 -       &sysctl_intvec, NULL,
12228 +       NULL, &sysctl_intvec, NULL,
12229         &xfs_params.inherit_nosym.min, &xfs_params.inherit_nosym.max},
12230  
12231         {XFS_ROTORSTEP, "rotorstep", &xfs_params.rotorstep.val,
12232         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12233 -       &sysctl_intvec, NULL,
12234 +       NULL, &sysctl_intvec, NULL,
12235         &xfs_params.rotorstep.min, &xfs_params.rotorstep.max},
12236  
12237         /* please keep this the last entry */
12238  #ifdef CONFIG_PROC_FS
12239         {XFS_STATS_CLEAR, "stats_clear", &xfs_params.stats_clear.val,
12240         sizeof(int), 0644, NULL, &xfs_stats_clear_proc_handler,
12241 -       &sysctl_intvec, NULL,
12242 +       NULL, &sysctl_intvec, NULL,
12243         &xfs_params.stats_clear.min, &xfs_params.stats_clear.max},
12244  #endif /* CONFIG_PROC_FS */
12245  
12246 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/linux-2.6/xfs_vnode.c linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_vnode.c
12247 --- linux-2.6.16.20/fs/xfs/linux-2.6/xfs_vnode.c        2006-02-18 14:40:27 +0100
12248 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_vnode.c   2006-04-26 19:07:00 +0200
12249 @@ -103,6 +103,7 @@ vn_revalidate_core(
12250         inode->i_nlink      = vap->va_nlink;
12251         inode->i_uid        = vap->va_uid;
12252         inode->i_gid        = vap->va_gid;
12253 +       inode->i_tag        = vap->va_tag;
12254         inode->i_blocks     = vap->va_nblocks;
12255         inode->i_mtime      = vap->va_mtime;
12256         inode->i_ctime      = vap->va_ctime;
12257 @@ -111,6 +112,14 @@ vn_revalidate_core(
12258                 inode->i_flags |= S_IMMUTABLE;
12259         else
12260                 inode->i_flags &= ~S_IMMUTABLE;
12261 +       if (vap->va_xflags & XFS_XFLAG_IUNLINK)
12262 +               inode->i_flags |= S_IUNLINK;
12263 +       else
12264 +               inode->i_flags &= ~S_IUNLINK;
12265 +       if (vap->va_xflags & XFS_XFLAG_BARRIER)
12266 +               inode->i_flags |= S_BARRIER;
12267 +       else
12268 +               inode->i_flags &= ~S_BARRIER;
12269         if (vap->va_xflags & XFS_XFLAG_APPEND)
12270                 inode->i_flags |= S_APPEND;
12271         else
12272 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/linux-2.6/xfs_vnode.h linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_vnode.h
12273 --- linux-2.6.16.20/fs/xfs/linux-2.6/xfs_vnode.h        2006-02-18 14:40:27 +0100
12274 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_vnode.h   2006-04-26 19:07:00 +0200
12275 @@ -386,6 +386,7 @@ typedef struct vattr {
12276         xfs_nlink_t     va_nlink;       /* number of references to file */
12277         uid_t           va_uid;         /* owner user id */
12278         gid_t           va_gid;         /* owner group id */
12279 +       tag_t           va_tag;         /* context tag */
12280         xfs_ino_t       va_nodeid;      /* file id */
12281         xfs_off_t       va_size;        /* file size in bytes */
12282         u_long          va_blocksize;   /* blocksize preferred for i/o */
12283 @@ -434,13 +435,15 @@ typedef struct vattr {
12284  #define XFS_AT_PROJID          0x04000000
12285  #define XFS_AT_SIZE_NOPERM     0x08000000
12286  #define XFS_AT_GENCOUNT                0x10000000
12287 +#define XFS_AT_TAG             0x20000000
12288  
12289  #define XFS_AT_ALL     (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
12290                 XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
12291                 XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
12292                 XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|XFS_AT_MAC|\
12293                 XFS_AT_ACL|XFS_AT_CAP|XFS_AT_INF|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|\
12294 -               XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT)
12295 +               XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT|\
12296 +               XFS_AT_TAG)
12297  
12298  #define XFS_AT_STAT    (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
12299                 XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
12300 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/quota/xfs_qm_syscalls.c linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/quota/xfs_qm_syscalls.c
12301 --- linux-2.6.16.20/fs/xfs/quota/xfs_qm_syscalls.c      2006-04-09 13:49:55 +0200
12302 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/quota/xfs_qm_syscalls.c 2006-04-27 21:30:23 +0200
12303 @@ -215,7 +215,7 @@ xfs_qm_scall_quotaoff(
12304         xfs_qoff_logitem_t      *qoffstart;
12305         int                     nculprits;
12306  
12307 -       if (!force && !capable(CAP_SYS_ADMIN))
12308 +       if (!force && !vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
12309                 return XFS_ERROR(EPERM);
12310         /*
12311          * No file system can have quotas enabled on disk but not in core.
12312 @@ -384,7 +384,7 @@ xfs_qm_scall_trunc_qfiles(
12313         int             error;
12314         xfs_inode_t     *qip;
12315  
12316 -       if (!capable(CAP_SYS_ADMIN))
12317 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
12318                 return XFS_ERROR(EPERM);
12319         error = 0;
12320         if (!XFS_SB_VERSION_HASQUOTA(&mp->m_sb) || flags == 0) {
12321 @@ -429,7 +429,7 @@ xfs_qm_scall_quotaon(
12322         uint            accflags;
12323         __int64_t       sbflags;
12324  
12325 -       if (!capable(CAP_SYS_ADMIN))
12326 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
12327                 return XFS_ERROR(EPERM);
12328  
12329         flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
12330 @@ -600,7 +600,7 @@ xfs_qm_scall_setqlim(
12331         int                     error;
12332         xfs_qcnt_t              hard, soft;
12333  
12334 -       if (!capable(CAP_SYS_ADMIN))
12335 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
12336                 return XFS_ERROR(EPERM);
12337  
12338         if ((newlim->d_fieldmask &
12339 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/xfs_clnt.h linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_clnt.h
12340 --- linux-2.6.16.20/fs/xfs/xfs_clnt.h   2006-02-18 14:40:27 +0100
12341 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_clnt.h      2006-04-26 19:07:00 +0200
12342 @@ -100,5 +100,7 @@ struct xfs_mount_args {
12343   */
12344  #define XFSMNT2_COMPAT_IOSIZE  0x00000001      /* don't report large preferred
12345                                                  * I/O size in stat(2) */
12346 +#define XFSMNT2_TAGGED         0x80000000      /* context tagging */
12347 +
12348  
12349  #endif /* __XFS_CLNT_H__ */
12350 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/xfs_dinode.h linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_dinode.h
12351 --- linux-2.6.16.20/fs/xfs/xfs_dinode.h 2006-04-09 13:49:55 +0200
12352 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_dinode.h    2006-04-26 19:07:00 +0200
12353 @@ -53,7 +53,8 @@ typedef struct xfs_dinode_core
12354         __uint32_t      di_gid;         /* owner's group id */
12355         __uint32_t      di_nlink;       /* number of links to file */
12356         __uint16_t      di_projid;      /* owner's project id */
12357 -       __uint8_t       di_pad[8];      /* unused, zeroed space */
12358 +       __uint16_t      di_tag;         /* context tagging */
12359 +       __uint8_t       di_pad[6];      /* unused, zeroed space */
12360         __uint16_t      di_flushiter;   /* incremented on flush */
12361         xfs_timestamp_t di_atime;       /* time last accessed */
12362         xfs_timestamp_t di_mtime;       /* time last modified */
12363 @@ -257,6 +258,9 @@ typedef enum xfs_dinode_fmt
12364  #define XFS_DIFLAG_NOSYMLINKS_BIT   10 /* disallow symlink creation */
12365  #define XFS_DIFLAG_EXTSIZE_BIT      11 /* inode extent size allocator hint */
12366  #define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */
12367 +#define XFS_DIFLAG_BARRIER_BIT 13      /* chroot() barrier */
12368 +#define XFS_DIFLAG_IUNLINK_BIT 14      /* immutable unlink */
12369 +
12370  #define XFS_DIFLAG_REALTIME      (1 << XFS_DIFLAG_REALTIME_BIT)
12371  #define XFS_DIFLAG_PREALLOC      (1 << XFS_DIFLAG_PREALLOC_BIT)
12372  #define XFS_DIFLAG_NEWRTBM       (1 << XFS_DIFLAG_NEWRTBM_BIT)
12373 @@ -270,12 +274,14 @@ typedef enum xfs_dinode_fmt
12374  #define XFS_DIFLAG_NOSYMLINKS    (1 << XFS_DIFLAG_NOSYMLINKS_BIT)
12375  #define XFS_DIFLAG_EXTSIZE       (1 << XFS_DIFLAG_EXTSIZE_BIT)
12376  #define XFS_DIFLAG_EXTSZINHERIT  (1 << XFS_DIFLAG_EXTSZINHERIT_BIT)
12377 +#define XFS_DIFLAG_BARRIER      (1 << XFS_DIFLAG_BARRIER_BIT)
12378 +#define XFS_DIFLAG_IUNLINK      (1 << XFS_DIFLAG_IUNLINK_BIT)
12379  
12380  #define XFS_DIFLAG_ANY \
12381         (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \
12382          XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
12383          XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \
12384          XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
12385 -        XFS_DIFLAG_EXTSZINHERIT)
12386 +        XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_BARRIER | XFS_DIFLAG_IUNLINK)
12387  
12388  #endif /* __XFS_DINODE_H__ */
12389 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/xfs_fs.h linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_fs.h
12390 --- linux-2.6.16.20/fs/xfs/xfs_fs.h     2006-04-09 13:49:55 +0200
12391 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_fs.h        2006-04-26 19:07:00 +0200
12392 @@ -67,6 +67,8 @@ struct fsxattr {
12393  #define XFS_XFLAG_NOSYMLINKS   0x00000400      /* disallow symlink creation */
12394  #define XFS_XFLAG_EXTSIZE      0x00000800      /* extent size allocator hint */
12395  #define XFS_XFLAG_EXTSZINHERIT 0x00001000      /* inherit inode extent size */
12396 +#define XFS_XFLAG_BARRIER      0x00004000      /* chroot() barrier */
12397 +#define XFS_XFLAG_IUNLINK      0x00008000      /* immutable unlink */
12398  #define XFS_XFLAG_HASATTR      0x80000000      /* no DIFLAG for this   */
12399  
12400  /*
12401 @@ -295,7 +297,8 @@ typedef struct xfs_bstat {
12402         __s32           bs_extents;     /* number of extents            */
12403         __u32           bs_gen;         /* generation count             */
12404         __u16           bs_projid;      /* project id                   */
12405 -       unsigned char   bs_pad[14];     /* pad space, unused            */
12406 +       __u16           bs_tag;         /* context tagging              */
12407 +       unsigned char   bs_pad[12];     /* pad space, unused            */
12408         __u32           bs_dmevmask;    /* DMIG event mask              */
12409         __u16           bs_dmstate;     /* DMIG state info              */
12410         __u16           bs_aextents;    /* attribute number of extents  */
12411 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/xfs_inode.c linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_inode.c
12412 --- linux-2.6.16.20/fs/xfs/xfs_inode.c  2006-02-18 14:40:27 +0100
12413 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_inode.c     2006-04-26 19:07:00 +0200
12414 @@ -52,6 +52,7 @@
12415  #include "xfs_mac.h"
12416  #include "xfs_acl.h"
12417  
12418 +#include <linux/vs_tag.h>
12419  
12420  kmem_zone_t *xfs_ifork_zone;
12421  kmem_zone_t *xfs_inode_zone;
12422 @@ -734,20 +735,35 @@ xfs_xlate_dinode_core(
12423         xfs_dinode_core_t       *buf_core = (xfs_dinode_core_t *)buf;
12424         xfs_dinode_core_t       *mem_core = (xfs_dinode_core_t *)dip;
12425         xfs_arch_t              arch = ARCH_CONVERT;
12426 +       uint32_t                uid = 0, gid = 0;
12427 +       uint16_t                tag = 0;
12428  
12429         ASSERT(dir);
12430  
12431 +       if (dir < 0) {
12432 +               tag = mem_core->di_tag;
12433 +               /* FIXME: supposed to use superblock flag */
12434 +               uid = TAGINO_UID(1, mem_core->di_uid, tag);
12435 +               gid = TAGINO_GID(1, mem_core->di_gid, tag);
12436 +               tag = TAGINO_TAG(1, tag);
12437 +       }
12438 +
12439         INT_XLATE(buf_core->di_magic, mem_core->di_magic, dir, arch);
12440         INT_XLATE(buf_core->di_mode, mem_core->di_mode, dir, arch);
12441         INT_XLATE(buf_core->di_version, mem_core->di_version, dir, arch);
12442         INT_XLATE(buf_core->di_format, mem_core->di_format, dir, arch);
12443         INT_XLATE(buf_core->di_onlink, mem_core->di_onlink, dir, arch);
12444 -       INT_XLATE(buf_core->di_uid, mem_core->di_uid, dir, arch);
12445 -       INT_XLATE(buf_core->di_gid, mem_core->di_gid, dir, arch);
12446 +       INT_XLATE(buf_core->di_uid, uid, dir, arch);
12447 +       INT_XLATE(buf_core->di_gid, gid, dir, arch);
12448 +       INT_XLATE(buf_core->di_tag, tag, dir, arch);
12449         INT_XLATE(buf_core->di_nlink, mem_core->di_nlink, dir, arch);
12450         INT_XLATE(buf_core->di_projid, mem_core->di_projid, dir, arch);
12451  
12452         if (dir > 0) {
12453 +               /* FIXME: supposed to use superblock flag */
12454 +               mem_core->di_uid = INOTAG_UID(1, uid, gid);
12455 +               mem_core->di_gid = INOTAG_GID(1, uid, gid);
12456 +               mem_core->di_tag = INOTAG_TAG(1, uid, gid, tag);
12457                 memcpy(mem_core->di_pad, buf_core->di_pad,
12458                         sizeof(buf_core->di_pad));
12459         } else {
12460 @@ -796,6 +812,10 @@ _xfs_dic2xflags(
12461                         flags |= XFS_XFLAG_PREALLOC;
12462                 if (di_flags & XFS_DIFLAG_IMMUTABLE)
12463                         flags |= XFS_XFLAG_IMMUTABLE;
12464 +               if (di_flags & XFS_DIFLAG_IUNLINK)
12465 +                       flags |= XFS_XFLAG_IUNLINK;
12466 +               if (di_flags & XFS_DIFLAG_BARRIER)
12467 +                       flags |= XFS_XFLAG_BARRIER;
12468                 if (di_flags & XFS_DIFLAG_APPEND)
12469                         flags |= XFS_XFLAG_APPEND;
12470                 if (di_flags & XFS_DIFLAG_SYNC)
12471 @@ -1125,6 +1145,7 @@ xfs_ialloc(
12472         ASSERT(ip->i_d.di_nlink == nlink);
12473         ip->i_d.di_uid = current_fsuid(cr);
12474         ip->i_d.di_gid = current_fsgid(cr);
12475 +       ip->i_d.di_tag = current_fstag(cr, vp);
12476         ip->i_d.di_projid = prid;
12477         memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
12478  
12479 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/xfs_itable.c linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_itable.c
12480 --- linux-2.6.16.20/fs/xfs/xfs_itable.c 2006-02-18 14:40:27 +0100
12481 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_itable.c    2006-04-26 19:07:00 +0200
12482 @@ -85,6 +85,7 @@ xfs_bulkstat_one_iget(
12483         buf->bs_mode = dic->di_mode;
12484         buf->bs_uid = dic->di_uid;
12485         buf->bs_gid = dic->di_gid;
12486 +       buf->bs_tag = dic->di_tag;
12487         buf->bs_size = dic->di_size;
12488         vn_atime_to_bstime(vp, &buf->bs_atime);
12489         buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
12490 @@ -159,6 +160,7 @@ xfs_bulkstat_one_dinode(
12491         buf->bs_mode = INT_GET(dic->di_mode, ARCH_CONVERT);
12492         buf->bs_uid = INT_GET(dic->di_uid, ARCH_CONVERT);
12493         buf->bs_gid = INT_GET(dic->di_gid, ARCH_CONVERT);
12494 +       buf->bs_tag = INT_GET(dic->di_tag, ARCH_CONVERT);
12495         buf->bs_size = INT_GET(dic->di_size, ARCH_CONVERT);
12496         buf->bs_atime.tv_sec = INT_GET(dic->di_atime.t_sec, ARCH_CONVERT);
12497         buf->bs_atime.tv_nsec = INT_GET(dic->di_atime.t_nsec, ARCH_CONVERT);
12498 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/xfs_mount.h linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_mount.h
12499 --- linux-2.6.16.20/fs/xfs/xfs_mount.h  2006-02-18 14:40:27 +0100
12500 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_mount.h     2006-04-26 19:07:00 +0200
12501 @@ -412,6 +412,7 @@ typedef struct xfs_mount {
12502  #define XFS_MOUNT_COMPAT_IOSIZE        (1ULL << 22)    /* don't report large preferred
12503                                                  * I/O size in stat() */
12504  
12505 +#define XFS_MOUNT_TAGGED       (1ULL << 31)    /* context tagging */
12506  
12507  /*
12508   * Default minimum read and write sizes.
12509 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/xfs_vfsops.c linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_vfsops.c
12510 --- linux-2.6.16.20/fs/xfs/xfs_vfsops.c 2006-02-18 14:40:27 +0100
12511 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_vfsops.c    2006-04-26 19:07:00 +0200
12512 @@ -296,6 +296,8 @@ xfs_start_flags(
12513  
12514         if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
12515                 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
12516 +       if (ap->flags2 & XFSMNT2_TAGGED)
12517 +               mp->m_flags |= XFS_MOUNT_TAGGED;
12518  
12519         /*
12520          * no recovery flag requires a read-only mount
12521 @@ -390,6 +392,8 @@ xfs_finish_flags(
12522                         return XFS_ERROR(EINVAL);
12523         }
12524  
12525 +       if (ap->flags2 & XFSMNT2_TAGGED)
12526 +               vfs->vfs_super->s_flags |= MS_TAGGED;
12527         return 0;
12528  }
12529  
12530 @@ -1653,6 +1657,9 @@ xfs_vget(
12531                                          * in stat(). */
12532  #define MNTOPT_ATTR2   "attr2"         /* do use attr2 attribute format */
12533  #define MNTOPT_NOATTR2 "noattr2"       /* do not use attr2 attribute format */
12534 +#define MNTOPT_TAGXID  "tagxid"        /* context tagging for inodes */
12535 +#define MNTOPT_TAGGED  "tag"           /* context tagging for inodes */
12536 +#define MNTOPT_NOTAGTAG        "notag"         /* do not use context tagging */
12537  
12538  STATIC unsigned long
12539  suffix_strtoul(const char *cp, char **endp, unsigned int base)
12540 @@ -1829,6 +1836,19 @@ xfs_parseargs(
12541                         args->flags |= XFSMNT_ATTR2;
12542                 } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
12543                         args->flags &= ~XFSMNT_ATTR2;
12544 +#ifndef CONFIG_TAGGING_NONE
12545 +               } else if (!strcmp(this_char, MNTOPT_TAGGED)) {
12546 +                       args->flags2 |= XFSMNT2_TAGGED;
12547 +               } else if (!strcmp(this_char, MNTOPT_NOTAGTAG)) {
12548 +                       args->flags2 &= ~XFSMNT2_TAGGED;
12549 +               } else if (!strcmp(this_char, MNTOPT_TAGXID)) {
12550 +                       args->flags2 |= XFSMNT2_TAGGED;
12551 +#endif
12552 +#ifdef CONFIG_PROPAGATE
12553 +               } else if (!strcmp(this_char, MNTOPT_TAGGED)) {
12554 +                       /* FIXME: use value; shadowed by the MNTOPT_TAGGED match above unless CONFIG_TAGGING_NONE is set */
12555 +                       args->flags2 |= XFSMNT2_TAGGED;
12556 +#endif
12557                 } else if (!strcmp(this_char, "osyncisdsync")) {
12558                         /* no-op, this is now the default */
12559  printk("XFS: osyncisdsync is now the default, option is deprecated.\n");
12560 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/xfs_vnodeops.c linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_vnodeops.c
12561 --- linux-2.6.16.20/fs/xfs/xfs_vnodeops.c       2006-02-18 14:40:27 +0100
12562 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_vnodeops.c  2006-04-26 19:07:00 +0200
12563 @@ -154,6 +154,7 @@ xfs_getattr(
12564         vap->va_mode = ip->i_d.di_mode;
12565         vap->va_uid = ip->i_d.di_uid;
12566         vap->va_gid = ip->i_d.di_gid;
12567 +       vap->va_tag = ip->i_d.di_tag;
12568         vap->va_projid = ip->i_d.di_projid;
12569  
12570         /*
12571 @@ -254,6 +255,7 @@ xfs_setattr(
12572         uint                    commit_flags=0;
12573         uid_t                   uid=0, iuid=0;
12574         gid_t                   gid=0, igid=0;
12575 +       tag_t                   tag=0, itag=0;
12576         int                     timeflags = 0;
12577         vnode_t                 *vp;
12578         xfs_prid_t              projid=0, iprojid=0;
12579 @@ -310,6 +312,7 @@ xfs_setattr(
12580             (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) {
12581                 uint    qflags = 0;
12582  
12583 +               /* FIXME: handle tagging? */
12584                 if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) {
12585                         uid = vap->va_uid;
12586                         qflags |= XFS_QMOPT_UQUOTA;
12587 @@ -390,6 +393,8 @@ xfs_setattr(
12588         if (mask &
12589             (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID|
12590              XFS_AT_GID|XFS_AT_PROJID)) {
12591 +               /* FIXME: handle tagging? */
12592 +
12593                 /*
12594                  * CAP_FOWNER overrides the following restrictions:
12595                  *
12596 @@ -438,7 +443,7 @@ xfs_setattr(
12597          * and can change the group id only to a group of which he
12598          * or she is a member.
12599          */
12600 -       if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) {
12601 +       if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_TAG|XFS_AT_PROJID)) {
12602                 /*
12603                  * These IDs could have changed since we last looked at them.
12604                  * But, we're assured that if the ownership did change
12605 @@ -446,10 +451,12 @@ xfs_setattr(
12606                  * would have changed also.
12607                  */
12608                 iuid = ip->i_d.di_uid;
12609 -               iprojid = ip->i_d.di_projid;
12610                 igid = ip->i_d.di_gid;
12611 -               gid = (mask & XFS_AT_GID) ? vap->va_gid : igid;
12612 +               itag = ip->i_d.di_tag;
12613 +               iprojid = ip->i_d.di_projid;
12614                 uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid;
12615 +               gid = (mask & XFS_AT_GID) ? vap->va_gid : igid;
12616 +               tag = (mask & XFS_AT_TAG) ? vap->va_tag : itag;
12617                 projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid :
12618                          iprojid;
12619  
12620 @@ -477,6 +484,7 @@ xfs_setattr(
12621                 if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
12622                     (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) ||
12623                     (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
12624 +                       /* FIXME: handle tagging? (itag != tag is not tested above) */
12625                         ASSERT(tp);
12626                         code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
12627                                                 capable(CAP_FOWNER) ?
12628 @@ -693,7 +701,7 @@ xfs_setattr(
12629          * and can change the group id only to a group of which he
12630          * or she is a member.
12631          */
12632 -       if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) {
12633 +       if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_TAG|XFS_AT_PROJID)) {
12634                 /*
12635                  * CAP_FSETID overrides the following restrictions:
12636                  *
12637 @@ -709,6 +717,12 @@ xfs_setattr(
12638                  * Change the ownerships and register quota modifications
12639                  * in the transaction.
12640                  */
12641 +               if (itag != tag) {
12642 +                       if (XFS_IS_GQUOTA_ON(mp)) {
12643 +                               /* FIXME: handle tag quota? */
12644 +                       }
12645 +                       ip->i_d.di_tag = tag;
12646 +               }
12647                 if (iuid != uid) {
12648                         if (XFS_IS_UQUOTA_ON(mp)) {
12649                                 ASSERT(mask & XFS_AT_UID);
12650 @@ -789,6 +803,10 @@ xfs_setattr(
12651                         di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
12652                         if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
12653                                 di_flags |= XFS_DIFLAG_IMMUTABLE;
12654 +                       if (vap->va_xflags & XFS_XFLAG_IUNLINK)
12655 +                               di_flags |= XFS_DIFLAG_IUNLINK;
12656 +                       if (vap->va_xflags & XFS_XFLAG_BARRIER)
12657 +                               di_flags |= XFS_DIFLAG_BARRIER;
12658                         if (vap->va_xflags & XFS_XFLAG_APPEND)
12659                                 di_flags |= XFS_DIFLAG_APPEND;
12660                         if (vap->va_xflags & XFS_XFLAG_SYNC)
12661 diff -NurpP --minimal linux-2.6.16.20/include/asm-arm/tlb.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-arm/tlb.h
12662 --- linux-2.6.16.20/include/asm-arm/tlb.h       2006-01-03 17:30:02 +0100
12663 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-arm/tlb.h  2006-04-26 19:07:00 +0200
12664 @@ -20,6 +20,7 @@
12665  #include <asm/cacheflush.h>
12666  #include <asm/tlbflush.h>
12667  #include <asm/pgalloc.h>
12668 +#include <linux/vs_memory.h>
12669  
12670  /*
12671   * TLB handling.  This allows us to remove pages from the page
12672 diff -NurpP --minimal linux-2.6.16.20/include/asm-arm26/tlb.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-arm26/tlb.h
12673 --- linux-2.6.16.20/include/asm-arm26/tlb.h     2006-01-03 17:30:02 +0100
12674 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-arm26/tlb.h        2006-04-26 19:07:00 +0200
12675 @@ -3,6 +3,7 @@
12676  
12677  #include <asm/pgalloc.h>
12678  #include <asm/tlbflush.h>
12679 +#include <linux/vs_memory.h>
12680  
12681  /*
12682   * TLB handling.  This allows us to remove pages from the page
12683 diff -NurpP --minimal linux-2.6.16.20/include/asm-arm26/unistd.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-arm26/unistd.h
12684 --- linux-2.6.16.20/include/asm-arm26/unistd.h  2006-01-03 17:30:02 +0100
12685 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-arm26/unistd.h     2006-04-26 19:07:00 +0200
12686 @@ -304,6 +304,8 @@
12687  #define __NR_mq_getsetattr             (__NR_SYSCALL_BASE+279)
12688  #define __NR_waitid                    (__NR_SYSCALL_BASE+280)
12689  
12690 +#define __NR_vserver                   (__NR_SYSCALL_BASE+313)
12691 +
12692  /*
12693   * The following SWIs are ARM private. FIXME - make appropriate for arm26
12694   */
12695 diff -NurpP --minimal linux-2.6.16.20/include/asm-generic/tlb.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-generic/tlb.h
12696 --- linux-2.6.16.20/include/asm-generic/tlb.h   2006-01-03 17:30:02 +0100
12697 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-generic/tlb.h      2006-04-26 19:07:00 +0200
12698 @@ -15,6 +15,7 @@
12699  
12700  #include <linux/config.h>
12701  #include <linux/swap.h>
12702 +#include <linux/vs_memory.h>
12703  #include <asm/pgalloc.h>
12704  #include <asm/tlbflush.h>
12705  
12706 diff -NurpP --minimal linux-2.6.16.20/include/asm-i386/elf.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-i386/elf.h
12707 --- linux-2.6.16.20/include/asm-i386/elf.h      2006-01-03 17:30:04 +0100
12708 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-i386/elf.h 2006-05-11 16:06:22 +0200
12709 @@ -71,7 +71,7 @@ typedef struct user_fxsr_struct elf_fpxr
12710     the loader.  We need to make sure that it is out of the way of the program
12711     that it will "exec", and that there is sufficient room for the brk.  */
12712  
12713 -#define ELF_ET_DYN_BASE         (TASK_SIZE / 3 * 2)
12714 +#define ELF_ET_DYN_BASE                ((TASK_UNMAPPED_BASE) * 2)
12715  
12716  /* regs is struct pt_regs, pr_reg is elf_gregset_t (which is
12717     now struct_user_regs, they are different) */
12718 @@ -108,7 +108,7 @@ typedef struct user_fxsr_struct elf_fpxr
12719     For the moment, we have only optimizations for the Intel generations,
12720     but that could change... */
12721  
12722 -#define ELF_PLATFORM  (system_utsname.machine)
12723 +#define ELF_PLATFORM  (vx_new_uts(machine))
12724  
12725  #ifdef __KERNEL__
12726  #define SET_PERSONALITY(ex, ibcs2) do { } while (0)
12727 diff -NurpP --minimal linux-2.6.16.20/include/asm-i386/page.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-i386/page.h
12728 --- linux-2.6.16.20/include/asm-i386/page.h     2006-02-18 14:40:29 +0100
12729 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-i386/page.h        2006-04-26 19:07:00 +0200
12730 @@ -109,19 +109,15 @@ extern int page_is_ram(unsigned long pag
12731  
12732  #endif /* __ASSEMBLY__ */
12733  
12734 -#ifdef __ASSEMBLY__
12735  #define __PAGE_OFFSET          CONFIG_PAGE_OFFSET
12736  #define __PHYSICAL_START       CONFIG_PHYSICAL_START
12737 -#else
12738 -#define __PAGE_OFFSET          ((unsigned long)CONFIG_PAGE_OFFSET)
12739 -#define __PHYSICAL_START       ((unsigned long)CONFIG_PHYSICAL_START)
12740 -#endif
12741  #define __KERNEL_START         (__PAGE_OFFSET + __PHYSICAL_START)
12742 -
12743 +#define __MAXMEM               (-__PAGE_OFFSET-__VMALLOC_RESERVE)
12744  
12745  #define PAGE_OFFSET            ((unsigned long)__PAGE_OFFSET)
12746 +#define PHYSICAL_START         ((unsigned long)__PHYSICAL_START)
12747  #define VMALLOC_RESERVE                ((unsigned long)__VMALLOC_RESERVE)
12748 -#define MAXMEM                 (-__PAGE_OFFSET-__VMALLOC_RESERVE)
12749 +#define MAXMEM                 ((unsigned long)__MAXMEM)
12750  #define __pa(x)                        ((unsigned long)(x)-PAGE_OFFSET)
12751  #define __va(x)                        ((void *)((unsigned long)(x)+PAGE_OFFSET))
12752  #define pfn_to_kaddr(pfn)      __va((pfn) << PAGE_SHIFT)
12753 diff -NurpP --minimal linux-2.6.16.20/include/asm-i386/processor.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-i386/processor.h
12754 --- linux-2.6.16.20/include/asm-i386/processor.h        2006-02-18 14:40:29 +0100
12755 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-i386/processor.h   2006-04-26 19:07:00 +0200
12756 @@ -316,9 +316,10 @@ extern unsigned int mca_pentium_flag;
12757  extern int bootloader_type;
12758  
12759  /*
12760 - * User space process size: 3GB (default).
12761 + * User space process size: __PAGE_OFFSET (3GB by default).
12762   */
12763 -#define TASK_SIZE      (PAGE_OFFSET)
12764 +#define __TASK_SIZE            (__PAGE_OFFSET)
12765 +#define TASK_SIZE              ((unsigned long)__TASK_SIZE)
12766  
12767  /* This decides where the kernel will search for a free chunk of vm
12768   * space during mmap's.
12769 diff -NurpP --minimal linux-2.6.16.20/include/asm-ia64/tlb.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-ia64/tlb.h
12770 --- linux-2.6.16.20/include/asm-ia64/tlb.h      2006-01-03 17:30:05 +0100
12771 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-ia64/tlb.h 2006-04-26 19:07:00 +0200
12772 @@ -41,6 +41,7 @@
12773  #include <linux/mm.h>
12774  #include <linux/pagemap.h>
12775  #include <linux/swap.h>
12776 +#include <linux/vs_memory.h>
12777  
12778  #include <asm/pgalloc.h>
12779  #include <asm/processor.h>
12780 diff -NurpP --minimal linux-2.6.16.20/include/asm-powerpc/unistd.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-powerpc/unistd.h
12781 --- linux-2.6.16.20/include/asm-powerpc/unistd.h        2006-02-18 14:40:31 +0100
12782 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-powerpc/unistd.h   2006-04-26 19:07:00 +0200
12783 @@ -275,7 +275,7 @@
12784  #endif
12785  #define __NR_rtas              255
12786  #define __NR_sys_debug_setcontext 256
12787 -/* Number 257 is reserved for vserver */
12788 +#define __NR_vserver           257
12789  /* 258 currently unused */
12790  #define __NR_mbind             259
12791  #define __NR_get_mempolicy     260
12792 diff -NurpP --minimal linux-2.6.16.20/include/asm-s390/unistd.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-s390/unistd.h
12793 --- linux-2.6.16.20/include/asm-s390/unistd.h   2006-02-18 14:40:31 +0100
12794 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-s390/unistd.h      2006-04-26 19:07:00 +0200
12795 @@ -255,7 +255,7 @@
12796  #define __NR_clock_gettime     (__NR_timer_create+6)
12797  #define __NR_clock_getres      (__NR_timer_create+7)
12798  #define __NR_clock_nanosleep   (__NR_timer_create+8)
12799 -/* Number 263 is reserved for vserver */
12800 +#define __NR_vserver           263
12801  #define __NR_fadvise64_64      264
12802  #define __NR_statfs64          265
12803  #define __NR_fstatfs64         266
12804 diff -NurpP --minimal linux-2.6.16.20/include/asm-sparc/unistd.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-sparc/unistd.h
12805 --- linux-2.6.16.20/include/asm-sparc/unistd.h  2006-02-18 14:40:31 +0100
12806 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-sparc/unistd.h     2006-04-26 19:07:00 +0200
12807 @@ -283,7 +283,7 @@
12808  #define __NR_timer_getoverrun  264
12809  #define __NR_timer_delete      265
12810  #define __NR_timer_create      266
12811 -/* #define __NR_vserver                267 Reserved for VSERVER */
12812 +#define __NR_vserver           267
12813  #define __NR_io_setup          268
12814  #define __NR_io_destroy                269
12815  #define __NR_io_submit         270
12816 diff -NurpP --minimal linux-2.6.16.20/include/asm-sparc64/tlb.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-sparc64/tlb.h
12817 --- linux-2.6.16.20/include/asm-sparc64/tlb.h   2006-01-03 17:30:08 +0100
12818 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-sparc64/tlb.h      2006-04-26 19:07:00 +0200
12819 @@ -3,6 +3,7 @@
12820  
12821  #include <linux/config.h>
12822  #include <linux/swap.h>
12823 +#include <linux/vs_memory.h>
12824  #include <asm/pgalloc.h>
12825  #include <asm/tlbflush.h>
12826  #include <asm/mmu_context.h>
12827 diff -NurpP --minimal linux-2.6.16.20/include/asm-sparc64/unistd.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-sparc64/unistd.h
12828 --- linux-2.6.16.20/include/asm-sparc64/unistd.h        2006-02-18 14:40:32 +0100
12829 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-sparc64/unistd.h   2006-04-26 19:07:00 +0200
12830 @@ -285,7 +285,7 @@
12831  #define __NR_timer_getoverrun  264
12832  #define __NR_timer_delete      265
12833  #define __NR_timer_create      266
12834 -/* #define __NR_vserver                267 Reserved for VSERVER */
12835 +#define __NR_vserver           267
12836  #define __NR_io_setup          268
12837  #define __NR_io_destroy                269
12838  #define __NR_io_submit         270
12839 diff -NurpP --minimal linux-2.6.16.20/include/asm-x86_64/unistd.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-x86_64/unistd.h
12840 --- linux-2.6.16.20/include/asm-x86_64/unistd.h 2006-02-18 14:40:32 +0100
12841 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-x86_64/unistd.h    2006-04-26 19:07:00 +0200
12842 @@ -532,7 +532,7 @@ __SYSCALL(__NR_tgkill, sys_tgkill)
12843  #define __NR_utimes            235
12844  __SYSCALL(__NR_utimes, sys_utimes)
12845  #define __NR_vserver           236
12846 -__SYSCALL(__NR_vserver, sys_ni_syscall)
12847 +__SYSCALL(__NR_vserver, sys_vserver)
12848  #define __NR_mbind             237
12849  __SYSCALL(__NR_mbind, sys_mbind)
12850  #define __NR_set_mempolicy     238
12851 diff -NurpP --minimal linux-2.6.16.20/include/linux/capability.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/capability.h
12852 --- linux-2.6.16.20/include/linux/capability.h  2006-02-18 14:40:32 +0100
12853 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/capability.h     2006-04-27 18:21:01 +0200
12854 @@ -235,6 +235,7 @@ typedef __u32 kernel_cap_t;
12855     arbitrary SCSI commands */
12856  /* Allow setting encryption key on loopback filesystem */
12857  /* Allow setting zone reclaim policy */
12858 +/* Allow the selection of a security context */
12859  
12860  #define CAP_SYS_ADMIN        21
12861  
12862 @@ -288,6 +289,11 @@ typedef __u32 kernel_cap_t;
12863  
12864  #define CAP_AUDIT_CONTROL    30
12865  
12866 +/* Allow context manipulations */
12867 +/* Allow changing context info on files */
12868 +
12869 +#define CAP_CONTEXT         31
12870 +
12871  #ifdef __KERNEL__
12872  /* 
12873   * Bounding set
12874 diff -NurpP --minimal linux-2.6.16.20/include/linux/devpts_fs.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/devpts_fs.h
12875 --- linux-2.6.16.20/include/linux/devpts_fs.h   2004-08-14 12:55:59 +0200
12876 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/devpts_fs.h      2006-04-26 19:07:00 +0200
12877 @@ -30,5 +30,7 @@ static inline void devpts_pty_kill(int n
12878  
12879  #endif
12880  
12881 +#define DEVPTS_SUPER_MAGIC     0x00001cd1
12882 +
12883  
12884  #endif /* _LINUX_DEVPTS_FS_H */
12885 diff -NurpP --minimal linux-2.6.16.20/include/linux/ext2_fs.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/ext2_fs.h
12886 --- linux-2.6.16.20/include/linux/ext2_fs.h     2005-10-28 20:49:54 +0200
12887 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/ext2_fs.h        2006-04-26 19:07:00 +0200
12888 @@ -192,10 +192,17 @@ struct ext2_group_desc
12889  #define EXT2_NOTAIL_FL                 0x00008000 /* file tail should not be merged */
12890  #define EXT2_DIRSYNC_FL                        0x00010000 /* dirsync behaviour (directories only) */
12891  #define EXT2_TOPDIR_FL                 0x00020000 /* Top of directory hierarchies*/
12892 +#define EXT2_BARRIER_FL                        0x04000000 /* Barrier for chroot() */
12893 +#define EXT2_IUNLINK_FL                        0x08000000 /* Immutable unlink */
12894  #define EXT2_RESERVED_FL               0x80000000 /* reserved for ext2 lib */
12895  
12896 +#ifdef CONFIG_VSERVER_LEGACY
12897 +#define EXT2_FL_USER_VISIBLE           0x0803DFFF /* User visible flags */
12898 +#define EXT2_FL_USER_MODIFIABLE                0x080380FF /* User modifiable flags */
12899 +#else
12900  #define EXT2_FL_USER_VISIBLE           0x0003DFFF /* User visible flags */
12901  #define EXT2_FL_USER_MODIFIABLE                0x000380FF /* User modifiable flags */
12902 +#endif
12903  
12904  /*
12905   * ioctl commands
12906 @@ -240,7 +247,7 @@ struct ext2_inode {
12907                 struct {
12908                         __u8    l_i_frag;       /* Fragment number */
12909                         __u8    l_i_fsize;      /* Fragment size */
12910 -                       __u16   i_pad1;
12911 +                       __u16   l_i_tag;        /* Context Tag */
12912                         __le16  l_i_uid_high;   /* these 2 fields    */
12913                         __le16  l_i_gid_high;   /* were reserved2[0] */
12914                         __u32   l_i_reserved2;
12915 @@ -272,6 +279,7 @@ struct ext2_inode {
12916  #define i_gid_low      i_gid
12917  #define i_uid_high     osd2.linux2.l_i_uid_high
12918  #define i_gid_high     osd2.linux2.l_i_gid_high
12919 +#define i_raw_tag      osd2.linux2.l_i_tag
12920  #define i_reserved2    osd2.linux2.l_i_reserved2
12921  #endif
12922  
12923 @@ -313,8 +321,9 @@ struct ext2_inode {
12924  #define EXT2_MOUNT_XATTR_USER          0x004000  /* Extended user attributes */
12925  #define EXT2_MOUNT_POSIX_ACL           0x008000  /* POSIX Access Control Lists */
12926  #define EXT2_MOUNT_XIP                 0x010000  /* Execute in place */
12927 -#define EXT2_MOUNT_USRQUOTA            0x020000 /* user quota */
12928 -#define EXT2_MOUNT_GRPQUOTA            0x040000 /* group quota */
12929 +#define EXT2_MOUNT_USRQUOTA            0x020000  /* user quota */
12930 +#define EXT2_MOUNT_GRPQUOTA            0x040000  /* group quota */
12931 +#define EXT2_MOUNT_TAGGED              (1<<24)   /* Enable Context Tags */
12932  
12933  
12934  #define clear_opt(o, opt)              o &= ~EXT2_MOUNT_##opt
12935 diff -NurpP --minimal linux-2.6.16.20/include/linux/ext3_fs.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/ext3_fs.h
12936 --- linux-2.6.16.20/include/linux/ext3_fs.h     2005-10-28 20:49:54 +0200
12937 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/ext3_fs.h        2006-04-26 19:07:00 +0200
12938 @@ -185,10 +185,20 @@ struct ext3_group_desc
12939  #define EXT3_NOTAIL_FL                 0x00008000 /* file tail should not be merged */
12940  #define EXT3_DIRSYNC_FL                        0x00010000 /* dirsync behaviour (directories only) */
12941  #define EXT3_TOPDIR_FL                 0x00020000 /* Top of directory hierarchies*/
12942 +#define EXT3_BARRIER_FL                        0x04000000 /* Barrier for chroot() */
12943 +#define EXT3_IUNLINK_FL                        0x08000000 /* Immutable unlink */
12944  #define EXT3_RESERVED_FL               0x80000000 /* reserved for ext3 lib */
12945  
12946 +#ifdef CONFIG_VSERVER_LEGACY
12947 +#define EXT3_FL_USER_VISIBLE           0x0803DFFF /* User visible flags */
12948 +#define EXT3_FL_USER_MODIFIABLE                0x080380FF /* User modifiable flags */
12949 +#else
12950  #define EXT3_FL_USER_VISIBLE           0x0003DFFF /* User visible flags */
12951  #define EXT3_FL_USER_MODIFIABLE                0x000380FF /* User modifiable flags */
12952 +#endif
12953 +#ifdef CONFIG_VSERVER_LEGACY
12954 +#define EXT3_IOC_SETTAG                        FIOC_SETTAGJ
12955 +#endif
12956  
12957  /*
12958   * Inode dynamic state flags
12959 @@ -287,7 +297,7 @@ struct ext3_inode {
12960                 struct {
12961                         __u8    l_i_frag;       /* Fragment number */
12962                         __u8    l_i_fsize;      /* Fragment size */
12963 -                       __u16   i_pad1;
12964 +                       __u16   l_i_tag;        /* Context Tag */
12965                         __le16  l_i_uid_high;   /* these 2 fields    */
12966                         __le16  l_i_gid_high;   /* were reserved2[0] */
12967                         __u32   l_i_reserved2;
12968 @@ -321,6 +331,7 @@ struct ext3_inode {
12969  #define i_gid_low      i_gid
12970  #define i_uid_high     osd2.linux2.l_i_uid_high
12971  #define i_gid_high     osd2.linux2.l_i_gid_high
12972 +#define i_raw_tag      osd2.linux2.l_i_tag
12973  #define i_reserved2    osd2.linux2.l_i_reserved2
12974  
12975  #elif defined(__GNU__)
12976 @@ -375,6 +386,7 @@ struct ext3_inode {
12977  #define EXT3_MOUNT_QUOTA               0x80000 /* Some quota option set */
12978  #define EXT3_MOUNT_USRQUOTA            0x100000 /* "old" user quota */
12979  #define EXT3_MOUNT_GRPQUOTA            0x200000 /* "old" group quota */
12980 +#define EXT3_MOUNT_TAGGED              (1<<24) /* Enable Context Tags */
12981  
12982  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
12983  #ifndef _LINUX_EXT2_FS_H
12984 @@ -775,6 +787,7 @@ extern unsigned long ext3_count_free (st
12985  extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
12986  extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
12987  extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
12988 +extern int ext3_sync_flags(struct inode *inode);
12989  
12990  extern void ext3_read_inode (struct inode *);
12991  extern int  ext3_write_inode (struct inode *, int);
12992 diff -NurpP --minimal linux-2.6.16.20/include/linux/ext3_jbd.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/ext3_jbd.h
12993 --- linux-2.6.16.20/include/linux/ext3_jbd.h    2005-08-29 22:25:41 +0200
12994 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/ext3_jbd.h       2006-04-26 19:07:00 +0200
12995 @@ -77,10 +77,10 @@
12996  #define EXT3_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0)
12997  /* Amount of blocks needed for quota insert/delete - we do some block writes
12998   * but inode, sb and group updates are done only once */
12999 -#define EXT3_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
13000 -               (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_INIT_REWRITE) : 0)
13001 -#define EXT3_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
13002 -               (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_DEL_REWRITE) : 0)
13003 +#define EXT3_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? \
13004 +       (DQUOT_INIT_ALLOC*(EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_INIT_REWRITE) : 0)
13005 +#define EXT3_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? \
13006 +       (DQUOT_DEL_ALLOC*(EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_DEL_REWRITE) : 0)
13007  #else
13008  #define EXT3_QUOTA_TRANS_BLOCKS(sb) 0
13009  #define EXT3_QUOTA_INIT_BLOCKS(sb) 0
13010 diff -NurpP --minimal linux-2.6.16.20/include/linux/fs.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/fs.h
13011 --- linux-2.6.16.20/include/linux/fs.h  2006-05-11 21:25:36 +0200
13012 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/fs.h     2006-05-21 23:34:46 +0200
13013 @@ -109,6 +109,8 @@ extern int dir_notify_enable;
13014  #define MS_PRIVATE     (1<<18) /* change to private */
13015  #define MS_SLAVE       (1<<19) /* change to slave */
13016  #define MS_SHARED      (1<<20) /* change to shared */
13017 +#define MS_TAGGED      (1<<24) /* use generic inode tagging */
13018 +#define MS_TAGID       (1<<25) /* use specific tag for this mount */
13019  #define MS_ACTIVE      (1<<30)
13020  #define MS_NOUSER      (1<<31)
13021  
13022 @@ -135,6 +137,8 @@ extern int dir_notify_enable;
13023  #define S_NOCMTIME     128     /* Do not update file c/mtime */
13024  #define S_SWAPFILE     256     /* Do not truncate: swapon got its bmaps */
13025  #define S_PRIVATE      512     /* Inode is fs-internal */
13026 +#define S_BARRIER      1024    /* Barrier for chroot() */
13027 +#define S_IUNLINK      2048    /* Immutable unlink */
13028  
13029  /*
13030   * Note that nosuid etc flags are inode-specific: setting some file-system
13031 @@ -151,23 +155,30 @@ extern int dir_notify_enable;
13032   */
13033  #define __IS_FLG(inode,flg) ((inode)->i_sb->s_flags & (flg))
13034  
13035 -#define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY)
13036 +#define IS_RDONLY(inode)       __IS_FLG(inode, MS_RDONLY)
13037  #define IS_SYNC(inode)         (__IS_FLG(inode, MS_SYNCHRONOUS) || \
13038                                         ((inode)->i_flags & S_SYNC))
13039  #define IS_DIRSYNC(inode)      (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \
13040                                         ((inode)->i_flags & (S_SYNC|S_DIRSYNC)))
13041  #define IS_MANDLOCK(inode)     __IS_FLG(inode, MS_MANDLOCK)
13042 +#define IS_TAGGED(inode)       __IS_FLG(inode, MS_TAGGED)
13043  
13044  #define IS_NOQUOTA(inode)      ((inode)->i_flags & S_NOQUOTA)
13045  #define IS_APPEND(inode)       ((inode)->i_flags & S_APPEND)
13046  #define IS_IMMUTABLE(inode)    ((inode)->i_flags & S_IMMUTABLE)
13047 +#define IS_IUNLINK(inode)      ((inode)->i_flags & S_IUNLINK)
13048 +#define IS_IXORUNLINK(inode)   ((IS_IUNLINK(inode) ? S_IMMUTABLE : 0) ^ IS_IMMUTABLE(inode))
13049  #define IS_POSIXACL(inode)     __IS_FLG(inode, MS_POSIXACL)
13050  
13051 +#define IS_BARRIER(inode)      (S_ISDIR((inode)->i_mode) && ((inode)->i_flags & S_BARRIER))
13052  #define IS_DEADDIR(inode)      ((inode)->i_flags & S_DEAD)
13053  #define IS_NOCMTIME(inode)     ((inode)->i_flags & S_NOCMTIME)
13054  #define IS_SWAPFILE(inode)     ((inode)->i_flags & S_SWAPFILE)
13055  #define IS_PRIVATE(inode)      ((inode)->i_flags & S_PRIVATE)
13056  
13057 +#define IS_COW(inode)          (IS_IUNLINK(inode) && IS_IMMUTABLE(inode))
13058 +#define IS_COW_LINK(inode)     (S_ISREG((inode)->i_mode) && ((inode)->i_nlink > 1))
13059 +
13060  /* the read-only stuff doesn't really belong here, but any other place is
13061     probably as bad and I don't want to create yet another include file. */
13062  
13063 @@ -265,6 +276,7 @@ typedef void (dio_iodone_t)(struct kiocb
13064  #define ATTR_KILL_SUID 2048
13065  #define ATTR_KILL_SGID 4096
13066  #define ATTR_FILE      8192
13067 +#define ATTR_TAG       16384
13068  
13069  /*
13070   * This is the Inode Attributes structure, used for notify_change().  It
13071 @@ -280,6 +292,7 @@ struct iattr {
13072         umode_t         ia_mode;
13073         uid_t           ia_uid;
13074         gid_t           ia_gid;
13075 +       tag_t           ia_tag;
13076         loff_t          ia_size;
13077         struct timespec ia_atime;
13078         struct timespec ia_mtime;
13079 @@ -293,6 +306,9 @@ struct iattr {
13080         struct file     *ia_file;
13081  };
13082  
13083 +#define ATTR_FLAG_BARRIER      512     /* Barrier for chroot() */
13084 +#define ATTR_FLAG_IUNLINK      1024    /* Immutable unlink */
13085 +
13086  /*
13087   * Includes for diskquotas.
13088   */
13089 @@ -471,6 +487,7 @@ struct inode {
13090         unsigned int            i_nlink;
13091         uid_t                   i_uid;
13092         gid_t                   i_gid;
13093 +       tag_t                   i_tag;
13094         dev_t                   i_rdev;
13095         loff_t                  i_size;
13096         struct timespec         i_atime;
13097 @@ -491,6 +508,7 @@ struct inode {
13098         struct address_space    *i_mapping;
13099         struct address_space    i_data;
13100  #ifdef CONFIG_QUOTA
13101 +       struct dqhash           *i_dqh;
13102         struct dquot            *i_dquot[MAXQUOTAS];
13103  #endif
13104         /* These three should probably be a union */
13105 @@ -633,6 +651,7 @@ struct file {
13106         struct fown_struct      f_owner;
13107         unsigned int            f_uid, f_gid;
13108         struct file_ra_state    f_ra;
13109 +       xid_t                   f_xid;
13110  
13111         unsigned long           f_version;
13112         void                    *f_security;
13113 @@ -712,6 +731,7 @@ struct file_lock {
13114         unsigned char fl_type;
13115         loff_t fl_start;
13116         loff_t fl_end;
13117 +       xid_t fl_xid;
13118  
13119         struct fasync_struct *  fl_fasync; /* for lease break notifications */
13120         unsigned long fl_break_time;    /* for nonblocking lease breaks */
13121 @@ -811,7 +831,7 @@ struct super_block {
13122         unsigned long long      s_maxbytes;     /* Max file size */
13123         struct file_system_type *s_type;
13124         struct super_operations *s_op;
13125 -       struct dquot_operations *dq_op;
13126 +       struct dquot_operations *s_qop;
13127         struct quotactl_ops     *s_qcop;
13128         struct export_operations *s_export_op;
13129         unsigned long           s_flags;
13130 @@ -834,7 +854,7 @@ struct super_block {
13131  
13132         struct block_device     *s_bdev;
13133         struct list_head        s_instances;
13134 -       struct quota_info       s_dquot;        /* Diskquota specific options */
13135 +       struct dqhash           *s_dqh;         /* Diskquota hash */
13136  
13137         int                     s_frozen;
13138         wait_queue_head_t       s_wait_unfrozen;
13139 @@ -904,12 +924,12 @@ static inline void unlock_super(struct s
13140   */
13141  extern int vfs_permission(struct nameidata *, int);
13142  extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *);
13143 -extern int vfs_mkdir(struct inode *, struct dentry *, int);
13144 -extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t);
13145 -extern int vfs_symlink(struct inode *, struct dentry *, const char *, int);
13146 -extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
13147 -extern int vfs_rmdir(struct inode *, struct dentry *);
13148 -extern int vfs_unlink(struct inode *, struct dentry *);
13149 +extern int vfs_mkdir(struct inode *, struct dentry *, int, struct nameidata *);
13150 +extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t, struct nameidata *);
13151 +extern int vfs_symlink(struct inode *, struct dentry *, const char *, int, struct nameidata *);
13152 +extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct nameidata *);
13153 +extern int vfs_rmdir(struct inode *, struct dentry *, struct nameidata *);
13154 +extern int vfs_unlink(struct inode *, struct dentry *, struct nameidata *);
13155  extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
13156  
13157  /*
13158 @@ -1049,6 +1069,7 @@ struct inode_operations {
13159         ssize_t (*listxattr) (struct dentry *, char *, size_t);
13160         int (*removexattr) (struct dentry *, const char *);
13161         void (*truncate_range)(struct inode *, loff_t, loff_t);
13162 +       int (*sync_flags) (struct inode *);
13163  };
13164  
13165  struct seq_file;
13166 @@ -1059,6 +1080,7 @@ extern ssize_t vfs_readv(struct file *, 
13167                 unsigned long, loff_t *);
13168  extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
13169                 unsigned long, loff_t *);
13170 +ssize_t vfs_sendfile(struct file *, struct file *, loff_t *, size_t, loff_t);
13171  
13172  /*
13173   * NOTE: write_inode, delete_inode, clear_inode, put_inode can be called
13174 @@ -1087,8 +1109,8 @@ struct super_operations {
13175  
13176         int (*show_options)(struct seq_file *, struct vfsmount *);
13177  
13178 -       ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
13179 -       ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
13180 +       ssize_t (*quota_read)(struct dqhash *, int, char *, size_t, loff_t);
13181 +       ssize_t (*quota_write)(struct dqhash *, int, const char *, size_t, loff_t);
13182  };
13183  
13184  /* Inode state bits.  Protected by inode_lock. */
13185 @@ -1526,7 +1548,7 @@ extern void clear_inode(struct inode *);
13186  extern void destroy_inode(struct inode *);
13187  extern struct inode *new_inode(struct super_block *);
13188  extern int remove_suid(struct dentry *);
13189 -extern void remove_dquot_ref(struct super_block *, int, struct list_head *);
13190 +extern void remove_dquot_ref(struct dqhash *, int, struct list_head *);
13191  extern struct semaphore iprune_sem;
13192  
13193  extern void __insert_inode_hash(struct inode *, unsigned long hashval);
13194 @@ -1566,6 +1588,7 @@ extern ssize_t do_sync_write(struct file
13195  ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov,
13196                                 unsigned long nr_segs, loff_t *ppos);
13197  extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
13198 +extern ssize_t generic_file_sendpage(struct file *, struct page *, int, size_t, loff_t *, int);
13199  extern void do_generic_mapping_read(struct address_space *mapping,
13200                                     struct file_ra_state *, struct file *,
13201                                     loff_t *, read_descriptor_t *, read_actor_t);
13202 @@ -1688,6 +1711,7 @@ extern int dcache_dir_open(struct inode 
13203  extern int dcache_dir_close(struct inode *, struct file *);
13204  extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
13205  extern int dcache_readdir(struct file *, void *, filldir_t);
13206 +extern int dcache_readdir_filter(struct file *, void *, filldir_t, int (*)(struct dentry *));
13207  extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *);
13208  extern int simple_statfs(struct super_block *, struct kstatfs *);
13209  extern int simple_link(struct dentry *, struct inode *, struct dentry *);
13210 diff -NurpP --minimal linux-2.6.16.20/include/linux/init_task.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/init_task.h
13211 --- linux-2.6.16.20/include/linux/init_task.h   2006-01-03 17:30:09 +0100
13212 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/init_task.h      2006-04-26 19:07:00 +0200
13213 @@ -121,6 +121,10 @@ extern struct group_info init_groups;
13214         .journal_info   = NULL,                                         \
13215         .cpu_timers     = INIT_CPU_TIMERS(tsk.cpu_timers),              \
13216         .fs_excl        = ATOMIC_INIT(0),                               \
13217 +       .xid            = 0,                                            \
13218 +       .vx_info        = NULL,                                         \
13219 +       .nid            = 0,                                            \
13220 +       .nx_info        = NULL,                                         \
13221  }
13222  
13223  
13224 diff -NurpP --minimal linux-2.6.16.20/include/linux/ipc.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/ipc.h
13225 --- linux-2.6.16.20/include/linux/ipc.h 2004-08-14 12:54:46 +0200
13226 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/ipc.h    2006-04-26 19:07:00 +0200
13227 @@ -66,6 +66,7 @@ struct kern_ipc_perm
13228         mode_t          mode; 
13229         unsigned long   seq;
13230         void            *security;
13231 +       xid_t           xid;
13232  };
13233  
13234  #endif /* __KERNEL__ */
13235 diff -NurpP --minimal linux-2.6.16.20/include/linux/kernel.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/kernel.h
13236 --- linux-2.6.16.20/include/linux/kernel.h      2006-02-18 14:40:33 +0100
13237 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/kernel.h 2006-04-26 19:07:00 +0200
13238 @@ -17,6 +17,7 @@
13239  #include <asm/bug.h>
13240  
13241  extern const char linux_banner[];
13242 +extern const char vx_linux_banner[];
13243  
13244  #define INT_MAX                ((int)(~0U>>1))
13245  #define INT_MIN                (-INT_MAX - 1)
13246 diff -NurpP --minimal linux-2.6.16.20/include/linux/major.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/major.h
13247 --- linux-2.6.16.20/include/linux/major.h       2005-08-29 22:25:41 +0200
13248 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/major.h  2006-04-26 19:07:00 +0200
13249 @@ -15,6 +15,7 @@
13250  #define HD_MAJOR               IDE0_MAJOR
13251  #define PTY_SLAVE_MAJOR                3
13252  #define TTY_MAJOR              4
13253 +#define VROOT_MAJOR            4
13254  #define TTYAUX_MAJOR           5
13255  #define LP_MAJOR               6
13256  #define VCS_MAJOR              7
13257 diff -NurpP --minimal linux-2.6.16.20/include/linux/mount.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/mount.h
13258 --- linux-2.6.16.20/include/linux/mount.h       2006-04-09 13:49:57 +0200
13259 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/mount.h  2006-04-26 19:07:00 +0200
13260 @@ -22,10 +22,14 @@
13261  #define MNT_NOEXEC     0x04
13262  #define MNT_NOATIME    0x08
13263  #define MNT_NODIRATIME 0x10
13264 +#define MNT_RDONLY     0x20
13265 +
13266 +#define MNT_IS_RDONLY(m)       ((m) && ((m)->mnt_flags & MNT_RDONLY))
13267  
13268  #define MNT_SHARED     0x1000  /* if the vfsmount is a shared mount */
13269  #define MNT_UNBINDABLE 0x2000  /* if the vfsmount is a unbindable mount */
13270  #define MNT_PNODE_MASK 0x3000  /* propogation flag mask */
13271 +#define MNT_TAGID              0x8000
13272  
13273  struct vfsmount {
13274         struct list_head mnt_hash;
13275 @@ -47,6 +51,7 @@ struct vfsmount {
13276         struct vfsmount *mnt_master;    /* slave is on master->mnt_slave_list */
13277         struct namespace *mnt_namespace; /* containing namespace */
13278         int mnt_pinned;
13279 +       tag_t mnt_tag;                  /* tagging used for vfsmount */
13280  };
13281  
13282  static inline struct vfsmount *mntget(struct vfsmount *mnt)
13283 diff -NurpP --minimal linux-2.6.16.20/include/linux/namespace.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/namespace.h
13284 --- linux-2.6.16.20/include/linux/namespace.h   2006-02-18 14:40:34 +0100
13285 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/namespace.h      2006-04-26 19:07:00 +0200
13286 @@ -16,6 +16,7 @@ struct namespace {
13287  extern int copy_namespace(int, struct task_struct *);
13288  extern void __put_namespace(struct namespace *namespace);
13289  extern struct namespace *dup_namespace(struct task_struct *, struct fs_struct *);
13290 +extern void umount_unused(struct vfsmount *, struct fs_struct *);
13291  
13292  static inline void put_namespace(struct namespace *namespace)
13293  {
13294 diff -NurpP --minimal linux-2.6.16.20/include/linux/net.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/net.h
13295 --- linux-2.6.16.20/include/linux/net.h 2006-02-18 14:40:34 +0100
13296 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/net.h    2006-04-26 19:07:00 +0200
13297 @@ -62,6 +62,7 @@ typedef enum {
13298  #define SOCK_ASYNC_WAITDATA    1
13299  #define SOCK_NOSPACE           2
13300  #define SOCK_PASSCRED          3
13301 +#define SOCK_USER_SOCKET       4
13302  
13303  #ifndef ARCH_HAS_SOCKET_TYPES
13304  /**
13305 diff -NurpP --minimal linux-2.6.16.20/include/linux/nfs_mount.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/nfs_mount.h
13306 --- linux-2.6.16.20/include/linux/nfs_mount.h   2005-08-29 22:25:42 +0200
13307 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/nfs_mount.h      2006-04-26 19:07:00 +0200
13308 @@ -61,6 +61,7 @@ struct nfs_mount_data {
13309  #define NFS_MOUNT_NOACL                0x0800  /* 4 */
13310  #define NFS_MOUNT_STRICTLOCK   0x1000  /* reserved for NFSv4 */
13311  #define NFS_MOUNT_SECFLAVOUR   0x2000  /* 5 */
13312 +#define NFS_MOUNT_TAGGED       0x8000  /* context tagging */
13313  #define NFS_MOUNT_FLAGMASK     0xFFFF
13314  
13315  #endif
13316 diff -NurpP --minimal linux-2.6.16.20/include/linux/percpu.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/percpu.h
13317 --- linux-2.6.16.20/include/linux/percpu.h      2006-04-09 13:49:57 +0200
13318 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/percpu.h 2006-04-26 19:07:00 +0200
13319 @@ -8,7 +8,7 @@
13320  
13321  /* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */
13322  #ifndef PERCPU_ENOUGH_ROOM
13323 -#define PERCPU_ENOUGH_ROOM 32768
13324 +#define PERCPU_ENOUGH_ROOM 65536
13325  #endif
13326  
13327  /* Must be an lvalue. */
13328 diff -NurpP --minimal linux-2.6.16.20/include/linux/proc_fs.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/proc_fs.h
13329 --- linux-2.6.16.20/include/linux/proc_fs.h     2006-05-11 21:25:36 +0200
13330 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/proc_fs.h        2006-04-26 19:07:00 +0200
13331 @@ -55,6 +55,7 @@ struct proc_dir_entry {
13332         nlink_t nlink;
13333         uid_t uid;
13334         gid_t gid;
13335 +       int vx_flags;
13336         unsigned long size;
13337         struct inode_operations * proc_iops;
13338         struct file_operations * proc_fops;
13339 @@ -248,9 +249,11 @@ extern void kclist_add(struct kcore_list
13340  struct proc_inode {
13341         struct task_struct *task;
13342         int type;
13343 +       int vx_flags;
13344         union {
13345                 int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **);
13346                 int (*proc_read)(struct task_struct *task, char *page);
13347 +               int (*proc_vid_read)(int vid, char *page);
13348         } op;
13349         struct proc_dir_entry *pde;
13350         struct inode vfs_inode;
13351 diff -NurpP --minimal linux-2.6.16.20/include/linux/quota.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/quota.h
13352 --- linux-2.6.16.20/include/linux/quota.h       2006-01-03 17:30:10 +0100
13353 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/quota.h  2006-04-26 19:07:00 +0200
13354 @@ -56,6 +56,13 @@ extern spinlock_t dq_data_lock;
13355  #define kb2qb(x) ((x) >> (QUOTABLOCK_BITS-10))
13356  #define toqb(x) (((x) + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS)
13357  
13358 +/* dqhash_valid(): rejects NULL hashes only when HANDLE_DQHASH_NULL is defined; otherwise always true */
13359 +#ifdef HANDLE_DQHASH_NULL
13360 +#define        dqhash_valid(hash)      ((hash) != NULL)
13361 +#else
13362 +#define        dqhash_valid(hash)      (0 == 0)
13363 +#endif
13364 +
13365  #define MAXQUOTAS 2
13366  #define USRQUOTA  0            /* element used for user quotas */
13367  #define GRPQUOTA  1            /* element used for group quotas */
13368 @@ -175,19 +182,20 @@ struct mem_dqinfo {
13369         } u;
13370  };
13371  
13372 -struct super_block;
13373 +struct dqhash;
13374  
13375  #define DQF_MASK 0xffff                /* Mask for format specific flags */
13376  #define DQF_INFO_DIRTY_B 16
13377  #define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B) /* Is info dirty? */
13378  
13379 -extern void mark_info_dirty(struct super_block *sb, int type);
13380 +extern void mark_info_dirty(struct dqhash *hash, int type);
13381 +
13382  #define info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags)
13383  #define info_any_dquot_dirty(info) (!list_empty(&(info)->dqi_dirty_list))
13384  #define info_any_dirty(info) (info_dirty(info) || info_any_dquot_dirty(info))
13385  
13386 -#define sb_dqopt(sb) (&(sb)->s_dquot)
13387 -#define sb_dqinfo(sb, type) (sb_dqopt(sb)->info+(type))
13388 +#define dqh_dqopt(hash) (&(hash)->dqh_dqopt)
13389 +#define dqh_dqinfo(hash, type) (dqh_dqopt(hash)->info+(type))
13390  
13391  struct dqstats {
13392         int lookups;
13393 @@ -218,7 +226,7 @@ struct dquot {
13394         struct semaphore dq_lock;       /* dquot IO lock */
13395         atomic_t dq_count;              /* Use count */
13396         wait_queue_head_t dq_wait_unused;       /* Wait queue for dquot to become unused */
13397 -       struct super_block *dq_sb;      /* superblock this applies to */
13398 +       struct dqhash *dq_dqh;          /* quota hash backpointer */
13399         unsigned int dq_id;             /* ID this applies to (uid, gid) */
13400         loff_t dq_off;                  /* Offset of dquot on disk */
13401         unsigned long dq_flags;         /* See DQ_* */
13402 @@ -233,13 +241,14 @@ struct dquot {
13403  
13404  /* Operations which must be implemented by each quota format */
13405  struct quota_format_ops {
13406 -       int (*check_quota_file)(struct super_block *sb, int type);      /* Detect whether file is in our format */
13407 -       int (*read_file_info)(struct super_block *sb, int type);        /* Read main info about file - called on quotaon() */
13408 -       int (*write_file_info)(struct super_block *sb, int type);       /* Write main info about file */
13409 -       int (*free_file_info)(struct super_block *sb, int type);        /* Called on quotaoff() */
13410 -       int (*read_dqblk)(struct dquot *dquot);         /* Read structure for one user */
13411 -       int (*commit_dqblk)(struct dquot *dquot);       /* Write structure for one user */
13412 -       int (*release_dqblk)(struct dquot *dquot);      /* Called when last reference to dquot is being dropped */
13413 +       int (*check_quota_file)(struct dqhash *, int);  /* Detect whether file is in our format */
13414 +       int (*read_file_info)(struct dqhash *, int);    /* Read main info about file - called on quotaon() */
13415 +       int (*write_file_info)(struct dqhash *, int);   /* Write main info about file */
13416 +       int (*free_file_info)(struct dqhash *, int);    /* Called on quotaoff() */
13417 +
13418 +       int (*read_dqblk)(struct dquot *);      /* Read structure for one user */
13419 +       int (*commit_dqblk)(struct dquot *);    /* Write structure for one user */
13420 +       int (*release_dqblk)(struct dquot *);   /* Called when last reference to dquot is being dropped */
13421  };
13422  
13423  /* Operations working with dquots */
13424 @@ -255,22 +264,22 @@ struct dquot_operations {
13425         int (*acquire_dquot) (struct dquot *);          /* Quota is going to be created on disk */
13426         int (*release_dquot) (struct dquot *);          /* Quota is going to be deleted from disk */
13427         int (*mark_dirty) (struct dquot *);             /* Dquot is marked dirty */
13428 -       int (*write_info) (struct super_block *, int);  /* Write of quota "superblock" */
13429 +       int (*write_info) (struct dqhash *, int);       /* Write of quota "superblock" */
13430  };
13431  
13432  /* Operations handling requests from userspace */
13433  struct quotactl_ops {
13434 -       int (*quota_on)(struct super_block *, int, int, char *);
13435 -       int (*quota_off)(struct super_block *, int);
13436 -       int (*quota_sync)(struct super_block *, int);
13437 -       int (*get_info)(struct super_block *, int, struct if_dqinfo *);
13438 -       int (*set_info)(struct super_block *, int, struct if_dqinfo *);
13439 -       int (*get_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *);
13440 -       int (*set_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *);
13441 -       int (*get_xstate)(struct super_block *, struct fs_quota_stat *);
13442 -       int (*set_xstate)(struct super_block *, unsigned int, int);
13443 -       int (*get_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
13444 -       int (*set_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
13445 +       int (*quota_on)(struct dqhash *, int, int, char *);
13446 +       int (*quota_off)(struct dqhash *, int);
13447 +       int (*quota_sync)(struct dqhash *, int);
13448 +       int (*get_info)(struct dqhash *, int, struct if_dqinfo *);
13449 +       int (*set_info)(struct dqhash *, int, struct if_dqinfo *);
13450 +       int (*get_dqblk)(struct dqhash *, int, qid_t, struct if_dqblk *);
13451 +       int (*set_dqblk)(struct dqhash *, int, qid_t, struct if_dqblk *);
13452 +       int (*get_xstate)(struct dqhash *, struct fs_quota_stat *);
13453 +       int (*set_xstate)(struct dqhash *, unsigned int, int);
13454 +       int (*get_xquota)(struct dqhash *, int, qid_t, struct fs_disk_quota *);
13455 +       int (*set_xquota)(struct dqhash *, int, qid_t, struct fs_disk_quota *);
13456  };
13457  
13458  struct quota_format_type {
13459 @@ -293,16 +302,15 @@ struct quota_info {
13460         struct quota_format_ops *ops[MAXQUOTAS];        /* Operations for each type */
13461  };
13462  
13463 -/* Inline would be better but we need to dereference super_block which is not defined yet */
13464 -int mark_dquot_dirty(struct dquot *dquot);
13465  
13466  #define dquot_dirty(dquot) test_bit(DQ_MOD_B, &(dquot)->dq_flags)
13467  
13468 -#define sb_has_quota_enabled(sb, type) ((type)==USRQUOTA ? \
13469 -       (sb_dqopt(sb)->flags & DQUOT_USR_ENABLED) : (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED))
13470 +#define dqh_has_quota_enabled(hash, type) (dqhash_valid(hash) && ((type)==USRQUOTA ? \
13471 +       (dqh_dqopt(hash)->flags & DQUOT_USR_ENABLED) : (dqh_dqopt(hash)->flags & DQUOT_GRP_ENABLED)))
13472 +
13473 +#define dqh_any_quota_enabled(hash) (dqhash_valid(hash) && \
13474 +       (dqh_has_quota_enabled(hash, USRQUOTA) || dqh_has_quota_enabled(hash, GRPQUOTA)))
13475  
13476 -#define sb_any_quota_enabled(sb) (sb_has_quota_enabled(sb, USRQUOTA) | \
13477 -                                 sb_has_quota_enabled(sb, GRPQUOTA))
13478  
13479  int register_quota_format(struct quota_format_type *fmt);
13480  void unregister_quota_format(struct quota_format_type *fmt);
13481 @@ -317,6 +325,51 @@ struct quota_module_name {
13482         {QFMT_VFS_V0, "quota_v2"},\
13483         {0, NULL}}
13484  
13485 +struct dqhash {
13486 +       struct list_head dqh_list;      /* List of all quota hashes */
13487 +       unsigned int dqh_id;            /* ID for hash */
13488 +       atomic_t dqh_count;             /* Use count */
13489 +       struct quota_info dqh_dqopt;    /* Diskquota specific options */
13490 +       struct dquot_operations *dqh_qop;
13491 +       struct quotactl_ops *dqh_qcop;
13492 +       struct super_block *dqh_sb;     /* super block */
13493 +       unsigned int dqh_hash_bits;
13494 +       unsigned int dqh_hash_mask;
13495 +       struct hlist_head *dqh_hash;
13496 +};
13497 +
13498 +#if defined(CONFIG_QUOTA)
13499 +
13500 +
13501 +struct dqhash *new_dqhash(struct super_block *, unsigned int);
13502 +void destroy_dqhash(struct dqhash *);
13503 +struct dqhash *find_dqhash(unsigned int);
13504 +
13505 +static inline void dqhput(struct dqhash *hash)
13506 +{
13507 +       if (dqhash_valid(hash))
13508 +               if (atomic_dec_and_test(&hash->dqh_count))
13509 +                       destroy_dqhash(hash);
13510 +}
13511 +
13512 +static inline struct dqhash *dqhget(struct dqhash *hash)
13513 +{
13514 +       if (dqhash_valid(hash))
13515 +               atomic_inc(&hash->dqh_count);
13516 +       return hash;
13517 +}
13518 +
13519 +#else /* CONFIG_QUOTA */
13520 +
13521 +#define new_dqhash(sb, dqdom)          (0)
13522 +#define find_dqhash(dqdom)             (0)
13523 +#define destroy_dqhash(hash)           do { } while(0)
13524 +
13525 +#define dqhput(hash)                   do { } while(0)
13526 +#define dqhget(hash)                   (hash)
13527 +
13528 +#endif /* CONFIG_QUOTA */
13529 +
13530  #else
13531  
13532  # /* nodep */ include <sys/cdefs.h>
13533 diff -NurpP --minimal linux-2.6.16.20/include/linux/quotaops.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/quotaops.h
13534 --- linux-2.6.16.20/include/linux/quotaops.h    2006-04-09 13:49:57 +0200
13535 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/quotaops.h       2006-04-26 19:07:00 +0200
13536 @@ -20,7 +20,7 @@
13537  /*
13538   * declaration of quota_function calls in kernel.
13539   */
13540 -extern void sync_dquots(struct super_block *sb, int type);
13541 +extern void sync_dquots(struct dqhash *hash, int type);
13542  
13543  extern int dquot_initialize(struct inode *inode, int type);
13544  extern int dquot_drop(struct inode *inode);
13545 @@ -35,19 +35,19 @@ extern int dquot_transfer(struct inode *
13546  extern int dquot_commit(struct dquot *dquot);
13547  extern int dquot_acquire(struct dquot *dquot);
13548  extern int dquot_release(struct dquot *dquot);
13549 -extern int dquot_commit_info(struct super_block *sb, int type);
13550 +extern int dquot_commit_info(struct dqhash *hash, int type);
13551  extern int dquot_mark_dquot_dirty(struct dquot *dquot);
13552  
13553 -extern int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path);
13554 -extern int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
13555 +extern int vfs_quota_on(struct dqhash *hash, int type, int format_id, char *path);
13556 +extern int vfs_quota_on_mount(struct dqhash *hash, char *qf_name,
13557                 int format_id, int type);
13558 -extern int vfs_quota_off(struct super_block *sb, int type);
13559 -#define vfs_quota_off_mount(sb, type) vfs_quota_off(sb, type)
13560 -extern int vfs_quota_sync(struct super_block *sb, int type);
13561 -extern int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
13562 -extern int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
13563 -extern int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
13564 -extern int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
13565 +extern int vfs_quota_off(struct dqhash *hash, int type);
13566 +#define vfs_quota_off_mount(dqh, type) vfs_quota_off(dqh, type)
13567 +extern int vfs_quota_sync(struct dqhash *hash, int type);
13568 +extern int vfs_get_dqinfo(struct dqhash *hash, int type, struct if_dqinfo *ii);
13569 +extern int vfs_set_dqinfo(struct dqhash *hash, int type, struct if_dqinfo *ii);
13570 +extern int vfs_get_dqblk(struct dqhash *hash, int type, qid_t id, struct if_dqblk *di);
13571 +extern int vfs_set_dqblk(struct dqhash *hash, int type, qid_t id, struct if_dqblk *di);
13572  
13573  /*
13574   * Operations supported for diskquotas.
13575 @@ -62,9 +62,12 @@ extern struct quotactl_ops vfs_quotactl_
13576   * need a lot of space in journal for dquot structure allocation. */
13577  static __inline__ void DQUOT_INIT(struct inode *inode)
13578  {
13579 -       BUG_ON(!inode->i_sb);
13580 -       if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode))
13581 -               inode->i_sb->dq_op->initialize(inode, -1);
13582 +       if (!dqhash_valid(inode->i_dqh))
13583 +               return;
13584 +       BUG_ON(!inode->i_dqh);
13585 +       /* i_dqh is non-NULL from here on: a NULL hash either returned early or hit BUG_ON above */
13586 +       if (dqh_any_quota_enabled(inode->i_dqh) && !IS_NOQUOTA(inode))
13587 +               inode->i_dqh->dqh_qop->initialize(inode, -1);
13588  }
13589  
13590  /* The same as with DQUOT_INIT */
13591 @@ -73,8 +76,8 @@ static __inline__ void DQUOT_DROP(struct
13592         /* Here we can get arbitrary inode from clear_inode() so we have
13593          * to be careful. OTOH we don't need locking as quota operations
13594          * are allowed to change only at mount time */
13595 -       if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op
13596 -           && inode->i_sb->dq_op->drop) {
13597 +       if (!IS_NOQUOTA(inode) && inode->i_dqh && inode->i_dqh->dqh_qop
13598 +           && inode->i_dqh->dqh_qop->drop) {
13599                 int cnt;
13600                 /* Test before calling to rule out calls from proc and such
13601                   * where we are not allowed to block. Note that this is
13602 @@ -85,7 +88,7 @@ static __inline__ void DQUOT_DROP(struct
13603                         if (inode->i_dquot[cnt] != NODQUOT)
13604                                 break;
13605                 if (cnt < MAXQUOTAS)
13606 -                       inode->i_sb->dq_op->drop(inode);
13607 +                       inode->i_dqh->dqh_qop->drop(inode);
13608         }
13609  }
13610  
13611 @@ -93,9 +96,9 @@ static __inline__ void DQUOT_DROP(struct
13612   * a transaction (deadlocks possible otherwise) */
13613  static __inline__ int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
13614  {
13615 -       if (sb_any_quota_enabled(inode->i_sb)) {
13616 +       if (dqh_any_quota_enabled(inode->i_dqh)) {
13617                 /* Used space is updated in alloc_space() */
13618 -               if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA)
13619 +               if (inode->i_dqh->dqh_qop->alloc_space(inode, nr, 1) == NO_QUOTA)
13620                         return 1;
13621         }
13622         else
13623 @@ -113,9 +116,9 @@ static __inline__ int DQUOT_PREALLOC_SPA
13624  
13625  static __inline__ int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
13626  {
13627 -       if (sb_any_quota_enabled(inode->i_sb)) {
13628 +       if (dqh_any_quota_enabled(inode->i_dqh)) {
13629                 /* Used space is updated in alloc_space() */
13630 -               if (inode->i_sb->dq_op->alloc_space(inode, nr, 0) == NO_QUOTA)
13631 +               if (inode->i_dqh->dqh_qop->alloc_space(inode, nr, 0) == NO_QUOTA)
13632                         return 1;
13633         }
13634         else
13635 @@ -133,9 +136,9 @@ static __inline__ int DQUOT_ALLOC_SPACE(
13636  
13637  static __inline__ int DQUOT_ALLOC_INODE(struct inode *inode)
13638  {
13639 -       if (sb_any_quota_enabled(inode->i_sb)) {
13640 +       if (dqh_any_quota_enabled(inode->i_dqh)) {
13641                 DQUOT_INIT(inode);
13642 -               if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA)
13643 +               if (inode->i_dqh->dqh_qop->alloc_inode(inode, 1) == NO_QUOTA)
13644                         return 1;
13645         }
13646         return 0;
13647 @@ -143,8 +146,8 @@ static __inline__ int DQUOT_ALLOC_INODE(
13648  
13649  static __inline__ void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
13650  {
13651 -       if (sb_any_quota_enabled(inode->i_sb))
13652 -               inode->i_sb->dq_op->free_space(inode, nr);
13653 +       if (dqh_any_quota_enabled(inode->i_dqh))
13654 +               inode->i_dqh->dqh_qop->free_space(inode, nr);
13655         else
13656                 inode_sub_bytes(inode, nr);
13657  }
13658 @@ -157,29 +160,30 @@ static __inline__ void DQUOT_FREE_SPACE(
13659  
13660  static __inline__ void DQUOT_FREE_INODE(struct inode *inode)
13661  {
13662 -       if (sb_any_quota_enabled(inode->i_sb))
13663 -               inode->i_sb->dq_op->free_inode(inode, 1);
13664 +       if (dqh_any_quota_enabled(inode->i_dqh))
13665 +               inode->i_dqh->dqh_qop->free_inode(inode, 1);
13666  }
13667  
13668  static __inline__ int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr)
13669  {
13670 -       if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
13671 +       if (dqh_any_quota_enabled(inode->i_dqh) && !IS_NOQUOTA(inode)) {
13672                 DQUOT_INIT(inode);
13673 -               if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
13674 +               if (inode->i_dqh->dqh_qop->transfer(inode, iattr) == NO_QUOTA)
13675                         return 1;
13676         }
13677         return 0;
13678  }
13679  
13680  /* The following two functions cannot be called inside a transaction */
13681 -#define DQUOT_SYNC(sb) sync_dquots(sb, -1)
13682 +#define DQUOT_SYNC(hash)       sync_dquots(hash, -1)
13683  
13684 -static __inline__ int DQUOT_OFF(struct super_block *sb)
13685 +static __inline__ int DQUOT_OFF(struct dqhash *hash)
13686  {
13687         int ret = -ENOSYS;
13688  
13689 -       if (sb_any_quota_enabled(sb) && sb->s_qcop && sb->s_qcop->quota_off)
13690 -               ret = sb->s_qcop->quota_off(sb, -1);
13691 +       if (dqh_any_quota_enabled(hash) && hash->dqh_qcop &&
13692 +               hash->dqh_qcop->quota_off)
13693 +               ret = hash->dqh_qcop->quota_off(hash, -1);
13694         return ret;
13695  }
13696  
13697 @@ -194,8 +198,8 @@ static __inline__ int DQUOT_OFF(struct s
13698  #define DQUOT_DROP(inode)                      do { } while(0)
13699  #define DQUOT_ALLOC_INODE(inode)               (0)
13700  #define DQUOT_FREE_INODE(inode)                        do { } while(0)
13701 -#define DQUOT_SYNC(sb)                         do { } while(0)
13702 -#define DQUOT_OFF(sb)                          do { } while(0)
13703 +#define DQUOT_SYNC(hash)                       do { } while(0)
13704 +#define DQUOT_OFF(hash)                                do { } while(0)
13705  #define DQUOT_TRANSFER(inode, iattr)           (0)
13706  static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
13707  {
13708 diff -NurpP --minimal linux-2.6.16.20/include/linux/reiserfs_fs.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/reiserfs_fs.h
13709 --- linux-2.6.16.20/include/linux/reiserfs_fs.h 2006-04-09 13:49:57 +0200
13710 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/reiserfs_fs.h    2006-04-26 19:07:00 +0200
13711 @@ -829,6 +829,18 @@ struct stat_data_v1 {
13712  #define REISERFS_COMPR_FL     EXT2_COMPR_FL
13713  #define REISERFS_NOTAIL_FL    EXT2_NOTAIL_FL
13714  
13715 +/* unfortunately reiserfs sdattr is only 16 bit */
13716 +#define REISERFS_BARRIER_FL   (EXT2_BARRIER_FL >> 16)
13717 +#define REISERFS_IUNLINK_FL   (EXT2_IUNLINK_FL >> 16)
13718 +
13719 +#ifdef CONFIG_VSERVER_LEGACY
13720 +#define REISERFS_FL_USER_VISIBLE       (REISERFS_IUNLINK_FL|0x80FF)
13721 +#define REISERFS_FL_USER_MODIFIABLE    (REISERFS_IUNLINK_FL|0x80FF)
13722 +#else
13723 +#define REISERFS_FL_USER_VISIBLE       0x80FF
13724 +#define REISERFS_FL_USER_MODIFIABLE    0x80FF
13725 +#endif
13726 +
13727  /* persistent flags that file inherits from the parent directory */
13728  #define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL |        \
13729                                 REISERFS_SYNC_FL |      \
13730 @@ -1904,6 +1916,7 @@ static inline void reiserfs_update_sd(st
13731  void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode);
13732  void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs);
13733  int reiserfs_setattr(struct dentry *dentry, struct iattr *attr);
13734 +int reiserfs_sync_flags(struct inode *inode);
13735  
13736  /* namei.c */
13737  void set_de_name_and_namelen(struct reiserfs_dir_entry *de);
13738 diff -NurpP --minimal linux-2.6.16.20/include/linux/reiserfs_fs_sb.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/reiserfs_fs_sb.h
13739 --- linux-2.6.16.20/include/linux/reiserfs_fs_sb.h      2006-02-18 14:40:35 +0100
13740 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/reiserfs_fs_sb.h 2006-04-26 19:07:00 +0200
13741 @@ -456,6 +456,7 @@ enum reiserfs_mount_options {
13742         REISERFS_POSIXACL,
13743         REISERFS_BARRIER_NONE,
13744         REISERFS_BARRIER_FLUSH,
13745 +       REISERFS_TAGGED,
13746  
13747         /* Actions on error */
13748         REISERFS_ERROR_PANIC,
13749 diff -NurpP --minimal linux-2.6.16.20/include/linux/sched.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/sched.h
13750 --- linux-2.6.16.20/include/linux/sched.h       2006-04-09 13:49:57 +0200
13751 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/sched.h  2006-04-26 19:07:00 +0200
13752 @@ -15,6 +15,7 @@
13753  #include <linux/cpumask.h>
13754  #include <linux/errno.h>
13755  #include <linux/nodemask.h>
13756 +#include <linux/vs_base.h>
13757  
13758  #include <asm/system.h>
13759  #include <asm/semaphore.h>
13760 @@ -61,12 +62,13 @@ struct exec_domain;
13761  #define CLONE_UNTRACED         0x00800000      /* set if the tracing process can't force CLONE_PTRACE on this clone */
13762  #define CLONE_CHILD_SETTID     0x01000000      /* set the TID in the child */
13763  #define CLONE_STOPPED          0x02000000      /* Start in stopped state */
13764 +#define CLONE_KTHREAD          0x10000000      /* clone a kernel thread */
13765  
13766  /*
13767   * List of flags we want to share for kernel threads,
13768   * if only because they are not used by them anyway.
13769   */
13770 -#define CLONE_KERNEL   (CLONE_FS | CLONE_FILES | CLONE_SIGHAND)
13771 +#define CLONE_KERNEL   (CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_KTHREAD)
13772  
13773  /*
13774   * These are the constant used to fake the fixed-point load-average
13775 @@ -129,6 +131,7 @@ extern unsigned long nr_iowait(void);
13776  #define EXIT_DEAD              32
13777  /* in tsk->state again */
13778  #define TASK_NONINTERACTIVE    64
13779 +#define TASK_ONHOLD            128
13780  
13781  #define __set_task_state(tsk, state_value)             \
13782         do { (tsk)->state = (state_value); } while (0)
13783 @@ -257,27 +260,30 @@ extern void arch_unmap_area_topdown(stru
13784   * The mm counters are not protected by its page_table_lock,
13785   * so must be incremented atomically.
13786   */
13787 -#define set_mm_counter(mm, member, value) atomic_long_set(&(mm)->_##member, value)
13788 -#define get_mm_counter(mm, member) ((unsigned long)atomic_long_read(&(mm)->_##member))
13789 -#define add_mm_counter(mm, member, value) atomic_long_add(value, &(mm)->_##member)
13790 -#define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member)
13791 -#define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member)
13792  typedef atomic_long_t mm_counter_t;
13793 +#define __set_mm_counter(mm, member, value) \
13794 +       atomic_long_set(&(mm)->_##member, value)
13795 +#define get_mm_counter(mm, member) \
13796 +       ((unsigned long)atomic_long_read(&(mm)->_##member))
13797  
13798  #else  /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
13799  /*
13800   * The mm counters are protected by its page_table_lock,
13801   * so can be incremented directly.
13802   */
13803 -#define set_mm_counter(mm, member, value) (mm)->_##member = (value)
13804 -#define get_mm_counter(mm, member) ((mm)->_##member)
13805 -#define add_mm_counter(mm, member, value) (mm)->_##member += (value)
13806 -#define inc_mm_counter(mm, member) (mm)->_##member++
13807 -#define dec_mm_counter(mm, member) (mm)->_##member--
13808  typedef unsigned long mm_counter_t;
13809 +#define __set_mm_counter(mm, member, value) (mm)->_##member = (value)
13810 +#define get_mm_counter(mm, member) ((mm)->_##member)
13811  
13812  #endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
13813  
13814 +#define set_mm_counter(mm, member, value) /* set by subtracting (current - value) */ \
13815 +       vx_ ## member ## pages_sub((mm), (get_mm_counter(mm, member) - (value)))
13816 +#define add_mm_counter(mm, member, value) \
13817 +       vx_ ## member ## pages_add((mm), (value))
13818 +#define inc_mm_counter(mm, member) vx_ ## member ## pages_inc((mm))
13819 +#define dec_mm_counter(mm, member) vx_ ## member ## pages_dec((mm))
13820 +
13821  #define get_mm_rss(mm)                                 \
13822         (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
13823  #define update_hiwater_rss(mm) do {                    \
13824 @@ -336,6 +342,7 @@ struct mm_struct {
13825  
13826         /* Architecture-specific MM context */
13827         mm_context_t context;
13828 +       struct vx_info *mm_vx_info;
13829  
13830         /* Token based thrashing protection. */
13831         unsigned long swap_token_time;
13832 @@ -514,9 +521,10 @@ struct user_struct {
13833         /* Hash table maintenance information */
13834         struct list_head uidhash_list;
13835         uid_t uid;
13836 +       xid_t xid;
13837  };
13838  
13839 -extern struct user_struct *find_user(uid_t);
13840 +extern struct user_struct *find_user(xid_t, uid_t);
13841  
13842  extern struct user_struct root_user;
13843  #define INIT_USER (&root_user)
13844 @@ -818,6 +826,14 @@ struct task_struct {
13845         
13846         void *security;
13847         struct audit_context *audit_context;
13848 +
13849 +/* vserver context data */
13850 +       struct vx_info *vx_info;
13851 +       struct nx_info *nx_info;
13852 +
13853 +       xid_t xid;
13854 +       nid_t nid;
13855 +
13856         seccomp_t seccomp;
13857  
13858  /* Thread group tracking */
13859 @@ -1020,13 +1036,19 @@ extern struct task_struct init_task;
13860  
13861  extern struct   mm_struct init_mm;
13862  
13863 -#define find_task_by_pid(nr)   find_task_by_pid_type(PIDTYPE_PID, nr)
13864 +/* find_task_by_pid() maps nr through vx_rmap_pid(); the _real_ variant does not */
13865 +#define find_task_by_real_pid(nr) \
13866 +       find_task_by_pid_type(PIDTYPE_PID, nr)
13867 +#define find_task_by_pid(nr) \
13868 +       find_task_by_pid_type(PIDTYPE_PID, \
13869 +               vx_rmap_pid(nr))
13870 +
13871  extern struct task_struct *find_task_by_pid_type(int type, int pid);
13872  extern void set_special_pids(pid_t session, pid_t pgrp);
13873  extern void __set_special_pids(pid_t session, pid_t pgrp);
13874  
13875  /* per-UID process charging. */
13876 -extern struct user_struct * alloc_uid(uid_t);
13877 +extern struct user_struct * alloc_uid(xid_t, uid_t);
13878  static inline struct user_struct *get_uid(struct user_struct *u)
13879  {
13880         atomic_inc(&u->__count);
13881 diff -NurpP --minimal linux-2.6.16.20/include/linux/security.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/security.h
13882 --- linux-2.6.16.20/include/linux/security.h    2006-04-09 13:49:57 +0200
13883 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/security.h       2006-04-26 19:07:00 +0200
13884 @@ -1102,7 +1102,7 @@ struct security_operations {
13885         int (*acct) (struct file * file);
13886         int (*sysctl) (struct ctl_table * table, int op);
13887         int (*capable) (struct task_struct * tsk, int cap);
13888 -       int (*quotactl) (int cmds, int type, int id, struct super_block * sb);
13889 +       int (*quotactl) (int cmds, int type, int id, struct dqhash *);
13890         int (*quota_on) (struct dentry * dentry);
13891         int (*syslog) (int type);
13892         int (*settime) (struct timespec *ts, struct timezone *tz);
13893 @@ -1357,9 +1357,9 @@ static inline int security_sysctl(struct
13894  }
13895  
13896  static inline int security_quotactl (int cmds, int type, int id,
13897 -                                    struct super_block *sb)
13898 +                                    struct dqhash *hash)
13899  {
13900 -       return security_ops->quotactl (cmds, type, id, sb);
13901 +       return security_ops->quotactl (cmds, type, id, hash);
13902  }
13903  
13904  static inline int security_quota_on (struct dentry * dentry)
13905 @@ -2060,7 +2060,7 @@ static inline int security_sysctl(struct
13906  }
13907  
13908  static inline int security_quotactl (int cmds, int type, int id,
13909 -                                    struct super_block * sb)
13910 +                                    struct dqhash * hash)
13911  {
13912         return 0;
13913  }
13914 diff -NurpP --minimal linux-2.6.16.20/include/linux/shmem_fs.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/shmem_fs.h
13915 --- linux-2.6.16.20/include/linux/shmem_fs.h    2006-04-09 13:49:57 +0200
13916 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/shmem_fs.h       2006-04-26 19:07:00 +0200
13917 @@ -8,6 +8,9 @@
13918  
13919  #define SHMEM_NR_DIRECT 16
13920  
13921 +#define TMPFS_SUPER_MAGIC      0x01021994
13922 +
13923 +
13924  struct shmem_inode_info {
13925         spinlock_t              lock;
13926         unsigned long           flags;
13927 diff -NurpP --minimal linux-2.6.16.20/include/linux/stat.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/stat.h
13928 --- linux-2.6.16.20/include/linux/stat.h        2004-08-14 12:55:10 +0200
13929 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/stat.h   2006-04-26 19:07:00 +0200
13930 @@ -63,6 +63,7 @@ struct kstat {
13931         unsigned int    nlink;
13932         uid_t           uid;
13933         gid_t           gid;
13934 +       tag_t           tag;
13935         dev_t           rdev;
13936         loff_t          size;
13937         struct timespec  atime;
13938 diff -NurpP --minimal linux-2.6.16.20/include/linux/sunrpc/auth.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/sunrpc/auth.h
13939 --- linux-2.6.16.20/include/linux/sunrpc/auth.h 2006-02-18 14:40:35 +0100
13940 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/sunrpc/auth.h    2006-04-26 19:07:00 +0200
13941 @@ -28,6 +28,7 @@
13942  struct auth_cred {
13943         uid_t   uid;
13944         gid_t   gid;
13945 +       tag_t   tag;
13946         struct group_info *group_info;
13947  };
13948  
13949 diff -NurpP --minimal linux-2.6.16.20/include/linux/sunrpc/clnt.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/sunrpc/clnt.h
13950 --- linux-2.6.16.20/include/linux/sunrpc/clnt.h 2006-02-18 14:40:35 +0100
13951 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/sunrpc/clnt.h    2006-04-26 19:07:00 +0200
13952 @@ -51,7 +51,8 @@ struct rpc_clnt {
13953                                 cl_intr     : 1,/* interruptible */
13954                                 cl_autobind : 1,/* use getport() */
13955                                 cl_oneshot  : 1,/* dispose after use */
13956 -                               cl_dead     : 1;/* abandoned */
13957 +                               cl_dead     : 1,/* abandoned */
13958 +                               cl_tag      : 1;/* context tagging */
13959  
13960         struct rpc_rtt *        cl_rtt;         /* RTO estimator data */
13961         struct rpc_portmap *    cl_pmap;        /* port mapping */
13962 diff -NurpP --minimal linux-2.6.16.20/include/linux/syscalls.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/syscalls.h
13963 --- linux-2.6.16.20/include/linux/syscalls.h    2006-04-09 13:49:57 +0200
13964 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/syscalls.h       2006-04-26 19:07:00 +0200
13965 @@ -293,6 +293,8 @@ asmlinkage long sys_symlink(const char _
13966  asmlinkage long sys_unlink(const char __user *pathname);
13967  asmlinkage long sys_rename(const char __user *oldname,
13968                                 const char __user *newname);
13969 +asmlinkage long sys_copyfile(const char __user *from, const char __user *to,
13970 +                               umode_t mode);
13971  asmlinkage long sys_chmod(const char __user *filename, mode_t mode);
13972  asmlinkage long sys_fchmod(unsigned int fd, mode_t mode);
13973  
13974 diff -NurpP --minimal linux-2.6.16.20/include/linux/sysctl.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/sysctl.h
13975 --- linux-2.6.16.20/include/linux/sysctl.h      2006-04-09 13:49:57 +0200
13976 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/sysctl.h 2006-04-26 19:07:00 +0200
13977 @@ -93,6 +93,7 @@ enum
13978         KERN_CAP_BSET=14,       /* int: capability bounding set */
13979         KERN_PANIC=15,          /* int: panic timeout */
13980         KERN_REALROOTDEV=16,    /* real root device to mount after initrd */
13981 +       KERN_VSHELPER=17,       /* string: path to vshelper policy agent */
13982  
13983         KERN_SPARC_REBOOT=21,   /* reboot command on Sparc */
13984         KERN_CTLALTDEL=22,      /* int: allow ctl-alt-del to reboot */
13985 @@ -882,6 +883,9 @@ typedef int ctl_handler (ctl_table *tabl
13986  typedef int proc_handler (ctl_table *ctl, int write, struct file * filp,
13987                           void __user *buffer, size_t *lenp, loff_t *ppos);
13988  
13989 +typedef int virt_handler (struct ctl_table *ctl, int write, xid_t xid,
13990 +                         void **datap, size_t *lenp);
13991 +
13992  extern int proc_dostring(ctl_table *, int, struct file *,
13993                          void __user *, size_t *, loff_t *);
13994  extern int proc_dointvec(ctl_table *, int, struct file *,
13995 @@ -963,6 +967,7 @@ struct ctl_table 
13996         mode_t mode;
13997         ctl_table *child;
13998         proc_handler *proc_handler;     /* Callback for text formatting */
13999 +       virt_handler *virt_handler;     /* Context virtualization */
14000         ctl_handler *strategy;          /* Callback function for all r/w */
14001         struct proc_dir_entry *de;      /* /proc control block */
14002         void *extra1;
14003 diff -NurpP --minimal linux-2.6.16.20/include/linux/sysfs.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/sysfs.h
14004 --- linux-2.6.16.20/include/linux/sysfs.h       2005-08-29 22:25:42 +0200
14005 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/sysfs.h  2006-04-26 19:07:00 +0200
14006 @@ -12,6 +12,8 @@
14007  
14008  #include <asm/atomic.h>
14009  
14010 +#define SYSFS_SUPER_MAGIC      0x62656572
14011 +
14012  struct kobject;
14013  struct module;
14014  
14015 diff -NurpP --minimal linux-2.6.16.20/include/linux/time.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/time.h
14016 --- linux-2.6.16.20/include/linux/time.h        2006-02-15 13:54:35 +0100
14017 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/time.h   2006-05-29 17:31:53 +0200
14018 @@ -147,6 +147,8 @@ extern struct timespec ns_to_timespec(co
14019   */
14020  extern struct timeval ns_to_timeval(const nsec_t nsec);
14021  
14022 +#include <linux/vs_time.h>
14023 +
14024  #endif /* __KERNEL__ */
14025  
14026  #define NFDBITS                        __NFDBITS
14027 diff -NurpP --minimal linux-2.6.16.20/include/linux/types.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/types.h
14028 --- linux-2.6.16.20/include/linux/types.h       2006-02-18 14:40:35 +0100
14029 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/types.h  2006-04-26 19:07:00 +0200
14030 @@ -38,6 +38,9 @@ typedef __kernel_uid32_t      uid_t;
14031  typedef __kernel_gid32_t       gid_t;
14032  typedef __kernel_uid16_t        uid16_t;
14033  typedef __kernel_gid16_t        gid16_t;
14034 +typedef unsigned int           xid_t;
14035 +typedef unsigned int           nid_t;
14036 +typedef unsigned int           tag_t;
14037  
14038  #ifdef CONFIG_UID16
14039  /* This is defined by include/asm-{arch}/posix_types.h */
14040 diff -NurpP --minimal linux-2.6.16.20/include/linux/vroot.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vroot.h
14041 --- linux-2.6.16.20/include/linux/vroot.h       1970-01-01 01:00:00 +0100
14042 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vroot.h  2006-04-26 19:07:00 +0200
14043 @@ -0,0 +1,51 @@
14044 +
14045 +/*
14046 + * include/linux/vroot.h
14047 + *
14048 + * written by Herbert Pötzl, 9/11/2002
14049 + * ported to 2.6 by Herbert Pötzl, 30/12/2004
14050 + *
14051 + * Copyright (C) 2002-2005 by Herbert Pötzl.
14052 + * Redistribution of this file is permitted under the
14053 + * GNU General Public License.
14054 + */
14055 +
14056 +#ifndef _LINUX_VROOT_H
14057 +#define _LINUX_VROOT_H
14058 +
14059 +
14060 +#ifdef __KERNEL__
14061 +
14062 +/* Possible states of device */
14063 +enum {
14064 +       Vr_unbound,
14065 +       Vr_bound,
14066 +};
14067 +
14068 +struct vroot_device {
14069 +       int             vr_number;
14070 +       int             vr_refcnt;
14071 +
14072 +       struct semaphore        vr_ctl_mutex;
14073 +       struct block_device    *vr_device;
14074 +       int                     vr_state;       /* presumably Vr_unbound or Vr_bound -- confirm */
14075 +};
14076 +
14077 +
14078 +typedef struct block_device *(vroot_grb_func)(struct block_device *);
14079 +
14080 +extern int register_vroot_grb(vroot_grb_func *);
14081 +extern int unregister_vroot_grb(vroot_grb_func *);
14082 +
14083 +#endif /* __KERNEL__ */
14084 +
14085 +#define MAX_VROOT_DEFAULT      8
14086 +
14087 +/*
14088 + * IOCTL commands --- we will commandeer 0x56 ('V')
14089 + */
14090 +
14091 +#define VROOT_SET_DEV          0x5600
14092 +#define VROOT_CLR_DEV          0x5601
14093 +
14094 +#endif /* _LINUX_VROOT_H */
14095 diff -NurpP --minimal linux-2.6.16.20/include/linux/vs_base.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_base.h
14096 --- linux-2.6.16.20/include/linux/vs_base.h     1970-01-01 01:00:00 +0100
14097 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_base.h        2006-04-27 21:24:37 +0200
14098 @@ -0,0 +1,130 @@
14099 +#ifndef _VX_VS_BASE_H
14100 +#define _VX_VS_BASE_H
14101 +
14102 +#include "vserver/context.h"
14103 +
14104 +
14105 +#define vx_task_xid(t) ((t)->xid)      /* the xid field of task t */
14106 +
14107 +#define vx_current_xid() vx_task_xid(current)  /* xid of the current task */
14108 +
14109 +#define vx_check(c,m)  __vx_check(vx_current_xid(),c,m)        /* check c against current's xid */
14110 +
14111 +#define vx_weak_check(c,m)     ((m) ? vx_check(c,m) : 1)       /* a zero mode m always passes */
14112 +
14113 +
14114 +/*
14115 + * check context cid for ADMIN/WATCH and optionally check the supplied
14116 + * id (IDENT, DYNAMIC, STATIC, HOSTID); returns 1 if any selected test hits
14117 + */
14118 +static inline int __vx_check(xid_t cid, xid_t id, unsigned int mode)
14119 +{
14120 +       if (mode & VX_ARG_MASK) {
14121 +               if ((mode & VX_IDENT) &&        /* id is the checking context itself */
14122 +                       (id == cid))
14123 +                       return 1;
14124 +       }
14125 +       if (mode & VX_ATR_MASK) {
14126 +               if ((mode & VX_DYNAMIC) &&      /* id in [MIN_D_CONTEXT, MAX_S_CONTEXT] */
14127 +                       (id >= MIN_D_CONTEXT) &&
14128 +                       (id <= MAX_S_CONTEXT))
14129 +                       return 1;
14130 +               if ((mode & VX_STATIC) &&       /* id in (1, MIN_D_CONTEXT) */
14131 +                       (id > 1) && (id < MIN_D_CONTEXT))
14132 +                       return 1;
14133 +       }
14134 +       return (((mode & VX_ADMIN) && (cid == 0)) ||    /* checker is xid 0 */
14135 +               ((mode & VX_WATCH) && (cid == 1)) ||    /* checker is xid 1 */
14136 +               ((mode & VX_HOSTID) && (id == 0)));     /* target is xid 0 */
14137 +}
14138 +
14139 +
14140 +#define __vx_state(v)  ((v) ? ((v)->vx_state) : 0)
14141 +
14142 +#define vx_info_state(v,m)     (__vx_state(v) & (m))
14143 +
14144 +
14145 +/* generic flag merging: v = value, m = mask, f = flags, n = bit number */
14146 +
14147 +#define vx_check_flags(v,m,f)  (((v) & (m)) ^ (f))     /* zero iff (v & m) == f */
14148 +
14149 +#define vx_mask_flags(v,f,m)   (((v) & ~(m)) | ((f) & (m)))    /* bits in m come from f, others from v */
14150 +
14151 +#define vx_mask_mask(v,f,m)    (((v) & ~(m)) | ((v) & (f) & (m)))      /* within m, keep v bits also set in f */
14152 +
14153 +#define vx_check_bit(v,n)      ((v) & (1LL << (n)))    /* non-zero iff bit n of v is set */
14154 +
14155 +
14156 +/* context flags */
14157 +
14158 +#define __vx_flags(v)  ((v) ? (v)->vx_flags : 0)
14159 +
14160 +#define vx_current_flags()     __vx_flags(current->vx_info)
14161 +
14162 +#define vx_info_flags(v,m,f) \
14163 +       vx_check_flags(__vx_flags(v),(m),(f))
14164 +
14165 +#define task_vx_flags(t,m,f) \
14166 +       ((t) && vx_info_flags((t)->vx_info, (m), (f)))
14167 +
14168 +#define vx_flags(m,f)  vx_info_flags(current->vx_info,(m),(f))
14169 +
14170 +
14171 +/* context caps: a NULL vx_info yields no ccaps (0) but all mcaps (~0) */
14172 +
14173 +#define __vx_ccaps(v)  ((v) ? (v)->vx_ccaps : 0)
14174 +
14175 +#define vx_current_ccaps()     __vx_ccaps(current->vx_info)
14176 +
14177 +#define vx_info_ccaps(v,c)     (__vx_ccaps(v) & (c))
14178 +
14179 +#define vx_ccaps(c)    vx_info_ccaps(current->vx_info,(c))
14180 +
14181 +/* mcaps are read from vx_ccaps shifted right by 32 bits */
14182 +#define __vx_mcaps(v)  ((v) ? (v)->vx_ccaps >> 32UL : ~0 )
14183 +
14184 +#define vx_info_mcaps(v,c)     (__vx_mcaps(v) & (c))
14185 +
14186 +#define vx_mcaps(c)    vx_info_mcaps(current->vx_info,(c))
14187 +
14188 +
14189 +/* context bcap mask */
14190 +
14191 +#define __vx_bcaps(v)  ((v) ? (v)->vx_bcaps : ~0 )
14192 +
14193 +#define vx_current_bcaps()     __vx_bcaps(current->vx_info)
14194 +
14195 +#define vx_info_bcaps(v,c)     (__vx_bcaps(v) & (c))
14196 +
14197 +#define vx_bcaps(c)    vx_info_bcaps(current->vx_info,(c))
14198 +
14199 +
14200 +#define vx_info_cap_bset(v)    ((v) ? (v)->vx_cap_bset : cap_bset)
14201 +
14202 +#define vx_current_cap_bset()  vx_info_cap_bset(current->vx_info)
14203 +
14204 +/* b masked by the context bcaps, except while VXF_STATE_SETUP is set (then b as is) */
14205 +#define __vx_info_mbcap(v,b) \
14206 +       (!vx_info_flags(v, VXF_STATE_SETUP, 0) ? \
14207 +       vx_info_bcaps(v, b) : (b))
14208 +
14209 +#define vx_info_mbcap(v,b)     __vx_info_mbcap(v,cap_t(b))
14210 +
14211 +#define task_vx_mbcap(t,b) \
14212 +       vx_info_mbcap((t)->vx_info, (t)->b)
14213 +
14214 +#define vx_mbcap(b)    task_vx_mbcap(current,b)
14215 +
14216 +#define vx_cap_raised(v,c,f)   (vx_info_mbcap(v,c) & CAP_TO_MASK(f))
14217 +
14218 +#define vx_capable(b,c) (capable(b) || \
14219 +       (cap_raised(current->cap_effective,b) && vx_ccaps(c)))
14220 +
14221 +
14222 +#define vx_current_initpid(n) \
14223 +       (current->vx_info && \
14224 +       (current->vx_info->vx_initpid == (n)))
14225 +
14226 +#else
14227 +#warning duplicate inclusion
14228 +#endif
14229 diff -NurpP --minimal linux-2.6.16.20/include/linux/vs_context.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_context.h
14230 --- linux-2.6.16.20/include/linux/vs_context.h  1970-01-01 01:00:00 +0100
14231 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_context.h     2006-04-28 03:25:06 +0200
14232 @@ -0,0 +1,241 @@
14233 +#ifndef _VX_VS_CONTEXT_H
14234 +#define _VX_VS_CONTEXT_H
14235 +
14236 +#include <linux/kernel.h>
14237 +#include "vserver/debug.h"
14238 +#include "vserver/history.h"
14239 +
14240 +/* get_vx_info(): take a vx_usecnt reference on i; a NULL i is returned as NULL */
14241 +#define get_vx_info(i) __get_vx_info(i,__FILE__,__LINE__,__HERE__)
14242 +
14243 +static inline struct vx_info *__get_vx_info(struct vx_info *vxi,
14244 +       const char *_file, int _line, void *_here)
14245 +{
14246 +       if (!vxi)
14247 +               return NULL;
14248 +       /* vxi is non-NULL from here on; the count is logged before the increment */
14249 +       vxlprintk(VXD_CBIT(xid, 2), "get_vx_info(%p[#%d.%d])",
14250 +               vxi, vxi->vx_id, atomic_read(&vxi->vx_usecnt),
14251 +               _file, _line);
14252 +       __vxh_get_vx_info(vxi, _here);
14253 +
14254 +       atomic_inc(&vxi->vx_usecnt);
14255 +       return vxi;
14256 +}
14257 +
14258 +
14259 +extern void free_vx_info(struct vx_info *);
14260 +/* put_vx_info(): drop a vx_usecnt reference; the last one calls free_vx_info() */
14261 +#define put_vx_info(i) __put_vx_info(i,__FILE__,__LINE__,__HERE__)
14262 +
14263 +static inline void __put_vx_info(struct vx_info *vxi,
14264 +       const char *_file, int _line, void *_here)
14265 +{
14266 +       if (!vxi)
14267 +               return;
14268 +       /* vxi is non-NULL from here on; the count is logged before the decrement */
14269 +       vxlprintk(VXD_CBIT(xid, 2), "put_vx_info(%p[#%d.%d])",
14270 +               vxi, vxi->vx_id, atomic_read(&vxi->vx_usecnt),
14271 +               _file, _line);
14272 +       __vxh_put_vx_info(vxi, _here);
14273 +
14274 +       if (atomic_dec_and_test(&vxi->vx_usecnt))
14275 +               free_vx_info(vxi);
14276 +}
14277 +
14278 +/* init_vx_info(): store i in *p unconditionally, taking a reference when i is set */
14279 +#define init_vx_info(p,i) __init_vx_info(p,i,__FILE__,__LINE__,__HERE__)
14280 +
14281 +static inline void __init_vx_info(struct vx_info **vxp, struct vx_info *vxi,
14282 +       const char *_file, int _line, void *_here)
14283 +{
14284 +       if (vxi) {
14285 +               vxlprintk(VXD_CBIT(xid, 3),
14286 +                       "init_vx_info(%p[#%d.%d])",
14287 +                       vxi, vxi->vx_id,
14288 +                       atomic_read(&vxi->vx_usecnt),
14289 +                       _file, _line);
14290 +               __vxh_init_vx_info(vxi, vxp, _here);
14291 +               /* reference held on behalf of *vxp */
14292 +               atomic_inc(&vxi->vx_usecnt);
14293 +       }
14294 +       *vxp = vxi;
14295 +}
14296 +
14297 +/* set_vx_info(): reference i and store it in *p, which must be empty; NULL i is a no-op */
14298 +#define set_vx_info(p,i) __set_vx_info(p,i,__FILE__,__LINE__,__HERE__)
14299 +
14300 +static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi,
14301 +       const char *_file, int _line, void *_here)
14302 +{
14303 +       struct vx_info *vxo;
14304 +
14305 +       if (!vxi)
14306 +               return;
14307 +
14308 +       vxlprintk(VXD_CBIT(xid, 3), "set_vx_info(%p[#%d.%d])",
14309 +               vxi, vxi->vx_id,
14310 +               atomic_read(&vxi->vx_usecnt),
14311 +               _file, _line);
14312 +       __vxh_set_vx_info(vxi, vxp, _here);
14313 +       /* take the reference first, then swap it in; a previous value is a bug */
14314 +       atomic_inc(&vxi->vx_usecnt);
14315 +       vxo = xchg(vxp, vxi);
14316 +       BUG_ON(vxo);
14317 +}
14318 +
14319 +/* clr_vx_info(): empty *p and drop the reference it held, freeing on the last one */
14320 +#define clr_vx_info(p) __clr_vx_info(p,__FILE__,__LINE__,__HERE__)
14321 +
14322 +static inline void __clr_vx_info(struct vx_info **vxp,
14323 +       const char *_file, int _line, void *_here)
14324 +{
14325 +       struct vx_info *vxo = xchg(vxp, NULL);
14326 +
14327 +       /* the slot was already empty */
14328 +       if (!vxo)
14329 +               return;
14330 +
14331 +       vxlprintk(VXD_CBIT(xid, 3), "clr_vx_info(%p[#%d.%d])",
14332 +               vxo, vxo->vx_id,
14333 +               atomic_read(&vxo->vx_usecnt),
14334 +               _file, _line);
14335 +       __vxh_clr_vx_info(vxo, vxp, _here);
14336 +
14337 +       if (atomic_dec_and_test(&vxo->vx_usecnt))
14338 +               free_vx_info(vxo);
14339 +}
14340 +
14341 +/* claim_vx_info(): increment v->vx_tasks; p is only passed to the debug/history hooks */
14342 +#define claim_vx_info(v,p) \
14343 +       __claim_vx_info(v,p,__FILE__,__LINE__,__HERE__)
14344 +
14345 +static inline void __claim_vx_info(struct vx_info *vxi,
14346 +       struct task_struct *task,
14347 +       const char *_file, int _line, void *_here)
14348 +{
14349 +       vxlprintk(VXD_CBIT(xid, 3), "claim_vx_info(%p[#%d.%d.%d]) %p",
14350 +               vxi, vxi?vxi->vx_id:0,
14351 +               vxi?atomic_read(&vxi->vx_usecnt):0,
14352 +               vxi?atomic_read(&vxi->vx_tasks):0,
14353 +               task, _file, _line);
14354 +       __vxh_claim_vx_info(vxi, task, _here);
14355 +       /* unlike the logging above, this dereferences vxi unconditionally */
14356 +       atomic_inc(&vxi->vx_tasks);
14357 +}
14358 +
14359 +
14360 +extern void unhash_vx_info(struct vx_info *);
14361 +/* release_vx_info(): decrement v->vx_tasks; reaching zero calls unhash_vx_info(v) */
14362 +#define release_vx_info(v,p) \
14363 +       __release_vx_info(v,p,__FILE__,__LINE__,__HERE__)
14364 +
14365 +static inline void __release_vx_info(struct vx_info *vxi,
14366 +       struct task_struct *task,
14367 +       const char *_file, int _line, void *_here)
14368 +{
14369 +       vxlprintk(VXD_CBIT(xid, 3), "release_vx_info(%p[#%d.%d.%d]) %p",
14370 +               vxi, vxi?vxi->vx_id:0,
14371 +               vxi?atomic_read(&vxi->vx_usecnt):0,
14372 +               vxi?atomic_read(&vxi->vx_tasks):0,
14373 +               task, _file, _line);
14374 +       __vxh_release_vx_info(vxi, task, _here);
14375 +
14376 +       might_sleep();
14377 +       /* unlike the logging above, this dereferences vxi unconditionally */
14378 +       if (atomic_dec_and_test(&vxi->vx_tasks))
14379 +               unhash_vx_info(vxi);
14380 +}
14381 +
14382 +/* task_get_vx_info(): reference p->vx_info under task_lock(p); NULL if p has none */
14383 +#define task_get_vx_info(p) \
14384 +       __task_get_vx_info(p,__FILE__,__LINE__,__HERE__)
14385 +
14386 +static inline struct vx_info *__task_get_vx_info(struct task_struct *p,
14387 +       const char *_file, int _line, void *_here)
14388 +{
14389 +       struct vx_info *vxi;
14390 +
14391 +       task_lock(p);
14392 +       vxlprintk(VXD_CBIT(xid, 5), "task_get_vx_info(%p)",
14393 +               p, _file, _line);
14394 +       vxi = __get_vx_info(p->vx_info, _file, _line, _here);   /* read and reference while locked */
14395 +       task_unlock(p);
14396 +       return vxi;
14397 +}
14398 +
14399 +/* wake interruptible sleepers on vxi->vx_wait, if the wait queue is active */
14400 +static inline void __wakeup_vx_info(struct vx_info *vxi)
14401 +{
14402 +       if (waitqueue_active(&vxi->vx_wait))
14403 +               wake_up_interruptible(&vxi->vx_wait);
14404 +}
14405 +
14406 +/* enter_vx_info(): switch current to context v, saving the old vx_info/xid in s */
14407 +#define enter_vx_info(v,s)     __enter_vx_info(v,s,__FILE__,__LINE__)
14408 +
14409 +static inline void __enter_vx_info(struct vx_info *vxi,
14410 +       struct vx_info_save *vxis, const char *_file, int _line)
14411 +{
14412 +       vxlprintk(VXD_CBIT(xid, 5), "enter_vx_info(%p[#%d],%p) %p[#%d,%p]",
14413 +               vxi, vxi ? vxi->vx_id : 0, vxis, current,
14414 +               current->xid, current->vx_info, _file, _line);
14415 +       vxis->vxi = xchg(&current->vx_info, vxi);       /* no vx_usecnt change is made here */
14416 +       vxis->xid = current->xid;
14417 +       current->xid = vxi ? vxi->vx_id : 0;    /* a NULL vxi means xid 0 */
14418 +}
14419 +/* leave_vx_info(): restore current's vx_info and xid from the values saved in s */
14420 +#define leave_vx_info(s)       __leave_vx_info(s,__FILE__,__LINE__)
14421 +
14422 +static inline void __leave_vx_info(struct vx_info_save *vxis,
14423 +       const char *_file, int _line)
14424 +{
14425 +       vxlprintk(VXD_CBIT(xid, 5), "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]",
14426 +               vxis, vxis->xid, vxis->vxi, current,
14427 +               current->xid, current->vx_info, _file, _line);
14428 +       (void)xchg(&current->vx_info, vxis->vxi);       /* the replaced pointer is discarded */
14429 +       current->xid = vxis->xid;
14430 +}
14431 +
14432 +
14433 +static inline void __enter_vx_admin(struct vx_info_save *vxis)
14434 +{
14435 +       vxis->vxi = xchg(&current->vx_info, NULL);
14436 +       vxis->xid = current->xid;
14437 +       current->xid = 0;
14438 +}
14439 +
14440 +static inline void __leave_vx_admin(struct vx_info_save *vxis)
14441 +{
14442 +       if (vxis->vxi)
14443 +               (void)xchg(&current->vx_info, vxis->vxi);
14444 +       current->xid = vxis->xid;
14445 +}
14446 +
14447 +extern void exit_vx_info(struct task_struct *, int);
14448 +
14449 +
14450 +static inline
14451 +struct task_struct *vx_child_reaper(struct task_struct *p)
14452 +{
14453 +       struct vx_info *vxi = p->vx_info;
14454 +       struct task_struct *reaper = child_reaper;
14455 +       /* p has no vx_info: keep the global child_reaper */
14456 +       if (!vxi)
14457 +               goto out;
14458 +
14459 +       BUG_ON(!p->vx_info->vx_reaper);
14460 +
14461 +       /* the guest reaper itself is handed to the global child_reaper */
14462 +       if (vxi->vx_reaper == p)
14463 +               goto out;
14464 +       /* any other task of the guest is handed to the guest's reaper */
14465 +       reaper = vxi->vx_reaper;
14466 +out:
14467 +       return reaper;
14468 +}
14469 +
14470 +
14471 +#else
14472 +#warning duplicate inclusion
14473 +#endif
14474 diff -NurpP --minimal linux-2.6.16.20/include/linux/vs_cvirt.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_cvirt.h
14475 --- linux-2.6.16.20/include/linux/vs_cvirt.h    1970-01-01 01:00:00 +0100
14476 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_cvirt.h       2006-04-26 19:07:00 +0200
14477 @@ -0,0 +1,59 @@
14478 +#ifndef _VX_VS_CVIRT_H
14479 +#define _VX_VS_CVIRT_H
14480 +
14481 +#include "vserver/cvirt.h"
14482 +#include "vserver/debug.h"
14483 +
14484 +
14485 +/* utsname virtualization */
14486 +
14487 +static inline struct new_utsname *vx_new_utsname(void)
14488 +{
14489 +       if (current->vx_info)
14490 +               return &current->vx_info->cvirt.utsname;
14491 +       return &system_utsname;
14492 +}
14493 +
14494 +#define vx_new_uts(x)          ((vx_new_utsname())->x)
14495 +
14496 +
14497 +static inline void vx_activate_task(struct task_struct *p)
14498 +{
14499 +       struct vx_info *vxi;
14500 +
14501 +       if ((vxi = p->vx_info)) {
14502 +               vx_update_load(vxi);
14503 +               atomic_inc(&vxi->cvirt.nr_running);
14504 +       }
14505 +}
14506 +
14507 +static inline void vx_deactivate_task(struct task_struct *p)
14508 +{
14509 +       struct vx_info *vxi;
14510 +
14511 +       if ((vxi = p->vx_info)) {
14512 +               vx_update_load(vxi);
14513 +               atomic_dec(&vxi->cvirt.nr_running);
14514 +       }
14515 +}
14516 +
14517 +static inline void vx_uninterruptible_inc(struct task_struct *p)
14518 +{
14519 +       struct vx_info *vxi;
14520 +
14521 +       if ((vxi = p->vx_info))
14522 +               atomic_inc(&vxi->cvirt.nr_uninterruptible);
14523 +}
14524 +
14525 +static inline void vx_uninterruptible_dec(struct task_struct *p)
14526 +{
14527 +       struct vx_info *vxi;
14528 +
14529 +       if ((vxi = p->vx_info))
14530 +               atomic_dec(&vxi->cvirt.nr_uninterruptible);
14531 +}
14532 +
14533 +
14534 +#else
14535 +#warning duplicate inclusion
14536 +#endif
14537 diff -NurpP --minimal linux-2.6.16.20/include/linux/vs_dlimit.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_dlimit.h
14538 --- linux-2.6.16.20/include/linux/vs_dlimit.h   1970-01-01 01:00:00 +0100
14539 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_dlimit.h      2006-04-26 19:07:00 +0200
14540 @@ -0,0 +1,213 @@
14541 +#ifndef _VX_VS_DLIMIT_H
14542 +#define _VX_VS_DLIMIT_H
14543 +
14544 +#include "vserver/dlimit.h"
14545 +#include "vserver/debug.h"
14546 +
14547 +
14548 +#define get_dl_info(i) __get_dl_info(i,__FILE__,__LINE__)
14549 +
14550 +static inline struct dl_info *__get_dl_info(struct dl_info *dli,
14551 +       const char *_file, int _line)
14552 +{
14553 +       if (!dli)
14554 +               return NULL;
14555 +       vxlprintk(VXD_CBIT(dlim, 4), "get_dl_info(%p[#%d.%d])",
14556 +               dli, dli?dli->dl_tag:0, dli?atomic_read(&dli->dl_usecnt):0,
14557 +               _file, _line);
14558 +       atomic_inc(&dli->dl_usecnt);
14559 +       return dli;
14560 +}
14561 +
14562 +
14563 +#define free_dl_info(i) \
14564 +       call_rcu(&(i)->dl_rcu, rcu_free_dl_info)
14565 +
14566 +#define put_dl_info(i) __put_dl_info(i,__FILE__,__LINE__)
14567 +
14568 +static inline void __put_dl_info(struct dl_info *dli,
14569 +       const char *_file, int _line)
14570 +{
14571 +       if (!dli)
14572 +               return;
14573 +       vxlprintk(VXD_CBIT(dlim, 4), "put_dl_info(%p[#%d.%d])",
14574 +               dli, dli?dli->dl_tag:0, dli?atomic_read(&dli->dl_usecnt):0,
14575 +               _file, _line);
14576 +       if (atomic_dec_and_test(&dli->dl_usecnt))
14577 +               free_dl_info(dli);
14578 +}
14579 +
14580 +
14581 +#define __dlimit_char(d)       ((d)?'*':' ')
14582 +
14583 +static inline int __dl_alloc_space(struct super_block *sb,
14584 +       tag_t tag, dlsize_t nr, const char *file, int line)
14585 +{
14586 +       struct dl_info *dli = NULL;
14587 +       int ret = 0;
14588 +
14589 +       if (nr == 0)
14590 +               goto out;
14591 +       dli = locate_dl_info(sb, tag);
14592 +       if (!dli)
14593 +               goto out;
14594 +
14595 +       spin_lock(&dli->dl_lock);
14596 +       ret = (dli->dl_space_used + nr > dli->dl_space_total);
14597 +       if (!ret)
14598 +               dli->dl_space_used += nr;
14599 +       spin_unlock(&dli->dl_lock);
14600 +       put_dl_info(dli);
14601 +out:
14602 +       vxlprintk(VXD_CBIT(dlim, 1),
14603 +               "ALLOC (%p,#%d)%c %lld bytes (%d)",
14604 +               sb, tag, __dlimit_char(dli), (long long)nr,
14605 +               ret, file, line);
14606 +       return ret;
14607 +}
14608 +
14609 +static inline void __dl_free_space(struct super_block *sb,
14610 +       tag_t tag, dlsize_t nr, const char *_file, int _line)
14611 +{
14612 +       struct dl_info *dli = NULL;
14613 +
14614 +       if (nr == 0)
14615 +               goto out;
14616 +       dli = locate_dl_info(sb, tag);
14617 +       if (!dli)
14618 +               goto out;
14619 +
14620 +       spin_lock(&dli->dl_lock);
14621 +       if (dli->dl_space_used > nr)
14622 +               dli->dl_space_used -= nr;
14623 +       else
14624 +               dli->dl_space_used = 0;
14625 +       spin_unlock(&dli->dl_lock);
14626 +       put_dl_info(dli);
14627 +out:
14628 +       vxlprintk(VXD_CBIT(dlim, 1),
14629 +               "FREE  (%p,#%d)%c %lld bytes",
14630 +               sb, tag, __dlimit_char(dli), (long long)nr,
14631 +               _file, _line);
14632 +}
14633 +
14634 +static inline int __dl_alloc_inode(struct super_block *sb,
14635 +       tag_t tag, const char *_file, int _line)
14636 +{
14637 +       struct dl_info *dli;
14638 +       int ret = 0;
14639 +
14640 +       dli = locate_dl_info(sb, tag);
14641 +       if (!dli)
14642 +               goto out;
14643 +
14644 +       spin_lock(&dli->dl_lock);
14645 +       ret = (dli->dl_inodes_used >= dli->dl_inodes_total);
14646 +       if (!ret)
14647 +               dli->dl_inodes_used++;
14648 +#if 0
14649 +       else
14650 +               vxwprintk("DLIMIT hit (%p,#%d), inode %d>=%d @ %s:%d",
14651 +                       sb, tag,
14652 +                       dli->dl_inodes_used, dli->dl_inodes_total,
14653 +                       file, line);
14654 +#endif
14655 +       spin_unlock(&dli->dl_lock);
14656 +       put_dl_info(dli);
14657 +out:
14658 +       vxlprintk(VXD_CBIT(dlim, 0),
14659 +               "ALLOC (%p,#%d)%c inode (%d)",
14660 +               sb, tag, __dlimit_char(dli), ret, _file, _line);
14661 +       return ret;
14662 +}
14663 +
14664 +static inline void __dl_free_inode(struct super_block *sb,
14665 +       tag_t tag, const char *_file, int _line)
14666 +{
14667 +       struct dl_info *dli;
14668 +
14669 +       dli = locate_dl_info(sb, tag);
14670 +       if (!dli)
14671 +               goto out;
14672 +
14673 +       spin_lock(&dli->dl_lock);
14674 +       if (dli->dl_inodes_used > 1)
14675 +               dli->dl_inodes_used--;
14676 +       else
14677 +               dli->dl_inodes_used = 0;
14678 +       spin_unlock(&dli->dl_lock);
14679 +       put_dl_info(dli);
14680 +out:
14681 +       vxlprintk(VXD_CBIT(dlim, 0),
14682 +               "FREE  (%p,#%d)%c inode",
14683 +               sb, tag, __dlimit_char(dli), _file, _line);
14684 +}
14685 +
14686 +static inline void __dl_adjust_block(struct super_block *sb, tag_t tag,
14687 +       unsigned int *free_blocks, unsigned int *root_blocks,
14688 +       const char *_file, int _line)
14689 +{
14690 +       struct dl_info *dli;
14691 +       uint64_t broot, bfree;
14692 +       /* clamp the caller's block counts to the dlimit of (sb, tag), if any */
14693 +       dli = locate_dl_info(sb, tag);
14694 +       if (!dli)
14695 +               return;
14696 +       /* convert the byte limits to filesystem blocks under the lock */
14697 +       spin_lock(&dli->dl_lock);
14698 +       broot = (dli->dl_space_total -
14699 +               (dli->dl_space_total >> 10) * dli->dl_nrlmult)
14700 +               >> sb->s_blocksize_bits;
14701 +       bfree = (dli->dl_space_total - dli->dl_space_used)
14702 +                       >> sb->s_blocksize_bits;
14703 +       spin_unlock(&dli->dl_lock);
14704 +       /* either pointer may be NULL, so guard the debug output as well */
14705 +       vxlprintk(VXD_CBIT(dlim, 2),
14706 +               "ADJUST: %lld,%lld on %d,%d [mult=%d]",
14707 +               (long long)bfree, (long long)broot,
14708 +               free_blocks ? *free_blocks : 0,
14709 +               root_blocks ? *root_blocks : 0, dli->dl_nrlmult, _file, _line);
14710 +       if (free_blocks) {
14711 +               if (*free_blocks > bfree)
14712 +                       *free_blocks = bfree;
14713 +       }
14714 +       if (root_blocks) {
14715 +               if (*root_blocks > broot)
14716 +                       *root_blocks = broot;
14717 +       }
14718 +       put_dl_info(dli);
14719 +}
14720 +
14721 +#define DLIMIT_ALLOC_SPACE(in, bytes) \
14722 +       __dl_alloc_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \
14723 +               __FILE__, __LINE__ )
14724 +
14725 +#define DLIMIT_FREE_SPACE(in, bytes) \
14726 +       __dl_free_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \
14727 +               __FILE__, __LINE__ )
14728 +
14729 +#define DLIMIT_ALLOC_BLOCK(in, nr) \
14730 +       __dl_alloc_space((in)->i_sb, (in)->i_tag, \
14731 +               ((dlsize_t)(nr)) << (in)->i_sb->s_blocksize_bits, \
14732 +               __FILE__, __LINE__ )
14733 +
14734 +#define DLIMIT_FREE_BLOCK(in, nr) \
14735 +       __dl_free_space((in)->i_sb, (in)->i_tag, \
14736 +               ((dlsize_t)(nr)) << (in)->i_sb->s_blocksize_bits, \
14737 +               __FILE__, __LINE__ )
14738 +
14739 +
14740 +#define DLIMIT_ALLOC_INODE(in) \
14741 +       __dl_alloc_inode((in)->i_sb, (in)->i_tag, __FILE__, __LINE__ )
14742 +
14743 +#define DLIMIT_FREE_INODE(in) \
14744 +       __dl_free_inode((in)->i_sb, (in)->i_tag, __FILE__, __LINE__ )
14745 +
14746 +
14747 +#define DLIMIT_ADJUST_BLOCK(sb, tag, fb, rb) \
14748 +       __dl_adjust_block(sb, tag, fb, rb, __FILE__, __LINE__ )
14749 +
14750 +
14751 +#else
14752 +#warning duplicate inclusion
14753 +#endif
14754 diff -NurpP --minimal linux-2.6.16.20/include/linux/vs_limit.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_limit.h
14755 --- linux-2.6.16.20/include/linux/vs_limit.h    1970-01-01 01:00:00 +0100
14756 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_limit.h       2006-05-02 02:10:17 +0200
14757 @@ -0,0 +1,137 @@
14758 +#ifndef _VX_VS_LIMIT_H
14759 +#define _VX_VS_LIMIT_H
14760 +
14761 +#include "vserver/limit.h"
14762 +#include "vserver/debug.h"
14763 +#include "vserver/limit_int.h"
14764 +
14765 +
14766 +#define vx_acc_cres(v,d,p,r) \
14767 +       __vx_acc_cres(v, r, d, p, __FILE__, __LINE__)
14768 +
14769 +#define vx_acc_cres_cond(x,d,p,r) \
14770 +       __vx_acc_cres(((x) == vx_current_xid()) ? current->vx_info : 0, \
14771 +       r, d, p, __FILE__, __LINE__)
14772 +
14773 +
14774 +#define vx_add_cres(v,a,p,r) \
14775 +       __vx_add_cres(v, r, a, p, __FILE__, __LINE__)
14776 +#define vx_sub_cres(v,a,p,r)           vx_add_cres(v,-(a),p,r)
14777 +
14778 +#define vx_add_cres_cond(x,a,p,r) \
14779 +       __vx_add_cres(((x) == vx_current_xid()) ? current->vx_info : 0, \
14780 +       r, a, p, __FILE__, __LINE__)
14781 +#define vx_sub_cres_cond(x,a,p,r)      vx_add_cres_cond(x,-(a),p,r)
14782 +
14783 +
14784 +/* process and file limits */
14785 +
14786 +#define vx_nproc_inc(p) \
14787 +       vx_acc_cres((p)->vx_info, 1, p, RLIMIT_NPROC)
14788 +
14789 +#define vx_nproc_dec(p) \
14790 +       vx_acc_cres((p)->vx_info,-1, p, RLIMIT_NPROC)
14791 +
14792 +#define vx_files_inc(f) \
14793 +       vx_acc_cres_cond((f)->f_xid, 1, f, RLIMIT_NOFILE)
14794 +
14795 +#define vx_files_dec(f) \
14796 +       vx_acc_cres_cond((f)->f_xid,-1, f, RLIMIT_NOFILE)
14797 +
14798 +#define vx_locks_inc(l) \
14799 +       vx_acc_cres_cond((l)->fl_xid, 1, l, RLIMIT_LOCKS)
14800 +
14801 +#define vx_locks_dec(l) \
14802 +       vx_acc_cres_cond((l)->fl_xid,-1, l, RLIMIT_LOCKS)
14803 +
14804 +#define vx_openfd_inc(f) \
14805 +       vx_acc_cres(current->vx_info, 1, (void *)(long)(f), VLIMIT_OPENFD)
14806 +
14807 +#define vx_openfd_dec(f) \
14808 +       vx_acc_cres(current->vx_info,-1, (void *)(long)(f), VLIMIT_OPENFD)
14809 +
14810 +
14811 +#define vx_cres_avail(v,n,r) \
14812 +       __vx_cres_avail(v, r, n, __FILE__, __LINE__)
14813 +
14814 +
14815 +#define vx_nproc_avail(n) \
14816 +       vx_cres_avail(current->vx_info, n, RLIMIT_NPROC)
14817 +
14818 +#define vx_files_avail(n) \
14819 +       vx_cres_avail(current->vx_info, n, RLIMIT_NOFILE)
14820 +
14821 +#define vx_locks_avail(n) \
14822 +       vx_cres_avail(current->vx_info, n, RLIMIT_LOCKS)
14823 +
14824 +#define vx_openfd_avail(n) \
14825 +       vx_cres_avail(current->vx_info, n, VLIMIT_OPENFD)
14826 +
14827 +
14828 +/* dentry limits */
14829 +
14830 +#define vx_dentry_inc(d) do {                                          \
14831 +       if (atomic_read(&d->d_count) == 1)                              \
14832 +               vx_acc_cres(current->vx_info, 1, d, VLIMIT_DENTRY);     \
14833 +       } while (0)
14834 +
14835 +#define vx_dentry_dec(d) do {                                          \
14836 +       if (atomic_read(&d->d_count) == 0)                              \
14837 +               vx_acc_cres(current->vx_info,-1, d, VLIMIT_DENTRY);     \
14838 +       } while (0)
14839 +
14840 +#define vx_dentry_avail(n) \
14841 +       vx_cres_avail(current->vx_info, n, VLIMIT_DENTRY)
14842 +
14843 +
14844 +/* socket limits */
14845 +
14846 +#define vx_sock_inc(s) \
14847 +       vx_acc_cres((s)->sk_vx_info, 1, s, VLIMIT_NSOCK)
14848 +
14849 +#define vx_sock_dec(s) \
14850 +       vx_acc_cres((s)->sk_vx_info,-1, s, VLIMIT_NSOCK)
14851 +
14852 +#define vx_sock_avail(n) \
14853 +       vx_cres_avail(current->vx_info, n, VLIMIT_NSOCK)
14854 +
14855 +
14856 +/* ipc resource limits */
14857 +
14858 +#define vx_ipcmsg_add(v,u,a) \
14859 +       vx_add_cres(v, a, u, RLIMIT_MSGQUEUE)
14860 +
14861 +#define vx_ipcmsg_sub(v,u,a) \
14862 +       vx_sub_cres(v, a, u, RLIMIT_MSGQUEUE)
14863 +
14864 +#define vx_ipcmsg_avail(v,a) \
14865 +       vx_cres_avail(v, a, RLIMIT_MSGQUEUE)
14866 +
14867 +
14868 +#define vx_ipcshm_add(v,k,a) \
14869 +       vx_add_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM)
14870 +
14871 +#define vx_ipcshm_sub(v,k,a) \
14872 +       vx_sub_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM)
14873 +
14874 +#define vx_ipcshm_avail(v,a) \
14875 +       vx_cres_avail(v, a, VLIMIT_SHMEM)
14876 +
14877 +
14878 +#define vx_semary_inc(a) \
14879 +       vx_acc_cres(current->vx_info, 1, a, VLIMIT_SEMARY)
14880 +
14881 +#define vx_semary_dec(a) \
14882 +       vx_acc_cres(current->vx_info,-1, a, VLIMIT_SEMARY)
14883 +
14884 +
14885 +#define vx_nsems_add(a,n) \
14886 +       vx_add_cres(current->vx_info, n, a, VLIMIT_NSEMS)
14887 +
14888 +#define vx_nsems_sub(a,n) \
14889 +       vx_sub_cres(current->vx_info, n, a, VLIMIT_NSEMS)
14890 +
14891 +
14892 +#else
14893 +#warning duplicate inclusion
14894 +#endif
14895 diff -NurpP --minimal linux-2.6.16.20/include/linux/vs_memory.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_memory.h
14896 --- linux-2.6.16.20/include/linux/vs_memory.h   1970-01-01 01:00:00 +0100
14897 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_memory.h      2006-04-26 19:07:00 +0200
14898 @@ -0,0 +1,149 @@
14899 +#ifndef _VX_VS_MEMORY_H
14900 +#define _VX_VS_MEMORY_H
14901 +
14902 +#include "vserver/limit.h"
14903 +#include "vserver/debug.h"
14904 +#include "vserver/limit_int.h"
14905 +
14906 +
14907 +#define __acc_add_long(a,v)    (*(v) += (a))
14908 +#define __acc_inc_long(v)      (++*(v))
14909 +#define __acc_dec_long(v)      (--*(v))
14910 +
14911 +#if    NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
14912 +#define __acc_add_atomic(a,v)  atomic_long_add(a,v)
14913 +#define __acc_inc_atomic(v)    atomic_long_inc(v)
14914 +#define __acc_dec_atomic(v)    atomic_long_dec(v)
14915 +#else  /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
14916 +#define __acc_add_atomic(a,v)  __acc_add_long(a,v)
14917 +#define __acc_inc_atomic(v)    __acc_inc_long(v)
14918 +#define __acc_dec_atomic(v)    __acc_dec_long(v)
14919 +#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
14920 +
14921 +
14922 +#define vx_acc_page(m,d,v,r) do {                                      \
14923 +       if ((d) > 0)                                                    \
14924 +               __acc_inc_long(&(m->v));                                \
14925 +       else                                                            \
14926 +               __acc_dec_long(&(m->v));                                \
14927 +       __vx_acc_cres(m->mm_vx_info, r, d, m, __FILE__, __LINE__);      \
14928 +} while (0)
14929 +
14930 +#define vx_acc_page_atomic(m,d,v,r) do {                               \
14931 +       if ((d) > 0)                                                    \
14932 +               __acc_inc_atomic(&(m->v));                              \
14933 +       else                                                            \
14934 +               __acc_dec_atomic(&(m->v));                              \
14935 +       __vx_acc_cres(m->mm_vx_info, r, d, m, __FILE__, __LINE__);      \
14936 +} while (0)
14937 +
14938 +
14939 +#define vx_acc_pages(m,p,v,r) do {                                     \
14940 +       unsigned long __p = (p);                                        \
14941 +       __acc_add_long(__p, &(m->v));                                   \
14942 +       __vx_add_cres(m->mm_vx_info, r, __p, m, __FILE__, __LINE__);    \
14943 +} while (0)
14944 +
14945 +#define vx_acc_pages_atomic(m,p,v,r) do {                              \
14946 +       unsigned long __p = (p);                                        \
14947 +       __acc_add_atomic(__p, &(m->v));                                 \
14948 +       __vx_add_cres(m->mm_vx_info, r, __p, m, __FILE__, __LINE__);    \
14949 +} while (0)
14950 +
14951 +
14952 +
14953 +#define vx_acc_vmpage(m,d) \
14954 +       vx_acc_page(m, d, total_vm,  RLIMIT_AS)
14955 +#define vx_acc_vmlpage(m,d) \
14956 +       vx_acc_page(m, d, locked_vm, RLIMIT_MEMLOCK)
14957 +#define vx_acc_file_rsspage(m,d) \
14958 +       vx_acc_page_atomic(m, d, _file_rss, RLIMIT_RSS)
14959 +#define vx_acc_anon_rsspage(m,d) \
14960 +       vx_acc_page_atomic(m, d, _anon_rss, VLIMIT_ANON)
14961 +
14962 +#define vx_acc_vmpages(m,p) \
14963 +       vx_acc_pages(m, p, total_vm,  RLIMIT_AS)
14964 +#define vx_acc_vmlpages(m,p) \
14965 +       vx_acc_pages(m, p, locked_vm, RLIMIT_MEMLOCK)
14966 +#define vx_acc_file_rsspages(m,p) \
14967 +       vx_acc_pages_atomic(m, p, _file_rss, RLIMIT_RSS)
14968 +#define vx_acc_anon_rsspages(m,p) \
14969 +       vx_acc_pages_atomic(m, p, _anon_rss, VLIMIT_ANON)
14970 +
14971 +#define vx_pages_add(s,r,p)    __vx_add_cres(s, r, p, 0, __FILE__, __LINE__)
14972 +#define vx_pages_sub(s,r,p)    vx_pages_add(s, r, -(p))
14973 +
14974 +#define vx_vmpages_inc(m)              vx_acc_vmpage(m, 1)
14975 +#define vx_vmpages_dec(m)              vx_acc_vmpage(m,-1)
14976 +#define vx_vmpages_add(m,p)            vx_acc_vmpages(m, p)
14977 +#define vx_vmpages_sub(m,p)            vx_acc_vmpages(m,-(p))
14978 +
14979 +#define vx_vmlocked_inc(m)             vx_acc_vmlpage(m, 1)
14980 +#define vx_vmlocked_dec(m)             vx_acc_vmlpage(m,-1)
14981 +#define vx_vmlocked_add(m,p)           vx_acc_vmlpages(m, p)
14982 +#define vx_vmlocked_sub(m,p)           vx_acc_vmlpages(m,-(p))
14983 +
14984 +#define vx_file_rsspages_inc(m)                vx_acc_file_rsspage(m, 1)
14985 +#define vx_file_rsspages_dec(m)                vx_acc_file_rsspage(m,-1)
14986 +#define vx_file_rsspages_add(m,p)      vx_acc_file_rsspages(m, p)
14987 +#define vx_file_rsspages_sub(m,p)      vx_acc_file_rsspages(m,-(p))
14988 +
14989 +#define vx_anon_rsspages_inc(m)                vx_acc_anon_rsspage(m, 1)
14990 +#define vx_anon_rsspages_dec(m)                vx_acc_anon_rsspage(m,-1)
14991 +#define vx_anon_rsspages_add(m,p)      vx_acc_anon_rsspages(m, p)
14992 +#define vx_anon_rsspages_sub(m,p)      vx_acc_anon_rsspages(m,-(p))
14993 +
14994 +
14995 +#define vx_pages_avail(m,p,r) \
14996 +       __vx_cres_avail((m)->mm_vx_info, r, p, __FILE__, __LINE__)
14997 +
14998 +#define vx_vmpages_avail(m,p)  vx_pages_avail(m, p, RLIMIT_AS)
14999 +#define vx_vmlocked_avail(m,p) vx_pages_avail(m, p, RLIMIT_MEMLOCK)
15000 +#define vx_rsspages_avail(m,p) vx_pages_avail(m, p, RLIMIT_RSS)
15001 +#define vx_anonpages_avail(m,p)        vx_pages_avail(m, p, VLIMIT_ANON)
15002 +
15003 +enum {
15004 +       VXPT_UNKNOWN = 0,
15005 +       VXPT_ANON,
15006 +       VXPT_NONE,
15007 +       VXPT_FILE,
15008 +       VXPT_SWAP,
15009 +       VXPT_WRITE
15010 +};
15011 +
15012 +#if 0
15013 +#define        vx_page_fault(mm,vma,type,ret)
15014 +#else
15015 +
15016 +static inline
15017 +void __vx_page_fault(struct mm_struct *mm,
15018 +       struct vm_area_struct *vma, int type, int ret)
15019 +{
15020 +       struct vx_info *vxi = mm->mm_vx_info;
15021 +       int what;
15022 +/*
15023 +       static char *page_type[6] =
15024 +               { "UNKNOWN", "ANON","NONE", "FILE", "SWAP", "WRITE" };
15025 +       static char *page_what[4] =
15026 +               { "FAULT_OOM", "FAULT_SIGBUS", "FAULT_MINOR", "FAULT_MAJOR" };
15027 +*/
15028 +
15029 +       if (!vxi)
15030 +               return;
15031 +
15032 +       what = (ret & 0x3);
15033 +
15034 +/*     printk("[%d] page[%d][%d] %2x %s %s\n", vxi->vx_id,
15035 +               type, what, ret, page_type[type], page_what[what]);
15036 +*/
15037 +       if (ret & VM_FAULT_WRITE)
15038 +               what |= 0x4;
15039 +       atomic_inc(&vxi->cacct.page[type][what]);
15040 +}
15041 +
15042 +#define        vx_page_fault(mm,vma,type,ret)  __vx_page_fault(mm,vma,type,ret)
15043 +#endif
15044 +
15045 +#else
15046 +#warning duplicate inclusion
15047 +#endif
15048 diff -NurpP --minimal linux-2.6.16.20/include/linux/vs_network.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_network.h
15049 --- linux-2.6.16.20/include/linux/vs_network.h  1970-01-01 01:00:00 +0100
15050 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_network.h     2006-04-26 19:07:00 +0200
15051 @@ -0,0 +1,244 @@
15052 +#ifndef _NX_VS_NETWORK_H
15053 +#define _NX_VS_NETWORK_H
15054 +
15055 +#include "vserver/network.h"
15056 +#include "vserver/debug.h"
15057 +
15058 +
15059 +#define get_nx_info(i) __get_nx_info(i,__FILE__,__LINE__)
15060 +
15061 +static inline struct nx_info *__get_nx_info(struct nx_info *nxi,
15062 +       const char *_file, int _line)
15063 +{
15064 +       if (!nxi)
15065 +               return NULL;
15066 +
15067 +       vxlprintk(VXD_CBIT(nid, 2), "get_nx_info(%p[#%d.%d])",
15068 +               nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0,
15069 +               _file, _line);
15070 +
15071 +       atomic_inc(&nxi->nx_usecnt);
15072 +       return nxi;
15073 +}
15074 +
15075 +
15076 +extern void free_nx_info(struct nx_info *);
15077 +
15078 +#define put_nx_info(i) __put_nx_info(i,__FILE__,__LINE__)
15079 +
15080 +static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line)
15081 +{
15082 +       if (!nxi)
15083 +               return;
15084 +
15085 +       vxlprintk(VXD_CBIT(nid, 2), "put_nx_info(%p[#%d.%d])",
15086 +               nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0,
15087 +               _file, _line);
15088 +
15089 +       if (atomic_dec_and_test(&nxi->nx_usecnt))
15090 +               free_nx_info(nxi);
15091 +}
15092 +
15093 +
15094 +#define init_nx_info(p,i) __init_nx_info(p,i,__FILE__,__LINE__)
15095 +
15096 +static inline void __init_nx_info(struct nx_info **nxp, struct nx_info *nxi,
15097 +               const char *_file, int _line)
15098 +{
15099 +       if (nxi) {
15100 +               vxlprintk(VXD_CBIT(nid, 3),
15101 +                       "init_nx_info(%p[#%d.%d])",
15102 +                       nxi, nxi?nxi->nx_id:0,
15103 +                       nxi?atomic_read(&nxi->nx_usecnt):0,
15104 +                       _file, _line);
15105 +
15106 +               atomic_inc(&nxi->nx_usecnt);
15107 +       }
15108 +       *nxp = nxi;
15109 +}
15110 +
15111 +
15112 +#define set_nx_info(p,i) __set_nx_info(p,i,__FILE__,__LINE__)
15113 +
15114 +static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi,
15115 +       const char *_file, int _line)
15116 +{
15117 +       struct nx_info *nxo;
15118 +
15119 +       if (!nxi)
15120 +               return;
15121 +
15122 +       vxlprintk(VXD_CBIT(nid, 3), "set_nx_info(%p[#%d.%d])",
15123 +               nxi, nxi?nxi->nx_id:0,
15124 +               nxi?atomic_read(&nxi->nx_usecnt):0,
15125 +               _file, _line);
15126 +
15127 +       atomic_inc(&nxi->nx_usecnt);
15128 +       nxo = xchg(nxp, nxi);
15129 +       BUG_ON(nxo);
15130 +}
15131 +
15132 +#define clr_nx_info(p) __clr_nx_info(p,__FILE__,__LINE__)
15133 +
15134 +static inline void __clr_nx_info(struct nx_info **nxp,
15135 +       const char *_file, int _line)
15136 +{
15137 +       struct nx_info *nxo;
15138 +
15139 +       nxo = xchg(nxp, NULL);
15140 +       if (!nxo)
15141 +               return;
15142 +
15143 +       vxlprintk(VXD_CBIT(nid, 3), "clr_nx_info(%p[#%d.%d])",
15144 +               nxo, nxo?nxo->nx_id:0,
15145 +               nxo?atomic_read(&nxo->nx_usecnt):0,
15146 +               _file, _line);
15147 +
15148 +       if (atomic_dec_and_test(&nxo->nx_usecnt))
15149 +               free_nx_info(nxo);
15150 +}
15151 +
15152 +
15153 +#define claim_nx_info(v,p) __claim_nx_info(v,p,__FILE__,__LINE__)
15154 +
15155 +static inline void __claim_nx_info(struct nx_info *nxi,
15156 +       struct task_struct *task, const char *_file, int _line)
15157 +{
15158 +       vxlprintk(VXD_CBIT(nid, 3), "claim_nx_info(%p[#%d.%d.%d]) %p",
15159 +               nxi, nxi?nxi->nx_id:0,
15160 +               nxi?atomic_read(&nxi->nx_usecnt):0,
15161 +               nxi?atomic_read(&nxi->nx_tasks):0,
15162 +               task, _file, _line);
15163 +
15164 +       atomic_inc(&nxi->nx_tasks);
15165 +}
15166 +
15167 +
15168 +extern void unhash_nx_info(struct nx_info *);
15169 +
15170 +#define release_nx_info(v,p) __release_nx_info(v,p,__FILE__,__LINE__)
15171 +
15172 +static inline void __release_nx_info(struct nx_info *nxi,
15173 +       struct task_struct *task, const char *_file, int _line)
15174 +{
15175 +       vxlprintk(VXD_CBIT(nid, 3), "release_nx_info(%p[#%d.%d.%d]) %p",
15176 +               nxi, nxi?nxi->nx_id:0,
15177 +               nxi?atomic_read(&nxi->nx_usecnt):0,
15178 +               nxi?atomic_read(&nxi->nx_tasks):0,
15179 +               task, _file, _line);
15180 +
15181 +       might_sleep();
15182 +
15183 +       if (atomic_dec_and_test(&nxi->nx_tasks))
15184 +               unhash_nx_info(nxi);
15185 +}
15186 +
15187 +
15188 +#define task_get_nx_info(i)    __task_get_nx_info(i,__FILE__,__LINE__)
15189 +
15190 +static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p,
15191 +       const char *_file, int _line)
15192 +{
15193 +       struct nx_info *nxi;
15194 +
15195 +       task_lock(p);
15196 +       vxlprintk(VXD_CBIT(nid, 5), "task_get_nx_info(%p)",
15197 +               p, _file, _line);
15198 +       nxi = __get_nx_info(p->nx_info, _file, _line);
15199 +       task_unlock(p);
15200 +       return nxi;
15201 +}
15202 +
15203 +
15204 +#define nx_task_nid(t) ((t)->nid)
15205 +
15206 +#define nx_current_nid() nx_task_nid(current)
15207 +
15208 +#define nx_check(c,m)  __nx_check(nx_current_nid(),c,m)
15209 +
15210 +#define nx_weak_check(c,m)     ((m) ? nx_check(c,m) : 1)
15211 +
15212 +
15213 +/*
15214 + * check current context for ADMIN/WATCH and
15215 + * optionally against supplied argument
15216 + */
15217 +static inline int __nx_check(nid_t cid, nid_t id, unsigned int mode)
15218 +{
15219 +       if (mode & NX_ARG_MASK) {
15220 +               if ((mode & NX_IDENT) &&
15221 +                       (id == cid))
15222 +                       return 1;
15223 +       }
15224 +       if (mode & NX_ATR_MASK) {
15225 +               if ((mode & NX_DYNAMIC) &&
15226 +                       (id >= MIN_D_CONTEXT) &&
15227 +                       (id <= MAX_S_CONTEXT))
15228 +                       return 1;
15229 +               if ((mode & NX_STATIC) &&
15230 +                       (id > 1) && (id < MIN_D_CONTEXT))
15231 +                       return 1;
15232 +       }
15233 +       return (((mode & NX_ADMIN) && (cid == 0)) ||
15234 +               ((mode & NX_WATCH) && (cid == 1)) ||
15235 +               ((mode & NX_BLEND) && (id == 1)) ||
15236 +               ((mode & NX_HOSTID) && (id == 0)));
15237 +}
15238 +
15239 +
15240 +#define __nx_state(v)  ((v) ? ((v)->nx_state) : 0)
15241 +
15242 +#define nx_info_state(v,m)     (__nx_state(v) & (m))
15243 +
15244 +
15245 +#define __nx_flags(v)  ((v) ? (v)->nx_flags : 0)
15246 +
15247 +#define nx_current_flags()     __nx_flags(current->nx_info)
15248 +
15249 +#define nx_info_flags(v,m,f) \
15250 +       vx_check_flags(__nx_flags(v),(m),(f))
15251 +
15252 +#define task_nx_flags(t,m,f) \
15253 +       ((t) && nx_info_flags((t)->nx_info, (m), (f)))
15254 +
15255 +#define nx_flags(m,f)  nx_info_flags(current->nx_info,(m),(f))
15256 +
15257 +
15258 +/* context caps */
15259 +
15260 +#define __nx_ncaps(v)  ((v) ? (v)->nx_ncaps : 0)
15261 +
15262 +#define nx_current_ncaps()     __nx_ncaps(current->nx_info)
15263 +
15264 +#define nx_info_ncaps(v,c)     (__nx_ncaps(v) & (c))
15265 +
15266 +#define nx_ncaps(c)    nx_info_ncaps(current->nx_info,(c))
15267 +
15268 +
15269 +static inline int addr_in_nx_info(struct nx_info *nxi, uint32_t addr)
15270 +{
15271 +       int n,i;
15272 +
15273 +       if (!nxi)
15274 +               return 1;
15275 +
15276 +       n = nxi->nbipv4;
15277 +       if (n && (nxi->ipv4[0] == 0))
15278 +               return 1;
15279 +       for (i=0; i<n; i++) {
15280 +               if (nxi->ipv4[i] == addr)
15281 +                       return 1;
15282 +       }
15283 +       return 0;
15284 +}
15285 +
15286 +static inline void exit_nx_info(struct task_struct *p)
15287 +{
15288 +       if (p->nx_info)
15289 +               release_nx_info(p->nx_info, p);
15290 +}
15291 +
15292 +
15293 +#else
15294 +#warning duplicate inclusion
15295 +#endif
15296 diff -NurpP --minimal linux-2.6.16.20/include/linux/vs_pid.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_pid.h
15297 --- linux-2.6.16.20/include/linux/vs_pid.h      1970-01-01 01:00:00 +0100
15298 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_pid.h 2006-04-26 19:07:00 +0200
15299 @@ -0,0 +1,57 @@
15300 +#ifndef _VX_VS_PID_H
15301 +#define _VX_VS_PID_H
15302 +
15303 +#include "vserver/debug.h"
15304 +
15305 +
15306 +/* pid faking stuff */
15307 +/* Map a pid as seen for context v: the pid equal to v's vx_initpid is reported as 1. */
15308 +/* The macros add the call site (__FUNC__, __FILE__, __LINE__); the tgid variants are plain aliases. */
15309 +#define vx_info_map_pid(v,p) \
15310 +       __vx_info_map_pid((v), (p), __FUNC__, __FILE__, __LINE__)
15311 +#define vx_info_map_tgid(v,p)  vx_info_map_pid(v,p)
15312 +#define vx_map_pid(p)  vx_info_map_pid(current->vx_info, p)
15313 +#define vx_map_tgid(p) vx_map_pid(p)
15314 +/* Worker: translation only happens when the VXF_INFO_INIT flag check succeeds; func/file/line only feed the debug print. */
15315 +static inline int __vx_info_map_pid(struct vx_info *vxi, int pid,
15316 +       const char *func, const char *file, int line)
15317 +{
15318 +       if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) {
15319 +               vxfprintk(VXD_CBIT(cvirt, 2),
15320 +                       "vx_map_tgid: %p/%llx: %d -> %d",
15321 +                       vxi, (long long)vxi->vx_flags, pid,
15322 +                       (pid && pid == vxi->vx_initpid)?1:pid,
15323 +                       func, file, line);
15324 +               if (pid == 0)
15325 +                       return 0;
15326 +               if (pid == vxi->vx_initpid)
15327 +                       return 1;
15328 +       }
15329 +       return pid;
15330 +}
15331 +/* Reverse mapping: pid 1 is translated back to v's vx_initpid (when that is non-zero). */
15332 +#define vx_info_rmap_pid(v,p) \
15333 +       __vx_info_rmap_pid((v), (p), __FUNC__, __FILE__, __LINE__)
15334 +#define vx_rmap_pid(p) vx_info_rmap_pid(current->vx_info, p)
15335 +#define vx_rmap_tgid(p) vx_rmap_pid(p)
15336 +/* Worker: a pid equal to vx_initpid itself is answered with ~0U converted to int; other pids pass through. */
15337 +static inline int __vx_info_rmap_pid(struct vx_info *vxi, int pid,
15338 +       const char *func, const char *file, int line)
15339 +{
15340 +       if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) {
15341 +               vxfprintk(VXD_CBIT(cvirt, 2),
15342 +                       "vx_rmap_tgid: %p/%llx: %d -> %d",
15343 +                       vxi, (long long)vxi->vx_flags, pid,
15344 +                       (pid == 1)?vxi->vx_initpid:pid,
15345 +                       func, file, line);
15346 +               if ((pid == 1) && vxi->vx_initpid)
15347 +                       return vxi->vx_initpid;
15348 +               if (pid == vxi->vx_initpid)
15349 +                       return ~0U;
15350 +       }
15351 +       return pid;
15352 +}
15353 +
15354 +#else
15355 +#warning duplicate inclusion
15356 +#endif
15357 diff -NurpP --minimal linux-2.6.16.20/include/linux/vs_sched.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_sched.h
15358 --- linux-2.6.16.20/include/linux/vs_sched.h    1970-01-01 01:00:00 +0100
15359 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_sched.h       2006-04-27 21:05:56 +0200
15360 @@ -0,0 +1,107 @@
15361 +#ifndef _VX_VS_SCHED_H
15362 +#define _VX_VS_SCHED_H
15363 +
15364 +#include "vserver/sched.h"
15365 +
15366 +
15367 +#define VAVAVOOM_RATIO          50
15368 +
15369 +#define MAX_PRIO_BIAS           20
15370 +#define MIN_PRIO_BIAS          -20
15371 +
15372 +
15373 +#ifdef CONFIG_VSERVER_HARDCPU
15374 +
15375 +/*
15376 + * effective_prio - return the priority that is based on the static
15377 + * priority but is modified by bonuses/penalties.
15378 + *
15379 + * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
15380 + * into a -4 ... 0 ... +4 bonus/penalty range.
15381 + *
15382 + * Additionally, we scale another amount based on the number of
15383 + * CPU tokens currently held by the context, if the process is
15384 + * part of a context (and the appropriate SCHED flag is set).
15385 + * This ranges from -5 ... 0 ... +15, quadratically.
15386 + *
15387 + * So, the total bonus is -9 .. 0 .. +19
15388 + * We use ~50% of the full 0...39 priority range so that:
15389 + *
15390 + * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
15391 + * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
15392 + *    unless that context is far exceeding its CPU allocation.
15393 + *
15394 + * Both properties are important to certain workloads.
15395 + */
15396 +static inline
15397 +int vx_effective_vavavoom(struct _vx_sched_pc *sched_pc, int max_prio)
15398 +{
15399 +       int vavavoom, max;
15400 +       /* NOTE(review): the result is divided by tokens_max squared; presumably tokens_max is never 0 -- confirm */
15401 +       /* lots of tokens = lots of vavavoom
15402 +        *      no tokens = no vavavoom      */
15403 +       if ((vavavoom = sched_pc->tokens) >= 0) {
15404 +               max = sched_pc->tokens_max;
15405 +               vavavoom = max - vavavoom;      /* tokens missing from a full bucket */
15406 +               max = max * max;
15407 +               vavavoom = max_prio * VAVAVOOM_RATIO / 100
15408 +                       * (vavavoom*vavavoom - (max >> 2)) / max;
15409 +               return vavavoom;
15410 +       }
15411 +       return 0;       /* negative token count: no adjustment */
15412 +}
15413 +
15414 +/* Return prio adjusted for p's context: optional token-based vavavoom plus the context's prio_bias. */
15415 +static inline
15416 +int vx_adjust_prio(struct task_struct *p, int prio, int max_user)
15417 +{
15418 +       struct vx_info *vxi = p->vx_info;
15419 +
15420 +       if (!vxi)
15421 +               return prio;
15422 +       /* the computed vavavoom is also stored in vxi->sched.vavavoom */
15423 +       if (vx_info_flags(vxi, VXF_SCHED_PRIO, 0)) {
15424 +               struct _vx_sched_pc *sched_pc = &vx_cpu(vxi, sched_pc);
15425 +               int vavavoom = vx_effective_vavavoom(sched_pc, max_user);
15426 +
15427 +               vxi->sched.vavavoom = vavavoom;
15428 +               prio += vavavoom;
15429 +       }
15430 +       prio += vxi->sched.prio_bias;
15431 +       return prio;
15432 +}
15433 +
15434 +#else /* !CONFIG_VSERVER_HARDCPU */
15435 +/* Variant without CONFIG_VSERVER_HARDCPU: only the context's prio_bias is added; max_user is unused. */
15436 +static inline
15437 +int vx_adjust_prio(struct task_struct *p, int prio, int max_user)
15438 +{
15439 +       struct vx_info *vxi = p->vx_info;
15440 +
15441 +       if (vxi)
15442 +               prio += vxi->sched.prio_bias;
15443 +       return prio;
15444 +}
15445 +
15446 +#endif /* CONFIG_VSERVER_HARDCPU */
15447 +
15448 +/* Add cputime to the user_ticks counter reached through vx_cpu(); no-op for a NULL vxi. nice is unused. */
15449 +static inline void vx_account_user(struct vx_info *vxi,
15450 +       cputime_t cputime, int nice)
15451 +{
15452 +       if (!vxi)
15453 +               return;
15454 +       vx_cpu(vxi, sched_pc).user_ticks += cputime;
15455 +}
15456 +/* Add cputime to the sys_ticks counter reached through vx_cpu(); no-op for a NULL vxi. idle is unused. */
15457 +static inline void vx_account_system(struct vx_info *vxi,
15458 +       cputime_t cputime, int idle)
15459 +{
15460 +       if (!vxi)
15461 +               return;
15462 +       vx_cpu(vxi, sched_pc).sys_ticks += cputime;
15463 +}
15464 +
15465 +#else
15466 +#warning duplicate inclusion
15467 +#endif
15468 diff -NurpP --minimal linux-2.6.16.20/include/linux/vs_socket.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_socket.h
15469 --- linux-2.6.16.20/include/linux/vs_socket.h   1970-01-01 01:00:00 +0100
15470 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_socket.h      2006-04-26 19:07:00 +0200
15471 @@ -0,0 +1,57 @@
15472 +#ifndef _VX_VS_SOCKET_H
15473 +#define _VX_VS_SOCKET_H
15474 +
15475 +#include "vserver/debug.h"
15476 +
15477 +
15478 +/* socket accounting */
15479 +
15480 +#include <linux/socket.h>
15481 +/* Map a socket family to an accounting slot: 1 and 2 map to themselves, PF_INET6 to 3, all others to 4. */
15482 +static inline int vx_sock_type(int family)
15483 +{
15484 +       int type = 4;
15485 +       /* presumably families 1 and 2 are PF_UNIX and PF_INET -- confirm against <linux/socket.h> */
15486 +       if (family > 0 && family < 3)
15487 +               type = family;
15488 +       else if (family == PF_INET6)
15489 +               type = 3;
15490 +       return type;
15491 +}
15492 +/* Account one socket event of size s in slot p for context v; the macro adds the call site. */
15493 +#define vx_acc_sock(v,f,p,s) \
15494 +       __vx_acc_sock((v), (f), (p), (s), __FILE__, __LINE__)
15495 +/* Bump count and add size to total in vxi->cacct.sock[type][pos]; no-op for a NULL vxi. file and line are unused. */
15496 +static inline void __vx_acc_sock(struct vx_info *vxi,
15497 +       int family, int pos, int size, const char *file, int line)
15498 +{
15499 +       if (vxi) {
15500 +               int type = vx_sock_type(family);
15501 +
15502 +               atomic_inc(&vxi->cacct.sock[type][pos].count);
15503 +               atomic_add(size, &vxi->cacct.sock[type][pos].total);
15504 +       }
15505 +}
15506 +/* Per-socket accounting shortcuts: slot 0 is used for recv, slot 1 for send, slot 2 for fail. */
15507 +#define vx_sock_recv(sk,s) \
15508 +       vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 0, (s))
15509 +#define vx_sock_send(sk,s) \
15510 +       vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 1, (s))
15511 +#define vx_sock_fail(sk,s) \
15512 +       vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, (s))
15513 +
15514 +/* Reset the context fields of socket s: sk_xid = 0, sk_vx_info = NULL. */
15515 +#define sock_vx_init(s) do {           \
15516 +       (s)->sk_xid = 0;                \
15517 +       (s)->sk_vx_info = NULL;         \
15518 +       } while (0)
15519 +/* Reset the network context fields of socket s: sk_nid = 0, sk_nx_info = NULL. */
15520 +#define sock_nx_init(s) do {           \
15521 +       (s)->sk_nid = 0;                \
15522 +       (s)->sk_nx_info = NULL;         \
15523 +       } while (0)
15524 +
15525 +
15526 +#else
15527 +#warning duplicate inclusion
15528 +#endif
15529 diff -NurpP --minimal linux-2.6.16.20/include/linux/vs_tag.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_tag.h
15530 --- linux-2.6.16.20/include/linux/vs_tag.h      1970-01-01 01:00:00 +0100
15531 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_tag.h 2006-04-26 19:07:00 +0200
15532 @@ -0,0 +1,45 @@
15533 +#ifndef _VX_VS_TAG_H
15534 +#define _VX_VS_TAG_H
15535 +
15536 +#include <linux/kernel.h>
15537 +#include <linux/vserver/tag.h>
15538 +
15539 +/* check conditions */
15540 +
15541 +#define DX_ADMIN       0x0001
15542 +#define DX_WATCH       0x0002
15543 +#define DX_HOSTID      0x0008
15544 +
15545 +#define DX_IDENT       0x0010
15546 +
15547 +#define DX_ARG_MASK    0x0010
15548 +
15549 +
15550 +#define dx_task_tag(t) ((t)->xid)
15551 +
15552 +#define dx_current_tag() dx_task_tag(current)
15553 +
15554 +#define dx_check(c,m)  __dx_check(dx_current_tag(),c,m)
15555 +
15556 +#define dx_weak_check(c,m)     ((m) ? dx_check(c,m) : 1)
15557 +/* __dx_check(cid, id, mode) returns non-zero if any condition selected in mode holds: */
15558 +/* DX_IDENT: id == cid; DX_ADMIN: cid == 0; DX_WATCH: cid == 1; DX_HOSTID: id == 0. */
15559 +/*
15560 + * check current context for ADMIN/WATCH and
15561 + * optionally against supplied argument
15562 + */
15563 +static inline int __dx_check(tag_t cid, tag_t id, unsigned int mode)
15564 +{
15565 +       if (mode & DX_ARG_MASK) {
15566 +               if ((mode & DX_IDENT) &&
15567 +                       (id == cid))
15568 +                       return 1;
15569 +       }
15570 +       return (((mode & DX_ADMIN) && (cid == 0)) ||
15571 +               ((mode & DX_WATCH) && (cid == 1)) ||
15572 +               ((mode & DX_HOSTID) && (id == 0)));
15573 +}
15574 +
15575 +#else
15576 +#warning duplicate inclusion
15577 +#endif
15578 diff -NurpP --minimal linux-2.6.16.20/include/linux/vs_time.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_time.h
15579 --- linux-2.6.16.20/include/linux/vs_time.h     1970-01-01 01:00:00 +0100
15580 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_time.h        2006-05-31 01:27:45 +0200
15581 @@ -0,0 +1,19 @@
15582 +#ifndef _VX_VS_TIME_H
15583 +#define _VX_VS_TIME_H
15584 +
15585 +
15586 +/* time faking stuff */
15587 +/* With CONFIG_VSERVER_VTIME the vx_ variants are real functions defined elsewhere. */
15588 +#ifdef CONFIG_VSERVER_VTIME
15589 +
15590 +extern void vx_gettimeofday(struct timeval *tv);
15591 +extern int vx_settimeofday(struct timespec *ts);
15592 +/* Otherwise they expand directly to do_gettimeofday() / do_settimeofday(). */
15593 +#else
15594 +#define        vx_gettimeofday(t)      do_gettimeofday(t)
15595 +#define        vx_settimeofday(t)      do_settimeofday(t)
15596 +#endif
15597 +
15598 +#else
15599 +#warning duplicate inclusion
15600 +#endif
15601 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/context.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/context.h
15602 --- linux-2.6.16.20/include/linux/vserver/context.h     1970-01-01 01:00:00 +0100
15603 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/context.h        2006-05-29 17:49:04 +0200
15604 @@ -0,0 +1,206 @@
15605 +#ifndef _VX_CONTEXT_H
15606 +#define _VX_CONTEXT_H
15607 +
15608 +#include <linux/types.h>
15609 +#include <linux/capability.h>
15610 +
15611 +
15612 +#define MAX_S_CONTEXT  65535   /* Arbitrary limit */
15613 +
15614 +#ifdef CONFIG_VSERVER_DYNAMIC_IDS
15615 +#define MIN_D_CONTEXT  49152   /* dynamic contexts start here */
15616 +#else
15617 +#define MIN_D_CONTEXT  65536
15618 +#endif
15619 +
15620 +#define VX_DYNAMIC_ID  ((uint32_t)-1)          /* id for dynamic context */
15621 +
15622 +/* context flags */
15623 +
15624 +#define VXF_INFO_LOCK          0x00000001
15625 +#define VXF_INFO_SCHED         0x00000002
15626 +#define VXF_INFO_NPROC         0x00000004
15627 +#define VXF_INFO_PRIVATE       0x00000008
15628 +
15629 +#define VXF_INFO_INIT          0x00000010
15630 +#define VXF_INFO_HIDE          0x00000020
15631 +#define VXF_INFO_ULIMIT                0x00000040
15632 +#define VXF_INFO_NSPACE                0x00000080
15633 +
15634 +#define VXF_SCHED_HARD         0x00000100
15635 +#define VXF_SCHED_PRIO         0x00000200
15636 +#define VXF_SCHED_PAUSE                0x00000400
15637 +
15638 +#define VXF_VIRT_MEM           0x00010000
15639 +#define VXF_VIRT_UPTIME                0x00020000
15640 +#define VXF_VIRT_CPU           0x00040000
15641 +#define VXF_VIRT_LOAD          0x00080000
15642 +#define VXF_VIRT_TIME          0x00100000
15643 +
15644 +#define VXF_HIDE_MOUNT         0x01000000
15645 +#define VXF_HIDE_NETIF         0x02000000
15646 +
15647 +#define VXF_STATE_SETUP                (1ULL<<32)
15648 +#define VXF_STATE_INIT         (1ULL<<33)
15649 +
15650 +#define VXF_SC_HELPER          (1ULL<<36)
15651 +#define VXF_REBOOT_KILL                (1ULL<<37)
15652 +#define VXF_PERSISTENT         (1ULL<<38)
15653 +
15654 +#define VXF_FORK_RSS           (1ULL<<48)
15655 +#define VXF_PROLIFIC           (1ULL<<49)
15656 +
15657 +#define VXF_IGNEG_NICE         (1ULL<<52)
15658 +/* bits 32 and 33 of the 64-bit flag word */
15659 +#define VXF_ONE_TIME           (0x0003ULL<<32)
15660 +/* VXF_STATE_SETUP is (1ULL<<32) and VXF_STATE_INIT is (1ULL<<33), so this has the same value as VXF_ONE_TIME */
15661 +#define VXF_INIT_SET           (VXF_STATE_SETUP|VXF_STATE_INIT)
15662 +
15663 +
15664 +/* context migration */
15665 +
15666 +#define VXM_SET_INIT           0x00000001
15667 +#define VXM_SET_REAPER         0x00000002
15668 +
15669 +/* context caps */
15670 +
15671 +#define VXC_CAP_MASK           0x00000000
15672 +
15673 +#define VXC_SET_UTSNAME                0x00000001
15674 +#define VXC_SET_RLIMIT         0x00000002
15675 +
15676 +#define VXC_RAW_ICMP           0x00000100
15677 +#define VXC_SYSLOG             0x00001000
15678 +
15679 +#define VXC_SECURE_MOUNT       0x00010000
15680 +#define VXC_SECURE_REMOUNT     0x00020000
15681 +#define VXC_BINARY_MOUNT       0x00040000
15682 +
15683 +#define VXC_QUOTA_CTL          0x00100000
15684 +
15685 +
15686 +/* context state changes */
15687 +
15688 +enum {
15689 +       VSC_STARTUP = 1,
15690 +       VSC_SHUTDOWN,
15691 +
15692 +       VSC_NETUP,
15693 +       VSC_NETDOWN,
15694 +};
15695 +
15696 +
15697 +#ifdef __KERNEL__
15698 +
15699 +#include <linux/list.h>
15700 +#include <linux/spinlock.h>
15701 +#include <linux/rcupdate.h>
15702 +
15703 +#include "limit_def.h"
15704 +#include "sched_def.h"
15705 +#include "cvirt_def.h"
15706 +
15707 +struct _vx_info_pc {
15708 +       struct _vx_sched_pc sched_pc;
15709 +       struct _vx_cvirt_pc cvirt_pc;
15710 +};
15711 +
15712 +struct vx_info {
15713 +       struct hlist_node vx_hlist;             /* linked list of contexts */
15714 +       xid_t vx_id;                            /* context id */
15715 +       atomic_t vx_usecnt;                     /* usage count */
15716 +       atomic_t vx_tasks;                      /* tasks count */
15717 +       struct vx_info *vx_parent;              /* parent context */
15718 +       int vx_state;                           /* context state */
15719 +
15720 +       struct namespace *vx_namespace;         /* private namespace */
15721 +       struct fs_struct *vx_fs;                /* private namespace fs */
15722 +       uint64_t vx_flags;                      /* context flags */
15723 +       uint64_t vx_bcaps;                      /* bounding caps (system) */
15724 +       uint64_t vx_ccaps;                      /* context caps (vserver) */
15725 +       kernel_cap_t vx_cap_bset;               /* the guest's bset */
15726 +
15727 +       struct task_struct *vx_reaper;          /* guest reaper process */
15728 +       pid_t vx_initpid;                       /* PID of guest init */
15729 +
15730 +       struct _vx_limit limit;                 /* vserver limits */
15731 +       struct _vx_sched sched;                 /* vserver scheduler */
15732 +       struct _vx_cvirt cvirt;                 /* virtual/bias stuff */
15733 +       struct _vx_cacct cacct;                 /* context accounting */
15734 +
15735 +#ifndef CONFIG_SMP
15736 +       struct _vx_info_pc info_pc;             /* per cpu data */
15737 +#else
15738 +       struct _vx_info_pc *ptr_pc;             /* per cpu array */
15739 +#endif
15740 +
15741 +       wait_queue_head_t vx_wait;              /* context exit waitqueue */
15742 +       int reboot_cmd;                         /* last sys_reboot() cmd */
15743 +       int exit_code;                          /* last process exit code */
15744 +
15745 +       char vx_name[65];                       /* vserver name */
15746 +};
15747 +/* Per-cpu data access: without CONFIG_SMP the struct is embedded and id is ignored; with it, per_cpu_ptr() selects by id. */
15748 +#ifndef CONFIG_SMP
15749 +#define        vx_ptr_pc(vxi)          (&(vxi)->info_pc)
15750 +#define        vx_per_cpu(vxi, v, id)  vx_ptr_pc(vxi)->v
15751 +#else
15752 +#define        vx_ptr_pc(vxi)          ((vxi)->ptr_pc)
15753 +#define        vx_per_cpu(vxi, v, id)  per_cpu_ptr(vx_ptr_pc(vxi), id)->v
15754 +#endif
15755 +/* Field v of the per-cpu data selected by smp_processor_id(). */
15756 +#define        vx_cpu(vxi, v)          vx_per_cpu(vxi, v, smp_processor_id())
15757 +
15758 +
15759 +struct vx_info_save {
15760 +       struct vx_info *vxi;
15761 +       xid_t xid;
15762 +};
15763 +
15764 +
15765 +/* status flags */
15766 +
15767 +#define VXS_HASHED     0x0001
15768 +#define VXS_PAUSED     0x0010
15769 +#define VXS_SHUTDOWN   0x0100
15770 +#define VXS_HELPER     0x1000
15771 +#define VXS_RELEASED   0x8000
15772 +
15773 +/* check conditions */
15774 +
15775 +#define VX_ADMIN       0x0001
15776 +#define VX_WATCH       0x0002
15777 +#define VX_HIDE                0x0004
15778 +#define VX_HOSTID      0x0008
15779 +
15780 +#define VX_IDENT       0x0010
15781 +#define VX_EQUIV       0x0020
15782 +#define VX_PARENT      0x0040
15783 +#define VX_CHILD       0x0080
15784 +
15785 +#define VX_ARG_MASK    0x00F0
15786 +
15787 +#define VX_DYNAMIC     0x0100
15788 +#define VX_STATIC      0x0200
15789 +
15790 +#define VX_ATR_MASK    0x0F00
15791 +
15792 +
15793 +extern void claim_vx_info(struct vx_info *, struct task_struct *);
15794 +extern void release_vx_info(struct vx_info *, struct task_struct *);
15795 +
15796 +extern struct vx_info *lookup_vx_info(int);
15797 +extern struct vx_info *lookup_or_create_vx_info(int);
15798 +
15799 +extern int get_xid_list(int, unsigned int *, int);
15800 +extern int xid_is_hashed(xid_t);
15801 +
15802 +extern int vx_migrate_task(struct task_struct *, struct vx_info *);
15803 +
15804 +extern long vs_state_change(struct vx_info *, unsigned int);
15805 +
15806 +
15807 +#endif /* __KERNEL__ */
15808 +#else  /* _VX_CONTEXT_H */
15809 +#warning duplicate inclusion
15810 +#endif /* _VX_CONTEXT_H */
15811 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/context_cmd.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/context_cmd.h
15812 --- linux-2.6.16.20/include/linux/vserver/context_cmd.h 1970-01-01 01:00:00 +0100
15813 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/context_cmd.h    2006-04-26 19:07:00 +0200
15814 @@ -0,0 +1,111 @@
15815 +#ifndef _VX_CONTEXT_CMD_H
15816 +#define _VX_CONTEXT_CMD_H
15817 +
15818 +
15819 +/* vinfo commands */
15820 +
15821 +#define VCMD_task_xid          VC_CMD(VINFO, 1, 0)
15822 +
15823 +#ifdef __KERNEL__
15824 +extern int vc_task_xid(uint32_t, void __user *);
15825 +
15826 +#endif /* __KERNEL__ */
15827 +
15828 +#define VCMD_vx_info           VC_CMD(VINFO, 5, 0)
15829 +
15830 +struct vcmd_vx_info_v0 {
15831 +       uint32_t xid;
15832 +       uint32_t initpid;
15833 +       /* more to come */
15834 +};
15835 +
15836 +#ifdef __KERNEL__
15837 +extern int vc_vx_info(uint32_t, void __user *);
15838 +
15839 +#endif /* __KERNEL__ */
15840 +
15841 +
15842 +/* context commands */
15843 +
15844 +#define VCMD_ctx_create_v0     VC_CMD(VPROC, 1, 0)
15845 +#define VCMD_ctx_create                VC_CMD(VPROC, 1, 1)
15846 +
15847 +struct vcmd_ctx_create {
15848 +       uint64_t flagword;
15849 +};
15850 +
15851 +#define VCMD_ctx_migrate_v0    VC_CMD(PROCMIG, 1, 0)
15852 +#define VCMD_ctx_migrate       VC_CMD(PROCMIG, 1, 1)
15853 +
15854 +struct vcmd_ctx_migrate {
15855 +       uint64_t flagword;
15856 +};
15857 +
15858 +#ifdef __KERNEL__
15859 +extern int vc_ctx_create(uint32_t, void __user *);
15860 +extern int vc_ctx_migrate(uint32_t, void __user *);
15861 +
15862 +#endif /* __KERNEL__ */
15863 +
15864 +
15865 +/* flag commands */
15866 +
15867 +#define VCMD_get_cflags                VC_CMD(FLAGS, 1, 0)
15868 +#define VCMD_set_cflags                VC_CMD(FLAGS, 2, 0)
15869 +
15870 +struct vcmd_ctx_flags_v0 {
15871 +       uint64_t flagword;
15872 +       uint64_t mask;
15873 +};
15874 +
15875 +#ifdef __KERNEL__
15876 +extern int vc_get_cflags(uint32_t, void __user *);
15877 +extern int vc_set_cflags(uint32_t, void __user *);
15878 +
15879 +#endif /* __KERNEL__ */
15880 +
15881 +
15882 +/* context caps commands */
15883 +
15884 +#define VCMD_get_ccaps_v0      VC_CMD(FLAGS, 3, 0)
15885 +#define VCMD_set_ccaps_v0      VC_CMD(FLAGS, 4, 0)
15886 +
15887 +struct vcmd_ctx_caps_v0 {
15888 +       uint64_t bcaps;
15889 +       uint64_t ccaps;
15890 +       uint64_t cmask;
15891 +};
15892 +
15893 +#define VCMD_get_ccaps         VC_CMD(FLAGS, 3, 1)
15894 +#define VCMD_set_ccaps         VC_CMD(FLAGS, 4, 1)
15895 +
15896 +struct vcmd_ctx_caps_v1 {
15897 +       uint64_t ccaps;
15898 +       uint64_t cmask;
15899 +};
15900 +
15901 +#ifdef __KERNEL__
15902 +extern int vc_get_ccaps_v0(uint32_t, void __user *);
15903 +extern int vc_set_ccaps_v0(uint32_t, void __user *);
15904 +extern int vc_get_ccaps(uint32_t, void __user *);
15905 +extern int vc_set_ccaps(uint32_t, void __user *);
15906 +
15907 +#endif /* __KERNEL__ */
15908 +
15909 +
15910 +/* bcaps commands */
15911 +
15912 +#define VCMD_get_bcaps         VC_CMD(FLAGS, 9, 0)
15913 +#define VCMD_set_bcaps         VC_CMD(FLAGS,10, 0)
15914 +
15915 +struct vcmd_bcaps {
15916 +       uint64_t bcaps;
15917 +       uint64_t bmask;
15918 +};
15919 +
15920 +#ifdef __KERNEL__
15921 +extern int vc_get_bcaps(uint32_t, void __user *);
15922 +extern int vc_set_bcaps(uint32_t, void __user *);
15923 +
15924 +#endif /* __KERNEL__ */
15925 +#endif /* _VX_CONTEXT_CMD_H */
15926 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/cvirt.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/cvirt.h
15927 --- linux-2.6.16.20/include/linux/vserver/cvirt.h       1970-01-01 01:00:00 +0100
15928 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/cvirt.h  2006-04-26 19:07:00 +0200
15929 @@ -0,0 +1,26 @@
15930 +#ifndef _VX_CVIRT_H
15931 +#define _VX_CVIRT_H
15932 +
15933 +
15934 +#ifdef __KERNEL__
15935 +
15936 +struct timespec;
15937 +
15938 +void vx_vsi_uptime(struct timespec *, struct timespec *);
15939 +
15940 +
15941 +struct vx_info;
15942 +
15943 +void vx_update_load(struct vx_info *);
15944 +
15945 +
15946 +int vx_uts_virt_handler(struct ctl_table *ctl, int write, xid_t xid,
15947 +       void **datap, size_t *lenp);
15948 +
15949 +
15950 +int vx_do_syslog(int, char __user *, int);
15951 +
15952 +#endif /* __KERNEL__ */
15953 +#else  /* _VX_CVIRT_H */
15954 +#warning duplicate inclusion
15955 +#endif /* _VX_CVIRT_H */
15956 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/cvirt_cmd.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/cvirt_cmd.h
15957 --- linux-2.6.16.20/include/linux/vserver/cvirt_cmd.h   1970-01-01 01:00:00 +0100
15958 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/cvirt_cmd.h      2006-04-26 19:07:00 +0200
15959 @@ -0,0 +1,35 @@
15960 +#ifndef _VX_CVIRT_CMD_H
15961 +#define _VX_CVIRT_CMD_H
15962 +
15963 +
15964 +/* virtual host info name commands */
15965 +
15966 +#define VCMD_set_vhi_name      VC_CMD(VHOST, 1, 0)
15967 +#define VCMD_get_vhi_name      VC_CMD(VHOST, 2, 0)
15968 +
15969 +struct vcmd_vhi_name_v0 {
15970 +       uint32_t field;
15971 +       char name[65];
15972 +};
15973 +
15974 +
15975 +enum vhi_name_field {
15976 +       VHIN_CONTEXT=0,
15977 +       VHIN_SYSNAME,
15978 +       VHIN_NODENAME,
15979 +       VHIN_RELEASE,
15980 +       VHIN_VERSION,
15981 +       VHIN_MACHINE,
15982 +       VHIN_DOMAINNAME,
15983 +};
15984 +
15985 +
15986 +#ifdef __KERNEL__
15987 +
15988 +#include <linux/compiler.h>
15989 +
15990 +extern int vc_set_vhi_name(uint32_t, void __user *);
15991 +extern int vc_get_vhi_name(uint32_t, void __user *);
15992 +
15993 +#endif /* __KERNEL__ */
15994 +#endif /* _VX_CVIRT_CMD_H */
15995 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/cvirt_def.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/cvirt_def.h
15996 --- linux-2.6.16.20/include/linux/vserver/cvirt_def.h   1970-01-01 01:00:00 +0100
15997 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/cvirt_def.h      2006-05-29 17:36:22 +0200
15998 @@ -0,0 +1,120 @@
15999 +#ifndef _VX_CVIRT_DEF_H
16000 +#define _VX_CVIRT_DEF_H
16001 +
16002 +#include <linux/jiffies.h>
16003 +#include <linux/utsname.h>
16004 +#include <linux/spinlock.h>
16005 +#include <linux/wait.h>
16006 +#include <linux/time.h>
16007 +#include <asm/atomic.h>
16008 +
16009 +
16010 +struct _vx_usage_stat {
16011 +       uint64_t user;
16012 +       uint64_t nice;
16013 +       uint64_t system;
16014 +       uint64_t softirq;
16015 +       uint64_t irq;
16016 +       uint64_t idle;
16017 +       uint64_t iowait;
16018 +};
16019 +
16020 +struct _vx_syslog {
16021 +       wait_queue_head_t log_wait;
16022 +       spinlock_t logbuf_lock;         /* lock for the log buffer */
16023 +
16024 +       unsigned long log_start;        /* next char to be read by syslog() */
16025 +       unsigned long con_start;        /* next char to be sent to consoles */
16026 +       unsigned long log_end;  /* most-recently-written-char + 1 */
16027 +       unsigned long logged_chars;     /* #chars since last read+clear operation */
16028 +
16029 +       char log_buf[1024];
16030 +};
16031 +
16032 +
16033 +/* context sub struct */
16034 +
16035 +struct _vx_cvirt {
16036 +//     int max_threads;                /* maximum allowed threads */
16037 +       atomic_t nr_threads;            /* number of current threads */
16038 +       atomic_t nr_running;            /* number of running threads */
16039 +       atomic_t nr_uninterruptible;    /* number of uninterruptible threads */
16040 +
16041 +       atomic_t nr_onhold;             /* processes on hold */
16042 +       uint32_t onhold_last;           /* jiffies when put on hold */
16043 +
16044 +       struct timeval bias_tv;         /* time offset to the host */
16045 +       struct timespec bias_idle;
16046 +       struct timespec bias_uptime;    /* context creation point */
16047 +       uint64_t bias_clock;            /* offset in clock_t */
16048 +
16049 +       struct new_utsname utsname;
16050 +
16051 +       spinlock_t load_lock;           /* lock for the load averages */
16052 +       atomic_t load_updates;          /* nr of load updates done so far */
16053 +       uint32_t load_last;             /* last time load was calculated */
16054 +       uint32_t load[3];               /* load averages 1,5,15 */
16055 +
16056 +       atomic_t total_forks;           /* number of forks so far */
16057 +
16058 +       struct _vx_syslog syslog;       /* log buffer state, see struct _vx_syslog */
16059 +};
16060 +
16061 +struct _vx_cvirt_pc {
16062 +       struct _vx_usage_stat cpustat;
16063 +};
16064 +
16065 +
16066 +#ifdef CONFIG_VSERVER_DEBUG
16067 +/* Debug helper: printk the nr_threads, nr_running, nr_uninterruptible and nr_onhold counters, then total_forks. */
16068 +static inline void __dump_vx_cvirt(struct _vx_cvirt *cvirt)
16069 +{
16070 +       printk("\t_vx_cvirt:\n");
16071 +       printk("\t threads: %4d, %4d, %4d, %4d\n",
16072 +               atomic_read(&cvirt->nr_threads),
16073 +               atomic_read(&cvirt->nr_running),
16074 +               atomic_read(&cvirt->nr_uninterruptible),
16075 +               atomic_read(&cvirt->nr_onhold));
16076 +       /* add rest here */
16077 +       printk("\t total_forks = %d\n", atomic_read(&cvirt->total_forks));
16078 +}
16079 +
16080 +#endif
16081 +
16082 +
16083 +struct _vx_sock_acc {
16084 +       atomic_t count;
16085 +       atomic_t total;
16086 +};
16087 +
16088 +/* context sub struct */
16089 +
16090 +struct _vx_cacct {
16091 +       struct _vx_sock_acc sock[5][3];
16092 +       atomic_t slab[8];
16093 +       atomic_t page[6][8];
16094 +};
16095 +
16096 +#ifdef CONFIG_VSERVER_DEBUG
16097 +/* Debug helper: printk count and total for each of the 5x3 sock[][] entries; slab[] and page[][] are not printed. */
16098 +static inline void __dump_vx_cacct(struct _vx_cacct *cacct)
16099 +{
16100 +       int i,j;
16101 +       /* NOTE(review): the header below has no trailing newline, so row [0] continues on the same line -- intended? */
16102 +       printk("\t_vx_cacct:");
16103 +       for (i=0; i<5; i++) {
16104 +               struct _vx_sock_acc *ptr = cacct->sock[i];
16105 +
16106 +               printk("\t [%d] =", i);
16107 +               for (j=0; j<3; j++) {
16108 +                       printk(" [%d] = %8d, %8d", j,
16109 +                               atomic_read(&ptr[j].count),
16110 +                               atomic_read(&ptr[j].total));
16111 +               }
16112 +               printk("\n");
16113 +       }
16114 +}
16115 +
16116 +#endif
16117 +
16118 +#endif /* _VX_CVIRT_DEF_H */
16119 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/debug.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/debug.h
16120 --- linux-2.6.16.20/include/linux/vserver/debug.h       1970-01-01 01:00:00 +0100
16121 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/debug.h  2006-04-26 19:07:00 +0200
16122 @@ -0,0 +1,110 @@
16123 +#ifndef _VX_DEBUG_H
16124 +#define _VX_DEBUG_H
16125 +
16126 +
16127 +#define VXD_CBIT(n,m)  (vx_debug_ ## n & (1 << (m)))
16128 +#define VXD_CMIN(n,m)  (vx_debug_ ## n > (m))
16129 +#define VXD_MASK(n,m)  (vx_debug_ ## n & (m))
16130 +
16131 +#define VXD_QPOS(v,p)  (((uint32_t)(v) >> ((p)*8)) & 0xFF)
16132 +#define VXD_QUAD(v)    VXD_QPOS(v,0), VXD_QPOS(v,1),           \
16133 +                       VXD_QPOS(v,2), VXD_QPOS(v,3)
16134 +#define VXF_QUAD       "%u.%u.%u.%u"
16135 +
16136 +#define VXD_DEV(d)     (d), (d)->bd_inode->i_ino,              \
16137 +                       imajor((d)->bd_inode), iminor((d)->bd_inode)
16138 +#define VXF_DEV                "%p[%lu,%d:%d]"
16139 +
16140 +
16141 +#define __FUNC__       __func__
16142 +
16143 +
16144 +#ifdef CONFIG_VSERVER_DEBUG
16145 +
16146 +extern unsigned int vx_debug_switch;
16147 +extern unsigned int vx_debug_xid;
16148 +extern unsigned int vx_debug_nid;
16149 +extern unsigned int vx_debug_tag;
16150 +extern unsigned int vx_debug_net;
16151 +extern unsigned int vx_debug_limit;
16152 +extern unsigned int vx_debug_cres;
16153 +extern unsigned int vx_debug_dlim;
16154 +extern unsigned int vx_debug_quota;
16155 +extern unsigned int vx_debug_cvirt;
16156 +extern unsigned int vx_debug_misc;
16157 +
16158 +
16159 +#define VX_LOGLEVEL    "vxD: "
16160 +#define VX_WARNLEVEL   KERN_WARNING "vxW: "
16161 +/* vxdprintk: if c is true, printk format f (VX_LOGLEVEL prefix, newline appended) with optional arguments. */
16162 +#define vxdprintk(c,f,x...)                                    \
16163 +       do {                                                    \
16164 +               if (c)                                          \
16165 +                       printk(VX_LOGLEVEL f "\n" , ##x);       \
16166 +       } while (0)
16167 +/* vxlprintk: as vxdprintk, but the last two arguments must be file and line for the appended @%s:%d. */
16168 +#define vxlprintk(c,f,x...)                                    \
16169 +       do {                                                    \
16170 +               if (c)                                          \
16171 +                       printk(VX_LOGLEVEL f " @%s:%d\n", x);   \
16172 +       } while (0)
16173 +/* vxfprintk: the last three arguments must be function, file and line for the appended %s@%s:%d. */
16174 +#define vxfprintk(c,f,x...)                                    \
16175 +       do {                                                    \
16176 +               if (c)                                          \
16177 +                       printk(VX_LOGLEVEL f " %s@%s:%d\n", x); \
16178 +       } while (0)
16180 +
16181 +#define vxwprintk(c,f,x...)                                    \
16182 +       do {                                                    \
16183 +               if (c)                                          \
16184 +                       printk(VX_WARNLEVEL f "\n" , ##x);      \
16185 +       } while (0)
16186 +
16187 +/* d_path() into a static buffer declared at the expansion site. NOTE(review): concurrent use of one site would share that buffer -- confirm callers. */
16188 +#define vxd_path(d,m)                                          \
16189 +       ({ static char _buffer[PATH_MAX];                       \
16190 +          d_path((d), (m), _buffer, sizeof(_buffer)); })
16191 +/* vxd_path() of n's dentry and mnt, or the literal "<null>" when n is NULL. */
16192 +#define vxd_cond_path(n)                                       \
16193 +       ((n) ? vxd_path((n)->dentry, (n)->mnt) : "<null>" )
16194 +
16195 +
16196 +void dump_vx_info(struct vx_info *, int);
16197 +void dump_vx_info_inactive(int);
16198 +
16199 +#else  /* CONFIG_VSERVER_DEBUG */
16200 +
16201 +#define vx_debug_switch 0
16202 +#define vx_debug_xid   0
16203 +#define vx_debug_nid   0
16204 +#define vx_debug_tag   0
16205 +#define vx_debug_net   0
16206 +#define vx_debug_limit 0
16207 +#define vx_debug_cres  0
16208 +#define vx_debug_dlim  0
16209 +#define vx_debug_cvirt 0
16210 +
16211 +#define vxdprintk(x...) do { } while (0)
16212 +#define vxlprintk(x...) do { } while (0)
16213 +#define vxfprintk(x...) do { } while (0)
16214 +#define vxwprintk(x...) do { } while (0)
16215 +
16216 +#define vxd_path(d,m)  "<none>"        /* function-like, as in the debug build */
16217 +#define vxd_cond_path(n)       "<none>"        /* function-like, as in the debug build */
16218 +
16219 +#endif /* CONFIG_VSERVER_DEBUG */
16220 +
16221 +
16222 +#ifdef CONFIG_VSERVER_DEBUG
16223 +#define vxd_assert_lock(l)     assert_spin_locked(l)
16224 +#define vxd_assert(c,f,x...)   vxlprintk(!(c), \
16225 +       "assertion [" f "] failed.", ##x, __FILE__, __LINE__)
16226 +#else
16227 +#define vxd_assert_lock(l)     do { } while (0)
16228 +#define vxd_assert(c,f,x...)   do { } while (0)
16229 +#endif
16230 +
16231 +
16232 +#endif /* _VX_DEBUG_H */
16233 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/debug_cmd.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/debug_cmd.h
16234 --- linux-2.6.16.20/include/linux/vserver/debug_cmd.h   1970-01-01 01:00:00 +0100
16235 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/debug_cmd.h      2006-04-26 19:07:00 +0200
16236 @@ -0,0 +1,14 @@
16237 +#ifndef _VX_DEBUG_CMD_H
16238 +#define _VX_DEBUG_CMD_H
16239 +
16240 +
16241 +/* debug commands */
16242 +
16243 +#define VCMD_dump_history      VC_CMD(DEBUG, 1, 0)
16244 +
16245 +#ifdef __KERNEL__
16246 +
16247 +extern int vc_dump_history(uint32_t);
16248 +
16249 +#endif /* __KERNEL__ */
16250 +#endif /* _VX_DEBUG_CMD_H */
16251 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/dlimit.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/dlimit.h
16252 --- linux-2.6.16.20/include/linux/vserver/dlimit.h      1970-01-01 01:00:00 +0100
16253 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/dlimit.h 2006-04-26 19:07:00 +0200
16254 @@ -0,0 +1,53 @@
16255 +#ifndef _VX_DLIMIT_H
16256 +#define _VX_DLIMIT_H
16257 +
16258 +#include "switch.h"
16259 +
16260 +
16261 +#ifdef __KERNEL__
16262 +
16263 +/*      keep in sync with CDLIM_INFINITY       */
16264 +
16265 +#define DLIM_INFINITY          (~0ULL)
16266 +
16267 +#include <linux/spinlock.h>
16268 +
16269 +struct super_block;
16270 +
16271 +struct dl_info {
16272 +       struct hlist_node dl_hlist;             /* linked list of contexts */
16273 +       struct rcu_head dl_rcu;                 /* the rcu head */
16274 +       tag_t dl_tag;                           /* context tag */
16275 +       atomic_t dl_usecnt;                     /* usage count */
16276 +       atomic_t dl_refcnt;                     /* reference count */
16277 +
16278 +       struct super_block *dl_sb;              /* associated superblock */
16279 +
16280 +       spinlock_t dl_lock;                     /* protect the values */
16281 +
16282 +       unsigned long long dl_space_used;       /* used space in bytes */
16283 +       unsigned long long dl_space_total;      /* maximum space in bytes */
16284 +       unsigned long dl_inodes_used;           /* used inodes */
16285 +       unsigned long dl_inodes_total;          /* maximum inodes */
16286 +
16287 +       unsigned int dl_nrlmult;                /* non root limit mult */
16288 +};
16289 +
16290 +struct rcu_head;
16291 +
16292 +extern void rcu_free_dl_info(struct rcu_head *);
16293 +extern void unhash_dl_info(struct dl_info *);
16294 +
16295 +extern struct dl_info *locate_dl_info(struct super_block *, tag_t);
16296 +
16297 +
16298 +struct kstatfs;
16299 +
16300 +extern void vx_vsi_statfs(struct super_block *, struct kstatfs *);
16301 +
16302 +typedef uint64_t dlsize_t;
16303 +
16304 +#endif /* __KERNEL__ */
16305 +#else  /* _VX_DLIMIT_H */
16306 +#warning duplicate inclusion
16307 +#endif /* _VX_DLIMIT_H */
16308 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/dlimit_cmd.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/dlimit_cmd.h
16309 --- linux-2.6.16.20/include/linux/vserver/dlimit_cmd.h  1970-01-01 01:00:00 +0100
16310 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/dlimit_cmd.h     2006-04-26 19:07:00 +0200
16311 @@ -0,0 +1,72 @@
16312 +#ifndef _VX_DLIMIT_CMD_H
16313 +#define _VX_DLIMIT_CMD_H
16314 +
16315 +
16316 +/*  dlimit vserver commands */
16317 +
16318 +#define VCMD_add_dlimit                VC_CMD(DLIMIT, 1, 0)
16319 +#define VCMD_rem_dlimit                VC_CMD(DLIMIT, 2, 0)
16320 +
16321 +#define VCMD_set_dlimit                VC_CMD(DLIMIT, 5, 0)
16322 +#define VCMD_get_dlimit                VC_CMD(DLIMIT, 6, 0)
16323 +
16324 +struct vcmd_ctx_dlimit_base_v0 {
16325 +       const char __user *name;
16326 +       uint32_t flags;
16327 +};
16328 +
16329 +struct vcmd_ctx_dlimit_v0 {
16330 +       const char __user *name;
16331 +       uint32_t space_used;                    /* used space in kbytes */
16332 +       uint32_t space_total;                   /* maximum space in kbytes */
16333 +       uint32_t inodes_used;                   /* used inodes */
16334 +       uint32_t inodes_total;                  /* maximum inodes */
16335 +       uint32_t reserved;                      /* reserved for root in % */
16336 +       uint32_t flags;
16337 +};
16338 +
16339 +#define CDLIM_UNSET            ((uint32_t)0UL)
16340 +#define CDLIM_INFINITY         ((uint32_t)~0UL)
16341 +#define CDLIM_KEEP             ((uint32_t)~1UL)
16342 +
16343 +#ifdef __KERNEL__
16344 +
16345 +#ifdef CONFIG_COMPAT
16346 +
16347 +struct vcmd_ctx_dlimit_base_v0_x32 {
16348 +       compat_uptr_t name_ptr;
16349 +       uint32_t flags;
16350 +};
16351 +
16352 +struct vcmd_ctx_dlimit_v0_x32 {
16353 +       compat_uptr_t name_ptr;
16354 +       uint32_t space_used;                    /* used space in kbytes */
16355 +       uint32_t space_total;                   /* maximum space in kbytes */
16356 +       uint32_t inodes_used;                   /* used inodes */
16357 +       uint32_t inodes_total;                  /* maximum inodes */
16358 +       uint32_t reserved;                      /* reserved for root in % */
16359 +       uint32_t flags;
16360 +};
16361 +
16362 +#endif /* CONFIG_COMPAT */
16363 +
16364 +#include <linux/compiler.h>
16365 +
16366 +extern int vc_add_dlimit(uint32_t, void __user *);
16367 +extern int vc_rem_dlimit(uint32_t, void __user *);
16368 +
16369 +extern int vc_set_dlimit(uint32_t, void __user *);
16370 +extern int vc_get_dlimit(uint32_t, void __user *);
16371 +
16372 +#ifdef CONFIG_COMPAT
16373 +
16374 +extern int vc_add_dlimit_x32(uint32_t, void __user *);
16375 +extern int vc_rem_dlimit_x32(uint32_t, void __user *);
16376 +
16377 +extern int vc_set_dlimit_x32(uint32_t, void __user *);
16378 +extern int vc_get_dlimit_x32(uint32_t, void __user *);
16379 +
16380 +#endif /* CONFIG_COMPAT */
16381 +
16382 +#endif /* __KERNEL__ */
16383 +#endif /* _VX_DLIMIT_CMD_H */
16384 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/global.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/global.h
16385 --- linux-2.6.16.20/include/linux/vserver/global.h      1970-01-01 01:00:00 +0100
16386 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/global.h 2006-04-26 19:07:00 +0200
16387 @@ -0,0 +1,8 @@
16388 +#ifndef _VX_GLOBAL_H
16389 +#define _VX_GLOBAL_H
16390 +
16391 +
16392 +extern atomic_t vx_global_ctotal;
16393 +extern atomic_t vx_global_cactive;
16394 +
16395 +#endif /* _VX_GLOBAL_H */
16396 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/history.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/history.h
16397 --- linux-2.6.16.20/include/linux/vserver/history.h     1970-01-01 01:00:00 +0100
16398 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/history.h        2006-04-26 19:07:00 +0200
16399 @@ -0,0 +1,196 @@
16400 +#ifndef _VX_HISTORY_H
16401 +#define _VX_HISTORY_H
16402 +
16403 +#ifdef CONFIG_VSERVER_HISTORY
16404 +
16405 +extern unsigned volatile int vxh_active;
16406 +
16407 +struct _vxhe_vxi {
16408 +       struct vx_info *ptr;
16409 +       unsigned xid;
16410 +       unsigned usecnt;
16411 +       unsigned tasks;
16412 +};
16413 +
16414 +struct _vxhe_set_clr {
16415 +       void *data;
16416 +};
16417 +
16418 +struct _vxhe_loc_lookup {
16419 +       unsigned arg;
16420 +};
16421 +
16422 +enum {
16423 +       VXH_UNUSED=0,
16424 +       VXH_THROW_OOPS=1,
16425 +
16426 +       VXH_GET_VX_INFO,
16427 +       VXH_PUT_VX_INFO,
16428 +       VXH_INIT_VX_INFO,
16429 +       VXH_SET_VX_INFO,
16430 +       VXH_CLR_VX_INFO,
16431 +       VXH_CLAIM_VX_INFO,
16432 +       VXH_RELEASE_VX_INFO,
16433 +       VXH_ALLOC_VX_INFO,
16434 +       VXH_DEALLOC_VX_INFO,
16435 +       VXH_HASH_VX_INFO,
16436 +       VXH_UNHASH_VX_INFO,
16437 +       VXH_LOC_VX_INFO,
16438 +       VXH_LOOKUP_VX_INFO,
16439 +       VXH_CREATE_VX_INFO,
16440 +};
16441 +
16442 +struct _vx_hist_entry {
16443 +       void *loc;
16444 +       unsigned short seq;
16445 +       unsigned short type;
16446 +       struct _vxhe_vxi vxi;
16447 +       union {
16448 +               struct _vxhe_set_clr sc;
16449 +               struct _vxhe_loc_lookup ll;
16450 +       };
16451 +};
16452 +
16453 +struct _vx_hist_entry *vxh_advance(void *loc);
16454 +
16455 +
16456 +static inline
16457 +void   __vxh_copy_vxi(struct _vx_hist_entry *entry, struct vx_info *vxi)
16458 +{
16459 +       entry->vxi.ptr = vxi;
16460 +       if (vxi) {
16461 +               entry->vxi.usecnt = atomic_read(&vxi->vx_usecnt);
16462 +               entry->vxi.tasks = atomic_read(&vxi->vx_tasks);
16463 +               entry->vxi.xid = vxi->vx_id;
16464 +       }
16465 +}
16466 +
16467 +
16468 +#define        __HERE__ current_text_addr()
16469 +
16470 +#define __VXH_BODY(__type, __data, __here)     \
16471 +       struct _vx_hist_entry *entry;           \
16472 +                                               \
16473 +       preempt_disable();                      \
16474 +       entry = vxh_advance(__here);            \
16475 +       __data;                                 \
16476 +       entry->type = __type;                   \
16477 +       preempt_enable();
16478 +
16479 +
16480 +       /* pass vxi only */
16481 +
16482 +#define __VXH_SMPL                             \
16483 +       __vxh_copy_vxi(entry, vxi)
16484 +
16485 +static inline
16486 +void   __vxh_smpl(struct vx_info *vxi, int __type, void *__here)
16487 +{
16488 +       __VXH_BODY(__type, __VXH_SMPL, __here)
16489 +}
16490 +
16491 +       /* pass vxi and data (void *) */
16492 +
16493 +#define __VXH_DATA                             \
16494 +       __vxh_copy_vxi(entry, vxi);             \
16495 +       entry->sc.data = data
16496 +
16497 +static inline
16498 +void   __vxh_data(struct vx_info *vxi, void *data,
16499 +                       int __type, void *__here)
16500 +{
16501 +       __VXH_BODY(__type, __VXH_DATA, __here)
16502 +}
16503 +
16504 +       /* pass vxi and arg (long) */
16505 +
16506 +#define __VXH_LONG                             \
16507 +       __vxh_copy_vxi(entry, vxi);             \
16508 +       entry->ll.arg = arg
16509 +
16510 +static inline
16511 +void   __vxh_long(struct vx_info *vxi, long arg,
16512 +                       int __type, void *__here)
16513 +{
16514 +       __VXH_BODY(__type, __VXH_LONG, __here)
16515 +}
16516 +
16517 +
16518 +static inline
16519 +void   __vxh_throw_oops(void *__here)
16520 +{
16521 +       __VXH_BODY(VXH_THROW_OOPS, {}, __here);
16522 +       /* prevent further acquisition */
16523 +       vxh_active = 0;
16524 +}
16525 +
16526 +/* history recorders: each expands to one expression, the caller supplies ';' */
16527 +#define vxh_throw_oops()       __vxh_throw_oops(__HERE__)
16528 +
16529 +#define __vxh_get_vx_info(v,h) __vxh_smpl(v, VXH_GET_VX_INFO, h)
16530 +#define __vxh_put_vx_info(v,h) __vxh_smpl(v, VXH_PUT_VX_INFO, h)
16531 +
16532 +#define __vxh_init_vx_info(v,d,h) \
16533 +       __vxh_data(v,d, VXH_INIT_VX_INFO, h)
16534 +#define __vxh_set_vx_info(v,d,h) \
16535 +       __vxh_data(v,d, VXH_SET_VX_INFO, h)
16536 +#define __vxh_clr_vx_info(v,d,h) \
16537 +       __vxh_data(v,d, VXH_CLR_VX_INFO, h)
16538 +
16539 +#define __vxh_claim_vx_info(v,d,h) \
16540 +       __vxh_data(v,d, VXH_CLAIM_VX_INFO, h)
16541 +#define __vxh_release_vx_info(v,d,h) \
16542 +       __vxh_data(v,d, VXH_RELEASE_VX_INFO, h)
16543 +
16544 +#define vxh_alloc_vx_info(v) \
16545 +       __vxh_smpl(v, VXH_ALLOC_VX_INFO, __HERE__)
16546 +#define vxh_dealloc_vx_info(v) \
16547 +       __vxh_smpl(v, VXH_DEALLOC_VX_INFO, __HERE__)
16548 +
16549 +#define vxh_hash_vx_info(v) \
16550 +       __vxh_smpl(v, VXH_HASH_VX_INFO, __HERE__)
16551 +#define vxh_unhash_vx_info(v) \
16552 +       __vxh_smpl(v, VXH_UNHASH_VX_INFO, __HERE__)
16553 +
16554 +#define vxh_loc_vx_info(v,l) \
16555 +       __vxh_long(v,l, VXH_LOC_VX_INFO, __HERE__)
16556 +#define vxh_lookup_vx_info(v,l) \
16557 +       __vxh_long(v,l, VXH_LOOKUP_VX_INFO, __HERE__)
16558 +#define vxh_create_vx_info(v,l) \
16559 +       __vxh_long(v,l, VXH_CREATE_VX_INFO, __HERE__)
16560 +
16561 +extern void vxh_dump_history(void);
16562 +
16563 +
16564 +#else  /* CONFIG_VSERVER_HISTORY */
16565 +
16566 +#define        __HERE__        0
16567 +
16568 +#define vxh_throw_oops()               do { } while (0)
16569 +
16570 +#define __vxh_get_vx_info(v,h)         do { } while (0)
16571 +#define __vxh_put_vx_info(v,h)         do { } while (0)
16572 +
16573 +#define __vxh_init_vx_info(v,d,h)      do { } while (0)
16574 +#define __vxh_set_vx_info(v,d,h)       do { } while (0)
16575 +#define __vxh_clr_vx_info(v,d,h)       do { } while (0)
16576 +
16577 +#define __vxh_claim_vx_info(v,d,h)     do { } while (0)
16578 +#define __vxh_release_vx_info(v,d,h)   do { } while (0)
16579 +
16580 +#define vxh_alloc_vx_info(v)           do { } while (0)
16581 +#define vxh_dealloc_vx_info(v)         do { } while (0)
16582 +
16583 +#define vxh_hash_vx_info(v)            do { } while (0)
16584 +#define vxh_unhash_vx_info(v)          do { } while (0)
16585 +
16586 +#define vxh_loc_vx_info(a,v)           do { } while (0)
16587 +#define vxh_lookup_vx_info(a,v)                do { } while (0)
16588 +#define vxh_create_vx_info(a,v)                do { } while (0)
16589 +
16590 +#define vxh_dump_history()             do { } while (0)
16591 +
16592 +
16593 +#endif /* CONFIG_VSERVER_HISTORY */
16594 +
16595 +#endif /* _VX_HISTORY_H */
16596 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/inode.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/inode.h
16597 --- linux-2.6.16.20/include/linux/vserver/inode.h       1970-01-01 01:00:00 +0100
16598 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/inode.h  2006-04-26 19:07:00 +0200
16599 @@ -0,0 +1,38 @@
16600 +#ifndef _VX_INODE_H
16601 +#define _VX_INODE_H
16602 +
16603 +
16604 +#define IATTR_TAG      0x01000000
16605 +
16606 +#define IATTR_ADMIN    0x00000001
16607 +#define IATTR_WATCH    0x00000002
16608 +#define IATTR_HIDE     0x00000004
16609 +#define IATTR_FLAGS    0x00000007
16610 +
16611 +#define IATTR_BARRIER  0x00010000
16612 +#define IATTR_IUNLINK  0x00020000
16613 +#define IATTR_IMMUTABLE 0x00040000
16614 +
16615 +#ifdef __KERNEL__
16616 +
16617 +
16618 +#ifdef CONFIG_VSERVER_PROC_SECURE
16619 +#define IATTR_PROC_DEFAULT     ( IATTR_ADMIN | IATTR_HIDE )
16620 +#define IATTR_PROC_SYMLINK     ( IATTR_ADMIN )
16621 +#else
16622 +#define IATTR_PROC_DEFAULT     ( IATTR_ADMIN )
16623 +#define IATTR_PROC_SYMLINK     ( IATTR_ADMIN )
16624 +#endif
16625 +
16626 +#define vx_hide_check(c,m)     (((m) & IATTR_HIDE) ? vx_check(c,m) : 1)
16627 +
16628 +#endif /* __KERNEL__ */
16629 +
16630 +/* inode ioctls */
16631 +
16632 +#define FIOC_GETXFLG   _IOR('x', 5, long)
16633 +#define FIOC_SETXFLG   _IOW('x', 6, long)
16634 +
16635 +#else  /* _VX_INODE_H */
16636 +#warning duplicate inclusion
16637 +#endif /* _VX_INODE_H */
16638 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/inode_cmd.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/inode_cmd.h
16639 --- linux-2.6.16.20/include/linux/vserver/inode_cmd.h   1970-01-01 01:00:00 +0100
16640 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/inode_cmd.h      2006-04-26 19:07:00 +0200
16641 @@ -0,0 +1,59 @@
16642 +#ifndef _VX_INODE_CMD_H
16643 +#define _VX_INODE_CMD_H
16644 +
16645 +
16646 +/*  inode vserver commands */
16647 +
16648 +#define VCMD_get_iattr_v0      VC_CMD(INODE, 1, 0)
16649 +#define VCMD_set_iattr_v0      VC_CMD(INODE, 2, 0)
16650 +
16651 +#define VCMD_get_iattr         VC_CMD(INODE, 1, 1)
16652 +#define VCMD_set_iattr         VC_CMD(INODE, 2, 1)
16653 +
16654 +struct vcmd_ctx_iattr_v0 {
16655 +       /* device handle in id */
16656 +       uint64_t ino;
16657 +       uint32_t xid;
16658 +       uint32_t flags;
16659 +       uint32_t mask;
16660 +};
16661 +
16662 +struct vcmd_ctx_iattr_v1 {
16663 +       const char __user *name;
16664 +       uint32_t xid;
16665 +       uint32_t flags;
16666 +       uint32_t mask;
16667 +};
16668 +
16669 +
16670 +#ifdef __KERNEL__
16671 +
16672 +
16673 +#ifdef CONFIG_COMPAT
16674 +
16675 +struct vcmd_ctx_iattr_v1_x32 {
16676 +       compat_uptr_t name_ptr;
16677 +       uint32_t xid;
16678 +       uint32_t flags;
16679 +       uint32_t mask;
16680 +};
16681 +
16682 +#endif /* CONFIG_COMPAT */
16683 +
16684 +#include <linux/compiler.h>
16685 +
16686 +extern int vc_get_iattr_v0(uint32_t, void __user *);
16687 +extern int vc_set_iattr_v0(uint32_t, void __user *);
16688 +
16689 +extern int vc_get_iattr(uint32_t, void __user *);
16690 +extern int vc_set_iattr(uint32_t, void __user *);
16691 +
16692 +#ifdef CONFIG_COMPAT
16693 +
16694 +extern int vc_get_iattr_x32(uint32_t, void __user *);
16695 +extern int vc_set_iattr_x32(uint32_t, void __user *);
16696 +
16697 +#endif /* CONFIG_COMPAT */
16698 +
16699 +#endif /* __KERNEL__ */
16700 +#endif /* _VX_INODE_CMD_H */
16701 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/legacy.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/legacy.h
16702 --- linux-2.6.16.20/include/linux/vserver/legacy.h      1970-01-01 01:00:00 +0100
16703 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/legacy.h 2006-04-26 19:07:00 +0200
16704 @@ -0,0 +1,49 @@
16705 +#ifndef _VX_LEGACY_H
16706 +#define _VX_LEGACY_H
16707 +
16708 +#include "switch.h"
16709 +
16710 +
16711 +/*  compatibility vserver commands */
16712 +
16713 +#define VCMD_new_s_context     VC_CMD(COMPAT, 1, 1)
16714 +#define VCMD_set_ipv4root      VC_CMD(COMPAT, 2, 3)
16715 +
16716 +#define VCMD_create_context    VC_CMD(VSETUP, 1, 0)
16717 +
16718 +/*  compatibility vserver arguments */
16719 +
16720 +struct vcmd_new_s_context_v1 {
16721 +       uint32_t remove_cap;
16722 +       uint32_t flags;
16723 +};
16724 +
16725 +struct vcmd_set_ipv4root_v3 {
16726 +       /* number of pairs in id */
16727 +       uint32_t broadcast;
16728 +       struct {
16729 +               uint32_t ip;
16730 +               uint32_t mask;
16731 +       } nx_mask_pair[NB_IPV4ROOT];
16732 +};
16733 +
16734 +
16735 +#define VX_INFO_LOCK           1       /* Can't request a new vx_id */
16736 +#define VX_INFO_NPROC          4       /* Limit number of processes in a context */
16737 +#define VX_INFO_PRIVATE                8       /* Noone can join this security context */
16738 +#define VX_INFO_INIT           16      /* This process wants to become the */
16739 +                                       /* logical process 1 of the security */
16740 +                                       /* context */
16741 +#define VX_INFO_HIDEINFO       32      /* Hide some information in /proc */
16742 +#define VX_INFO_ULIMIT         64      /* Use ulimit of the current process */
16743 +                                       /* to become the global limits */
16744 +                                       /* of the context */
16745 +#define VX_INFO_NAMESPACE      128     /* save private namespace */
16746 +
16747 +
16748 +#ifdef __KERNEL__
16749 +extern int vc_new_s_context(uint32_t, void __user *);
16750 +extern int vc_set_ipv4root(uint32_t, void __user *);
16751 +
16752 +#endif /* __KERNEL__ */
16753 +#endif /* _VX_LEGACY_H */
16754 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/limit.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/limit.h
16755 --- linux-2.6.16.20/include/linux/vserver/limit.h       1970-01-01 01:00:00 +0100
16756 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/limit.h  2006-05-02 02:06:16 +0200
16757 @@ -0,0 +1,64 @@
16758 +#ifndef _VX_LIMIT_H
16759 +#define _VX_LIMIT_H
16760 +
16761 +
16762 +#define VLIMIT_NSOCK   16
16763 +#define VLIMIT_OPENFD  17
16764 +#define VLIMIT_ANON    18
16765 +#define VLIMIT_SHMEM   19
16766 +#define VLIMIT_SEMARY  20
16767 +#define VLIMIT_NSEMS   21
16768 +#define VLIMIT_DENTRY  22
16769 +
16770 +#ifdef __KERNEL__
16771 +
16772 +/*     keep in sync with CRLIM_INFINITY */
16773 +
16774 +#define        VLIM_INFINITY   (~0ULL)
16775 +
16776 +#ifndef RLIM_INFINITY
16777 +#warning RLIM_INFINITY is undefined
16778 +#endif
16779 +
16780 +#define __rlim_val(l,r,v)      ((l)->res[(r)].v)
16781 +
16782 +#define __rlim_soft(l,r)       __rlim_val(l,r,soft)
16783 +#define __rlim_hard(l,r)       __rlim_val(l,r,hard)
16784 +
16785 +#define __rlim_rcur(l,r)       __rlim_val(l,r,rcur)
16786 +#define __rlim_rmin(l,r)       __rlim_val(l,r,rmin)
16787 +#define __rlim_rmax(l,r)       __rlim_val(l,r,rmax)
16788 +
16789 +#define __rlim_lhit(l,r)       __rlim_val(l,r,lhit)
16790 +#define __rlim_hit(l,r)                atomic_inc(&__rlim_lhit(l,r))
16791 +
16792 +typedef atomic_long_t rlim_atomic_t;
16793 +typedef unsigned long rlim_t;
16794 +
16795 +#define __rlim_get(l,r)                atomic_long_read(&__rlim_rcur(l,r))
16796 +#define __rlim_set(l,r,v)      atomic_long_set(&__rlim_rcur(l,r), v)
16797 +#define __rlim_inc(l,r)                atomic_long_inc(&__rlim_rcur(l,r))
16798 +#define __rlim_dec(l,r)                atomic_long_dec(&__rlim_rcur(l,r))
16799 +#define __rlim_add(l,r,v)      atomic_long_add(v, &__rlim_rcur(l,r))
16800 +#define __rlim_sub(l,r,v)      atomic_long_sub(v, &__rlim_rcur(l,r))
16801 +
16802 +
16803 +#if    (RLIM_INFINITY == VLIM_INFINITY)
16804 +#define        VX_VLIM(r) ((long long)(long)(r))
16805 +#define        VX_RLIM(v) ((rlim_t)(v))
16806 +#else
16807 +#define        VX_VLIM(r) (((r) == RLIM_INFINITY) \
16808 +               ? VLIM_INFINITY : (long long)(r))
16809 +#define        VX_RLIM(v) (((v) == VLIM_INFINITY) \
16810 +               ? RLIM_INFINITY : (rlim_t)(v))
16811 +#endif
16812 +
16813 +struct sysinfo;
16814 +
16815 +void vx_vsi_meminfo(struct sysinfo *);
16816 +void vx_vsi_swapinfo(struct sysinfo *);
16817 +
16818 +#define NUM_LIMITS     24
16819 +
16820 +#endif /* __KERNEL__ */
16821 +#endif /* _VX_LIMIT_H */
16822 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/limit_cmd.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/limit_cmd.h
16823 --- linux-2.6.16.20/include/linux/vserver/limit_cmd.h   1970-01-01 01:00:00 +0100
16824 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/limit_cmd.h      2006-04-26 19:07:00 +0200
16825 @@ -0,0 +1,55 @@
16826 +#ifndef _VX_LIMIT_CMD_H
16827 +#define _VX_LIMIT_CMD_H
16828 +
16829 +
16830 +/*  rlimit vserver commands */
16831 +
16832 +#define VCMD_get_rlimit                VC_CMD(RLIMIT, 1, 0)
16833 +#define VCMD_set_rlimit                VC_CMD(RLIMIT, 2, 0)
16834 +#define VCMD_get_rlimit_mask   VC_CMD(RLIMIT, 3, 0)
16835 +
16836 +struct vcmd_ctx_rlimit_v0 {
16837 +       uint32_t id;
16838 +       uint64_t minimum;
16839 +       uint64_t softlimit;
16840 +       uint64_t maximum;
16841 +};
16842 +
16843 +struct vcmd_ctx_rlimit_mask_v0 {
16844 +       uint32_t minimum;
16845 +       uint32_t softlimit;
16846 +       uint32_t maximum;
16847 +};
16848 +
16849 +#define CRLIM_UNSET            (0ULL)
16850 +#define CRLIM_INFINITY         (~0ULL)
16851 +#define CRLIM_KEEP             (~1ULL)
16852 +
16853 +#ifdef __KERNEL__
16854 +
16855 +#ifdef CONFIG_IA32_EMULATION
16856 +/* 32bit userspace layout of vcmd_ctx_rlimit_v0: 64bit members are 4-byte aligned */
16857 +struct vcmd_ctx_rlimit_v0_x32 {
16858 +       uint32_t id;
16859 +       uint64_t minimum;       /* must sit at offset 4, hence packed below */
16860 +       uint64_t softlimit;
16861 +       uint64_t maximum;
16862 +} __attribute__ ((packed, aligned (4)));
16863 +
16864 +#endif /* CONFIG_IA32_EMULATION */
16865 +
16866 +#include <linux/compiler.h>
16867 +
16868 +extern int vc_get_rlimit(uint32_t, void __user *);
16869 +extern int vc_set_rlimit(uint32_t, void __user *);
16870 +extern int vc_get_rlimit_mask(uint32_t, void __user *);
16871 +
16872 +#ifdef CONFIG_IA32_EMULATION
16873 +
16874 +extern int vc_get_rlimit_x32(uint32_t, void __user *);
16875 +extern int vc_set_rlimit_x32(uint32_t, void __user *);
16876 +
16877 +#endif /* CONFIG_IA32_EMULATION */
16878 +
16879 +#endif /* __KERNEL__ */
16880 +#endif /* _VX_LIMIT_CMD_H */
16881 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/limit_def.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/limit_def.h
16882 --- linux-2.6.16.20/include/linux/vserver/limit_def.h   1970-01-01 01:00:00 +0100
16883 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/limit_def.h      2006-04-26 19:07:00 +0200
16884 @@ -0,0 +1,47 @@
16885 +#ifndef _VX_LIMIT_DEF_H
16886 +#define _VX_LIMIT_DEF_H
16887 +
16888 +#include <asm/atomic.h>
16889 +#include <asm/resource.h>
16890 +
16891 +#include "limit.h"
16892 +
16893 +
16894 +struct _vx_res_limit {
16895 +       rlim_t soft;            /* Context soft limit */
16896 +       rlim_t hard;            /* Context hard limit */
16897 +
16898 +       rlim_atomic_t rcur;     /* Current value */
16899 +       rlim_t rmin;            /* Context minimum */
16900 +       rlim_t rmax;            /* Context maximum */
16901 +
16902 +       atomic_t lhit;          /* Limit hits */
16903 +};
16904 +
16905 +/* context sub struct */
16906 +
16907 +struct _vx_limit {
16908 +       struct _vx_res_limit res[NUM_LIMITS];
16909 +};
16910 +
16911 +#ifdef CONFIG_VSERVER_DEBUG
16912 +
16913 +static inline void __dump_vx_limit(struct _vx_limit *limit)
16914 +{
16915 +       int i;
16916 +
16917 +       printk("\t_vx_limit:");
16918 +       for (i=0; i<NUM_LIMITS; i++) {
16919 +               printk("\t [%2d] = %8lu %8lu/%8lu, %8ld/%8ld, %8d\n",
16920 +                       i, (unsigned long)__rlim_get(limit, i),
16921 +                       (unsigned long)__rlim_rmin(limit, i),
16922 +                       (unsigned long)__rlim_rmax(limit, i),
16923 +                       (long)__rlim_soft(limit, i),
16924 +                       (long)__rlim_hard(limit, i),
16925 +                       atomic_read(&__rlim_lhit(limit, i)));
16926 +       }
16927 +}
16928 +
16929 +#endif
16930 +
16931 +#endif /* _VX_LIMIT_DEF_H */
16932 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/limit_int.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/limit_int.h
16933 --- linux-2.6.16.20/include/linux/vserver/limit_int.h   1970-01-01 01:00:00 +0100
16934 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/limit_int.h      2006-04-26 19:07:00 +0200
16935 @@ -0,0 +1,83 @@
16936 +#ifndef _VX_LIMIT_INT_H
16937 +#define _VX_LIMIT_INT_H
16938 +
16939 +
16940 +#ifdef __KERNEL__
16941 +
16942 +#define VXD_RCRES_COND(r)      VXD_CBIT(cres, (r))
16943 +#define VXD_RLIMIT_COND(r)     VXD_CBIT(limit, (r))
16944 +
16945 +extern const char *vlimit_name[NUM_LIMITS];
16946 +
16947 +static inline void __vx_acc_cres(struct vx_info *vxi,
16948 +       int res, int dir, void *_data, char *_file, int _line)
16949 +{
16950 +       if (VXD_RCRES_COND(res))
16951 +               vxlprintk(1, "vx_acc_cres[%5d,%s,%2d]: %5ld%s (%p)",
16952 +                       (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
16953 +                       (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
16954 +                       (dir > 0) ? "++" : "--", _data, _file, _line);
16955 +       if (!vxi)
16956 +               return;
16957 +
16958 +       if (dir > 0)
16959 +               __rlim_inc(&vxi->limit, res);
16960 +       else
16961 +               __rlim_dec(&vxi->limit, res);
16962 +}
16963 +
16964 +static inline void __vx_add_cres(struct vx_info *vxi,
16965 +       int res, int amount, void *_data, char *_file, int _line)
16966 +{
16967 +       if (VXD_RCRES_COND(res))
16968 +               vxlprintk(1, "vx_add_cres[%5d,%s,%2d]: %5ld += %5d (%p)",
16969 +                       (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
16970 +                       (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
16971 +                       amount, _data, _file, _line);
16972 +       if (amount == 0)
16973 +               return;
16974 +       if (!vxi)
16975 +               return;
16976 +       __rlim_add(&vxi->limit, res, amount);
16977 +}
16978 +
16979 +static inline int __vx_cres_avail(struct vx_info *vxi,
16980 +               int res, int num, char *_file, int _line)
16981 +{
16982 +       rlim_t value;
16983 +
16984 +       if (VXD_RLIMIT_COND(res))
16985 +               vxlprintk(1, "vx_cres_avail[%5d,%s,%2d]: %5ld/%5ld > %5ld + %5d",
16986 +                       (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
16987 +                       (vxi ? (long)__rlim_soft(&vxi->limit, res) : -1),
16988 +                       (vxi ? (long)__rlim_hard(&vxi->limit, res) : -1),
16989 +                       (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
16990 +                       num, _file, _line);
16991 +       if (num == 0)
16992 +               return 1;
16993 +       if (!vxi)
16994 +               return 1;
16995 +
16996 +       value = __rlim_get(&vxi->limit, res);
16997 +
16998 +       if (value > __rlim_rmax(&vxi->limit, res))
16999 +               __rlim_rmax(&vxi->limit, res) = value;
17000 +       else if (value < __rlim_rmin(&vxi->limit, res))
17001 +               __rlim_rmin(&vxi->limit, res) = value;
17002 +
17003 +       if (__rlim_soft(&vxi->limit, res) == RLIM_INFINITY)
17004 +               return -1;
17005 +       if (value + num <= __rlim_soft(&vxi->limit, res))
17006 +               return -1;
17007 +
17008 +       if (__rlim_hard(&vxi->limit, res) == RLIM_INFINITY)
17009 +               return 1;
17010 +       if (value + num <= __rlim_hard(&vxi->limit, res))
17011 +               return 1;
17012 +
17013 +       __rlim_hit(&vxi->limit, res);
17014 +       return 0;
17015 +}
17016 +
17017 +#endif /* __KERNEL__ */
17018 +#endif /* _VX_LIMIT_INT_H */
17019 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/monitor.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/monitor.h
17020 --- linux-2.6.16.20/include/linux/vserver/monitor.h     1970-01-01 01:00:00 +0100
17021 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/monitor.h        2006-04-26 19:07:00 +0200
17022 @@ -0,0 +1,97 @@
17023 +#ifndef _VX_MONITOR_H
17024 +#define _VX_MONITOR_H
17025 +
17026 +#include <linux/config.h>
17027 +
17028 +
17029 +enum {
17030 +       VXM_UNUSED = 0,
17031 +
17032 +       VXM_SYNC = 0x10,
17033 +
17034 +       VXM_UPDATE = 0x20,
17035 +       VXM_UPDATE_1,
17036 +       VXM_UPDATE_2,
17037 +
17038 +       VXM_RQINFO_1 = 0x24,
17039 +       VXM_RQINFO_2,
17040 +
17041 +       VXM_ACTIVATE = 0x40,
17042 +       VXM_DEACTIVATE,
17043 +       VXM_IDLE,
17044 +
17045 +       VXM_HOLD = 0x44,
17046 +       VXM_UNHOLD,
17047 +
17048 +       VXM_MIGRATE = 0x48,
17049 +       VXM_RESCHED,
17050 +
17051 +       /* all other bits are flags */
17052 +       VXM_SCHED = 0x80,
17053 +};
17054 +
17055 +struct _vxm_update_1 {
17056 +       uint32_t tokens_max;
17057 +       uint32_t fill_rate;
17058 +       uint32_t interval;
17059 +};
17060 +
17061 +struct _vxm_update_2 {
17062 +       uint32_t tokens_min;
17063 +       uint32_t fill_rate;
17064 +       uint32_t interval;
17065 +};
17066 +
17067 +struct _vxm_rqinfo_1 {
17068 +       uint16_t running;
17069 +       uint16_t onhold;
17070 +       uint16_t iowait;
17071 +       uint16_t uintr;
17072 +       uint32_t idle_tokens;
17073 +};
17074 +
17075 +struct _vxm_rqinfo_2 {
17076 +       uint32_t norm_time;
17077 +       uint32_t idle_time;
17078 +       uint32_t idle_skip;
17079 +};
17080 +
17081 +struct _vxm_sched {
17082 +       uint32_t tokens;
17083 +       uint32_t norm_time;
17084 +       uint32_t idle_time;
17085 +};
17086 +
17087 +struct _vxm_task {
17088 +       uint16_t pid;
17089 +       uint16_t state;
17090 +};
17091 +
17092 +struct _vxm_event {
17093 +       uint32_t jif;
17094 +       union {
17095 +               uint32_t seq;
17096 +               uint32_t sec;
17097 +       };
17098 +       union {
17099 +               uint32_t tokens;
17100 +               uint32_t nsec;
17101 +               struct _vxm_task tsk;
17102 +       };
17103 +};
17104 +
17105 +struct _vx_mon_entry {
17106 +       uint16_t type;
17107 +       uint16_t xid;
17108 +       union {
17109 +               struct _vxm_event ev;
17110 +               struct _vxm_sched sd;
17111 +               struct _vxm_update_1 u1;
17112 +               struct _vxm_update_2 u2;
17113 +               struct _vxm_rqinfo_1 q1;
17114 +               struct _vxm_rqinfo_2 q2;
17115 +       };
17116 +};
17117 +
17118 +
17119 +#endif /* _VX_MONITOR_H */
17120 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/namespace.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/namespace.h
17121 --- linux-2.6.16.20/include/linux/vserver/namespace.h   1970-01-01 01:00:00 +0100
17122 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/namespace.h      2006-04-26 19:07:00 +0200
17123 @@ -0,0 +1,15 @@
17124 +#ifndef _VX_NAMESPACE_H
17125 +#define _VX_NAMESPACE_H
17126 +
17127 +
17128 +#include <linux/types.h>
17129 +
17130 +struct vx_info;
17131 +struct namespace;
17132 +struct fs_struct;
17133 +
17134 +extern int vx_set_namespace(struct vx_info *, struct namespace *, struct fs_struct *);
17135 +
17136 +#else  /* _VX_NAMESPACE_H */
17137 +#warning duplicate inclusion
17138 +#endif /* _VX_NAMESPACE_H */
17139 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/namespace_cmd.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/namespace_cmd.h
17140 --- linux-2.6.16.20/include/linux/vserver/namespace_cmd.h       1970-01-01 01:00:00 +0100
17141 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/namespace_cmd.h  2006-04-26 19:07:00 +0200
17142 @@ -0,0 +1,19 @@
17143 +#ifndef _VX_NAMESPACE_CMD_H
17144 +#define _VX_NAMESPACE_CMD_H
17145 +
17146 +
17147 +#define VCMD_enter_namespace   VC_CMD(PROCALT, 1, 0)
17148 +#define VCMD_cleanup_namespace VC_CMD(PROCALT, 2, 0)
17149 +
17150 +#define VCMD_set_namespace_v0  VC_CMD(PROCALT, 3, 0)
17151 +#define VCMD_set_namespace     VC_CMD(PROCALT, 3, 1)
17152 +
17153 +
17154 +#ifdef __KERNEL__
17155 +
17156 +extern int vc_enter_namespace(uint32_t, void __user *);
17157 +extern int vc_cleanup_namespace(uint32_t, void __user *);
17158 +extern int vc_set_namespace(uint32_t, void __user *);
17159 +
17160 +#endif /* __KERNEL__ */
17161 +#endif /* _VX_NAMESPACE_CMD_H */
17162 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/network.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/network.h
17163 --- linux-2.6.16.20/include/linux/vserver/network.h     1970-01-01 01:00:00 +0100
17164 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/network.h        2006-04-27 20:28:48 +0200
17165 @@ -0,0 +1,139 @@
17166 +#ifndef _VX_NETWORK_H
17167 +#define _VX_NETWORK_H
17168 +
17169 +#include <linux/types.h>
17170 +
17171 +
17172 +#define MAX_N_CONTEXT  65535   /* Arbitrary limit */
17173 +
17174 +#define NX_DYNAMIC_ID  ((uint32_t)-1)          /* id for dynamic context */
17175 +
17176 +#define NB_IPV4ROOT    16
17177 +
17178 +
17179 +/* network flags */
17180 +
17181 +#define NXF_STATE_SETUP                (1ULL<<32)
17182 +
17183 +#define NXF_SC_HELPER          (1ULL<<36)
17184 +#define NXF_PERSISTENT         (1ULL<<38)
17185 +
17186 +#define NXF_ONE_TIME           (0x0001ULL<<32)
17187 +
17188 +#define NXF_INIT_SET           (0)
17189 +
17190 +
17191 +/* address types */
17192 +
17193 +#define NXA_TYPE_IPV4          1
17194 +#define NXA_TYPE_IPV6          2
17195 +
17196 +#define NXA_MOD_BCAST          (1<<8)
17197 +
17198 +#define NXA_TYPE_ANY           (~0)
17199 +
17200 +
17201 +#ifdef __KERNEL__
17202 +
17203 +#include <linux/list.h>
17204 +#include <linux/spinlock.h>
17205 +#include <linux/rcupdate.h>
17206 +#include <asm/atomic.h>
17207 +
17208 +
17209 +struct nx_info {
17210 +       struct hlist_node nx_hlist;     /* linked list of nxinfos */
17211 +       nid_t nx_id;                    /* vnet id */
17212 +       atomic_t nx_usecnt;             /* usage count */
17213 +       atomic_t nx_tasks;              /* tasks count */
17214 +       int nx_state;                   /* context state (presumably NXS_* bits - confirm) */
17215 +
17216 +       uint64_t nx_flags;              /* network flag word */
17217 +       uint64_t nx_ncaps;              /* network capabilities */
17218 +
17219 +       int nbipv4;                     /* presumably number of ipv4[] entries in use - confirm */
17220 +       __u32 ipv4[NB_IPV4ROOT];        /* Process can only bind to these IPs */
17221 +                                       /* The first one is used to connect */
17222 +                                       /* and to bind any service */
17223 +                                       /* The others must be used explicitly */
17224 +       __u32 mask[NB_IPV4ROOT];        /* Netmask for each ipv4 */
17225 +                                       /* Used to select the proper source */
17226 +                                       /* address for sockets */
17227 +       __u32 v4_bcast;                 /* Broadcast address to receive UDP  */
17228 +
17229 +       char nx_name[65];               /* network context name */
17230 +};
17231 +
17232 +
17233 +/* status flags */
17234 +
17235 +#define NXS_HASHED      0x0001
17236 +#define NXS_SHUTDOWN    0x0100
17237 +#define NXS_RELEASED    0x8000
17238 +
17239 +/* check conditions */
17240 +
17241 +#define NX_ADMIN       0x0001
17242 +#define NX_WATCH       0x0002
17243 +#define NX_BLEND       0x0004
17244 +#define NX_HOSTID      0x0008
17245 +
17246 +#define NX_IDENT       0x0010
17247 +#define NX_EQUIV       0x0020
17248 +#define NX_PARENT      0x0040
17249 +#define NX_CHILD       0x0080
17250 +
17251 +#define NX_ARG_MASK    0x00F0
17252 +
17253 +#define NX_DYNAMIC     0x0100
17254 +#define NX_STATIC      0x0200
17255 +
17256 +#define NX_ATR_MASK    0x0F00
17257 +
17258 +
17259 +extern struct nx_info *lookup_nx_info(int);
17260 +
17261 +extern int get_nid_list(int, unsigned int *, int);
17262 +extern int nid_is_hashed(nid_t);
17263 +
17264 +extern int nx_migrate_task(struct task_struct *, struct nx_info *);
17265 +
17266 +extern long vs_net_change(struct nx_info *, unsigned int);
17267 +
17268 +struct in_ifaddr;
17269 +struct net_device;
17270 +
17271 +#ifdef CONFIG_INET
17272 +int ifa_in_nx_info(struct in_ifaddr *, struct nx_info *);
17273 +int dev_in_nx_info(struct net_device *, struct nx_info *);
17274 +
17275 +#else /* CONFIG_INET */
17276 +static inline
17277 +int ifa_in_nx_info(struct in_ifaddr *a, struct nx_info *n)
17278 +{
17279 +       return 1;
17280 +}
17281 +
17282 +static inline
17283 +int dev_in_nx_info(struct net_device *d, struct nx_info *n)
17284 +{
17285 +       return 1;
17286 +}
17287 +#endif /* CONFIG_INET */
17288 +
17289 +struct sock;
17290 +
17291 +#ifdef CONFIG_INET
17292 +int nx_addr_conflict(struct nx_info *, uint32_t, struct sock *);
17293 +#else /* CONFIG_INET */
17294 +static inline
17295 +int nx_addr_conflict(struct nx_info *n, uint32_t a, struct sock *s)
17296 +{
17297 +       return 1;
17298 +}
17299 +#endif /* CONFIG_INET */
17300 +
17301 +#endif /* __KERNEL__ */
17302 +#else  /* _VX_NETWORK_H */
17303 +#warning duplicate inclusion
17304 +#endif /* _VX_NETWORK_H */
17305 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/network_cmd.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/network_cmd.h
17306 --- linux-2.6.16.20/include/linux/vserver/network_cmd.h 1970-01-01 01:00:00 +0100
17307 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/network_cmd.h    2006-04-26 19:07:00 +0200
17308 @@ -0,0 +1,89 @@
17309 +#ifndef _VX_NETWORK_CMD_H
17310 +#define _VX_NETWORK_CMD_H
17311 +
17312 +
17313 +/* vinfo commands */
17314 +
17315 +#define VCMD_task_nid          VC_CMD(VINFO, 2, 0)
17316 +
17317 +#ifdef __KERNEL__
17318 +extern int vc_task_nid(uint32_t, void __user *);
17319 +
17320 +#endif /* __KERNEL__ */
17321 +
17322 +#define VCMD_nx_info           VC_CMD(VINFO, 6, 0)
17323 +
17324 +struct vcmd_nx_info_v0 {
17325 +       uint32_t nid;
17326 +       /* more to come */
17327 +};
17328 +
17329 +#ifdef __KERNEL__
17330 +extern int vc_nx_info(uint32_t, void __user *);
17331 +
17332 +#endif /* __KERNEL__ */
17333 +
17334 +#define VCMD_net_create_v0     VC_CMD(VNET, 1, 0)
17335 +#define VCMD_net_create                VC_CMD(VNET, 1, 1)
17336 +
17337 +struct  vcmd_net_create {
17338 +       uint64_t flagword;
17339 +};
17340 +
17341 +#define VCMD_net_migrate       VC_CMD(NETMIG, 1, 0)
17342 +
17343 +#define VCMD_net_add           VC_CMD(NETALT, 1, 0)
17344 +#define VCMD_net_remove                VC_CMD(NETALT, 2, 0)
17345 +
17346 +struct vcmd_net_addr_v0 {
17347 +       uint16_t type;
17348 +       uint16_t count;
17349 +       uint32_t ip[4];
17350 +       uint32_t mask[4];
17351 +       /* more to come */
17352 +};
17353 +
17354 +
17355 +#ifdef __KERNEL__
17356 +extern int vc_net_create(uint32_t, void __user *);
17357 +extern int vc_net_migrate(uint32_t, void __user *);
17358 +
17359 +extern int vc_net_add(uint32_t, void __user *);
17360 +extern int vc_net_remove(uint32_t, void __user *);
17361 +
17362 +#endif /* __KERNEL__ */
17363 +
17364 +
17365 +/* flag commands */
17366 +
17367 +#define VCMD_get_nflags                VC_CMD(FLAGS, 5, 0)
17368 +#define VCMD_set_nflags                VC_CMD(FLAGS, 6, 0)
17369 +
17370 +struct vcmd_net_flags_v0 {
17371 +       uint64_t flagword;
17372 +       uint64_t mask;
17373 +};
17374 +
17375 +#ifdef __KERNEL__
17376 +extern int vc_get_nflags(uint32_t, void __user *);
17377 +extern int vc_set_nflags(uint32_t, void __user *);
17378 +
17379 +#endif /* __KERNEL__ */
17380 +
17381 +
17382 +/* network caps commands */
17383 +
17384 +#define VCMD_get_ncaps         VC_CMD(FLAGS, 7, 0)
17385 +#define VCMD_set_ncaps         VC_CMD(FLAGS, 8, 0)
17386 +
17387 +struct vcmd_net_caps_v0 {
17388 +       uint64_t ncaps;
17389 +       uint64_t cmask;
17390 +};
17391 +
17392 +#ifdef __KERNEL__
17393 +extern int vc_get_ncaps(uint32_t, void __user *);
17394 +extern int vc_set_ncaps(uint32_t, void __user *);
17395 +
17396 +#endif /* __KERNEL__ */
17397 +#endif /* _VX_NETWORK_CMD_H */
17398 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/sched.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/sched.h
17399 --- linux-2.6.16.20/include/linux/vserver/sched.h       1970-01-01 01:00:00 +0100
17400 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/sched.h  2006-04-26 19:07:00 +0200
17401 @@ -0,0 +1,26 @@
17402 +#ifndef _VX_SCHED_H
17403 +#define _VX_SCHED_H
17404 +
17405 +
17406 +#ifdef __KERNEL__
17407 +
17408 +struct timespec;
17409 +
17410 +void vx_vsi_uptime(struct timespec *, struct timespec *);
17411 +
17412 +
17413 +struct vx_info;
17414 +
17415 +void vx_update_load(struct vx_info *);
17416 +
17417 +
17418 +int vx_tokens_recalc(struct _vx_sched_pc *,
17419 +       unsigned long *, unsigned long *, int [2]);
17420 +
17421 +void vx_update_sched_param(struct _vx_sched *sched,
17422 +       struct _vx_sched_pc *sched_pc);
17423 +
17424 +#endif /* __KERNEL__ */
17425 +#else  /* _VX_SCHED_H */
17426 +#warning duplicate inclusion
17427 +#endif /* _VX_SCHED_H */
17428 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/sched_cmd.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/sched_cmd.h
17429 --- linux-2.6.16.20/include/linux/vserver/sched_cmd.h   1970-01-01 01:00:00 +0100
17430 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/sched_cmd.h      2006-04-26 19:07:00 +0200
17431 @@ -0,0 +1,72 @@
17432 +#ifndef _VX_SCHED_CMD_H
17433 +#define _VX_SCHED_CMD_H
17434 +
17435 +
17436 +/*  sched vserver commands */
17437 +
17438 +#define VCMD_set_sched_v2      VC_CMD(SCHED, 1, 2)
17439 +#define VCMD_set_sched_v3      VC_CMD(SCHED, 1, 3)
17440 +#define VCMD_set_sched         VC_CMD(SCHED, 1, 4)
17441 +
17442 +struct vcmd_set_sched_v2 {
17443 +       int32_t fill_rate;
17444 +       int32_t interval;
17445 +       int32_t tokens;
17446 +       int32_t tokens_min;
17447 +       int32_t tokens_max;
17448 +       uint64_t cpu_mask;
17449 +};
17450 +
17451 +struct vcmd_set_sched_v3 {
17452 +       uint32_t set_mask;
17453 +       int32_t fill_rate;
17454 +       int32_t interval;
17455 +       int32_t tokens;
17456 +       int32_t tokens_min;
17457 +       int32_t tokens_max;
17458 +       int32_t priority_bias;
17459 +};
17460 +
17461 +struct vcmd_set_sched_v4 {
17462 +       uint32_t set_mask;
17463 +       int32_t fill_rate;
17464 +       int32_t interval;
17465 +       int32_t tokens;
17466 +       int32_t tokens_min;
17467 +       int32_t tokens_max;
17468 +       int32_t prio_bias;
17469 +       int32_t cpu_id;
17470 +       int32_t bucket_id;
17471 +};
17472 +
17473 +
17474 +#define VXSM_FILL_RATE         0x0001
17475 +#define VXSM_INTERVAL          0x0002
17476 +#define VXSM_FILL_RATE2                0x0004
17477 +#define VXSM_INTERVAL2         0x0008
17478 +#define VXSM_TOKENS            0x0010
17479 +#define VXSM_TOKENS_MIN                0x0020
17480 +#define VXSM_TOKENS_MAX                0x0040
17481 +#define VXSM_PRIO_BIAS         0x0100
17482 +
17483 +#define VXSM_IDLE_TIME         0x0200
17484 +#define VXSM_FORCE             0x0400
17485 +
17486 +#define        VXSM_V3_MASK            0x0173
17487 +#define        VXSM_SET_MASK           0x01FF
17488 +
17489 +#define VXSM_CPU_ID            0x1000
17490 +#define VXSM_BUCKET_ID         0x2000
17491 +
17492 +#define SCHED_KEEP             (-2)    /* only for v2 */
17493 +
17494 +#ifdef __KERNEL__
17495 +
17496 +#include <linux/compiler.h>
17497 +
17498 +extern int vc_set_sched_v2(uint32_t, void __user *);
17499 +extern int vc_set_sched_v3(uint32_t, void __user *);
17500 +extern int vc_set_sched(uint32_t, void __user *);
17501 +
17502 +#endif /* __KERNEL__ */
17503 +#endif /* _VX_SCHED_CMD_H */
17504 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/sched_def.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/sched_def.h
17505 --- linux-2.6.16.20/include/linux/vserver/sched_def.h   1970-01-01 01:00:00 +0100
17506 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/sched_def.h      2006-04-26 19:07:00 +0200
17507 @@ -0,0 +1,67 @@
17508 +#ifndef _VX_SCHED_DEF_H
17509 +#define _VX_SCHED_DEF_H
17510 +
17511 +#include <linux/spinlock.h>
17512 +#include <linux/jiffies.h>
17513 +#include <linux/cpumask.h>
17514 +#include <asm/atomic.h>
17515 +#include <asm/param.h>
17516 +
17517 +
17518 +/* context sub struct */
17519 +
17520 +struct _vx_sched {
17521 +       spinlock_t tokens_lock;         /* lock for token bucket */
17522 +
17523 +       int tokens;                     /* number of CPU tokens */
17524 +       int fill_rate[2];               /* Fill rate: add X tokens... */
17525 +       int interval[2];                /* Divisor:   per Y jiffies   */
17526 +       int tokens_min;                 /* Limit:     minimum for unhold */
17527 +       int tokens_max;                 /* Limit:     no more than N tokens */
17528 +
17529 +       unsigned update_mask;           /* which features should be updated */
17530 +       cpumask_t update;               /* CPUs which should update */
17531 +
17532 +       int prio_bias;                  /* bias offset for priority */
17533 +       int vavavoom;                   /* last calculated vavavoom */
17534 +};
17535 +
17536 +struct _vx_sched_pc {
17537 +       int tokens;                     /* number of CPU tokens */
17538 +       int flags;                      /* bucket flags */
17539 +
17540 +       int fill_rate[2];               /* Fill rate: add X tokens... */
17541 +       int interval[2];                /* Divisor:   per Y jiffies   */
17542 +       int tokens_min;                 /* Limit:     minimum for unhold */
17543 +       int tokens_max;                 /* Limit:     no more than N tokens */
17544 +
17545 +       unsigned long norm_time;        /* last time accounted */
17546 +       unsigned long idle_time;        /* non linear time for fair sched */
17547 +       unsigned long token_time;       /* token time for accounting */
17548 +       unsigned long onhold;           /* jiffies when put on hold */
17549 +
17550 +       uint64_t user_ticks;            /* token tick events */
17551 +       uint64_t sys_ticks;             /* token tick events */
17552 +       uint64_t hold_ticks;            /* token ticks paused */
17553 +};
17554 +
17555 +
17556 +#define VXSF_ONHOLD    0x0001
17557 +#define VXSF_IDLE_TIME 0x0100
17558 +
17559 +#ifdef CONFIG_VSERVER_DEBUG
17560 +
17561 +static inline void __dump_vx_sched(struct _vx_sched *sched)   /* printk() dump of *sched */
17562 +{
17563 +       printk("\t_vx_sched:\n");
17564 +       printk("\t tokens: %4d/%4d, %4d/%4d, %4d, %4d\n",      /* rate/interval pairs, then min, max */
17565 +               sched->fill_rate[0], sched->interval[0],
17566 +               sched->fill_rate[1], sched->interval[1],
17567 +               sched->tokens_min, sched->tokens_max);
17568 +       printk("\t priority = %4d, %4d\n",      /* prio_bias, then vavavoom */
17569 +               sched->prio_bias, sched->vavavoom);
17570 +}
17571 +
17572 +#endif
17573 +
17574 +#endif /* _VX_SCHED_DEF_H */
17575 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/signal.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/signal.h
17576 --- linux-2.6.16.20/include/linux/vserver/signal.h      1970-01-01 01:00:00 +0100
17577 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/signal.h 2006-04-26 19:07:00 +0200
17578 @@ -0,0 +1,14 @@
17579 +#ifndef _VX_SIGNAL_H
17580 +#define _VX_SIGNAL_H
17581 +
17582 +
17583 +#ifdef __KERNEL__
17584 +
17585 +struct vx_info;
17586 +
17587 +int vx_info_kill(struct vx_info *, int, int);
17588 +
17589 +#endif /* __KERNEL__ */
17590 +#else  /* _VX_SIGNAL_H */
17591 +#warning duplicate inclusion
17592 +#endif /* _VX_SIGNAL_H */
17593 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/signal_cmd.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/signal_cmd.h
17594 --- linux-2.6.16.20/include/linux/vserver/signal_cmd.h  1970-01-01 01:00:00 +0100
17595 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/signal_cmd.h     2006-04-26 19:07:00 +0200
17596 @@ -0,0 +1,26 @@
17597 +#ifndef _VX_SIGNAL_CMD_H
17598 +#define _VX_SIGNAL_CMD_H
17599 +
17600 +
17601 +/*  signalling vserver commands */
17602 +
17603 +#define VCMD_ctx_kill          VC_CMD(PROCTRL, 1, 0)
17604 +#define VCMD_wait_exit         VC_CMD(EVENT, 99, 0)
17605 +
17606 +struct vcmd_ctx_kill_v0 {
17607 +       int32_t pid;
17608 +       int32_t sig;
17609 +};
17610 +
17611 +struct vcmd_wait_exit_v0 {
17612 +       int32_t reboot_cmd;
17613 +       int32_t exit_code;
17614 +};
17615 +
17616 +#ifdef __KERNEL__
17617 +
17618 +extern int vc_ctx_kill(uint32_t, void __user *);
17619 +extern int vc_wait_exit(uint32_t, void __user *);
17620 +
17621 +#endif /* __KERNEL__ */
17622 +#endif /* _VX_SIGNAL_CMD_H */
17623 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/switch.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/switch.h
17624 --- linux-2.6.16.20/include/linux/vserver/switch.h      1970-01-01 01:00:00 +0100
17625 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/switch.h 2006-04-26 19:07:00 +0200
17626 @@ -0,0 +1,99 @@
17627 +#ifndef _VX_SWITCH_H
17628 +#define _VX_SWITCH_H
17629 +
17630 +#include <linux/types.h>
17631 +
17632 +
17633 +#define VC_CATEGORY(c)         (((c) >> 24) & 0x3F)    /* bits 24-29: category */
17634 +#define VC_COMMAND(c)          (((c) >> 16) & 0xFF)    /* bits 16-23: command index */
17635 +#define VC_VERSION(c)          ((c) & 0xFFF)           /* bits 0-11: version */
17636 +
17637 +#define VC_CMD(c,i,v)          ((((VC_CAT_ ## c) & 0x3F) << 24) \
17638 +                               | (((i) & 0xFF) << 16) | ((v) & 0xFFF)) /* c is pasted onto VC_CAT_ */
17639 +
17640 +/*
17641 +
17642 +  Syscall Matrix V2.8
17643 +
17644 +        |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL|
17645 +        |STATS  |DESTROY|ALTER  |CHANGE |LIMIT  |TEST   | |       |       |
17646 +        |INFO   |SETUP  |       |MOVE   |       |       | |       |       |
17647 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
17648 +  SYSTEM |VERSION|VSETUP |VHOST  |       |       |       | |DEVICES|       |
17649 +  HOST   |     00|     01|     02|     03|     04|     05| |     06|     07|
17650 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
17651 +  CPU    |       |VPROC  |PROCALT|PROCMIG|PROCTRL|       | |SCHED. |       |
17652 +  PROCESS|     08|     09|     10|     11|     12|     13| |     14|     15|
17653 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
17654 +  MEMORY |       |       |       |       |       |       | |SWAP   |       |
17655 +        |     16|     17|     18|     19|     20|     21| |     22|     23|
17656 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
17657 +  NETWORK|       |VNET   |NETALT |NETMIG |NETCTRL|       | |SERIAL |       |
17658 +        |     24|     25|     26|     27|     28|     29| |     30|     31|
17659 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
17660 +  DISK   |       |       |       |       |DLIMIT |       | |INODE  |       |
17661 +  VFS    |     32|     33|     34|     35|     36|     37| |     38|     39|
17662 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
17663 +  OTHER  |       |       |       |       |       |       | |VINFO  |       |
17664 +        |     40|     41|     42|     43|     44|     45| |     46|     47|
17665 +  =======+=======+=======+=======+=======+=======+=======+ +=======+=======+
17666 +  SPECIAL|EVENT  |       |       |       |FLAGS  |       | |       |       |
17667 +        |     48|     49|     50|     51|     52|     53| |     54|     55|
17668 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
17669 +  SPECIAL|DEBUG  |       |       |       |RLIMIT |SYSCALL| |       |COMPAT |
17670 +        |     56|     57|     58|     59|     60|TEST 61| |     62|     63|
17671 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
17672 +
17673 +*/
17674 +
17675 +#define VC_CAT_VERSION         0
17676 +
17677 +#define VC_CAT_VSETUP          1
17678 +#define VC_CAT_VHOST           2
17679 +
17680 +#define VC_CAT_VPROC           9
17681 +#define VC_CAT_PROCALT         10
17682 +#define VC_CAT_PROCMIG         11
17683 +#define VC_CAT_PROCTRL         12
17684 +
17685 +#define VC_CAT_SCHED           14
17686 +
17687 +#define VC_CAT_VNET            25
17688 +#define VC_CAT_NETALT          26
17689 +#define VC_CAT_NETMIG          27
17690 +#define VC_CAT_NETCTRL         28
17691 +
17692 +#define VC_CAT_DLIMIT          36
17693 +#define VC_CAT_INODE           38
17694 +
17695 +#define VC_CAT_VINFO           46
17696 +#define VC_CAT_EVENT           48
17697 +
17698 +#define VC_CAT_FLAGS           52
17699 +#define VC_CAT_DEBUG           56
17700 +#define VC_CAT_RLIMIT          60
17701 +
17702 +#define VC_CAT_SYSTEST         61
17703 +#define VC_CAT_COMPAT          63
17704 +
17705 +/*  interface version */
17706 +
17707 +#define VCI_VERSION            0x00020101
17708 +#define VCI_LEGACY_VERSION     0x000100FF
17709 +
17710 +/*  query version */
17711 +
17712 +#define VCMD_get_version       VC_CMD(VERSION, 0, 0)
17713 +#define VCMD_get_vci           VC_CMD(VERSION, 1, 0)
17714 +
17715 +
17716 +#ifdef __KERNEL__
17717 +
17718 +#include <linux/errno.h>
17719 +
17720 +
17721 +#else  /* __KERNEL__ */
17722 +#define __user
17723 +#endif /* __KERNEL__ */
17724 +
17725 +#endif /* _VX_SWITCH_H */
17726 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/tag.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/tag.h
17727 --- linux-2.6.16.20/include/linux/vserver/tag.h 1970-01-01 01:00:00 +0100
17728 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/tag.h    2006-04-26 19:07:00 +0200
17729 @@ -0,0 +1,153 @@
17730 +#ifndef _DX_TAG_H
17731 +#define _DX_TAG_H
17732 +
17733 +
17734 +#define DX_TAG(in)     (IS_TAGGED(in))
17735 +
17736 +
17737 +#ifdef CONFIG_DX_TAG_NFSD
17738 +#define DX_TAG_NFSD    1
17739 +#else
17740 +#define DX_TAG_NFSD    0
17741 +#endif
17742 +
17743 +
17744 +#ifdef CONFIG_TAGGING_NONE
17745 +
17746 +#define MAX_UID                0xFFFFFFFF
17747 +#define MAX_GID                0xFFFFFFFF
17748 +
17749 +#define INOTAG_TAG(cond, uid, gid, tag)        (0)
17750 +
17751 +#define TAGINO_UID(cond, uid, tag)     (uid)
17752 +#define TAGINO_GID(cond, gid, tag)     (gid)
17753 +
17754 +#endif
17755 +
17756 +
17757 +#ifdef CONFIG_TAGGING_GID16
17758 +
17759 +#define MAX_UID                0xFFFFFFFF
17760 +#define MAX_GID                0x0000FFFF
17761 +
17762 +#define INOTAG_TAG(cond, uid, gid, tag)        \
17763 +       ((cond) ? (((gid) >> 16) & 0xFFFF) : 0)
17764 +
17765 +#define TAGINO_UID(cond, uid, tag)     (uid)
17766 +#define TAGINO_GID(cond, gid, tag)     \
17767 +       ((cond) ? (((gid) & 0xFFFF) | ((tag) << 16)) : (gid))
17768 +
17769 +#endif
17770 +
17771 +
17772 +#ifdef CONFIG_TAGGING_ID24
17773 +
17774 +#define MAX_UID                0x00FFFFFF
17775 +#define MAX_GID                0x00FFFFFF
17776 +
17777 +#define INOTAG_TAG(cond, uid, gid, tag)        \
17778 +       ((cond) ? ((((uid) >> 16) & 0xFF00) | (((gid) >> 24) & 0xFF)) : 0)
17779 +
17780 +#define TAGINO_UID(cond, uid, tag)     \
17781 +       ((cond) ? (((uid) & 0xFFFFFF) | (((tag) & 0xFF00) << 16)) : (uid))
17782 +#define TAGINO_GID(cond, gid, tag)     \
17783 +       ((cond) ? (((gid) & 0xFFFFFF) | (((tag) & 0x00FF) << 24)) : (gid))
17784 +
17785 +#endif
17786 +
17787 +
17788 +#ifdef CONFIG_TAGGING_UID16
17789 +
17790 +#define MAX_UID                0x0000FFFF
17791 +#define MAX_GID                0xFFFFFFFF
17792 +
17793 +#define INOTAG_TAG(cond, uid, gid, tag)        \
17794 +       ((cond) ? (((uid) >> 16) & 0xFFFF) : 0)
17795 +
17796 +#define TAGINO_UID(cond, uid, tag)     \
17797 +       ((cond) ? (((uid) & 0xFFFF) | ((tag) << 16)) : (uid))
17798 +#define TAGINO_GID(cond, gid, tag)     (gid)
17799 +
17800 +#endif
17801 +
17802 +
17803 +#ifdef CONFIG_TAGGING_INTERN
17804 +
17805 +#define MAX_UID                0xFFFFFFFF
17806 +#define MAX_GID                0xFFFFFFFF
17807 +
17808 +#define INOTAG_TAG(cond, uid, gid, tag)        \
17809 +       ((cond) ? (tag) : 0)
17810 +
17811 +#define TAGINO_UID(cond, uid, tag)     (uid)
17812 +#define TAGINO_GID(cond, gid, tag)     (gid)
17813 +
17814 +#endif
17815 +
17816 +
17817 +#ifdef CONFIG_TAGGING_RUNTIME
17818 +
17819 +#define MAX_UID                0xFFFFFFFF
17820 +#define MAX_GID                0xFFFFFFFF
17821 +
17822 +#define INOTAG_TAG(cond, uid, gid, tag)        (0)
17823 +
17824 +#define TAGINO_UID(cond, uid, tag)     (uid)
17825 +#define TAGINO_GID(cond, gid, tag)     (gid)
17826 +
17827 +#endif
17828 +
17829 +
17830 +#ifndef CONFIG_TAGGING_NONE
17831 +#define dx_current_fstag(sb)   \
17832 +       ((sb)->s_flags & MS_TAGGED ? dx_current_tag(): 0)
17833 +#else
17834 +#define dx_current_fstag(sb)   (0)
17835 +#endif
17836 +
17837 +#ifndef CONFIG_TAGGING_INTERN
17838 +#define TAGINO_TAG(cond, tag)  (0)
17839 +#else
17840 +#define TAGINO_TAG(cond, tag)  ((cond) ? (tag) : 0)
17841 +#endif
17842 +
17843 +#define INOTAG_UID(cond, uid, gid)     \
17844 +       ((cond) ? ((uid) & MAX_UID) : (uid))
17845 +#define INOTAG_GID(cond, uid, gid)     \
17846 +       ((cond) ? ((gid) & MAX_GID) : (gid))
17847 +
17848 +
17849 +static inline uid_t dx_map_uid(uid_t uid)      /* clamp uid into the MAX_UID range */
17850 +{
17851 +       if ((uid > MAX_UID) && (uid != -1))     /* too large, and not the special value -1 */
17852 +               uid = -2;                       /* substitute -2 before masking */
17853 +       return (uid & MAX_UID);                 /* keep only the bits covered by MAX_UID */
17854 +}
17855 +
17856 +static inline gid_t dx_map_gid(gid_t gid)      /* clamp gid into the MAX_GID range */
17857 +{
17858 +       if ((gid > MAX_GID) && (gid != -1))     /* too large, and not the special value -1 */
17859 +               gid = -2;                       /* substitute -2 before masking */
17860 +       return (gid & MAX_GID);                 /* keep only the bits covered by MAX_GID */
17861 +}
17862 +
17863 +
17864 +#ifdef CONFIG_VSERVER_LEGACY
17865 +#define FIOC_GETTAG    _IOR('x', 1, long)
17866 +#define FIOC_SETTAG    _IOW('x', 2, long)
17867 +#define FIOC_SETTAGJ   _IOW('x', 3, long)
17868 +#endif
17869 +
17870 +#ifdef CONFIG_PROPAGATE
17871 +
17872 +int dx_parse_tag(char *string, tag_t *tag, int remove);
17873 +
17874 +void __dx_propagate_tag(struct nameidata *nd, struct inode *inode);
17875 +
17876 +#define dx_propagate_tag(n,i)  __dx_propagate_tag(n,i)
17877 +
17878 +#else
17879 +#define dx_propagate_tag(n,i)  do { } while (0)
17880 +#endif
17881 +
17882 +#endif /* _DX_TAG_H */
17883 diff -NurpP --minimal linux-2.6.16.20/include/net/af_unix.h linux-2.6.16.20-vs2.1.1-rc22/include/net/af_unix.h
17884 --- linux-2.6.16.20/include/net/af_unix.h       2006-02-18 14:40:36 +0100
17885 +++ linux-2.6.16.20-vs2.1.1-rc22/include/net/af_unix.h  2006-04-26 19:07:00 +0200
17886 @@ -17,9 +17,9 @@ extern spinlock_t unix_table_lock;
17887  
17888  extern atomic_t unix_tot_inflight;
17889  
17890 -static inline struct sock *first_unix_socket(int *i)
17891 +static inline struct sock *next_unix_socket_table(int *i)
17892  {
17893 -       for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
17894 +       for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
17895                 if (!hlist_empty(&unix_socket_table[*i]))
17896                         return __sk_head(&unix_socket_table[*i]);
17897         }
17898 @@ -28,16 +28,19 @@ static inline struct sock *first_unix_so
17899  
17900  static inline struct sock *next_unix_socket(int *i, struct sock *s)
17901  {
17902 -       struct sock *next = sk_next(s);
17903 -       /* More in this chain? */
17904 -       if (next)
17905 -               return next;
17906 -       /* Look for next non-empty chain. */
17907 -       for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
17908 -               if (!hlist_empty(&unix_socket_table[*i]))
17909 -                       return __sk_head(&unix_socket_table[*i]);
17910 -       }
17911 -       return NULL;
17912 +       do {
17913 +               if (s)                                  /* s may be NULL on the first call */
17914 +                       s = sk_next(s);                 /* more in this chain? */
17915 +               if (!s)                                 /* chain exhausted or no start socket */
17916 +                       s = next_unix_socket_table(i);  /* head of a later non-empty slot */
17917 +       } while (s && !vx_check(s->sk_xid, VX_IDENT|VX_WATCH));        /* skip sockets failing vx_check() */
17918 +       return s;                                       /* NULL once nothing is left */
17919 +}
17920 +
17921 +static inline struct sock *first_unix_socket(int *i)   /* first socket accepted by next_unix_socket() */
17922 +{
17923 +       *i = -1;        /* next_unix_socket_table() pre-increments *i, so start before slot 0 */
17924 +       return next_unix_socket(i, NULL);       /* NULL start: go straight to the table scan */
17925  }
17926  
17927  #define forall_unix_sockets(i, s) \
17928 diff -NurpP --minimal linux-2.6.16.20/include/net/inet_hashtables.h linux-2.6.16.20-vs2.1.1-rc22/include/net/inet_hashtables.h
17929 --- linux-2.6.16.20/include/net/inet_hashtables.h       2006-04-09 13:49:58 +0200
17930 +++ linux-2.6.16.20-vs2.1.1-rc22/include/net/inet_hashtables.h  2006-04-26 19:07:00 +0200
17931 @@ -272,6 +272,25 @@ static inline int inet_iif(const struct 
17932         return ((struct rtable *)skb->dst)->rt_iif;
17933  }
17934  
17935 +/*
17936 + *      Check if a given address matches for an inet socket: 1 if addr is
17937 + *      non-zero and equals saddr, addr_in_nx_info() if !saddr, else 0
17938 + *      nxi:   the socket's nx_info if any
17939 + *      addr:  address to be verified
17940 + *      saddr: socket address (zero defers the decision to nxi)
17941 + */
17942 +static inline int inet_addr_match (
17943 +       struct nx_info *nxi,
17944 +       uint32_t addr,
17945 +       uint32_t saddr)
17946 +{
17947 +       if (addr && (saddr == addr))            /* exact, non-zero match */
17948 +               return 1;
17949 +       if (!saddr)                             /* socket has no specific address */
17950 +               return addr_in_nx_info(nxi, addr);
17951 +       return 0;                               /* socket address differs from addr */
17952 +}
17953 +
17954  extern struct sock *__inet_lookup_listener(const struct hlist_head *head,
17955                                            const u32 daddr,
17956                                            const unsigned short hnum,
17957 @@ -292,7 +311,7 @@ static inline struct sock *
17958                 const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
17959  
17960                 if (inet->num == hnum && !sk->sk_node.next &&
17961 -                   (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
17962 +                   inet_addr_match(sk->sk_nx_info, daddr, inet->rcv_saddr) &&
17963                     (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
17964                     !sk->sk_bound_dev_if)
17965                         goto sherry_cache;
17966 diff -NurpP --minimal linux-2.6.16.20/include/net/inet_sock.h linux-2.6.16.20-vs2.1.1-rc22/include/net/inet_sock.h
17967 --- linux-2.6.16.20/include/net/inet_sock.h     2006-04-09 13:49:58 +0200
17968 +++ linux-2.6.16.20-vs2.1.1-rc22/include/net/inet_sock.h        2006-04-26 19:07:00 +0200
17969 @@ -115,6 +115,7 @@ struct inet_sock {
17970         /* Socket demultiplex comparisons on incoming packets. */
17971         __u32                   daddr;
17972         __u32                   rcv_saddr;
17973 +       __u32                   rcv_saddr2;     /* Second bound ipv4 addr, for ipv4root */
17974         __u16                   dport;
17975         __u16                   num;
17976         __u32                   saddr;
17977 diff -NurpP --minimal linux-2.6.16.20/include/net/inet_timewait_sock.h linux-2.6.16.20-vs2.1.1-rc22/include/net/inet_timewait_sock.h
17978 --- linux-2.6.16.20/include/net/inet_timewait_sock.h    2006-04-09 13:49:58 +0200
17979 +++ linux-2.6.16.20-vs2.1.1-rc22/include/net/inet_timewait_sock.h       2006-04-26 19:07:00 +0200
17980 @@ -116,6 +116,10 @@ struct inet_timewait_sock {
17981  #define tw_refcnt              __tw_common.skc_refcnt
17982  #define tw_hash                        __tw_common.skc_hash
17983  #define tw_prot                        __tw_common.skc_prot
17984 +#define tw_xid         __tw_common.skc_xid
17985 +#define tw_vx_info             __tw_common.skc_vx_info
17986 +#define tw_nid         __tw_common.skc_nid
17987 +#define tw_nx_info             __tw_common.skc_nx_info
17988         volatile unsigned char  tw_substate;
17989         /* 3 bits hole, try to pack */
17990         unsigned char           tw_rcv_wscale;
17991 diff -NurpP --minimal linux-2.6.16.20/include/net/route.h linux-2.6.16.20-vs2.1.1-rc22/include/net/route.h
17992 --- linux-2.6.16.20/include/net/route.h 2006-02-18 14:40:36 +0100
17993 +++ linux-2.6.16.20-vs2.1.1-rc22/include/net/route.h    2006-05-21 23:37:08 +0200
17994 @@ -28,11 +28,14 @@
17995  #include <net/dst.h>
17996  #include <net/inetpeer.h>
17997  #include <net/flow.h>
17998 +#include <net/inet_sock.h>
17999  #include <linux/in_route.h>
18000  #include <linux/rtnetlink.h>
18001  #include <linux/route.h>
18002  #include <linux/ip.h>
18003  #include <linux/cache.h>
18004 +#include <linux/vs_network.h>
18005 +#include <linux/in.h>
18006  
18007  #ifndef __KERNEL__
18008  #warning This file is not supposed to be used outside of kernel.
18009 @@ -144,6 +147,59 @@ static inline char rt_tos2priority(u8 to
18010         return ip_tos2prio[IPTOS_TOS(tos)>>1];
18011  }
18012  
18013 +#define IPI_LOOPBACK   htonl(INADDR_LOOPBACK)
18014 +/* Pick/validate fl->fl4_src from nxi's IPv4 list; 0, lookup error or -EPERM. */
18015 +static inline int ip_find_src(struct nx_info *nxi, struct rtable **rp, struct flowi *fl)
18016 +{
18017 +       int err;
18018 +       int i, n = nxi->nbipv4;
18019 +       u32 ipv4root = nxi->ipv4[0];
18020 +       /* first address is 0: return without touching the flow */
18021 +       if (ipv4root == 0)
18022 +               return 0;
18023 +       /* no source given: derive one; otherwise verify the given one */
18024 +       if (fl->fl4_src == 0) {
18025 +               if (n > 1) {
18026 +                       u32 foundsrc;
18027 +                       /* probe the route; on failure retry with ipv4root as source */
18028 +                       err = __ip_route_output_key(rp, fl);
18029 +                       if (err) {
18030 +                               fl->fl4_src = ipv4root;
18031 +                               err = __ip_route_output_key(rp, fl);
18032 +                       }
18033 +                       if (err)
18034 +                               return err;
18035 +                       /* only rt_src is kept; the route itself is put back */
18036 +                       foundsrc = (*rp)->rt_src;
18037 +                       ip_rt_put(*rp);
18038 +
18039 +                       for (i=0; i<n; i++){
18040 +                               u32 mask = nxi->mask[i];
18041 +                               u32 ipv4 = nxi->ipv4[i];
18042 +                               u32 net4 = ipv4 & mask;
18043 +                               /* exact address ends the search; a masked-network match is only taken while fl4_src is still 0 */
18044 +                               if (foundsrc == ipv4) {
18045 +                                       fl->fl4_src = ipv4;
18046 +                                       break;
18047 +                               }
18048 +                               if (!fl->fl4_src && (foundsrc & mask) == net4)
18049 +                                       fl->fl4_src = ipv4;
18050 +                       }
18051 +               }
18052 +               if (fl->fl4_src == 0) /* nothing selected above */
18053 +                       fl->fl4_src = (fl->fl4_dst == IPI_LOOPBACK)
18054 +                               ? IPI_LOOPBACK : ipv4root;
18055 +       } else { /* caller supplied a source: it must be in nxi->ipv4[] */
18056 +               for (i=0; i<n; i++) {
18057 +                       if (nxi->ipv4[i] == fl->fl4_src)
18058 +                               break;
18059 +               }
18060 +               if (i == n) /* loop ran to the end: address not listed */
18061 +                       return -EPERM;
18062 +       }
18063 +       return 0;
18064 +}
18065 +
18066  static inline int ip_route_connect(struct rtable **rp, u32 dst,
18067                                    u32 src, u32 tos, int oif, u8 protocol,
18068                                    u16 sport, u16 dport, struct sock *sk)
18069 @@ -158,7 +214,27 @@ static inline int ip_route_connect(struc
18070                                          .dport = dport } } };
18071  
18072         int err;
18073 -       if (!dst || !src) {
18074 +       struct nx_info *nx_info = current->nx_info; /* default: current task's context */
18075 +
18076 +       if (sk) /* sk may be NULL; when present its nx_info takes precedence */
18077 +               nx_info = sk->sk_nx_info;
18078 +       vxdprintk(VXD_CBIT(net, 4),
18079 +               "ip_route_connect(%p) %p,%p;%lx",
18080 +               sk, nx_info, sk ? sk->sk_socket : NULL, /* no deref of a NULL sk */
18081 +               (sk && sk->sk_socket) ? sk->sk_socket->flags : 0);
18082 +
18083 +       if (nx_info) {
18084 +               err = ip_find_src(nx_info, rp, &fl);
18085 +               if (err)
18086 +                       return err;
18087 +               if (fl.fl4_dst == IPI_LOOPBACK && !vx_check(0, VX_ADMIN))
18088 +                       fl.fl4_dst = nx_info->ipv4[0];
18089 +#ifdef CONFIG_VSERVER_REMAP_SADDR
18090 +               if (fl.fl4_src == IPI_LOOPBACK && !vx_check(0, VX_ADMIN))
18091 +                       fl.fl4_src = nx_info->ipv4[0];
18092 +#endif
18093 +       }
18094 +       if (!fl.fl4_dst || !fl.fl4_src) {
18095                 err = __ip_route_output_key(rp, &fl);
18096                 if (err)
18097                         return err;
18098 diff -NurpP --minimal linux-2.6.16.20/include/net/sock.h linux-2.6.16.20-vs2.1.1-rc22/include/net/sock.h
18099 --- linux-2.6.16.20/include/net/sock.h  2006-04-09 13:49:58 +0200
18100 +++ linux-2.6.16.20-vs2.1.1-rc22/include/net/sock.h     2006-04-26 19:07:00 +0200
18101 @@ -115,6 +115,10 @@ struct sock_common {
18102         atomic_t                skc_refcnt;
18103         unsigned int            skc_hash;
18104         struct proto            *skc_prot;
18105 +       xid_t                   skc_xid;
18106 +       struct vx_info  *skc_vx_info;
18107 +       nid_t                   skc_nid;
18108 +       struct nx_info  *skc_nx_info;
18109  };
18110  
18111  /**
18112 @@ -189,6 +193,10 @@ struct sock {
18113  #define sk_refcnt              __sk_common.skc_refcnt
18114  #define sk_hash                        __sk_common.skc_hash
18115  #define sk_prot                        __sk_common.skc_prot
18116 +#define sk_xid                 __sk_common.skc_xid
18117 +#define sk_vx_info             __sk_common.skc_vx_info
18118 +#define sk_nid                 __sk_common.skc_nid
18119 +#define sk_nx_info             __sk_common.skc_nx_info
18120         unsigned char           sk_shutdown : 2,
18121                                 sk_no_check : 2,
18122                                 sk_userlocks : 4;
18123 diff -NurpP --minimal linux-2.6.16.20/init/version.c linux-2.6.16.20-vs2.1.1-rc22/init/version.c
18124 --- linux-2.6.16.20/init/version.c      2005-03-02 12:39:08 +0100
18125 +++ linux-2.6.16.20-vs2.1.1-rc22/init/version.c 2006-04-26 19:07:00 +0200
18126 @@ -31,3 +31,8 @@ EXPORT_SYMBOL(system_utsname);
18127  const char linux_banner[] =
18128         "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@"
18129         LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n";
18130 +/* Format-string form of linux_banner: %s replaces UTS_RELEASE and UTS_VERSION. */
18131 +const char vx_linux_banner[] =
18132 +       "Linux version %s (" LINUX_COMPILE_BY "@"
18133 +       LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") %s\n";
18134 +
18135 diff -NurpP --minimal linux-2.6.16.20/ipc/mqueue.c linux-2.6.16.20-vs2.1.1-rc22/ipc/mqueue.c
18136 --- linux-2.6.16.20/ipc/mqueue.c        2006-02-18 14:40:37 +0100
18137 +++ linux-2.6.16.20-vs2.1.1-rc22/ipc/mqueue.c   2006-04-26 19:07:00 +0200
18138 @@ -25,6 +25,8 @@
18139  #include <linux/netlink.h>
18140  #include <linux/syscalls.h>
18141  #include <linux/signal.h>
18142 +#include <linux/vs_context.h>
18143 +#include <linux/vs_limit.h>
18144  #include <net/sock.h>
18145  #include "util.h"
18146  
18147 @@ -148,17 +150,20 @@ static struct inode *mqueue_get_inode(st
18148                         spin_lock(&mq_lock);
18149                         if (u->mq_bytes + mq_bytes < u->mq_bytes ||
18150                             u->mq_bytes + mq_bytes >
18151 -                           p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur) {
18152 +                           p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur ||
18153 +                           !vx_ipcmsg_avail(p->vx_info, mq_bytes)) {
18154                                 spin_unlock(&mq_lock);
18155                                 goto out_inode;
18156                         }
18157                         u->mq_bytes += mq_bytes;
18158 +                       vx_ipcmsg_add(p->vx_info, u, mq_bytes);
18159                         spin_unlock(&mq_lock);
18160  
18161                         info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
18162                         if (!info->messages) {
18163                                 spin_lock(&mq_lock);
18164                                 u->mq_bytes -= mq_bytes;
18165 +                               vx_ipcmsg_sub(p->vx_info, u, mq_bytes);
18166                                 spin_unlock(&mq_lock);
18167                                 goto out_inode;
18168                         }
18169 @@ -256,10 +261,14 @@ static void mqueue_delete_inode(struct i
18170                    (info->attr.mq_maxmsg * info->attr.mq_msgsize));
18171         user = info->user;
18172         if (user) {
18173 +               struct vx_info *vxi = lookup_vx_info(user->xid);
18174 +
18175                 spin_lock(&mq_lock);
18176                 user->mq_bytes -= mq_bytes;
18177 +               vx_ipcmsg_sub(vxi, user, mq_bytes);
18178                 queues_count--;
18179                 spin_unlock(&mq_lock);
18180 +               put_vx_info(vxi);
18181                 free_uid(user);
18182         }
18183  }
18184 @@ -738,7 +747,7 @@ asmlinkage long sys_mq_unlink(const char
18185         if (inode)
18186                 atomic_inc(&inode->i_count);
18187  
18188 -       err = vfs_unlink(dentry->d_parent->d_inode, dentry);
18189 +       err = vfs_unlink(dentry->d_parent->d_inode, dentry, NULL);
18190  out_err:
18191         dput(dentry);
18192  
18193 diff -NurpP --minimal linux-2.6.16.20/ipc/msg.c linux-2.6.16.20-vs2.1.1-rc22/ipc/msg.c
18194 --- linux-2.6.16.20/ipc/msg.c   2006-02-18 14:40:37 +0100
18195 +++ linux-2.6.16.20-vs2.1.1-rc22/ipc/msg.c      2006-04-26 19:07:00 +0200
18196 @@ -100,6 +100,7 @@ static int newque (key_t key, int msgflg
18197  
18198         msq->q_perm.mode = (msgflg & S_IRWXUGO);
18199         msq->q_perm.key = key;
18200 +       msq->q_perm.xid = vx_current_xid();
18201  
18202         msq->q_perm.security = NULL;
18203         retval = security_msg_queue_alloc(msq);
18204 @@ -815,6 +816,9 @@ static int sysvipc_msg_proc_show(struct 
18205  {
18206         struct msg_queue *msq = it;
18207  
18208 +       if (!vx_check(msq->q_perm.xid, VX_IDENT))
18209 +               return 0;
18210 +
18211         return seq_printf(s,
18212                           "%10d %10d  %4o  %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
18213                           msq->q_perm.key,
18214 diff -NurpP --minimal linux-2.6.16.20/ipc/sem.c linux-2.6.16.20-vs2.1.1-rc22/ipc/sem.c
18215 --- linux-2.6.16.20/ipc/sem.c   2006-04-09 13:49:58 +0200
18216 +++ linux-2.6.16.20-vs2.1.1-rc22/ipc/sem.c      2006-04-26 19:07:00 +0200
18217 @@ -75,6 +75,7 @@
18218  #include <linux/audit.h>
18219  #include <linux/capability.h>
18220  #include <linux/seq_file.h>
18221 +#include <linux/vs_limit.h>
18222  #include <asm/uaccess.h>
18223  #include "util.h"
18224  
18225 @@ -179,6 +180,7 @@ static int newary (key_t key, int nsems,
18226  
18227         sma->sem_perm.mode = (semflg & S_IRWXUGO);
18228         sma->sem_perm.key = key;
18229 +       sma->sem_perm.xid = vx_current_xid();
18230  
18231         sma->sem_perm.security = NULL;
18232         retval = security_sem_alloc(sma);
18233 @@ -194,6 +196,8 @@ static int newary (key_t key, int nsems,
18234                 return -ENOSPC;
18235         }
18236         used_sems += nsems;
18237 +       vx_semary_inc(sma);
18238 +       vx_nsems_add(sma, nsems);
18239  
18240         sma->sem_id = sem_buildid(id, sma->sem_perm.seq);
18241         sma->sem_base = (struct sem *) &sma[1];
18242 @@ -473,6 +477,8 @@ static void freeary (struct sem_array *s
18243         sem_unlock(sma);
18244  
18245         used_sems -= sma->sem_nsems;
18246 +       vx_nsems_sub(sma, sma->sem_nsems);
18247 +       vx_semary_dec(sma);
18248         size = sizeof (*sma) + sma->sem_nsems * sizeof (struct sem);
18249         security_sem_free(sma);
18250         ipc_rcu_putref(sma);
18251 @@ -1337,6 +1343,9 @@ static int sysvipc_sem_proc_show(struct 
18252  {
18253         struct sem_array *sma = it;
18254  
18255 +       if (!vx_check(sma->sem_perm.xid, VX_IDENT))
18256 +               return 0;
18257 +
18258         return seq_printf(s,
18259                           "%10d %10d  %4o %10lu %5u %5u %5u %5u %10lu %10lu\n",
18260                           sma->sem_perm.key,
18261 diff -NurpP --minimal linux-2.6.16.20/ipc/shm.c linux-2.6.16.20-vs2.1.1-rc22/ipc/shm.c
18262 --- linux-2.6.16.20/ipc/shm.c   2006-05-11 21:25:36 +0200
18263 +++ linux-2.6.16.20-vs2.1.1-rc22/ipc/shm.c      2006-04-26 19:07:00 +0200
18264 @@ -30,6 +30,8 @@
18265  #include <linux/capability.h>
18266  #include <linux/ptrace.h>
18267  #include <linux/seq_file.h>
18268 +#include <linux/vs_context.h>
18269 +#include <linux/vs_limit.h>
18270  
18271  #include <asm/uaccess.h>
18272  
18273 @@ -114,7 +116,12 @@ static void shm_open (struct vm_area_str
18274   */
18275  static void shm_destroy (struct shmid_kernel *shp)
18276  {
18277 -       shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
18278 +       struct vx_info *vxi = lookup_vx_info(shp->shm_perm.xid);
18279 +       int numpages = (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
18280 +
18281 +       vx_ipcshm_sub(vxi, shp, numpages);
18282 +       shm_tot -= numpages;
18283 +
18284         shm_rmid (shp->id);
18285         shm_unlock(shp);
18286         if (!is_file_hugepages(shp->shm_file))
18287 @@ -124,6 +131,7 @@ static void shm_destroy (struct shmid_ke
18288                                                 shp->mlock_user);
18289         fput (shp->shm_file);
18290         security_shm_free(shp);
18291 +       put_vx_info(vxi);
18292         ipc_rcu_putref(shp);
18293  }
18294  
18295 @@ -200,12 +208,15 @@ static int newseg (key_t key, int shmflg
18296  
18297         if (shm_tot + numpages >= shm_ctlall)
18298                 return -ENOSPC;
18299 +       if (!vx_ipcshm_avail(current->vx_info, numpages))
18300 +               return -ENOSPC;
18301  
18302         shp = ipc_rcu_alloc(sizeof(*shp));
18303         if (!shp)
18304                 return -ENOMEM;
18305  
18306         shp->shm_perm.key = key;
18307 +       shp->shm_perm.xid = vx_current_xid();
18308         shp->shm_perm.mode = (shmflg & S_IRWXUGO);
18309         shp->mlock_user = NULL;
18310  
18311 @@ -256,6 +267,7 @@ static int newseg (key_t key, int shmflg
18312                 file->f_op = &shm_file_operations;
18313  
18314         shm_tot += numpages;
18315 +       vx_ipcshm_add(current->vx_info, key, numpages);
18316         shm_unlock(shp);
18317         return shp->id;
18318  
18319 @@ -897,6 +909,9 @@ static int sysvipc_shm_proc_show(struct 
18320  #define SMALL_STRING "%10d %10d  %4o %10u %5u %5u  %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
18321  #define BIG_STRING   "%10d %10d  %4o %21u %5u %5u  %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
18322  
18323 +       if (!vx_check(shp->shm_perm.xid, VX_IDENT))
18324 +               return 0;
18325 +
18326         if (sizeof(size_t) <= sizeof(int))
18327                 format = SMALL_STRING;
18328         else
18329 diff -NurpP --minimal linux-2.6.16.20/ipc/util.c linux-2.6.16.20-vs2.1.1-rc22/ipc/util.c
18330 --- linux-2.6.16.20/ipc/util.c  2006-05-11 21:25:36 +0200
18331 +++ linux-2.6.16.20-vs2.1.1-rc22/ipc/util.c     2006-04-26 19:07:00 +0200
18332 @@ -154,7 +154,9 @@ int ipc_findkey(struct ipc_ids* ids, key
18333          */
18334         for (id = 0; id <= max_id; id++) {
18335                 p = ids->entries->p[id];
18336 -               if(p==NULL)
18337 +               if (p==NULL)
18338 +                       continue;
18339 +               if (!vx_check(p->xid, VX_IDENT))
18340                         continue;
18341                 if (key == p->key)
18342                         return id;
18343 @@ -467,6 +469,8 @@ int ipcperms (struct kern_ipc_perm *ipcp
18344  {      /* flag will most probably be 0 or S_...UGO from <linux/stat.h> */
18345         int requested_mode, granted_mode;
18346  
18347 +       if (!vx_check(ipcp->xid, VX_ADMIN|VX_IDENT)) /* maybe just VX_IDENT? */
18348 +               return -1;
18349         requested_mode = (flag >> 6) | (flag >> 3) | flag;
18350         granted_mode = ipcp->mode;
18351         if (current->euid == ipcp->cuid || current->euid == ipcp->uid)
18352 diff -NurpP --minimal linux-2.6.16.20/kernel/Makefile linux-2.6.16.20-vs2.1.1-rc22/kernel/Makefile
18353 --- linux-2.6.16.20/kernel/Makefile     2006-02-18 14:40:37 +0100
18354 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/Makefile        2006-04-26 19:07:00 +0200
18355 @@ -10,6 +10,8 @@ obj-y     = sched.o fork.o exec_domain.o
18356             kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
18357             hrtimer.o
18358  
18359 +obj-y    += vserver/
18360 +
18361  obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
18362  obj-$(CONFIG_FUTEX) += futex.o
18363  obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
18364 diff -NurpP --minimal linux-2.6.16.20/kernel/capability.c linux-2.6.16.20-vs2.1.1-rc22/kernel/capability.c
18365 --- linux-2.6.16.20/kernel/capability.c 2006-02-18 14:40:37 +0100
18366 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/capability.c    2006-04-26 19:07:00 +0200
18367 @@ -12,6 +12,7 @@
18368  #include <linux/module.h>
18369  #include <linux/security.h>
18370  #include <linux/syscalls.h>
18371 +#include <linux/vs_pid.h>
18372  #include <asm/uaccess.h>
18373  
18374  unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
18375 diff -NurpP --minimal linux-2.6.16.20/kernel/compat.c linux-2.6.16.20-vs2.1.1-rc22/kernel/compat.c
18376 --- linux-2.6.16.20/kernel/compat.c     2006-02-18 14:40:37 +0100
18377 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/compat.c        2006-05-29 16:58:56 +0200
18378 @@ -841,7 +841,7 @@ asmlinkage long compat_sys_time(compat_t
18379         compat_time_t i;
18380         struct timeval tv;
18381  
18382 -       do_gettimeofday(&tv);
18383 +       vx_gettimeofday(&tv);
18384         i = tv.tv_sec;
18385  
18386         if (tloc) {
18387 @@ -865,7 +865,7 @@ asmlinkage long compat_sys_stime(compat_
18388         if (err)
18389                 return err;
18390  
18391 -       do_settimeofday(&tv);
18392 +       vx_settimeofday(&tv);
18393         return 0;
18394  }
18395  
18396 diff -NurpP --minimal linux-2.6.16.20/kernel/cpuset.c linux-2.6.16.20-vs2.1.1-rc22/kernel/cpuset.c
18397 --- linux-2.6.16.20/kernel/cpuset.c     2006-02-18 14:40:37 +0100
18398 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/cpuset.c        2006-04-26 19:07:00 +0200
18399 @@ -50,6 +50,7 @@
18400  #include <linux/time.h>
18401  #include <linux/backing-dev.h>
18402  #include <linux/sort.h>
18403 +#include <linux/vs_pid.h>
18404  
18405  #include <asm/uaccess.h>
18406  #include <asm/atomic.h>
18407 diff -NurpP --minimal linux-2.6.16.20/kernel/exit.c linux-2.6.16.20-vs2.1.1-rc22/kernel/exit.c
18408 --- linux-2.6.16.20/kernel/exit.c       2006-04-09 13:49:58 +0200
18409 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/exit.c  2006-05-02 03:05:29 +0200
18410 @@ -31,6 +31,10 @@
18411  #include <linux/signal.h>
18412  #include <linux/cn_proc.h>
18413  #include <linux/mutex.h>
18414 +#include <linux/vs_limit.h>
18415 +#include <linux/vs_context.h>
18416 +#include <linux/vs_network.h>
18417 +#include <linux/vs_pid.h>
18418  
18419  #include <asm/uaccess.h>
18420  #include <asm/unistd.h>
18421 @@ -396,6 +400,7 @@ static void close_files(struct files_str
18422                                 struct file * file = xchg(&fdt->fd[i], NULL);
18423                                 if (file)
18424                                         filp_close(file, files);
18425 +                               vx_openfd_dec(i);
18426                         }
18427                         i++;
18428                         set >>= 1;
18429 @@ -536,8 +541,13 @@ static void exit_mm(struct task_struct *
18430         mmput(mm);
18431  }
18432  
18433 -static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_reaper)
18434 +static inline void choose_new_parent(task_t *p, task_t *reaper)
18435  {
18436 +       /* check for reaper context */
18437 +       vxwprintk((p->xid != reaper->xid) && (reaper != child_reaper),
18438 +               "rogue reaper: %p[%d,#%u] <> %p[%d,#%u]",
18439 +               p, p->pid, p->xid, reaper, reaper->pid, reaper->xid);
18440 +
18441         /*
18442          * Make sure we're not reparenting to ourselves and that
18443          * the parent is not a zombie.
18444 @@ -619,7 +629,7 @@ static void forget_original_parent(struc
18445         do {
18446                 reaper = next_thread(reaper);
18447                 if (reaper == father) {
18448 -                       reaper = child_reaper;
18449 +                       reaper = vx_child_reaper(father);
18450                         break;
18451                 }
18452         } while (reaper->exit_state);
18453 @@ -643,7 +653,7 @@ static void forget_original_parent(struc
18454  
18455                 if (father == p->real_parent) {
18456                         /* reparent with a reaper, real father it's us */
18457 -                       choose_new_parent(p, reaper, child_reaper);
18458 +                       choose_new_parent(p, vx_child_reaper(p));
18459                         reparent_thread(p, father, 0);
18460                 } else {
18461                         /* reparent ptraced task to its real parent */
18462 @@ -664,7 +674,8 @@ static void forget_original_parent(struc
18463         }
18464         list_for_each_safe(_p, _n, &father->ptrace_children) {
18465                 p = list_entry(_p,struct task_struct,ptrace_list);
18466 -               choose_new_parent(p, reaper, child_reaper);
18467 +
18468 +               choose_new_parent(p, reaper);
18469                 reparent_thread(p, father, 1);
18470         }
18471  }
18472 @@ -858,6 +869,8 @@ fastcall NORET_TYPE void do_exit(long co
18473         __exit_files(tsk);
18474         __exit_fs(tsk);
18475         exit_namespace(tsk);
18476 +       exit_vx_info(tsk, code);
18477 +       exit_nx_info(tsk);
18478         exit_thread();
18479         cpuset_exit(tsk);
18480         exit_keys(tsk);
18481 diff -NurpP --minimal linux-2.6.16.20/kernel/fork.c linux-2.6.16.20-vs2.1.1-rc22/kernel/fork.c
18482 --- linux-2.6.16.20/kernel/fork.c       2006-05-11 21:25:36 +0200
18483 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/fork.c  2006-04-26 19:07:00 +0200
18484 @@ -44,6 +44,10 @@
18485  #include <linux/rmap.h>
18486  #include <linux/acct.h>
18487  #include <linux/cn_proc.h>
18488 +#include <linux/vs_context.h>
18489 +#include <linux/vs_network.h>
18490 +#include <linux/vs_limit.h>
18491 +#include <linux/vs_memory.h>
18492  
18493  #include <asm/pgtable.h>
18494  #include <asm/pgalloc.h>
18495 @@ -104,6 +108,8 @@ static kmem_cache_t *mm_cachep;
18496  void free_task(struct task_struct *tsk)
18497  {
18498         free_thread_info(tsk->thread_info);
18499 +       clr_vx_info(&tsk->vx_info);
18500 +       clr_nx_info(&tsk->nx_info);
18501         free_task_struct(tsk);
18502  }
18503  EXPORT_SYMBOL(free_task);
18504 @@ -203,6 +209,8 @@ static inline int dup_mmap(struct mm_str
18505         mm->free_area_cache = oldmm->mmap_base;
18506         mm->cached_hole_size = ~0UL;
18507         mm->map_count = 0;
18508 +       __set_mm_counter(mm, file_rss, 0);
18509 +       __set_mm_counter(mm, anon_rss, 0);
18510         cpus_clear(mm->cpu_vm_mask);
18511         mm->mm_rb = RB_ROOT;
18512         rb_link = &mm->mm_rb.rb_node;
18513 @@ -214,7 +222,7 @@ static inline int dup_mmap(struct mm_str
18514  
18515                 if (mpnt->vm_flags & VM_DONTCOPY) {
18516                         long pages = vma_pages(mpnt);
18517 -                       mm->total_vm -= pages;
18518 +                       vx_vmpages_sub(mm, pages);
18519                         vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
18520                                                                 -pages);
18521                         continue;
18522 @@ -321,8 +329,6 @@ static struct mm_struct * mm_init(struct
18523         INIT_LIST_HEAD(&mm->mmlist);
18524         mm->core_waiters = 0;
18525         mm->nr_ptes = 0;
18526 -       set_mm_counter(mm, file_rss, 0);
18527 -       set_mm_counter(mm, anon_rss, 0);
18528         spin_lock_init(&mm->page_table_lock);
18529         rwlock_init(&mm->ioctx_list_lock);
18530         mm->ioctx_list = NULL;
18531 @@ -331,6 +337,7 @@ static struct mm_struct * mm_init(struct
18532  
18533         if (likely(!mm_alloc_pgd(mm))) {
18534                 mm->def_flags = 0;
18535 +               set_vx_info(&mm->mm_vx_info, current->vx_info);
18536                 return mm;
18537         }
18538         free_mm(mm);
18539 @@ -362,6 +369,7 @@ void fastcall __mmdrop(struct mm_struct 
18540         BUG_ON(mm == &init_mm);
18541         mm_free_pgd(mm);
18542         destroy_context(mm);
18543 +       clr_vx_info(&mm->mm_vx_info);
18544         free_mm(mm);
18545  }
18546  
18547 @@ -465,6 +473,7 @@ static struct mm_struct *dup_mm(struct t
18548                 goto fail_nomem;
18549  
18550         memcpy(mm, oldmm, sizeof(*mm));
18551 +       mm->mm_vx_info = NULL;
18552  
18553         if (!mm_init(mm))
18554                 goto fail_nomem;
18555 @@ -492,6 +501,7 @@ fail_nocontext:
18556          * If init_new_context() failed, we cannot use mmput() to free the mm
18557          * because it calls destroy_context()
18558          */
18559 +       clr_vx_info(&mm->mm_vx_info);
18560         mm_free_pgd(mm);
18561         free_mm(mm);
18562         return NULL;
18563 @@ -685,6 +695,8 @@ static struct files_struct *dup_fd(struc
18564                 struct file *f = *old_fds++;
18565                 if (f) {
18566                         get_file(f);
18567 +                       /* FIXME: sum it first for check and performance */
18568 +                       vx_openfd_inc(open_files - i);
18569                 } else {
18570                         /*
18571                          * The fd may be claimed in the fd bitmap but not yet
18572 @@ -917,6 +929,8 @@ static task_t *copy_process(unsigned lon
18573  {
18574         int retval;
18575         struct task_struct *p = NULL;
18576 +       struct vx_info *vxi;
18577 +       struct nx_info *nxi;
18578  
18579         if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
18580                 return ERR_PTR(-EINVAL);
18581 @@ -945,12 +959,30 @@ static task_t *copy_process(unsigned lon
18582         if (!p)
18583                 goto fork_out;
18584  
18585 +       init_vx_info(&p->vx_info, current->vx_info);
18586 +       init_nx_info(&p->nx_info, current->nx_info);
18587 +
18588 +       /* check vserver memory */
18589 +       if (p->mm && !(clone_flags & CLONE_VM)) {
18590 +               if (vx_vmpages_avail(p->mm, p->mm->total_vm))
18591 +                       vx_pages_add(p->vx_info, RLIMIT_AS, p->mm->total_vm);
18592 +               else
18593 +                       goto bad_fork_free;
18594 +       }
18595 +       if (p->mm && vx_flags(VXF_FORK_RSS, 0)) {
18596 +               if (!vx_rsspages_avail(p->mm, get_mm_counter(p->mm, file_rss)))
18597 +                       goto bad_fork_cleanup_vm;
18598 +       }
18599 +
18600         retval = -EAGAIN;
18601 +       if (!vx_nproc_avail(1))
18602 +               goto bad_fork_cleanup_vm;
18603 +
18604         if (atomic_read(&p->user->processes) >=
18605                         p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
18606                 if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
18607                                 p->user != &root_user)
18608 -                       goto bad_fork_free;
18609 +                       goto bad_fork_cleanup_vm;
18610         }
18611  
18612         atomic_inc(&p->user->__count);
18613 @@ -1196,6 +1228,18 @@ static task_t *copy_process(unsigned lon
18614         nr_threads++;
18615         total_forks++;
18616         spin_unlock(&current->sighand->siglock);
18617 +
18618 +       /* p is copy of current */
18619 +       vxi = p->vx_info;
18620 +       if (vxi) {
18621 +               claim_vx_info(vxi, p);
18622 +               atomic_inc(&vxi->cvirt.nr_threads);
18623 +               atomic_inc(&vxi->cvirt.total_forks);
18624 +               vx_nproc_inc(p);
18625 +       }
18626 +       nxi = p->nx_info;
18627 +       if (nxi)
18628 +               claim_nx_info(nxi, p);
18629         write_unlock_irq(&tasklist_lock);
18630         proc_fork_connector(p);
18631         return p;
18632 @@ -1236,6 +1280,9 @@ bad_fork_cleanup_count:
18633         put_group_info(p->group_info);
18634         atomic_dec(&p->user->processes);
18635         free_uid(p->user);
18636 +bad_fork_cleanup_vm:
18637 +       if (p->mm && !(clone_flags & CLONE_VM))
18638 +               vx_pages_sub(p->vx_info, RLIMIT_AS, p->mm->total_vm);
18639  bad_fork_free:
18640         free_task(p);
18641  fork_out:
18642 @@ -1296,6 +1343,15 @@ long do_fork(unsigned long clone_flags,
18643  
18644         if (pid < 0)
18645                 return -EAGAIN;
18646 +
18647 +       /* kernel threads are host only */
18648 +       if ((clone_flags & CLONE_KTHREAD) && !vx_check(0, VX_ADMIN)) {
18649 +               vxwprintk(1, "xid=%d tried to spawn a kernel thread.",
18650 +                       vx_current_xid());
18651 +               free_pidmap(pid);
18652 +               return -EPERM;
18653 +       }
18654 +
18655         if (unlikely(current->ptrace)) {
18656                 trace = fork_traceflag (clone_flags);
18657                 if (trace)
18658 diff -NurpP --minimal linux-2.6.16.20/kernel/kthread.c linux-2.6.16.20-vs2.1.1-rc22/kernel/kthread.c
18659 --- linux-2.6.16.20/kernel/kthread.c    2006-01-03 17:30:12 +0100
18660 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/kthread.c       2006-04-26 19:07:00 +0200
18661 @@ -114,7 +114,7 @@ static void keventd_create_kthread(void 
18662                 create->result = ERR_PTR(pid);
18663         } else {
18664                 wait_for_completion(&create->started);
18665 -               create->result = find_task_by_pid(pid);
18666 +               create->result = find_task_by_real_pid(pid);
18667         }
18668         complete(&create->done);
18669  }
18670 diff -NurpP --minimal linux-2.6.16.20/kernel/posix-cpu-timers.c linux-2.6.16.20-vs2.1.1-rc22/kernel/posix-cpu-timers.c
18671 --- linux-2.6.16.20/kernel/posix-cpu-timers.c   2006-02-18 14:40:37 +0100
18672 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/posix-cpu-timers.c      2006-04-26 19:07:00 +0200
18673 @@ -6,6 +6,7 @@
18674  #include <linux/posix-timers.h>
18675  #include <asm/uaccess.h>
18676  #include <linux/errno.h>
18677 +#include <linux/vs_pid.h>
18678  
18679  static int check_clock(const clockid_t which_clock)
18680  {
18681 diff -NurpP --minimal linux-2.6.16.20/kernel/posix-timers.c linux-2.6.16.20-vs2.1.1-rc22/kernel/posix-timers.c
18682 --- linux-2.6.16.20/kernel/posix-timers.c       2006-04-09 13:49:58 +0200
18683 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/posix-timers.c  2006-04-28 05:07:10 +0200
18684 @@ -47,6 +47,7 @@
18685  #include <linux/wait.h>
18686  #include <linux/workqueue.h>
18687  #include <linux/module.h>
18688 +#include <linux/vs_context.h>
18689  
18690  /*
18691   * Management arrays for POSIX timers.  Timers are kept in slab memory
18692 @@ -294,6 +295,10 @@ void do_schedule_next_timer(struct sigin
18693  
18694  int posix_timer_event(struct k_itimer *timr,int si_private)
18695  {
18696 +       struct vx_info_save vxis;
18697 +       int ret;
18698 +
18699 +       enter_vx_info(task_get_vx_info(timr->it_process), &vxis);
18700         memset(&timr->sigq->info, 0, sizeof(siginfo_t));
18701         timr->sigq->info.si_sys_private = si_private;
18702         /* Send signal to the process that owns this timer.*/
18703 @@ -306,11 +311,11 @@ int posix_timer_event(struct k_itimer *t
18704  
18705         if (timr->it_sigev_notify & SIGEV_THREAD_ID) {
18706                 struct task_struct *leader;
18707 -               int ret = send_sigqueue(timr->it_sigev_signo, timr->sigq,
18708 -                                       timr->it_process);
18709  
18710 +               ret = send_sigqueue(timr->it_sigev_signo, timr->sigq,
18711 +                                   timr->it_process);
18712                 if (likely(ret >= 0))
18713 -                       return ret;
18714 +                       goto out;
18715  
18716                 timr->it_sigev_notify = SIGEV_SIGNAL;
18717                 leader = timr->it_process->group_leader;
18718 @@ -318,8 +323,12 @@ int posix_timer_event(struct k_itimer *t
18719                 timr->it_process = leader;
18720         }
18721  
18722 -       return send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
18723 -                                  timr->it_process);
18724 +       ret = send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
18725 +                                 timr->it_process);
18726 +out:
18727 +       leave_vx_info(&vxis);
18728 +       put_vx_info(vxis.vxi);
18729 +       return ret;
18730  }
18731  EXPORT_SYMBOL_GPL(posix_timer_event);
18732  
18733 @@ -366,7 +375,7 @@ static struct task_struct * good_sigeven
18734         struct task_struct *rtn = current->group_leader;
18735  
18736         if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
18737 -               (!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) ||
18738 +               (!(rtn = find_task_by_real_pid(event->sigev_notify_thread_id)) ||
18739                  rtn->tgid != current->tgid ||
18740                  (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
18741                 return NULL;
18742 diff -NurpP --minimal linux-2.6.16.20/kernel/printk.c linux-2.6.16.20-vs2.1.1-rc22/kernel/printk.c
18743 --- linux-2.6.16.20/kernel/printk.c     2006-02-18 14:40:37 +0100
18744 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/printk.c        2006-04-26 19:07:00 +0200
18745 @@ -31,6 +31,8 @@
18746  #include <linux/security.h>
18747  #include <linux/bootmem.h>
18748  #include <linux/syscalls.h>
18749 +#include <linux/vs_context.h>
18750 +#include <linux/vserver/cvirt.h>
18751  
18752  #include <asm/uaccess.h>
18753  
18754 @@ -221,18 +223,13 @@ int do_syslog(int type, char __user *buf
18755         unsigned long i, j, limit, count;
18756         int do_clear = 0;
18757         char c;
18758 -       int error = 0;
18759 +       int error;
18760  
18761         error = security_syslog(type);
18762         if (error)
18763                 return error;
18764  
18765 -       switch (type) {
18766 -       case 0:         /* Close log */
18767 -               break;
18768 -       case 1:         /* Open log */
18769 -               break;
18770 -       case 2:         /* Read from log */
18771 +       if ((type >= 2) && (type <= 4)) {
18772                 error = -EINVAL;
18773                 if (!buf || len < 0)
18774                         goto out;
18775 @@ -243,6 +240,16 @@ int do_syslog(int type, char __user *buf
18776                         error = -EFAULT;
18777                         goto out;
18778                 }
18779 +       }
18780 +       if (!vx_check(0, VX_ADMIN|VX_WATCH))
18781 +               return vx_do_syslog(type, buf, len);
18782 +
18783 +       switch (type) {
18784 +       case 0:         /* Close log */
18785 +               break;
18786 +       case 1:         /* Open log */
18787 +               break;
18788 +       case 2:         /* Read from log */
18789                 error = wait_event_interruptible(log_wait,
18790                                                         (log_start - log_end));
18791                 if (error)
18792 @@ -267,16 +274,6 @@ int do_syslog(int type, char __user *buf
18793                 do_clear = 1;
18794                 /* FALL THRU */
18795         case 3:         /* Read last kernel messages */
18796 -               error = -EINVAL;
18797 -               if (!buf || len < 0)
18798 -                       goto out;
18799 -               error = 0;
18800 -               if (!len)
18801 -                       goto out;
18802 -               if (!access_ok(VERIFY_WRITE, buf, len)) {
18803 -                       error = -EFAULT;
18804 -                       goto out;
18805 -               }
18806                 count = len;
18807                 if (count > log_buf_len)
18808                         count = log_buf_len;
18809 @@ -513,11 +510,14 @@ __attribute__((weak)) unsigned long long
18810  
18811  asmlinkage int printk(const char *fmt, ...)
18812  {
18813 +       struct vx_info_save vxis;
18814         va_list args;
18815         int r;
18816  
18817         va_start(args, fmt);
18818 +       __enter_vx_admin(&vxis);
18819         r = vprintk(fmt, args);
18820 +       __leave_vx_admin(&vxis);
18821         va_end(args);
18822  
18823         return r;
18824 diff -NurpP --minimal linux-2.6.16.20/kernel/ptrace.c linux-2.6.16.20-vs2.1.1-rc22/kernel/ptrace.c
18825 --- linux-2.6.16.20/kernel/ptrace.c     2006-05-22 16:25:40 +0200
18826 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/ptrace.c        2006-05-22 06:19:46 +0200
18827 @@ -18,6 +18,7 @@
18828  #include <linux/ptrace.h>
18829  #include <linux/security.h>
18830  #include <linux/signal.h>
18831 +#include <linux/vs_pid.h>
18832  
18833  #include <asm/pgtable.h>
18834  #include <asm/uaccess.h>
18835 @@ -507,6 +508,10 @@ asmlinkage long sys_ptrace(long request,
18836                 goto out;
18837         }
18838  
18839 +       ret = -EPERM;
18840 +       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
18841 +               goto out_put_task_struct;
18842 +
18843         if (request == PTRACE_ATTACH) {
18844                 ret = ptrace_attach(child);
18845                 goto out_put_task_struct;
18846 diff -NurpP --minimal linux-2.6.16.20/kernel/sched.c linux-2.6.16.20-vs2.1.1-rc22/kernel/sched.c
18847 --- linux-2.6.16.20/kernel/sched.c      2006-05-11 21:25:36 +0200
18848 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/sched.c 2006-04-26 19:07:00 +0200
18849 @@ -52,6 +52,10 @@
18850  #include <asm/tlb.h>
18851  
18852  #include <asm/unistd.h>
18853 +#include <linux/vs_context.h>
18854 +#include <linux/vs_cvirt.h>
18855 +#include <linux/vs_pid.h>
18856 +#include <linux/vs_sched.h>
18857  
18858  /*
18859   * Convert user-nice values [ -20 ... 0 ... 19 ]
18860 @@ -239,6 +243,16 @@ struct runqueue {
18861         struct list_head migration_queue;
18862         int cpu;
18863  #endif
18864 +       unsigned long norm_time;
18865 +       unsigned long idle_time;
18866 +#ifdef CONFIG_VSERVER_IDLETIME
18867 +       int idle_skip;
18868 +#endif
18869 +#ifdef CONFIG_VSERVER_HARDCPU
18870 +       struct list_head hold_queue;
18871 +       unsigned long nr_onhold;
18872 +       int idle_tokens;
18873 +#endif
18874  
18875  #ifdef CONFIG_SCHEDSTATS
18876         /* latency stats */
18877 @@ -599,6 +613,7 @@ static inline void sched_info_switch(tas
18878   */
18879  static void dequeue_task(struct task_struct *p, prio_array_t *array)
18880  {
18881 +       BUG_ON(p->state & TASK_ONHOLD);
18882         array->nr_active--;
18883         list_del(&p->run_list);
18884         if (list_empty(array->queue + p->prio))
18885 @@ -607,6 +622,7 @@ static void dequeue_task(struct task_str
18886  
18887  static void enqueue_task(struct task_struct *p, prio_array_t *array)
18888  {
18889 +       BUG_ON(p->state & TASK_ONHOLD);
18890         sched_info_queued(p);
18891         list_add_tail(&p->run_list, array->queue + p->prio);
18892         __set_bit(p->prio, array->bitmap);
18893 @@ -620,11 +636,13 @@ static void enqueue_task(struct task_str
18894   */
18895  static void requeue_task(struct task_struct *p, prio_array_t *array)
18896  {
18897 +       BUG_ON(p->state & TASK_ONHOLD);
18898         list_move_tail(&p->run_list, array->queue + p->prio);
18899  }
18900  
18901  static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array)
18902  {
18903 +       BUG_ON(p->state & TASK_ONHOLD);
18904         list_add(&p->run_list, array->queue + p->prio);
18905         __set_bit(p->prio, array->bitmap);
18906         array->nr_active++;
18907 @@ -655,6 +673,10 @@ static int effective_prio(task_t *p)
18908         bonus = CURRENT_BONUS(p) - MAX_BONUS / 2;
18909  
18910         prio = p->static_prio - bonus;
18911 +
18912 +       /* adjust effective priority */
18913 +       prio = vx_adjust_prio(p, prio, MAX_USER_PRIO);
18914 +
18915         if (prio < MAX_RT_PRIO)
18916                 prio = MAX_RT_PRIO;
18917         if (prio > MAX_PRIO-1)
18918 @@ -662,11 +684,15 @@ static int effective_prio(task_t *p)
18919         return prio;
18920  }
18921  
18922 +#include "sched_mon.h"
18923 +
18924 +
18925  /*
18926   * __activate_task - move a task to the runqueue.
18927   */
18928  static inline void __activate_task(task_t *p, runqueue_t *rq)
18929  {
18930 +       vxm_activate_task(p, rq);
18931         enqueue_task(p, rq->active);
18932         rq->nr_running++;
18933  }
18934 @@ -676,6 +702,7 @@ static inline void __activate_task(task_
18935   */
18936  static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
18937  {
18938 +       vxm_activate_idle(p, rq);
18939         enqueue_task_head(p, rq->active);
18940         rq->nr_running++;
18941  }
18942 @@ -793,19 +820,31 @@ static void activate_task(task_t *p, run
18943         }
18944         p->timestamp = now;
18945  
18946 +       vx_activate_task(p);
18947         __activate_task(p, rq);
18948  }
18949  
18950  /*
18951   * deactivate_task - remove a task from the runqueue.
18952   */
18953 -static void deactivate_task(struct task_struct *p, runqueue_t *rq)
18954 +static void __deactivate_task(struct task_struct *p, runqueue_t *rq)
18955  {
18956         rq->nr_running--;
18957         dequeue_task(p, p->array);
18958 +       vxm_deactivate_task(p, rq);
18959         p->array = NULL;
18960  }
18961  
18962 +static inline
18963 +void deactivate_task(struct task_struct *p, runqueue_t *rq)
18964 +{
18965 +       vx_deactivate_task(p);
18966 +       __deactivate_task(p, rq);
18967 +}
18968 +
18969 +
18970 +#include "sched_hard.h"
18971 +
18972  /*
18973   * resched_task - mark a task 'to be rescheduled now'.
18974   *
18975 @@ -869,6 +908,7 @@ static int migrate_task(task_t *p, int d
18976  {
18977         runqueue_t *rq = task_rq(p);
18978  
18979 +       vxm_migrate_task(p, rq, dest_cpu);
18980         /*
18981          * If the task is not on a runqueue (and not running), then
18982          * it is sufficient to simply update the task's cpu field.
18983 @@ -1169,6 +1209,12 @@ static int try_to_wake_up(task_t *p, uns
18984  
18985         rq = task_rq_lock(p, &flags);
18986         old_state = p->state;
18987 +
18988 +       /* we need to unhold suspended tasks */
18989 +       if (old_state & TASK_ONHOLD) {
18990 +               vx_unhold_task(p, rq);
18991 +               old_state = p->state;
18992 +       }
18993         if (!(old_state & state))
18994                 goto out;
18995  
18996 @@ -1285,10 +1331,16 @@ out_activate:
18997          * sleep is handled in a priority-neutral manner, no priority
18998          * boost and no penalty.)
18999          */
19000 -       if (old_state & TASK_NONINTERACTIVE)
19001 +       if (old_state & TASK_NONINTERACTIVE) {
19002 +               vx_activate_task(p);
19003                 __activate_task(p, rq);
19004 -       else
19005 +       } else
19006                 activate_task(p, rq, cpu == this_cpu);
19007 +
19008 +       /* this is to get the accounting behind the load update */
19009 +       if (old_state & TASK_UNINTERRUPTIBLE)
19010 +               vx_uninterruptible_dec(p);
19011 +
19012         /*
19013          * Sync wakeups (i.e. those types of wakeups where the waker
19014          * has indicated that it will leave the CPU in short order)
19015 @@ -1412,6 +1464,7 @@ void fastcall wake_up_new_task(task_t *p
19016  
19017         p->prio = effective_prio(p);
19018  
19019 +       vx_activate_task(p);
19020         if (likely(cpu == this_cpu)) {
19021                 if (!(clone_flags & CLONE_VM)) {
19022                         /*
19023 @@ -1423,6 +1476,7 @@ void fastcall wake_up_new_task(task_t *p
19024                                 __activate_task(p, rq);
19025                         else {
19026                                 p->prio = current->prio;
19027 +                               BUG_ON(p->state & TASK_ONHOLD);
19028                                 list_add_tail(&p->run_list, &current->run_list);
19029                                 p->array = current->array;
19030                                 p->array->nr_active++;
19031 @@ -2489,13 +2543,16 @@ unsigned long long current_sched_time(co
19032  void account_user_time(struct task_struct *p, cputime_t cputime)
19033  {
19034         struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
19035 +       struct vx_info *vxi = p->vx_info;  /* p is _always_ current */
19036         cputime64_t tmp;
19037 +       int nice = (TASK_NICE(p) > 0);
19038  
19039         p->utime = cputime_add(p->utime, cputime);
19040 +       vx_account_user(vxi, cputime, nice);
19041  
19042         /* Add user time to cpustat. */
19043         tmp = cputime_to_cputime64(cputime);
19044 -       if (TASK_NICE(p) > 0)
19045 +       if (nice)
19046                 cpustat->nice = cputime64_add(cpustat->nice, tmp);
19047         else
19048                 cpustat->user = cputime64_add(cpustat->user, tmp);
19049 @@ -2511,10 +2568,12 @@ void account_system_time(struct task_str
19050                          cputime_t cputime)
19051  {
19052         struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
19053 +       struct vx_info *vxi = p->vx_info;  /* p is _always_ current */
19054         runqueue_t *rq = this_rq();
19055         cputime64_t tmp;
19056  
19057         p->stime = cputime_add(p->stime, cputime);
19058 +       vx_account_system(vxi, cputime, (p == rq->idle));
19059  
19060         /* Add system time to cpustat. */
19061         tmp = cputime_to_cputime64(cputime);
19062 @@ -2568,12 +2627,14 @@ void scheduler_tick(void)
19063         unsigned long long now = sched_clock();
19064  
19065         update_cpu_clock(p, rq, now);
19066 +       vxm_sync(now, cpu);
19067  
19068         rq->timestamp_last_tick = now;
19069  
19070         if (p == rq->idle) {
19071                 if (wake_priority_sleeper(rq))
19072                         goto out;
19073 +               vx_idle_resched(rq);
19074                 rebalance_tick(cpu, rq, SCHED_IDLE);
19075                 return;
19076         }
19077 @@ -2606,7 +2667,7 @@ void scheduler_tick(void)
19078                 }
19079                 goto out_unlock;
19080         }
19081 -       if (!--p->time_slice) {
19082 +       if (vx_need_resched(p, --p->time_slice, cpu)) {
19083                 dequeue_task(p, rq->active);
19084                 set_tsk_need_resched(p);
19085                 p->prio = effective_prio(p);
19086 @@ -2930,15 +2991,26 @@ need_resched_nonpreemptible:
19087                                 unlikely(signal_pending(prev))))
19088                         prev->state = TASK_RUNNING;
19089                 else {
19090 -                       if (prev->state == TASK_UNINTERRUPTIBLE)
19091 +                       if (prev->state == TASK_UNINTERRUPTIBLE) {
19092                                 rq->nr_uninterruptible++;
19093 +                               vx_uninterruptible_inc(prev);
19094 +                       }
19095                         deactivate_task(prev, rq);
19096                 }
19097         }
19098  
19099         cpu = smp_processor_id();
19100 +       vx_set_rq_time(rq, jiffies);
19101 +try_unhold:
19102 +       vx_try_unhold(rq, cpu);
19103 +pick_next:
19104 +
19105         if (unlikely(!rq->nr_running)) {
19106  go_idle:
19107 +               /* can we skip idle time? */
19108 +               if (vx_try_skip(rq, cpu))
19109 +                       goto try_unhold;
19110 +
19111                 idle_balance(cpu, rq);
19112                 if (!rq->nr_running) {
19113                         next = rq->idle;
19114 @@ -2983,6 +3055,10 @@ go_idle:
19115         queue = array->queue + idx;
19116         next = list_entry(queue->next, task_t, run_list);
19117  
19118 +       /* check before we schedule this context */
19119 +       if (!vx_schedule(next, rq, cpu))
19120 +               goto pick_next;
19121 +
19122         if (!rt_task(next) && next->activated > 0) {
19123                 unsigned long long delta = now - next->timestamp;
19124                 if (unlikely((long long)(now - next->timestamp) < 0))
19125 @@ -3538,7 +3614,7 @@ asmlinkage long sys_nice(int increment)
19126                 nice = 19;
19127  
19128         if (increment < 0 && !can_nice(current, nice))
19129 -               return -EPERM;
19130 +               return vx_flags(VXF_IGNEG_NICE, 0) ? 0 : -EPERM;
19131  
19132         retval = security_task_setnice(current, nice);
19133         if (retval)
19134 @@ -3698,6 +3774,7 @@ recheck:
19135         oldprio = p->prio;
19136         __setscheduler(p, policy, param->sched_priority);
19137         if (array) {
19138 +               vx_activate_task(p);
19139                 __activate_task(p, rq);
19140                 /*
19141                  * Reschedule if we are currently running on this runqueue and
19142 @@ -6042,7 +6119,10 @@ void __init sched_init(void)
19143                 rq->cpu = i;
19144  #endif
19145                 atomic_set(&rq->nr_iowait, 0);
19146 -
19147 +#ifdef CONFIG_VSERVER_HARDCPU
19148 +               INIT_LIST_HEAD(&rq->hold_queue);
19149 +               rq->nr_onhold = 0;
19150 +#endif
19151                 for (j = 0; j < 2; j++) {
19152                         array = rq->arrays + j;
19153                         for (k = 0; k < MAX_PRIO; k++) {
19154 @@ -6111,6 +6191,7 @@ void normalize_rt_tasks(void)
19155                         deactivate_task(p, task_rq(p));
19156                 __setscheduler(p, SCHED_NORMAL, 0);
19157                 if (array) {
19158 +                       vx_activate_task(p);
19159                         __activate_task(p, task_rq(p));
19160                         resched_task(rq->curr);
19161                 }
19162 diff -NurpP --minimal linux-2.6.16.20/kernel/sched_hard.h linux-2.6.16.20-vs2.1.1-rc22/kernel/sched_hard.h
19163 --- linux-2.6.16.20/kernel/sched_hard.h 1970-01-01 01:00:00 +0100
19164 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/sched_hard.h    2006-04-26 19:07:00 +0200
19165 @@ -0,0 +1,324 @@
19166 +
19167 +#ifdef CONFIG_VSERVER_IDLELIMIT
19168 +
19169 +/*
19170 + * vx_idle_resched - reschedule after maxidle: count down rq->idle_tokens and,
19171 + * once it reaches zero while rq->hold_queue is non-empty, flag a reschedule */
19172 +static inline
19173 +void vx_idle_resched(runqueue_t *rq)
19174 +{
19175 +       /* maybe have a better criterion for paused */
19176 +       if (!--rq->idle_tokens && !list_empty(&rq->hold_queue))
19177 +               set_need_resched();
19178 +}
19179 +
19180 +#else /* !CONFIG_VSERVER_IDLELIMIT */
19181 +/* without CONFIG_VSERVER_IDLELIMIT the call expands to nothing */
19182 +#define vx_idle_resched(rq)
19183 +
19184 +#endif /* CONFIG_VSERVER_IDLELIMIT */
19185 +
19186 +
19187 +
19188 +#ifdef CONFIG_VSERVER_IDLETIME
19189 +/* vx_set_rq_min_skip - store min in rq->idle_skip */
19190 +#define vx_set_rq_min_skip(rq, min)            \
19191 +       (rq)->idle_skip = (min)
19192 +/* vx_save_min_skip - forwards its arguments to __vx_save_min_skip() */
19193 +#define vx_save_min_skip(ret, min, val)                \
19194 +       __vx_save_min_skip(ret, min, val)
19195 +/* __vx_save_min_skip - fold val into the running minimum kept in *min */
19196 +static inline
19197 +void __vx_save_min_skip(int ret, int *min, int val)
19198 +{
19199 +       if (ret > -2)   /* only results of -2 or below contribute */
19200 +               return;
19201 +       if ((*min > val) || !*min)      /* a *min of 0 is always replaced */
19202 +               *min = val;
19203 +}
19204 +/* vx_try_skip - if rq->idle_skip is positive, add it to rq->idle_time; returns 1 when time was advanced, else 0 */
19205 +static inline
19206 +int vx_try_skip(runqueue_t *rq, int cpu)
19207 +{
19208 +       /* artificially advance time */
19209 +       if (rq->idle_skip > 0) {
19210 +               vxdprintk(list_empty(&rq->hold_queue),  /* presumably logs only when the condition holds - TODO confirm */
19211 +                       "hold queue empty on cpu %d", cpu);
19212 +               rq->idle_time += rq->idle_skip;
19213 +               vxm_idle_skip(rq, cpu);
19214 +               return 1;       /* schedule() reacts by jumping back to try_unhold */
19215 +       }
19216 +       return 0;
19217 +}
19218 +
19219 +#else /* !CONFIG_VSERVER_IDLETIME */
19220 +/* CONFIG_VSERVER_IDLETIME off: min is evaluated once, nothing is stored in rq */
19221 +#define vx_set_rq_min_skip(rq, min)            \
19222 +       ({ int dummy = (min); dummy; })
19223 +/* expands to nothing, so its arguments are never evaluated or compiled */
19224 +#define vx_save_min_skip(ret, min, val)
19225 +/* vx_try_skip - stub: always reports that no idle time was skipped */
19226 +static inline
19227 +int vx_try_skip(runqueue_t *rq, int cpu)
19228 +{
19229 +       return 0;
19230 +}
19231 +
19232 +#endif /* CONFIG_VSERVER_IDLETIME */
19233 +
19234 +
19235 +
19236 +#ifdef CONFIG_VSERVER_HARDCPU
19237 +/* vx_set_rq_max_idle - store max in rq->idle_tokens */
19238 +#define vx_set_rq_max_idle(rq, max)            \
19239 +       (rq)->idle_tokens = (max)
19240 +/* vx_save_max_idle - forwards its arguments to __vx_save_max_idle() */
19241 +#define vx_save_max_idle(ret, min, val)                \
19242 +       __vx_save_max_idle(ret, min, val)
19243 +/* __vx_save_max_idle - lower *min to val when val is smaller; ret is accepted but not examined */
19244 +static inline
19245 +void __vx_save_max_idle(int ret, int *min, int val)
19246 +{
19247 +       if (*min > val)
19248 +               *min = val;
19249 +}
19250 +
19251 +
19252 +/*
19253 + * vx_hold_task - put a task on the hold queue: take it off its priority
19254 + * array, mark it TASK_ONHOLD and append it to rq->hold_queue via run_list */
19255 +static inline
19256 +void vx_hold_task(struct task_struct *p, runqueue_t *rq)
19257 +{
19258 +       __deactivate_task(p, rq);       /* must precede the flag: dequeue_task() BUG()s on TASK_ONHOLD */
19259 +       p->state |= TASK_ONHOLD;
19260 +       /* a new one on hold */
19261 +       rq->nr_onhold++;
19262 +       vxm_hold_task(p, rq);
19263 +       list_add_tail(&p->run_list, &rq->hold_queue);   /* run_list is unlinked at this point, so it is reused as the hold-queue link */
19264 +}
19265 +
19266 +/*
19267 + * vx_unhold_task - put a task back to the runqueue: unlink it from the hold
19268 + * queue, clear TASK_ONHOLD and enqueue it on the expired priority array */
19269 +static inline
19270 +void vx_unhold_task(struct task_struct *p, runqueue_t *rq)
19271 +{
19272 +       list_del(&p->run_list);
19273 +       /* one less waiting */
19274 +       rq->nr_onhold--;
19275 +       p->state &= ~TASK_ONHOLD;       /* must be clear first: enqueue_task() BUG()s on TASK_ONHOLD */
19276 +       enqueue_task(p, rq->expired);
19277 +       rq->nr_running++;
19278 +       vxm_unhold_task(p, rq);
19279 +       /* keep best_expired_prio no larger than the static_prio just added to the expired array */
19280 +       if (p->static_prio < rq->best_expired_prio)
19281 +               rq->best_expired_prio = p->static_prio;
19282 +}
19283 +/* nr_onhold - sum of the nr_onhold counters of the runqueues of all online cpus */
19284 +unsigned long nr_onhold(void)
19285 +{
19286 +       unsigned long i, sum = 0;
19287 +
19288 +       for_each_online_cpu(i)
19289 +               sum += cpu_rq(i)->nr_onhold;    /* read without taking any lock in this function */
19290 +
19291 +       return sum;
19292 +}
19293 +
19294 +
19295 +/* __vx_tokens_avail - return the token count stored in the given scheduler state */
19296 +static inline
19297 +int __vx_tokens_avail(struct _vx_sched_pc *sched_pc)
19298 +{
19299 +       return sched_pc->tokens;
19300 +}
19301 +/* __vx_consume_token - take one token; no lower bound is enforced here (vx_need_resched() only calls it while tokens > 0) */
19302 +static inline
19303 +void __vx_consume_token(struct _vx_sched_pc *sched_pc)
19304 +{
19305 +       sched_pc->tokens--;
19306 +}
19307 +/* vx_need_resched - charge one token to p's context if its flags ask for it; returns non-zero when slice is 0 or the tokens ran out */
19308 +static inline
19309 +int vx_need_resched(struct task_struct *p, int slice, int cpu)
19310 +{
19311 +       struct vx_info *vxi = p->vx_info;
19312 +
19313 +       if (vx_info_flags(vxi, VXF_SCHED_HARD|VXF_SCHED_PRIO, 0)) {     /* presumably true when either flag is set - TODO confirm */
19314 +               struct _vx_sched_pc *sched_pc =
19315 +                       &vx_per_cpu(vxi, sched_pc, cpu);
19316 +               int tokens;
19317 +
19318 +               /* maybe we can simplify that to decrement
19319 +                  the token counter unconditionally? */
19320 +
19321 +               if ((tokens = __vx_tokens_avail(sched_pc)) > 0) /* tokens keeps the count from before consumption */
19322 +                       __vx_consume_token(sched_pc);
19323 +
19324 +               /* for tokens > 0, one token was consumed */
19325 +               if (tokens < 2)         /* nothing is left now: force the slice to end */
19326 +                       slice = 0;
19327 +       }
19328 +       vxm_need_resched(p, slice, cpu);
19329 +       return (slice == 0);
19330 +}
19331 +
19332 +/* vx_set_rq_time - record time in rq->norm_time (schedule() passes jiffies) */
19333 +#define vx_set_rq_time(rq, time) do {  \
19334 +       rq->norm_time = time;           \
19335 +} while (0)
19336 +
19337 +/* vx_try_unhold - scan rq->hold_queue, release at most one task whose context has tokens again, then store the collected idle limits in rq */
19338 +static inline
19339 +void vx_try_unhold(runqueue_t *rq, int cpu)
19340 +{
19341 +       struct vx_info *vxi = NULL;     /* context of the entry examined last */
19342 +       struct list_head *l, *n;
19343 +       int maxidle = HZ;               /* starting value, only ever lowered below */
19344 +       int minskip = 0;                /* 0 until a first value is saved */
19345 +
19346 +       /* nothing to do? what about pause? */
19347 +       if (list_empty(&rq->hold_queue))
19348 +               return;                 /* rq->idle_tokens and rq->idle_skip stay untouched */
19349 +
19350 +       list_for_each_safe(l, n, &rq->hold_queue) {
19351 +               int ret, delta_min[2];
19352 +               struct _vx_sched_pc *sched_pc;
19353 +               struct task_struct *p;
19354 +
19355 +               p = list_entry(l, task_t, run_list);
19356 +               /* don't bother with same context */
19357 +               if (vxi == p->vx_info)  /* compares against the previous entry only, so runs of one context collapse */
19358 +                       continue;
19359 +
19360 +               vxi = p->vx_info;
19361 +               /* ignore paused contexts */
19362 +               if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
19363 +                       continue;
19364 +
19365 +               sched_pc = &vx_per_cpu(vxi, sched_pc, cpu);
19366 +
19367 +               /* recalc tokens */
19368 +               vxm_sched_info(sched_pc, vxi, cpu);
19369 +               ret = vx_tokens_recalc(sched_pc,        /* presumably fills delta_min[] - TODO confirm */
19370 +                       &rq->norm_time, &rq->idle_time, delta_min);
19371 +               vxm_tokens_recalc(sched_pc, rq, vxi, cpu);
19372 +
19373 +               if (ret > 0) {
19374 +                       /* we found a runnable context */
19375 +                       vx_unhold_task(p, rq);
19376 +                       break;          /* only this one task is released per call */
19377 +               }
19378 +               vx_save_max_idle(ret, &maxidle, delta_min[0]);
19379 +               vx_save_min_skip(ret, &minskip, delta_min[1]);
19380 +       }
19381 +       vx_set_rq_max_idle(rq, maxidle);
19382 +       vx_set_rq_min_skip(rq, minskip);
19383 +       vxm_rq_max_min(rq, cpu);
19384 +}
19385 +
19386 +/* vx_schedule - check whether next may run now; returns 1 to let it run, 0 after it was moved to the hold queue (schedule() then picks again) */
19387 +static inline
19388 +int vx_schedule(struct task_struct *next, runqueue_t *rq, int cpu)
19389 +{
19390 +       struct vx_info *vxi = next->vx_info;
19391 +       struct _vx_sched_pc *sched_pc;
19392 +       int delta_min[2];
19393 +       int flags, ret;
19394 +
19395 +       if (!vxi)                       /* tasks without a context are never restricted */
19396 +               return 1;
19397 +
19398 +       flags = vxi->vx_flags;          /* read once; every check below uses this copy */
19399 +
19400 +       if (unlikely(vx_check_flags(flags , VXF_SCHED_PAUSE, 0)))
19401 +               goto put_on_hold;       /* paused contexts are held without touching the tokens */
19402 +       if (!vx_check_flags(flags , VXF_SCHED_HARD|VXF_SCHED_PRIO, 0))
19403 +               return 1;
19404 +
19405 +       sched_pc = &vx_per_cpu(vxi, sched_pc, cpu);
19406 +#ifdef CONFIG_SMP
19407 +       /* update scheduler params */
19408 +       if (cpu_isset(cpu, vxi->sched.update)) {
19409 +               vx_update_sched_param(&vxi->sched, sched_pc);
19410 +               vxm_update_sched(sched_pc, vxi, cpu);
19411 +               cpu_clear(cpu, vxi->sched.update);      /* this cpu's pending-update bit is now handled */
19412 +       }
19413 +#endif
19414 +       vxm_sched_info(sched_pc, vxi, cpu);
19415 +       ret  = vx_tokens_recalc(sched_pc,
19416 +               &rq->norm_time, &rq->idle_time, delta_min);
19417 +       vxm_tokens_recalc(sched_pc, rq, vxi, cpu);
19418 +
19419 +       if (!vx_check_flags(flags , VXF_SCHED_HARD, 0)) /* without VXF_SCHED_HARD the result never holds the task */
19420 +               return 1;
19421 +
19422 +       if (unlikely(ret < 0)) {
19423 +               vx_save_max_idle(ret, &rq->idle_tokens, delta_min[0]);
19424 +               vx_save_min_skip(ret, &rq->idle_skip, delta_min[1]);
19425 +               vxm_rq_max_min(rq, cpu);
19426 +       put_on_hold:    /* also entered by the goto above, which skips the three calls before it */
19427 +               vx_hold_task(next, rq);
19428 +               return 0;
19429 +       }
19430 +       return 1;
19431 +}
19432 +
19433 +
19434 +#else /* CONFIG_VSERVER_HARDCPU */
19435 +/* vx_hold_task - stub used without CONFIG_VSERVER_HARDCPU: does nothing */
19436 +static inline
19437 +void vx_hold_task(struct task_struct *p, runqueue_t *rq)
19438 +{
19439 +       return;
19440 +}
19441 +/* vx_unhold_task - stub used without CONFIG_VSERVER_HARDCPU: does nothing */
19442 +static inline
19443 +void vx_unhold_task(struct task_struct *p, runqueue_t *rq)
19444 +{
19445 +       return;
19446 +}
19447 +/* nr_onhold - stub used without CONFIG_VSERVER_HARDCPU: always reports 0 */
19448 +unsigned long nr_onhold(void)
19449 +{
19450 +       return 0;
19451 +}
19452 +
19453 +/* vx_need_resched - without CONFIG_VSERVER_HARDCPU: non-zero exactly when slice is 0; no tokens are charged */
19454 +static inline
19455 +int vx_need_resched(struct task_struct *p, int slice, int cpu)
19456 +{
19457 +       return (slice == 0);
19458 +}
19459 +
19460 +/* without CONFIG_VSERVER_HARDCPU this expands to nothing; rq->norm_time is not written */
19461 +#define vx_set_rq_time(rq, time)
19462 +/* vx_try_unhold - stub used without CONFIG_VSERVER_HARDCPU: does nothing */
19463 +static inline
19464 +void vx_try_unhold(runqueue_t *rq, int cpu)
19465 +{
19466 +       return;
19467 +}
19468 +/* vx_schedule - without CONFIG_VSERVER_HARDCPU: recalculate tokens for VXF_SCHED_PRIO contexts, never hold a task; always returns 1 */
19469 +static inline
19470 +int vx_schedule(struct task_struct *next, runqueue_t *rq, int cpu)
19471 +{
19472 +       struct vx_info *vxi = next->vx_info;
19473 +       struct _vx_sched_pc *sched_pc;
19474 +       int delta_min[2];
19475 +       int ret;        /* NOTE(review): assigned below but never read in this variant */
19476 +
19477 +       if (!vx_info_flags(vxi, VXF_SCHED_PRIO, 0))     /* presumably copes with a NULL vxi - TODO confirm */
19478 +               return 1;
19479 +
19480 +       sched_pc = &vx_per_cpu(vxi, sched_pc, cpu);
19481 +       vxm_sched_info(sched_pc, vxi, cpu);
19482 +       ret  = vx_tokens_recalc(sched_pc,       /* NOTE(review): vx_set_rq_time() is empty in this configuration - confirm rq->norm_time is refreshed elsewhere */
19483 +               &rq->norm_time, &rq->idle_time, delta_min);
19484 +       vxm_tokens_recalc(sched_pc, rq, vxi, cpu);
19485 +       return 1;
19486 +}
19487 +
19488 +#endif /* CONFIG_VSERVER_HARDCPU */
19489 +
19490 diff -NurpP --minimal linux-2.6.16.20/kernel/sched_mon.h linux-2.6.16.20-vs2.1.1-rc22/kernel/sched_mon.h
19491 --- linux-2.6.16.20/kernel/sched_mon.h  1970-01-01 01:00:00 +0100
19492 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/sched_mon.h     2006-04-26 19:07:00 +0200
19493 @@ -0,0 +1,188 @@
19494 +
19495 +#include <linux/vserver/monitor.h>
19496 +
19497 +#ifdef  CONFIG_VSERVER_MONITOR
19498 +
19499 +struct _vx_mon_entry *vxm_advance(int cpu);
19500 +
19501 +/* __vxm_basic - fill in the two header fields (type and xid) of a monitor entry */
19502 +static inline
19503 +void   __vxm_basic(struct _vx_mon_entry *entry, xid_t xid, int type)
19504 +{
19505 +       entry->type = type;
19506 +       entry->xid = xid;
19507 +}
19508 +/* __vxm_sync - emit a VXM_SYNC entry with xid 0 carrying the current xtime seconds and nanoseconds */
19509 +static inline
19510 +void   __vxm_sync(int cpu)
19511 +{
19512 +       struct _vx_mon_entry *entry = vxm_advance(cpu); /* presumably the next entry slot for this cpu - TODO confirm */
19513 +
19514 +       __vxm_basic(entry, 0, VXM_SYNC);
19515 +       entry->ev.sec = xtime.tv_sec;
19516 +       entry->ev.nsec = xtime.tv_nsec;
19517 +}
19518 +/* __vxm_task - emit an entry of the given type for task p, recording its xid, pid and state */
19519 +static inline
19520 +void   __vxm_task(struct task_struct *p, int type)
19521 +{
19522 +       struct _vx_mon_entry *entry = vxm_advance(task_cpu(p)); /* entry is taken for the cpu the task is assigned to */
19523 +
19524 +       __vxm_basic(entry, p->xid, type);
19525 +       entry->ev.tsk.pid = p->pid;
19526 +       entry->ev.tsk.state = p->state;
19527 +}
19528 +/* __vxm_sched - emit a scheduler-state entry for context vxi: tokens, norm_time and idle_time of s */
19529 +static inline
19530 +void   __vxm_sched(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
19531 +{
19532 +       struct _vx_mon_entry *entry = vxm_advance(cpu);
19533 +
19534 +       __vxm_basic(entry, vxi->vx_id, (VXM_SCHED | s->flags)); /* s->flags is OR-ed into the entry type */
19535 +       entry->sd.tokens = s->tokens;
19536 +       entry->sd.norm_time = s->norm_time;
19537 +       entry->sd.idle_time = s->idle_time;
19538 +}
19539 +
19540 +static inline
19541 +void   __vxm_rqinfo1(runqueue_t *q, int cpu)
19542 +{
19543 +       struct _vx_mon_entry *entry = vxm_advance(cpu);
19544 +
19545 +       entry->type = VXM_RQINFO_1;
19546 +       entry->xid = ((unsigned long)q >> 16) & 0xffff;
19547 +       entry->q1.running = q->nr_running;
19548 +       entry->q1.onhold = q->nr_onhold;
19549 +       entry->q1.iowait = atomic_read(&q->nr_iowait);
19550 +       entry->q1.uintr = q->nr_uninterruptible;
19551 +       entry->q1.idle_tokens = q->idle_tokens;
19552 +}
19553 +
19554 +static inline
19555 +void   __vxm_rqinfo2(runqueue_t *q, int cpu)
19556 +{
19557 +       struct _vx_mon_entry *entry = vxm_advance(cpu);
19558 +
19559 +       entry->type = VXM_RQINFO_2;
19560 +       entry->xid = (unsigned long)q & 0xffff;
19561 +       entry->q2.norm_time = q->norm_time;
19562 +       entry->q2.idle_time = q->idle_time;
19563 +       entry->q2.idle_skip = q->idle_skip;
19564 +}
19565 +
19566 +static inline
19567 +void   __vxm_update(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
19568 +{
19569 +       struct _vx_mon_entry *entry = vxm_advance(cpu);
19570 +
19571 +       __vxm_basic(entry, vxi->vx_id, VXM_UPDATE);
19572 +       entry->ev.tokens = s->tokens;
19573 +}
19574 +
19575 +static inline
19576 +void   __vxm_update1(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
19577 +{
19578 +       struct _vx_mon_entry *entry = vxm_advance(cpu);
19579 +
19580 +       __vxm_basic(entry, vxi->vx_id, VXM_UPDATE_1);
19581 +       entry->u1.tokens_max = s->tokens_max;
19582 +       entry->u1.fill_rate = s->fill_rate[0];
19583 +       entry->u1.interval = s->interval[0];
19584 +}
19585 +
19586 +static inline
19587 +void   __vxm_update2(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
19588 +{
19589 +       struct _vx_mon_entry *entry = vxm_advance(cpu);
19590 +
19591 +       __vxm_basic(entry, vxi->vx_id, VXM_UPDATE_2);
19592 +       entry->u2.tokens_min = s->tokens_min;
19593 +       entry->u2.fill_rate = s->fill_rate[1];
19594 +       entry->u2.interval = s->interval[1];
19595 +}
19596 +
19597 +/* task event hooks; the runqueue argument (q) is not used */
19598 +#define        vxm_activate_task(p,q)          __vxm_task(p, VXM_ACTIVATE)
19599 +#define        vxm_activate_idle(p,q)          __vxm_task(p, VXM_IDLE)
19600 +#define        vxm_deactivate_task(p,q)        __vxm_task(p, VXM_DEACTIVATE)
19601 +#define        vxm_hold_task(p,q)              __vxm_task(p, VXM_HOLD)
19602 +#define        vxm_unhold_task(p,q)            __vxm_task(p, VXM_UNHOLD)
19603 +
19604 +static inline
19605 +void   vxm_migrate_task(struct task_struct *p, runqueue_t *rq, int dest)
19606 +{
19607 +       __vxm_task(p, VXM_MIGRATE);
19608 +       __vxm_rqinfo1(rq, task_cpu(p));
19609 +       __vxm_rqinfo2(rq, task_cpu(p));
19610 +}
19611 +
19612 +static inline
19613 +void   vxm_idle_skip(runqueue_t *rq, int cpu)
19614 +{
19615 +       __vxm_rqinfo1(rq, cpu);
19616 +       __vxm_rqinfo2(rq, cpu);
19617 +}
19618 +
19619 +static inline
19620 +void   vxm_need_resched(struct task_struct *p, int slice, int cpu)
19621 +{
19622 +       if (slice)
19623 +               return;
19624 +
19625 +       __vxm_task(p, VXM_RESCHED);
19626 +}
19627 +
19628 +static inline
19629 +void   vxm_sync(unsigned long now, int cpu)
19630 +{
19631 +       if (!CONFIG_VSERVER_MONITOR_SYNC ||
19632 +               (now % CONFIG_VSERVER_MONITOR_SYNC))
19633 +               return;
19634 +
19635 +       __vxm_sync(cpu);
19636 +}
19637 +
19638 +#define        vxm_sched_info(s,v,c)           __vxm_sched(s,v,c)
19639 +
19640 +static inline
19641 +void   vxm_tokens_recalc(struct _vx_sched_pc *s, runqueue_t *rq,
19642 +       struct vx_info *vxi, int cpu)
19643 +{
19644 +       __vxm_sched(s, vxi, cpu);
19645 +       __vxm_rqinfo2(rq, cpu);
19646 +}
19647 +
19648 +static inline
19649 +void   vxm_update_sched(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
19650 +{
19651 +       __vxm_sched(s, vxi, cpu);
19652 +       __vxm_update(s, vxi, cpu);
19653 +       __vxm_update1(s, vxi, cpu);
19654 +       __vxm_update2(s, vxi, cpu);
19655 +}
19656 +
19657 +static inline
19658 +void   vxm_rq_max_min(runqueue_t *rq, int cpu)
19659 +{
19660 +       __vxm_rqinfo1(rq, cpu);
19661 +       __vxm_rqinfo2(rq, cpu);
19662 +}
19663 +
19664 +#else  /* CONFIG_VSERVER_MONITOR */
19665 +
19666 +#define        vxm_activate_task(t,q)          do { } while (0)
19667 +#define        vxm_activate_idle(t,q)          do { } while (0)
19668 +#define        vxm_deactivate_task(t,q)        do { } while (0)
19669 +#define        vxm_hold_task(t,q)              do { } while (0)
19670 +#define        vxm_unhold_task(t,q)            do { } while (0)
19671 +#define        vxm_migrate_task(t,q,d)         do { } while (0)
19672 +#define        vxm_idle_skip(q,c)              do { } while (0)
19673 +#define        vxm_need_resched(t,s,c)         do { } while (0)
19674 +#define        vxm_sync(s,c)                   do { } while (0)
19675 +#define        vxm_sched_info(s,v,c)           do { } while (0)
19676 +#define        vxm_tokens_recalc(s,q,v,c)      do { } while (0)
19677 +#define        vxm_update_sched(s,v,c)         do { } while (0)
19678 +#define        vxm_rq_max_min(q,c)             do { } while (0)
19679 +
19680 +#endif /* CONFIG_VSERVER_MONITOR */
19681 +
19682 diff -NurpP --minimal linux-2.6.16.20/kernel/signal.c linux-2.6.16.20-vs2.1.1-rc22/kernel/signal.c
19683 --- linux-2.6.16.20/kernel/signal.c     2006-05-11 21:25:36 +0200
19684 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/signal.c        2006-04-26 19:07:00 +0200
19685 @@ -26,6 +26,7 @@
19686  #include <linux/signal.h>
19687  #include <linux/audit.h>
19688  #include <linux/capability.h>
19689 +#include <linux/vs_pid.h>
19690  #include <asm/param.h>
19691  #include <asm/uaccess.h>
19692  #include <asm/unistd.h>
19693 @@ -676,18 +677,27 @@ static int rm_from_queue(unsigned long m
19694  static int check_kill_permission(int sig, struct siginfo *info,
19695                                  struct task_struct *t)
19696  {
19697 +       int user;
19698         int error = -EINVAL;
19699 +
19700         if (!valid_signal(sig))
19701                 return error;
19702 +
19703 +       user = ((info == SEND_SIG_NOINFO) ||
19704 +               (!is_si_special(info) && SI_FROMUSER(info)));
19705 +
19706         error = -EPERM;
19707 -       if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info)))
19708 -           && ((sig != SIGCONT) ||
19709 +       if (user && ((sig != SIGCONT) ||
19710                 (current->signal->session != t->signal->session))
19711             && (current->euid ^ t->suid) && (current->euid ^ t->uid)
19712             && (current->uid ^ t->suid) && (current->uid ^ t->uid)
19713             && !capable(CAP_KILL))
19714                 return error;
19715  
19716 +       error = -ESRCH;
19717 +       if (user && !vx_check(vx_task_xid(t), VX_ADMIN|VX_IDENT))
19718 +               return error;
19719 +
19720         error = security_task_kill(t, info, sig);
19721         if (!error)
19722                 audit_signal_info(sig, t); /* Let audit system see the signal */
19723 @@ -1991,6 +2001,11 @@ relock:
19724                 if (current->pid == 1)
19725                         continue;
19726  
19727 +               /* virtual init is protected against user signals */
19728 +               if ((info->si_code == SI_USER) &&
19729 +                       vx_current_initpid(current->pid))
19730 +                       continue;
19731 +
19732                 if (sig_kernel_stop(signr)) {
19733                         /*
19734                          * The default action is to stop all threads in
19735 diff -NurpP --minimal linux-2.6.16.20/kernel/softirq.c linux-2.6.16.20-vs2.1.1-rc22/kernel/softirq.c
19736 --- linux-2.6.16.20/kernel/softirq.c    2006-01-03 17:30:12 +0100
19737 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/softirq.c       2006-04-26 19:07:00 +0200
19738 @@ -16,6 +16,7 @@
19739  #include <linux/cpu.h>
19740  #include <linux/kthread.h>
19741  #include <linux/rcupdate.h>
19742 +#include <linux/vs_context.h>
19743  
19744  #include <asm/irq.h>
19745  /*
19746 @@ -73,6 +74,7 @@ static inline void wakeup_softirqd(void)
19747  
19748  asmlinkage void __do_softirq(void)
19749  {
19750 +       struct vx_info_save vxis;
19751         struct softirq_action *h;
19752         __u32 pending;
19753         int max_restart = MAX_SOFTIRQ_RESTART;
19754 @@ -81,6 +83,7 @@ asmlinkage void __do_softirq(void)
19755         pending = local_softirq_pending();
19756  
19757         local_bh_disable();
19758 +       __enter_vx_admin(&vxis);
19759         cpu = smp_processor_id();
19760  restart:
19761         /* Reset the pending bitmask before enabling irqs */
19762 @@ -108,6 +111,7 @@ restart:
19763         if (pending)
19764                 wakeup_softirqd();
19765  
19766 +       __leave_vx_admin(&vxis);
19767         __local_bh_enable();
19768  }
19769  
19770 diff -NurpP --minimal linux-2.6.16.20/kernel/sys.c linux-2.6.16.20-vs2.1.1-rc22/kernel/sys.c
19771 --- linux-2.6.16.20/kernel/sys.c        2006-05-11 21:25:36 +0200
19772 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/sys.c   2006-04-29 02:58:07 +0200
19773 @@ -11,6 +11,7 @@
19774  #include <linux/mman.h>
19775  #include <linux/smp_lock.h>
19776  #include <linux/notifier.h>
19777 +#include <linux/kmod.h>
19778  #include <linux/reboot.h>
19779  #include <linux/prctl.h>
19780  #include <linux/init.h>
19781 @@ -30,6 +31,8 @@
19782  #include <linux/tty.h>
19783  #include <linux/signal.h>
19784  #include <linux/cn_proc.h>
19785 +#include <linux/vs_cvirt.h>
19786 +#include <linux/vs_pid.h>
19787  
19788  #include <linux/compat.h>
19789  #include <linux/syscalls.h>
19790 @@ -227,7 +230,9 @@ EXPORT_SYMBOL(unregister_reboot_notifier
19791  #ifndef CONFIG_SECURITY
19792  int capable(int cap)
19793  {
19794 -        if (cap_raised(current->cap_effective, cap)) {
19795 +       if (vx_check_bit(VXC_CAP_MASK, cap) && !vx_mcaps(1L << cap))
19796 +               return 0;
19797 +       if (vx_cap_raised(current->vx_info, current->cap_effective, cap)) {
19798                current->flags |= PF_SUPERPRIV;
19799                return 1;
19800          }
19801 @@ -246,7 +251,10 @@ static int set_one_prio(struct task_stru
19802                 goto out;
19803         }
19804         if (niceval < task_nice(p) && !can_nice(p, niceval)) {
19805 -               error = -EACCES;
19806 +               if (vx_flags(VXF_IGNEG_NICE, 0))
19807 +                       error = 0;
19808 +               else
19809 +                       error = -EACCES;
19810                 goto out;
19811         }
19812         no_nice = security_task_setnice(p, niceval);
19813 @@ -298,7 +306,8 @@ asmlinkage long sys_setpriority(int whic
19814                         if (!who)
19815                                 who = current->uid;
19816                         else
19817 -                               if ((who != current->uid) && !(user = find_user(who)))
19818 +                               if ((who != current->uid) &&
19819 +                                       !(user = find_user(vx_current_xid(), who)))
19820                                         goto out_unlock;        /* No processes for this user */
19821  
19822                         do_each_thread(g, p)
19823 @@ -356,7 +365,8 @@ asmlinkage long sys_getpriority(int whic
19824                         if (!who)
19825                                 who = current->uid;
19826                         else
19827 -                               if ((who != current->uid) && !(user = find_user(who)))
19828 +                               if ((who != current->uid) &&
19829 +                                       !(user = find_user(vx_current_xid(), who)))
19830                                         goto out_unlock;        /* No processes for this user */
19831  
19832                         do_each_thread(g, p)
19833 @@ -473,6 +483,9 @@ void kernel_power_off(void)
19834         machine_power_off();
19835  }
19836  EXPORT_SYMBOL_GPL(kernel_power_off);
19837 +
19838 +long vs_reboot(unsigned int, void __user *);
19839 +
19840  /*
19841   * Reboot system call: for obvious reasons only root may call it,
19842   * and even root needs to set up some magic numbers in the registers
19843 @@ -503,6 +516,9 @@ asmlinkage long sys_reboot(int magic1, i
19844         if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
19845                 cmd = LINUX_REBOOT_CMD_HALT;
19846  
19847 +       if (!vx_check(0, VX_ADMIN|VX_WATCH))
19848 +               return vs_reboot(cmd, arg);
19849 +
19850         lock_kernel();
19851         switch (cmd) {
19852         case LINUX_REBOOT_CMD_RESTART:
19853 @@ -690,7 +706,7 @@ static int set_user(uid_t new_ruid, int 
19854  {
19855         struct user_struct *new_user;
19856  
19857 -       new_user = alloc_uid(new_ruid);
19858 +       new_user = alloc_uid(vx_current_xid(), new_ruid);
19859         if (!new_user)
19860                 return -EAGAIN;
19861  
19862 @@ -1099,15 +1115,18 @@ asmlinkage long sys_setpgid(pid_t pid, p
19863  {
19864         struct task_struct *p;
19865         struct task_struct *group_leader = current->group_leader;
19866 +       pid_t rpgid;
19867         int err = -EINVAL;
19868  
19869         if (!pid)
19870 -               pid = group_leader->pid;
19871 +               pid = vx_map_pid(group_leader->pid);
19872         if (!pgid)
19873                 pgid = pid;
19874         if (pgid < 0)
19875                 return -EINVAL;
19876  
19877 +       rpgid = vx_rmap_pid(pgid);
19878 +
19879         /* From this point forward we keep holding onto the tasklist lock
19880          * so that our parent does not change from under us. -DaveM
19881          */
19882 @@ -1142,22 +1161,22 @@ asmlinkage long sys_setpgid(pid_t pid, p
19883         if (pgid != pid) {
19884                 struct task_struct *p;
19885  
19886 -               do_each_task_pid(pgid, PIDTYPE_PGID, p) {
19887 +               do_each_task_pid(rpgid, PIDTYPE_PGID, p) {
19888                         if (p->signal->session == group_leader->signal->session)
19889                                 goto ok_pgid;
19890 -               } while_each_task_pid(pgid, PIDTYPE_PGID, p);
19891 +               } while_each_task_pid(rpgid, PIDTYPE_PGID, p);
19892                 goto out;
19893         }
19894  
19895  ok_pgid:
19896 -       err = security_task_setpgid(p, pgid);
19897 +       err = security_task_setpgid(p, rpgid);
19898         if (err)
19899                 goto out;
19900  
19901 -       if (process_group(p) != pgid) {
19902 +       if (process_group(p) != rpgid) {
19903                 detach_pid(p, PIDTYPE_PGID);
19904 -               p->signal->pgrp = pgid;
19905 -               attach_pid(p, PIDTYPE_PGID, pgid);
19906 +               p->signal->pgrp = rpgid;
19907 +               attach_pid(p, PIDTYPE_PGID, rpgid);
19908         }
19909  
19910         err = 0;
19911 @@ -1170,7 +1189,7 @@ out:
19912  asmlinkage long sys_getpgid(pid_t pid)
19913  {
19914         if (!pid) {
19915 -               return process_group(current);
19916 +               return vx_map_pid(process_group(current)); /* real -> guest */
19917         } else {
19918                 int retval;
19919                 struct task_struct *p;
19920 @@ -1182,7 +1201,7 @@ asmlinkage long sys_getpgid(pid_t pid)
19921                 if (p) {
19922                         retval = security_task_getpgid(p);
19923                         if (!retval)
19924 -                               retval = process_group(p);
19925 +                               retval = vx_map_pid(process_group(p));
19926                 }
19927                 read_unlock(&tasklist_lock);
19928                 return retval;
19929 @@ -1518,7 +1537,7 @@ asmlinkage long sys_newuname(struct new_
19930         int errno = 0;
19931  
19932         down_read(&uts_sem);
19933 -       if (copy_to_user(name,&system_utsname,sizeof *name))
19934 +       if (copy_to_user(name, vx_new_utsname(), sizeof *name))
19935                 errno = -EFAULT;
19936         up_read(&uts_sem);
19937         return errno;
19938 @@ -1529,15 +1548,17 @@ asmlinkage long sys_sethostname(char __u
19939         int errno;
19940         char tmp[__NEW_UTS_LEN];
19941  
19942 -       if (!capable(CAP_SYS_ADMIN))
19943 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SET_UTSNAME))
19944                 return -EPERM;
19945         if (len < 0 || len > __NEW_UTS_LEN)
19946                 return -EINVAL;
19947         down_write(&uts_sem);
19948         errno = -EFAULT;
19949         if (!copy_from_user(tmp, name, len)) {
19950 -               memcpy(system_utsname.nodename, tmp, len);
19951 -               system_utsname.nodename[len] = 0;
19952 +               char *ptr = vx_new_uts(nodename);
19953 +
19954 +               memcpy(ptr, tmp, len);
19955 +               ptr[len] = 0;
19956                 errno = 0;
19957         }
19958         up_write(&uts_sem);
19959 @@ -1549,15 +1570,17 @@ asmlinkage long sys_sethostname(char __u
19960  asmlinkage long sys_gethostname(char __user *name, int len)
19961  {
19962         int i, errno;
19963 +       char *ptr;
19964  
19965         if (len < 0)
19966                 return -EINVAL;
19967         down_read(&uts_sem);
19968 -       i = 1 + strlen(system_utsname.nodename);
19969 +       ptr = vx_new_uts(nodename);
19970 +       i = 1 + strlen(ptr);
19971         if (i > len)
19972                 i = len;
19973         errno = 0;
19974 -       if (copy_to_user(name, system_utsname.nodename, i))
19975 +       if (copy_to_user(name, ptr, i))
19976                 errno = -EFAULT;
19977         up_read(&uts_sem);
19978         return errno;
19979 @@ -1574,7 +1597,7 @@ asmlinkage long sys_setdomainname(char _
19980         int errno;
19981         char tmp[__NEW_UTS_LEN];
19982  
19983 -       if (!capable(CAP_SYS_ADMIN))
19984 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SET_UTSNAME))
19985                 return -EPERM;
19986         if (len < 0 || len > __NEW_UTS_LEN)
19987                 return -EINVAL;
19988 @@ -1582,8 +1605,10 @@ asmlinkage long sys_setdomainname(char _
19989         down_write(&uts_sem);
19990         errno = -EFAULT;
19991         if (!copy_from_user(tmp, name, len)) {
19992 -               memcpy(system_utsname.domainname, tmp, len);
19993 -               system_utsname.domainname[len] = 0;
19994 +               char *ptr = vx_new_uts(domainname);
19995 +
19996 +               memcpy(ptr, tmp, len);
19997 +               ptr[len] = 0;
19998                 errno = 0;
19999         }
20000         up_write(&uts_sem);
20001 @@ -1640,7 +1665,7 @@ asmlinkage long sys_setrlimit(unsigned i
20002                 return -EINVAL;
20003         old_rlim = current->signal->rlim + resource;
20004         if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
20005 -           !capable(CAP_SYS_RESOURCE))
20006 +           !vx_capable(CAP_SYS_RESOURCE, VXC_SET_RLIMIT))
20007                 return -EPERM;
20008         if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN)
20009                         return -EPERM;
20010 diff -NurpP --minimal linux-2.6.16.20/kernel/sysctl.c linux-2.6.16.20-vs2.1.1-rc22/kernel/sysctl.c
20011 --- linux-2.6.16.20/kernel/sysctl.c     2006-04-09 13:49:58 +0200
20012 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/sysctl.c        2006-04-26 19:07:00 +0200
20013 @@ -46,6 +46,7 @@
20014  #include <linux/syscalls.h>
20015  #include <linux/nfs_fs.h>
20016  #include <linux/acpi.h>
20017 +#include <linux/vserver/cvirt.h>
20018  
20019  #include <asm/uaccess.h>
20020  #include <asm/processor.h>
20021 @@ -89,6 +90,7 @@ static int ngroups_max = NGROUPS_MAX;
20022  #ifdef CONFIG_KMOD
20023  extern char modprobe_path[];
20024  #endif
20025 +extern char vshelper_path[];
20026  #ifdef CONFIG_CHR_DEV_SG
20027  extern int sg_big_buff;
20028  #endif
20029 @@ -237,6 +239,7 @@ static ctl_table kern_table[] = {
20030                 .maxlen         = sizeof(system_utsname.sysname),
20031                 .mode           = 0444,
20032                 .proc_handler   = &proc_doutsstring,
20033 +               .virt_handler   = &vx_uts_virt_handler,
20034                 .strategy       = &sysctl_string,
20035         },
20036         {
20037 @@ -246,6 +249,7 @@ static ctl_table kern_table[] = {
20038                 .maxlen         = sizeof(system_utsname.release),
20039                 .mode           = 0444,
20040                 .proc_handler   = &proc_doutsstring,
20041 +               .virt_handler   = &vx_uts_virt_handler,
20042                 .strategy       = &sysctl_string,
20043         },
20044         {
20045 @@ -255,6 +259,7 @@ static ctl_table kern_table[] = {
20046                 .maxlen         = sizeof(system_utsname.version),
20047                 .mode           = 0444,
20048                 .proc_handler   = &proc_doutsstring,
20049 +               .virt_handler   = &vx_uts_virt_handler,
20050                 .strategy       = &sysctl_string,
20051         },
20052         {
20053 @@ -264,6 +269,7 @@ static ctl_table kern_table[] = {
20054                 .maxlen         = sizeof(system_utsname.nodename),
20055                 .mode           = 0644,
20056                 .proc_handler   = &proc_doutsstring,
20057 +               .virt_handler   = &vx_uts_virt_handler,
20058                 .strategy       = &sysctl_string,
20059         },
20060         {
20061 @@ -273,6 +279,7 @@ static ctl_table kern_table[] = {
20062                 .maxlen         = sizeof(system_utsname.domainname),
20063                 .mode           = 0644,
20064                 .proc_handler   = &proc_doutsstring,
20065 +               .virt_handler   = &vx_uts_virt_handler,
20066                 .strategy       = &sysctl_string,
20067         },
20068         {
20069 @@ -409,6 +416,15 @@ static ctl_table kern_table[] = {
20070                 .strategy       = &sysctl_string,
20071         },
20072  #endif
20073 +       {
20074 +               .ctl_name       = KERN_VSHELPER,
20075 +               .procname       = "vshelper",
20076 +               .data           = &vshelper_path,
20077 +               .maxlen         = 256,
20078 +               .mode           = 0644,
20079 +               .proc_handler   = &proc_dostring,
20080 +               .strategy       = &sysctl_string,
20081 +       },
20082  #ifdef CONFIG_CHR_DEV_SG
20083         {
20084                 .ctl_name       = KERN_SG_BIG_BUFF,
20085 @@ -1564,16 +1580,20 @@ static ssize_t proc_writesys(struct file
20086  int proc_dostring(ctl_table *table, int write, struct file *filp,
20087                   void __user *buffer, size_t *lenp, loff_t *ppos)
20088  {
20089 -       size_t len;
20090 +       size_t len, maxlen;
20091         char __user *p;
20092         char c;
20093 +       void *data;
20094 +
20095 +       data = table->data;
20096 +       maxlen = table->maxlen;
20097 +
20098 +       if (!data || !maxlen || !*lenp || (*ppos && !write))
20099 +               return (*lenp = 0);
20100         
20101 -       if (!table->data || !table->maxlen || !*lenp ||
20102 -           (*ppos && !write)) {
20103 -               *lenp = 0;
20104 -               return 0;
20105 -       }
20106 -       
20107 +       if (table->virt_handler)
20108 +               table->virt_handler(table, write, filp->f_xid, &data, &maxlen);
20109 +
20110         if (write) {
20111                 len = 0;
20112                 p = buffer;
20113 @@ -1584,20 +1604,20 @@ int proc_dostring(ctl_table *table, int 
20114                                 break;
20115                         len++;
20116                 }
20117 -               if (len >= table->maxlen)
20118 -                       len = table->maxlen-1;
20119 -               if(copy_from_user(table->data, buffer, len))
20120 +               if (len >= maxlen)
20121 +                       len = maxlen-1;
20122 +               if(copy_from_user(data, buffer, len))
20123                         return -EFAULT;
20124 -               ((char *) table->data)[len] = 0;
20125 +               ((char *) data)[len] = 0;
20126                 *ppos += *lenp;
20127         } else {
20128 -               len = strlen(table->data);
20129 -               if (len > table->maxlen)
20130 -                       len = table->maxlen;
20131 +               len = strlen(data);
20132 +               if (len > maxlen)
20133 +                       len = maxlen;
20134                 if (len > *lenp)
20135                         len = *lenp;
20136                 if (len)
20137 -                       if(copy_to_user(buffer, table->data, len))
20138 +                       if(copy_to_user(buffer, data, len))
20139                                 return -EFAULT;
20140                 if (len < *lenp) {
20141                         if(put_user('\n', ((char __user *) buffer) + len))
20142 diff -NurpP --minimal linux-2.6.16.20/kernel/time.c linux-2.6.16.20-vs2.1.1-rc22/kernel/time.c
20143 --- linux-2.6.16.20/kernel/time.c       2006-02-18 14:40:38 +0100
20144 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/time.c  2006-05-29 16:59:20 +0200
20145 @@ -61,7 +61,7 @@ asmlinkage long sys_time(time_t __user *
20146         time_t i;
20147         struct timeval tv;
20148  
20149 -       do_gettimeofday(&tv);
20150 +       vx_gettimeofday(&tv);
20151         i = tv.tv_sec;
20152  
20153         if (tloc) {
20154 @@ -92,7 +92,7 @@ asmlinkage long sys_stime(time_t __user 
20155         if (err)
20156                 return err;
20157  
20158 -       do_settimeofday(&tv);
20159 +       vx_settimeofday(&tv);
20160         return 0;
20161  }
20162  
20163 @@ -102,7 +102,7 @@ asmlinkage long sys_gettimeofday(struct 
20164  {
20165         if (likely(tv != NULL)) {
20166                 struct timeval ktv;
20167 -               do_gettimeofday(&ktv);
20168 +               vx_gettimeofday(&ktv);
20169                 if (copy_to_user(tv, &ktv, sizeof(ktv)))
20170                         return -EFAULT;
20171         }
20172 @@ -176,7 +176,7 @@ int do_sys_settimeofday(struct timespec 
20173                 /* SMP safe, again the code in arch/foo/time.c should
20174                  * globally block out interrupts when it runs.
20175                  */
20176 -               return do_settimeofday(tv);
20177 +               return vx_settimeofday(tv);
20178         }
20179         return 0;
20180  }
20181 @@ -558,7 +558,7 @@ void getnstimeofday(struct timespec *tv)
20182  {
20183         struct timeval x;
20184  
20185 -       do_gettimeofday(&x);
20186 +       vx_gettimeofday(&x);
20187         tv->tv_sec = x.tv_sec;
20188         tv->tv_nsec = x.tv_usec * NSEC_PER_USEC;
20189  }
20190 diff -NurpP --minimal linux-2.6.16.20/kernel/timer.c linux-2.6.16.20-vs2.1.1-rc22/kernel/timer.c
20191 --- linux-2.6.16.20/kernel/timer.c      2006-04-09 13:49:58 +0200
20192 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/timer.c 2006-04-26 19:07:00 +0200
20193 @@ -34,6 +34,9 @@
20194  #include <linux/time.h>
20195  #include <linux/jiffies.h>
20196  #include <linux/posix-timers.h>
20197 +#include <linux/vs_cvirt.h>
20198 +#include <linux/vs_pid.h>
20199 +#include <linux/vserver/sched.h>
20200  #include <linux/cpu.h>
20201  #include <linux/syscalls.h>
20202  #include <linux/delay.h>
20203 @@ -972,12 +975,6 @@ asmlinkage unsigned long sys_alarm(unsig
20204  
20205  #endif
20206  
20207 -#ifndef __alpha__
20208 -
20209 -/*
20210 - * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
20211 - * should be moved into arch/i386 instead?
20212 - */
20213  
20214  /**
20215   * sys_getpid - return the thread group id of the current process
20216 @@ -990,7 +987,7 @@ asmlinkage unsigned long sys_alarm(unsig
20217   */
20218  asmlinkage long sys_getpid(void)
20219  {
20220 -       return current->tgid;
20221 +       return vx_map_tgid(current->tgid);
20222  }
20223  
20224  /*
20225 @@ -1034,9 +1031,23 @@ asmlinkage long sys_getppid(void)
20226         pid = rcu_dereference(current->real_parent)->tgid;
20227         rcu_read_unlock();
20228  
20229 -       return pid;
20230 +       return vx_map_pid(pid);
20231  }
20232  
20233 +#ifdef __alpha__
20234 +
20235 +/*
20236 + * The Alpha uses getxpid, getxuid, and getxgid instead.
20237 + */
20238 +
20239 +asmlinkage long do_getxpid(long *ppid)
20240 +{
20241 +       *ppid = sys_getppid();
20242 +       return sys_getpid();
20243 +}
20244 +
20245 +#else /* __alpha__ */
20246 +
20247  asmlinkage long sys_getuid(void)
20248  {
20249         /* Only we change this so SMP safe */
20250 @@ -1197,6 +1208,8 @@ asmlinkage long sys_sysinfo(struct sysin
20251                         tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
20252                         tp.tv_sec++;
20253                 }
20254 +               if (vx_flags(VXF_VIRT_UPTIME, 0))
20255 +                       vx_vsi_uptime(&tp, NULL);
20256                 val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
20257  
20258                 val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
20259 diff -NurpP --minimal linux-2.6.16.20/kernel/user.c linux-2.6.16.20-vs2.1.1-rc22/kernel/user.c
20260 --- linux-2.6.16.20/kernel/user.c       2006-02-18 14:40:38 +0100
20261 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/user.c  2006-04-26 19:07:00 +0200
20262 @@ -23,8 +23,8 @@
20263  #define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8)
20264  #define UIDHASH_SZ             (1 << UIDHASH_BITS)
20265  #define UIDHASH_MASK           (UIDHASH_SZ - 1)
20266 -#define __uidhashfn(uid)       (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK)
20267 -#define uidhashentry(uid)      (uidhash_table + __uidhashfn((uid)))
20268 +#define __uidhashfn(xid,uid)   ((((uid) >> UIDHASH_BITS) + ((uid)^(xid))) & UIDHASH_MASK)
20269 +#define uidhashentry(xid,uid)  (uidhash_table + __uidhashfn((xid),(uid)))
20270  
20271  static kmem_cache_t *uid_cachep;
20272  static struct list_head uidhash_table[UIDHASH_SZ];
20273 @@ -66,7 +66,7 @@ static inline void uid_hash_remove(struc
20274         list_del(&up->uidhash_list);
20275  }
20276  
20277 -static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *hashent)
20278 +static inline struct user_struct *uid_hash_find(xid_t xid, uid_t uid, struct list_head *hashent)
20279  {
20280         struct list_head *up;
20281  
20282 @@ -75,7 +75,7 @@ static inline struct user_struct *uid_ha
20283  
20284                 user = list_entry(up, struct user_struct, uidhash_list);
20285  
20286 -               if(user->uid == uid) {
20287 +               if(user->uid == uid && user->xid == xid) {
20288                         atomic_inc(&user->__count);
20289                         return user;
20290                 }
20291 @@ -90,13 +90,13 @@ static inline struct user_struct *uid_ha
20292   *
20293   * If the user_struct could not be found, return NULL.
20294   */
20295 -struct user_struct *find_user(uid_t uid)
20296 +struct user_struct *find_user(xid_t xid, uid_t uid)
20297  {
20298         struct user_struct *ret;
20299         unsigned long flags;
20300  
20301         spin_lock_irqsave(&uidhash_lock, flags);
20302 -       ret = uid_hash_find(uid, uidhashentry(uid));
20303 +       ret = uid_hash_find(xid, uid, uidhashentry(xid, uid));
20304         spin_unlock_irqrestore(&uidhash_lock, flags);
20305         return ret;
20306  }
20307 @@ -116,13 +116,13 @@ void free_uid(struct user_struct *up)
20308         local_irq_restore(flags);
20309  }
20310  
20311 -struct user_struct * alloc_uid(uid_t uid)
20312 +struct user_struct * alloc_uid(xid_t xid, uid_t uid)
20313  {
20314 -       struct list_head *hashent = uidhashentry(uid);
20315 +       struct list_head *hashent = uidhashentry(xid, uid);
20316         struct user_struct *up;
20317  
20318         spin_lock_irq(&uidhash_lock);
20319 -       up = uid_hash_find(uid, hashent);
20320 +       up = uid_hash_find(xid, uid, hashent);
20321         spin_unlock_irq(&uidhash_lock);
20322  
20323         if (!up) {
20324 @@ -132,6 +132,7 @@ struct user_struct * alloc_uid(uid_t uid
20325                 if (!new)
20326                         return NULL;
20327                 new->uid = uid;
20328 +               new->xid = xid;
20329                 atomic_set(&new->__count, 1);
20330                 atomic_set(&new->processes, 0);
20331                 atomic_set(&new->files, 0);
20332 @@ -154,7 +155,7 @@ struct user_struct * alloc_uid(uid_t uid
20333                  * on adding the same user already..
20334                  */
20335                 spin_lock_irq(&uidhash_lock);
20336 -               up = uid_hash_find(uid, hashent);
20337 +               up = uid_hash_find(xid, uid, hashent);
20338                 if (up) {
20339                         key_put(new->uid_keyring);
20340                         key_put(new->session_keyring);
20341 @@ -200,7 +201,7 @@ static int __init uid_cache_init(void)
20342  
20343         /* Insert the root user immediately (init already runs as root) */
20344         spin_lock_irq(&uidhash_lock);
20345 -       uid_hash_insert(&root_user, uidhashentry(0));
20346 +       uid_hash_insert(&root_user, uidhashentry(0,0));
20347         spin_unlock_irq(&uidhash_lock);
20348  
20349         return 0;
20350 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/Kconfig linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/Kconfig
20351 --- linux-2.6.16.20/kernel/vserver/Kconfig      1970-01-01 01:00:00 +0100
20352 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/Kconfig 2006-05-29 18:02:20 +0200
20353 @@ -0,0 +1,265 @@
20354 +#
20355 +# Linux VServer configuration
20356 +#
20357 +
20358 +menu "Linux VServer"
20359 +
20360 +config VSERVER_LEGACY
20361 +       bool    "Enable Legacy Kernel API"
20362 +       default y
20363 +       help
20364 +         This enables the legacy API used in vs1.xx, maintaining
20365 +         compatibility with older vserver tools, and guest images
20366 +         that are configured using the legacy method.  This is
20367 +         probably a good idea for now, for migration purposes.
20368 +
20369 +         Note that some tools have not yet been altered to use
20370 +         this API, so disabling this option may reduce some
20371 +         functionality.
20372 +
20373 +config VSERVER_LEGACY_VERSION
20374 +       bool    "Show a Legacy Version ID"
20375 +       depends on VSERVER_LEGACY
20376 +       default n
20377 +       help
20378 +         This shows a special legacy version to very old tools
20379 +         which do not handle the current version correctly.
20380 +
20381 +         This will probably disable some features of newer tools
20382 +         so better avoid it, unless you really, really need it
20383 +         for backwards compatibility.
20384 +
20385 +config VSERVER_DYNAMIC_IDS
20386 +       bool    "Enable dynamic context IDs"
20387 +       depends on VSERVER_LEGACY
20388 +       default y
20389 +       help
20390 +         This enables support of in-kernel dynamic context IDs,
20391 +         which is deprecated and will probably be removed in the
20392 +         next release.
20393 +
20394 +config VSERVER_NGNET
20395 +       bool    "Disable Legacy Networking Kernel API"
20396 +       depends on EXPERIMENTAL
20397 +       default n
20398 +       help
20399 +         This disables the legacy networking API which is required
20400 +         by the chbind tool. Do not disable it unless you know
20401 +         exactly what you are doing.
20402 +
20403 +config VSERVER_REMAP_SADDR
20404 +       bool    "Remap Source IP Address"
20405 +       depends on EXPERIMENTAL && !VSERVER_LEGACY
20406 +       default n
20407 +       help
20408 +         This allows remapping the source IP address of 'local'
20409 +         connections from 127.0.0.1 to the first assigned
20410 +         guest IP.
20411 +
20412 +config VSERVER_COWBL
20413 +       bool    "Enable COW Immutable Link Breaking"
20414 +       depends on EXPERIMENTAL
20415 +       default y
20416 +       help
20417 +         This enables the COW (Copy-On-Write) link break code.
20418 +         It allows you to treat unified files like normal files
20419 +         when writing to them (which will implicitly break the
20420 +         link and create a copy of the unified file)
20421 +
20422 +config VSERVER_VTIME
20423 +       bool    "Enable Virtualized Guest Time"
20424 +       depends on EXPERIMENTAL
20425 +       default n
20426 +       help
20427 +         This enables per guest time offsets to allow for
20428 +         adjusting the system clock individually per guest.
20429 +         This adds some overhead to the time functions and
20430 +         therefore should not be enabled without good reason.
20431 +
20432 +config VSERVER_PROC_SECURE
20433 +       bool    "Enable Proc Security"
20434 +       depends on PROC_FS
20435 +       default y
20436 +       help
20437 +         This configures ProcFS security to initially hide
20438 +         non-process entries for all contexts except the main and
20439 +         spectator context (i.e. for all guests), which is a secure
20440 +         default.
20441 +
20442 +         (note: on 1.2x the entries were visible by default)
20443 +
20444 +config VSERVER_HARDCPU
20445 +       bool    "Enable Hard CPU Limits"
20446 +       depends on EXPERIMENTAL
20447 +       default n
20448 +       help
20449 +         Activate the Hard CPU Limits
20450 +
20451 +         This will compile in code that allows the Token Bucket
20452 +         Scheduler to put processes on hold when a context's
20453 +         tokens are depleted (provided that its per-context
20454 +         sched_hard flag is set).
20455 +
20456 +         Processes belonging to that context will not be able
20457 +         to consume CPU resources again until a per-context
20458 +         configured minimum of tokens has been reached.
20459 +
20460 +config VSERVER_IDLETIME
20461 +       bool    "Avoid idle CPUs by skipping Time"
20462 +       depends on VSERVER_HARDCPU
20463 +       default n
20464 +       help
20465 +         This option allows the scheduler to artificially
20466 +         advance time (per cpu) when otherwise the idle
20467 +         task would be scheduled, thus keeping the cpu
20468 +         busy and sharing the available resources among
20469 +         certain contexts.
20470 +
20471 +config VSERVER_IDLELIMIT
20472 +       bool    "Limit the IDLE task"
20473 +       depends on VSERVER_HARDCPU
20474 +       default n
20475 +       help
20476 +         Limit the idle slices, so that the next context
20477 +         will be scheduled as soon as possible.
20478 +
20479 +         This might improve interactivity and latency, but
20480 +         will also marginally increase scheduling overhead.
20481 +
20482 +choice
20483 +       prompt  "Persistent Inode Tagging"
20484 +       default TAGGING_ID24
20485 +       help
20486 +         This adds persistent context information to filesystems
20487 +         mounted with the tagxid option. Tagging is a requirement
20488 +         for per-context disk limits and per-context quota.
20489 +
20490 +
20491 +config TAGGING_NONE
20492 +       bool    "Disabled"
20493 +       help
20494 +         do not store per-context information in inodes.
20495 +
20496 +config TAGGING_UID16
20497 +       bool    "UID16/GID32"
20498 +       help
20499 +         reduces UID to 16 bit, but leaves GID at 32 bit.
20500 +
20501 +config TAGGING_GID16
20502 +       bool    "UID32/GID16"
20503 +       help
20504 +         reduces GID to 16 bit, but leaves UID at 32 bit.
20505 +
20506 +config TAGGING_ID24
20507 +       bool    "UID24/GID24"
20508 +       help
20509 +         uses the upper 8bit from UID and GID for XID tagging
20510 +         which leaves 24bit for UID/GID each, which should be
20511 +         more than sufficient for normal use.
20512 +
20513 +config TAGGING_INTERN
20514 +       bool    "UID32/GID32"
20515 +       help
20516 +         this uses otherwise reserved inode fields in the on
20517 +         disk representation, which limits the use to a few
20518 +         filesystems (currently ext2 and ext3)
20519 +
20520 +config TAGGING_RUNTIME
20521 +       bool    "Runtime"
20522 +       depends on EXPERIMENTAL
20523 +       help
20524 +         inodes are tagged when first accessed, this doesn't
20525 +         require any persistent information, but might give
20526 +         funny results for mixed access.
20527 +
20528 +endchoice
20529 +
20530 +config TAG_NFSD
20531 +       bool    "Tag NFSD User Auth and Files"
20532 +       default n
20533 +       help
20534 +         Enable this if you do want the in-kernel NFS
20535 +         Server to use the tagging specified above.
20536 +         (will require patched clients too)
20537 +
20538 +config PROPAGATE
20539 +       bool    "Enable Inode Tag Propagation"
20540 +       default n
20541 +       depends on EXPERIMENTAL
20542 +       help
20543 +         This allows for the tagid= mount option to specify
20544 +         a tagid which is to be used for the entire mount
20545 +         tree.
20546 +
20547 +config VSERVER_DEBUG
20548 +       bool    "VServer Debugging Code"
20549 +       default n
20550 +       help
20551 +         Set this to yes if you want to be able to activate
20552 +         debugging output at runtime. It adds a probably small
20553 +         overhead to all vserver related functions and
20554 +         increases the kernel size by about 20k.
20555 +
20556 +config VSERVER_HISTORY
20557 +       bool    "VServer History Tracing"
20558 +       depends on VSERVER_DEBUG
20559 +       default n
20560 +       help
20561 +         Set this to yes if you want to record the history of
20562 +         linux-vserver activities, so they can be replayed in
20563 +         the event of a kernel panic or oops.
20564 +
20565 +config VSERVER_HISTORY_SIZE
20566 +       int "Per-CPU History Size (32-65536)"
20567 +       depends on VSERVER_HISTORY
20568 +       range 32 65536
20569 +       default 64
20570 +       help
20571 +         This allows you to specify the number of entries in
20572 +         the per-CPU history buffer.
20573 +
20574 +config VSERVER_MONITOR
20575 +       bool    "VServer Scheduling Monitor"
20576 +       depends on VSERVER_DEBUG
20577 +       default n
20578 +       help
20579 +         Set this to yes if you want to record the scheduling
20580 +         decisions, so that they can be relayed to userspace
20581 +         for detailed analysis.
20582 +
20583 +config VSERVER_MONITOR_SIZE
20584 +       int "Per-CPU Monitor Queue Size (32-65536)"
20585 +       depends on VSERVER_MONITOR
20586 +       range 32 65536
20587 +       default 1024
20588 +       help
20589 +         This allows you to specify the number of entries in
20590 +         the per-CPU scheduling monitor buffer.
20591 +
20592 +config VSERVER_MONITOR_SYNC
20593 +       int "Per-CPU Monitor Sync Interval (0-65536)"
20594 +       depends on VSERVER_MONITOR
20595 +       range 0 65536
20596 +       default 256
20597 +       help
20598 +         This allows you to specify the interval in ticks
20599 +         when a time sync entry is inserted.
20600 +
20601 +endmenu
20602 +
20603 +
20604 +config VSERVER
20605 +       bool
20606 +       default y
20607 +
20608 +config VSERVER_SECURITY
20609 +       bool
20610 +       depends on SECURITY
20611 +       default y
20612 +       select SECURITY_CAPABILITIES
20613 +
20614 +config VSERVER_LEGACYNET
20615 +       bool
20616 +       depends on !VSERVER_NGNET
20617 +       default y
20618 +
20619 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/Makefile linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/Makefile
20620 --- linux-2.6.16.20/kernel/vserver/Makefile     1970-01-01 01:00:00 +0100
20621 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/Makefile        2006-04-26 19:07:00 +0200
20622 @@ -0,0 +1,17 @@
20623 +#
20624 +# Makefile for the Linux vserver routines.
20625 +#
20626 +
20627 +
20628 +obj-y          += vserver.o
20629 +
20630 +vserver-y      := switch.o context.o namespace.o sched.o network.o inode.o \
20631 +                  limit.o cvirt.o signal.o helper.o init.o dlimit.o
20632 +
20633 +vserver-$(CONFIG_PROC_FS) += proc.o
20634 +vserver-$(CONFIG_VSERVER_DEBUG) += sysctl.o debug.o
20635 +vserver-$(CONFIG_VSERVER_LEGACY) += legacy.o
20636 +vserver-$(CONFIG_VSERVER_LEGACYNET) += legacynet.o
20637 +vserver-$(CONFIG_VSERVER_HISTORY) += history.o
20638 +vserver-$(CONFIG_VSERVER_MONITOR) += monitor.o
20639 +
20640 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/context.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/context.c
20641 --- linux-2.6.16.20/kernel/vserver/context.c    1970-01-01 01:00:00 +0100
20642 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/context.c       2006-05-02 04:01:31 +0200
20643 @@ -0,0 +1,1051 @@
20644 +/*
20645 + *  linux/kernel/vserver/context.c
20646 + *
20647 + *  Virtual Server: Context Support
20648 + *
20649 + *  Copyright (C) 2003-2005  Herbert Pötzl
20650 + *
20651 + *  V0.01  context helper
20652 + *  V0.02  vx_ctx_kill syscall command
20653 + *  V0.03  replaced context_info calls
20654 + *  V0.04  redesign of struct (de)alloc
20655 + *  V0.05  rlimit basic implementation
20656 + *  V0.06  task_xid and info commands
20657 + *  V0.07  context flags and caps
20658 + *  V0.08  switch to RCU based hash
20659 + *  V0.09  revert to non RCU for now
20660 + *  V0.10  and back to working RCU hash
20661 + *  V0.11  and back to locking again
20662 + *  V0.12  referenced context store
20663 + *  V0.13  separate per cpu data
20664 + *
20665 + */
20666 +
20667 +#include <linux/slab.h>
20668 +#include <linux/types.h>
20669 +#include <linux/namespace.h>
20670 +
20671 +#include <linux/sched.h>
20672 +#include <linux/vserver/network.h>
20673 +#include <linux/vserver/legacy.h>
20674 +#include <linux/vserver/limit.h>
20675 +#include <linux/vserver/debug.h>
20676 +#include <linux/vserver/limit_int.h>
20677 +
20678 +#include <linux/vs_context.h>
20679 +#include <linux/vs_limit.h>
20680 +#include <linux/vserver/context_cmd.h>
20681 +
20682 +#include <linux/err.h>
20683 +#include <asm/errno.h>
20684 +
20685 +#include "cvirt_init.h"
20686 +#include "limit_init.h"
20687 +#include "sched_init.h"
20688 +
20689 +
20690 +atomic_t vx_global_ctotal      = ATOMIC_INIT(0);
20691 +atomic_t vx_global_cactive     = ATOMIC_INIT(0);
20692 +
20693 +
20694 +/*     now inactive context structures */
20695 +
20696 +static struct hlist_head vx_info_inactive = HLIST_HEAD_INIT;
20697 +
20698 +static spinlock_t vx_info_inactive_lock = SPIN_LOCK_UNLOCKED;
20699 +
20700 +
20701 +/*     __alloc_vx_info()
20702 +
20703 +       * allocate an initialized vx_info struct (NULL on failure)
20704 +       * doesn't make it visible (hash)                        */
20705 +
20706 +static struct vx_info *__alloc_vx_info(xid_t xid)
20707 +{
20708 +       struct vx_info *new = NULL;
20709 +       int cpu;
20710 +
20711 +       vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid);
20712 +
20713 +       /* would this benefit from a slab cache? */
20714 +       new = kmalloc(sizeof(struct vx_info), GFP_KERNEL);
20715 +       if (!new)
20716 +               return NULL;
20717 +
20718 +       memset (new, 0, sizeof(struct vx_info));
20719 +#ifdef CONFIG_SMP
20720 +       new->ptr_pc = alloc_percpu(struct _vx_info_pc);
20721 +       if (!new->ptr_pc)
20722 +               goto error;
20723 +#endif
20724 +       new->vx_id = xid;
20725 +       INIT_HLIST_NODE(&new->vx_hlist);
20726 +       atomic_set(&new->vx_usecnt, 0);
20727 +       atomic_set(&new->vx_tasks, 0);
20728 +       new->vx_parent = NULL;
20729 +       new->vx_state = 0;
20730 +       init_waitqueue_head(&new->vx_wait);
20731 +
20732 +       /* prepare reaper */
20733 +       get_task_struct(child_reaper);
20734 +       new->vx_reaper = child_reaper;
20735 +
20736 +       /* rest of init goes here */
20737 +       vx_info_init_limit(&new->limit);
20738 +       vx_info_init_sched(&new->sched);
20739 +       vx_info_init_cvirt(&new->cvirt);
20740 +       vx_info_init_cacct(&new->cacct);
20741 +
20742 +       /* per cpu data structures */
20743 +       for_each_cpu(cpu) {
20744 +               vx_info_init_sched_pc(
20745 +                       &vx_per_cpu(new, sched_pc, cpu), cpu);
20746 +               vx_info_init_cvirt_pc(
20747 +                       &vx_per_cpu(new, cvirt_pc, cpu), cpu);
20748 +       }
20749 +
20750 +       new->vx_flags = VXF_INIT_SET;
20751 +       new->vx_bcaps = CAP_INIT_EFF_SET;
20752 +       new->vx_ccaps = 0;
20753 +       new->vx_cap_bset = cap_bset;
20754 +
20755 +       new->reboot_cmd = 0;
20756 +       new->exit_code = 0;
20757 +
20758 +       vxdprintk(VXD_CBIT(xid, 0),
20759 +               "alloc_vx_info(%d) = %p", xid, new);
20760 +       vxh_alloc_vx_info(new);
20761 +       atomic_inc(&vx_global_ctotal);
20762 +       return new;
20763 +#ifdef CONFIG_SMP
20764 +error:
20765 +       kfree(new);
20766 +       return NULL;
20767 +#endif
20768 +}
20769 +
20770 +/*     __dealloc_vx_info()
20771 +
20772 +       * final disposal of vx_info                             */
20773 +
20774 +static void __dealloc_vx_info(struct vx_info *vxi)
20775 +{
20776 +       int cpu;
20777 +
20778 +       vxdprintk(VXD_CBIT(xid, 0),
20779 +               "dealloc_vx_info(%p)", vxi);
20780 +       vxh_dealloc_vx_info(vxi);
20781 +
20782 +       vxi->vx_id = -1;
20783 +
20784 +       vx_info_exit_limit(&vxi->limit);
20785 +       vx_info_exit_sched(&vxi->sched);
20786 +       vx_info_exit_cvirt(&vxi->cvirt);
20787 +       vx_info_exit_cacct(&vxi->cacct);
20788 +
20789 +       for_each_cpu(cpu) {
20790 +               vx_info_exit_sched_pc(
20791 +                       &vx_per_cpu(vxi, sched_pc, cpu), cpu);
20792 +               vx_info_exit_cvirt_pc(
20793 +                       &vx_per_cpu(vxi, cvirt_pc, cpu), cpu);
20794 +       }
20795 +
20796 +       vxi->vx_state |= VXS_RELEASED;
20797 +
20798 +#ifdef CONFIG_SMP
20799 +       free_percpu(vxi->ptr_pc);
20800 +#endif
20801 +       kfree(vxi);
20802 +       atomic_dec(&vx_global_ctotal);
20803 +}
20804 +
20805 +static void __shutdown_vx_info(struct vx_info *vxi)
20806 +{
20807 +       struct namespace *namespace;
20808 +       struct fs_struct *fs;
20809 +
20810 +       might_sleep();
20811 +
20812 +       vxi->vx_state |= VXS_SHUTDOWN;
20813 +       vs_state_change(vxi, VSC_SHUTDOWN);
20814 +
20815 +       namespace = xchg(&vxi->vx_namespace, NULL);
20816 +       if (namespace)
20817 +               put_namespace(namespace);
20818 +
20819 +       fs = xchg(&vxi->vx_fs, NULL);
20820 +       if (fs)
20821 +               put_fs_struct(fs);
20822 +}
20823 +
20824 +/* exported stuff */
20825 +
20826 +void free_vx_info(struct vx_info *vxi)
20827 +{
20828 +       /* context shutdown is mandatory */
20829 +       BUG_ON(!vx_info_state(vxi, VXS_SHUTDOWN));
20830 +
20831 +       BUG_ON(atomic_read(&vxi->vx_usecnt));
20832 +       BUG_ON(atomic_read(&vxi->vx_tasks));
20833 +
20834 +       BUG_ON(vx_info_state(vxi, VXS_HASHED));
20835 +
20836 +       BUG_ON(vxi->vx_namespace);
20837 +       BUG_ON(vxi->vx_fs);
20838 +
20839 +       spin_lock(&vx_info_inactive_lock);
20840 +       hlist_del(&vxi->vx_hlist);
20841 +       spin_unlock(&vx_info_inactive_lock);
20842 +
20843 +       __dealloc_vx_info(vxi);
20844 +}
20845 +
20846 +
20847 +/*     hash table for vx_info hash */
20848 +
20849 +#define VX_HASH_SIZE   13
20850 +
20851 +static struct hlist_head vx_info_hash[VX_HASH_SIZE] =
20852 +       { [0 ... VX_HASH_SIZE-1] = HLIST_HEAD_INIT };
20853 +
20854 +static spinlock_t vx_info_hash_lock = SPIN_LOCK_UNLOCKED;
20855 +
20856 +
20857 +static inline unsigned int __hashval(xid_t xid)
20858 +{
20859 +       return (xid % VX_HASH_SIZE);
20860 +}
20861 +
20862 +
20863 +
20864 +/*     __hash_vx_info()
20865 +
20866 +       * add the vxi to the global hash table
20867 +       * requires the hash_lock to be held                     */
20868 +
20869 +static inline void __hash_vx_info(struct vx_info *vxi)
20870 +{
20871 +       struct hlist_head *head;
20872 +
20873 +       vxd_assert_lock(&vx_info_hash_lock);
20874 +       vxdprintk(VXD_CBIT(xid, 4),
20875 +               "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id);
20876 +       vxh_hash_vx_info(vxi);
20877 +
20878 +       /* context must not be hashed */
20879 +       BUG_ON(vx_info_state(vxi, VXS_HASHED));
20880 +
20881 +       vxi->vx_state |= VXS_HASHED;
20882 +       head = &vx_info_hash[__hashval(vxi->vx_id)];
20883 +       hlist_add_head(&vxi->vx_hlist, head);
20884 +       atomic_inc(&vx_global_cactive);
20885 +}
20886 +
20887 +/*     __unhash_vx_info()
20888 +
20889 +       * remove the vxi from the global hash table
20890 +       * requires the hash_lock to be held                     */
20891 +
20892 +static inline void __unhash_vx_info(struct vx_info *vxi)
20893 +{
20894 +       vxd_assert_lock(&vx_info_hash_lock);
20895 +       vxdprintk(VXD_CBIT(xid, 4),
20896 +               "__unhash_vx_info: %p[#%d]", vxi, vxi->vx_id);
20897 +       vxh_unhash_vx_info(vxi);
20898 +
20899 +       /* context must be hashed */
20900 +       BUG_ON(!vx_info_state(vxi, VXS_HASHED));
20901 +
20902 +       vxi->vx_state &= ~VXS_HASHED;
20903 +       hlist_del_init(&vxi->vx_hlist);
20904 +       spin_lock(&vx_info_inactive_lock);
20905 +       hlist_add_head(&vxi->vx_hlist, &vx_info_inactive);
20906 +       spin_unlock(&vx_info_inactive_lock);
20907 +       atomic_dec(&vx_global_cactive);
20908 +}
20909 +
20910 +
20911 +/*     __lookup_vx_info()
20912 +
20913 +       * requires the hash_lock to be held
20914 +       * doesn't increment the vx_refcnt                       */
20915 +
20916 +static inline struct vx_info *__lookup_vx_info(xid_t xid)
20917 +{
20918 +       struct hlist_head *head = &vx_info_hash[__hashval(xid)];
20919 +       struct hlist_node *pos;
20920 +       struct vx_info *vxi;
20921 +
20922 +       vxd_assert_lock(&vx_info_hash_lock);
20923 +       hlist_for_each(pos, head) {
20924 +               vxi = hlist_entry(pos, struct vx_info, vx_hlist);
20925 +
20926 +               if (vxi->vx_id == xid)
20927 +                       goto found;
20928 +       }
20929 +       vxi = NULL;
20930 +found:
20931 +       vxdprintk(VXD_CBIT(xid, 0),
20932 +               "__lookup_vx_info(#%u): %p[#%u]",
20933 +               xid, vxi, vxi?vxi->vx_id:0);
20934 +       vxh_lookup_vx_info(vxi, xid);
20935 +       return vxi;
20936 +}
20937 +
20938 +
20939 +/*     __vx_dynamic_id()
20940 +
20941 +       * find unused dynamic xid
20942 +       * requires the hash_lock to be held                     */
20943 +
20944 +static inline xid_t __vx_dynamic_id(void)
20945 +{
20946 +       static xid_t seq = MAX_S_CONTEXT;
20947 +       xid_t barrier = seq;
20948 +
20949 +       vxd_assert_lock(&vx_info_hash_lock);
20950 +       do {
20951 +               if (++seq > MAX_S_CONTEXT)
20952 +                       seq = MIN_D_CONTEXT;
20953 +               if (!__lookup_vx_info(seq)) {
20954 +                       vxdprintk(VXD_CBIT(xid, 4),
20955 +                               "__vx_dynamic_id: [#%d]", seq);
20956 +                       return seq;
20957 +               }
20958 +       } while (barrier != seq);
20959 +       return 0;
20960 +}
20961 +
20962 +#ifdef CONFIG_VSERVER_LEGACY
20963 +
20964 +/*     __loc_vx_info()
20965 +
20966 +       * locate or create the requested context
20967 +       * get() it and if new hash it                           */
20968 +
20969 +static struct vx_info * __loc_vx_info(int id, int *err)
20970 +{
20971 +       struct vx_info *new, *vxi = NULL;
20972 +
20973 +       vxdprintk(VXD_CBIT(xid, 1), "loc_vx_info(%d)*", id);
20974 +
20975 +       if (!(new = __alloc_vx_info(id))) {
20976 +               *err = -ENOMEM;
20977 +               return NULL;
20978 +       }
20979 +
20980 +       /* required to make dynamic xids unique */
20981 +       spin_lock(&vx_info_hash_lock);
20982 +
20983 +       /* dynamic context requested */
20984 +       if (id == VX_DYNAMIC_ID) {
20985 +#ifdef CONFIG_VSERVER_DYNAMIC_IDS
20986 +               id = __vx_dynamic_id();
20987 +               if (!id) {
20988 +                       printk(KERN_ERR "no dynamic context available.\n");
20989 +                       goto out_unlock;
20990 +               }
20991 +               new->vx_id = id;
20992 +#else
20993 +               printk(KERN_ERR "dynamic contexts disabled.\n");
20994 +               goto out_unlock;
20995 +#endif
20996 +       }
20997 +       /* existing context requested */
20998 +       else if ((vxi = __lookup_vx_info(id))) {
20999 +               /* context in setup is not available */
21000 +               if (vxi->vx_flags & VXF_STATE_SETUP) {
21001 +                       vxdprintk(VXD_CBIT(xid, 0),
21002 +                               "loc_vx_info(%d) = %p (not available)", id, vxi);
21003 +                       vxi = NULL;
21004 +                       *err = -EBUSY;
21005 +               } else {
21006 +                       vxdprintk(VXD_CBIT(xid, 0),
21007 +                               "loc_vx_info(%d) = %p (found)", id, vxi);
21008 +                       get_vx_info(vxi);
21009 +                       *err = 0;
21010 +               }
21011 +               goto out_unlock;
21012 +       }
21013 +
21014 +       /* new context requested */
21015 +       vxdprintk(VXD_CBIT(xid, 0),
21016 +               "loc_vx_info(%d) = %p (new)", id, new);
21017 +       __hash_vx_info(get_vx_info(new));
21018 +       vxi = new, new = NULL;
21019 +       *err = 1;
21020 +
21021 +out_unlock:
21022 +       spin_unlock(&vx_info_hash_lock);
21023 +       vxh_loc_vx_info(vxi, id);
21024 +       if (new)
21025 +               __dealloc_vx_info(new);
21026 +       return vxi;
21027 +}
21028 +
21029 +#endif
21030 +
21031 +/*     __create_vx_info()
21032 +
21033 +       * create the requested context
21034 +       * get() and hash it                                     */
21035 +
21036 +static struct vx_info * __create_vx_info(int id)
21037 +{
21038 +       struct vx_info *new, *vxi = NULL;
21039 +
21040 +       vxdprintk(VXD_CBIT(xid, 1), "create_vx_info(%d)*", id);
21041 +
21042 +       if (!(new = __alloc_vx_info(id)))
21043 +               return ERR_PTR(-ENOMEM);
21044 +
21045 +       /* required to make dynamic xids unique */
21046 +       spin_lock(&vx_info_hash_lock);
21047 +
21048 +       /* dynamic context requested */
21049 +       if (id == VX_DYNAMIC_ID) {
21050 +#ifdef CONFIG_VSERVER_DYNAMIC_IDS
21051 +               id = __vx_dynamic_id();
21052 +               if (!id) {
21053 +                       printk(KERN_ERR "no dynamic context available.\n");
21054 +                       vxi = ERR_PTR(-EAGAIN);
21055 +                       goto out_unlock;
21056 +               }
21057 +               new->vx_id = id;
21058 +#else
21059 +               printk(KERN_ERR "dynamic contexts disabled.\n");
21060 +               vxi = ERR_PTR(-EINVAL);
21061 +               goto out_unlock;
21062 +#endif
21063 +       }
21064 +       /* static context requested */
21065 +       else if ((vxi = __lookup_vx_info(id))) {
21066 +               vxdprintk(VXD_CBIT(xid, 0),
21067 +                       "create_vx_info(%d) = %p (already there)", id, vxi);
21068 +               if (vx_info_flags(vxi, VXF_STATE_SETUP, 0))
21069 +                       vxi = ERR_PTR(-EBUSY);
21070 +               else
21071 +                       vxi = ERR_PTR(-EEXIST);
21072 +               goto out_unlock;
21073 +       }
21074 +#ifdef CONFIG_VSERVER_DYNAMIC_IDS
21075 +       /* dynamic xid creation blocker */
21076 +       else if (id >= MIN_D_CONTEXT) {
21077 +               vxdprintk(VXD_CBIT(xid, 0),
21078 +                       "create_vx_info(%d) (dynamic rejected)", id);
21079 +               vxi = ERR_PTR(-EINVAL);
21080 +               goto out_unlock;
21081 +       }
21082 +#endif
21083 +
21084 +       /* new context */
21085 +       vxdprintk(VXD_CBIT(xid, 0),
21086 +               "create_vx_info(%d) = %p (new)", id, new);
21087 +       __hash_vx_info(get_vx_info(new));
21088 +       vxi = new, new = NULL;
21089 +
21090 +out_unlock:
21091 +       spin_unlock(&vx_info_hash_lock);
21092 +       vxh_create_vx_info(IS_ERR(vxi)?NULL:vxi, id);
21093 +       if (new)
21094 +               __dealloc_vx_info(new);
21095 +       return vxi;
21096 +}
21097 +
21098 +
21099 +/*     exported stuff                                          */
21100 +
21101 +
21102 +void unhash_vx_info(struct vx_info *vxi)
21103 +{
21104 +       __shutdown_vx_info(vxi);
21105 +       spin_lock(&vx_info_hash_lock);
21106 +       __unhash_vx_info(vxi);
21107 +       spin_unlock(&vx_info_hash_lock);
21108 +       __wakeup_vx_info(vxi);
21109 +}
21110 +
21111 +
21112 +/*     lookup_vx_info()
21113 +
21114 +       * search for a vx_info and get() it
21115 +       * negative id means current                             */
21116 +
21117 +struct vx_info *lookup_vx_info(int id)
21118 +{
21119 +       struct vx_info *vxi = NULL;
21120 +
21121 +       if (id < 0) {
21122 +               vxi = get_vx_info(current->vx_info);
21123 +       } else if (id > 1) {
21124 +               spin_lock(&vx_info_hash_lock);
21125 +               vxi = get_vx_info(__lookup_vx_info(id));
21126 +               spin_unlock(&vx_info_hash_lock);
21127 +       }
21128 +       return vxi;
21129 +}
21130 +
21131 +/*     xid_is_hashed()
21132 +
21133 +       * verify that xid is still hashed                       */
21134 +
21135 +int xid_is_hashed(xid_t xid)
21136 +{
21137 +       int hashed;
21138 +
21139 +       spin_lock(&vx_info_hash_lock);
21140 +       hashed = (__lookup_vx_info(xid) != NULL);
21141 +       spin_unlock(&vx_info_hash_lock);
21142 +       return hashed;
21143 +}
21144 +
21145 +#ifdef CONFIG_VSERVER_LEGACY
21146 +
21147 +struct vx_info *lookup_or_create_vx_info(int id)
21148 +{
21149 +       int err;
21150 +
21151 +       return __loc_vx_info(id, &err);
21152 +}
21153 +
21154 +#endif
21155 +
21156 +#ifdef CONFIG_PROC_FS
21157 +
21158 +/*     get_xid_list()
21159 +
21160 +       * get a subset of hashed xids for proc
21161 +       * assumes size is at least one                          */
21162 +
21163 +int get_xid_list(int index, unsigned int *xids, int size)
21164 +{
21165 +       int hindex, nr_xids = 0;
21166 +
21167 +       /* only show current and children */
21168 +       if (!vx_check(0, VX_ADMIN|VX_WATCH)) {
21169 +               if (index > 0)
21170 +                       return 0;
21171 +               xids[nr_xids] = vx_current_xid();
21172 +               return 1;
21173 +       }
21174 +
21175 +       for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) {
21176 +               struct hlist_head *head = &vx_info_hash[hindex];
21177 +               struct hlist_node *pos;
21178 +
21179 +               spin_lock(&vx_info_hash_lock);
21180 +               hlist_for_each(pos, head) {
21181 +                       struct vx_info *vxi;
21182 +
21183 +                       if (--index > 0)
21184 +                               continue;
21185 +
21186 +                       vxi = hlist_entry(pos, struct vx_info, vx_hlist);
21187 +                       xids[nr_xids] = vxi->vx_id;
21188 +                       if (++nr_xids >= size) {
21189 +                               spin_unlock(&vx_info_hash_lock);
21190 +                               goto out;
21191 +                       }
21192 +               }
21193 +               /* keep the lock time short */
21194 +               spin_unlock(&vx_info_hash_lock);
21195 +       }
21196 +out:
21197 +       return nr_xids;
21198 +}
21199 +#endif
21200 +
21201 +#ifdef CONFIG_VSERVER_DEBUG
21202 +
21203 +void   dump_vx_info_inactive(int level)
21204 +{
21205 +       struct hlist_node *entry, *next;
21206 +       /* NOTE(review): walks vx_info_inactive without vx_info_inactive_lock */
21207 +       hlist_for_each_safe(entry, next, &vx_info_inactive) {
21208 +               struct vx_info *vxi =
21209 +                       hlist_entry(entry, struct vx_info, vx_hlist);
21210 +
21211 +               dump_vx_info(vxi, level);
21212 +       }
21213 +}
21214 +
21215 +#endif
21216 +
21217 +int vx_migrate_user(struct task_struct *p, struct vx_info *vxi)
21218 +{
21219 +       struct user_struct *new_user, *old_user;
21220 +       /* rebind p to the user_struct for (vxi->vx_id, p->uid); 0 or -ENOMEM */
21221 +       if (!p || !vxi)
21222 +               BUG();
21223 +       new_user = alloc_uid(vxi->vx_id, p->uid);
21224 +       if (!new_user)
21225 +               return -ENOMEM;
21226 +       /* move the process count over only if the user_struct really differs */
21227 +       old_user = p->user;
21228 +       if (new_user != old_user) {
21229 +               atomic_inc(&new_user->processes);
21230 +               atomic_dec(&old_user->processes);
21231 +               p->user = new_user;
21232 +       }
21233 +       free_uid(old_user);     /* NOTE(review): presumably drops the now surplus reference - confirm */
21234 +       return 0;
21235 +}
21236 +
21237 +void vx_mask_cap_bset(struct vx_info *vxi, struct task_struct *p)
21238 +{      /* restrict all three capability sets of p to vxi->vx_cap_bset */
21239 +       p->cap_effective &= vxi->vx_cap_bset;
21240 +       p->cap_inheritable &= vxi->vx_cap_bset;
21241 +       p->cap_permitted &= vxi->vx_cap_bset;
21242 +}
21243 +
21244 +
21245 +#include <linux/file.h>
21246 +
21247 +static int vx_openfd_task(struct task_struct *tsk)
21248 +{
21249 +       struct files_struct *files = tsk->files;        /* dereferenced unchecked below */
21250 +       struct fdtable *fdt;
21251 +       const unsigned long *bptr;
21252 +       int count, total;
21253 +       /* returns the number of bits set in tsk's open_fds bitmap */
21254 +       /* no rcu_read_lock() because of spin_lock() */
21255 +       spin_lock(&files->file_lock);
21256 +       fdt = files_fdtable(files);
21257 +       bptr = fdt->open_fds->fds_bits;
21258 +       count = fdt->max_fds / (sizeof(unsigned long) * 8);     /* bitmap words to scan */
21259 +       for (total = 0; count > 0; count--) {
21260 +               if (*bptr)
21261 +                       total += hweight_long(*bptr);
21262 +               bptr++;
21263 +       }
21264 +       spin_unlock(&files->file_lock);
21265 +       return total;
21266 +}
21267 +
21268 +/*
21269 + *     migrate task to new context
21270 + *     gets vxi, puts old_vxi on change
21271 + *     returns 0, or the error from vx_migrate_user()
21272 + */
21273 +int vx_migrate_task(struct task_struct *p, struct vx_info *vxi)
21274 +{
21275 +       struct vx_info *old_vxi;
21276 +       int ret = 0;
21277 +
21278 +       if (!p || !vxi)
21279 +               BUG();
21280 +
21281 +       old_vxi = task_get_vx_info(p);
21282 +       if (old_vxi == vxi)
21283 +               goto out;       /* p already uses vxi: nothing to move */
21284 +
21285 +       vxdprintk(VXD_CBIT(xid, 5),
21286 +               "vx_migrate_task(%p,%p[#%d.%d])", p, vxi,
21287 +               vxi->vx_id, atomic_read(&vxi->vx_usecnt));
21288 +
21289 +       if (!(ret = vx_migrate_user(p, vxi))) {
21290 +               int openfd;
21291 +
21292 +               task_lock(p);
21293 +               openfd = vx_openfd_task(p);
21294 +               /* take p's share out of the old context's counters and limits */
21295 +               if (old_vxi) {
21296 +                       atomic_dec(&old_vxi->cvirt.nr_threads);
21297 +                       atomic_dec(&old_vxi->cvirt.nr_running);
21298 +                       __rlim_dec(&old_vxi->limit, RLIMIT_NPROC);
21299 +                       /* FIXME: what about the struct files here? */
21300 +                       __rlim_sub(&old_vxi->limit, VLIMIT_OPENFD, openfd);
21301 +                       /* account for the executable */
21302 +                       __rlim_dec(&old_vxi->limit, VLIMIT_DENTRY);
21303 +               }
21304 +               atomic_inc(&vxi->cvirt.nr_threads);     /* and add it to the new one */
21305 +               atomic_inc(&vxi->cvirt.nr_running);
21306 +               __rlim_inc(&vxi->limit, RLIMIT_NPROC);
21307 +               /* FIXME: what about the struct files here? */
21308 +               __rlim_add(&vxi->limit, VLIMIT_OPENFD, openfd);
21309 +               /* account for the executable */
21310 +               __rlim_inc(&vxi->limit, VLIMIT_DENTRY);
21311 +               /* repoint p->vx_info and p->xid at the new context */
21312 +               if (old_vxi) {
21313 +                       release_vx_info(old_vxi, p);
21314 +                       clr_vx_info(&p->vx_info);
21315 +               }
21316 +               claim_vx_info(vxi, p);
21317 +               set_vx_info(&p->vx_info, vxi);
21318 +               p->xid = vxi->vx_id;
21319 +
21320 +               vxdprintk(VXD_CBIT(xid, 5),
21321 +                       "moved task %p into vxi:%p[#%d]",
21322 +                       p, vxi, vxi->vx_id);
21323 +
21324 +               vx_mask_cap_bset(vxi, p);
21325 +               task_unlock(p);
21326 +       }
21327 +out:
21328 +       put_vx_info(old_vxi);   /* NOTE(review): old_vxi may be NULL here; assumes put_vx_info() copes - confirm */
21329 +       return ret;
21330 +}
21331 +
21332 +int vx_set_reaper(struct vx_info *vxi, struct task_struct *p)
21333 +{
21334 +       struct task_struct *old_reaper;
21335 +       /* make p the reaper of vxi; returns 0, or -EINVAL when vxi is NULL */
21336 +       if (!vxi)
21337 +               return -EINVAL;
21338 +
21339 +       vxdprintk(VXD_CBIT(xid, 6),
21340 +               "vx_set_reaper(%p[#%d],%p[#%d,%d])",
21341 +               vxi, vxi->vx_id, p, p->xid, p->pid);
21342 +
21343 +       old_reaper = vxi->vx_reaper;
21344 +       if (old_reaper == p)
21345 +               return 0;       /* unchanged: keep the reference already held */
21346 +
21347 +       /* set new child reaper */
21348 +       get_task_struct(p);
21349 +       vxi->vx_reaper = p;
21350 +       put_task_struct(old_reaper);    /* NOTE(review): assumes vx_reaper was never NULL - confirm */
21351 +       return 0;
21352 +}
21353 +
21354 +int vx_set_init(struct vx_info *vxi, struct task_struct *p)
21355 +{
21356 +       if (!vxi)
21357 +               return -EINVAL;
21358 +
21359 +       vxdprintk(VXD_CBIT(xid, 6),
21360 +               "vx_set_init(%p[#%d],%p[#%d,%d,%d])",
21361 +               vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
21362 +       /* the thread group id of p becomes the init pid of the context */
21363 +       vxi->vx_initpid = p->tgid;
21364 +       return 0;
21365 +}
21366 +
21367 +void vx_exit_init(struct vx_info *vxi, struct task_struct *p, int code)
21368 +{
21369 +       vxdprintk(VXD_CBIT(xid, 6),
21370 +               "vx_exit_init(%p[#%d],%p[#%d,%d,%d])",
21371 +               vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
21372 +       /* remember the exit code and clear the recorded init pid */
21373 +       vxi->exit_code = code;
21374 +       vxi->vx_initpid = 0;
21375 +}
21376 +
21377 +void vx_set_persistent(struct vx_info *vxi)
21378 +{
21379 +       vxdprintk(VXD_CBIT(xid, 6),
21380 +               "vx_set_persistent(%p[#%d])", vxi, vxi->vx_id);
21381 +       /* get+claim when the VXF_PERSISTENT test holds, release+put otherwise */
21382 +       if (vx_info_flags(vxi, VXF_PERSISTENT, 0)) {
21383 +               get_vx_info(vxi);
21384 +               claim_vx_info(vxi, current);
21385 +       } else {
21386 +               release_vx_info(vxi, current);
21387 +               put_vx_info(vxi);
21388 +       }
21389 +}
21390 +
21391 +
21392 +/*     task must be current or locked          */
21393 +
21394 +void   exit_vx_info(struct task_struct *p, int code)
21395 +{
21396 +       struct vx_info *vxi = p->vx_info;
21397 +       /* a task without a context has nothing to undo */
21398 +       if (vxi) {
21399 +               atomic_dec(&vxi->cvirt.nr_threads);
21400 +               vx_nproc_dec(p);
21401 +               /* store the exit code; give up the init/reaper role if p held it */
21402 +               vxi->exit_code = code;
21403 +               if (vxi->vx_initpid == p->tgid)
21404 +                       vx_exit_init(vxi, p, code);
21405 +               if (vxi->vx_reaper == p)
21406 +                       vx_set_reaper(vxi, child_reaper);       /* fall back to child_reaper */
21407 +               release_vx_info(vxi, p);
21408 +       }
21409 +}
21410 +
21411 +
21412 +/* vserver syscall commands below here */
21413 +
21414 +/* task xid and vx_info functions */
21415 +
21416 +#include <asm/uaccess.h>
21417 +
21418 +
21419 +int vc_task_xid(uint32_t id, void __user *data)
21420 +{
21421 +       xid_t xid;
21422 +       /* id != 0: xid of the task with that pid; id == 0: xid of the caller */
21423 +       if (id) {
21424 +               struct task_struct *tsk;
21425 +
21426 +               if (!vx_check(0, VX_ADMIN|VX_WATCH))
21427 +                       return -EPERM;
21428 +
21429 +               read_lock(&tasklist_lock);
21430 +               tsk = find_task_by_real_pid(id);
21431 +               xid = (tsk) ? tsk->xid : -ESRCH;        /* NOTE(review): -ESRCH passes through xid_t; check its signedness */
21432 +               read_unlock(&tasklist_lock);
21433 +       }
21434 +       else
21435 +               xid = vx_current_xid();
21436 +       return xid;     /* data is not used by this command */
21437 +}
21438 +
21439 +
21440 +int vc_vx_info(uint32_t id, void __user *data)
21441 +{
21442 +       struct vx_info *vxi;
21443 +       struct vcmd_vx_info_v0 vc_data;
21444 +       /* report xid and init pid of context id; needs CAP_SYS_RESOURCE */
21445 +       if (!capable(CAP_SYS_RESOURCE))
21446 +               return -EPERM;
21447 +
21448 +       vxi = lookup_vx_info(id);
21449 +       if (!vxi)
21450 +               return -ESRCH;
21451 +       /* snapshot the fields, then drop the lookup reference before copy-out */
21452 +       vc_data.xid = vxi->vx_id;
21453 +       vc_data.initpid = vxi->vx_initpid;
21454 +       put_vx_info(vxi);
21455 +
21456 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
21457 +               return -EFAULT;
21458 +       return 0;
21459 +}
21460 +
21461 +
21462 +/* context functions */
21463 +
21464 +int vc_ctx_create(uint32_t xid, void __user *data)
21465 +{
21466 +       struct vcmd_ctx_create vc_data = { .flagword = VXF_INIT_SET };
21467 +       struct vx_info *new_vxi;
21468 +       int ret;
21469 +       /* create context xid and move current into it; new xid or -errno */
21470 +       if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
21471 +               return -EFAULT;
21472 +
21473 +       /* accept ids from 2 up to MAX_S_CONTEXT, or VX_DYNAMIC_ID */
21474 +       if (xid < 2 || ((xid > MAX_S_CONTEXT) && (xid != VX_DYNAMIC_ID)))
21475 +               return -EINVAL;
21476 +
21477 +       new_vxi = __create_vx_info(xid);
21478 +       if (IS_ERR(new_vxi))
21479 +               return PTR_ERR(new_vxi);
21480 +
21481 +       /* initial flags */
21482 +       new_vxi->vx_flags = vc_data.flagword;
21483 +
21484 +       /* get a reference for persistent contexts */
21485 +       if ((vc_data.flagword & VXF_PERSISTENT))
21486 +               vx_set_persistent(new_vxi);
21487 +
21488 +       vs_state_change(new_vxi, VSC_STARTUP);
21489 +       ret = vx_migrate_task(current, new_vxi);
21490 +       /* if this fails, we might end up with a hashed vx_info */
21491 +       if (!ret)
21492 +               ret = new_vxi->vx_id;   /* report the xid only if current really moved */
21493 +       put_vx_info(new_vxi);
21494 +       return ret;
21495 +}
21496 +
21497 +
21498 +int vc_ctx_migrate(uint32_t id, void __user *data)
21499 +{
21500 +       struct vcmd_ctx_migrate vc_data = { .flagword = 0 };
21501 +       struct vx_info *vxi;
21502 +       /* data is optional; without it no VXM_* flag is applied */
21503 +       if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
21504 +               return -EFAULT;
21505 +
21506 +       /* dirty hack until Spectator becomes a cap */
21507 +       if (id == 1) {
21508 +               current->xid = 1;
21509 +               return 0;
21510 +       }
21511 +
21512 +       vxi = lookup_vx_info(id);
21513 +       if (!vxi)
21514 +               return -ESRCH;
21515 +       vx_migrate_task(current, vxi);  /* NOTE(review): result ignored; the flags below apply even if this failed */
21516 +       if (vc_data.flagword & VXM_SET_INIT)
21517 +               vx_set_init(vxi, current);
21518 +       if (vc_data.flagword & VXM_SET_REAPER)
21519 +               vx_set_reaper(vxi, current);
21520 +       put_vx_info(vxi);
21521 +       return 0;
21522 +}
21523 +
21524 +
21525 +int vc_get_cflags(uint32_t id, void __user *data)
21526 +{
21527 +       struct vx_info *vxi;
21528 +       struct vcmd_ctx_flags_v0 vc_data;
21529 +       /* copy flag word and settable mask of context id to user space */
21530 +       vxi = lookup_vx_info(id);
21531 +       if (!vxi)
21532 +               return -ESRCH;
21533 +
21534 +       vc_data.flagword = vxi->vx_flags;
21535 +       /* start from a full 64-bit mask: ~0UL is only 32 bits wide on 32-bit targets */
21536 +       /* special STATE flag handling */
21537 +       vc_data.mask = vx_mask_flags(~0ULL, vxi->vx_flags, VXF_ONE_TIME);
21538 +
21539 +       put_vx_info(vxi);
21540 +
21541 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
21542 +               return -EFAULT;
21543 +       return 0;
21544 +}
21545 +
21546 +int vc_set_cflags(uint32_t id, void __user *data)
21547 +{
21548 +       struct vx_info *vxi;
21549 +       struct vcmd_ctx_flags_v0 vc_data;
21550 +       uint64_t mask, trigger;
21551 +       /* update the flags of context id from the user's flagword/mask pair */
21552 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
21553 +               return -EFAULT;
21554 +
21555 +       vxi = lookup_vx_info(id);
21556 +       if (!vxi)
21557 +               return -ESRCH;
21558 +
21559 +       /* special STATE flag handling */
21560 +       mask = vx_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME);
21561 +       trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword);   /* masked bits that change */
21562 +       /* state side effects run only when the caller modifies its own context */
21563 +       if (vxi == current->vx_info) {
21564 +               if (trigger & VXF_STATE_SETUP)
21565 +                       vx_mask_cap_bset(vxi, current);
21566 +               if (trigger & VXF_STATE_INIT) {
21567 +                       vx_set_init(vxi, current);
21568 +                       vx_set_reaper(vxi, current);
21569 +               }
21570 +       }
21571 +
21572 +       vxi->vx_flags = vx_mask_flags(vxi->vx_flags,
21573 +               vc_data.flagword, mask);
21574 +       if (trigger & VXF_PERSISTENT)   /* flag was toggled: adjust the references */
21575 +               vx_set_persistent(vxi);
21576 +
21577 +       put_vx_info(vxi);
21578 +       return 0;
21579 +}
21580 +
21581 +static int do_get_caps(xid_t xid, uint64_t *bcaps, uint64_t *ccaps)
21582 +{
21583 +       struct vx_info *vxi;
21584 +       /* read vx_bcaps/vx_ccaps of context xid; 0, or -ESRCH if not found */
21585 +       vxi = lookup_vx_info(xid);
21586 +       if (!vxi)
21587 +               return -ESRCH;
21588 +       /* either output pointer may be NULL to skip that value */
21589 +       if (bcaps)
21590 +               *bcaps = vxi->vx_bcaps;
21591 +       if (ccaps)
21592 +               *ccaps = vxi->vx_ccaps;
21593 +
21594 +       put_vx_info(vxi);
21595 +       return 0;
21596 +}
21597 +
21598 +int vc_get_ccaps_v0(uint32_t id, void __user *data)
21599 +{
21600 +       struct vcmd_ctx_caps_v0 vc_data;
21601 +       int ret;
21602 +       /* v0 layout: report bcaps and ccaps of context id plus a full cmask */
21603 +       ret = do_get_caps(id, &vc_data.bcaps, &vc_data.ccaps);
21604 +       if (ret)
21605 +               return ret;
21606 +       vc_data.cmask = ~0ULL;  /* all 64 bits; ~0UL is only 32 bits wide on 32-bit targets */
21607 +
21608 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
21609 +               return -EFAULT;
21610 +       return 0;
21611 +}
21612 +
21613 +int vc_get_ccaps(uint32_t id, void __user *data)
21614 +{
21615 +       struct vcmd_ctx_caps_v1 vc_data;
21616 +       int ret;
21617 +       /* v1 layout: report only ccaps of context id plus a full cmask */
21618 +       ret = do_get_caps(id, NULL, &vc_data.ccaps);
21619 +       if (ret)
21620 +               return ret;
21621 +       vc_data.cmask = ~0ULL;  /* all 64 bits; ~0UL is only 32 bits wide on 32-bit targets */
21622 +
21623 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
21624 +               return -EFAULT;
21625 +       return 0;
21626 +}
21627 +
21628 +static int do_set_caps(xid_t xid, uint64_t bcaps, uint64_t bmask,
21629 +       uint64_t ccaps, uint64_t cmask)
21630 +{
21631 +       struct vx_info *vxi;
21632 +       /* merge bcaps/ccaps into context xid under their masks; 0 or -ESRCH */
21633 +       vxi = lookup_vx_info(xid);
21634 +       if (!vxi)
21635 +               return -ESRCH;
21636 +
21637 +       vxi->vx_bcaps = vx_mask_flags(vxi->vx_bcaps, bcaps, bmask);
21638 +       vxi->vx_ccaps = vx_mask_flags(vxi->vx_ccaps, ccaps, cmask);
21639 +
21640 +       put_vx_info(vxi);
21641 +       return 0;
21642 +}
21643 +
21644 +int vc_set_ccaps_v0(uint32_t id, void __user *data)
21645 +{
21646 +       struct vcmd_ctx_caps_v0 vc_data;
21647 +       /* v0 layout: bcaps, ccaps and cmask all come from user space */
21648 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
21649 +               return -EFAULT;
21650 +
21651 +       /* simulate old &= behaviour for bcaps */
21652 +       return do_set_caps(id, 0, ~vc_data.bcaps,
21653 +               vc_data.ccaps, vc_data.cmask);
21654 +}
21655 +
21656 +int vc_set_ccaps(uint32_t id, void __user *data)
21657 +{
21658 +       struct vcmd_ctx_caps_v1 vc_data;
21659 +       /* v1 layout: only ccaps/cmask are supplied by user space */
21660 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
21661 +               return -EFAULT;
21662 +       /* bcaps and bmask are passed as 0 */
21663 +       return do_set_caps(id, 0, 0, vc_data.ccaps, vc_data.cmask);
21664 +}
21665 +
21666 +int vc_get_bcaps(uint32_t id, void __user *data)
21667 +{
21668 +       struct vcmd_bcaps vc_data;
21669 +       int ret;
21670 +       /* report bcaps of context id plus a full bmask */
21671 +       ret = do_get_caps(id, &vc_data.bcaps, NULL);
21672 +       if (ret)
21673 +               return ret;
21674 +       vc_data.bmask = ~0ULL;  /* all 64 bits; ~0UL is only 32 bits wide on 32-bit targets */
21675 +
21676 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
21677 +               return -EFAULT;
21678 +       return 0;
21679 +}
21680 +
21681 +int vc_set_bcaps(uint32_t id, void __user *data)
21682 +{
21683 +       struct vcmd_bcaps vc_data;
21684 +       /* only bcaps/bmask are supplied by user space */
21685 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
21686 +               return -EFAULT;
21687 +       /* ccaps and cmask are passed as 0 */
21688 +       return do_set_caps(id, vc_data.bcaps, vc_data.bmask, 0, 0);
21689 +}
21690 +
21691 +#include <linux/module.h>
21692 +
21693 +EXPORT_SYMBOL_GPL(free_vx_info);
21694 +
21695 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/cvirt.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/cvirt.c
21696 --- linux-2.6.16.20/kernel/vserver/cvirt.c      1970-01-01 01:00:00 +0100
21697 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/cvirt.c 2006-05-31 01:29:12 +0200
21698 @@ -0,0 +1,297 @@
21699 +/*
21700 + *  linux/kernel/vserver/cvirt.c
21701 + *
21702 + *  Virtual Server: Context Virtualization
21703 + *
21704 + *  Copyright (C) 2004-2005  Herbert Pötzl
21705 + *
21706 + *  V0.01  broken out from limit.c
21707 + *  V0.02  added utsname stuff
21708 + *
21709 + */
21710 +
21711 +#include <linux/sched.h>
21712 +#include <linux/sysctl.h>
21713 +#include <linux/types.h>
21714 +#include <linux/vs_context.h>
21715 +#include <linux/vs_cvirt.h>
21716 +#include <linux/vserver/switch.h>
21717 +#include <linux/vserver/cvirt_cmd.h>
21718 +
21719 +#include <asm/errno.h>
21720 +#include <asm/uaccess.h>
21721 +
21722 +
21723 +void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle)
21724 +{
21725 +       struct timespec *up_bias = &current->vx_info->cvirt.bias_uptime;
21726 +       struct timespec *idle_bias = &current->vx_info->cvirt.bias_idle;
21727 +
21728 +       /* subtract the context's bias_uptime from uptime, normalized */
21729 +       set_normalized_timespec(uptime, uptime->tv_sec - up_bias->tv_sec,
21730 +               uptime->tv_nsec - up_bias->tv_nsec);
21731 +
21732 +       /* idle is optional; when given, subtract bias_idle the same way */
21733 +       if (idle)
21734 +               set_normalized_timespec(idle, idle->tv_sec - idle_bias->tv_sec,
21735 +                       idle->tv_nsec - idle_bias->tv_nsec);
21736 +}
21737 +
21738 +uint64_t vx_idle_jiffies(void)
21739 +{      /* sum of the utime and stime fields of init_task */
21740 +       return init_task.utime + init_task.stime;
21741 +}
21742 +
21743 +
21744 +
21745 +static inline uint32_t __update_loadavg(uint32_t load,
21746 +       int wsize, int delta, int n)
21747 +{
21748 +       unsigned long long calc, prev;
21749 +       /* mean of n (weight delta) and load (weight wsize - delta), n scaled by FSHIFT */
21750 +       /* just set it to n */
21751 +       if (unlikely(delta >= wsize))
21752 +               return (n << FSHIFT);
21753 +       /* widen before multiplying: delta * n may not fit into an int */
21754 +       calc = (unsigned long long)delta * n;
21755 +       calc <<= FSHIFT;
21756 +       prev = (wsize - delta);
21757 +       prev *= load;
21758 +       calc += prev;
21759 +       do_div(calc, wsize);
21760 +       return calc;
21761 +}
21762 +
21763 +
21764 +void vx_update_load(struct vx_info *vxi)
21765 +{
21766 +       uint32_t now, last, delta;
21767 +       unsigned int nr_running, nr_uninterruptible;
21768 +       unsigned int total;
21769 +       /* refresh the three load values of vxi under cvirt.load_lock */
21770 +       spin_lock(&vxi->cvirt.load_lock);
21771 +
21772 +       now = jiffies;
21773 +       last = vxi->cvirt.load_last;
21774 +       delta = now - last;
21775 +       /* recompute only if at least 5*HZ jiffies passed since load_last */
21776 +       if (delta < 5*HZ)
21777 +               goto out;
21778 +
21779 +       nr_running = atomic_read(&vxi->cvirt.nr_running);
21780 +       nr_uninterruptible = atomic_read(&vxi->cvirt.nr_uninterruptible);
21781 +       total = nr_running + nr_uninterruptible;
21782 +       /* windows of 60*HZ, 5*60*HZ and 15*60*HZ jiffies */
21783 +       vxi->cvirt.load[0] = __update_loadavg(vxi->cvirt.load[0],
21784 +               60*HZ, delta, total);
21785 +       vxi->cvirt.load[1] = __update_loadavg(vxi->cvirt.load[1],
21786 +               5*60*HZ, delta, total);
21787 +       vxi->cvirt.load[2] = __update_loadavg(vxi->cvirt.load[2],
21788 +               15*60*HZ, delta, total);
21789 +
21790 +       vxi->cvirt.load_last = now;
21791 +out:
21792 +       atomic_inc(&vxi->cvirt.load_updates);   /* counts every call, skipped ones too */
21793 +       spin_unlock(&vxi->cvirt.load_lock);
21794 +}
21795 +
21796 +
21797 +int vx_uts_virt_handler(struct ctl_table *ctl, int write, xid_t xid,
21798 +       void **datap, size_t *lenp)
21799 +{      /* point *datap at the vx_new_uts() field selected by ctl->ctl_name */
21800 +       switch (ctl->ctl_name) {
21801 +       case KERN_OSTYPE:
21802 +               *datap = vx_new_uts(sysname);
21803 +               break;
21804 +       case KERN_OSRELEASE:
21805 +               *datap = vx_new_uts(release);
21806 +               break;
21807 +       case KERN_VERSION:
21808 +               *datap = vx_new_uts(version);
21809 +               break;
21810 +       case KERN_NODENAME:
21811 +               *datap = vx_new_uts(nodename);
21812 +               break;
21813 +       case KERN_DOMAINNAME:
21814 +               *datap = vx_new_uts(domainname);
21815 +               break;
21816 +       }
21817 +       /* any other ctl_name leaves *datap untouched; the result is always 0 */
21818 +       return 0;
21819 +}
21820 +
21821 +
21822 +
21823 +/*
21824 + * Commands to do_syslog:
21825 + *
21826 + *      0 -- Close the log.  Currently a NOP.
21827 + *      1 -- Open the log. Currently a NOP.
21828 + *      2 -- Read from the log.
21829 + *      3 -- Read all messages remaining in the ring buffer.
21830 + *      4 -- Read and clear all messages remaining in the ring buffer
21831 + *      5 -- Clear ring buffer.
21832 + *      6 -- Disable printk's to console
21833 + *      7 -- Enable printk's to console
21834 + *      8 -- Set level of messages printed to console
21835 + *      9 -- Return number of unread characters in the log buffer
21836 + *     10 -- Return size of the log buffer
21837 + */
21838 +int vx_do_syslog(int type, char __user *buf, int len)
21839 +{
21840 +       int error = 0;
21841 +       int do_clear = 0;       /* set for type 4, not read anywhere in this function */
21842 +       struct vx_info *vxi = current->vx_info;
21843 +       struct _vx_syslog *log;
21844 +       /* buf and len are not used: no command here copies log data out */
21845 +       if (!vxi)
21846 +               return -EINVAL;
21847 +       log = &vxi->cvirt.syslog;
21848 +
21849 +       switch (type) {
21850 +       case 0:         /* Close log */
21851 +       case 1:         /* Open log */
21852 +               break;
21853 +       case 2:         /* Read from log */
21854 +               error = wait_event_interruptible(log->log_wait,
21855 +                       (log->log_start - log->log_end));
21856 +               if (error)
21857 +                       break;
21858 +               spin_lock_irq(&log->logbuf_lock);       /* lock taken and dropped, nothing read */
21859 +               spin_unlock_irq(&log->logbuf_lock);
21860 +               break;
21861 +       case 4:         /* Read/clear last kernel messages */
21862 +               do_clear = 1;
21863 +               /* fall through */
21864 +       case 3:         /* Read last kernel messages */
21865 +               return 0;
21866 +
21867 +       case 5:         /* Clear ring buffer */
21868 +               return 0;
21869 +
21870 +       case 6:         /* Disable logging to console */
21871 +       case 7:         /* Enable logging to console */
21872 +       case 8:         /* Set level of messages printed to console */
21873 +               break;
21874 +
21875 +       case 9:         /* Number of chars in the log buffer */
21876 +               return 0;
21877 +       case 10:        /* Size of the log buffer */
21878 +               return 0;
21879 +       default:
21880 +               error = -EINVAL;
21881 +               break;
21882 +       }
21883 +       return error;
21884 +}
21885 +
21886 +
21887 +/* virtual host info names */
21888 +
21889 +static char * vx_vhi_name(struct vx_info *vxi, int id)
21890 +{      /* map a VHIN_* id to the matching name buffer inside vxi, else NULL */
21891 +       switch (id) {
21892 +       case VHIN_CONTEXT:
21893 +               return vxi->vx_name;
21894 +       case VHIN_SYSNAME:
21895 +               return vxi->cvirt.utsname.sysname;
21896 +       case VHIN_NODENAME:
21897 +               return vxi->cvirt.utsname.nodename;
21898 +       case VHIN_RELEASE:
21899 +               return vxi->cvirt.utsname.release;
21900 +       case VHIN_VERSION:
21901 +               return vxi->cvirt.utsname.version;
21902 +       case VHIN_MACHINE:
21903 +               return vxi->cvirt.utsname.machine;
21904 +       case VHIN_DOMAINNAME:
21905 +               return vxi->cvirt.utsname.domainname;
21906 +       default:
21907 +               return NULL;
21908 +       }
21909 +       return NULL;    /* not reached: every switch arm returns */
21910 +}
21911 +
21912 +int vc_set_vhi_name(uint32_t id, void __user *data)
21913 +{
21914 +       struct vx_info *vxi;
21915 +       struct vcmd_vhi_name_v0 vc_data;
21916 +       char *name;
21917 +       /* overwrite the name selected by vc_data.field in context id */
21918 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
21919 +               return -EFAULT;
21920 +
21921 +       vxi = lookup_vx_info(id);
21922 +       if (!vxi)
21923 +               return -ESRCH;
21924 +
21925 +       name = vx_vhi_name(vxi, vc_data.field);
21926 +       if (name)
21927 +               memcpy(name, vc_data.name, 65); /* NOTE(review): 65 raw bytes, no NUL enforced; target size assumed >= 65 */
21928 +       put_vx_info(vxi);
21929 +       return (name ? 0 : -EFAULT);    /* unknown field yields -EFAULT */
21930 +}
21931 +
21932 +int vc_get_vhi_name(uint32_t id, void __user *data)
21933 +{
21934 +       struct vx_info *vxi;
21935 +       struct vcmd_vhi_name_v0 vc_data;
21936 +       char *name;
21937 +
21938 +       /* copy the name selected by vc_data.field to user space; 0 or -errno */
21939 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
21940 +               return -EFAULT;
21941 +
21942 +       vxi = lookup_vx_info(id);
21943 +       if (!vxi)
21944 +               return -ESRCH;
21945 +
21946 +       name = vx_vhi_name(vxi, vc_data.field);
21947 +       if (name)
21948 +               memcpy(vc_data.name, name, 65);
21949 +       /* drop the lookup reference on every path, also if the copy-out fails */
21950 +       put_vx_info(vxi);
21951 +
21952 +       if (!name || copy_to_user (data, &vc_data, sizeof(vc_data)))
21953 +               return -EFAULT; /* unknown field or faulting user buffer */
21954 +       return 0;
21955 +}
21956 +
21957 +#ifdef CONFIG_VSERVER_VTIME
21958 +
21959 +/* virtualized time base */
21960 +
21961 +void vx_gettimeofday(struct timeval *tv)
21962 +{
21963 +       do_gettimeofday(tv);
21964 +       if (!vx_flags(VXF_VIRT_TIME, 0))
21965 +               return;
21966 +       /* apply the bias_tv offset of the current context */
21967 +       tv->tv_sec += current->vx_info->cvirt.bias_tv.tv_sec;
21968 +       tv->tv_usec += current->vx_info->cvirt.bias_tv.tv_usec;
21969 +       /* carry or borrow a single second to renormalize tv_usec */
21970 +       if (tv->tv_usec >= USEC_PER_SEC) {
21971 +               tv->tv_sec++;
21972 +               tv->tv_usec -= USEC_PER_SEC;
21973 +       } else if (tv->tv_usec < 0) {
21974 +               tv->tv_sec--;
21975 +               tv->tv_usec += USEC_PER_SEC;
21976 +       }
21977 +}
21978 +
21979 +int vx_settimeofday(struct timespec *ts)
21980 +{
21981 +       struct timeval tv;
21982 +       /* when the VXF_VIRT_TIME test fails, set the time via do_settimeofday() */
21983 +       if (!vx_flags(VXF_VIRT_TIME, 0))
21984 +               return do_settimeofday(ts);
21985 +       /* otherwise only store the offset between ts and the current time */
21986 +       do_gettimeofday(&tv);
21987 +       current->vx_info->cvirt.bias_tv.tv_sec =
21988 +               ts->tv_sec - tv.tv_sec;
21989 +       current->vx_info->cvirt.bias_tv.tv_usec =
21990 +               (ts->tv_nsec/NSEC_PER_USEC) - tv.tv_usec;      /* may be negative */
21991 +       return 0;
21992 +}
21993 +
21994 +#endif
21995 +
21996 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/cvirt_init.h linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/cvirt_init.h
21997 --- linux-2.6.16.20/kernel/vserver/cvirt_init.h 1970-01-01 01:00:00 +0100
21998 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/cvirt_init.h    2006-05-29 17:38:33 +0200
21999 @@ -0,0 +1,99 @@
22000 +
22001 +
22002 +extern uint64_t vx_idle_jiffies(void);
22003 +
22004 +static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt)
22005 +{
22006 +       uint64_t idle_jiffies = vx_idle_jiffies();
22007 +       uint64_t nsuptime;
22008 +       /* bias_uptime is the monotonic clock at init; bias_clock derives from it */
22009 +       do_posix_clock_monotonic_gettime(&cvirt->bias_uptime);
22010 +       nsuptime = (unsigned long long)cvirt->bias_uptime.tv_sec
22011 +               * NSEC_PER_SEC + cvirt->bias_uptime.tv_nsec;
22012 +       cvirt->bias_clock = nsec_to_clock_t(nsuptime);
22013 +       cvirt->bias_tv.tv_sec = 0;
22014 +       cvirt->bias_tv.tv_usec = 0;
22015 +       /* idle bias from vx_idle_jiffies(); all task counters start at zero */
22016 +       jiffies_to_timespec(idle_jiffies, &cvirt->bias_idle);
22017 +       atomic_set(&cvirt->nr_threads, 0);
22018 +       atomic_set(&cvirt->nr_running, 0);
22019 +       atomic_set(&cvirt->nr_uninterruptible, 0);
22020 +       atomic_set(&cvirt->nr_onhold, 0);
22021 +       /* start with a copy of system_utsname, taken under uts_sem */
22022 +       down_read(&uts_sem);
22023 +       cvirt->utsname = system_utsname;
22024 +       up_read(&uts_sem);
22025 +       /* load state: zeroed values, last update stamped with jiffies */
22026 +       spin_lock_init(&cvirt->load_lock);
22027 +       cvirt->load_last = jiffies;
22028 +       atomic_set(&cvirt->load_updates, 0);
22029 +       cvirt->load[0] = 0;
22030 +       cvirt->load[1] = 0;
22031 +       cvirt->load[2] = 0;
22032 +       atomic_set(&cvirt->total_forks, 0);
22033 +       /* syslog state: lock, wait queue and zeroed positions */
22034 +       spin_lock_init(&cvirt->syslog.logbuf_lock);
22035 +       init_waitqueue_head(&cvirt->syslog.log_wait);
22036 +       cvirt->syslog.log_start = 0;
22037 +       cvirt->syslog.log_end = 0;
22038 +       cvirt->syslog.con_start = 0;
22039 +       cvirt->syslog.logged_chars = 0;
22040 +}
22041 +
22042 +static inline
22043 +void vx_info_init_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu)
22044 +{      /* empty for now: the only statement is the disabled cpustat reset */
22045 +       // cvirt_pc->cpustat = { 0 };
22046 +}
22047 +
22048 +static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt)
22049 +{      /* debug builds only: pass each counter to vxwprintk, the value doubles as its condition */
22050 +#ifdef CONFIG_VSERVER_DEBUG
22051 +       int value;
22052 +
22053 +       vxwprintk((value = atomic_read(&cvirt->nr_threads)),
22054 +               "!!! cvirt: %p[nr_threads] = %d on exit.",
22055 +               cvirt, value);
22056 +       vxwprintk((value = atomic_read(&cvirt->nr_running)),
22057 +               "!!! cvirt: %p[nr_running] = %d on exit.",
22058 +               cvirt, value);
22059 +       vxwprintk((value = atomic_read(&cvirt->nr_uninterruptible)),
22060 +               "!!! cvirt: %p[nr_uninterruptible] = %d on exit.",
22061 +               cvirt, value);
22062 +       vxwprintk((value = atomic_read(&cvirt->nr_onhold)),
22063 +               "!!! cvirt: %p[nr_onhold] = %d on exit.",
22064 +               cvirt, value);
22065 +#endif
22066 +       return;
22067 +}
22068 +
22069 +static inline
22070 +void vx_info_exit_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu)
22071 +{      /* nothing to tear down; both arguments are unused */
22072 +       return;
22073 +}
22074 +
22075 +
22076 +static inline void vx_info_init_cacct(struct _vx_cacct *cacct)
22077 +{
22078 +       int row, col;
22079 +
22080 +       /* zero the sock (5x3), slab (8) and page (5x4) accounting counters */
22081 +       for (row = 0; row < 5; row++) {
22082 +               for (col = 0; col < 3; col++) {
22083 +                       atomic_set(&cacct->sock[row][col].count, 0);
22084 +                       atomic_set(&cacct->sock[row][col].total, 0);
22085 +               }
22086 +       }
22087 +       for (row = 0; row < 8; row++)
22088 +               atomic_set(&cacct->slab[row], 0);
22089 +       for (row = 0; row < 5; row++)
22090 +               for (col = 0; col < 4; col++)
22091 +                       atomic_set(&cacct->page[row][col], 0);
22092 +}
22093 +
22094 +static inline void vx_info_exit_cacct(struct _vx_cacct *cacct)
22095 +{      /* nothing to tear down; cacct is unused */
22096 +       return;
22097 +}
22098 +
22099 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/cvirt_proc.h linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/cvirt_proc.h
22100 --- linux-2.6.16.20/kernel/vserver/cvirt_proc.h 1970-01-01 01:00:00 +0100
22101 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/cvirt_proc.h    2006-04-26 19:07:00 +0200
22102 @@ -0,0 +1,129 @@
22103 +#ifndef _VX_CVIRT_PROC_H
22104 +#define _VX_CVIRT_PROC_H
22105 +
22106 +#include <linux/sched.h>
22107 +
22108 +
22109 +#define LOAD_INT(x) ((x) >> FSHIFT)
22110 +#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
22111 +
22112 +static inline
22113 +int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer)
22114 +{
22115 +       int length = 0;
22116 +       int a, b, c;
22117 +       /* format cvirt state as text into buffer; returns the characters written */
22118 +       length += sprintf(buffer + length,
22119 +               "BiasUptime:\t%lu.%02lu\n",
22120 +                       (unsigned long)cvirt->bias_uptime.tv_sec,
22121 +                       (cvirt->bias_uptime.tv_nsec / (NSEC_PER_SEC / 100)));
22122 +       length += sprintf(buffer + length,
22123 +               "SysName:\t%.*s\n"
22124 +               "NodeName:\t%.*s\n"
22125 +               "Release:\t%.*s\n"
22126 +               "Version:\t%.*s\n"
22127 +               "Machine:\t%.*s\n"
22128 +               "DomainName:\t%.*s\n"
22129 +               ,__NEW_UTS_LEN, cvirt->utsname.sysname
22130 +               ,__NEW_UTS_LEN, cvirt->utsname.nodename
22131 +               ,__NEW_UTS_LEN, cvirt->utsname.release
22132 +               ,__NEW_UTS_LEN, cvirt->utsname.version
22133 +               ,__NEW_UTS_LEN, cvirt->utsname.machine
22134 +               ,__NEW_UTS_LEN, cvirt->utsname.domainname
22135 +               );
22136 +       /* add FIXED_1/200 (half a hundredth) before splitting into int.frac */
22137 +       a = cvirt->load[0] + (FIXED_1/200);
22138 +       b = cvirt->load[1] + (FIXED_1/200);
22139 +       c = cvirt->load[2] + (FIXED_1/200);
22140 +       length += sprintf(buffer + length,
22141 +               "nr_threads:\t%d\n"
22142 +               "nr_running:\t%d\n"
22143 +               "nr_unintr:\t%d\n"
22144 +               "nr_onhold:\t%d\n"
22145 +               "load_updates:\t%d\n"
22146 +               "loadavg:\t%d.%02d %d.%02d %d.%02d\n"
22147 +               "total_forks:\t%d\n"
22148 +               ,atomic_read(&cvirt->nr_threads)
22149 +               ,atomic_read(&cvirt->nr_running)
22150 +               ,atomic_read(&cvirt->nr_uninterruptible)
22151 +               ,atomic_read(&cvirt->nr_onhold)
22152 +               ,atomic_read(&cvirt->load_updates)
22153 +               ,LOAD_INT(a), LOAD_FRAC(a)
22154 +               ,LOAD_INT(b), LOAD_FRAC(b)
22155 +               ,LOAD_INT(c), LOAD_FRAC(c)
22156 +               ,atomic_read(&cvirt->total_forks)
22157 +               );
22158 +       return length;  /* NOTE(review): sprintf is unbounded; buffer size is the caller's job */
22159 +}
22160 +
22161 +static inline
22162 +int vx_info_proc_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc,
22163 +       char *buffer, int cpu)
22164 +{
22165 +       int length = 0;         /* nothing is written to buffer for the per-cpu part */
22166 +       return length;
22167 +}
22168 +
22169 +static inline long vx_sock_count(struct _vx_cacct *cacct, int type, int pos)
22170 +{
22171 +       return atomic_read(&cacct->sock[type][pos].count);      /* type/pos are not range-checked here */
22172 +}
22173 +
22174 +
22175 +static inline long vx_sock_total(struct _vx_cacct *cacct, int type, int pos)
22176 +{
22177 +       return atomic_read(&cacct->sock[type][pos].total);      /* type/pos are not range-checked here */
22178 +}
22179 +
22180 +
22181 +#define VX_SOCKA_TOP   \
22182 +       "Type\t    recv #/bytes\t\t   send #/bytes\t\t    fail #/bytes\n"
22183 +/* format socket, slab and page accounting as text into buffer; returns the number of bytes written */
22184 +static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer)
22185 +{
22186 +       int i,j, length = 0;
22187 +       static char *type[] = { "UNSPEC", "UNIX", "INET", "INET6", "OTHER" };
22188 +
22189 +       length += sprintf(buffer + length, VX_SOCKA_TOP);
22190 +       for (i=0; i<5; i++) {   /* one row per entry of type[] */
22191 +               length += sprintf(buffer + length,
22192 +                       "%s:", type[i]);
22193 +               for (j=0; j<3; j++) {   /* three count/total columns, see VX_SOCKA_TOP */
22194 +                       length += sprintf(buffer + length,
22195 +                               "\t%10lu/%-10lu"        /* NOTE(review): %lu is fed the (signed) long helpers */
22196 +                               ,vx_sock_count(cacct, i, j)
22197 +                               ,vx_sock_total(cacct, i, j)
22198 +                               );
22199 +               }
22200 +               buffer[length++] = '\n';        /* terminate the row */
22201 +       }
22202 +
22203 +       length += sprintf(buffer + length, "\n");
22204 +       length += sprintf(buffer + length,
22205 +               "slab:\t %8u %8u %8u %8u\n"     /* slab[] is printed in index order 1, 4, 0, 2 */
22206 +               ,atomic_read(&cacct->slab[1])
22207 +               ,atomic_read(&cacct->slab[4])
22208 +               ,atomic_read(&cacct->slab[0])
22209 +               ,atomic_read(&cacct->slab[2])
22210 +               );
22211 +
22212 +       length += sprintf(buffer + length, "\n");
22213 +       for (i=0; i<5; i++) {   /* five page[] rows of eight counters each */
22214 +               length += sprintf(buffer + length,
22215 +                       "page[%d]: %8u %8u %8u %8u\t %8u %8u %8u %8u\n"
22216 +                       ,i
22217 +                       ,atomic_read(&cacct->page[i][0])
22218 +                       ,atomic_read(&cacct->page[i][1])
22219 +                       ,atomic_read(&cacct->page[i][2])
22220 +                       ,atomic_read(&cacct->page[i][3])
22221 +                       ,atomic_read(&cacct->page[i][4])
22222 +                       ,atomic_read(&cacct->page[i][5])
22223 +                       ,atomic_read(&cacct->page[i][6])
22224 +                       ,atomic_read(&cacct->page[i][7])
22225 +                       );
22226 +       }
22227 +
22228 +       return length;
22229 +}
22230 +
22231 +#endif /* _VX_CVIRT_PROC_H */
22232 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/debug.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/debug.c
22233 --- linux-2.6.16.20/kernel/vserver/debug.c      1970-01-01 01:00:00 +0100
22234 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/debug.c 2006-04-26 19:07:00 +0200
22235 @@ -0,0 +1,36 @@
22236 +/*
22237 + *  kernel/vserver/debug.c
22238 + *
22239 + *  Copyright (C) 2005  Herbert Pötzl
22240 + *
22241 + *  V0.01  vx_info dump support
22242 + *
22243 + */
22244 +
22245 +#include <linux/errno.h>
22246 +#include <linux/kernel.h>
22247 +#include <linux/module.h>
22248 +
22249 +#include <linux/vserver/cvirt_def.h>
22250 +#include <linux/vserver/limit_def.h>
22251 +#include <linux/vserver/sched_def.h>
22252 +
22253 +/* printk a one-line summary of vxi; with level > 0 also dump its sub-structures */
22254 +void   dump_vx_info(struct vx_info *vxi, int level)
22255 +{
22256 +       printk("vx_info %p[#%d, %d.%d, %4x]\n", vxi, vxi->vx_id,        /* vxi must not be NULL */
22257 +               atomic_read(&vxi->vx_usecnt),
22258 +               atomic_read(&vxi->vx_tasks),
22259 +               vxi->vx_state);
22260 +       if (level > 0) {
22261 +               __dump_vx_limit(&vxi->limit);
22262 +               __dump_vx_sched(&vxi->sched);
22263 +               __dump_vx_cvirt(&vxi->cvirt);
22264 +               __dump_vx_cacct(&vxi->cacct);
22265 +       }
22266 +       printk("---\n");        /* end-of-record marker */
22267 +}
22268 +
22269 +
22270 +EXPORT_SYMBOL_GPL(dump_vx_info);
22271 +
22272 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/dlimit.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/dlimit.c
22273 --- linux-2.6.16.20/kernel/vserver/dlimit.c     1970-01-01 01:00:00 +0100
22274 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/dlimit.c        2006-04-29 02:58:07 +0200
22275 @@ -0,0 +1,530 @@
22276 +/*
22277 + *  linux/kernel/vserver/dlimit.c
22278 + *
22279 + *  Virtual Server: Context Disk Limits
22280 + *
22281 + *  Copyright (C) 2004-2005  Herbert Pötzl
22282 + *
22283 + *  V0.01  initial version
22284 + *  V0.02  compat32 splitup
22285 + *
22286 + */
22287 +
22288 +#include <linux/fs.h>
22289 +#include <linux/namespace.h>
22290 +#include <linux/namei.h>
22291 +#include <linux/statfs.h>
22292 +#include <linux/compat.h>
22293 +#include <linux/vserver/switch.h>
22294 +#include <linux/vs_context.h>
22295 +#include <linux/vs_tag.h>
22296 +#include <linux/vs_dlimit.h>
22297 +#include <linux/vserver/dlimit_cmd.h>
22298 +
22299 +#include <asm/errno.h>
22300 +#include <asm/uaccess.h>
22301 +
22302 +/*     __alloc_dl_info()
22303 +
22304 +       * allocate an initialized dl_info struct
22305 +       * doesn't make it visible (hash)                        */
22306 +
22307 +static struct dl_info *__alloc_dl_info(struct super_block *sb, tag_t tag)
22308 +{
22309 +       struct dl_info *new = NULL;
22310 +
22311 +       vxdprintk(VXD_CBIT(dlim, 5),
22312 +               "alloc_dl_info(%p,%d)*", sb, tag);
22313 +
22314 +       /* would this benefit from a slab cache? */
22315 +       new = kmalloc(sizeof(struct dl_info), GFP_KERNEL);      /* GFP_KERNEL may sleep */
22316 +       if (!new)
22317 +               return NULL;    /* out of memory, callers must check */
22318 +
22319 +       memset (new, 0, sizeof(struct dl_info));
22320 +       new->dl_tag = tag;
22321 +       new->dl_sb = sb;
22322 +       INIT_RCU_HEAD(&new->dl_rcu);
22323 +       INIT_HLIST_NODE(&new->dl_hlist);
22324 +       spin_lock_init(&new->dl_lock);
22325 +       atomic_set(&new->dl_refcnt, 0);         /* both counters start at zero */
22326 +       atomic_set(&new->dl_usecnt, 0);
22327 +
22328 +       /* rest of init goes here */
22329 +
22330 +       vxdprintk(VXD_CBIT(dlim, 4),
22331 +               "alloc_dl_info(%p,%d) = %p", sb, tag, new);
22332 +       return new;
22333 +}
22334 +
22335 +/*     __dealloc_dl_info()
22336 +
22337 +       * final disposal of dl_info                             */
22338 +
22339 +static void __dealloc_dl_info(struct dl_info *dli)
22340 +{
22341 +       vxdprintk(VXD_CBIT(dlim, 4),
22342 +               "dealloc_dl_info(%p)", dli);
22343 +
22344 +       dli->dl_hlist.next = LIST_POISON1;      /* poison the fields before freeing */
22345 +       dli->dl_tag = -1;
22346 +       dli->dl_sb = NULL;
22347 +
22348 +       BUG_ON(atomic_read(&dli->dl_usecnt));   /* both counters must be zero here */
22349 +       BUG_ON(atomic_read(&dli->dl_refcnt));
22350 +
22351 +       kfree(dli);
22352 +}
22353 +
22354 +
22355 +/*     hash table for dl_info hash */
22356 +
22357 +#define DL_HASH_SIZE   13
22358 +
22359 +struct hlist_head dl_info_hash[DL_HASH_SIZE];
22360 +
22361 +static spinlock_t dl_info_hash_lock = SPIN_LOCK_UNLOCKED;
22362 +
22363 +
22364 +static inline unsigned int __hashval(struct super_block *sb, tag_t tag)
22365 +{
22366 +       return ((tag ^ (unsigned long)sb) % DL_HASH_SIZE);      /* bucket index in dl_info_hash[] */
22367 +}
22368 +
22369 +
22370 +
22371 +/*     __hash_dl_info()
22372 +
22373 +       * add the dli to the global hash table
22374 +       * requires the hash_lock to be held                     */
22375 +
22376 +static inline void __hash_dl_info(struct dl_info *dli)
22377 +{
22378 +       struct hlist_head *head;
22379 +
22380 +       vxdprintk(VXD_CBIT(dlim, 6),
22381 +               "__hash_dl_info: %p[#%d]", dli, dli->dl_tag);   /* dli must not be NULL */
22382 +       get_dl_info(dli);       /* paired with put_dl_info() in __unhash_dl_info() */
22383 +       head = &dl_info_hash[__hashval(dli->dl_sb, dli->dl_tag)];
22384 +       hlist_add_head_rcu(&dli->dl_hlist, head);
22385 +}
22386 +
22387 +/*     __unhash_dl_info()
22388 +
22389 +       * remove the dli from the global hash table
22390 +       * requires the hash_lock to be held                     */
22391 +
22392 +static inline void __unhash_dl_info(struct dl_info *dli)
22393 +{
22394 +       vxdprintk(VXD_CBIT(dlim, 6),
22395 +               "__unhash_dl_info: %p[#%d]", dli, dli->dl_tag);
22396 +       hlist_del_rcu(&dli->dl_hlist);
22397 +       put_dl_info(dli);       /* paired with get_dl_info() in __hash_dl_info() */
22398 +}
22399 +
22400 +
22401 +/*     __lookup_dl_info()
22402 +
22403 +       * requires the rcu_read_lock()
22404 +       * doesn't increment the dl_refcnt                       */
22405 +
22406 +static inline struct dl_info *__lookup_dl_info(struct super_block *sb, tag_t tag)
22407 +{
22408 +       struct hlist_head *head = &dl_info_hash[__hashval(sb, tag)];
22409 +       struct hlist_node *pos;
22410 +       struct dl_info *dli;
22411 +
22412 +       hlist_for_each_entry_rcu(dli, pos, head, dl_hlist) {
22413 +               /* walk the bucket selected by __hashval();     */
22414 +               /* an entry matches only if both the tag and    */
22415 +               /* the super_block pointer are identical        */
22416 +
22417 +               if (dli->dl_tag == tag && dli->dl_sb == sb) {
22418 +                       return dli;
22419 +               }
22420 +       }
22421 +       return NULL;    /* no entry for (sb, tag) */
22422 +}
22423 +
22424 +/* look up the dl_info for (sb, tag) under rcu_read_lock() and pass it through get_dl_info() */
22425 +struct dl_info *locate_dl_info(struct super_block *sb, tag_t tag)
22426 +{
22427 +       struct dl_info *dli;
22428 +
22429 +       rcu_read_lock();
22430 +       dli = get_dl_info(__lookup_dl_info(sb, tag));   /* lookup may yield NULL; presumably get_dl_info() passes it through -- confirm */
22431 +       vxdprintk(VXD_CBIT(dlim, 7),
22432 +               "locate_dl_info(%p,#%d) = %p", sb, tag, dli);
22433 +       rcu_read_unlock();
22434 +       return dli;     /* callers in this file release it with put_dl_info() */
22435 +}
22436 +
22437 +void rcu_free_dl_info(struct rcu_head *head)    /* frees the dl_info embedding head unless it is still in use */
22438 +{
22439 +       struct dl_info *dli = container_of(head, struct dl_info, dl_rcu);
22440 +       int usecnt, refcnt;
22441 +
22442 +       BUG_ON(!dli || !head);  /* NOTE(review): checked only after container_of() already used head */
22443 +
22444 +       usecnt = atomic_read(&dli->dl_usecnt);
22445 +       BUG_ON(usecnt < 0);
22446 +
22447 +       refcnt = atomic_read(&dli->dl_refcnt);
22448 +       BUG_ON(refcnt < 0);
22449 +
22450 +       vxdprintk(VXD_CBIT(dlim, 3),
22451 +               "rcu_free_dl_info(%p)", dli);
22452 +       if (!usecnt)
22453 +               __dealloc_dl_info(dli);
22454 +       else
22455 +               printk("!!! rcu didn't free\n");        /* usecnt != 0: the object is left allocated */
22456 +}
22457 +
22458 +/*     do_addrem_dlimit()
22459 +       * add (add != 0) or remove the dl_info for (sb of name, id)
22460 +       * returns 0, a path walk error, -EINVAL, -ENOMEM, -EEXIST or -ESRCH  */
22461 +static int do_addrem_dlimit(uint32_t id, const char __user *name,
22462 +       uint32_t flags, int add)
22463 +{
22464 +       struct nameidata nd;
22465 +       int ret;
22466 +
22467 +       ret = user_path_walk_link(name, &nd);
22468 +       if (!ret) {
22469 +               struct super_block *sb;
22470 +               struct dl_info *dli;
22471 +
22472 +               ret = -EINVAL;
22473 +               if (!nd.dentry->d_inode)
22474 +                       goto out_release;
22475 +               if (!(sb = nd.dentry->d_inode->i_sb))
22476 +                       goto out_release;
22477 +
22478 +               if (add) {
22479 +                       dli = __alloc_dl_info(sb, id);  /* allocate before taking the spinlock */
22480 +                       spin_lock(&dl_info_hash_lock);
22481 +                       /* the allocation may have failed: never hash a NULL dli */
22482 +                       ret = dli ? -EEXIST : -ENOMEM;
22483 +                       if (!dli || __lookup_dl_info(sb, id))
22484 +                               goto out_unlock;
22485 +                       __hash_dl_info(dli);
22486 +                       dli = NULL;     /* now in the hash table, must not be freed below */
22487 +               } else {
22488 +                       spin_lock(&dl_info_hash_lock);
22489 +                       dli = __lookup_dl_info(sb, id);
22490 +
22491 +                       ret = -ESRCH;
22492 +                       if (!dli)
22493 +                               goto out_unlock;
22494 +                       __unhash_dl_info(dli);
22495 +               }
22496 +               ret = 0;
22497 +       out_unlock:
22498 +               spin_unlock(&dl_info_hash_lock);
22499 +               if (add && dli)         /* duplicate entry: discard the unused allocation */
22500 +                       __dealloc_dl_info(dli);
22501 +       out_release:
22502 +               path_release(&nd);
22503 +       }
22504 +       return ret;
22505 +}
22506 +
22507 +int vc_add_dlimit(uint32_t id, void __user *data)
22508 +{
22509 +       struct vcmd_ctx_dlimit_base_v0 vc_data; /* kernel copy of the user request */
22510 +
22511 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
22512 +               return -EFAULT; /* user buffer could not be read */
22513 +
22514 +       return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 1);    /* add == 1 */
22515 +}
22516 +
22517 +int vc_rem_dlimit(uint32_t id, void __user *data)
22518 +{
22519 +       struct vcmd_ctx_dlimit_base_v0 vc_data; /* kernel copy of the user request */
22520 +
22521 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
22522 +               return -EFAULT; /* user buffer could not be read */
22523 +
22524 +       return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 0);    /* add == 0: remove */
22525 +}
22526 +
22527 +#ifdef CONFIG_COMPAT
22528 +
22529 +int vc_add_dlimit_x32(uint32_t id, void __user *data)
22530 +{
22531 +       struct vcmd_ctx_dlimit_base_v0_x32 vc_data;     /* compat layout of the request */
22532 +
22533 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
22534 +               return -EFAULT; /* user buffer could not be read */
22535 +
22536 +       return do_addrem_dlimit(id,
22537 +               compat_ptr(vc_data.name_ptr), vc_data.flags, 1);        /* name_ptr converted via compat_ptr(); add == 1 */
22538 +}
22539 +
22540 +int vc_rem_dlimit_x32(uint32_t id, void __user *data)
22541 +{
22542 +       struct vcmd_ctx_dlimit_base_v0_x32 vc_data;     /* compat layout of the request */
22543 +
22544 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
22545 +               return -EFAULT; /* user buffer could not be read */
22546 +
22547 +       return do_addrem_dlimit(id,
22548 +               compat_ptr(vc_data.name_ptr), vc_data.flags, 0);        /* name_ptr converted via compat_ptr(); add == 0: remove */
22549 +}
22550 +
22551 +#endif /* CONFIG_COMPAT */
22552 +
22553 +/* update the limits of the existing dl_info for (sb of name, id); CDLIM_KEEP leaves a field untouched */
22554 +static inline
22555 +int do_set_dlimit(uint32_t id, const char __user *name,
22556 +       uint32_t space_used, uint32_t space_total,
22557 +       uint32_t inodes_used, uint32_t inodes_total,
22558 +       uint32_t reserved, uint32_t flags)      /* flags is not used in this function */
22559 +{
22560 +       struct nameidata nd;
22561 +       int ret;
22562 +
22563 +       ret = user_path_walk_link(name, &nd);
22564 +       if (!ret) {
22565 +               struct super_block *sb;
22566 +               struct dl_info *dli;
22567 +
22568 +               ret = -EINVAL;
22569 +               if (!nd.dentry->d_inode)
22570 +                       goto out_release;
22571 +               if (!(sb = nd.dentry->d_inode->i_sb))
22572 +                       goto out_release;
22573 +               if ((reserved != CDLIM_KEEP &&          /* reserved is a percentage, 0..100 */
22574 +                       reserved > 100) ||
22575 +                       (inodes_used != CDLIM_KEEP &&   /* used must not exceed total */
22576 +                       inodes_used > inodes_total) ||
22577 +                       (space_used != CDLIM_KEEP &&
22578 +                       space_used > space_total))
22579 +                       goto out_release;
22580 +
22581 +               ret = -ESRCH;
22582 +               dli = locate_dl_info(sb, id);
22583 +               if (!dli)
22584 +                       goto out_release;
22585 +
22586 +               spin_lock(&dli->dl_lock);
22587 +
22588 +               if (inodes_used != CDLIM_KEEP)
22589 +                       dli->dl_inodes_used = inodes_used;
22590 +               if (inodes_total != CDLIM_KEEP)
22591 +                       dli->dl_inodes_total = inodes_total;
22592 +               if (space_used != CDLIM_KEEP) {
22593 +                       dli->dl_space_used = space_used;
22594 +                       dli->dl_space_used <<= 10;      /* scale by 1024; presumably KiB -> bytes, confirm */
22595 +               }
22596 +               if (space_total == CDLIM_INFINITY)
22597 +                       dli->dl_space_total = DLIM_INFINITY;
22598 +               else if (space_total != CDLIM_KEEP) {
22599 +                       dli->dl_space_total = space_total;
22600 +                       dli->dl_space_total <<= 10;     /* same scaling as dl_space_used */
22601 +               }
22602 +               if (reserved != CDLIM_KEEP)
22603 +                       dli->dl_nrlmult = (1 << 10) * (100 - reserved) / 100;   /* non-reserved fraction, scaled by 1024 */
22604 +
22605 +               spin_unlock(&dli->dl_lock);
22606 +
22607 +               put_dl_info(dli);       /* drop what locate_dl_info() handed out */
22608 +               ret = 0;
22609 +
22610 +       out_release:
22611 +               path_release(&nd);
22612 +       }
22613 +       return ret;
22614 +}
22615 +
22616 +int vc_set_dlimit(uint32_t id, void __user *data)
22617 +{
22618 +       struct vcmd_ctx_dlimit_v0 vc_data;      /* kernel copy of the user request */
22619 +
22620 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
22621 +               return -EFAULT; /* user buffer could not be read */
22622 +
22623 +       return do_set_dlimit(id, vc_data.name,
22624 +               vc_data.space_used, vc_data.space_total,
22625 +               vc_data.inodes_used, vc_data.inodes_total,
22626 +               vc_data.reserved, vc_data.flags);
22627 +}
22628 +
22629 +#ifdef CONFIG_COMPAT
22630 +
22631 +int vc_set_dlimit_x32(uint32_t id, void __user *data)
22632 +{
22633 +       struct vcmd_ctx_dlimit_v0_x32 vc_data;  /* compat layout of the request */
22634 +
22635 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
22636 +               return -EFAULT; /* user buffer could not be read */
22637 +
22638 +       return do_set_dlimit(id, compat_ptr(vc_data.name_ptr), /* name_ptr converted via compat_ptr() */
22639 +               vc_data.space_used, vc_data.space_total,
22640 +               vc_data.inodes_used, vc_data.inodes_total,
22641 +               vc_data.reserved, vc_data.flags);
22642 +}
22643 +
22644 +#endif /* CONFIG_COMPAT */
22645 +
22646 +/* read the limits of the dl_info for (sb of name, id) into the given output pointers */
22647 +static inline
22648 +int do_get_dlimit(uint32_t id, const char __user *name,
22649 +       uint32_t *space_used, uint32_t *space_total,
22650 +       uint32_t *inodes_used, uint32_t *inodes_total,
22651 +       uint32_t *reserved, uint32_t *flags)    /* *flags is never written here */
22652 +{
22653 +       struct nameidata nd;
22654 +       int ret;
22655 +
22656 +       ret = user_path_walk_link(name, &nd);
22657 +       if (!ret) {
22658 +               struct super_block *sb;
22659 +               struct dl_info *dli;
22660 +
22661 +               ret = -EINVAL;
22662 +               if (!nd.dentry->d_inode)
22663 +                       goto out_release;
22664 +               if (!(sb = nd.dentry->d_inode->i_sb))
22665 +                       goto out_release;
22666 +
22667 +               ret = -ESRCH;
22668 +               dli = locate_dl_info(sb, id);
22669 +               if (!dli)
22670 +                       goto out_release;
22671 +
22672 +               spin_lock(&dli->dl_lock);
22673 +               *inodes_used = dli->dl_inodes_used;
22674 +               *inodes_total = dli->dl_inodes_total;
22675 +               *space_used = dli->dl_space_used >> 10; /* undo the <<10 scaling applied when storing */
22676 +               if (dli->dl_space_total == DLIM_INFINITY)
22677 +                       *space_total = CDLIM_INFINITY;
22678 +               else
22679 +                       *space_total = dli->dl_space_total >> 10;
22680 +
22681 +               *reserved = 100 - ((dli->dl_nrlmult * 100 + 512) >> 10);        /* rounded back to a percentage */
22682 +               spin_unlock(&dli->dl_lock);
22683 +
22684 +               put_dl_info(dli);       /* drop what locate_dl_info() handed out */
22685 +               /* nothing is copied to user space here, so no -EFAULT path */
22686 +
22687 +               ret = 0;
22688 +       out_release:
22689 +               path_release(&nd);
22690 +       }
22691 +       return ret;
22692 +}
22693 +
22694 +
22695 +int vc_get_dlimit(uint32_t id, void __user *data)
22696 +{
22697 +       struct vcmd_ctx_dlimit_v0 vc_data;      /* request in, result out */
22698 +       int ret;
22699 +
22700 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
22701 +               return -EFAULT; /* user buffer could not be read */
22702 +
22703 +       ret = do_get_dlimit(id, vc_data.name,
22704 +               &vc_data.space_used, &vc_data.space_total,
22705 +               &vc_data.inodes_used, &vc_data.inodes_total,
22706 +               &vc_data.reserved, &vc_data.flags);
22707 +       if (ret)
22708 +               return ret;     /* nothing is copied back on failure */
22709 +
22710 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
22711 +               return -EFAULT; /* user buffer could not be written */
22712 +       return 0;
22713 +}
22714 +
22715 +#ifdef CONFIG_COMPAT
22716 +
22717 +int vc_get_dlimit_x32(uint32_t id, void __user *data)
22718 +{
22719 +       struct vcmd_ctx_dlimit_v0_x32 vc_data;  /* compat layout: request in, result out */
22720 +       int ret;
22721 +
22722 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
22723 +               return -EFAULT; /* user buffer could not be read */
22724 +
22725 +       ret = do_get_dlimit(id, compat_ptr(vc_data.name_ptr),   /* name_ptr converted via compat_ptr() */
22726 +               &vc_data.space_used, &vc_data.space_total,
22727 +               &vc_data.inodes_used, &vc_data.inodes_total,
22728 +               &vc_data.reserved, &vc_data.flags);
22729 +       if (ret)
22730 +               return ret;     /* nothing is copied back on failure */
22731 +
22732 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
22733 +               return -EFAULT; /* user buffer could not be written */
22734 +       return 0;
22735 +}
22736 +
22737 +#endif /* CONFIG_COMPAT */
22738 +
22739 +/* clamp the statfs values in buf to the disk limits of the current tag on sb, if such limits exist */
22740 +void vx_vsi_statfs(struct super_block *sb, struct kstatfs *buf)
22741 +{
22742 +       struct dl_info *dli;
22743 +       __u64 blimit, bfree, bavail;
22744 +       __u32 ifree;
22745 +
22746 +       dli = locate_dl_info(sb, dx_current_tag());
22747 +       if (!dli)
22748 +               return;         /* no limit entry: buf is left unchanged */
22749 +
22750 +       spin_lock(&dli->dl_lock);
22751 +       if (dli->dl_inodes_total == (unsigned long)DLIM_INFINITY)
22752 +               goto no_ilim;   /* inode count is unlimited */
22753 +
22754 +       /* reduce max inodes available to limit */
22755 +       if (buf->f_files > dli->dl_inodes_total)
22756 +               buf->f_files = dli->dl_inodes_total;
22757 +
22758 +       ifree = dli->dl_inodes_total - dli->dl_inodes_used;     /* NOTE(review): no guard for used > total here */
22759 +       /* reduce free inodes to min */
22760 +       if (ifree < buf->f_ffree)
22761 +               buf->f_ffree = ifree;
22762 +
22763 +no_ilim:
22764 +       if (dli->dl_space_total == DLIM_INFINITY)
22765 +               goto no_blim;   /* space is unlimited */
22766 +
22767 +       blimit = dli->dl_space_total >> sb->s_blocksize_bits;   /* limit expressed in filesystem blocks */
22768 +
22769 +       if (dli->dl_space_total < dli->dl_space_used)
22770 +               bfree = 0;      /* already over the limit */
22771 +       else
22772 +               bfree = (dli->dl_space_total - dli->dl_space_used)
22773 +                       >> sb->s_blocksize_bits;
22774 +
22775 +       bavail = ((dli->dl_space_total >> 10) * dli->dl_nrlmult);       /* total scaled by the 1/1024-based multiplier */
22776 +       if (bavail < dli->dl_space_used)
22777 +               bavail = 0;
22778 +       else
22779 +               bavail = (bavail - dli->dl_space_used)
22780 +                       >> sb->s_blocksize_bits;
22781 +
22782 +       /* reduce max space available to limit */
22783 +       if (buf->f_blocks > blimit)
22784 +               buf->f_blocks = blimit;
22785 +
22786 +       /* reduce free space to min */
22787 +       if (bfree < buf->f_bfree)
22788 +               buf->f_bfree = bfree;
22789 +
22790 +       /* reduce avail space to min */
22791 +       if (bavail < buf->f_bavail)
22792 +               buf->f_bavail = bavail;
22793 +
22794 +no_blim:
22795 +       spin_unlock(&dli->dl_lock);
22796 +       put_dl_info(dli);       /* drop what locate_dl_info() handed out */
22797 +
22798 +       return;
22799 +}
22800 +
22801 +#include <linux/module.h>
22802 +
22803 +EXPORT_SYMBOL_GPL(locate_dl_info);
22804 +EXPORT_SYMBOL_GPL(rcu_free_dl_info);
22805 +
22806 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/helper.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/helper.c
22807 --- linux-2.6.16.20/kernel/vserver/helper.c     1970-01-01 01:00:00 +0100
22808 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/helper.c        2006-04-29 02:58:07 +0200
22809 @@ -0,0 +1,210 @@
22810 +/*
22811 + *  linux/kernel/vserver/helper.c
22812 + *
22813 + *  Virtual Context Support
22814 + *
22815 + *  Copyright (C) 2004-2005  Herbert Pötzl
22816 + *
22817 + *  V0.01  basic helper
22818 + *
22819 + */
22820 +
22821 +#include <linux/errno.h>
22822 +#include <linux/kmod.h>
22823 +#include <linux/sched.h>
22824 +#include <linux/reboot.h>
22825 +#include <linux/vs_context.h>
22826 +#include <linux/vs_network.h>
22827 +#include <linux/vserver/signal.h>
22828 +
22829 +#include <asm/uaccess.h>
22830 +#include <asm/unistd.h>
22831 +
22832 +
22833 +char vshelper_path[255] = "/sbin/vshelper";
22834 +
22835 +/* run the helper "name" via call_usermodehelper(), log the outcome and return its result */
22836 +static int do_vshelper(char *name, char *argv[], char *envp[], int sync)
22837 +{
22838 +       int ret;
22839 +
22840 +       if ((ret = call_usermodehelper(name, argv, envp, sync))) {
22841 +               printk( KERN_WARNING    /* non-zero result: always warn */
22842 +                       "%s: (%s %s) returned %s with %d\n",
22843 +                       name, argv[1], argv[2],        /* argv[1] and argv[2] must be valid strings */
22844 +                       sync?"sync":"async", ret);
22845 +       }
22846 +       vxdprintk(VXD_CBIT(switch, 4),
22847 +               "%s: (%s %s) returned %s with %d",
22848 +               name, argv[1], argv[2], sync?"sync":"async", ret);
22849 +       return ret;
22850 +}
22851 +
22852 +/*
22853 + *      vshelper path is set via /proc/sys
22854 + *      invoked by vserver sys_reboot(), with
22855 + *      the following arguments
22856 + *
22857 + *      argv [0] = vshelper_path;
22858 + *      argv [1] = action: "restart", "halt", "poweroff", ...
22859 + *      argv [2] = context identifier
22860 + *
22861 + *      envp [*] = type-specific parameters
22862 + */
22863 +
22864 +long vs_reboot_helper(struct vx_info *vxi, int cmd, void __user *arg)
22865 +{
22866 +       char id_buf[8], cmd_buf[16];
22867 +       char uid_buf[16], pid_buf[16];
22868 +       int ret;
22869 +
22870 +       char *argv[] = {vshelper_path, NULL, id_buf, 0};
22871 +       char *envp[] = {"HOME=/", "TERM=linux",
22872 +                       "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
22873 +                       uid_buf, pid_buf, cmd_buf, 0};
22874 +
22875 +       if (vx_info_state(vxi, VXS_HELPER))
22876 +               return -EAGAIN;
22877 +       vxi->vx_state |= VXS_HELPER;
22878 +
22879 +       snprintf(id_buf, sizeof(id_buf)-1, "%d", vxi->vx_id);
22880 +
22881 +       snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd);
22882 +       snprintf(uid_buf, sizeof(uid_buf)-1, "VS_UID=%d", current->uid);
22883 +       snprintf(pid_buf, sizeof(pid_buf)-1, "VS_PID=%d", current->pid);
22884 +
22885 +       switch (cmd) {
22886 +       case LINUX_REBOOT_CMD_RESTART:
22887 +               argv[1] = "restart";
22888 +               break;
22889 +
22890 +       case LINUX_REBOOT_CMD_HALT:
22891 +               argv[1] = "halt";
22892 +               break;
22893 +
22894 +       case LINUX_REBOOT_CMD_POWER_OFF:
22895 +               argv[1] = "poweroff";
22896 +               break;
22897 +
22898 +       case LINUX_REBOOT_CMD_SW_SUSPEND:
22899 +               argv[1] = "swsusp";
22900 +               break;
22901 +
22902 +       default:
22903 +               vxi->vx_state &= ~VXS_HELPER;
22904 +               return 0;
22905 +       }
22906 +
22907 +#ifndef CONFIG_VSERVER_LEGACY
22908 +       ret = do_vshelper(vshelper_path, argv, envp, 1);
22909 +#else
22910 +       ret = do_vshelper(vshelper_path, argv, envp, 0);
22911 +#endif
22912 +       vxi->vx_state &= ~VXS_HELPER;
22913 +       __wakeup_vx_info(vxi);
22914 +       return (ret) ? -EPERM : 0;
22915 +}
22916 +
22917 +/* run the reboot helper for the current context and optionally kill its tasks */
22918 +long vs_reboot(unsigned int cmd, void __user * arg)
22919 +{
22920 +       struct vx_info *vxi = current->vx_info;
22921 +       long ret = 0;
22922 +
22923 +       vxdprintk(VXD_CBIT(misc, 5),
22924 +               "vs_reboot(%p[#%d],%d)",
22925 +               vxi, vxi?vxi->vx_id:0, cmd);    /* NOTE(review): NULL is tolerated here but vxi is dereferenced below -- confirm callers */
22926 +
22927 +       ret = vs_reboot_helper(vxi, cmd, arg);
22928 +       if (ret)
22929 +               return ret;     /* helper busy or failed */
22930 +
22931 +       vxi->reboot_cmd = cmd;  /* remember the requested command */
22932 +       if (vx_info_flags(vxi, VXF_REBOOT_KILL, 0)) {
22933 +               switch (cmd) {
22934 +               case LINUX_REBOOT_CMD_RESTART:
22935 +               case LINUX_REBOOT_CMD_HALT:
22936 +               case LINUX_REBOOT_CMD_POWER_OFF:
22937 +                       vx_info_kill(vxi, 0, SIGKILL);
22938 +                       vx_info_kill(vxi, 1, SIGKILL); /* falls through to the break below */
22939 +               default:
22940 +                       break;
22941 +               }
22942 +       }
22943 +       return 0;
22944 +}
22945 +
22946 +
22947 +/*
22948 + *      argv [0] = vshelper_path;
22949 + *      argv [1] = action: "startup", "shutdown"
22950 + *      argv [2] = context identifier
22951 + *
22952 + *      envp [*] = type-specific parameters
22953 + */
22954 +
22955 +long vs_state_change(struct vx_info *vxi, unsigned int cmd)
22956 +{
22957 +       char id_buf[8], cmd_buf[16];    /* cmd_buf: "VS_CMD=" + 8 hex digits + NUL = 16 */
22958 +       char *argv[] = {vshelper_path, NULL, id_buf, 0};
22959 +       char *envp[] = {"HOME=/", "TERM=linux",
22960 +                       "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0};
22961 +
22962 +       if (!vx_info_flags(vxi, VXF_SC_HELPER, 0))
22963 +               return 0;       /* helper not enabled for this context */
22964 +
22965 +       snprintf(id_buf, sizeof(id_buf), "%d", vxi->vx_id);     /* size already includes the NUL */
22966 +       snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
22967 +
22968 +       switch (cmd) {
22969 +       case VSC_STARTUP:
22970 +               argv[1] = "startup";
22971 +               break;
22972 +       case VSC_SHUTDOWN:
22973 +               argv[1] = "shutdown";
22974 +               break;
22975 +       default:
22976 +               return 0;       /* unknown command: no helper is run */
22977 +       }
22978 +
22979 +       do_vshelper(vshelper_path, argv, envp, 1);      /* result is deliberately ignored */
22980 +       return 0;
22981 +}
22982 +
22983 +
22984 +/*
22985 + *      argv [0] = vshelper_path;
22986 + *      argv [1] = action: "netup", "netdown"
22987 + *      argv [2] = context identifier
22988 + *
22989 + *      envp [*] = type-specific parameters
22990 + */
22991 +
22992 +long vs_net_change(struct nx_info *nxi, unsigned int cmd)
22993 +{
22994 +       char id_buf[8], cmd_buf[16];    /* cmd_buf: "VS_CMD=" + 8 hex digits + NUL = 16 */
22995 +       char *argv[] = {vshelper_path, NULL, id_buf, 0};
22996 +       char *envp[] = {"HOME=/", "TERM=linux",
22997 +                       "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0};
22998 +
22999 +       if (!nx_info_flags(nxi, NXF_SC_HELPER, 0))
23000 +               return 0;       /* helper not enabled for this network context */
23001 +
23002 +       snprintf(id_buf, sizeof(id_buf), "%d", nxi->nx_id);     /* size already includes the NUL */
23003 +       snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
23004 +
23005 +       switch (cmd) {
23006 +       case VSC_NETUP:
23007 +               argv[1] = "netup";
23008 +               break;
23009 +       case VSC_NETDOWN:
23010 +               argv[1] = "netdown";
23011 +               break;
23012 +       default:
23013 +               return 0;       /* unknown command: no helper is run */
23014 +       }
23015 +
23016 +       do_vshelper(vshelper_path, argv, envp, 1);      /* result is deliberately ignored */
23017 +       return 0;
23018 +}
23019 +
23020 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/history.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/history.c
23021 --- linux-2.6.16.20/kernel/vserver/history.c    1970-01-01 01:00:00 +0100
23022 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/history.c       2006-04-26 19:07:00 +0200
23023 @@ -0,0 +1,184 @@
23024 +/*
23025 + *  kernel/vserver/history.c
23026 + *
23027 + *  Virtual Context History Backtrace
23028 + *
23029 + *  Copyright (C) 2004-2005  Herbert Pötzl
23030 + *
23031 + *  V0.01  basic structure
23032 + *  V0.02  hash/unhash and trace
23033 + *  V0.03  preemption fixes
23034 + *
23035 + */
23036 +
23037 +#include <linux/errno.h>
23038 +#include <linux/module.h>
23039 +#include <linux/types.h>
23040 +#include <linux/ctype.h>
23041 +
23042 +#include <asm/uaccess.h>
23043 +#include <asm/atomic.h>
23044 +#include <asm/unistd.h>
23045 +
23046 +#include <linux/vserver/debug.h>
23047 +#include <linux/vserver/history.h>
23048 +
23049 +
23050 +#ifdef CONFIG_VSERVER_HISTORY
23051 +#define VXH_SIZE       CONFIG_VSERVER_HISTORY_SIZE
23052 +#else
23053 +#define VXH_SIZE       64
23054 +#endif
23055 +
23056 +struct _vx_history {   /* one history ring buffer; a per-CPU instance is defined below */
23057 +       unsigned int counter;   /* advanced by vxh_advance() for each entry handed out while vxh_active is set */
23058 +
23059 +       struct _vx_hist_entry entry[VXH_SIZE+1];        /* slot VXH_SIZE is the scratch entry used while vxh_active is 0 */
23060 +};
23061 +
23062 +
23063 +DEFINE_PER_CPU(struct _vx_history, vx_history_buffer);
23064 +
23065 +unsigned volatile int vxh_active = 1;
23066 +
23067 +static atomic_t sequence = ATOMIC_INIT(0);
23068 +
23069 +
23070 +/*     vxh_advance()
23071 +
23072 +       * requires disabled preemption                          */
23073 +
23074 +struct _vx_hist_entry *vxh_advance(void *loc)
23075 +{      /* hand out this CPU's next history slot, stamped with a global sequence number and loc */
23076 +       unsigned int cpu = smp_processor_id();
23077 +       struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu);
23078 +       struct _vx_hist_entry *entry;
23079 +       unsigned int index;
23080 +
23081 +       index = vxh_active ? (hist->counter++ % VXH_SIZE) : VXH_SIZE;   /* inactive: use the extra slot, ring stays untouched */
23082 +       entry = &hist->entry[index];
23083 +
23084 +       entry->seq = atomic_inc_return(&sequence);      /* sequence is shared by all CPUs */
23085 +       entry->loc = loc;
23086 +       return entry;   /* remaining fields are left for the caller to fill in */
23087 +}
23088 +
23089 +
23090 +#define VXH_LOC_FMTS   "(#%04x,*%d):%p"        /* seq, cpu, loc -- matches VXH_LOC_ARGS */
23091 +
23092 +#define VXH_LOC_ARGS(e)        (e)->seq, cpu, (e)->loc /* NOTE: expects a variable named 'cpu' at the expansion site */
23093 +
23094 +
23095 +#define VXH_VXI_FMTS   "%p[#%d,%d.%d]" /* ptr, xid, usecnt, tasks -- matches VXH_VXI_ARGS */
23096 +
23097 +#define VXH_VXI_ARGS(e)        (e)->vxi.ptr,                   \
23098 +                       (e)->vxi.ptr?(e)->vxi.xid:0,    \
23099 +                       (e)->vxi.ptr?(e)->vxi.usecnt:0, \
23100 +                       (e)->vxi.ptr?(e)->vxi.tasks:0   /* xid/usecnt/tasks print as 0 when vxi.ptr is NULL */
23101 +
23102 +void   vxh_dump_entry(struct _vx_hist_entry *e, unsigned cpu)
23103 +{      /* printk one history entry; 'cpu' is consumed by VXH_LOC_ARGS() */
23104 +       switch (e->type) {
23105 +       case VXH_THROW_OOPS:
23106 +               printk( VXH_LOC_FMTS " oops \n", VXH_LOC_ARGS(e));
23107 +               break;
23108 +
23109 +       case VXH_GET_VX_INFO:
23110 +       case VXH_PUT_VX_INFO:
23111 +               printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n",
23112 +                       VXH_LOC_ARGS(e),
23113 +                       (e->type==VXH_GET_VX_INFO)?"get":"put",
23114 +                       VXH_VXI_ARGS(e));
23115 +               break;
23116 +
23117 +       case VXH_INIT_VX_INFO:
23118 +       case VXH_SET_VX_INFO:
23119 +       case VXH_CLR_VX_INFO:
23120 +               printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n",
23121 +                       VXH_LOC_ARGS(e),
23122 +                       (e->type==VXH_INIT_VX_INFO)?"init":
23123 +                       ((e->type==VXH_SET_VX_INFO)?"set":"clr"),
23124 +                       VXH_VXI_ARGS(e), e->sc.data);   /* these types also print the sc.data pointer */
23125 +               break;
23126 +
23127 +       case VXH_CLAIM_VX_INFO:
23128 +       case VXH_RELEASE_VX_INFO:
23129 +               printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n",
23130 +                       VXH_LOC_ARGS(e),
23131 +                       (e->type==VXH_CLAIM_VX_INFO)?"claim":"release",
23132 +                       VXH_VXI_ARGS(e), e->sc.data);
23133 +               break;
23134 +
23135 +       case VXH_ALLOC_VX_INFO:
23136 +       case VXH_DEALLOC_VX_INFO:
23137 +               printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n",
23138 +                       VXH_LOC_ARGS(e),
23139 +                       (e->type==VXH_ALLOC_VX_INFO)?"alloc":"dealloc",
23140 +                       VXH_VXI_ARGS(e));
23141 +               break;
23142 +
23143 +       case VXH_HASH_VX_INFO:
23144 +       case VXH_UNHASH_VX_INFO:
23145 +               printk( VXH_LOC_FMTS " __%s_vx_info " VXH_VXI_FMTS "\n",
23146 +                       VXH_LOC_ARGS(e),
23147 +                       (e->type==VXH_HASH_VX_INFO)?"hash":"unhash",
23148 +                       VXH_VXI_ARGS(e));
23149 +               break;
23150 +
23151 +       case VXH_LOC_VX_INFO:
23152 +       case VXH_LOOKUP_VX_INFO:
23153 +       case VXH_CREATE_VX_INFO:
23154 +               printk( VXH_LOC_FMTS " __%s_vx_info [#%d] -> " VXH_VXI_FMTS "\n",
23155 +                       VXH_LOC_ARGS(e),
23156 +                       (e->type==VXH_CREATE_VX_INFO)?"create":
23157 +                       ((e->type==VXH_LOC_VX_INFO)?"loc":"lookup"),
23158 +                       e->ll.arg, VXH_VXI_ARGS(e));    /* ll.arg is printed as the looked-up id */
23159 +               break;
23160 +       }       /* no default: entry types not listed above print nothing */
23161 +}
23162 +
23163 +static void __vxh_dump_history(void)
23164 +{      /* print a summary line, then all VXH_SIZE ring slots of every possible CPU, interleaved per slot */
23165 +       unsigned int i,j;
23166 +
23167 +       printk("History:\tSEQ: %8x\tNR_CPUS: %d\n",
23168 +               atomic_read(&sequence), NR_CPUS);
23169 +
23170 +       for (i=0; i < VXH_SIZE; i++) {
23171 +               for_each_cpu(j) {       /* possible CPUs only: per_cpu() is not meaningful for the rest of 0..NR_CPUS */
23172 +                       struct _vx_history *hist =
23173 +                               &per_cpu(vx_history_buffer, j);
23174 +                       unsigned int index = (hist->counter-i) % VXH_SIZE;      /* walk backwards from the write position */
23175 +                       struct _vx_hist_entry *entry = &hist->entry[index];
23176 +
23177 +                       vxh_dump_entry(entry, j);
23178 +               }
23179 +       }
23180 +}
23181 +
23182 +void   vxh_dump_history(void)
23183 +{      /* disable recording, call smp_send_stop() on SMP builds, then dump all histories */
23184 +       vxh_active = 0;
23185 +#ifdef CONFIG_SMP
23186 +       local_irq_enable();     /* interrupts are enabled only around smp_send_stop() */
23187 +       smp_send_stop();
23188 +       local_irq_disable();
23189 +#endif
23190 +       __vxh_dump_history();   /* vxh_active stays 0: recording is not resumed here */
23191 +}
23192 +
23193 +
23194 +/* vserver syscall commands below here */
23195 +
23196 +
23197 +int    vc_dump_history(uint32_t id)
23198 +{      /* syscall command: dump all histories with recording paused; 'id' is unused; always returns 0 */
23199 +       vxh_active = 0;
23200 +       __vxh_dump_history();
23201 +       vxh_active = 1; /* unconditionally re-enables recording, whatever the previous state was */
23202 +
23203 +       return 0;
23204 +}
23205 +
23206 +EXPORT_SYMBOL_GPL(vxh_advance);
23207 +
23208 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/init.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/init.c
23209 --- linux-2.6.16.20/kernel/vserver/init.c       1970-01-01 01:00:00 +0100
23210 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/init.c  2006-04-26 19:07:00 +0200
23211 @@ -0,0 +1,46 @@
23212 +/*
23213 + *  linux/kernel/vserver/init.c
23214 + *
23215 + *  Virtual Server Init
23216 + *
23217 + *  Copyright (C) 2004-2005  Herbert Pötzl
23218 + *
23219 + *  V0.01  basic structure
23220 + *
23221 + */
23222 +
23223 +#include <linux/errno.h>
23224 +#include <linux/init.h>
23225 +#include <linux/module.h>
23226 +
23227 +int    vserver_register_sysctl(void);
23228 +void   vserver_unregister_sysctl(void);
23229 +
23230 +
23231 +static int __init init_vserver(void)
23232 +{      /* module init: registers the vserver sysctl entries when CONFIG_VSERVER_DEBUG is set */
23233 +       int ret = 0;
23234 +
23235 +#ifdef CONFIG_VSERVER_DEBUG
23236 +       vserver_register_sysctl();      /* NOTE(review): return value is discarded, ret stays 0 */
23237 +#endif
23238 +       return ret;
23239 +}
23240 +
23241 +
23242 +static void __exit exit_vserver(void)
23243 +{      /* module exit: undoes the sysctl registration done in init_vserver() */
23244 +
23245 +#ifdef CONFIG_VSERVER_DEBUG
23246 +       vserver_unregister_sysctl();
23247 +#endif
23248 +       return;
23249 +}
23250 +
23251 +long vx_slab[GFP_ZONETYPES];
23252 +long vx_area;
23253 +
23254 +
23255 +module_init(init_vserver);
23256 +module_exit(exit_vserver);
23257 +
23258 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/inode.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/inode.c
23259 --- linux-2.6.16.20/kernel/vserver/inode.c      1970-01-01 01:00:00 +0100
23260 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/inode.c 2006-04-29 02:58:07 +0200
23261 @@ -0,0 +1,369 @@
23262 +/*
23263 + *  linux/kernel/vserver/inode.c
23264 + *
23265 + *  Virtual Server: File System Support
23266 + *
23267 + *  Copyright (C) 2004-2005  Herbert Pötzl
23268 + *
23269 + *  V0.01  separated from vcontext V0.05
23270 + *
23271 + */
23272 +
23273 +#include <linux/sched.h>
23274 +#include <linux/vs_context.h>
23275 +#include <linux/proc_fs.h>
23276 +#include <linux/devpts_fs.h>
23277 +#include <linux/namei.h>
23278 +#include <linux/mount.h>
23279 +#include <linux/parser.h>
23280 +#include <linux/compat.h>
23281 +#include <linux/vserver/inode.h>
23282 +#include <linux/vserver/inode_cmd.h>
23283 +#include <linux/vserver/tag.h>
23284 +
23285 +#include <asm/errno.h>
23286 +#include <asm/uaccess.h>
23287 +
23288 +
23289 +static int __vc_get_iattr(struct inode *in, uint32_t *tag, uint32_t *flags, uint32_t *mask)
23290 +{      /* fill *flags and *mask (and *tag where available) from inode 'in'; returns 0, or -ESRCH without inode/superblock */
23291 +       struct proc_dir_entry *entry;
23292 +
23293 +       if (!in || !in->i_sb)
23294 +               return -ESRCH;
23295 +
23296 +       *flags = IATTR_TAG      /* IATTR_TAG is always set in *flags, even for untagged inodes */
23297 +               | (IS_BARRIER(in) ? IATTR_BARRIER : 0)
23298 +               | (IS_IUNLINK(in) ? IATTR_IUNLINK : 0)
23299 +               | (IS_IMMUTABLE(in) ? IATTR_IMMUTABLE : 0);
23300 +       *mask = IATTR_IUNLINK | IATTR_IMMUTABLE;
23301 +
23302 +       if (S_ISDIR(in->i_mode))        /* IATTR_BARRIER is only reported in *mask for directories */
23303 +               *mask |= IATTR_BARRIER;
23304 +
23305 +       if (IS_TAGGED(in)) {    /* otherwise *tag keeps the caller's value (devpts is handled below) */
23306 +               *tag = in->i_tag;
23307 +               *mask |= IATTR_TAG;
23308 +       }
23309 +
23310 +       switch (in->i_sb->s_magic) {
23311 +       case PROC_SUPER_MAGIC:
23312 +               entry = PROC_I(in)->pde;
23313 +
23314 +               /* check for specific inodes? */
23315 +               if (entry)
23316 +                       *mask |= IATTR_FLAGS;
23317 +               if (entry)
23318 +                       *flags |= (entry->vx_flags & IATTR_FLAGS);
23319 +               else    /* no pde: report the per-inode flags, but IATTR_FLAGS is not added to *mask */
23320 +                       *flags |= (PROC_I(in)->vx_flags & IATTR_FLAGS);
23321 +               break;
23322 +
23323 +       case DEVPTS_SUPER_MAGIC:        /* devpts inodes always report their tag */
23324 +               *tag = in->i_tag;
23325 +               *mask |= IATTR_TAG;
23326 +               break;
23327 +
23328 +       default:
23329 +               break;
23330 +       }
23331 +       return 0;
23332 +}
23333 +
23334 +int vc_get_iattr(uint32_t id, void __user *data)
23335 +{      /* syscall command: look up vc_data.name and return its tag/flags/mask to user space; 'id' is unused */
23336 +       struct nameidata nd;
23337 +       struct vcmd_ctx_iattr_v1 vc_data = { .xid = -1 };       /* fully overwritten by copy_from_user() below */
23338 +       int ret;
23339 +
23340 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
23341 +               return -EFAULT;
23342 +
23343 +       ret = user_path_walk_link(vc_data.name, &nd);
23344 +       if (!ret) {
23345 +               ret = __vc_get_iattr(nd.dentry->d_inode,
23346 +                       &vc_data.xid, &vc_data.flags, &vc_data.mask);
23347 +               path_release(&nd);      /* drop the reference taken by the path walk */
23348 +       }
23349 +       if (ret)        /* lookup or attribute query failed: nothing is copied back */
23350 +               return ret;
23351 +
23352 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
23353 +               ret = -EFAULT;
23354 +       return ret;
23355 +}
23356 +
23357 +#ifdef CONFIG_COMPAT
23358 +
23359 +int vc_get_iattr_x32(uint32_t id, void __user *data)
23360 +{      /* compat variant of vc_get_iattr(): same flow, but the name arrives as a compat pointer; 'id' is unused */
23361 +       struct nameidata nd;
23362 +       struct vcmd_ctx_iattr_v1_x32 vc_data = { .xid = -1 };   /* fully overwritten by copy_from_user() below */
23363 +       int ret;
23364 +
23365 +       if (!vx_check(0, VX_ADMIN))     /* NOTE(review): vc_get_iattr() has no such check -- confirm which is intended */
23366 +               return -ENOSYS;
23367 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
23368 +               return -EFAULT;
23369 +
23370 +       ret = user_path_walk_link(compat_ptr(vc_data.name_ptr), &nd);
23371 +       if (!ret) {
23372 +               ret = __vc_get_iattr(nd.dentry->d_inode,
23373 +                       &vc_data.xid, &vc_data.flags, &vc_data.mask);
23374 +               path_release(&nd);
23375 +       }
23376 +       if (ret)        /* lookup or attribute query failed: nothing is copied back */
23377 +               return ret;
23378 +
23379 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
23380 +               ret = -EFAULT;
23381 +       return ret;
23382 +}
23383 +
23384 +#endif /* CONFIG_COMPAT */
23385 +
23386 +
23387 +static int __vc_set_iattr(struct dentry *de, uint32_t *tag, uint32_t *flags, uint32_t *mask)
23388 +{      /* apply the attributes selected by *mask (values from *tag and *flags) to de's inode; returns 0 or a negative errno */
23389 +       struct inode *in = de->d_inode;
23390 +       int error = 0, is_proc = 0, has_tag = 0;
23391 +       struct iattr attr = { 0 };
23392 +
23393 +       if (!in || !in->i_sb)
23394 +               return -ESRCH;
23395 +
23396 +       is_proc = (in->i_sb->s_magic == PROC_SUPER_MAGIC);
23397 +       if ((*mask & IATTR_FLAGS) && !is_proc)  /* IATTR_FLAGS bits may only be changed on procfs inodes */
23398 +               return -EINVAL;
23399 +
23400 +       has_tag = IS_TAGGED(in) ||
23401 +               (in->i_sb->s_magic == DEVPTS_SUPER_MAGIC);
23402 +       if ((*mask & IATTR_TAG) && !has_tag)    /* a tag can only be set on tagged or devpts inodes */
23403 +               return -EINVAL;
23404 +
23405 +       mutex_lock(&in->i_mutex);       /* held for all updates below, released at 'out' */
23406 +       if (*mask & IATTR_TAG) {
23407 +               attr.ia_tag = *tag;
23408 +               attr.ia_valid |= ATTR_TAG;      /* the tag itself is applied via setattr further down */
23409 +       }
23410 +
23411 +       if (*mask & IATTR_FLAGS) {
23412 +               struct proc_dir_entry *entry = PROC_I(in)->pde;
23413 +               unsigned int iflags = PROC_I(in)->vx_flags;
23414 +
23415 +               iflags = (iflags & ~(*mask & IATTR_FLAGS))      /* replace only the masked bits */
23416 +                       | (*flags & IATTR_FLAGS);
23417 +               PROC_I(in)->vx_flags = iflags;
23418 +               if (entry)
23419 +                       entry->vx_flags = iflags;
23420 +       }
23421 +
23422 +       if (*mask & (IATTR_BARRIER | IATTR_IUNLINK | IATTR_IMMUTABLE)) {
23423 +               if (*mask & IATTR_IMMUTABLE) {
23424 +                       if (*flags & IATTR_IMMUTABLE)
23425 +                               in->i_flags |= S_IMMUTABLE;
23426 +                       else
23427 +                               in->i_flags &= ~S_IMMUTABLE;
23428 +               }
23429 +               if (*mask & IATTR_IUNLINK) {
23430 +                       if (*flags & IATTR_IUNLINK)
23431 +                               in->i_flags |= S_IUNLINK;
23432 +                       else
23433 +                               in->i_flags &= ~S_IUNLINK;
23434 +               }
23435 +               if (S_ISDIR(in->i_mode) && (*mask & IATTR_BARRIER)) {   /* barrier requests on non-directories are ignored */
23436 +                       if (*flags & IATTR_BARRIER)
23437 +                               in->i_flags |= S_BARRIER;
23438 +                       else
23439 +                               in->i_flags &= ~S_BARRIER;
23440 +               }
23441 +               if (in->i_op && in->i_op->sync_flags) {
23442 +                       error = in->i_op->sync_flags(in);
23443 +                       if (error)      /* i_flags and proc vx_flags changed above are not rolled back */
23444 +                               goto out;
23445 +               }
23446 +       }
23447 +
23448 +       if (attr.ia_valid) {
23449 +               if (in->i_op && in->i_op->setattr)
23450 +                       error = in->i_op->setattr(de, &attr);
23451 +               else {  /* no setattr operation: use the generic check-and-set helpers */
23452 +                       error = inode_change_ok(in, &attr);
23453 +                       if (!error)
23454 +                               error = inode_setattr(in, &attr);
23455 +               }
23456 +       }
23457 +
23458 +out:
23459 +       mutex_unlock(&in->i_mutex);
23460 +       return error;
23461 +}
23462 +
23463 +int vc_set_iattr(uint32_t id, void __user *data)
23464 +{      /* syscall command: apply tag/flags/mask from user space to the inode named by vc_data.name; 'id' is unused */
23465 +       struct nameidata nd;
23466 +       struct vcmd_ctx_iattr_v1 vc_data;
23467 +       int ret;
23468 +
23469 +       if (!capable(CAP_LINUX_IMMUTABLE))
23470 +               return -EPERM;
23471 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
23472 +               return -EFAULT;
23473 +
23474 +       ret = user_path_walk_link(vc_data.name, &nd);
23475 +       if (!ret) {
23476 +               ret = __vc_set_iattr(nd.dentry,
23477 +                       &vc_data.xid, &vc_data.flags, &vc_data.mask);
23478 +               path_release(&nd);
23479 +       }
23480 +
23481 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))     /* copied back even on failure; a copy fault overrides ret */
23482 +               ret = -EFAULT;
23483 +       return ret;
23484 +}
23485 +
23486 +#ifdef CONFIG_COMPAT
23487 +
23488 +int vc_set_iattr_x32(uint32_t id, void __user *data)
23489 +{      /* compat variant of vc_set_iattr(): same flow, but the name arrives as a compat pointer; 'id' is unused */
23490 +       struct nameidata nd;
23491 +       struct vcmd_ctx_iattr_v1_x32 vc_data;
23492 +       int ret;
23493 +
23494 +       if (!capable(CAP_LINUX_IMMUTABLE))
23495 +               return -EPERM;
23496 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
23497 +               return -EFAULT;
23498 +
23499 +       ret = user_path_walk_link(compat_ptr(vc_data.name_ptr), &nd);
23500 +       if (!ret) {
23501 +               ret = __vc_set_iattr(nd.dentry,
23502 +                       &vc_data.xid, &vc_data.flags, &vc_data.mask);
23503 +               path_release(&nd);
23504 +       }
23505 +
23506 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))     /* copied back even on failure; a copy fault overrides ret */
23507 +               ret = -EFAULT;
23508 +       return ret;
23509 +}
23510 +
23511 +#endif /* CONFIG_COMPAT */
23512 +
23513 +#ifdef CONFIG_VSERVER_LEGACY
23514 +
23515 +#define PROC_DYNAMIC_FIRST 0xF0000000UL
23516 +
23517 +int vx_proc_ioctl(struct inode * inode, struct file * filp,
23518 +       unsigned int cmd, unsigned long arg)
23519 +{      /* legacy ioctl: get/set the vx_flags of the proc_dir_entry behind a proc inode; 'filp' is unused */
23520 +       struct proc_dir_entry *entry;
23521 +       int error = 0;
23522 +       int flags;
23523 +
23524 +       if (inode->i_ino < PROC_DYNAMIC_FIRST)  /* inode numbers below PROC_DYNAMIC_FIRST are rejected */
23525 +               return -ENOTTY;
23526 +
23527 +       entry = PROC_I(inode)->pde;
23528 +       if (!entry)
23529 +               return -ENOTTY;
23530 +
23531 +       switch(cmd) {
23532 +       case FIOC_GETXFLG: {
23533 +               /* fixme: if stealth, return -ENOTTY */
23534 +               error = -EPERM;
23535 +               flags = entry->vx_flags;
23536 +               if (capable(CAP_CONTEXT))       /* flags reach user space only with CAP_CONTEXT */
23537 +                       error = put_user(flags, (int __user *) arg);
23538 +               break;
23539 +       }
23540 +       case FIOC_SETXFLG: {    /* each check presets 'error' and breaks out on failure */
23541 +               /* fixme: if stealth, return -ENOTTY */
23542 +               error = -EPERM;
23543 +               if (!capable(CAP_CONTEXT))
23544 +                       break;
23545 +               error = -EROFS;
23546 +               if (IS_RDONLY(inode))
23547 +                       break;
23548 +               error = -EFAULT;
23549 +               if (get_user(flags, (int __user *) arg))
23550 +                       break;
23551 +               error = 0;
23552 +               entry->vx_flags = flags;        /* stored as given, without masking */
23553 +               break;
23554 +       }
23555 +       default:
23556 +               return -ENOTTY;
23557 +       }
23558 +       return error;
23559 +}
23560 +#endif /* CONFIG_VSERVER_LEGACY */
23561 +
23562 +#ifdef CONFIG_PROPAGATE
23563 +
23564 +int dx_parse_tag(char *string, tag_t *tag, int remove)
23565 +{      /* match 'string' against "tagid=%u", optionally store the value in *tag and cut the option out of 'string'; returns the token */
23566 +       static match_table_t tokens = {
23567 +               {1, "tagid=%u"},
23568 +               {0, NULL}
23569 +       };
23570 +       substring_t args[MAX_OPT_ARGS];
23571 +       int token, option = 0;
23572 +
23573 +       if (!string)
23574 +               return 0;
23575 +
23576 +       token = match_token(string, tokens, args);      /* presumably 1 on a tagid= match, 0 otherwise -- per the table above */
23577 +       if (token && tag && !match_int(args, &option))  /* *tag is written only if the number parsed */
23578 +               *tag = option;
23579 +
23580 +       vxdprintk(VXD_CBIT(tag, 7),
23581 +               "dx_parse_tag(»%s«): %d:#%d",
23582 +               string, token, option);
23583 +
23584 +       if ((token == 1) && remove) {
23585 +               char *p = strstr(string, "tagid=");
23586 +               char *q = p;
23587 +
23588 +               if (p) {
23589 +                       while (*q != '\0' && *q != ',') /* q: end of the option (next ',' or end of string) */
23590 +                               q++;
23591 +                       while (*q)      /* move the tail, including that ',', down over the option */
23592 +                               *p++ = *q++;
23593 +                       while (*p)      /* zero the stale bytes left behind by the move */
23594 +                               *p++ = '\0';
23595 +               }
23596 +       }
23597 +       return token;
23598 +}
23599 +
23600 +void __dx_propagate_tag(struct nameidata *nd, struct inode *inode)
23601 +{      /* if nd's mount has MNT_TAGID set, copy the mount's tag into inode->i_tag; no-op without nd or mount */
23602 +       tag_t new_tag = 0;
23603 +       struct vfsmount *mnt;
23604 +       int propagate;
23605 +
23606 +       if (!nd)
23607 +               return;
23608 +       mnt = nd->mnt;
23609 +       if (!mnt)
23610 +               return;
23611 +
23612 +       propagate = (mnt->mnt_flags & MNT_TAGID);
23613 +       if (propagate)
23614 +               new_tag = mnt->mnt_tag;
23615 +
23616 +       vxdprintk(VXD_CBIT(tag, 7),
23617 +               "dx_propagate_tag(%p[#%lu.%d]): %d,%d",
23618 +               inode, inode->i_ino, inode->i_tag,      /* NOTE(review): inode is dereferenced without a NULL check */
23619 +               new_tag, (propagate)?1:0);
23620 +
23621 +       if (propagate)
23622 +               inode->i_tag = new_tag;
23623 +}
23624 +
23625 +#include <linux/module.h>
23626 +
23627 +EXPORT_SYMBOL_GPL(__dx_propagate_tag);
23628 +
23629 +#endif /* CONFIG_PROPAGATE */
23630 +
23631 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/legacy.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/legacy.c
23632 --- linux-2.6.16.20/kernel/vserver/legacy.c     1970-01-01 01:00:00 +0100
23633 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/legacy.c        2006-04-28 04:35:46 +0200
23634 @@ -0,0 +1,115 @@
23635 +/*
23636 + *  linux/kernel/vserver/legacy.c
23637 + *
23638 + *  Virtual Server: Legacy Functions
23639 + *
23640 + *  Copyright (C) 2001-2003  Jacques Gelinas
23641 + *  Copyright (C) 2003-2005  Herbert Pötzl
23642 + *
23643 + *  V0.01  broken out from vcontext.c V0.05
23644 + *
23645 + */
23646 +
23647 +#include <linux/sched.h>
23648 +#include <linux/vs_context.h>
23649 +#include <linux/vs_network.h>
23650 +#include <linux/vserver/legacy.h>
23651 +#include <linux/vserver/namespace.h>
23652 +#include <linux/namespace.h>
23653 +
23654 +#include <asm/errno.h>
23655 +#include <asm/uaccess.h>
23656 +
23657 +
23658 +extern int vx_set_init(struct vx_info *, struct task_struct *);
23659 +
23660 +static int vx_set_initpid(struct vx_info *vxi, int pid)
23661 +{      /* make the task with real pid 'pid' the init task of vxi; -ESRCH if no such task exists */
23662 +       struct task_struct *init;
23663 +
23664 +       init = find_task_by_real_pid(pid);
23665 +       if (!init)
23666 +               return -ESRCH;
23667 +
23668 +       vxi->vx_flags &= ~VXF_STATE_INIT;       /* cleared before vx_set_init() is called */
23669 +       return vx_set_init(vxi, init);
23670 +}
23671 +
23672 +int vc_new_s_context(uint32_t ctx, void __user *data)
23673 +{      /* legacy syscall: move current into context 'ctx' (or adjust its own context for ctx == -2); returns the xid or a negative errno */
23674 +       int ret = -ENOMEM;
23675 +       struct vcmd_new_s_context_v1 vc_data;
23676 +       struct vx_info *new_vxi;
23677 +
23678 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
23679 +               return -EFAULT;
23680 +
23681 +       /* legacy hack, will be removed soon */
23682 +       if (ctx == -2) {        /* ctx is unsigned, so this matches 0xfffffffe; runs before the permission check below */
23683 +               /* assign flags and initpid */
23684 +               if (!current->vx_info)
23685 +                       return -EINVAL;
23686 +               ret = 0;
23687 +               if (vc_data.flags & VX_INFO_INIT)
23688 +                       ret = vx_set_initpid(current->vx_info, current->tgid);
23689 +               if (ret == 0) {
23690 +                       /* We keep the same vx_id, but lower the capabilities */
23691 +                       current->vx_info->vx_bcaps &= (~vc_data.remove_cap);
23692 +                       ret = vx_current_xid();
23693 +                       current->vx_info->vx_flags |= vc_data.flags;
23694 +               }
23695 +               return ret;
23696 +       }
23697 +
23698 +       if (!vx_check(0, VX_ADMIN) || !capable(CAP_SYS_ADMIN)
23699 +               /* might make sense in the future, or not ... */
23700 +               || vx_flags(VX_INFO_LOCK, 0))
23701 +               return -EPERM;
23702 +
23703 +       /* ugly hack for Spectator */
23704 +       if (ctx == 1) { /* only current->xid is changed; no vx_info lookup or migration happens */
23705 +               current->xid = 1;
23706 +               return 0;
23707 +       }
23708 +
23709 +       if (((ctx > MAX_S_CONTEXT) && (ctx != VX_DYNAMIC_ID)) ||
23710 +               (ctx == 0))
23711 +               return -EINVAL;
23712 +
23713 +       if ((ctx == VX_DYNAMIC_ID) || (ctx < MIN_D_CONTEXT))    /* these ids may be created on demand */
23714 +               new_vxi = lookup_or_create_vx_info(ctx);
23715 +       else    /* remaining ids must already exist */
23716 +               new_vxi = lookup_vx_info(ctx);
23717 +
23718 +       if (!new_vxi)
23719 +               return -EINVAL;
23720 +
23721 +       ret = -EPERM;
23722 +       if (!vx_info_flags(new_vxi, VXF_STATE_SETUP, 0) &&
23723 +               vx_info_flags(new_vxi, VX_INFO_PRIVATE, 0))
23724 +               goto out_put;
23725 +
23726 +       new_vxi->vx_flags &= ~VXF_STATE_SETUP;
23727 +
23728 +       ret = vx_migrate_task(current, new_vxi);
23729 +       if (ret == 0) {
23730 +               current->vx_info->vx_bcaps &= (~vc_data.remove_cap);
23731 +               new_vxi->vx_flags |= vc_data.flags;
23732 +               if (vc_data.flags & VX_INFO_INIT)
23733 +                       vx_set_initpid(new_vxi, current->tgid); /* NOTE(review): result is ignored on this path */
23734 +               if (vc_data.flags & VX_INFO_NAMESPACE)
23735 +                       vx_set_namespace(new_vxi,
23736 +                               current->namespace, current->fs);
23737 +               if (vc_data.flags & VX_INFO_NPROC)
23738 +                       __rlim_set(&new_vxi->limit, RLIMIT_NPROC,
23739 +                               current->signal->rlim[RLIMIT_NPROC].rlim_max);
23740 +
23741 +               /* tweak some defaults for legacy */
23742 +               new_vxi->vx_flags |= (VXF_HIDE_NETIF|VXF_INFO_INIT);
23743 +               ret = new_vxi->vx_id;   /* success: return the id of the context entered */
23744 +       }
23745 +out_put:
23746 +       put_vx_info(new_vxi);   /* drop the reference obtained by the lookup above */
23747 +       return ret;
23748 +}
23749 +
23750 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/legacynet.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/legacynet.c
23751 --- linux-2.6.16.20/kernel/vserver/legacynet.c  1970-01-01 01:00:00 +0100
23752 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/legacynet.c     2006-04-26 19:07:00 +0200
23753 @@ -0,0 +1,85 @@
23754 +
23755 +/*
23756 + *  linux/kernel/vserver/legacynet.c
23757 + *
23758 + *  Virtual Server: Legacy Network Functions
23759 + *
23760 + *  Copyright (C) 2001-2003  Jacques Gelinas
23761 + *  Copyright (C) 2003-2005  Herbert Pötzl
23762 + *
23763 + *  V0.01  broken out from legacy.c
23764 + *
23765 + */
23766 +
23767 +#include <linux/sched.h>
23768 +#include <linux/vs_context.h>
23769 +#include <linux/vs_network.h>
23770 +#include <linux/vserver/legacy.h>
23771 +// #include <linux/vserver/namespace.h>
23772 +#include <linux/namespace.h>
23773 +#include <linux/err.h>
23774 +
23775 +#include <asm/errno.h>
23776 +#include <asm/uaccess.h>
23777 +
23778 +
23779 +extern struct nx_info *create_nx_info(void);
23780 +
23781 +/*  set ipv4 root (syscall) */
23782 +
23783 +int vc_set_ipv4root(uint32_t nbip, void __user *data)
23784 +{      /* legacy syscall: give current a fresh nx_info holding the nbip addresses from 'data'; returns 0 or a negative errno */
23785 +       int i, err = -EPERM;
23786 +       struct vcmd_set_ipv4root_v3 vc_data;
23787 +       struct nx_info *new_nxi, *nxi = current->nx_info;
23788 +
23789 +       if (nbip > NB_IPV4ROOT) /* nbip is unsigned, so the former "nbip < 0" test could never fire */
23790 +               return -EINVAL;
23791 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
23792 +               return -EFAULT;
23793 +
23794 +       if (!nxi || nxi->ipv4[0] == 0 || capable(CAP_NET_ADMIN))
23795 +               /* We are allowed to change everything */
23796 +               err = 0;
23797 +       else if (nxi) {
23798 +               int found = 0;
23799 +
23800 +               /* We are allowed to select a subset of the currently
23801 +                  installed IP numbers. No new ones are allowed.
23802 +                  We can't change the broadcast address though. */
23803 +               for (i=0; i<nbip; i++) {
23804 +                       int j;
23805 +                       __u32 nxip = vc_data.nx_mask_pair[i].ip;
23806 +                       for (j=0; j<nxi->nbipv4; j++) {
23807 +                               if (nxip == nxi->ipv4[j]) {
23808 +                                       found++;
23809 +                                       break;
23810 +                               }
23811 +                       }
23812 +               }
23813 +               if ((found == nbip) &&  /* every requested address is already installed */
23814 +                       (vc_data.broadcast == nxi->v4_bcast))
23815 +                       err = 0;
23816 +       }
23817 +       if (err)
23818 +               return err;
23819 +
23820 +       new_nxi = create_nx_info();
23821 +       if (!new_nxi || IS_ERR(new_nxi))        /* failure convention not visible here: reject NULL as well as ERR_PTR */
23822 +               return -EINVAL;
23823 +
23824 +       new_nxi->nbipv4 = nbip;
23825 +       for (i=0; i<nbip; i++) {
23826 +               new_nxi->ipv4[i] = vc_data.nx_mask_pair[i].ip;
23827 +               new_nxi->mask[i] = vc_data.nx_mask_pair[i].mask;
23828 +       }
23829 +       new_nxi->v4_bcast = vc_data.broadcast;
23830 +       if (nxi)
23831 +               printk("!!! switching nx_info %p->%p\n", nxi, new_nxi);
23832 +
23833 +       nx_migrate_task(current, new_nxi);      /* NOTE(review): migration result is not checked */
23834 +       put_nx_info(new_nxi);
23835 +       return 0;
23836 +}
23837 +
23838 +
23839 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/limit.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/limit.c
23840 --- linux-2.6.16.20/kernel/vserver/limit.c      1970-01-01 01:00:00 +0100
23841 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/limit.c 2006-05-02 02:12:38 +0200
23842 @@ -0,0 +1,272 @@
23843 +/*
23844 + *  linux/kernel/vserver/limit.c
23845 + *
23846 + *  Virtual Server: Context Limits
23847 + *
23848 + *  Copyright (C) 2004-2005  Herbert Pötzl
23849 + *
23850 + *  V0.01  broken out from vcontext V0.05
23851 + *
23852 + */
23853 +
23854 +#include <linux/module.h>
23855 +#include <linux/vs_context.h>
23856 +#include <linux/vs_limit.h>
23857 +#include <linux/vserver/limit.h>
23858 +#include <linux/vserver/switch.h>
23859 +#include <linux/vserver/limit_cmd.h>
23860 +
23861 +#include <asm/errno.h>
23862 +#include <asm/uaccess.h>
23863 +
23864 +
23865 +const char *vlimit_name[NUM_LIMITS] = {        /* short display names indexed by limit id; ids not listed stay NULL */
23866 +       [RLIMIT_CPU]            = "CPU",
23867 +       [RLIMIT_RSS]            = "RSS",
23868 +       [RLIMIT_NPROC]          = "NPROC",
23869 +       [RLIMIT_NOFILE]         = "NOFILE",
23870 +       [RLIMIT_MEMLOCK]        = "VML",
23871 +       [RLIMIT_AS]             = "VM",
23872 +       [RLIMIT_LOCKS]          = "LOCKS",
23873 +       [RLIMIT_SIGPENDING]     = "SIGP",
23874 +       [RLIMIT_MSGQUEUE]       = "MSGQ",
23875 +
23876 +       [VLIMIT_NSOCK]          = "NSOCK",
23877 +       [VLIMIT_OPENFD]         = "OPENFD",
23878 +       [VLIMIT_ANON]           = "ANON",
23879 +       [VLIMIT_SHMEM]          = "SHMEM",
23880 +       [VLIMIT_DENTRY]         = "DENTRY",
23881 +};
23882 +
23883 +EXPORT_SYMBOL_GPL(vlimit_name);
23884 +
23885 +
23886 +static int is_valid_rlimit(int id)
23887 +{      /* returns 1 if 'id' is a limit the get/set commands below accept, 0 otherwise */
23888 +       int valid = 0;
23889 +
23890 +       switch (id) {   /* RLIMIT_CPU and RLIMIT_SIGPENDING have names in vlimit_name[] but are not accepted here */
23891 +       case RLIMIT_RSS:
23892 +       case RLIMIT_NPROC:
23893 +       case RLIMIT_NOFILE:
23894 +       case RLIMIT_MEMLOCK:
23895 +       case RLIMIT_AS:
23896 +       case RLIMIT_LOCKS:
23897 +       case RLIMIT_MSGQUEUE:
23898 +
23899 +       case VLIMIT_NSOCK:
23900 +       case VLIMIT_OPENFD:
23901 +       case VLIMIT_ANON:
23902 +       case VLIMIT_SHMEM:
23903 +       case VLIMIT_DENTRY:
23904 +               valid = 1;
23905 +               break;
23906 +       }
23907 +       return valid;
23908 +}
23909 +
23910 +static inline uint64_t vc_get_soft(struct vx_info *vxi, int id)
23911 +{      /* soft limit 'id' of vxi, passed through VX_VLIM() for the 64-bit user-visible value */
23912 +       rlim_t limit = __rlim_soft(&vxi->limit, id);
23913 +       return VX_VLIM(limit);
23914 +}
23915 +
23916 +static inline uint64_t vc_get_hard(struct vx_info *vxi, int id)
23917 +{      /* hard limit 'id' of vxi, passed through VX_VLIM() for the 64-bit user-visible value */
23918 +       rlim_t limit = __rlim_hard(&vxi->limit, id);
23919 +       return VX_VLIM(limit);
23920 +}
23921 +
23922 +static int do_get_rlimit(xid_t xid, uint32_t id,
23923 +       uint64_t *minimum, uint64_t *softlimit, uint64_t *maximum)
23924 +{      /* read limit 'id' of context 'xid' into the non-NULL out pointers; 0, -EINVAL (bad id) or -ESRCH (no context) */
23925 +       struct vx_info *vxi;
23926 +
23927 +       if (!is_valid_rlimit(id))
23928 +               return -EINVAL;
23929 +
23930 +       vxi = lookup_vx_info(xid);
23931 +       if (!vxi)
23932 +               return -ESRCH;
23933 +
23934 +       if (minimum)
23935 +               *minimum = CRLIM_UNSET; /* no minimum is tracked: always reported as unset */
23936 +       if (softlimit)
23937 +               *softlimit = vc_get_soft(vxi, id);
23938 +       if (maximum)
23939 +               *maximum = vc_get_hard(vxi, id);
23940 +       put_vx_info(vxi);       /* drop the reference taken by lookup_vx_info() */
23941 +       return 0;
23942 +}
23943 +
23944 +int vc_get_rlimit(uint32_t id, void __user *data)
23945 +{      /* syscall command: 'id' is the context id, vc_data.id selects the limit; result is copied back to 'data' */
23946 +       struct vcmd_ctx_rlimit_v0 vc_data;
23947 +       int ret;
23948 +
23949 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
23950 +               return -EFAULT;
23951 +
23952 +       ret = do_get_rlimit(id, vc_data.id,
23953 +               &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum);
23954 +       if (ret)        /* on failure nothing is copied back */
23955 +               return ret;
23956 +
23957 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
23958 +               return -EFAULT;
23959 +       return 0;
23960 +}
23961 +
23962 +static int do_set_rlimit(xid_t xid, uint32_t id,
23963 +       uint64_t minimum, uint64_t softlimit, uint64_t maximum)
23964 +{
23965 +       struct vx_info *vxi;
23966 +       /* set limit 'id' of context 'xid'; 'minimum' is accepted but not used here */
23967 +       if (!is_valid_rlimit(id))
23968 +               return -EINVAL;
23969 +
23970 +       vxi = lookup_vx_info(xid);      /* paired with put_vx_info() below */
23971 +       if (!vxi)
23972 +               return -ESRCH;
23973 +       /* a value of CRLIM_KEEP leaves the corresponding stored limit untouched */
23974 +       if (maximum != CRLIM_KEEP)
23975 +               __rlim_hard(&vxi->limit, id) = VX_RLIM(maximum);
23976 +       if (softlimit != CRLIM_KEEP)
23977 +               __rlim_soft(&vxi->limit, id) = VX_RLIM(softlimit);
23978 +
23979 +       /* clamp soft limit so that it never exceeds the hard limit */
23980 +       if (__rlim_soft(&vxi->limit, id) > __rlim_hard(&vxi->limit, id))
23981 +               __rlim_soft(&vxi->limit, id) = __rlim_hard(&vxi->limit, id);
23982 +
23983 +       put_vx_info(vxi);
23984 +       return 0;
23985 +}
23986 +
23987 +int vc_set_rlimit(uint32_t id, void __user *data)
23988 +{
23989 +       struct vcmd_ctx_rlimit_v0 vc_data;
23990 +       /* changing limits requires CAP_SYS_RESOURCE; checked before touching user memory */
23991 +       if (!capable(CAP_SYS_RESOURCE))
23992 +               return -EPERM;
23993 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
23994 +               return -EFAULT;
23995 +       /* 'id' is passed on as the context id, vc_data.id as the resource id */
23996 +       return do_set_rlimit(id, vc_data.id,
23997 +               vc_data.minimum, vc_data.softlimit, vc_data.maximum);
23998 +}
23999 +
24000 +#ifdef CONFIG_IA32_EMULATION
24001 +
24002 +int vc_set_rlimit_x32(uint32_t id, void __user *data)
24003 +{
24004 +       struct vcmd_ctx_rlimit_v0_x32 vc_data;
24005 +       /* same steps as vc_set_rlimit(), but reads the request in the _x32 struct layout */
24006 +       if (!capable(CAP_SYS_RESOURCE))
24007 +               return -EPERM;
24008 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
24009 +               return -EFAULT;
24010 +       /* 'id' is passed on as the context id, vc_data.id as the resource id */
24011 +       return do_set_rlimit(id, vc_data.id,
24012 +               vc_data.minimum, vc_data.softlimit, vc_data.maximum);
24013 +}
24014 +
24015 +int vc_get_rlimit_x32(uint32_t id, void __user *data)
24016 +{
24017 +       struct vcmd_ctx_rlimit_v0_x32 vc_data;
24018 +       int ret;
24019 +       /* same steps as vc_get_rlimit(), but uses the _x32 struct layout for the exchange */
24020 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
24021 +               return -EFAULT;
24022 +       /* 'id' is passed on as the context id, vc_data.id as the resource id */
24023 +       ret = do_get_rlimit(id, vc_data.id,
24024 +               &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum);
24025 +       if (ret)
24026 +               return ret;
24027 +       /* the filled-in struct is written back to user space only on success */
24028 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
24029 +               return -EFAULT;
24030 +       return 0;
24031 +}
24032 +
24033 +#endif /* CONFIG_IA32_EMULATION */
24034 +
24035 +/* copy a fixed bit mask per field (minimum/softlimit/maximum) to user space; 'id' is unused */
24036 +int vc_get_rlimit_mask(uint32_t id, void __user *data)
24037 +{
24038 +       static struct vcmd_ctx_rlimit_mask_v0 mask = {
24039 +                       /* minimum */
24040 +               0
24041 +               ,       /* softlimit */
24042 +               (1 << RLIMIT_RSS) |
24043 +               (1 << VLIMIT_ANON) |
24044 +               0
24045 +               ,       /* maximum */
24046 +               (1 << RLIMIT_RSS) |
24047 +               (1 << RLIMIT_NPROC) |
24048 +               (1 << RLIMIT_NOFILE) |
24049 +               (1 << RLIMIT_MEMLOCK) |
24050 +               (1 << RLIMIT_LOCKS) |
24051 +               (1 << RLIMIT_AS) |
24052 +               (1 << VLIMIT_ANON) |
24053 +               (1 << VLIMIT_DENTRY) |
24054 +               0
24055 +               };
24056 +       /* NOTE(review): even this read-only query is gated on CAP_SYS_RESOURCE */
24057 +       if (!capable(CAP_SYS_RESOURCE))
24058 +               return -EPERM;
24059 +       if (copy_to_user(data, &mask, sizeof(mask)))
24060 +               return -EFAULT;
24061 +       return 0;
24062 +}
24063 +
24064 +/* rewrite the memory fields of *val from the current context's RSS limit and RSS usage */
24065 +void vx_vsi_meminfo(struct sysinfo *val)
24066 +{
24067 +       struct vx_info *vxi = current->vx_info;
24068 +       unsigned long totalram, freeram;
24069 +       rlim_t v;
24070 +
24071 +       /* total: the soft RSS limit if one is set, otherwise the value already in *val */
24072 +       v = __rlim_soft(&vxi->limit, RLIMIT_RSS);
24073 +       totalram = (v != RLIM_INFINITY) ? v : val->totalram;
24074 +
24075 +       /* total minus used equals free (never below zero) */
24076 +       v = __rlim_get(&vxi->limit, RLIMIT_RSS);
24077 +       freeram = (v < totalram) ? totalram - v : 0;
24078 +       /* buffer and highmem figures are reported as zero */
24079 +       val->totalram = totalram;
24080 +       val->freeram = freeram;
24081 +       val->bufferram = 0;
24082 +       val->totalhigh = 0;
24083 +       val->freehigh = 0;
24084 +       return;
24085 +}
24086 +
24087 +void vx_vsi_swapinfo(struct sysinfo *val)
24088 +{
24089 +       struct vx_info *vxi = current->vx_info;
24090 +       unsigned long totalswap, freeswap;
24091 +       rlim_t v, w;
24092 +       /* without a soft RSS limit totalswap is kept and all of it is reported free */
24093 +       v = __rlim_soft(&vxi->limit, RLIMIT_RSS);
24094 +       if (v == RLIM_INFINITY) {
24095 +               val->freeswap = val->totalswap;
24096 +               return;
24097 +       }
24098 +
24099 +       /* total swap: hard minus soft RSS limit, or the existing total without a hard limit */
24100 +       w = __rlim_hard(&vxi->limit, RLIMIT_RSS);
24101 +       totalswap = (w != RLIM_INFINITY) ? (w - v) : val->totalswap;
24102 +
24103 +       /* currently 'used' swap: RSS usage in excess of the soft limit, zero if below it */
24104 +       w = __rlim_get(&vxi->limit, RLIMIT_RSS);
24105 +       w -= (w > v) ? v : w;
24106 +
24107 +       /* total minus used equals free (never below zero) */
24108 +       freeswap = (w < totalswap) ? totalswap - w : 0;
24109 +
24110 +       val->totalswap = totalswap;
24111 +       val->freeswap = freeswap;
24112 +       return;
24113 +}
24114 +
24115 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/limit_init.h linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/limit_init.h
24116 --- linux-2.6.16.20/kernel/vserver/limit_init.h 1970-01-01 01:00:00 +0100
24117 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/limit_init.h    2006-04-26 19:07:00 +0200
24118 @@ -0,0 +1,31 @@
24119 +
24120 +/* reset all NUM_LIMITS slots: soft/hard unlimited, value, hit count and min/max zero */
24121 +static inline void vx_info_init_limit(struct _vx_limit *limit)
24122 +{
24123 +       int lim;
24124 +
24125 +       for (lim=0; lim<NUM_LIMITS; lim++) {
24126 +               __rlim_soft(limit, lim) = RLIM_INFINITY;
24127 +               __rlim_hard(limit, lim) = RLIM_INFINITY;
24128 +               __rlim_set(limit, lim, 0);
24129 +               atomic_set(&__rlim_lhit(limit, lim), 0);
24130 +               __rlim_rmin(limit, lim) = 0;
24131 +               __rlim_rmax(limit, lim) = 0;
24132 +       }
24133 +}
24134 +
24135 +static inline void vx_info_exit_limit(struct _vx_limit *limit)
24136 +{
24137 +#ifdef CONFIG_VSERVER_DEBUG
24138 +       rlim_t value;
24139 +       int lim;
24140 +       /* debug builds only; vxwprintk presumably prints when 'value' is non-zero - TODO confirm */
24141 +       for (lim=0; lim<NUM_LIMITS; lim++) {
24142 +               value = __rlim_get(limit, lim);
24143 +               vxwprintk(value,
24144 +                       "!!! limit: %p[%s,%d] = %ld on exit.",
24145 +                       limit, vlimit_name[lim], lim, (long)value);
24146 +       }
24147 +#endif
24148 +}
24149 +
24150 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/limit_proc.h linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/limit_proc.h
24151 --- linux-2.6.16.20/kernel/vserver/limit_proc.h 1970-01-01 01:00:00 +0100
24152 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/limit_proc.h    2006-05-02 02:06:16 +0200
24153 @@ -0,0 +1,71 @@
24154 +#ifndef _VX_LIMIT_PROC_H
24155 +#define _VX_LIMIT_PROC_H
24156 +
24157 +/* fold each limit's current value into its recorded min/max before they are printed */
24158 +static inline void vx_limit_fixup(struct _vx_limit *limit)
24159 +{
24160 +       rlim_t value;
24161 +       int lim;
24162 +
24163 +       for (lim=0; lim<NUM_LIMITS; lim++) {
24164 +               value = __rlim_get(limit, lim);
24165 +               if (value > __rlim_rmax(limit, lim))
24166 +                       __rlim_rmax(limit, lim) = value;
24167 +               if (value < __rlim_rmin(limit, lim))
24168 +                       __rlim_rmin(limit, lim) = value;
24169 +               if (__rlim_rmax(limit, lim) > __rlim_hard(limit, lim))  /* cap max at the hard limit */
24170 +                       __rlim_rmax(limit, lim) = __rlim_hard(limit, lim);
24171 +       }
24172 +}
24173 +
24174 +/* one table row per limit: current, min/max, soft/hard, hit count (VX_LIMIT_ARG supplies the six values) */
24175 +#define VX_LIMIT_FMT   ":\t%8ld\t%8ld/%8ld\t%8lld/%8lld\t%6d\n"
24176 +#define VX_LIMIT_TOP   \
24177 +       "Limit\t current\t     min/max\t\t    soft/hard\t\thits\n"
24178 +
24179 +#define VX_LIMIT_ARG(r)                                \
24180 +       ,(unsigned long)__rlim_get(limit, r)    \
24181 +       ,(unsigned long)__rlim_rmin(limit, r)   \
24182 +       ,(unsigned long)__rlim_rmax(limit, r)   \
24183 +       ,VX_VLIM(__rlim_soft(limit, r))         \
24184 +       ,VX_VLIM(__rlim_hard(limit, r))         \
24185 +       ,atomic_read(&__rlim_lhit(limit, r))
24186 +/* print the header and 14 limit rows into 'buffer' (unbounded sprintf); returns the length written */
24187 +static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer)
24188 +{
24189 +       vx_limit_fixup(limit);  /* bring the recorded min/max up to date first */
24190 +       return sprintf(buffer, VX_LIMIT_TOP
24191 +               "PROC"  VX_LIMIT_FMT
24192 +               "VM"    VX_LIMIT_FMT
24193 +               "VML"   VX_LIMIT_FMT
24194 +               "RSS"   VX_LIMIT_FMT
24195 +               "ANON"  VX_LIMIT_FMT
24196 +               "FILES" VX_LIMIT_FMT
24197 +               "OFD"   VX_LIMIT_FMT
24198 +               "LOCKS" VX_LIMIT_FMT
24199 +               "SOCK"  VX_LIMIT_FMT
24200 +               "MSGQ"  VX_LIMIT_FMT
24201 +               "SHM"   VX_LIMIT_FMT
24202 +               "SEMA"  VX_LIMIT_FMT
24203 +               "SEMS"  VX_LIMIT_FMT
24204 +               "DENT"  VX_LIMIT_FMT
24205 +               VX_LIMIT_ARG(RLIMIT_NPROC)      /* argument order must match the row labels above */
24206 +               VX_LIMIT_ARG(RLIMIT_AS)
24207 +               VX_LIMIT_ARG(RLIMIT_MEMLOCK)
24208 +               VX_LIMIT_ARG(RLIMIT_RSS)
24209 +               VX_LIMIT_ARG(VLIMIT_ANON)
24210 +               VX_LIMIT_ARG(RLIMIT_NOFILE)
24211 +               VX_LIMIT_ARG(VLIMIT_OPENFD)
24212 +               VX_LIMIT_ARG(RLIMIT_LOCKS)
24213 +               VX_LIMIT_ARG(VLIMIT_NSOCK)
24214 +               VX_LIMIT_ARG(RLIMIT_MSGQUEUE)
24215 +               VX_LIMIT_ARG(VLIMIT_SHMEM)
24216 +               VX_LIMIT_ARG(VLIMIT_SEMARY)
24217 +               VX_LIMIT_ARG(VLIMIT_NSEMS)
24218 +               VX_LIMIT_ARG(VLIMIT_DENTRY)
24219 +               );
24220 +}
24221 +
24222 +#endif /* _VX_LIMIT_PROC_H */
24223 +
24224 +
24225 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/monitor.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/monitor.c
24226 --- linux-2.6.16.20/kernel/vserver/monitor.c    1970-01-01 01:00:00 +0100
24227 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/monitor.c       2006-04-26 19:07:00 +0200
24228 @@ -0,0 +1,64 @@
24229 +/*
24230 + *  kernel/vserver/monitor.c
24231 + *
24232 + *  Virtual Context Scheduler Monitor
24233 + *
24234 + *  Copyright (C) 2006 Herbert Pötzl
24235 + *
24236 + *  V0.01  basic design
24237 + *
24238 + */
24239 +
24240 +#include <linux/config.h>
24241 +#include <linux/errno.h>
24242 +#include <linux/module.h>
24243 +#include <linux/types.h>
24244 +#include <linux/ctype.h>
24245 +
24246 +#include <asm/uaccess.h>
24247 +#include <asm/atomic.h>
24248 +#include <asm/unistd.h>
24249 +
24250 +#include <linux/vserver/monitor.h>
24251 +
24252 +
24253 +#ifdef CONFIG_VSERVER_MONITOR
24254 +#define VXM_SIZE       CONFIG_VSERVER_MONITOR_SIZE
24255 +#else
24256 +#define VXM_SIZE       64      /* fallback ring size when the monitor option is off */
24257 +#endif
24258 +/* event ring: entry[0..VXM_SIZE-1] are ring slots, entry[VXM_SIZE] is a spare slot */
24259 +struct _vx_monitor {
24260 +       unsigned int counter;   /* running write position, used modulo VXM_SIZE */
24261 +
24262 +       struct _vx_mon_entry entry[VXM_SIZE+1];
24263 +};
24264 +
24265 +/* one monitor buffer per cpu */
24266 +DEFINE_PER_CPU(struct _vx_monitor, vx_monitor_buffer);
24267 +/* non-zero: vxm_advance() hands out ring slots; zero: it hands out the spare slot */
24268 +unsigned volatile int vxm_active = 1;
24269 +/* sequence counter stamped into every entry, shared by all cpus */
24270 +static atomic_t sequence = ATOMIC_INIT(0);
24271 +
24272 +
24273 +/*     vxm_advance()
24274 +
24275 +       * hand out the next entry of cpu's buffer, stamped with seq and jiffies
24276 +       * requires disabled preemption                          */
24277 +struct _vx_mon_entry *vxm_advance(int cpu)
24278 +{
24279 +       struct _vx_monitor *mon = &per_cpu(vx_monitor_buffer, cpu);
24280 +       struct _vx_mon_entry *entry;
24281 +       unsigned int index;
24282 +       /* active: next ring slot (wraps at VXM_SIZE); inactive: the spare slot past the ring */
24283 +       index = vxm_active ? (mon->counter++ % VXM_SIZE) : VXM_SIZE;
24284 +       entry = &mon->entry[index];
24285 +
24286 +       entry->ev.seq = atomic_inc_return(&sequence);
24287 +       entry->ev.jif = jiffies;
24288 +       return entry;
24289 +}
24290 +
24291 +EXPORT_SYMBOL_GPL(vxm_advance);
24292 +
24293 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/namespace.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/namespace.c
24294 --- linux-2.6.16.20/kernel/vserver/namespace.c  1970-01-01 01:00:00 +0100
24295 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/namespace.c     2006-04-26 19:07:00 +0200
24296 @@ -0,0 +1,120 @@
24297 +/*
24298 + *  linux/kernel/vserver/namespace.c
24299 + *
24300 + *  Virtual Server: Context Namespace Support
24301 + *
24302 + *  Copyright (C) 2003-2005  Herbert Pötzl
24303 + *
24304 + *  V0.01  broken out from context.c 0.07
24305 + *  V0.02  added task locking for namespace
24306 + *
24307 + */
24308 +
24309 +#include <linux/utsname.h>
24310 +#include <linux/sched.h>
24311 +#include <linux/vs_context.h>
24312 +#include <linux/vserver/namespace.h>
24313 +#include <linux/vserver/namespace_cmd.h>
24314 +#include <linux/dcache.h>
24315 +#include <linux/mount.h>
24316 +#include <linux/fs.h>
24317 +
24318 +#include <asm/errno.h>
24319 +#include <asm/uaccess.h>
24320 +
24321 +
24322 +/* namespace functions */
24323 +
24324 +#include <linux/namespace.h>
24325 +
24326 +int vx_set_namespace(struct vx_info *vxi, struct namespace *ns, struct fs_struct *fs)
24327 +{
24328 +       struct fs_struct *fs_copy;
24329 +       /* record 'ns' and a private copy of 'fs' in the context; -EPERM if one is already set */
24330 +       if (vxi->vx_namespace)
24331 +               return -EPERM;
24332 +       if (!ns || !fs)
24333 +               return -EINVAL;
24334 +
24335 +       fs_copy = copy_fs_struct(fs);
24336 +       if (!fs_copy)
24337 +               return -ENOMEM;
24338 +       /* the context takes its own reference on the namespace */
24339 +       get_namespace(ns);
24340 +       vxi->vx_namespace = ns;
24341 +       vxi->vx_fs = fs_copy;
24342 +       return 0;
24343 +}
24344 +
24345 +int vc_enter_namespace(uint32_t id, void __user *data)
24346 +{
24347 +       struct vx_info *vxi;
24348 +       struct fs_struct *old_fs, *fs;
24349 +       struct namespace *old_ns;
24350 +       int ret = 0;
24351 +       /* move the calling task into the namespace recorded in context 'id'; 'data' is unused */
24352 +       vxi = lookup_vx_info(id);
24353 +       if (!vxi)
24354 +               return -ESRCH;
24355 +
24356 +       ret = -EINVAL;          /* context has no namespace recorded */
24357 +       if (!vxi->vx_namespace)
24358 +               goto out_put;
24359 +
24360 +       ret = -ENOMEM;          /* the task gets its own copy of the context's fs_struct */
24361 +       fs = copy_fs_struct(vxi->vx_fs);
24362 +       if (!fs)
24363 +               goto out_put;
24364 +
24365 +       ret = 0;
24366 +       task_lock(current);     /* namespace and fs are swapped under the task lock */
24367 +       old_ns = current->namespace;
24368 +       old_fs = current->fs;
24369 +       get_namespace(vxi->vx_namespace);
24370 +       current->namespace = vxi->vx_namespace;
24371 +       current->fs = fs;
24372 +       task_unlock(current);
24373 +       /* drop what the task referenced before the switch */
24374 +       put_namespace(old_ns);
24375 +       put_fs_struct(old_fs);
24376 +out_put:
24377 +       put_vx_info(vxi);
24378 +       return ret;
24379 +}
24380 +
24381 +int vc_cleanup_namespace(uint32_t id, void __user *data)
24382 +{
24383 +       // down_write(&current->namespace->sem);
24384 +       spin_lock(&vfsmount_lock);      /* only lock taken; the namespace sem calls are commented out */
24385 +       umount_unused(current->namespace->root, current->fs);
24386 +       spin_unlock(&vfsmount_lock);
24387 +       // up_write(&current->namespace->sem);
24388 +       return 0;       /* always reports success; 'id' and 'data' are not used */
24389 +}
24390 +
24391 +int vc_set_namespace(uint32_t id, void __user *data)
24392 +{
24393 +       struct fs_struct *fs;
24394 +       struct namespace *ns;
24395 +       struct vx_info *vxi;
24396 +       int ret;
24397 +       /* record the calling task's namespace and fs in context 'id'; 'data' is unused */
24398 +       vxi = lookup_vx_info(id);
24399 +       if (!vxi)
24400 +               return -ESRCH;
24401 +       /* take temporary references on current's fs and namespace under the task lock */
24402 +       task_lock(current);
24403 +       fs = current->fs;
24404 +       atomic_inc(&fs->count);
24405 +       ns = current->namespace;
24406 +       get_namespace(current->namespace);
24407 +       task_unlock(current);
24408 +
24409 +       ret = vx_set_namespace(vxi, ns, fs);
24410 +       /* temporary references go away again; vx_set_namespace() keeps its own on success */
24411 +       put_namespace(ns);
24412 +       put_fs_struct(fs);
24413 +       put_vx_info(vxi);
24414 +       return ret;
24415 +}
24416 +
24417 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/network.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/network.c
24418 --- linux-2.6.16.20/kernel/vserver/network.c    1970-01-01 01:00:00 +0100
24419 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/network.c       2006-04-27 20:28:48 +0200
24420 @@ -0,0 +1,781 @@
24421 +/*
24422 + *  linux/kernel/vserver/network.c
24423 + *
24424 + *  Virtual Server: Network Support
24425 + *
24426 + *  Copyright (C) 2003-2005  Herbert Pötzl
24427 + *
24428 + *  V0.01  broken out from vcontext V0.05
24429 + *  V0.02  cleaned up implementation
24430 + *  V0.03  added equiv nx commands
24431 + *  V0.04  switch to RCU based hash
24432 + *  V0.05  and back to locking again
24433 + *
24434 + */
24435 +
24436 +#include <linux/slab.h>
24437 +#include <linux/vserver/network_cmd.h>
24438 +#include <linux/rcupdate.h>
24439 +#include <net/tcp.h>
24440 +
24441 +#include <asm/errno.h>
24442 +
24443 +
24444 +/*     __alloc_nx_info()
24445 +
24446 +       * allocate an initialized nx_info struct (NULL if out of memory)
24447 +       * doesn't make it visible (hash)                        */
24448 +
24449 +static struct nx_info *__alloc_nx_info(nid_t nid)
24450 +{
24451 +       struct nx_info *new = NULL;
24452 +
24453 +       vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid);
24454 +
24455 +       /* would this benefit from a slab cache? */
24456 +       new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
24457 +       if (!new)
24458 +               return NULL;    /* pointer return: NULL, not the integer 0 */
24459 +
24460 +       memset (new, 0, sizeof(struct nx_info));
24461 +       new->nx_id = nid;
24462 +       INIT_HLIST_NODE(&new->nx_hlist);
24463 +       atomic_set(&new->nx_usecnt, 0);
24464 +       atomic_set(&new->nx_tasks, 0);
24465 +       new->nx_state = 0;
24466 +
24467 +       new->nx_flags = NXF_INIT_SET;
24468 +
24469 +       /* rest of init goes here */
24470 +
24471 +       vxdprintk(VXD_CBIT(nid, 0),
24472 +               "alloc_nx_info(%d) = %p", nid, new);
24473 +       return new;
24474 +}
24475 +
24476 +/*     __dealloc_nx_info()
24477 +
24478 +       * final disposal of nx_info: poison, sanity check, kfree */
24479 +
24480 +static void __dealloc_nx_info(struct nx_info *nxi)
24481 +{
24482 +       vxdprintk(VXD_CBIT(nid, 0),
24483 +               "dealloc_nx_info(%p)", nxi);
24484 +       /* poison the hash link and the id before the memory is given back */
24485 +       nxi->nx_hlist.next = LIST_POISON1;
24486 +       nxi->nx_id = -1;
24487 +       /* nobody may still use the struct or be a task of it */
24488 +       BUG_ON(atomic_read(&nxi->nx_usecnt));
24489 +       BUG_ON(atomic_read(&nxi->nx_tasks));
24490 +
24491 +       nxi->nx_state |= NXS_RELEASED;
24492 +       kfree(nxi);
24493 +}
24494 +
24495 +static void __shutdown_nx_info(struct nx_info *nxi)
24496 +{
24497 +       nxi->nx_state |= NXS_SHUTDOWN;  /* state is marked before the change is announced */
24498 +       vs_net_change(nxi, VSC_NETDOWN);        /* defined outside this file; passed the NETDOWN code */
24499 +}
24500 +
24501 +/*     exported stuff                                          */
24502 +
24503 +void free_nx_info(struct nx_info *nxi)
24504 +{
24505 +       /* context shutdown is mandatory: state must be exactly NXS_SHUTDOWN */
24506 +       BUG_ON(nxi->nx_state != NXS_SHUTDOWN);
24507 +
24508 +       /* context must not be hashed (presumably already implied by the exact match above) */
24509 +       BUG_ON(nxi->nx_state & NXS_HASHED);
24510 +       /* no users and no tasks may be left */
24511 +       BUG_ON(atomic_read(&nxi->nx_usecnt));
24512 +       BUG_ON(atomic_read(&nxi->nx_tasks));
24513 +
24514 +       __dealloc_nx_info(nxi);
24515 +}
24516 +
24517 +
24518 +/*     hash table for nx_info structs, keyed by nid */
24519 +
24520 +#define NX_HASH_SIZE   13      /* number of hash buckets */
24521 +
24522 +struct hlist_head nx_info_hash[NX_HASH_SIZE];
24523 +/* the "hash_lock" the __*_nx_info() helpers assert via vxd_assert_lock() */
24524 +static spinlock_t nx_info_hash_lock = SPIN_LOCK_UNLOCKED;
24525 +
24526 +
24527 +static inline unsigned int __hashval(nid_t nid)
24528 +{
24529 +       return (nid % NX_HASH_SIZE);    /* bucket index into nx_info_hash[] */
24530 +}
24531 +
24532 +
24533 +
24534 +/*     __hash_nx_info()
24535 +
24536 +       * add the nxi to the global hash table and mark it NXS_HASHED
24537 +       * requires the hash_lock to be held                     */
24538 +
24539 +static inline void __hash_nx_info(struct nx_info *nxi)
24540 +{
24541 +       struct hlist_head *head;
24542 +
24543 +       vxd_assert_lock(&nx_info_hash_lock);
24544 +       vxdprintk(VXD_CBIT(nid, 4),
24545 +               "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id);
24546 +
24547 +       /* context must not be hashed */
24548 +       BUG_ON(nx_info_state(nxi, NXS_HASHED));
24549 +       /* flag first, then insert at the head of the bucket selected by the nid */
24550 +       nxi->nx_state |= NXS_HASHED;
24551 +       head = &nx_info_hash[__hashval(nxi->nx_id)];
24552 +       hlist_add_head(&nxi->nx_hlist, head);
24553 +}
24554 +
24555 +/*     __unhash_nx_info()
24556 +
24557 +       * remove the nxi from the global hash table and clear NXS_HASHED
24558 +       * requires the hash_lock to be held                     */
24559 +
24560 +static inline void __unhash_nx_info(struct nx_info *nxi)
24561 +{
24562 +       vxd_assert_lock(&nx_info_hash_lock);
24563 +       vxdprintk(VXD_CBIT(nid, 4),
24564 +               "__unhash_nx_info: %p[#%d]", nxi, nxi->nx_id);
24565 +
24566 +       /* context must be hashed */
24567 +       BUG_ON(!nx_info_state(nxi, NXS_HASHED));
24568 +       /* flag is cleared before the node is unlinked */
24569 +       nxi->nx_state &= ~NXS_HASHED;
24570 +       hlist_del(&nxi->nx_hlist);
24571 +}
24572 +
24573 +
24574 +/*     __lookup_nx_info()
24575 +
24576 +       * requires the hash_lock to be held
24577 +       * doesn't increment the nx_refcnt; returns NULL if not found */
24578 +
24579 +static inline struct nx_info *__lookup_nx_info(nid_t nid)
24580 +{
24581 +       struct hlist_head *head = &nx_info_hash[__hashval(nid)];
24582 +       struct hlist_node *pos;
24583 +       struct nx_info *nxi;
24584 +
24585 +       vxd_assert_lock(&nx_info_hash_lock);
24586 +       hlist_for_each(pos, head) {     /* walk only the bucket the nid hashes to */
24587 +               nxi = hlist_entry(pos, struct nx_info, nx_hlist);
24588 +
24589 +               if (nxi->nx_id == nid)
24590 +                       goto found;
24591 +       }
24592 +       nxi = NULL;     /* bucket exhausted without a match */
24593 +found:
24594 +       vxdprintk(VXD_CBIT(nid, 0),
24595 +               "__lookup_nx_info(#%u): %p[#%u]",
24596 +               nid, nxi, nxi?nxi->nx_id:0);
24597 +       return nxi;
24598 +}
24599 +
24600 +
24601 +/*     __nx_dynamic_id()
24602 +
24603 +       * find unused dynamic nid (0 if none is left)
24604 +       * requires the hash_lock to be held                     */
24605 +
24606 +static inline nid_t __nx_dynamic_id(void)
24607 +{
24608 +       static nid_t seq = MAX_N_CONTEXT;       /* last id tried, kept across calls */
24609 +       nid_t barrier = seq;    /* stop once the search is back where it started */
24610 +
24611 +       vxd_assert_lock(&nx_info_hash_lock);
24612 +       do {
24613 +               if (++seq > MAX_N_CONTEXT)      /* wrap to the start of the dynamic range */
24614 +                       seq = MIN_D_CONTEXT;
24615 +               if (!__lookup_nx_info(seq)) {
24616 +                       vxdprintk(VXD_CBIT(nid, 4),
24617 +                               "__nx_dynamic_id: [#%d]", seq);
24618 +                       return seq;
24619 +               }
24620 +       } while (barrier != seq);
24621 +       return 0;
24622 +}
24623 +
24624 +/*     __create_nx_info()
24625 +
24626 +       * create the requested context
24627 +       * get() and hash it; returns the nxi or an ERR_PTR()    */
24628 +
24629 +static struct nx_info * __create_nx_info(int id)
24630 +{
24631 +       struct nx_info *new, *nxi = NULL;
24632 +
24633 +       vxdprintk(VXD_CBIT(nid, 1), "create_nx_info(%d)*", id);
24634 +       /* allocated before the lock is taken; disposed of below if it ends up unused */
24635 +       if (!(new = __alloc_nx_info(id)))
24636 +               return ERR_PTR(-ENOMEM);
24637 +
24638 +       /* required to make dynamic nids unique */
24639 +       spin_lock(&nx_info_hash_lock);
24640 +
24641 +       /* dynamic context requested */
24642 +       if (id == NX_DYNAMIC_ID) {
24643 +#ifdef CONFIG_VSERVER_DYNAMIC_IDS
24644 +               id = __nx_dynamic_id();
24645 +               if (!id) {
24646 +                       printk(KERN_ERR "no dynamic context available.\n");
24647 +                       nxi = ERR_PTR(-EAGAIN);
24648 +                       goto out_unlock;
24649 +               }
24650 +               new->nx_id = id;
24651 +#else
24652 +               printk(KERN_ERR "dynamic contexts disabled.\n");
24653 +               nxi = ERR_PTR(-EINVAL);
24654 +               goto out_unlock;
24655 +#endif
24656 +       }
24657 +       /* static context requested */
24658 +       else if ((nxi = __lookup_nx_info(id))) {
24659 +               vxdprintk(VXD_CBIT(nid, 0),
24660 +                       "create_nx_info(%d) = %p (already there)", id, nxi);
24661 +               if (nx_info_flags(nxi, NXF_STATE_SETUP, 0))
24662 +                       nxi = ERR_PTR(-EBUSY);
24663 +               else
24664 +                       nxi = ERR_PTR(-EEXIST);
24665 +               goto out_unlock;
24666 +       }
24667 +       /* dynamic nid creation blocker */
24668 +       else if (id >= MIN_D_CONTEXT) {
24669 +               vxdprintk(VXD_CBIT(nid, 0),
24670 +                       "create_nx_info(%d) (dynamic rejected)", id);
24671 +               nxi = ERR_PTR(-EINVAL);
24672 +               goto out_unlock;
24673 +       }
24674 +
24675 +       /* new context */
24676 +       vxdprintk(VXD_CBIT(nid, 0),
24677 +               "create_nx_info(%d) = %p (new)", id, new);
24678 +       __hash_nx_info(get_nx_info(new));
24679 +       nxi = new, new = NULL;
24680 +
24681 +out_unlock:
24682 +       spin_unlock(&nx_info_hash_lock);
24683 +       if (new)        /* still set only on the error paths: it was never hashed */
24684 +               __dealloc_nx_info(new);
24685 +       return nxi;
24686 +}
24687 +
24688 +
24689 +
24690 +/*     exported stuff                                          */
24691 +
24692 +/* mark the context shut down, then take it out of the hash under the hash lock */
24693 +void unhash_nx_info(struct nx_info *nxi)
24694 +{
24695 +       __shutdown_nx_info(nxi);
24696 +       spin_lock(&nx_info_hash_lock);
24697 +       __unhash_nx_info(nxi);
24698 +       spin_unlock(&nx_info_hash_lock);
24699 +}
24700 +
24701 +#ifdef  CONFIG_VSERVER_LEGACYNET
24702 +/* legacy entry point: always asks for a dynamic nid; may return an ERR_PTR() */
24703 +struct nx_info *create_nx_info(void)
24704 +{
24705 +       return __create_nx_info(NX_DYNAMIC_ID);
24706 +}
24707 +
24708 +#endif
24709 +
24710 +/*     lookup_nx_info()
24711 +
24712 +       * search for a nx_info and get() it
24713 +       * negative id means current; ids 0 and 1 yield NULL     */
24714 +
24715 +struct nx_info *lookup_nx_info(int id)
24716 +{
24717 +       struct nx_info *nxi = NULL;
24718 +
24719 +       if (id < 0) {
24720 +               nxi = get_nx_info(current->nx_info);
24721 +       } else if (id > 1) {    /* 0 and 1 are never looked up in the hash */
24722 +               spin_lock(&nx_info_hash_lock);
24723 +               nxi = get_nx_info(__lookup_nx_info(id));
24724 +               spin_unlock(&nx_info_hash_lock);
24725 +       }
24726 +       return nxi;
24727 +}
24728 +
24729 +/*     nid_is_hashed()
24730 +
24731 +       * verify that nid is still hashed (returns 1 or 0)      */
24732 +
24733 +int nid_is_hashed(nid_t nid)
24734 +{
24735 +       int hashed;
24736 +       /* the answer is a snapshot: the lock is dropped before returning */
24737 +       spin_lock(&nx_info_hash_lock);
24738 +       hashed = (__lookup_nx_info(nid) != NULL);
24739 +       spin_unlock(&nx_info_hash_lock);
24740 +       return hashed;
24741 +}
24742 +
24743 +
24744 +#ifdef CONFIG_PROC_FS
24745 +
24746 +/*     get_nid_list()
24747 +
24748 +       * get a subset of hashed nids for proc, skipping entries via index
24749 +       * assumes size is at least one; returns the number stored */
24750 +
24751 +int get_nid_list(int index, unsigned int *nids, int size)
24752 +{
24753 +       int hindex, nr_nids = 0;
24754 +
24755 +       /* callers failing the VX_ADMIN|VX_WATCH check only get their own nid */
24756 +       if (!nx_check(0, VX_ADMIN|VX_WATCH)) {
24757 +               if (index > 0)
24758 +                       return 0;
24759 +               nids[nr_nids] = nx_current_nid();
24760 +               return 1;
24761 +       }
24762 +
24763 +       for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
24764 +               struct hlist_head *head = &nx_info_hash[hindex];
24765 +               struct hlist_node *pos;
24766 +
24767 +               spin_lock(&nx_info_hash_lock);
24768 +               hlist_for_each(pos, head) {
24769 +                       struct nx_info *nxi;
24770 +                       /* skip entries until 'index' has counted down */
24771 +                       if (--index > 0)
24772 +                               continue;
24773 +
24774 +                       nxi = hlist_entry(pos, struct nx_info, nx_hlist);
24775 +                       nids[nr_nids] = nxi->nx_id;
24776 +                       if (++nr_nids >= size) {        /* output array is full */
24777 +                               spin_unlock(&nx_info_hash_lock);
24778 +                               goto out;
24779 +                       }
24780 +               }
24781 +               /* keep the lock time short */
24782 +               spin_unlock(&nx_info_hash_lock);
24783 +       }
24784 +out:
24785 +       return nr_nids;
24786 +}
24787 +#endif
24788 +
24789 +
24790 +/*
24791 + *     migrate task to new network
24792 + *     gets nxi, puts old_nxi on change; always returns 0
24793 + */
24794 +
24795 +int nx_migrate_task(struct task_struct *p, struct nx_info *nxi)
24796 +{
24797 +       struct nx_info *old_nxi;
24798 +       int ret = 0;
24799 +
24800 +       if (!p || !nxi)
24801 +               BUG();
24802 +
24803 +       vxdprintk(VXD_CBIT(nid, 5),
24804 +               "nx_migrate_task(%p,%p[#%d.%d.%d])",
24805 +               p, nxi, nxi->nx_id,
24806 +               atomic_read(&nxi->nx_usecnt),
24807 +               atomic_read(&nxi->nx_tasks));
24808 +
24809 +       /* maybe disallow this completely? */
24810 +       old_nxi = task_get_nx_info(p);
24811 +       if (old_nxi == nxi)     /* already there: nothing to change */
24812 +               goto out;
24813 +       /* the task's nx_info pointer and nid are switched under the task lock */
24814 +       task_lock(p);
24815 +       if (old_nxi)
24816 +               clr_nx_info(&p->nx_info);
24817 +       claim_nx_info(nxi, p);
24818 +       set_nx_info(&p->nx_info, nxi);
24819 +       p->nid = nxi->nx_id;
24820 +       task_unlock(p);
24821 +
24822 +       vxdprintk(VXD_CBIT(nid, 5),
24823 +               "moved task %p into nxi:%p[#%d]",
24824 +               p, nxi, nxi->nx_id);
24825 +
24826 +       if (old_nxi)
24827 +               release_nx_info(old_nxi, p);
24828 +out:
24829 +       put_nx_info(old_nxi);   /* pairs with task_get_nx_info() above */
24830 +       return ret;
24831 +}
24832 +
24833 +
24834 +#ifdef CONFIG_INET
24835 +
24836 +#include <linux/netdevice.h>
24837 +#include <linux/inetdevice.h>
24838 +
24839 +int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
24840 +{
24841 +       if (!nxi)
24842 +               return 1;       /* no network context given: everything matches */
24843 +       if (!ifa)
24844 +               return 0;       /* no address to test */
24845 +       return addr_in_nx_info(nxi, ifa->ifa_address);
24846 +}
24847 +
24848 +int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
24849 +{
24850 +       struct in_device *in_dev;
24851 +       struct in_ifaddr **ifap;
24852 +       struct in_ifaddr *ifa;
24853 +       int ret = 0;
24854 +       /* returns 1 if nxi is NULL or any address on the device's ifa_list is in nxi, else 0 */
24855 +       if (!nxi)
24856 +               return 1;
24857 +
24858 +       in_dev = in_dev_get(dev);       /* paired with in_dev_put() below */
24859 +       if (!in_dev)
24860 +               goto out;
24861 +       /* the first matching address decides */
24862 +       for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
24863 +               ifap = &ifa->ifa_next) {
24864 +               if (addr_in_nx_info(nxi, ifa->ifa_address)) {
24865 +                       ret = 1;
24866 +                       break;
24867 +               }
24868 +       }
24869 +       in_dev_put(in_dev);
24870 +out:
24871 +       return ret;
24872 +}
24873 +
24874 +/*
24875 + *     check if address is covered by socket
24876 + *     (the socket's own receive address wins over its nx_info)
24877 + *     sk:     the socket to check against
24878 + *     addr:   the address in question (must be != 0)
24879 + */
24880 +static inline int __addr_in_socket(struct sock *sk, uint32_t addr)
24881 +{
24882 +       struct nx_info *nxi = sk->sk_nx_info;
24883 +       uint32_t saddr = inet_rcv_saddr(sk);
24884 +
24885 +       vxdprintk(VXD_CBIT(net, 5),
24886 +               "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx",
24887 +               sk, VXD_QUAD(addr), nxi, VXD_QUAD(saddr), sk->sk_socket,
24888 +               (sk->sk_socket?sk->sk_socket->flags:0));
24889 +
24890 +       if (saddr) {
24891 +               /* direct address match */
24892 +               return (saddr == addr);
24893 +       } else if (nxi) {
24894 +               /* match against nx_info */
24895 +               return addr_in_nx_info(nxi, addr);
24896 +       } else {
24897 +               /* unrestricted any socket */
24898 +               return 1;
24899 +       }
24900 +}
24901 +
24902 +/* returns 1 if 'addr' (or, for addr 0, any ipv4 address of nxi) is covered by socket 'sk' */
24903 +int nx_addr_conflict(struct nx_info *nxi, uint32_t addr, struct sock *sk)
24904 +{
24905 +       vxdprintk(VXD_CBIT(net, 2),
24906 +               "nx_addr_conflict(%p,%p) %d.%d.%d.%d",
24907 +               nxi, sk, VXD_QUAD(addr));
24908 +
24909 +       if (addr) {
24910 +               /* check real address */
24911 +               return __addr_in_socket(sk, addr);
24912 +       } else if (nxi) {
24913 +               /* check against nx_info */
24914 +               int i, n = nxi->nbipv4;
24915 +
24916 +               for (i=0; i<n; i++)
24917 +                       if (__addr_in_socket(sk, nxi->ipv4[i]))
24918 +                               return 1;
24919 +               return 0;
24920 +       } else {
24921 +               /* check against any */
24922 +               return 1;
24923 +       }
24924 +}
24925 +
24926 +#endif /* CONFIG_INET */
24927 +
24928 +void nx_set_persistent(struct nx_info *nxi)
24929 +{      /* bring the extra reference + claim in line with the current NXF_PERSISTENT flag */
24930 +       if (nx_info_flags(nxi, NXF_PERSISTENT, 0)) {
24931 +               get_nx_info(nxi);       /* flag set: take reference and claim */
24932 +               claim_nx_info(nxi, current);
24933 +       } else {
24934 +               release_nx_info(nxi, current);  /* flag clear: undo them; NOTE(review): assumes an earlier get/claim */
24935 +               put_nx_info(nxi);
24936 +       }
24937 +}
24938 +
24939 +/* vserver syscall commands below here */
24940 +
24941 +/* task nid and nx_info functions */
24942 +
24943 +#include <asm/uaccess.h>
24944 +
24945 +
24946 +int vc_task_nid(uint32_t id, void __user *data)
24947 +{      /* returns the nid of task id, or of the caller when id == 0; data is not used */
24948 +       nid_t nid;
24949 +
24950 +       if (id) {
24951 +               struct task_struct *tsk;
24952 +
24953 +               if (!vx_check(0, VX_ADMIN|VX_WATCH))
24954 +                       return -EPERM;  /* querying another task needs this check to pass */
24955 +
24956 +               read_lock(&tasklist_lock);      /* tsk is only dereferenced under the lock */
24957 +               tsk = find_task_by_real_pid(id);
24958 +               nid = (tsk) ? tsk->nid : -ESRCH;        /* error code travels through nid */
24959 +               read_unlock(&tasklist_lock);
24960 +       }
24961 +       else
24962 +               nid = nx_current_nid();
24963 +       return nid;
24964 +}
24965 +
24966 +
24967 +int vc_nx_info(uint32_t id, void __user *data)
24968 +{      /* copy the nid of network context id to user space; 0 or -errno */
24969 +       struct nx_info *nxi;
24970 +       struct vcmd_nx_info_v0 vc_data;
24971 +
24972 +       if (!capable(CAP_SYS_RESOURCE))
24973 +               return -EPERM;
24974 +
24975 +       nxi = lookup_nx_info(id);
24976 +       if (!nxi)
24977 +               return -ESRCH;  /* no such network context */
24978 +
24979 +       vc_data.nid = nxi->nx_id;       /* the only field filled in before the copy-out */
24980 +       put_nx_info(nxi);       /* balances lookup_nx_info(); nxi is not used afterwards */
24981 +
24982 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
24983 +               return -EFAULT;
24984 +       return 0;
24985 +}
24986 +
24987 +
24988 +/* network functions */
24989 +
24990 +int vc_net_create(uint32_t nid, void __user *data)
24991 +{      /* create network context nid and move the caller into it; returns the new id or -errno */
24992 +       struct vcmd_net_create vc_data = { .flagword = NXF_INIT_SET }; /* default used when data is NULL */
24993 +       struct nx_info *new_nxi;
24994 +       int ret;
24995 +
24996 +       if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
24997 +               return -EFAULT;
24998 +
24999 +       if ((nid > MAX_S_CONTEXT) && (nid != VX_DYNAMIC_ID))
25000 +               return -EINVAL; /* above the static range and not the dynamic-id marker */
25001 +       if (nid < 2)
25002 +               return -EINVAL; /* ids 0 and 1 cannot be created */
25003 +
25004 +       new_nxi = __create_nx_info(nid);
25005 +       if (IS_ERR(new_nxi))
25006 +               return PTR_ERR(new_nxi);
25007 +
25008 +       /* initial flags */
25009 +       new_nxi->nx_flags = vc_data.flagword;
25010 +
25011 +       /* get a reference for persistent contexts */
25012 +       if ((vc_data.flagword & NXF_PERSISTENT))
25013 +               nx_set_persistent(new_nxi);
25014 +
25015 +       vs_net_change(new_nxi, VSC_NETUP);
25016 +       ret = new_nxi->nx_id;   /* read before the reference is dropped below */
25017 +       nx_migrate_task(current, new_nxi);      /* result is not checked */
25018 +       /* if this fails, we might end up with a hashed nx_info */
25019 +       put_nx_info(new_nxi);
25020 +       return ret;
25021 +}
25022 +
25023 +
25024 +int vc_net_migrate(uint32_t id, void __user *data)
25025 +{      /* move the calling task into network context id; data is not used */
25026 +       struct nx_info *nxi;
25027 +
25028 +       nxi = lookup_nx_info(id);
25029 +       if (!nxi)
25030 +               return -ESRCH;  /* no such network context */
25031 +       nx_migrate_task(current, nxi);  /* NOTE(review): result ignored, 0 is returned regardless -- confirm intended */
25032 +       put_nx_info(nxi);       /* balances lookup_nx_info() */
25033 +       return 0;
25034 +}
25035 +
25036 +int vc_net_add(uint32_t nid, void __user *data)
25037 +{      /* add IPv4 address/mask pairs or a broadcast address to context nid; returns entries stored or -errno */
25038 +       struct vcmd_net_addr_v0 vc_data;
25039 +       struct nx_info *nxi;
25040 +       int index, pos, ret = 0;
25041 +
25042 +       if (!data || copy_from_user (&vc_data, data, sizeof(vc_data)))
25043 +               return -EFAULT; /* never go on with an unfilled (stack garbage) vc_data */
25044 +
25045 +       switch (vc_data.type) {
25046 +       case NXA_TYPE_IPV4:
25047 +               if ((vc_data.count < 1) || (vc_data.count > 4))
25048 +                       return -EINVAL; /* validated before any context lookup */
25049 +               break;
25050 +
25051 +       default:
25052 +               break;  /* other types are sorted out in the second switch */
25053 +       }
25054 +
25055 +       nxi = lookup_nx_info(nid);
25056 +       if (!nxi)
25057 +               return -ESRCH;
25058 +
25059 +       switch (vc_data.type) {
25060 +       case NXA_TYPE_IPV4:
25061 +               index = 0;
25062 +               while ((index < vc_data.count) &&
25063 +                       ((pos = nxi->nbipv4) < NB_IPV4ROOT)) {  /* stop silently once the table is full */
25064 +                       nxi->ipv4[pos] = vc_data.ip[index];
25065 +                       nxi->mask[pos] = vc_data.mask[index];
25066 +                       index++;
25067 +                       nxi->nbipv4++;
25068 +               }
25069 +               ret = index;    /* may be smaller than vc_data.count */
25070 +               break;
25071 +
25072 +       case NXA_TYPE_IPV4|NXA_MOD_BCAST:
25073 +               nxi->v4_bcast = vc_data.ip[0];  /* only the first ip entry is used */
25074 +               ret = 1;
25075 +               break;
25076 +
25077 +       default:
25078 +               ret = -EINVAL;
25079 +               break;
25080 +       }
25081 +
25082 +       put_nx_info(nxi);       /* balances lookup_nx_info() */
25083 +       return ret;
25084 +}
25085 +
25086 +int vc_net_remove(uint32_t nid, void __user *data)
25087 +{      /* remove addresses from context nid; only NXA_TYPE_ANY (drop all IPv4 entries) is handled */
25088 +       struct vcmd_net_addr_v0 vc_data;
25089 +       struct nx_info *nxi;
25090 +       int ret = 0;
25091 +
25092 +       if (!data || copy_from_user (&vc_data, data, sizeof(vc_data)))
25093 +               return -EFAULT; /* never go on with an unfilled (stack garbage) vc_data */
25094 +
25095 +       nxi = lookup_nx_info(nid);
25096 +       if (!nxi)
25097 +               return -ESRCH;
25098 +
25099 +       switch ((unsigned)vc_data.type) {
25100 +       case NXA_TYPE_ANY:
25101 +               nxi->nbipv4 = 0;        /* entries stay in place, only the count is reset */
25102 +               break;
25103 +
25104 +       default:
25105 +               ret = -EINVAL;
25106 +               break;
25107 +       }
25108 +
25109 +       put_nx_info(nxi);       /* balances lookup_nx_info() */
25110 +       return ret;
25111 +}
25112 +
25113 +int vc_get_nflags(uint32_t id, void __user *data)
25114 +{      /* copy the flag word and the changeable-flag mask of context id to user space */
25115 +       struct nx_info *nxi;
25116 +       struct vcmd_net_flags_v0 vc_data;
25117 +
25118 +       nxi = lookup_nx_info(id);
25119 +       if (!nxi)
25120 +               return -ESRCH;
25121 +
25122 +       vc_data.flagword = nxi->nx_flags;
25123 +
25124 +       /* special STATE flag handling */
25125 +       vc_data.mask = vx_mask_flags(~0UL, nxi->nx_flags, NXF_ONE_TIME);        /* NOTE(review): ~0UL is 32 bits wide on 32-bit builds -- confirm mask width */
25126 +
25127 +       put_nx_info(nxi);       /* balances lookup_nx_info() */
25128 +
25129 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
25130 +               return -EFAULT;
25131 +       return 0;
25132 +}
25133 +
25134 +int vc_set_nflags(uint32_t id, void __user *data)
25135 +{      /* update the flags of context id from a user-supplied flagword/mask pair */
25136 +       struct nx_info *nxi;
25137 +       struct vcmd_net_flags_v0 vc_data;
25138 +       uint64_t mask, trigger;
25139 +
25140 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
25141 +               return -EFAULT;
25142 +
25143 +       nxi = lookup_nx_info(id);
25144 +       if (!nxi)
25145 +               return -ESRCH;
25146 +
25147 +       /* special STATE flag handling */
25148 +       mask = vx_mask_mask(vc_data.mask, nxi->nx_flags, NXF_ONE_TIME);
25149 +       trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword);   /* masked bits whose value is about to change */
25150 +
25151 +       nxi->nx_flags = vx_mask_flags(nxi->nx_flags,
25152 +               vc_data.flagword, mask);
25153 +       if (trigger & NXF_PERSISTENT)
25154 +               nx_set_persistent(nxi); /* only when the persistent bit actually toggled */
25155 +
25156 +       put_nx_info(nxi);       /* balances lookup_nx_info() */
25157 +       return 0;
25158 +}
25159 +
25160 +int vc_get_ncaps(uint32_t id, void __user *data)
25161 +{      /* copy the network capabilities of context id to user space */
25162 +       struct nx_info *nxi;
25163 +       struct vcmd_net_caps_v0 vc_data;
25164 +
25165 +       nxi = lookup_nx_info(id);
25166 +       if (!nxi)
25167 +               return -ESRCH;
25168 +
25169 +       vc_data.ncaps = nxi->nx_ncaps;
25170 +       vc_data.cmask = ~0UL;   /* NOTE(review): only 32 bits set on 32-bit builds -- confirm cmask width */
25171 +       put_nx_info(nxi);       /* balances lookup_nx_info() */
25172 +
25173 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
25174 +               return -EFAULT;
25175 +       return 0;
25176 +}
25177 +
25178 +int vc_set_ncaps(uint32_t id, void __user *data)
25179 +{      /* merge user-supplied capabilities into context id under the supplied mask */
25180 +       struct nx_info *nxi;
25181 +       struct vcmd_net_caps_v0 vc_data;
25182 +
25183 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
25184 +               return -EFAULT;
25185 +
25186 +       nxi = lookup_nx_info(id);
25187 +       if (!nxi)
25188 +               return -ESRCH;
25189 +
25190 +       nxi->nx_ncaps = vx_mask_flags(nxi->nx_ncaps,
25191 +               vc_data.ncaps, vc_data.cmask);  /* cmask comes straight from user space */
25192 +       put_nx_info(nxi);       /* balances lookup_nx_info() */
25193 +       return 0;
25194 +}
25195 +
25196 +
25197 +#include <linux/module.h>
25198 +
25199 +EXPORT_SYMBOL_GPL(free_nx_info);
25200 +EXPORT_SYMBOL_GPL(unhash_nx_info);
25201 +
25202 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/proc.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/proc.c
25203 --- linux-2.6.16.20/kernel/vserver/proc.c       1970-01-01 01:00:00 +0100
25204 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/proc.c  2006-05-27 19:15:19 +0200
25205 @@ -0,0 +1,921 @@
25206 +/*
25207 + *  linux/kernel/vserver/proc.c
25208 + *
25209 + *  Virtual Context Support
25210 + *
25211 + *  Copyright (C) 2003-2005  Herbert Pötzl
25212 + *
25213 + *  V0.01  basic structure
25214 + *  V0.02  adaptation vs1.3.0
25215 + *  V0.03  proc permissions
25216 + *  V0.04  locking/generic
25217 + *  V0.05  next generation procfs
25218 + *  V0.06  inode validation
25219 + *  V0.07  generic rewrite vid
25220 + *
25221 + */
25222 +
25223 +#include <linux/errno.h>
25224 +#include <linux/proc_fs.h>
25225 +#include <linux/sched.h>
25226 +#include <linux/vs_context.h>
25227 +#include <linux/vs_network.h>
25228 +#include <linux/vs_cvirt.h>
25229 +
25230 +#include <linux/vserver/switch.h>
25231 +#include <linux/vserver/global.h>
25232 +
25233 +#include <asm/uaccess.h>
25234 +#include <asm/unistd.h>
25235 +
25236 +#include "cvirt_proc.h"
25237 +#include "limit_proc.h"
25238 +#include "sched_proc.h"
25239 +#include "vci_config.h"
25240 +
25241 +static struct proc_dir_entry *proc_virtual;
25242 +
25243 +static struct proc_dir_entry *proc_vnet;
25244 +
25245 +
25246 +enum vid_directory_inos {      /* type part (low 16 bits) of the fake inode numbers, see fake_ino() */
25247 +       PROC_XID_INO = 32,      /* 0x20: per-xid directory; xid file types count up from here */
25248 +       PROC_XID_INFO,
25249 +       PROC_XID_STATUS,
25250 +       PROC_XID_LIMIT,
25251 +       PROC_XID_SCHED,
25252 +       PROC_XID_CVIRT,
25253 +       PROC_XID_CACCT,
25254 +
25255 +       PROC_NID_INO = 64,      /* 0x40: per-nid directory; nid file types count up from here */
25256 +       PROC_NID_INFO,
25257 +       PROC_NID_STATUS,
25258 +};
25259 +
25260 +#define PROC_VID_MASK  0x60
25261 +
25262 +
25263 +/* first the actual feeds */
25264 +
25265 +
25266 +static int proc_virtual_info(int vid, char *buffer)
25267 +{      /* writes VCI version, syscall number and kernel config word; vid is not used */
25268 +       return sprintf(buffer,
25269 +               "VCIVersion:\t%04x:%04x\n"
25270 +               "VCISyscall:\t%d\n"
25271 +               "VCIKernel:\t%08x\n"
25272 +               ,VCI_VERSION >> 16      /* upper half of the version word */
25273 +               ,VCI_VERSION & 0xFFFF   /* lower half of the version word */
25274 +               ,__NR_vserver
25275 +               ,vci_kernel_config()
25276 +               );
25277 +}
25278 +
25279 +static int proc_virtual_status(int vid, char *buffer)
25280 +{      /* writes the two global context counters; vid is not used */
25281 +       return sprintf(buffer,
25282 +               "#CTotal:\t%d\n"
25283 +               "#CActive:\t%d\n"
25284 +               ,atomic_read(&vx_global_ctotal)
25285 +               ,atomic_read(&vx_global_cactive)
25286 +               );
25287 +}
25288 +
25289 +
25290 +int proc_xid_info (int vid, char *buffer)
25291 +{      /* writes id, vx_info pointer and init pid of context vid; returns bytes written */
25292 +       struct vx_info *vxi;
25293 +       int length;
25294 +
25295 +       vxi = lookup_vx_info(vid);
25296 +       if (!vxi)
25297 +               return 0;       /* unknown context: zero-length output */
25298 +       length = sprintf(buffer,
25299 +               "ID:\t%d\n"
25300 +               "Info:\t%p\n"
25301 +               "Init:\t%d\n"
25302 +               ,vxi->vx_id
25303 +               ,vxi
25304 +               ,vxi->vx_initpid
25305 +               );
25306 +       put_vx_info(vxi);       /* balances lookup_vx_info() */
25307 +       return length;
25308 +}
25309 +
25310 +int proc_xid_status (int vid, char *buffer)
25311 +{      /* writes use count, task count, flags and capability words of context vid */
25312 +       struct vx_info *vxi;
25313 +       int length;
25314 +
25315 +       vxi = lookup_vx_info(vid);
25316 +       if (!vxi)
25317 +               return 0;       /* unknown context: zero-length output */
25318 +       length = sprintf(buffer,
25319 +               "UseCnt:\t%d\n"
25320 +               "Tasks:\t%d\n"
25321 +               "Flags:\t%016llx\n"
25322 +               "BCaps:\t%016llx\n"
25323 +               "CCaps:\t%016llx\n"
25324 +//             "Ticks:\t%d\n"
25325 +               ,atomic_read(&vxi->vx_usecnt)
25326 +               ,atomic_read(&vxi->vx_tasks)
25327 +               ,(unsigned long long)vxi->vx_flags      /* casts match the %llx conversions */
25328 +               ,(unsigned long long)vxi->vx_bcaps
25329 +               ,(unsigned long long)vxi->vx_ccaps
25330 +//             ,atomic_read(&vxi->limit.ticks)
25331 +               );
25332 +       put_vx_info(vxi);       /* balances lookup_vx_info() */
25333 +       return length;
25334 +}
25335 +
25336 +int proc_xid_limit (int vid, char *buffer)
25337 +{      /* lets vx_info_proc_limit() render the limit section of context vid */
25338 +       struct vx_info *vxi;
25339 +       int length;
25340 +
25341 +       vxi = lookup_vx_info(vid);
25342 +       if (!vxi)
25343 +               return 0;       /* unknown context: zero-length output */
25344 +       length = vx_info_proc_limit(&vxi->limit, buffer);
25345 +       put_vx_info(vxi);       /* balances lookup_vx_info() */
25346 +       return length;
25347 +}
25348 +
25349 +int proc_xid_sched (int vid, char *buffer)
25350 +{      /* renders the scheduler section of context vid, then one part per online cpu */
25351 +       struct vx_info *vxi;
25352 +       int cpu, length;
25353 +
25354 +       vxi = lookup_vx_info(vid);
25355 +       if (!vxi)
25356 +               return 0;       /* unknown context: zero-length output */
25357 +       length = vx_info_proc_sched(&vxi->sched, buffer);
25358 +       for_each_online_cpu(cpu) {
25359 +               length += vx_info_proc_sched_pc(
25360 +                       &vx_per_cpu(vxi, sched_pc, cpu),
25361 +                       buffer + length, cpu);  /* append after what is already written */
25362 +       }
25363 +       put_vx_info(vxi);       /* balances lookup_vx_info() */
25364 +       return length;
25365 +}
25366 +
25367 +int proc_xid_cvirt (int vid, char *buffer)
25368 +{      /* renders the cvirt section of context vid, then one part per online cpu */
25369 +       struct vx_info *vxi;
25370 +       int cpu, length;
25371 +
25372 +       vxi = lookup_vx_info(vid);
25373 +       if (!vxi)
25374 +               return 0;       /* unknown context: zero-length output */
25375 +       vx_update_load(vxi);    /* called before the values are formatted */
25376 +       length = vx_info_proc_cvirt(&vxi->cvirt, buffer);
25377 +       for_each_online_cpu(cpu) {
25378 +               length += vx_info_proc_cvirt_pc(
25379 +                       &vx_per_cpu(vxi, cvirt_pc, cpu),
25380 +                       buffer + length, cpu);  /* append after what is already written */
25381 +       }
25382 +       put_vx_info(vxi);       /* balances lookup_vx_info() */
25383 +       return length;
25384 +}
25385 +
25386 +int proc_xid_cacct (int vid, char *buffer)
25387 +{      /* lets vx_info_proc_cacct() render the cacct section of context vid */
25388 +       struct vx_info *vxi;
25389 +       int length;
25390 +
25391 +       vxi = lookup_vx_info(vid);
25392 +       if (!vxi)
25393 +               return 0;       /* unknown context: zero-length output */
25394 +       length = vx_info_proc_cacct(&vxi->cacct, buffer);
25395 +       put_vx_info(vxi);       /* balances lookup_vx_info() */
25396 +       return length;
25397 +}
25398 +
25399 +
25400 +static int proc_vnet_info(int vid, char *buffer)
25401 +{      /* writes VCI version and syscall number; vid is not used */
25402 +       return sprintf(buffer,
25403 +               "VCIVersion:\t%04x:%04x\n"
25404 +               "VCISyscall:\t%d\n"
25405 +               ,VCI_VERSION >> 16      /* upper half of the version word */
25406 +               ,VCI_VERSION & 0xFFFF   /* lower half of the version word */
25407 +               ,__NR_vserver
25408 +               );
25409 +}
25410 +
25411 +
25412 +int proc_nid_info (int vid, char *buffer)
25413 +{      /* writes id and nx_info pointer of context vid, then one line per address/mask pair */
25414 +       struct nx_info *nxi;
25415 +       int length, i;
25416 +
25417 +       nxi = lookup_nx_info(vid);
25418 +       if (!nxi)
25419 +               return 0;       /* unknown context: zero-length output */
25420 +       length = sprintf(buffer,
25421 +               "ID:\t%d\n"
25422 +               "Info:\t%p\n"
25423 +               ,nxi->nx_id
25424 +               ,nxi
25425 +               );
25426 +       for (i=0; i<nxi->nbipv4; i++) {
25427 +               length += sprintf(buffer + length,      /* append after what is already written */
25428 +                       "%d:\t" NIPQUAD_FMT "/" NIPQUAD_FMT "\n", i,
25429 +                       NIPQUAD(nxi->ipv4[i]), NIPQUAD(nxi->mask[i]));
25430 +       }
25431 +       put_nx_info(nxi);       /* balances lookup_nx_info() */
25432 +       return length;
25433 +}
25434 +
25435 +int proc_nid_status (int vid, char *buffer)
25436 +{      /* writes use count and task count of network context vid */
25437 +       struct nx_info *nxi;
25438 +       int length;
25439 +
25440 +       nxi = lookup_nx_info(vid);
25441 +       if (!nxi)
25442 +               return 0;       /* unknown context: zero-length output */
25443 +       length = sprintf(buffer,
25444 +               "UseCnt:\t%d\n"
25445 +               "Tasks:\t%d\n"
25446 +               ,atomic_read(&nxi->nx_usecnt)
25447 +               ,atomic_read(&nxi->nx_tasks)
25448 +               );
25449 +       put_nx_info(nxi);       /* balances lookup_nx_info() */
25450 +       return length;
25451 +}
25452 +
25453 +/* here the inode helpers */
25454 +
25455 +
25456 +#define fake_ino(id,nr) (((nr) & 0xFFFF) | \
25457 +                       (((id) & 0xFFFF) << 16))
25458 +
25459 +#define inode_vid(i)   (((i)->i_ino >> 16) & 0xFFFF)
25460 +#define inode_type(i)  ((i)->i_ino & 0xFFFF)
25461 +
25462 +#define MAX_MULBY10    ((~0U-9)/10)
25463 +
25464 +
25465 +static struct inode *proc_vid_make_inode(struct super_block * sb,
25466 +       int vid, int ino)
25467 +{      /* new inode on sb numbered fake_ino(vid, ino), owned by 0:0; NULL if new_inode() fails */
25468 +       struct inode *inode = new_inode(sb);
25469 +
25470 +       if (!inode)
25471 +               goto out;       /* NULL is handed back to the caller */
25472 +
25473 +       inode->i_mtime = inode->i_atime =
25474 +               inode->i_ctime = CURRENT_TIME;
25475 +       inode->i_ino = fake_ino(vid, ino);      /* id in the upper 16 bits, type in the lower 16 */
25476 +
25477 +       inode->i_uid = 0;
25478 +       inode->i_gid = 0;
25479 +out:
25480 +       return inode;   /* mode, i_op and i_fop are left for the caller to set */
25481 +}
25482 +
25483 +static int proc_vid_revalidate(struct dentry * dentry, struct nameidata *nd)
25484 +{      /* 1 while the context behind the dentry is still hashed; otherwise drops the dentry and returns 0 */
25485 +       struct inode * inode = dentry->d_inode;
25486 +       int vid, hashed=0;
25487 +
25488 +       vid = inode_vid(inode);
25489 +       switch (inode_type(inode) & PROC_VID_MASK) {    /* 0x60 mask yields PROC_XID_INO (32) or PROC_NID_INO (64) */
25490 +               case PROC_XID_INO:
25491 +                       hashed = xid_is_hashed(vid);
25492 +                       break;
25493 +               case PROC_NID_INO:
25494 +                       hashed = nid_is_hashed(vid);
25495 +                       break;
25496 +       }       /* any other type leaves hashed at 0 */
25497 +       if (hashed)
25498 +               return 1;
25499 +       d_drop(dentry);
25500 +       return 0;
25501 +}
25502 +
25503 +
25504 +#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024)
25505 +
25506 +static ssize_t proc_vid_info_read(struct file * file, char __user * buf,
25507 +                         size_t count, loff_t *ppos)
25508 +{      /* read(): render the whole file into a scratch page, then copy the requested window to buf */
25509 +       struct inode * inode = file->f_dentry->d_inode;
25510 +       unsigned long page;
25511 +       ssize_t length;
25512 +       int vid;
25513 +
25514 +       if (count > PROC_BLOCK_SIZE)
25515 +               count = PROC_BLOCK_SIZE;        /* cap a single read at PAGE_SIZE - 1024 */
25516 +       if (!(page = __get_free_page(GFP_KERNEL)))
25517 +               return -ENOMEM;
25518 +
25519 +       vid = inode_vid(inode);
25520 +       length = PROC_I(inode)->op.proc_vid_read(vid, (char*)page);     /* NOTE(review): nothing here bounds the feed's output to the page */
25521 +
25522 +       if (length >= 0)
25523 +               length = simple_read_from_buffer(buf, count, ppos,
25524 +                       (char *)page, length);
25525 +       free_page(page);
25526 +       return length;  /* bytes copied, or a negative value passed through */
25527 +}
25528 +
25529 +
25530 +
25531 +
25532 +
25533 +/* here comes the lower level (vid) */
25534 +
25535 +static struct file_operations proc_vid_info_file_operations = {
25536 +       .read =         proc_vid_info_read,
25537 +};
25538 +
25539 +static struct dentry_operations proc_vid_dentry_operations = {
25540 +       .d_revalidate = proc_vid_revalidate,
25541 +};
25542 +
25543 +
25544 +struct vid_entry {     /* one named file in a per-context proc directory */
25545 +       int type;       /* PROC_XID_* / PROC_NID_* value, used as the type part of the inode number */
25546 +       int len;        /* name length without the terminating NUL (E() stores sizeof(name)-1) */
25547 +       char *name;     /* file name; NULL marks the end of a table */
25548 +       mode_t mode;    /* value assigned to i_mode */
25549 +};
25550 +
25551 +#define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)}
25552 +
25553 +static struct vid_entry vx_base_stuff[] = {
25554 +       E(PROC_XID_INFO,        "info",         S_IFREG|S_IRUGO),
25555 +       E(PROC_XID_STATUS,      "status",       S_IFREG|S_IRUGO),
25556 +       E(PROC_XID_LIMIT,       "limit",        S_IFREG|S_IRUGO),
25557 +       E(PROC_XID_SCHED,       "sched",        S_IFREG|S_IRUGO),
25558 +       E(PROC_XID_CVIRT,       "cvirt",        S_IFREG|S_IRUGO),
25559 +       E(PROC_XID_CACCT,       "cacct",        S_IFREG|S_IRUGO),
25560 +       {0,0,NULL,0}
25561 +};
25562 +
25563 +static struct vid_entry vn_base_stuff[] = {
25564 +       E(PROC_NID_INFO,        "info",         S_IFREG|S_IRUGO),
25565 +       E(PROC_NID_STATUS,      "status",       S_IFREG|S_IRUGO),
25566 +       {0,0,NULL,0}
25567 +};
25568 +
25569 +
25570 +
25571 +static struct dentry *proc_vid_lookup(struct inode *dir,
25572 +       struct dentry *dentry, struct nameidata *nd)
25573 +{      /* lookup of a file name inside a per-xid or per-nid directory; NULL on success, ERR_PTR otherwise */
25574 +       struct inode *inode;
25575 +       struct vid_entry *p;
25576 +       int error;
25577 +
25578 +       error = -ENOENT;
25579 +       inode = NULL;
25580 +
25581 +       switch (inode_type(dir)) {      /* pick the entry table that belongs to this directory */
25582 +               case PROC_XID_INO:
25583 +                       p = vx_base_stuff;
25584 +                       break;
25585 +               case PROC_NID_INO:
25586 +                       p = vn_base_stuff;
25587 +                       break;
25588 +               default:
25589 +                       goto out;
25590 +       }
25591 +
25592 +       for (; p->name; p++) {
25593 +               if (p->len != dentry->d_name.len)
25594 +                       continue;       /* cheap length test before memcmp */
25595 +               if (!memcmp(dentry->d_name.name, p->name, p->len))
25596 +                       break;
25597 +       }
25598 +       if (!p->name)
25599 +               goto out;       /* hit the terminator: -ENOENT */
25600 +
25601 +       error = -EINVAL;
25602 +       inode = proc_vid_make_inode(dir->i_sb, inode_vid(dir), p->type);
25603 +       if (!inode)
25604 +               goto out;
25605 +
25606 +       switch(p->type) {       /* bind the feed function that renders this file */
25607 +       case PROC_XID_INFO:
25608 +               PROC_I(inode)->op.proc_vid_read = proc_xid_info;
25609 +               break;
25610 +       case PROC_XID_STATUS:
25611 +               PROC_I(inode)->op.proc_vid_read = proc_xid_status;
25612 +               break;
25613 +       case PROC_XID_LIMIT:
25614 +               PROC_I(inode)->op.proc_vid_read = proc_xid_limit;
25615 +               break;
25616 +       case PROC_XID_SCHED:
25617 +               PROC_I(inode)->op.proc_vid_read = proc_xid_sched;
25618 +               break;
25619 +       case PROC_XID_CVIRT:
25620 +               PROC_I(inode)->op.proc_vid_read = proc_xid_cvirt;
25621 +               break;
25622 +       case PROC_XID_CACCT:
25623 +               PROC_I(inode)->op.proc_vid_read = proc_xid_cacct;
25624 +               break;
25625 +
25626 +       case PROC_NID_INFO:
25627 +               PROC_I(inode)->op.proc_vid_read = proc_nid_info;
25628 +               break;
25629 +       case PROC_NID_STATUS:
25630 +               PROC_I(inode)->op.proc_vid_read = proc_nid_status;
25631 +               break;
25632 +
25633 +       default:
25634 +               printk(KERN_ERR "procfs: impossible type (%d)\n", p->type);
25635 +               iput(inode);    /* give the fresh inode back before bailing out */
25636 +               return ERR_PTR(-EINVAL);
25637 +       }
25638 +       inode->i_mode = p->mode;
25639 +       inode->i_fop = &proc_vid_info_file_operations;
25640 +       inode->i_nlink = 1;
25641 +       inode->i_flags|=S_IMMUTABLE;
25642 +
25643 +       dentry->d_op = &proc_vid_dentry_operations;     /* revalidation drops the dentry once the context is gone */
25644 +       d_add(dentry, inode);
25645 +       error = 0;
25646 +out:
25647 +       return ERR_PTR(error);  /* error == 0 yields NULL */
25648 +}
25649 +
25650 +
25651 +static int proc_vid_readdir(struct file * filp,
25652 +       void * dirent, filldir_t filldir)
25653 +{      /* readdir of a per-context directory: ".", "..", then the entries of the matching table */
25654 +       int i, size;
25655 +       struct inode *inode = filp->f_dentry->d_inode;
25656 +       struct vid_entry *p;
25657 +
25658 +       i = filp->f_pos;
25659 +       switch (i) {
25660 +       case 0:
25661 +               if (filldir(dirent, ".", 1, i,
25662 +                       inode->i_ino, DT_DIR) < 0)
25663 +                       return 0;       /* filldir refused the entry: stop, f_pos not advanced */
25664 +               i++;
25665 +               filp->f_pos++;
25666 +               /* fall through */
25667 +       case 1:
25668 +               if (filldir(dirent, "..", 2, i,
25669 +                       PROC_ROOT_INO, DT_DIR) < 0)
25670 +                       return 0;
25671 +               i++;
25672 +               filp->f_pos++;
25673 +               /* fall through */
25674 +       default:
25675 +               i -= 2; /* table index: positions 0 and 1 were "." and ".." */
25676 +               switch (inode_type(inode)) {
25677 +               case PROC_XID_INO:
25678 +                       size = sizeof(vx_base_stuff);
25679 +                       p = vx_base_stuff + i;
25680 +                       break;
25681 +               case PROC_NID_INO:
25682 +                       size = sizeof(vn_base_stuff);
25683 +                       p = vn_base_stuff + i;
25684 +                       break;
25685 +               default:
25686 +                       return 1;
25687 +               }
25688 +               if (i >= size/sizeof(struct vid_entry))
25689 +                       return 1;       /* position lies beyond the table (terminator included) */
25690 +               while (p->name) {
25691 +                       if (filldir(dirent, p->name, p->len,
25692 +                               filp->f_pos, fake_ino(inode_vid(inode),
25693 +                               p->type), p->mode >> 12) < 0)   /* mode >> 12 passes the file-type bits as d_type */
25694 +                               return 0;
25695 +                       filp->f_pos++;
25696 +                       p++;
25697 +               }
25698 +       }
25699 +       return 1;
25700 +}
25701 +
25702 +
25703 +
25704 +
25705 +/* now the upper level (virtual) */
25706 +
25707 +static struct file_operations proc_vid_file_operations = {
25708 +       .read =         generic_read_dir,
25709 +       .readdir =      proc_vid_readdir,
25710 +};
25711 +
25712 +static struct inode_operations proc_vid_inode_operations = {
25713 +       .lookup =       proc_vid_lookup,
25714 +};
25715 +
25716 +
25717 +
25718 +static __inline__ int atovid(const char *str, int len)
25719 +{      /* parse len chars of str as a decimal id without leading zeros; returns the id or -1 */
25720 +       unsigned int vid, c;    /* unsigned: chars below '0' wrap above 9, and no signed overflow */
25721 +
25722 +       vid = 0;
25723 +       while (len-- > 0) {
25724 +               c = *str - '0';
25725 +               str++;
25726 +               if (c > 9)
25727 +                       return -1;      /* not a decimal digit */
25728 +               if (vid >= MAX_MULBY10)
25729 +                       return -1;      /* one more digit would not fit in 32 bits */
25730 +               vid *= 10;
25731 +               vid += c;
25732 +               if (!vid)
25733 +                       return -1;      /* rejects "0" and any leading zero */
25734 +       }
25735 +       return (vid > (~0U >> 1)) ? -1 : (int)vid;      /* ids above INT_MAX cannot be returned as non-negative */
25736 +}
25737 +
25738 +static __inline__ unsigned long atoaddr(const char *str, int len)
25739 +{      /* parse len chars of str as hex (either case, no leading zero); returns the value or (unsigned long)-1 */
25740 +       unsigned long addr, c;
25741 +
25742 +       addr = 0;
25743 +       while (len-- > 0) {
25744 +               c = *str - '0';
25745 +               if (c > 9) {
25746 +                       c = (*str | 0x20) - 'a';        /* fold case; anything outside a-f ends up > 5 */
25747 +                       if (c > 5)
25748 +                               return -1;      /* not a hex digit */
25749 +                       c += 10;
25750 +               }
25751 +               str++;
25752 +               if (addr >= ((1 << 28) - 1))
25753 +                       return -1;      /* another nibble would not fit in 32 bits */
25754 +               addr = (addr << 4) | c;
25755 +               if (!addr)
25756 +                       return -1;      /* rejects "0" and any leading zero */
25757 +       }
25758 +       return addr;
25759 +}
25760 +
25761 +
25762 +struct dentry *proc_virtual_lookup(struct inode *dir,
25763 +       struct dentry * dentry, struct nameidata *nd)
25764 +{      /* lookup in the top-level virtual dir: "info", "status", or a numeric xid directory */
25765 +       int xid, len, ret;
25766 +       struct vx_info *vxi;
25767 +       const char *name;
25768 +       struct inode *inode;
25769 +
25770 +       name = dentry->d_name.name;
25771 +       len = dentry->d_name.len;
25772 +       ret = -ENOMEM;  /* result if inode allocation fails for the fixed entries below */
25773 +
25774 +#if 0
25775 +       if (len == 7 && !memcmp(name, "current", 7)) {
25776 +               inode = new_inode(dir->i_sb);
25777 +               if (!inode)
25778 +                       goto out;
25779 +               inode->i_mtime = inode->i_atime =
25780 +                       inode->i_ctime = CURRENT_TIME;
25781 +               inode->i_ino = fake_ino(1, PROC_XID_INO);
25782 +               inode->i_mode = S_IFLNK|S_IRWXUGO;
25783 +               inode->i_uid = inode->i_gid = 0;
25784 +               d_add(dentry, inode);
25785 +               return NULL;
25786 +       }
25787 +#endif
25788 +       if (len == 4 && !memcmp(name, "info", 4)) {
25789 +               inode = proc_vid_make_inode(dir->i_sb, 0, PROC_XID_INFO);
25790 +               if (!inode)
25791 +                       goto out;
25792 +               inode->i_fop = &proc_vid_info_file_operations;
25793 +               PROC_I(inode)->op.proc_vid_read = proc_virtual_info;
25794 +               inode->i_mode = S_IFREG|S_IRUGO;
25795 +               d_add(dentry, inode);   /* no d_op is set for this fixed entry */
25796 +               return NULL;
25797 +       }
25798 +       if (len == 6 && !memcmp(name, "status", 6)) {
25799 +               inode = proc_vid_make_inode(dir->i_sb, 0, PROC_XID_STATUS);
25800 +               if (!inode)
25801 +                       goto out;
25802 +               inode->i_fop = &proc_vid_info_file_operations;
25803 +               PROC_I(inode)->op.proc_vid_read = proc_virtual_status;
25804 +               inode->i_mode = S_IFREG|S_IRUGO;
25805 +               d_add(dentry, inode);   /* no d_op is set for this fixed entry */
25806 +               return NULL;
25807 +       }
25808 +
25809 +       ret = -ENOENT;  /* from here on every failure reads as "no such entry" */
25810 +       xid = atovid(name, len);
25811 +       if (xid < 0)
25812 +               goto out;
25813 +       vxi = lookup_vx_info(xid);
25814 +       if (!vxi)
25815 +               goto out;
25816 +
25817 +       inode = NULL;
25818 +       if (vx_check(xid, VX_ADMIN|VX_WATCH|VX_IDENT))  /* failed check leaves inode NULL, i.e. -ENOENT */
25819 +               inode = proc_vid_make_inode(dir->i_sb,
25820 +                       vxi->vx_id, PROC_XID_INO);
25821 +       if (!inode)
25822 +               goto out_release;
25823 +
25824 +       inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
25825 +       inode->i_op = &proc_vid_inode_operations;
25826 +       inode->i_fop = &proc_vid_file_operations;
25827 +       inode->i_nlink = 2;
25828 +       inode->i_flags|=S_IMMUTABLE;
25829 +
25830 +       dentry->d_op = &proc_vid_dentry_operations;
25831 +       d_add(dentry, inode);
25832 +       ret = 0;
25833 +
25834 +out_release:
25835 +       put_vx_info(vxi);       /* balances lookup_vx_info(); reached on success as well */
25836 +out:
25837 +       return ERR_PTR(ret);    /* ret == 0 yields NULL */
25838 +}
25839 +
25840 +
25841 +struct dentry *proc_vnet_lookup(struct inode *dir,
25842 +       struct dentry * dentry, struct nameidata *nd)
25843 +{      /* lookup in the top-level vnet dir: "info" or a numeric nid directory */
25844 +       int nid, len, ret;
25845 +       struct nx_info *nxi;
25846 +       const char *name;
25847 +       struct inode *inode;
25848 +
25849 +       name = dentry->d_name.name;
25850 +       len = dentry->d_name.len;
25851 +       ret = -ENOMEM;  /* result if inode allocation fails for the fixed entry below */
25852 +#if 0
25853 +       if (len == 7 && !memcmp(name, "current", 7)) {
25854 +               inode = new_inode(dir->i_sb);
25855 +               if (!inode)
25856 +                       goto out;
25857 +               inode->i_mtime = inode->i_atime =
25858 +                       inode->i_ctime = CURRENT_TIME;
25859 +               inode->i_ino = fake_ino(1, PROC_NID_INO);
25860 +               inode->i_mode = S_IFLNK|S_IRWXUGO;
25861 +               inode->i_uid = inode->i_gid = 0;
25862 +               d_add(dentry, inode);
25863 +               return NULL;
25864 +       }
25865 +#endif
25866 +       if (len == 4 && !memcmp(name, "info", 4)) {
25867 +               inode = proc_vid_make_inode(dir->i_sb, 0, PROC_NID_INFO);
25868 +               if (!inode)
25869 +                       goto out;
25870 +               inode->i_fop = &proc_vid_info_file_operations;
25871 +               PROC_I(inode)->op.proc_vid_read = proc_vnet_info;
25872 +               inode->i_mode = S_IFREG|S_IRUGO;
25873 +               d_add(dentry, inode);   /* no d_op is set for this fixed entry */
25874 +               return NULL;
25875 +       }
25876 +
25877 +       ret = -ENOENT;  /* from here on every failure reads as "no such entry" */
25878 +       nid = atovid(name, len);
25879 +       if (nid < 0)
25880 +               goto out;
25881 +       nxi = lookup_nx_info(nid);
25882 +       if (!nxi)
25883 +               goto out;
25884 +
25885 +       inode = NULL;
25886 +       if (1)  /* unconditional: no visibility check is made here */
25887 +               inode = proc_vid_make_inode(dir->i_sb,
25888 +                       nxi->nx_id, PROC_NID_INO);
25889 +       if (!inode)
25890 +               goto out_release;
25891 +
25892 +       inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
25893 +       inode->i_op = &proc_vid_inode_operations;
25894 +       inode->i_fop = &proc_vid_file_operations;
25895 +       inode->i_nlink = 2;
25896 +       inode->i_flags|=S_IMMUTABLE;
25897 +
25898 +       dentry->d_op = &proc_vid_dentry_operations;
25899 +       d_add(dentry, inode);
25900 +       ret = 0;
25901 +
25902 +out_release:
25903 +       put_nx_info(nxi);       /* balances lookup_nx_info(); reached on success as well */
25904 +out:
25905 +       return ERR_PTR(ret);    /* ret == 0 yields NULL */
25906 +}
25907 +
25908 +
25909 +
25910 +/* Sizes used by the readdir handlers below: name buffer length and number of ids fetched per call. */
25911 +#define PROC_NUMBUF 10
25912 +#define PROC_MAXVIDS 32
25913 +/* readdir for the "virtual" proc dir: emits ".", "..", "info" (only if visible), "status", then one dir per xid; always returns 0. */
25914 +int proc_virtual_readdir(struct file * filp,
25915 +       void * dirent, filldir_t filldir)
25916 +{
25917 +       unsigned int xid_array[PROC_MAXVIDS];
25918 +       char buf[PROC_NUMBUF];
25919 +       unsigned int nr = filp->f_pos-3; /* NOTE(review): taken before f_pos advances and wraps for f_pos < 3 -- confirm against get_xid_list() */
25920 +       unsigned int nr_xids, i;
25921 +       int visible = vx_check(0, VX_ADMIN|VX_WATCH);
25922 +       ino_t ino;
25923 +
25924 +       switch ((long)filp->f_pos) { /* positions 0..3 are the fixed entries */
25925 +       case 0:
25926 +               ino = fake_ino(0, PROC_XID_INO);
25927 +               if (filldir(dirent, ".", 1,
25928 +                       filp->f_pos, ino, DT_DIR) < 0)
25929 +                       return 0;
25930 +               filp->f_pos++;
25931 +               /* fall through */
25932 +       case 1:
25933 +               ino = filp->f_dentry->d_parent->d_inode->i_ino;
25934 +               if (filldir(dirent, "..", 2,
25935 +                       filp->f_pos, ino, DT_DIR) < 0)
25936 +                       return 0;
25937 +               filp->f_pos++;
25938 +               /* fall through */
25939 +       case 2:
25940 +               if (visible) {
25941 +                       ino = fake_ino(0, PROC_XID_INFO);
25942 +                       if (filldir(dirent, "info", 4,
25943 +                               filp->f_pos, ino, DT_REG) < 0)
25944 +                               return 0;
25945 +               }
25946 +               filp->f_pos++; /* the slot is consumed even when "info" is hidden */
25947 +               /* fall through */
25948 +       case 3:
25949 +               ino = fake_ino(0, PROC_XID_STATUS);
25950 +               if (filldir(dirent, "status", 6,
25951 +                       filp->f_pos, ino, DT_REG) < 0)
25952 +                       return 0;
25953 +               filp->f_pos++;
25954 +               /* fall through */
25955 +       }
25956 +
25957 +       nr_xids = get_xid_list(nr, xid_array, PROC_MAXVIDS);
25958 +       for (i = 0; i < nr_xids; i++) {
25959 +               int xid = xid_array[i];
25960 +               ino_t ino = fake_ino(xid, PROC_XID_INO); /* shadows the outer ino */
25961 +               unsigned int j = PROC_NUMBUF;
25962 +
25963 +               do buf[--j] = '0' + (xid % 10); while (xid/=10); /* decimal digits written right to left; no NUL, length is passed below */
25964 +
25965 +               if (filldir(dirent, buf+j, PROC_NUMBUF-j,
25966 +                       filp->f_pos, ino, DT_DIR) < 0)
25967 +                       break;
25968 +               filp->f_pos++;
25969 +       }
25970 +       return 0;
25971 +}
25972 +
25973 +/* File operations installed on the "virtual" proc directory by proc_vx_init(). */
25974 +static struct file_operations proc_virtual_dir_operations = {
25975 +       .read =         generic_read_dir,
25976 +       .readdir =      proc_virtual_readdir,
25977 +};
25978 +/* Inode operations installed on the "virtual" proc directory by proc_vx_init(). */
25979 +static struct inode_operations proc_virtual_dir_inode_operations = {
25980 +       .lookup =       proc_virtual_lookup,
25981 +};
25982 +
25983 +/* readdir for the "virtnet" proc dir: emits ".", "..", "info", then one directory per nid; always returns 0. */
25984 +int proc_vnet_readdir(struct file * filp,
25985 +       void * dirent, filldir_t filldir)
25986 +{
25987 +       unsigned int nid_array[PROC_MAXVIDS];
25988 +       char buf[PROC_NUMBUF];
25989 +       unsigned int nr = filp->f_pos-2; /* NOTE(review): taken before f_pos advances and wraps for f_pos < 2 -- confirm against get_nid_list() */
25990 +       unsigned int nr_nids, i;
25991 +//     int visible = vx_check(0, VX_ADMIN|VX_WATCH);
25992 +       ino_t ino;
25993 +
25994 +       switch ((long)filp->f_pos) { /* positions 0..2 are the fixed entries */
25995 +       case 0:
25996 +               ino = fake_ino(0, PROC_NID_INO);
25997 +               if (filldir(dirent, ".", 1,
25998 +                       filp->f_pos, ino, DT_DIR) < 0)
25999 +                       return 0;
26000 +               filp->f_pos++;
26001 +               /* fall through */
26002 +       case 1:
26003 +               ino = filp->f_dentry->d_parent->d_inode->i_ino;
26004 +               if (filldir(dirent, "..", 2,
26005 +                       filp->f_pos, ino, DT_DIR) < 0)
26006 +                       return 0;
26007 +               filp->f_pos++;
26008 +               /* fall through */
26009 +       case 2:
26010 +               ino = fake_ino(0, PROC_NID_INFO); /* unlike the xid variant, "info" is listed unconditionally */
26011 +               if (filldir(dirent, "info", 4,
26012 +                       filp->f_pos, ino, DT_REG) < 0)
26013 +                       return 0;
26014 +               filp->f_pos++;
26015 +               /* fall through */
26016 +       }
26017 +
26018 +       nr_nids = get_nid_list(nr, nid_array, PROC_MAXVIDS);
26019 +       for (i = 0; i < nr_nids; i++) {
26020 +               int nid = nid_array[i];
26021 +               ino_t ino = fake_ino(nid, PROC_NID_INO); /* shadows the outer ino */
26022 +               unsigned long j = PROC_NUMBUF;
26023 +
26024 +               do buf[--j] = '0' + (nid % 10); while (nid/=10); /* decimal digits written right to left; no NUL, length is passed below */
26025 +
26026 +               if (filldir(dirent, buf+j, PROC_NUMBUF-j,
26027 +                       filp->f_pos, ino, DT_DIR) < 0)
26028 +                       break;
26029 +               filp->f_pos++;
26030 +       }
26031 +       return 0;
26032 +}
26033 +
26034 +/* File operations installed on the "virtnet" proc directory by proc_vx_init(). */
26035 +static struct file_operations proc_vnet_dir_operations = {
26036 +       .read =         generic_read_dir,
26037 +       .readdir =      proc_vnet_readdir,
26038 +};
26039 +/* Inode operations installed on the "virtnet" proc directory by proc_vx_init(). */
26040 +static struct inode_operations proc_vnet_dir_inode_operations = {
26041 +       .lookup =       proc_vnet_lookup,
26042 +};
26043 +
26044 +
26045 +/* Create the "virtual" and "virtnet" proc directories and attach their file/inode operations. */
26046 +void proc_vx_init(void)
26047 +{
26048 +       struct proc_dir_entry *ent;
26049 +
26050 +       ent = proc_mkdir("virtual", 0); /* 0 parent: presumably the proc root -- confirm */
26051 +       if (ent) {
26052 +               ent->proc_fops = &proc_virtual_dir_operations;
26053 +               ent->proc_iops = &proc_virtual_dir_inode_operations;
26054 +       }
26055 +       proc_virtual = ent; /* stored even when proc_mkdir() failed (ent is then NULL) */
26056 +
26057 +       ent = proc_mkdir("virtnet", 0);
26058 +       if (ent) {
26059 +               ent->proc_fops = &proc_vnet_dir_operations;
26060 +               ent->proc_iops = &proc_vnet_dir_inode_operations;
26061 +       }
26062 +       proc_vnet = ent; /* stored even when proc_mkdir() failed (ent is then NULL) */
26063 +}
26064 +
26065 +
26066 +
26067 +
26068 +/* per pid info */
26069 +
26070 +/* Write the context info of task p as text into buffer; returns the number of characters written. */
26071 +int proc_pid_vx_info(struct task_struct *p, char *buffer)
26072 +{
26073 +       struct vx_info *vxi;
26074 +       char * orig = buffer; /* start of output, used to compute the length */
26075 +
26076 +       buffer += sprintf (buffer,"XID:\t%d\n", vx_task_xid(p)); /* unbounded sprintf: assumes the caller's buffer is large enough -- TODO confirm */
26077 +       if (vx_flags(VXF_INFO_HIDE, 0)) /* only the XID line is emitted in this case */
26078 +               goto out;
26079 +
26080 +       vxi = task_get_vx_info(p); /* paired with put_vx_info() below */
26081 +       if (!vxi)
26082 +               goto out;
26083 +
26084 +       buffer += sprintf (buffer,"BCaps:\t%016llx\n"
26085 +               ,(unsigned long long)vxi->vx_bcaps);
26086 +       buffer += sprintf (buffer,"CCaps:\t%016llx\n"
26087 +               ,(unsigned long long)vxi->vx_ccaps);
26088 +       buffer += sprintf (buffer,"CFlags:\t%016llx\n"
26089 +               ,(unsigned long long)vxi->vx_flags);
26090 +       buffer += sprintf (buffer,"CIPid:\t%d\n"
26091 +               ,vxi->vx_initpid);
26092 +
26093 +       put_vx_info(vxi);
26094 +out:
26095 +       return buffer - orig;
26096 +}
26097 +
26098 +/* Write the network context info of task p as text into buffer; returns the number of characters written. */
26099 +int proc_pid_nx_info(struct task_struct *p, char *buffer)
26100 +{
26101 +       struct nx_info *nxi;
26102 +       char * orig = buffer; /* start of output, used to compute the length */
26103 +       int i;
26104 +
26105 +       buffer += sprintf (buffer,"NID:\t%d\n", nx_task_nid(p)); /* unbounded sprintf: assumes the caller's buffer is large enough -- TODO confirm */
26106 +       if (vx_flags(VXF_INFO_HIDE, 0)) /* only the NID line is emitted in this case */
26107 +               goto out;
26108 +       nxi = task_get_nx_info(p); /* paired with put_nx_info() below */
26109 +       if (!nxi)
26110 +               goto out;
26111 +
26112 +       for (i=0; i<nxi->nbipv4; i++){ /* one line per address/mask pair */
26113 +               buffer += sprintf (buffer,
26114 +                       "V4Root[%d]:\t%d.%d.%d.%d/%d.%d.%d.%d\n", i
26115 +                       ,NIPQUAD(nxi->ipv4[i])
26116 +                       ,NIPQUAD(nxi->mask[i]));
26117 +       }
26118 +       buffer += sprintf (buffer,
26119 +               "V4Root[bcast]:\t%d.%d.%d.%d\n"
26120 +               ,NIPQUAD(nxi->v4_bcast));
26121 +
26122 +       put_nx_info(nxi);
26123 +out:
26124 +       return buffer - orig;
26125 +}
26126 +
26127 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/sched.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/sched.c
26128 --- linux-2.6.16.20/kernel/vserver/sched.c      1970-01-01 01:00:00 +0100
26129 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/sched.c 2006-04-29 02:58:07 +0200
26130 @@ -0,0 +1,334 @@
26131 +/*
26132 + *  linux/kernel/vserver/sched.c
26133 + *
26134 + *  Virtual Server: Scheduler Support
26135 + *
26136 + *  Copyright (C) 2004-2005  Herbert Pötzl
26137 + *
26138 + *  V0.01  adapted Sam Vilain's version to 2.6.3
26139 + *  V0.02  removed legacy interface
26140 + *
26141 + */
26142 +
26143 +#include <linux/sched.h>
26144 +#include <linux/vs_context.h>
26145 +#include <linux/vs_sched.h>
26146 +#include <linux/vserver/sched_cmd.h>
26147 +
26148 +#include <asm/errno.h>
26149 +#include <asm/uaccess.h>
26150 +
26151 +#define vxd_check_range(val, min, max) do {            \
26152 +       vxlprintk((val<min) || (val>max),               \
26153 +               "check_range(%ld,%ld,%ld)",             \
26154 +               (long)val, (long)min, (long)max,        \
26155 +               __FILE__, __LINE__);                    \
26156 +       } while (0)
26157 +
26158 +/* Copy the parameters selected by sched->update_mask from sched into the per-cpu sched_pc and reset its norm_time. */
26159 +void vx_update_sched_param(struct _vx_sched *sched,
26160 +       struct _vx_sched_pc *sched_pc)
26161 +{
26162 +       unsigned int set_mask = sched->update_mask;
26163 +
26164 +       if (set_mask & VXSM_FILL_RATE)
26165 +               sched_pc->fill_rate[0] = sched->fill_rate[0];
26166 +       if (set_mask & VXSM_INTERVAL)
26167 +               sched_pc->interval[0] = sched->interval[0];
26168 +       if (set_mask & VXSM_FILL_RATE2)
26169 +               sched_pc->fill_rate[1] = sched->fill_rate[1];
26170 +       if (set_mask & VXSM_INTERVAL2)
26171 +               sched_pc->interval[1] = sched->interval[1];
26172 +       if (set_mask & VXSM_TOKENS)
26173 +               sched_pc->tokens = sched->tokens;
26174 +       if (set_mask & VXSM_TOKENS_MIN)
26175 +               sched_pc->tokens_min = sched->tokens_min;
26176 +       if (set_mask & VXSM_TOKENS_MAX)
26177 +               sched_pc->tokens_max = sched->tokens_max;
26178 +
26179 +       if (set_mask & VXSM_IDLE_TIME) /* unlike the values above, this flag is cleared when its bit is absent */
26180 +               sched_pc->flags |= VXSF_IDLE_TIME;
26181 +       else
26182 +               sched_pc->flags &= ~VXSF_IDLE_TIME;
26183 +
26184 +       /* reset time */
26185 +       sched_pc->norm_time = jiffies;
26186 +}
26187 +
26188 +/* NOTE: without CONFIG_VSERVER_HARDCPU the on-hold path below returns 0, not a negative value. */
26189 +/*
26190 + * recalculate the context's scheduling tokens
26191 + *
26192 + * ret > 0 : number of tokens available
26193 + * ret < 0 : on hold, check delta_min[]
26194 + *          -1 only jiffies
26195 + *          -2 also idle time
26196 + *
26197 + */
26198 +int vx_tokens_recalc(struct _vx_sched_pc *sched_pc,
26199 +       unsigned long *norm_time, unsigned long *idle_time, int delta_min[2])
26200 +{
26201 +       long delta;
26202 +       long tokens = 0;
26203 +       int flags = sched_pc->flags; /* local copy; written back before each return */
26204 +
26205 +       /* how much time did pass? */
26206 +       delta = *norm_time - sched_pc->norm_time;
26207 +       vxd_check_range(delta, 0, INT_MAX);
26208 +
26209 +       if (delta >= sched_pc->interval[0]) {
26210 +               long tokens, integral; /* this tokens shadows the function-level one */
26211 +
26212 +               /* calc integral token part */
26213 +               tokens = delta / sched_pc->interval[0]; /* whole intervals elapsed */
26214 +               integral = tokens * sched_pc->interval[0]; /* time covered by those intervals */
26215 +               tokens *= sched_pc->fill_rate[0];
26216 +#ifdef CONFIG_VSERVER_HARDCPU
26217 +               delta_min[0] = delta - integral;
26218 +               vxd_check_range(delta_min[0], 0, sched_pc->interval[0]);
26219 +#endif
26220 +               /* advance time */
26221 +               sched_pc->norm_time += delta; /* NOTE(review): advances by delta, while the idle path below advances by integral -- confirm intended */
26222 +
26223 +               /* add tokens */
26224 +               sched_pc->tokens += tokens;
26225 +               sched_pc->token_time += tokens;
26226 +       }
26227 +       else
26228 +               delta_min[0] = delta; /* written regardless of CONFIG_VSERVER_HARDCPU */
26229 +
26230 +#ifdef CONFIG_VSERVER_IDLETIME
26231 +       if (!(flags & VXSF_IDLE_TIME))
26232 +               goto skip_idle;
26233 +
26234 +       /* how much was the idle skip? */
26235 +       delta = *idle_time - sched_pc->idle_time;
26236 +       vxd_check_range(delta, 0, INT_MAX);
26237 +
26238 +       if (delta >= sched_pc->interval[1]) {
26239 +               long tokens, integral; /* this tokens shadows the function-level one */
26240 +
26241 +               /* calc fair share token part */
26242 +               tokens = delta / sched_pc->interval[1];
26243 +               integral = tokens * sched_pc->interval[1];
26244 +               tokens *= sched_pc->fill_rate[1];
26245 +               delta_min[1] = delta - integral;
26246 +               vxd_check_range(delta_min[1], 0, sched_pc->interval[1]);
26247 +
26248 +               /* advance idle time */
26249 +               sched_pc->idle_time += integral;
26250 +
26251 +               /* add tokens */
26252 +               sched_pc->tokens += tokens;
26253 +               sched_pc->token_time += tokens;
26254 +       }
26255 +       else
26256 +               delta_min[1] = delta;
26257 +skip_idle:
26258 +#endif
26259 +
26260 +       /* clip at maximum */
26261 +       if (sched_pc->tokens > sched_pc->tokens_max)
26262 +               sched_pc->tokens = sched_pc->tokens_max;
26263 +       tokens = sched_pc->tokens;
26264 +
26265 +       if ((flags & VXSF_ONHOLD)) {
26266 +               /* can we unhold? */
26267 +               if (tokens >= sched_pc->tokens_min) {
26268 +                       flags &= ~VXSF_ONHOLD;
26269 +                       sched_pc->hold_ticks +=
26270 +                               *norm_time - sched_pc->onhold; /* account the time spent on hold */
26271 +               }
26272 +               else
26273 +                       goto on_hold;
26274 +       } else {
26275 +               /* put on hold? */
26276 +               if (tokens <= 0) {
26277 +                       flags |= VXSF_ONHOLD;
26278 +                       sched_pc->onhold = *norm_time; /* remember when the hold started */
26279 +                       goto on_hold;
26280 +               }
26281 +       }
26282 +       sched_pc->flags = flags;
26283 +       return tokens;
26284 +
26285 +on_hold:
26286 +       tokens = sched_pc->tokens_min - tokens; /* tokens still missing before the hold can end */
26287 +       sched_pc->flags = flags;
26288 +       BUG_ON(tokens < 0);
26289 +
26290 +#ifdef CONFIG_VSERVER_HARDCPU
26291 +       /* next interval? */
26292 +       if (!sched_pc->fill_rate[0])
26293 +               delta_min[0] = HZ; /* fill rate 0: use HZ instead of dividing by zero */
26294 +       else if (tokens > sched_pc->fill_rate[0])
26295 +               delta_min[0] += sched_pc->interval[0] *
26296 +                       tokens / sched_pc->fill_rate[0];
26297 +       vxd_check_range(delta_min[0], 0, INT_MAX);
26298 +
26299 +#ifdef CONFIG_VSERVER_IDLETIME
26300 +       if (!(flags & VXSF_IDLE_TIME))
26301 +               return -1;
26302 +
26303 +       /* next interval? */
26304 +       if (!sched_pc->fill_rate[1])
26305 +               delta_min[1] = HZ; /* fill rate 0: use HZ instead of dividing by zero */
26306 +       else if (tokens > sched_pc->fill_rate[1])
26307 +               delta_min[1] += sched_pc->interval[1] *
26308 +                       tokens / sched_pc->fill_rate[1];
26309 +       vxd_check_range(delta_min[1], 0, INT_MAX);
26310 +
26311 +       return -2;
26312 +#else
26313 +       return -1;
26314 +#endif /* CONFIG_VSERVER_IDLETIME */
26315 +#else
26316 +       return 0; /* on hold, but reported as 0 in this configuration */
26317 +#endif /* CONFIG_VSERVER_HARDCPU */
26318 +}
26319 +
26320 +/* Sanitize *data in place, store the fields selected by data->set_mask in vxi->sched under tokens_lock, and schedule/apply the per-cpu update; returns 0. */
26321 +static int do_set_sched(struct vx_info *vxi, struct vcmd_set_sched_v4 *data)
26322 +{
26323 +       unsigned int set_mask = data->set_mask;
26324 +       unsigned int update_mask;
26325 +
26326 +       /* Sanity check data values */
26327 +       if (data->fill_rate < 0)
26328 +               data->fill_rate = 1;
26329 +       if (data->interval <= 0)
26330 +               data->interval = HZ;
26331 +       if (data->tokens_max <= 0)
26332 +               data->tokens_max = HZ;
26333 +       if (data->tokens_min < 0)
26334 +               data->tokens_min = data->fill_rate*3;
26335 +       if (data->tokens_min >= data->tokens_max)
26336 +               data->tokens_min = data->tokens_max;
26337 +
26338 +       if (data->prio_bias > MAX_PRIO_BIAS) /* clamp into [MIN_PRIO_BIAS, MAX_PRIO_BIAS] */
26339 +               data->prio_bias = MAX_PRIO_BIAS;
26340 +       if (data->prio_bias < MIN_PRIO_BIAS)
26341 +               data->prio_bias = MIN_PRIO_BIAS;
26342 +
26343 +       spin_lock(&vxi->sched.tokens_lock);
26344 +
26345 +       if (set_mask & VXSM_FILL_RATE)
26346 +               vxi->sched.fill_rate[0] = data->fill_rate;
26347 +       if (set_mask & VXSM_INTERVAL)
26348 +               vxi->sched.interval[0] = data->interval;
26349 +       if (set_mask & VXSM_FILL_RATE2)
26350 +               vxi->sched.fill_rate[1] = data->fill_rate; /* the second rate is taken from the same data->fill_rate field */
26351 +       if (set_mask & VXSM_INTERVAL2)
26352 +               vxi->sched.interval[1] = data->interval; /* the second interval is taken from the same data->interval field */
26353 +       if (set_mask & VXSM_TOKENS)
26354 +               vxi->sched.tokens = data->tokens;
26355 +       if (set_mask & VXSM_TOKENS_MIN)
26356 +               vxi->sched.tokens_min = data->tokens_min;
26357 +       if (set_mask & VXSM_TOKENS_MAX)
26358 +               vxi->sched.tokens_max = data->tokens_max;
26359 +       if (set_mask & VXSM_PRIO_BIAS)
26360 +               vxi->sched.prio_bias = data->prio_bias;
26361 +
26362 +       update_mask = vxi->sched.update_mask & VXSM_SET_MASK; /* keep earlier pending set bits */
26363 +       update_mask |= (set_mask & (VXSM_SET_MASK|VXSM_IDLE_TIME));
26364 +       vxi->sched.update_mask = update_mask;
26365 +#ifdef CONFIG_SMP
26366 +       rmb(); /* NOTE(review): a read barrier after the update_mask store -- confirm the intended barrier type */
26367 +       if (set_mask & VXSM_CPU_ID)
26368 +               vxi->sched.update = cpumask_of_cpu(data->cpu_id);
26369 +       else
26370 +               vxi->sched.update = CPU_MASK_ALL;
26371 +       /* forced reload? */
26372 +       if (set_mask & VXSM_FORCE) {
26373 +               int cpu;
26374 +
26375 +               for_each_cpu(cpu)
26376 +                       vx_update_sched_param(&vxi->sched,
26377 +                               &vx_per_cpu(vxi, sched_pc, cpu));
26378 +       }
26379 +#else
26380 +       /* on UP we update immediately */
26381 +       vx_update_sched_param(&vxi->sched,
26382 +               &vx_per_cpu(vxi, sched_pc, 0));
26383 +#endif
26384 +
26385 +       spin_unlock(&vxi->sched.tokens_lock);
26386 +       return 0;
26387 +}
26388 +
26389 +
26390 +#ifdef CONFIG_VSERVER_LEGACY
26391 +/* COPY_MASK_V2: copy vc_data.name into vc_data_v4 and set mask unless the value is SCHED_KEEP; expands to a bare if statement. */
26392 +#define COPY_MASK_V2(name, mask)                       \
26393 +       if (vc_data.name != SCHED_KEEP) {               \
26394 +               vc_data_v4.name = vc_data.name;         \
26395 +               vc_data_v4.set_mask |= mask;            \
26396 +       }
26397 +/* Legacy v2 set_sched command: converts the user's v2 data to v4 form and applies it to context xid. */
26398 +int vc_set_sched_v2(uint32_t xid, void __user *data)
26399 +{
26400 +       struct vcmd_set_sched_v2 vc_data;
26401 +       struct vcmd_set_sched_v4 vc_data_v4 = { .set_mask = 0 }; /* remaining members are zero-initialized too */
26402 +       struct vx_info *vxi;
26403 +
26404 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
26405 +               return -EFAULT;
26406 +
26407 +       vxi = lookup_vx_info(xid); /* paired with put_vx_info() below */
26408 +       if (!vxi)
26409 +               return -ESRCH;
26410 +
26411 +       COPY_MASK_V2(fill_rate,  VXSM_FILL_RATE);
26412 +       COPY_MASK_V2(interval,   VXSM_INTERVAL);
26413 +       COPY_MASK_V2(tokens,     VXSM_TOKENS);
26414 +       COPY_MASK_V2(tokens_min, VXSM_TOKENS_MIN);
26415 +       COPY_MASK_V2(tokens_max, VXSM_TOKENS_MAX);
26416 +       vc_data_v4.bucket_id = 0;
26417 +
26418 +       do_set_sched(vxi, &vc_data_v4); /* result ignored, unlike the v3/v4 entry points; do_set_sched() currently always returns 0 */
26419 +       put_vx_info(vxi);
26420 +       return 0;
26421 +}
26422 +#endif
26423 +/* v3 set_sched command: copies the user's v3 data into a v4 struct, masks set_mask to VXSM_V3_MASK and applies it to context xid. */
26424 +int vc_set_sched_v3(uint32_t xid, void __user *data)
26425 +{
26426 +       struct vcmd_set_sched_v3 vc_data;
26427 +       struct vcmd_set_sched_v4 vc_data_v4; /* NOTE(review): not zeroed; members beyond sizeof(vc_data) other than bucket_id stay unset -- confirm they are only read under bits masked off below */
26428 +       struct vx_info *vxi;
26429 +       int ret;
26430 +
26431 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
26432 +               return -EFAULT;
26433 +
26434 +       vxi = lookup_vx_info(xid); /* paired with put_vx_info() below */
26435 +       if (!vxi)
26436 +               return -ESRCH;
26437 +
26438 +       /* structures are binary compatible */
26439 +       memcpy(&vc_data_v4, &vc_data, sizeof(vc_data));
26440 +       vc_data_v4.set_mask &= VXSM_V3_MASK; /* drop any bits outside VXSM_V3_MASK */
26441 +       vc_data_v4.bucket_id = 0;
26442 +       ret = do_set_sched(vxi, &vc_data_v4);
26443 +       put_vx_info(vxi);
26444 +       return ret;
26445 +}
26446 +/* Current (v4) set_sched command: copies the user's data and applies it to context xid; -EFAULT or -ESRCH on failure. */
26447 +int vc_set_sched(uint32_t xid, void __user *data)
26448 +{
26449 +       struct vcmd_set_sched_v4 vc_data;
26450 +       struct vx_info *vxi;
26451 +       int ret;
26452 +
26453 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
26454 +               return -EFAULT;
26455 +
26456 +       vxi = lookup_vx_info(xid); /* paired with put_vx_info() below */
26457 +       if (!vxi)
26458 +               return -ESRCH;
26459 +
26460 +       ret = do_set_sched(vxi, &vc_data);
26461 +       put_vx_info(vxi);
26462 +       return ret;
26463 +}
26464 +
26465 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/sched_init.h linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/sched_init.h
26466 --- linux-2.6.16.20/kernel/vserver/sched_init.h 1970-01-01 01:00:00 +0100
26467 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/sched_init.h    2006-04-26 19:07:00 +0200
26468 @@ -0,0 +1,47 @@
26469 +/* Set the per-context scheduler parameters of a new _vx_sched to their hard-coded defaults. */
26470 +static inline void vx_info_init_sched(struct _vx_sched *sched)
26471 +{
26472 +       /* scheduling; hard code starting values as constants */
26473 +       sched->fill_rate[0]     = 1;
26474 +       sched->interval[0]      = 4;
26475 +       sched->fill_rate[1]     = 1;
26476 +       sched->interval[1]      = 8;
26477 +       sched->tokens           = HZ >> 2; /* HZ/4 */
26478 +       sched->tokens_min       = HZ >> 4; /* HZ/16 */
26479 +       sched->tokens_max       = HZ >> 1; /* HZ/2 */
26480 +       sched->tokens_lock      = SPIN_LOCK_UNLOCKED;
26481 +       sched->prio_bias        = 0;
26482 +       sched->vavavoom         = 0;
26483 +}
26484 +/* Set the per-cpu scheduler state to the same defaults as vx_info_init_sched() and clear its counters; cpu is unused here. */
26485 +static inline
26486 +void vx_info_init_sched_pc(struct _vx_sched_pc *sched_pc, int cpu)
26487 +{
26488 +       sched_pc->fill_rate[0]  = 1;
26489 +       sched_pc->interval[0]   = 4;
26490 +       sched_pc->fill_rate[1]  = 1;
26491 +       sched_pc->interval[1]   = 8;
26492 +       sched_pc->tokens        = HZ >> 2; /* HZ/4 */
26493 +       sched_pc->tokens_min    = HZ >> 4; /* HZ/16 */
26494 +       sched_pc->tokens_max    = HZ >> 1; /* HZ/2 */
26495 +       sched_pc->token_time    = 0;
26496 +       sched_pc->idle_time     = 0;
26497 +       sched_pc->norm_time     = jiffies; /* token accounting starts from now */
26498 +
26499 +       sched_pc->user_ticks = 0;
26500 +       sched_pc->sys_ticks = 0;
26501 +       sched_pc->hold_ticks = 0;
26502 +}
26503 +
26504 +/* Teardown hook for the per-context scheduler state; currently a no-op. */
26505 +static inline void vx_info_exit_sched(struct _vx_sched *sched)
26506 +{
26507 +       return;
26508 +}
26509 +/* Teardown hook for the per-cpu scheduler state; currently a no-op. */
26510 +static inline
26511 +void vx_info_exit_sched_pc(struct _vx_sched_pc *sched_pc, int cpu)
26512 +{
26513 +       return;
26514 +}
26515 +
26516 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/sched_proc.h linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/sched_proc.h
26517 --- linux-2.6.16.20/kernel/vserver/sched_proc.h 1970-01-01 01:00:00 +0100
26518 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/sched_proc.h    2006-04-26 19:07:00 +0200
26519 @@ -0,0 +1,59 @@
26520 +#ifndef _VX_SCHED_PROC_H
26521 +#define _VX_SCHED_PROC_H
26522 +
26523 +/* Format the per-context scheduler parameters as text into buffer; returns the number of characters written. */
26524 +static inline
26525 +int vx_info_proc_sched(struct _vx_sched *sched, char *buffer)
26526 +{
26527 +       int length = 0;
26528 +
26529 +       length += sprintf(buffer, /* unbounded sprintf: assumes the caller's buffer is large enough -- TODO confirm */
26530 +               "FillRate:\t%8d,%d\n"
26531 +               "Interval:\t%8d,%d\n"
26532 +               "TokensMin:\t%8d\n"
26533 +               "TokensMax:\t%8d\n"
26534 +               "PrioBias:\t%8d\n"
26535 +               "VaVaVoom:\t%8d\n"
26536 +               ,sched->fill_rate[0]
26537 +               ,sched->fill_rate[1]
26538 +               ,sched->interval[0]
26539 +               ,sched->interval[1]
26540 +               ,sched->tokens_min
26541 +               ,sched->tokens_max
26542 +               ,sched->prio_bias
26543 +               ,sched->vavavoom
26544 +               );
26545 +       return length;
26546 +}
26547 +/* Format the per-cpu scheduler state of sched_pc as one text line into buffer; returns the number of characters written. */
26548 +static inline
26549 +int vx_info_proc_sched_pc(struct _vx_sched_pc *sched_pc,
26550 +       char *buffer, int cpu)
26551 +{
26552 +       int length = 0;
26553 +
26554 +       length += sprintf(buffer + length,
26555 +               "cpu %d: %llu %llu %llu %ld %ld" /* %llu matches the unsigned long long casts below */
26556 +               ,cpu
26557 +               ,(unsigned long long)sched_pc->user_ticks
26558 +               ,(unsigned long long)sched_pc->sys_ticks
26559 +               ,(unsigned long long)sched_pc->hold_ticks
26560 +               ,sched_pc->token_time
26561 +               ,sched_pc->idle_time
26562 +               );
26563 +       length += sprintf(buffer + length,
26564 +               " %c%c %d %d %d %d/%d %d/%d\n"
26565 +               ,(sched_pc->flags & VXSF_ONHOLD) ? 'H' : 'R' /* H when VXSF_ONHOLD is set, R otherwise */
26566 +               ,(sched_pc->flags & VXSF_IDLE_TIME) ? 'I' : '-' /* I when VXSF_IDLE_TIME is set */
26567 +               ,sched_pc->tokens
26568 +               ,sched_pc->tokens_min
26569 +               ,sched_pc->tokens_max
26570 +               ,sched_pc->fill_rate[0]
26571 +               ,sched_pc->interval[0]
26572 +               ,sched_pc->fill_rate[1]
26573 +               ,sched_pc->interval[1]
26574 +               );
26575 +       return length;
26576 +}
26577 +
26578 +#endif /* _VX_SCHED_PROC_H */
26579 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/signal.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/signal.c
26580 --- linux-2.6.16.20/kernel/vserver/signal.c     1970-01-01 01:00:00 +0100
26581 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/signal.c        2006-04-26 19:07:00 +0200
26582 @@ -0,0 +1,137 @@
26583 +/*
26584 + *  linux/kernel/vserver/signal.c
26585 + *
26586 + *  Virtual Server: Signal Support
26587 + *
26588 + *  Copyright (C) 2003-2005  Herbert Pötzl
26589 + *
26590 + *  V0.01  broken out from vcontext V0.05
26591 + *
26592 + */
26593 +
26594 +#include <linux/sched.h>
26595 +
26596 +#include <asm/errno.h>
26597 +#include <asm/uaccess.h>
26598 +
26599 +#include <linux/vs_context.h>
26600 +#include <linux/vserver/signal_cmd.h>
26601 +
26602 +/* Send sig to tasks of context vxi: pid 0 or -1 = all tasks, pid 1 = the context's init (if set), else the given pid; returns a send result or -ESRCH. */
26603 +int vx_info_kill(struct vx_info *vxi, int pid, int sig)
26604 +{
26605 +       int retval, count=0; /* count is incremented below but never read */
26606 +       struct task_struct *p;
26607 +       unsigned long priv = 0; /* passed, cast to a pointer, as the info argument of group_send_sig_info() */
26608 +
26609 +       retval = -ESRCH; /* default when no task was signalled */
26610 +       vxdprintk(VXD_CBIT(misc, 4),
26611 +               "vx_info_kill(%p[#%d],%d,%d)*",
26612 +               vxi, vxi->vx_id, pid, sig);
26613 +       read_lock(&tasklist_lock);
26614 +       switch (pid) {
26615 +       case  0:
26616 +               priv = 1; /* falls through into the -1 case */
26617 +       case -1:
26618 +               for_each_process(p) {
26619 +                       int err = 0;
26620 +
26621 +                       if (vx_task_xid(p) != vxi->vx_id || p->pid <= 1 || /* skip other contexts, pid <= 1, */
26622 +                               (pid && vxi->vx_initpid == p->pid)) /* and the context's init when pid is -1 */
26623 +                               continue;
26624 +
26625 +                       err = group_send_sig_info(sig, (void*)priv, p);
26626 +                       ++count;
26627 +                       if (err != -EPERM) /* the last non-EPERM result is the one returned */
26628 +                               retval = err;
26629 +               }
26630 +               break;
26631 +
26632 +       case 1:
26633 +               if (vxi->vx_initpid) { /* redirect pid 1 to the context's init */
26634 +                       pid = vxi->vx_initpid;
26635 +                       priv = 1;
26636 +               }
26637 +               /* fallthrough */
26638 +       default:
26639 +               p = find_task_by_real_pid(pid);
26640 +               if (p) {
26641 +                       if (vx_task_xid(p) == vxi->vx_id) /* only tasks inside this context */
26642 +                               retval = group_send_sig_info(sig,
26643 +                                       (void*)priv, p);
26644 +               }
26645 +               break;
26646 +       }
26647 +       read_unlock(&tasklist_lock);
26648 +       vxdprintk(VXD_CBIT(misc, 4),
26649 +               "vx_info_kill(%p[#%d],%d,%d) = %d",
26650 +               vxi, vxi->vx_id, pid, sig, retval);
26651 +       return retval;
26652 +}
26653 +/* ctx_kill command: copies the request from user space and signals tasks of context id via vx_info_kill(). */
26654 +int vc_ctx_kill(uint32_t id, void __user *data)
26655 +{
26656 +       int retval;
26657 +       struct vcmd_ctx_kill_v0 vc_data;
26658 +       struct vx_info *vxi;
26659 +
26660 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
26661 +               return -EFAULT;
26662 +
26663 +       vxi = lookup_vx_info(id); /* paired with put_vx_info() below */
26664 +       if (!vxi)
26665 +               return -ESRCH;
26666 +
26667 +       retval = vx_info_kill(vxi, vc_data.pid, vc_data.sig);
26668 +       put_vx_info(vxi);
26669 +       return retval;
26670 +}
26671 +
26672 +/* Sleep interruptibly on vxi->vx_wait until vx_info_state() over SHUTDOWN|HASHED|HELPER equals VXS_SHUTDOWN; returns 0, or -ERESTARTSYS on a pending signal. */
26673 +static int __wait_exit(struct vx_info *vxi)
26674 +{
26675 +       DECLARE_WAITQUEUE(wait, current);
26676 +       int ret = 0;
26677 +
26678 +       add_wait_queue(&vxi->vx_wait, &wait);
26679 +
26680 +wait:
26681 +       set_current_state(TASK_INTERRUPTIBLE); /* re-arm on every pass: schedule() returns in TASK_RUNNING, so a stale state would busy-poll */
26682 +       if (vx_info_state(vxi,
26683 +               VXS_SHUTDOWN|VXS_HASHED|VXS_HELPER) == VXS_SHUTDOWN)
26684 +               goto out;
26685 +       if (signal_pending(current)) {
26686 +               ret = -ERESTARTSYS;
26687 +               goto out;
26688 +       }
26689 +       schedule();
26690 +       goto wait;
26691 +
26692 +out:
26693 +       set_current_state(TASK_RUNNING); /* undo the sleep state on both exit paths */
26694 +       remove_wait_queue(&vxi->vx_wait, &wait);
26695 +       return ret;
26696 +}
26697 +
26698 +
26699 +/* wait_exit command: waits via __wait_exit() on context id, then copies its reboot_cmd and exit_code to user space. */
26700 +int vc_wait_exit(uint32_t id, void __user *data)
26701 +{
26702 +       struct vx_info *vxi;
26703 +       struct vcmd_wait_exit_v0 vc_data;
26704 +       int ret;
26705 +
26706 +       vxi = lookup_vx_info(id); /* paired with put_vx_info() below */
26707 +       if (!vxi)
26708 +               return -ESRCH;
26709 +
26710 +       ret = __wait_exit(vxi);
26711 +       vc_data.reboot_cmd = vxi->reboot_cmd; /* read even when the wait was interrupted */
26712 +       vc_data.exit_code = vxi->exit_code;
26713 +       put_vx_info(vxi);
26714 +
26715 +       if (copy_to_user (data, &vc_data, sizeof(vc_data))) /* a copy failure overrides the wait result */
26716 +               ret = -EFAULT;
26717 +       return ret;
26718 +}
26719 +
26720 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/switch.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/switch.c
26721 --- linux-2.6.16.20/kernel/vserver/switch.c     1970-01-01 01:00:00 +0100
26722 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/switch.c        2006-04-26 19:07:00 +0200
26723 @@ -0,0 +1,297 @@
26724 +/*
26725 + *  linux/kernel/vserver/switch.c
26726 + *
26727 + *  Virtual Server: Syscall Switch
26728 + *
26729 + *  Copyright (C) 2003-2005  Herbert Pötzl
26730 + *
26731 + *  V0.01  syscall switch
26732 + *  V0.02  added signal to context
26733 + *  V0.03  added rlimit functions
26734 + *  V0.04  added iattr, task/xid functions
26735 + *  V0.05  added debug/history stuff
26736 + *  V0.06  added compat32 layer
26737 + *
26738 + */
26739 +
26740 +#include <linux/linkage.h>
26741 +#include <linux/sched.h>
26742 +#include <linux/compat.h>
26743 +#include <asm/errno.h>
26744 +
26745 +#include <linux/vserver/network.h>
26746 +#include <linux/vserver/switch.h>
26747 +#include <linux/vserver/debug.h>
26748 +/* Return VCI_VERSION, or VCI_LEGACY_VERSION for id 63 when CONFIG_VSERVER_LEGACY_VERSION is set. */
26749 +static inline
26750 +int vc_get_version(uint32_t id)
26751 +{
26752 +#ifdef CONFIG_VSERVER_LEGACY_VERSION
26753 +       if (id == 63) /* this id selects the legacy version number */
26754 +               return VCI_LEGACY_VERSION;
26755 +#endif
26756 +       return VCI_VERSION;
26757 +}
26758 +
26759 +#include "vci_config.h"
26760 +/* report the vci_kernel_config() bitmask from vci_config.h; the id argument is not used */
26761 +static inline
26762 +int vc_get_vci(uint32_t id)
26763 +{
26764 +       return vci_kernel_config();
26765 +}
26766 +
26767 +#include <linux/vserver/context_cmd.h>
26768 +#include <linux/vserver/cvirt_cmd.h>
26769 +#include <linux/vserver/limit_cmd.h>
26770 +#include <linux/vserver/network_cmd.h>
26771 +#include <linux/vserver/sched_cmd.h>
26772 +#include <linux/vserver/debug_cmd.h>
26773 +#include <linux/vserver/inode_cmd.h>
26774 +#include <linux/vserver/dlimit_cmd.h>
26775 +#include <linux/vserver/signal_cmd.h>
26776 +#include <linux/vserver/namespace_cmd.h>
26777 +
26778 +#include <linux/vserver/legacy.h>
26779 +#include <linux/vserver/inode.h>
26780 +#include <linux/vserver/dlimit.h>
26781 +
26782 +/* __COMPAT - with CONFIG_COMPAT call name##_x32() when compat is non-zero, else name(); plain name() otherwise */
26783 +#ifdef CONFIG_COMPAT
26784 +#define __COMPAT(name, id, data, compat)       \
26785 +       ((compat) ? name ## _x32 (id, data) : name (id, data))
26786 +#else
26787 +#define __COMPAT(name, id, data, compat)       \
26788 +       name (id, data)
26789 +#endif
26790 +
26791 +/* do_vserver - check permissions, then dispatch cmd to its vc_*() handler (-EPERM if refused, -ENOSYS if unknown) */
26792 +static inline
26793 +long do_vserver(uint32_t cmd, uint32_t id, void __user *data, int compat)
26794 +{
26795 +       vxdprintk(VXD_CBIT(switch, 0),
26796 +               "vc: VCMD_%02d_%d[%d], %d,%p,%d",
26797 +               VC_CATEGORY(cmd), VC_COMMAND(cmd),
26798 +               VC_VERSION(cmd), id, data, compat);
26799 +       /* capability checks come first and apply to every command */
26800 +#ifdef CONFIG_VSERVER_LEGACY
26801 +       if (!capable(CAP_CONTEXT) &&
26802 +               /* dirty hack for capremove */
26803 +               !(cmd==VCMD_new_s_context && id==-2))
26804 +               return -EPERM;
26805 +#else
26806 +       if (!capable(CAP_CONTEXT))
26807 +               return -EPERM;
26808 +#endif
26809 +       /* moved here from the individual commands */
26810 +       if (!capable(CAP_SYS_ADMIN))
26811 +               return -EPERM;
26812 +       /* commands in this first group are dispatched before any vx_check() */
26813 +       switch (cmd) {
26814 +       case VCMD_get_version:
26815 +               return vc_get_version(id);
26816 +       case VCMD_get_vci:
26817 +               return vc_get_vci(id);
26818 +
26819 +       case VCMD_dump_history:
26820 +#ifdef CONFIG_VSERVER_HISTORY
26821 +               return vc_dump_history(id);
26822 +#else
26823 +               return -ENOSYS;
26824 +#endif
26825 +
26826 +#ifdef CONFIG_VSERVER_LEGACY
26827 +       case VCMD_new_s_context:
26828 +               return vc_new_s_context(id, data);
26829 +#endif
26830 +#ifdef CONFIG_VSERVER_LEGACYNET
26831 +       case VCMD_set_ipv4root:
26832 +               return vc_set_ipv4root(id, data);
26833 +#endif
26834 +
26835 +       case VCMD_task_xid:
26836 +               return vc_task_xid(id, data);
26837 +       case VCMD_vx_info:
26838 +               return vc_vx_info(id, data);
26839 +
26840 +       case VCMD_task_nid:
26841 +               return vc_task_nid(id, data);
26842 +       case VCMD_nx_info:
26843 +               return vc_nx_info(id, data);
26844 +
26845 +       case VCMD_set_namespace_v0:
26846 +               return vc_set_namespace(-1, data);
26847 +       /* this is version 1 */
26848 +       case VCMD_set_namespace:
26849 +               return vc_set_namespace(id, data);
26850 +       case VCMD_cleanup_namespace:
26851 +               return vc_cleanup_namespace(id, data);
26852 +       }
26853 +
26854 +       /* those are allowed while in setup too */
26855 +       if (!vx_check(0, VX_ADMIN|VX_WATCH) &&
26856 +               !vx_flags(VXF_STATE_SETUP,0))
26857 +               return -EPERM;
26858 +
26859 +#ifdef CONFIG_VSERVER_LEGACY
26860 +       switch (cmd) {
26861 +       case VCMD_set_cflags:
26862 +       case VCMD_set_ccaps_v0:
26863 +               if (vx_check(0, VX_WATCH))
26864 +                       return 0;
26865 +       }
26866 +#endif
26867 +
26868 +       switch (cmd) {
26869 +#ifdef CONFIG_IA32_EMULATION
26870 +       case VCMD_get_rlimit:
26871 +               return __COMPAT(vc_get_rlimit, id, data, compat);
26872 +       case VCMD_set_rlimit:
26873 +               return __COMPAT(vc_set_rlimit, id, data, compat);
26874 +#else
26875 +       case VCMD_get_rlimit:
26876 +               return vc_get_rlimit(id, data);
26877 +       case VCMD_set_rlimit:
26878 +               return vc_set_rlimit(id, data);
26879 +#endif
26880 +       case VCMD_get_rlimit_mask:
26881 +               return vc_get_rlimit_mask(id, data);
26882 +
26883 +       case VCMD_get_vhi_name:
26884 +               return vc_get_vhi_name(id, data);
26885 +       case VCMD_set_vhi_name:
26886 +               return vc_set_vhi_name(id, data);
26887 +
26888 +       case VCMD_set_cflags:
26889 +               return vc_set_cflags(id, data);
26890 +       case VCMD_get_cflags:
26891 +               return vc_get_cflags(id, data);
26892 +
26893 +       case VCMD_set_ccaps_v0:
26894 +               return vc_set_ccaps_v0(id, data);
26895 +       /* this is version 1 */
26896 +       case VCMD_set_ccaps:
26897 +               return vc_set_ccaps(id, data);
26898 +       case VCMD_get_ccaps_v0:
26899 +               return vc_get_ccaps_v0(id, data);
26900 +       /* this is version 1 */
26901 +       case VCMD_get_ccaps:
26902 +               return vc_get_ccaps(id, data);
26903 +       case VCMD_set_bcaps:
26904 +               return vc_set_bcaps(id, data);
26905 +       case VCMD_get_bcaps:
26906 +               return vc_get_bcaps(id, data);
26907 +
26908 +       case VCMD_set_nflags:
26909 +               return vc_set_nflags(id, data);
26910 +       case VCMD_get_nflags:
26911 +               return vc_get_nflags(id, data);
26912 +
26913 +       case VCMD_set_ncaps:
26914 +               return vc_set_ncaps(id, data);
26915 +       case VCMD_get_ncaps:
26916 +               return vc_get_ncaps(id, data);
26917 +
26918 +#ifdef CONFIG_VSERVER_LEGACY
26919 +       case VCMD_set_sched_v2:
26920 +               return vc_set_sched_v2(id, data);
26921 +#endif
26922 +       case VCMD_set_sched_v3:
26923 +               return vc_set_sched_v3(id, data);
26924 +       /* this is version 4 */
26925 +       case VCMD_set_sched:
26926 +               return vc_set_sched(id, data);
26927 +
26928 +       case VCMD_add_dlimit:
26929 +               return __COMPAT(vc_add_dlimit, id, data, compat);
26930 +       case VCMD_rem_dlimit:
26931 +               return __COMPAT(vc_rem_dlimit, id, data, compat);
26932 +       case VCMD_set_dlimit:
26933 +               return __COMPAT(vc_set_dlimit, id, data, compat);
26934 +       case VCMD_get_dlimit:
26935 +               return __COMPAT(vc_get_dlimit, id, data, compat);
26936 +       }
26937 +
26938 +       /* below here the VX_ADMIN|VX_WATCH check applies without the setup-state exemption */
26939 +       if (!vx_check(0, VX_ADMIN|VX_WATCH))
26940 +               return -EPERM;
26941 +
26942 +       switch (cmd) {
26943 +       case VCMD_ctx_kill:
26944 +               return vc_ctx_kill(id, data);
26945 +
26946 +       case VCMD_wait_exit:
26947 +               return vc_wait_exit(id, data);
26948 +
26949 +       case VCMD_create_context:
26950 +#ifdef CONFIG_VSERVER_LEGACY
26951 +               return vc_ctx_create(id, NULL);
26952 +#else
26953 +               return -ENOSYS;
26954 +#endif
26955 +
26956 +       case VCMD_get_iattr:
26957 +               return __COMPAT(vc_get_iattr, id, data, compat);
26958 +       case VCMD_set_iattr:
26959 +               return __COMPAT(vc_set_iattr, id, data, compat);
26960 +
26961 +       case VCMD_enter_namespace:
26962 +               return vc_enter_namespace(id, data);
26963 +
26964 +       case VCMD_ctx_create_v0:
26965 +#ifdef CONFIG_VSERVER_LEGACY
26966 +               if (id == 1) {
26967 +                       current->xid = 1;
26968 +                       return 1;
26969 +               }
26970 +#endif
26971 +               return vc_ctx_create(id, NULL);
26972 +       case VCMD_ctx_create:
26973 +               return vc_ctx_create(id, data);
26974 +       case VCMD_ctx_migrate_v0:
26975 +               return vc_ctx_migrate(id, NULL);
26976 +       case VCMD_ctx_migrate:
26977 +               return vc_ctx_migrate(id, data);
26978 +
26979 +       case VCMD_net_create_v0:
26980 +               return vc_net_create(id, NULL);
26981 +       case VCMD_net_create:
26982 +               return vc_net_create(id, data);
26983 +       case VCMD_net_migrate:
26984 +               return vc_net_migrate(id, data);
26985 +       case VCMD_net_add:
26986 +               return vc_net_add(id, data);
26987 +       case VCMD_net_remove:
26988 +               return vc_net_remove(id, data);
26989 +
26990 +       }
26991 +       return -ENOSYS;
26992 +}
26993 +/* native syscall entry: run do_vserver() with compat=0 and debug-print the result */
26994 +extern asmlinkage long
26995 +sys_vserver(uint32_t cmd, uint32_t id, void __user *data)
26996 +{
26997 +       long ret = do_vserver(cmd, id, data, 0);
26998 +
26999 +       vxdprintk(VXD_CBIT(switch, 1),
27000 +               "vc: VCMD_%02d_%d[%d] = %08lx(%ld)",
27001 +               VC_CATEGORY(cmd), VC_COMMAND(cmd),
27002 +               VC_VERSION(cmd), ret, ret);
27003 +       return ret;
27004 +}
27005 +
27006 +#ifdef CONFIG_COMPAT
27007 +/* CONFIG_COMPAT syscall entry: same as sys_vserver() but passes compat=1 to do_vserver() */
27008 +extern asmlinkage long
27009 +sys32_vserver(uint32_t cmd, uint32_t id, void __user *data)
27010 +{
27011 +       long ret = do_vserver(cmd, id, data, 1);
27012 +
27013 +       vxdprintk(VXD_CBIT(switch, 1),
27014 +               "vc: VCMD_%02d_%d[%d] = %08lx(%ld)",
27015 +               VC_CATEGORY(cmd), VC_COMMAND(cmd),
27016 +               VC_VERSION(cmd), ret, ret);
27017 +       return ret;
27018 +}
27019 +
27020 +#endif /* CONFIG_COMPAT */
27021 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/sysctl.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/sysctl.c
27022 --- linux-2.6.16.20/kernel/vserver/sysctl.c     1970-01-01 01:00:00 +0100
27023 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/sysctl.c        2006-04-26 19:07:00 +0200
27024 @@ -0,0 +1,241 @@
27025 +/*
27026 + *  kernel/vserver/sysctl.c
27027 + *
27028 + *  Virtual Context Support
27029 + *
27030 + *  Copyright (C) 2004-2005  Herbert Pötzl
27031 + *
27032 + *  V0.01  basic structure
27033 + *
27034 + */
27035 +
27036 +#include <linux/errno.h>
27037 +#include <linux/module.h>
27038 +#include <linux/types.h>
27039 +#include <linux/ctype.h>
27040 +#include <linux/sysctl.h>
27041 +#include <linux/parser.h>
27042 +#include <linux/fs.h>
27043 +
27044 +#include <asm/uaccess.h>
27045 +#include <asm/unistd.h>
27046 +
27047 +
27048 +#define CTL_VSERVER    4242    /* unused? */
27049 +
27050 +enum {
27051 +       CTL_DEBUG_ERROR         = 0,
27052 +       CTL_DEBUG_SWITCH        = 1,
27053 +       CTL_DEBUG_XID,
27054 +       CTL_DEBUG_NID,
27055 +       CTL_DEBUG_TAG,
27056 +       CTL_DEBUG_NET,
27057 +       CTL_DEBUG_LIMIT,
27058 +       CTL_DEBUG_CRES,
27059 +       CTL_DEBUG_DLIM,
27060 +       CTL_DEBUG_QUOTA,
27061 +       CTL_DEBUG_CVIRT,
27062 +       CTL_DEBUG_MISC,
27063 +};
27064 +
27065 +
27066 +unsigned int vx_debug_switch   = 0;
27067 +unsigned int vx_debug_xid      = 0;
27068 +unsigned int vx_debug_nid      = 0;
27069 +unsigned int vx_debug_tag      = 0;
27070 +unsigned int vx_debug_net      = 0;
27071 +unsigned int vx_debug_limit    = 0;
27072 +unsigned int vx_debug_cres     = 0;
27073 +unsigned int vx_debug_dlim     = 0;
27074 +unsigned int vx_debug_quota    = 0;
27075 +unsigned int vx_debug_cvirt    = 0;
27076 +unsigned int vx_debug_misc     = 0;
27077 +
27078 +
27079 +static struct ctl_table_header *vserver_table_header;
27080 +static ctl_table vserver_table[];
27081 +
27082 +/* register vserver_table unless vserver_table_header already holds a registration */
27083 +void vserver_register_sysctl(void)
27084 +{
27085 +       if (!vserver_table_header) {
27086 +               vserver_table_header = register_sysctl_table(vserver_table, 1);
27087 +       }
27088 +
27089 +}
27090 +/* drop the registration made by vserver_register_sysctl(); does nothing when the header is NULL */
27091 +void vserver_unregister_sysctl(void)
27092 +{
27093 +       if (vserver_table_header) {
27094 +               unregister_sysctl_table(vserver_table_header);
27095 +               vserver_table_header = NULL;
27096 +       }
27097 +}
27098 +
27099 +/* proc_dodebug - sysctl handler: read or write the unsigned decimal debug mask behind table->data */
27100 +static int proc_dodebug(ctl_table *table, int write,
27101 +       struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
27102 +{
27103 +       char            tmpbuf[20], *p, c;
27104 +       unsigned int    value;
27105 +       size_t          left, len;
27106 +       /* a read at a non-zero offset, or a zero-length request, transfers nothing */
27107 +       if ((*ppos && !write) || !*lenp) {
27108 +               *lenp = 0;
27109 +               return 0;
27110 +       }
27111 +
27112 +       left = *lenp;
27113 +
27114 +       if (write) {
27115 +               if (!access_ok(VERIFY_READ, buffer, left))
27116 +                       return -EFAULT;
27117 +               p = (char *) buffer;
27118 +               while (left && __get_user(c, p) >= 0 && isspace(c))
27119 +                       left--, p++;
27120 +               if (!left)
27121 +                       goto done;
27122 +               /* what remains must fit tmpbuf together with the terminating NUL */
27123 +               if (left > sizeof(tmpbuf) - 1)
27124 +                       return -EINVAL;
27125 +               if (copy_from_user(tmpbuf, p, left))
27126 +                       return -EFAULT;
27127 +               tmpbuf[left] = '\0';
27128 +               /* accumulate decimal digits; only whitespace may follow them */
27129 +               for (p = tmpbuf, value = 0; '0' <= *p && *p <= '9'; p++, left--)
27130 +                       value = 10 * value + (*p - '0');
27131 +               if (*p && !isspace(*p))
27132 +                       return -EINVAL;
27133 +               while (left && isspace(*p))
27134 +                       left--, p++;
27135 +               *(unsigned int *) table->data = value;
27136 +       } else {
27137 +               if (!access_ok(VERIFY_WRITE, buffer, left))
27138 +                       return -EFAULT;
27139 +               len = sprintf(tmpbuf, "%u", *(unsigned int *) table->data);
27140 +               if (len > left)
27141 +                       len = left;
27142 +               if (__copy_to_user(buffer, tmpbuf, len))
27143 +                       return -EFAULT;
27144 +               if ((left -= len) > 0) {
27145 +                       if (put_user('\n', (char *)buffer + len))
27146 +                               return -EFAULT;
27147 +                       left--;
27148 +               }
27149 +       }
27150 +       /* report the bytes actually transferred and advance the file position by them */
27151 +done:
27152 +       *lenp -= left;
27153 +       *ppos += *lenp;
27154 +       return 0;
27155 +}
27156 +
27157 +
27158 +#define        CTL_ENTRY(ctl, name)                            \
27159 +       {                                               \
27160 +               .ctl_name       = ctl,                  \
27161 +               .procname       = #name,                \
27162 +               .data           = &vx_##name,           \
27163 +               .maxlen         = sizeof(int),          \
27164 +               .mode           = 0644,                 \
27165 +               .proc_handler   = &proc_dodebug         \
27166 +       }
27167 +
27168 +static ctl_table debug_table[] = {
27169 +       CTL_ENTRY (CTL_DEBUG_SWITCH,    debug_switch),
27170 +       CTL_ENTRY (CTL_DEBUG_XID,       debug_xid),
27171 +       CTL_ENTRY (CTL_DEBUG_NID,       debug_nid),
27172 +       CTL_ENTRY (CTL_DEBUG_TAG,       debug_tag),
27173 +       CTL_ENTRY (CTL_DEBUG_NET,       debug_net),
27174 +       CTL_ENTRY (CTL_DEBUG_LIMIT,     debug_limit),
27175 +       CTL_ENTRY (CTL_DEBUG_CRES,      debug_cres),
27176 +       CTL_ENTRY (CTL_DEBUG_DLIM,      debug_dlim),
27177 +       CTL_ENTRY (CTL_DEBUG_QUOTA,     debug_quota),
27178 +       CTL_ENTRY (CTL_DEBUG_CVIRT,     debug_cvirt),
27179 +       CTL_ENTRY (CTL_DEBUG_MISC,      debug_misc),
27180 +       { .ctl_name = 0 }
27181 +};
27182 +
27183 +static ctl_table vserver_table[] = {
27184 +       {
27185 +               .ctl_name       = CTL_VSERVER,
27186 +               .procname       = "vserver",
27187 +               .mode           = 0555,
27188 +               .child          = debug_table
27189 +       },
27190 +       { .ctl_name = 0 }
27191 +};
27192 +
27193 +
27194 +static match_table_t tokens = {
27195 +       { CTL_DEBUG_SWITCH,     "switch=%x"     },
27196 +       { CTL_DEBUG_XID,        "xid=%x"        },
27197 +       { CTL_DEBUG_NID,        "nid=%x"        },
27198 +       { CTL_DEBUG_TAG,        "tag=%x"        },
27199 +       { CTL_DEBUG_NET,        "net=%x"        },
27200 +       { CTL_DEBUG_LIMIT,      "limit=%x"      },
27201 +       { CTL_DEBUG_CRES,       "cres=%x"       },
27202 +       { CTL_DEBUG_DLIM,       "dlim=%x"       },
27203 +       { CTL_DEBUG_QUOTA,      "quota=%x"      },
27204 +       { CTL_DEBUG_CVIRT,      "cvirt=%x"      },
27205 +       { CTL_DEBUG_MISC,       "misc=%x"       },
27206 +       { CTL_DEBUG_ERROR,      NULL            }
27207 +};
27208 +
27209 +#define        HANDLE_CASE(id, name, val)                              \
27210 +       case CTL_DEBUG_ ## id:                                  \
27211 +               vx_debug_ ## name = val;                        \
27212 +               printk("vs_debug_" #name "=0x%x\n", val);       \
27213 +               break
27214 +
27215 +/* vs_debug_setup - parse the "vsdebug=" boot option: comma separated <name>=<value> debug masks */
27216 +static int __init vs_debug_setup(char *str)
27217 +{
27218 +       char *p;
27219 +       int token;
27220 +
27221 +       printk("vs_debug_setup(%s)\n", str);
27222 +       while ((p = strsep(&str, ",")) != NULL) {
27223 +               substring_t args[MAX_OPT_ARGS];
27224 +               unsigned int value;
27225 +
27226 +               if (!*p)
27227 +                       continue;
27228 +               /* NOTE(review): tokens[] uses %x but base 0 below needs a 0x prefix for hex - confirm intent */
27229 +               token = match_token(p, tokens, args);
27230 +               value = (token>0)?simple_strtoul(args[0].from, NULL, 0):0;
27231 +               /* every name listed in tokens[] needs a case here, else it ends up in default: */
27232 +               switch (token) {
27233 +               HANDLE_CASE(SWITCH, switch, value);
27234 +               HANDLE_CASE(XID,    xid,    value);
27235 +               HANDLE_CASE(NID,    nid,    value);
27236 +               HANDLE_CASE(TAG,    tag,    value);
27237 +               HANDLE_CASE(NET,    net,    value);
27238 +               HANDLE_CASE(LIMIT,  limit,  value);
27239 +               HANDLE_CASE(CRES,   cres,   value);
27240 +               HANDLE_CASE(DLIM,   dlim,   value);
27241 +               HANDLE_CASE(QUOTA,  quota,  value);
27242 +               HANDLE_CASE(CVIRT,  cvirt,  value);
27243 +               HANDLE_CASE(MISC,   misc,   value);
27244 +               default:
27245 +                       return -EINVAL;
27246 +               }
27247 +       }
27248 +       return 1;
27249 +}
27250 +
27251 +__setup("vsdebug=", vs_debug_setup);
27252 +
27253 +/* export every vx_debug_* mask defined above (presumably for vxdprintk() users built as modules) */
27254 +EXPORT_SYMBOL_GPL(vx_debug_switch);
27255 +EXPORT_SYMBOL_GPL(vx_debug_xid);
27256 +EXPORT_SYMBOL_GPL(vx_debug_nid);
27257 +EXPORT_SYMBOL_GPL(vx_debug_tag);
27258 +EXPORT_SYMBOL_GPL(vx_debug_net);
27259 +EXPORT_SYMBOL_GPL(vx_debug_limit);
27260 +EXPORT_SYMBOL_GPL(vx_debug_cres);
27261 +EXPORT_SYMBOL_GPL(vx_debug_dlim);
27262 +EXPORT_SYMBOL_GPL(vx_debug_quota);
27263 +EXPORT_SYMBOL_GPL(vx_debug_cvirt);
27264 +EXPORT_SYMBOL_GPL(vx_debug_misc);
27265 +
27266 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/vci_config.h linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/vci_config.h
27267 --- linux-2.6.16.20/kernel/vserver/vci_config.h 1970-01-01 01:00:00 +0100
27268 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/vci_config.h    2006-04-26 19:07:00 +0200
27269 @@ -0,0 +1,78 @@
27270 +/* bit numbers used by vci_kernel_config(); at VCI_KCBIT_TAGGED a tagging mode number is stored, not a flag */
27271 +enum {
27272 +       VCI_KCBIT_LEGACY = 1,
27273 +       VCI_KCBIT_LEGACYNET,
27274 +       VCI_KCBIT_NGNET,
27275 +
27276 +       VCI_KCBIT_PROC_SECURE,
27277 +       VCI_KCBIT_HARDCPU,
27278 +       VCI_KCBIT_IDLELIMIT,
27279 +       VCI_KCBIT_IDLETIME,
27280 +
27281 +       VCI_KCBIT_COWBL,
27282 +
27283 +       VCI_KCBIT_LEGACY_VERSION = 15,
27284 +       VCI_KCBIT_DEBUG = 16,
27285 +       VCI_KCBIT_HISTORY = 20,
27286 +       VCI_KCBIT_TAGGED = 24,
27287 +};
27288 +
27289 +/* build the compile-time feature mask: one bit per enabled CONFIG_ option plus the tagging mode number */
27290 +static inline uint32_t vci_kernel_config(void)
27291 +{
27292 +       return
27293 +       /* various legacy options */
27294 +#ifdef CONFIG_VSERVER_LEGACY
27295 +       (1 << VCI_KCBIT_LEGACY) |
27296 +#endif
27297 +#ifdef CONFIG_VSERVER_LEGACYNET
27298 +       (1 << VCI_KCBIT_LEGACYNET) |
27299 +#endif
27300 +#ifdef CONFIG_VSERVER_LEGACY_VERSION
27301 +       (1 << VCI_KCBIT_LEGACY_VERSION) |
27302 +#endif
27303 +
27304 +       /* configured features */
27305 +#ifdef CONFIG_VSERVER_PROC_SECURE
27306 +       (1 << VCI_KCBIT_PROC_SECURE) |
27307 +#endif
27308 +#ifdef CONFIG_VSERVER_HARDCPU
27309 +       (1 << VCI_KCBIT_HARDCPU) |
27310 +#endif
27311 +#ifdef CONFIG_VSERVER_IDLELIMIT
27312 +       (1 << VCI_KCBIT_IDLELIMIT) |
27313 +#endif
27314 +#ifdef CONFIG_VSERVER_IDLETIME
27315 +       (1 << VCI_KCBIT_IDLETIME) |
27316 +#endif
27317 +#ifdef CONFIG_VSERVER_COWBL
27318 +       (1 << VCI_KCBIT_COWBL) |
27319 +#endif
27320 +
27321 +       /* debug options */
27322 +#ifdef CONFIG_VSERVER_DEBUG
27323 +       (1 << VCI_KCBIT_DEBUG) |
27324 +#endif
27325 +#ifdef CONFIG_VSERVER_HISTORY
27326 +       (1 << VCI_KCBIT_HISTORY) |
27327 +#endif
27328 +
27329 +       /* inode context tagging: mode number 0-5, or 7 when no known mode is configured */
27330 +#if    defined(CONFIG_TAGGING_NONE)
27331 +       (0 << VCI_KCBIT_TAGGED) |
27332 +#elif  defined(CONFIG_TAGGING_UID16)
27333 +       (1 << VCI_KCBIT_TAGGED) |
27334 +#elif  defined(CONFIG_TAGGING_GID16)
27335 +       (2 << VCI_KCBIT_TAGGED) |
27336 +#elif  defined(CONFIG_TAGGING_ID24)
27337 +       (3 << VCI_KCBIT_TAGGED) |
27338 +#elif  defined(CONFIG_TAGGING_INTERN)
27339 +       (4 << VCI_KCBIT_TAGGED) |
27340 +#elif  defined(CONFIG_TAGGING_RUNTIME)
27341 +       (5 << VCI_KCBIT_TAGGED) |
27342 +#else
27343 +       (7 << VCI_KCBIT_TAGGED) |
27344 +#endif
27345 +       0;
27346 +}
27347 +
27348 diff -NurpP --minimal linux-2.6.16.20/mm/filemap.c linux-2.6.16.20-vs2.1.1-rc22/mm/filemap.c
27349 --- linux-2.6.16.20/mm/filemap.c        2006-04-09 13:49:58 +0200
27350 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/filemap.c   2006-05-04 03:22:57 +0200
27351 @@ -1111,6 +1111,31 @@ int file_send_actor(read_descriptor_t * 
27352         return written;
27353  }
27354  
27355 +/* FIXME: It would be as simple as this, if we had a (void __user*) to write.
27356 + * We already have a kernel buffer, so it should be even simpler, right? ;)
27357 + *
27358 + * Yes, sorta.  After duplicating the complete path of generic_file_write(),
27359 + * at least some special cases could be removed, so the copy is simpler than
27360 + * the original.  But it remains a copy, so overall complexity increases.
27361 + */
27362 +static ssize_t
27363 +generic_kernel_file_write(struct file *, const char *, size_t, loff_t *);
27364 +/* write 'size' bytes of 'page', starting at 'offset', to 'file' at *ppos; 'more' is not used */
27365 +ssize_t generic_file_sendpage(struct file *file, struct page *page,
27366 +               int offset, size_t size, loff_t *ppos, int more)
27367 +{
27368 +       ssize_t ret;
27369 +       char *kaddr;
27370 +
27371 +       kaddr = kmap(page);
27372 +       ret = generic_kernel_file_write(file, kaddr + offset, size, ppos);
27373 +       kunmap(page);
27374 +
27375 +       return ret;
27376 +}
27377 +
27378 +EXPORT_SYMBOL(generic_file_sendpage);
27379 +
27380  ssize_t generic_file_sendfile(struct file *in_file, loff_t *ppos,
27381                          size_t count, read_actor_t actor, void *target)
27382  {
27383 @@ -1761,6 +1786,19 @@ int remove_suid(struct dentry *dentry)
27384  }
27385  EXPORT_SYMBOL(remove_suid);
27386  
27387 +static inline size_t
27388 +filemap_copy_from_kernel(struct page *page, unsigned long offset,
27389 +                        const char *buf, unsigned bytes)
27390 +{
27391 +       char *kaddr;
27392 +       /* map the page, memcpy 'bytes' bytes from buf to 'offset' within it, unmap; always returns 'bytes' */
27393 +       kaddr = kmap(page);
27394 +       memcpy(kaddr + offset, buf, bytes);
27395 +       kunmap(page);
27396 +
27397 +       return bytes;
27398 +}
27399 +
27400  size_t
27401  __filemap_copy_from_user_iovec(char *vaddr, 
27402                         const struct iovec *iov, size_t base, size_t bytes)
27403 @@ -2134,6 +2172,175 @@ out:
27404  }
27405  EXPORT_SYMBOL(generic_file_aio_write_nolock);
27406  
27407 +static inline void
27408 +filemap_set_next_kvec(const struct kvec **iovp, size_t *basep, size_t bytes)
27409 +{
27410 +       const struct kvec *iov = *iovp;
27411 +       size_t base = *basep;
27412 +       /* advance (*iovp, *basep) by 'bytes', moving on to the next kvec whenever one is used up */
27413 +       while (bytes) {
27414 +               size_t copy = min(bytes, iov->iov_len - base);
27415 +               /* size_t on purpose: an int would truncate chunks of 2GB and more */
27416 +               bytes -= copy;
27417 +               base += copy;
27418 +               if (iov->iov_len == base) {
27419 +                       iov++;
27420 +                       base = 0;
27421 +               }
27422 +       }
27423 +       *iovp = iov;
27424 +       *basep = base;
27425 +}
27426 +
27427 +/*
27428 + * Write the kernel-space buffer described by iov to iocb->ki_filp at *ppos
27429 + * through the page cache; returns bytes written or a negative errno.
27430 + * TODO: this largely copies generic_file_aio_write_nolock(), although it
27431 + * doesn't have to be nearly as generic.  A real cleanup should either merge
27432 + * the two or keep this special and remove as much code as possible.
27433 + */
27434 +static ssize_t
27435 +generic_kernel_file_aio_write_nolock(struct kiocb *iocb, const struct kvec*iov,
27436 +                                    unsigned long nr_segs, loff_t *ppos)
27437 +{
27438 +       struct file *file = iocb->ki_filp;
27439 +       struct address_space * mapping = file->f_mapping;
27440 +       struct address_space_operations *a_ops = mapping->a_ops;
27441 +       size_t ocount;          /* original count */
27442 +       size_t count;           /* after file limit checks */
27443 +       struct inode    *inode = mapping->host;
27444 +       long            status = 0;
27445 +       loff_t          pos;
27446 +       struct page     *page;
27447 +       struct page     *cached_page = NULL;
27448 +       const int       isblk = S_ISBLK(inode->i_mode);
27449 +       ssize_t         written;
27450 +       ssize_t         err;
27451 +       size_t          bytes;
27452 +       struct pagevec  lru_pvec;
27453 +       const struct kvec *cur_iov = iov; /* current kvec */
27454 +       size_t          iov_base = 0;      /* offset in the current kvec */
27455 +       unsigned long   seg;
27456 +       char            *buf;
27457 +
27458 +       ocount = 0;
27459 +       for (seg = 0; seg < nr_segs; seg++) {
27460 +               const struct kvec *iv = &iov[seg];
27461 +
27462 +               /*
27463 +                * If any segment has a negative length, or the cumulative
27464 +                * length ever wraps negative then return -EINVAL.
27465 +                */
27466 +               ocount += iv->iov_len;
27467 +               if (unlikely((ssize_t)(ocount|iv->iov_len) < 0))
27468 +                       return -EINVAL;
27469 +       }
27470 +
27471 +       count = ocount;
27472 +       pos = *ppos;
27473 +       pagevec_init(&lru_pvec, 0);
27474 +
27475 +       /* We can write back this queue in page reclaim */
27476 +       current->backing_dev_info = mapping->backing_dev_info;
27477 +       written = 0;
27478 +
27479 +       err = generic_write_checks(file, &pos, &count, isblk);
27480 +       if (err)
27481 +               goto out;
27482 +
27483 +       /* nothing to write: leave with err == 0 */
27484 +       if (count == 0)
27485 +               goto out;
27486 +
27487 +       remove_suid(file->f_dentry);
27488 +       file_update_time(file);
27489 +
27490 +       /* There is no sane reason to use O_DIRECT */
27491 +       BUG_ON(file->f_flags & O_DIRECT);
27492 +       /* only a single kvec is supported, see the BUG_ON() in the loop */
27493 +       buf = iov->iov_base;
27494 +       do {
27495 +               unsigned long index;
27496 +               unsigned long offset;
27497 +               size_t copied;
27498 +
27499 +               offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
27500 +               index = pos >> PAGE_CACHE_SHIFT;
27501 +               bytes = PAGE_CACHE_SIZE - offset;
27502 +               if (bytes > count)
27503 +                       bytes = count;
27504 +
27505 +               page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec);
27506 +               if (!page) {
27507 +                       status = -ENOMEM;
27508 +                       break;
27509 +               }
27510 +
27511 +               status = a_ops->prepare_write(file, page, offset, offset+bytes);
27512 +               if (unlikely(status)) {
27513 +                       loff_t isize = i_size_read(inode);
27514 +                       /*
27515 +                        * prepare_write() may have instantiated a few blocks
27516 +                        * outside i_size.  Trim these off again.
27517 +                        */
27518 +                       unlock_page(page);
27519 +                       page_cache_release(page);
27520 +                       if (pos + bytes > isize)
27521 +                               vmtruncate(inode, isize);
27522 +                       break;
27523 +               }
27524 +
27525 +               BUG_ON(nr_segs != 1);
27526 +               copied = filemap_copy_from_kernel(page, offset, buf, bytes);
27527 +
27528 +               flush_dcache_page(page);
27529 +               status = a_ops->commit_write(file, page, offset, offset+bytes);
27530 +               if (likely(copied > 0)) {
27531 +                       if (!status)
27532 +                               status = copied;
27533 +
27534 +                       if (status >= 0) {
27535 +                               written += status;
27536 +                               count -= status;
27537 +                               pos += status;
27538 +                               buf += status;
27539 +                               if (unlikely(nr_segs > 1))
27540 +                                       filemap_set_next_kvec(&cur_iov,
27541 +                                                       &iov_base, status);
27542 +                       }
27543 +               }
27544 +               if (unlikely(copied != bytes))
27545 +                       if (status >= 0)
27546 +                               status = -EFAULT;
27547 +               unlock_page(page);
27548 +               mark_page_accessed(page);
27549 +               page_cache_release(page);
27550 +               if (status < 0)
27551 +                       break;
27552 +               balance_dirty_pages_ratelimited(mapping);
27553 +               cond_resched();
27554 +       } while (count);
27555 +       *ppos = pos;
27556 +
27557 +       if (cached_page)
27558 +               page_cache_release(cached_page);
27559 +
27560 +       /*
27561 +        * For now, when the user asks for O_SYNC, we'll actually give O_DSYNC
27562 +        */
27563 +       if (status >= 0) {
27564 +               if ((file->f_flags & O_SYNC) || IS_SYNC(inode))
27565 +                       status = generic_osync_inode(inode, mapping,
27566 +                                       OSYNC_METADATA|OSYNC_DATA);
27567 +       }
27568 +       /* a partial write is reported as its byte count, not as the error that ended it */
27569 +       err = written ? written : status;
27570 +out:
27571 +       pagevec_lru_add(&lru_pvec);
27572 +       current->backing_dev_info = NULL;
27573 +       return err;
27574 +}
27575 +
27576  ssize_t
27577  generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
27578                                 unsigned long nr_segs, loff_t *ppos)
27579 @@ -2183,6 +2390,21 @@ generic_file_write_nolock(struct file *f
27580                 ret = wait_on_sync_kiocb(&kiocb);
27581         return ret;
27582  }
27583 +/* synchronous wrapper: run the aio variant on an on-stack kiocb and wait if it returns -EIOCBQUEUED */
27584 +static ssize_t
27585 +generic_kernel_file_write_nolock(struct file *file, const struct kvec *iov,
27586 +                                unsigned long nr_segs, loff_t *ppos)
27587 +{
27588 +       struct kiocb kiocb;
27589 +       ssize_t ret;
27590 +
27591 +       init_sync_kiocb(&kiocb, file);
27592 +       ret = generic_kernel_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
27593 +       if (ret == -EIOCBQUEUED)
27594 +               ret = wait_on_sync_kiocb(&kiocb);
27595 +       return ret;
27596 +}
27597 +
27598  EXPORT_SYMBOL(generic_file_write_nolock);
27599  
27600  ssize_t generic_file_aio_write(struct kiocb *iocb, const char __user *buf,
27601 @@ -2237,6 +2459,21 @@ ssize_t generic_file_write(struct file *
27602  }
27603  EXPORT_SYMBOL(generic_file_write);
27604  
27605 +static ssize_t generic_kernel_file_write(struct file *file, const char *buf,
27606 +                                        size_t count, loff_t *ppos)
27607 +{
27608 +       struct inode    *inode = file->f_mapping->host;
27609 +       ssize_t         err;
27610 +       struct kvec local_iov = { .iov_base = (char *) buf,
27611 +                                 .iov_len = count };
27612 +       /* buf is wrapped in a single kvec and written while holding the inode's i_mutex */
27613 +       mutex_lock(&inode->i_mutex);
27614 +       err = generic_kernel_file_write_nolock(file, &local_iov, 1, ppos);
27615 +       mutex_unlock(&inode->i_mutex);
27616 +
27617 +       return err;
27618 +}
27619 +
27620  ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
27621                         unsigned long nr_segs, loff_t *ppos)
27622  {
27623 diff -NurpP --minimal linux-2.6.16.20/mm/filemap_xip.c linux-2.6.16.20-vs2.1.1-rc22/mm/filemap_xip.c
27624 --- linux-2.6.16.20/mm/filemap_xip.c    2006-04-09 13:49:58 +0200
27625 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/filemap_xip.c       2006-04-26 19:07:00 +0200
27626 @@ -13,6 +13,7 @@
27627  #include <linux/module.h>
27628  #include <linux/uio.h>
27629  #include <linux/rmap.h>
27630 +#include <linux/vs_memory.h>
27631  #include <asm/tlbflush.h>
27632  #include "filemap.h"
27633  
27634 diff -NurpP --minimal linux-2.6.16.20/mm/fremap.c linux-2.6.16.20-vs2.1.1-rc22/mm/fremap.c
27635 --- linux-2.6.16.20/mm/fremap.c 2006-01-03 17:30:13 +0100
27636 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/fremap.c    2006-04-26 19:07:00 +0200
27637 @@ -15,6 +15,7 @@
27638  #include <linux/rmap.h>
27639  #include <linux/module.h>
27640  #include <linux/syscalls.h>
27641 +#include <linux/vs_memory.h>
27642  
27643  #include <asm/mmu_context.h>
27644  #include <asm/cacheflush.h>
27645 @@ -35,6 +36,7 @@ static int zap_pte(struct mm_struct *mm,
27646                                 set_page_dirty(page);
27647                         page_remove_rmap(page);
27648                         page_cache_release(page);
27649 +                       // dec_mm_counter(mm, file_rss);
27650                 }
27651         } else {
27652                 if (!pte_file(pte))
27653 @@ -74,6 +76,8 @@ int install_page(struct mm_struct *mm, s
27654         err = -ENOMEM;
27655         if (page_mapcount(page) > INT_MAX/2)
27656                 goto unlock;
27657 +       if (!vx_rsspages_avail(mm, 1))
27658 +               goto unlock;
27659  
27660         if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte))
27661                 inc_mm_counter(mm, file_rss);
27662 diff -NurpP --minimal linux-2.6.16.20/mm/hugetlb.c linux-2.6.16.20-vs2.1.1-rc22/mm/hugetlb.c
27663 --- linux-2.6.16.20/mm/hugetlb.c        2006-02-18 14:40:38 +0100
27664 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/hugetlb.c   2006-04-26 19:07:00 +0200
27665 @@ -18,6 +18,7 @@
27666  #include <asm/pgtable.h>
27667  
27668  #include <linux/hugetlb.h>
27669 +#include <linux/vs_memory.h>
27670  
27671  const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
27672  static unsigned long nr_huge_pages, free_huge_pages;
27673 diff -NurpP --minimal linux-2.6.16.20/mm/memory.c linux-2.6.16.20-vs2.1.1-rc22/mm/memory.c
27674 --- linux-2.6.16.20/mm/memory.c 2006-04-09 13:49:58 +0200
27675 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/memory.c    2006-04-26 19:07:00 +0200
27676 @@ -1904,6 +1904,10 @@ again:
27677                 grab_swap_token();
27678         }
27679  
27680 +       if (!vx_rsspages_avail(mm, 1)) {
27681 +               ret = VM_FAULT_OOM;
27682 +               goto out;
27683 +       }
27684         mark_page_accessed(page);
27685         lock_page(page);
27686         if (!PageSwapCache(page)) {
27687 @@ -1981,6 +1985,8 @@ static int do_anonymous_page(struct mm_s
27688                 /* Allocate our own private page. */
27689                 pte_unmap(page_table);
27690  
27691 +               if (!vx_rsspages_avail(mm, 1))
27692 +                       goto oom;
27693                 if (unlikely(anon_vma_prepare(vma)))
27694                         goto oom;
27695                 page = alloc_zeroed_user_highpage(vma, address);
27696 @@ -2059,6 +2065,9 @@ static int do_no_page(struct mm_struct *
27697                 smp_rmb(); /* serializes i_size against truncate_count */
27698         }
27699  retry:
27700 +       /* FIXME: is that check useful here? */
27701 +       if (!vx_rsspages_avail(mm, 1))
27702 +               return VM_FAULT_OOM;
27703         new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
27704         /*
27705          * No smp_rmb is needed here as long as there's a full
27706 @@ -2206,21 +2215,32 @@ static inline int handle_pte_fault(struc
27707         pte_t entry;
27708         pte_t old_entry;
27709         spinlock_t *ptl;
27710 +       int ret, type = VXPT_UNKNOWN;
27711  
27712         old_entry = entry = *pte;
27713         if (!pte_present(entry)) {
27714                 if (pte_none(entry)) {
27715 -                       if (!vma->vm_ops || !vma->vm_ops->nopage)
27716 -                               return do_anonymous_page(mm, vma, address,
27717 +                       if (!vma->vm_ops || !vma->vm_ops->nopage) {
27718 +                               ret = do_anonymous_page(mm, vma, address,
27719                                         pte, pmd, write_access);
27720 -                       return do_no_page(mm, vma, address,
27721 +                               type = VXPT_ANON;
27722 +                               goto out;
27723 +                       }
27724 +                       ret = do_no_page(mm, vma, address,
27725                                         pte, pmd, write_access);
27726 +                       type = VXPT_NONE;
27727 +                       goto out;
27728                 }
27729 -               if (pte_file(entry))
27730 -                       return do_file_page(mm, vma, address,
27731 +               if (pte_file(entry)) {
27732 +                       ret = do_file_page(mm, vma, address,
27733                                         pte, pmd, write_access, entry);
27734 -               return do_swap_page(mm, vma, address,
27735 +                       type = VXPT_FILE;
27736 +                       goto out;
27737 +               }
27738 +               ret = do_swap_page(mm, vma, address,
27739                                         pte, pmd, write_access, entry);
27740 +               type = VXPT_SWAP;
27741 +               goto out;
27742         }
27743  
27744         ptl = pte_lockptr(mm, pmd);
27745 @@ -2228,9 +2248,12 @@ static inline int handle_pte_fault(struc
27746         if (unlikely(!pte_same(*pte, entry)))
27747                 goto unlock;
27748         if (write_access) {
27749 -               if (!pte_write(entry))
27750 -                       return do_wp_page(mm, vma, address,
27751 +               if (!pte_write(entry)) {
27752 +                       ret = do_wp_page(mm, vma, address,
27753                                         pte, pmd, ptl, entry);
27754 +                       type = VXPT_WRITE;
27755 +                       goto out;
27756 +               }
27757                 entry = pte_mkdirty(entry);
27758         }
27759         entry = pte_mkyoung(entry);
27760 @@ -2250,7 +2273,10 @@ static inline int handle_pte_fault(struc
27761         }
27762  unlock:
27763         pte_unmap_unlock(pte, ptl);
27764 -       return VM_FAULT_MINOR;
27765 +       ret = VM_FAULT_MINOR;
27766 +out:
27767 +       vx_page_fault(mm, vma, type, ret);
27768 +       return ret;
27769  }
27770  
27771  /*
27772 diff -NurpP --minimal linux-2.6.16.20/mm/mempolicy.c linux-2.6.16.20-vs2.1.1-rc22/mm/mempolicy.c
27773 --- linux-2.6.16.20/mm/mempolicy.c      2006-05-22 16:25:40 +0200
27774 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/mempolicy.c 2006-05-22 06:19:46 +0200
27775 @@ -86,6 +86,7 @@
27776  #include <linux/swap.h>
27777  #include <linux/seq_file.h>
27778  #include <linux/proc_fs.h>
27779 +#include <linux/vs_pid.h>
27780  
27781  #include <asm/tlbflush.h>
27782  #include <asm/uaccess.h>
27783 diff -NurpP --minimal linux-2.6.16.20/mm/mlock.c linux-2.6.16.20-vs2.1.1-rc22/mm/mlock.c
27784 --- linux-2.6.16.20/mm/mlock.c  2006-04-09 13:49:58 +0200
27785 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/mlock.c     2006-04-26 19:07:00 +0200
27786 @@ -10,6 +10,7 @@
27787  #include <linux/mm.h>
27788  #include <linux/mempolicy.h>
27789  #include <linux/syscalls.h>
27790 +#include <linux/vs_memory.h>
27791  
27792  
27793  static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
27794 @@ -65,7 +66,7 @@ success:
27795                         ret = make_pages_present(start, end);
27796         }
27797  
27798 -       vma->vm_mm->locked_vm -= pages;
27799 +       vx_vmlocked_sub(vma->vm_mm, pages);
27800  out:
27801         if (ret == -ENOMEM)
27802                 ret = -EAGAIN;
27803 @@ -123,7 +124,7 @@ static int do_mlock(unsigned long start,
27804  
27805  asmlinkage long sys_mlock(unsigned long start, size_t len)
27806  {
27807 -       unsigned long locked;
27808 +       unsigned long locked, grow;
27809         unsigned long lock_limit;
27810         int error = -ENOMEM;
27811  
27812 @@ -134,8 +135,10 @@ asmlinkage long sys_mlock(unsigned long 
27813         len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
27814         start &= PAGE_MASK;
27815  
27816 -       locked = len >> PAGE_SHIFT;
27817 -       locked += current->mm->locked_vm;
27818 +       grow = len >> PAGE_SHIFT;
27819 +       if (!vx_vmlocked_avail(current->mm, grow))
27820 +               goto out;
27821 +       locked = current->mm->locked_vm + grow;
27822  
27823         lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
27824         lock_limit >>= PAGE_SHIFT;
27825 @@ -143,6 +146,7 @@ asmlinkage long sys_mlock(unsigned long 
27826         /* check against resource limits */
27827         if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
27828                 error = do_mlock(start, len, 1);
27829 +out:
27830         up_write(&current->mm->mmap_sem);
27831         return error;
27832  }
27833 @@ -202,6 +206,8 @@ asmlinkage long sys_mlockall(int flags)
27834         lock_limit >>= PAGE_SHIFT;
27835  
27836         ret = -ENOMEM;
27837 +       if (!vx_vmlocked_avail(current->mm, current->mm->total_vm))
27838 +               goto out;
27839         if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
27840             capable(CAP_IPC_LOCK))
27841                 ret = do_mlockall(flags);
27842 diff -NurpP --minimal linux-2.6.16.20/mm/mmap.c linux-2.6.16.20-vs2.1.1-rc22/mm/mmap.c
27843 --- linux-2.6.16.20/mm/mmap.c   2006-02-18 14:40:38 +0100
27844 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/mmap.c      2006-04-26 19:07:00 +0200
27845 @@ -1116,10 +1116,10 @@ munmap_back:
27846                 kmem_cache_free(vm_area_cachep, vma);
27847         }
27848  out:   
27849 -       mm->total_vm += len >> PAGE_SHIFT;
27850 +       vx_vmpages_add(mm, len >> PAGE_SHIFT);
27851         vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
27852         if (vm_flags & VM_LOCKED) {
27853 -               mm->locked_vm += len >> PAGE_SHIFT;
27854 +               vx_vmlocked_add(mm, len >> PAGE_SHIFT);
27855                 make_pages_present(addr, addr + len);
27856         }
27857         if (flags & MAP_POPULATE) {
27858 @@ -1479,9 +1479,9 @@ static int acct_stack_growth(struct vm_a
27859                 return -ENOMEM;
27860  
27861         /* Ok, everything looks good - let it rip */
27862 -       mm->total_vm += grow;
27863 +       vx_vmpages_add(mm, grow);
27864         if (vma->vm_flags & VM_LOCKED)
27865 -               mm->locked_vm += grow;
27866 +               vx_vmlocked_add(mm, grow);
27867         vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
27868         return 0;
27869  }
27870 @@ -1634,9 +1634,9 @@ static void remove_vma_list(struct mm_st
27871         do {
27872                 long nrpages = vma_pages(vma);
27873  
27874 -               mm->total_vm -= nrpages;
27875 +               vx_vmpages_sub(mm, nrpages);
27876                 if (vma->vm_flags & VM_LOCKED)
27877 -                       mm->locked_vm -= nrpages;
27878 +                       vx_vmlocked_sub(mm, nrpages);
27879                 vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
27880                 vma = remove_vma(vma);
27881         } while (vma);
27882 @@ -1865,6 +1865,8 @@ unsigned long do_brk(unsigned long addr,
27883                 lock_limit >>= PAGE_SHIFT;
27884                 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
27885                         return -EAGAIN;
27886 +               if (!vx_vmlocked_avail(mm, len >> PAGE_SHIFT))
27887 +                       return -ENOMEM;
27888         }
27889  
27890         /*
27891 @@ -1891,7 +1893,8 @@ unsigned long do_brk(unsigned long addr,
27892         if (mm->map_count > sysctl_max_map_count)
27893                 return -ENOMEM;
27894  
27895 -       if (security_vm_enough_memory(len >> PAGE_SHIFT))
27896 +       if (security_vm_enough_memory(len >> PAGE_SHIFT) ||
27897 +               !vx_vmpages_avail(mm, len >> PAGE_SHIFT))
27898                 return -ENOMEM;
27899  
27900         flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
27901 @@ -1919,9 +1922,9 @@ unsigned long do_brk(unsigned long addr,
27902         vma->vm_page_prot = protection_map[flags & 0x0f];
27903         vma_link(mm, vma, prev, rb_link, rb_parent);
27904  out:
27905 -       mm->total_vm += len >> PAGE_SHIFT;
27906 +       vx_vmpages_add(mm, len >> PAGE_SHIFT);
27907         if (flags & VM_LOCKED) {
27908 -               mm->locked_vm += len >> PAGE_SHIFT;
27909 +               vx_vmlocked_add(mm, len >> PAGE_SHIFT);
27910                 make_pages_present(addr, addr + len);
27911         }
27912         return addr;
27913 @@ -1947,6 +1950,11 @@ void exit_mmap(struct mm_struct *mm)
27914         free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
27915         tlb_finish_mmu(tlb, 0, end);
27916  
27917 +       set_mm_counter(mm, file_rss, 0);
27918 +       set_mm_counter(mm, anon_rss, 0);
27919 +       vx_vmpages_sub(mm, mm->total_vm);
27920 +       vx_vmlocked_sub(mm, mm->locked_vm);
27921 +
27922         /*
27923          * Walk the list again, actually closing and freeing it,
27924          * with preemption enabled, without holding any MM locks.
27925 @@ -1986,7 +1994,8 @@ int insert_vm_struct(struct mm_struct * 
27926         if (__vma && __vma->vm_start < vma->vm_end)
27927                 return -ENOMEM;
27928         if ((vma->vm_flags & VM_ACCOUNT) &&
27929 -            security_vm_enough_memory(vma_pages(vma)))
27930 +               (security_vm_enough_memory(vma_pages(vma)) ||
27931 +               !vx_vmpages_avail(mm, vma_pages(vma))))
27932                 return -ENOMEM;
27933         vma_link(mm, vma, prev, rb_link, rb_parent);
27934         return 0;
27935 @@ -2059,5 +2068,7 @@ int may_expand_vm(struct mm_struct *mm, 
27936  
27937         if (cur + npages > lim)
27938                 return 0;
27939 +       if (!vx_vmpages_avail(mm, npages))
27940 +               return 0;
27941         return 1;
27942  }
27943 diff -NurpP --minimal linux-2.6.16.20/mm/mremap.c linux-2.6.16.20-vs2.1.1-rc22/mm/mremap.c
27944 --- linux-2.6.16.20/mm/mremap.c 2006-04-09 13:49:58 +0200
27945 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/mremap.c    2006-04-26 19:07:00 +0200
27946 @@ -18,6 +18,7 @@
27947  #include <linux/highmem.h>
27948  #include <linux/security.h>
27949  #include <linux/syscalls.h>
27950 +#include <linux/vs_memory.h>
27951  
27952  #include <asm/uaccess.h>
27953  #include <asm/cacheflush.h>
27954 @@ -211,7 +212,7 @@ static unsigned long move_vma(struct vm_
27955          * If this were a serious issue, we'd add a flag to do_munmap().
27956          */
27957         hiwater_vm = mm->hiwater_vm;
27958 -       mm->total_vm += new_len >> PAGE_SHIFT;
27959 +       vx_vmpages_add(mm, new_len >> PAGE_SHIFT);
27960         vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT);
27961  
27962         if (do_munmap(mm, old_addr, old_len) < 0) {
27963 @@ -229,7 +230,7 @@ static unsigned long move_vma(struct vm_
27964         }
27965  
27966         if (vm_flags & VM_LOCKED) {
27967 -               mm->locked_vm += new_len >> PAGE_SHIFT;
27968 +               vx_vmlocked_add(mm, new_len >> PAGE_SHIFT);
27969                 if (new_len > old_len)
27970                         make_pages_present(new_addr + old_len,
27971                                            new_addr + new_len);
27972 @@ -336,6 +337,9 @@ unsigned long do_mremap(unsigned long ad
27973                 ret = -EAGAIN;
27974                 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
27975                         goto out;
27976 +               if (!vx_vmlocked_avail(current->mm,
27977 +                       (new_len - old_len) >> PAGE_SHIFT))
27978 +                       goto out;
27979         }
27980         if (!may_expand_vm(mm, (new_len - old_len) >> PAGE_SHIFT)) {
27981                 ret = -ENOMEM;
27982 @@ -364,10 +368,10 @@ unsigned long do_mremap(unsigned long ad
27983                         vma_adjust(vma, vma->vm_start,
27984                                 addr + new_len, vma->vm_pgoff, NULL);
27985  
27986 -                       mm->total_vm += pages;
27987 +                       vx_vmpages_add(mm, pages);
27988                         vm_stat_account(mm, vma->vm_flags, vma->vm_file, pages);
27989                         if (vma->vm_flags & VM_LOCKED) {
27990 -                               mm->locked_vm += pages;
27991 +                               vx_vmlocked_add(mm, pages);
27992                                 make_pages_present(addr + old_len,
27993                                                    addr + new_len);
27994                         }
27995 diff -NurpP --minimal linux-2.6.16.20/mm/nommu.c linux-2.6.16.20-vs2.1.1-rc22/mm/nommu.c
27996 --- linux-2.6.16.20/mm/nommu.c  2006-04-09 13:49:58 +0200
27997 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/nommu.c     2006-04-26 19:07:00 +0200
27998 @@ -820,7 +820,7 @@ unsigned long do_mmap_pgoff(struct file 
27999         realalloc += kobjsize(vma);
28000         askedalloc += sizeof(*vma);
28001  
28002 -       current->mm->total_vm += len >> PAGE_SHIFT;
28003 +       vx_vmpages_add(current->mm, len >> PAGE_SHIFT);
28004  
28005         add_nommu_vma(vma);
28006  
28007 @@ -937,7 +937,7 @@ int do_munmap(struct mm_struct *mm, unsi
28008         kfree(vml);
28009  
28010         update_hiwater_vm(mm);
28011 -       mm->total_vm -= len >> PAGE_SHIFT;
28012 +       vx_vmpages_sub(mm, len >> PAGE_SHIFT);
28013  
28014  #ifdef DEBUG
28015         show_process_blocks();
28016 @@ -956,7 +956,7 @@ void exit_mmap(struct mm_struct * mm)
28017                 printk("Exit_mmap:\n");
28018  #endif
28019  
28020 -               mm->total_vm = 0;
28021 +               vx_vmpages_sub(mm, mm->total_vm);
28022  
28023                 while ((tmp = mm->context.vmlist)) {
28024                         mm->context.vmlist = tmp->next;
28025 diff -NurpP --minimal linux-2.6.16.20/mm/oom_kill.c linux-2.6.16.20-vs2.1.1-rc22/mm/oom_kill.c
28026 --- linux-2.6.16.20/mm/oom_kill.c       2006-04-09 13:49:58 +0200
28027 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/oom_kill.c  2006-04-26 19:07:00 +0200
28028 @@ -55,6 +55,7 @@ unsigned long badness(struct task_struct
28029          * The memory size of the process is the basis for the badness.
28030          */
28031         points = p->mm->total_vm;
28032 +       /* FIXME: add vserver badness ;) */
28033  
28034         /*
28035          * Processes which fork a lot of child processes are likely
28036 @@ -230,8 +231,8 @@ static void __oom_kill_task(task_t *p, c
28037                 return;
28038         }
28039         task_unlock(p);
28040 -       printk(KERN_ERR "%s: Killed process %d (%s).\n",
28041 -                               message, p->pid, p->comm);
28042 +       printk(KERN_ERR "%s: Killed process %d[#%u] (%s).\n",
28043 +               message, p->pid, p->xid, p->comm);
28044  
28045         /*
28046          * We give our sacrificial lamb high priority and access to
28047 diff -NurpP --minimal linux-2.6.16.20/mm/page_alloc.c linux-2.6.16.20-vs2.1.1-rc22/mm/page_alloc.c
28048 --- linux-2.6.16.20/mm/page_alloc.c     2006-06-06 15:37:20 +0200
28049 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/page_alloc.c        2006-06-06 15:31:32 +0200
28050 @@ -37,6 +37,7 @@
28051  #include <linux/cpu.h>
28052  #include <linux/cpuset.h>
28053  #include <linux/memory_hotplug.h>
28054 +#include <linux/vs_limit.h>
28055  #include <linux/nodemask.h>
28056  #include <linux/vmalloc.h>
28057  #include <linux/mempolicy.h>
28058 @@ -1356,6 +1357,8 @@ void si_meminfo(struct sysinfo *val)
28059         val->freehigh = 0;
28060  #endif
28061         val->mem_unit = PAGE_SIZE;
28062 +       if (vx_flags(VXF_VIRT_MEM, 0))
28063 +               vx_vsi_meminfo(val);
28064  }
28065  
28066  EXPORT_SYMBOL(si_meminfo);
28067 @@ -1370,6 +1373,8 @@ void si_meminfo_node(struct sysinfo *val
28068         val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages;
28069         val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages;
28070         val->mem_unit = PAGE_SIZE;
28071 +       if (vx_flags(VXF_VIRT_MEM, 0))
28072 +               vx_vsi_meminfo(val);
28073  }
28074  #endif
28075  
28076 diff -NurpP --minimal linux-2.6.16.20/mm/rmap.c linux-2.6.16.20-vs2.1.1-rc22/mm/rmap.c
28077 --- linux-2.6.16.20/mm/rmap.c   2006-04-09 13:49:58 +0200
28078 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/rmap.c      2006-04-26 19:07:00 +0200
28079 @@ -53,6 +53,7 @@
28080  #include <linux/rmap.h>
28081  #include <linux/rcupdate.h>
28082  #include <linux/module.h>
28083 +#include <linux/vs_memory.h>
28084  
28085  #include <asm/tlbflush.h>
28086  
28087 diff -NurpP --minimal linux-2.6.16.20/mm/shmem.c linux-2.6.16.20-vs2.1.1-rc22/mm/shmem.c
28088 --- linux-2.6.16.20/mm/shmem.c  2006-05-22 16:25:40 +0200
28089 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/shmem.c     2006-05-22 06:19:46 +0200
28090 @@ -51,7 +51,6 @@
28091  #include <asm/pgtable.h>
28092  
28093  /* This magic number is used in glibc for posix shared memory */
28094 -#define TMPFS_MAGIC    0x01021994
28095  
28096  #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
28097  #define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
28098 @@ -1656,7 +1655,7 @@ static int shmem_statfs(struct super_blo
28099  {
28100         struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
28101  
28102 -       buf->f_type = TMPFS_MAGIC;
28103 +       buf->f_type = TMPFS_SUPER_MAGIC;
28104         buf->f_bsize = PAGE_CACHE_SIZE;
28105         buf->f_namelen = NAME_MAX;
28106         spin_lock(&sbinfo->stat_lock);
28107 @@ -2098,7 +2097,7 @@ static int shmem_fill_super(struct super
28108         sb->s_maxbytes = SHMEM_MAX_BYTES;
28109         sb->s_blocksize = PAGE_CACHE_SIZE;
28110         sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
28111 -       sb->s_magic = TMPFS_MAGIC;
28112 +       sb->s_magic = TMPFS_SUPER_MAGIC;
28113         sb->s_op = &shmem_ops;
28114  
28115         inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
28116 diff -NurpP --minimal linux-2.6.16.20/mm/slab.c linux-2.6.16.20-vs2.1.1-rc22/mm/slab.c
28117 --- linux-2.6.16.20/mm/slab.c   2006-04-09 13:49:58 +0200
28118 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/slab.c      2006-04-26 19:07:00 +0200
28119 @@ -487,6 +487,8 @@ struct kmem_cache {
28120  #define STATS_INC_FREEMISS(x)  do { } while (0)
28121  #endif
28122  
28123 +#include "slab_vs.h"
28124 +
28125  #if DEBUG
28126  /* Magic nums for obj red zoning.
28127   * Placed in the first word before and the first word after an obj.
28128 @@ -2819,6 +2821,7 @@ __cache_alloc(struct kmem_cache *cachep,
28129  
28130         local_irq_save(save_flags);
28131         objp = ____cache_alloc(cachep, flags);
28132 +       vx_slab_alloc(cachep, flags);
28133         local_irq_restore(save_flags);
28134         objp = cache_alloc_debugcheck_after(cachep, flags, objp,
28135                                             caller);
28136 @@ -2864,6 +2867,7 @@ static void *__cache_alloc_node(struct k
28137  
28138         obj = slab_get_obj(cachep, slabp, nodeid);
28139         check_slabp(cachep, slabp);
28140 +       vx_slab_alloc(cachep, flags);
28141         l3->free_objects--;
28142         /* move slabp to correct slabp list: */
28143         list_del(&slabp->list);
28144 @@ -2899,6 +2903,7 @@ static void free_block(struct kmem_cache
28145         int i;
28146         struct kmem_list3 *l3;
28147  
28148 +       // printk("·· free_block(%x) = %dx%x\n", cachep->gfpflags, nr_objects, cachep->objsize);
28149         for (i = 0; i < nr_objects; i++) {
28150                 void *objp = objpp[i];
28151                 struct slab *slabp;
28152 @@ -2996,6 +3001,7 @@ static inline void __cache_free(struct k
28153  
28154         check_irq_off();
28155         objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
28156 +       vx_slab_free(cachep);
28157  
28158         /* Make sure we are not freeing a object from another
28159          * node to the array cache on this cpu.
28160 diff -NurpP --minimal linux-2.6.16.20/mm/slab_vs.h linux-2.6.16.20-vs2.1.1-rc22/mm/slab_vs.h
28161 --- linux-2.6.16.20/mm/slab_vs.h        1970-01-01 01:00:00 +0100
28162 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/slab_vs.h   2006-04-29 02:58:07 +0200
28163 @@ -0,0 +1,23 @@
28164 +/* Per-context slab accounting: counters are indexed by the cache's gfp zone. */
28165 +static inline
28166 +void vx_slab_alloc(struct kmem_cache *cachep, gfp_t flags)
28167 +{
28168 +       int what = gfp_zone(cachep->gfpflags);
28169 +       /* 'flags' is unused here; tasks without a vx_info are not accounted */
28170 +       if (!current->vx_info)
28171 +               return;
28172 +       /* charge one object's buffer_size to current's context */
28173 +       atomic_add(cachep->buffer_size, &current->vx_info->cacct.slab[what]);
28174 +}
28175 +
28176 +static inline
28177 +void vx_slab_free(struct kmem_cache *cachep)
28178 +{
28179 +       int what = gfp_zone(cachep->gfpflags);
28180 +       /* counterpart of vx_slab_alloc(): no vx_info on current, no accounting */
28181 +       if (!current->vx_info)
28182 +               return;
28183 +       /* NOTE(review): uncharges current's context, which may not be the one charged */
28184 +       atomic_sub(cachep->buffer_size, &current->vx_info->cacct.slab[what]);
28185 +}
28186 +
28187 diff -NurpP --minimal linux-2.6.16.20/mm/swapfile.c linux-2.6.16.20-vs2.1.1-rc22/mm/swapfile.c
28188 --- linux-2.6.16.20/mm/swapfile.c       2006-02-18 14:40:38 +0100
28189 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/swapfile.c  2006-04-26 19:07:00 +0200
28190 @@ -32,6 +32,7 @@
28191  #include <asm/pgtable.h>
28192  #include <asm/tlbflush.h>
28193  #include <linux/swapops.h>
28194 +#include <linux/vs_memory.h>
28195  
28196  DEFINE_SPINLOCK(swap_lock);
28197  unsigned int nr_swapfiles;
28198 @@ -1634,6 +1635,8 @@ void si_swapinfo(struct sysinfo *val)
28199         val->freeswap = nr_swap_pages + nr_to_be_unused;
28200         val->totalswap = total_swap_pages + nr_to_be_unused;
28201         spin_unlock(&swap_lock);
28202 +       if (vx_flags(VXF_VIRT_MEM, 0))
28203 +               vx_vsi_swapinfo(val);
28204  }
28205  
28206  /*
28207 diff -NurpP --minimal linux-2.6.16.20/mm/vmscan.c linux-2.6.16.20-vs2.1.1-rc22/mm/vmscan.c
28208 --- linux-2.6.16.20/mm/vmscan.c 2006-05-22 16:25:40 +0200
28209 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/vmscan.c    2006-05-22 06:19:46 +0200
28210 @@ -1843,7 +1843,7 @@ static int __init kswapd_init(void)
28211         swap_setup();
28212         for_each_pgdat(pgdat)
28213                 pgdat->kswapd
28214 -               = find_task_by_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL));
28215 +               = find_task_by_real_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL));
28216         total_memory = nr_free_pagecache_pages();
28217         hotcpu_notifier(cpu_callback, 0);
28218         return 0;
28219 diff -NurpP --minimal linux-2.6.16.20/net/core/dev.c linux-2.6.16.20-vs2.1.1-rc22/net/core/dev.c
28220 --- linux-2.6.16.20/net/core/dev.c      2006-05-11 21:25:37 +0200
28221 +++ linux-2.6.16.20-vs2.1.1-rc22/net/core/dev.c 2006-04-26 19:07:00 +0200
28222 @@ -114,6 +114,7 @@
28223  #include <linux/wireless.h>            /* Note : will define WIRELESS_EXT */
28224  #include <net/iw_handler.h>
28225  #endif /* CONFIG_NET_RADIO */
28226 +#include <linux/vs_network.h>
28227  #include <asm/current.h>
28228  
28229  /*
28230 @@ -1848,6 +1849,9 @@ static int dev_ifconf(char __user *arg)
28231  
28232         total = 0;
28233         for (dev = dev_base; dev; dev = dev->next) {
28234 +               if (vx_flags(VXF_HIDE_NETIF, 0) &&
28235 +                       !dev_in_nx_info(dev, current->nx_info))
28236 +                       continue;
28237                 for (i = 0; i < NPROTO; i++) {
28238                         if (gifconf_list[i]) {
28239                                 int done;
28240 @@ -1908,6 +1912,10 @@ void dev_seq_stop(struct seq_file *seq, 
28241  
28242  static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
28243  {
28244 +       struct nx_info *nxi = current->nx_info;
28245 +
28246 +       if (vx_flags(VXF_HIDE_NETIF, 0) && !dev_in_nx_info(dev, nxi))
28247 +               return;
28248         if (dev->get_stats) {
28249                 struct net_device_stats *stats = dev->get_stats(dev);
28250  
28251 diff -NurpP --minimal linux-2.6.16.20/net/core/rtnetlink.c linux-2.6.16.20-vs2.1.1-rc22/net/core/rtnetlink.c
28252 --- linux-2.6.16.20/net/core/rtnetlink.c        2006-02-18 14:40:39 +0100
28253 +++ linux-2.6.16.20-vs2.1.1-rc22/net/core/rtnetlink.c   2006-04-26 19:07:00 +0200
28254 @@ -278,6 +278,9 @@ static int rtnetlink_dump_ifinfo(struct 
28255         for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
28256                 if (idx < s_idx)
28257                         continue;
28258 +               if (vx_info_flags(skb->sk->sk_vx_info, VXF_HIDE_NETIF, 0) &&
28259 +                       !dev_in_nx_info(dev, skb->sk->sk_nx_info))
28260 +                       continue;
28261                 if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK,
28262                                           NETLINK_CB(cb->skb).pid,
28263                                           cb->nlh->nlmsg_seq, 0,
28264 @@ -451,6 +454,9 @@ void rtmsg_ifinfo(int type, struct net_d
28265                                sizeof(struct rtnl_link_ifmap) +
28266                                sizeof(struct rtnl_link_stats) + 128);
28267  
28268 +       if (vx_flags(VXF_HIDE_NETIF, 0) &&
28269 +               !dev_in_nx_info(dev, current->nx_info))
28270 +               return;
28271         skb = alloc_skb(size, GFP_KERNEL);
28272         if (!skb)
28273                 return;
28274 diff -NurpP --minimal linux-2.6.16.20/net/core/sock.c linux-2.6.16.20-vs2.1.1-rc22/net/core/sock.c
28275 --- linux-2.6.16.20/net/core/sock.c     2006-05-11 21:25:37 +0200
28276 +++ linux-2.6.16.20-vs2.1.1-rc22/net/core/sock.c        2006-04-26 19:07:00 +0200
28277 @@ -125,6 +125,9 @@
28278  #include <linux/ipsec.h>
28279  
28280  #include <linux/filter.h>
28281 +#include <linux/vs_socket.h>
28282 +#include <linux/vs_limit.h>
28283 +#include <linux/vs_context.h>
28284  
28285  #ifdef CONFIG_INET
28286  #include <net/tcp.h>
28287 @@ -661,6 +664,8 @@ struct sock *sk_alloc(int family, gfp_t 
28288                         sk->sk_prot = sk->sk_prot_creator = prot;
28289                         sock_lock_init(sk);
28290                 }
28291 +               sock_vx_init(sk);
28292 +               sock_nx_init(sk);
28293                 
28294                 if (security_sk_alloc(sk, family, priority))
28295                         goto out_free;
28296 @@ -699,6 +704,11 @@ void sk_free(struct sock *sk)
28297                        __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
28298  
28299         security_sk_free(sk);
28300 +       vx_sock_dec(sk);
28301 +       clr_vx_info(&sk->sk_vx_info);
28302 +       sk->sk_xid = -1;
28303 +       clr_nx_info(&sk->sk_nx_info);
28304 +       sk->sk_nid = -1;
28305         if (sk->sk_prot_creator->slab != NULL)
28306                 kmem_cache_free(sk->sk_prot_creator->slab, sk);
28307         else
28308 @@ -716,6 +726,8 @@ struct sock *sk_clone(const struct sock 
28309                 memcpy(newsk, sk, sk->sk_prot->obj_size);
28310  
28311                 /* SANITY */
28312 +               sock_vx_init(newsk);
28313 +               sock_nx_init(newsk);
28314                 sk_node_init(&newsk->sk_node);
28315                 sock_lock_init(newsk);
28316                 bh_lock_sock(newsk);
28317 @@ -756,6 +768,12 @@ struct sock *sk_clone(const struct sock 
28318                 newsk->sk_priority = 0;
28319                 atomic_set(&newsk->sk_refcnt, 2);
28320  
28321 +               set_vx_info(&newsk->sk_vx_info, sk->sk_vx_info);
28322 +               newsk->sk_xid = sk->sk_xid;
28323 +               vx_sock_inc(newsk);
28324 +               set_nx_info(&newsk->sk_nx_info, sk->sk_nx_info);
28325 +               newsk->sk_nid = sk->sk_nid;
28326 +
28327                 /*
28328                  * Increment the counter in the same struct proto as the master
28329                  * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
28330 @@ -1320,6 +1338,11 @@ void sock_init_data(struct socket *sock,
28331         sk->sk_stamp.tv_sec     = -1L;
28332         sk->sk_stamp.tv_usec    = -1L;
28333  
28334 +       set_vx_info(&sk->sk_vx_info, current->vx_info);
28335 +       sk->sk_xid = vx_current_xid();
28336 +       vx_sock_inc(sk);
28337 +       set_nx_info(&sk->sk_nx_info, current->nx_info);
28338 +       sk->sk_nid = nx_current_nid();
28339         atomic_set(&sk->sk_refcnt, 1);
28340  }
28341  
28342 diff -NurpP --minimal linux-2.6.16.20/net/ipv4/af_inet.c linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/af_inet.c
28343 --- linux-2.6.16.20/net/ipv4/af_inet.c  2006-02-18 14:40:39 +0100
28344 +++ linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/af_inet.c     2006-04-26 19:07:00 +0200
28345 @@ -114,6 +114,7 @@
28346  #ifdef CONFIG_IP_MROUTE
28347  #include <linux/mroute.h>
28348  #endif
28349 +#include <linux/vs_limit.h>
28350  
28351  DEFINE_SNMP_STAT(struct linux_mib, net_statistics) __read_mostly;
28352  
28353 @@ -282,9 +283,11 @@ lookup_protocol:
28354         }
28355  
28356         err = -EPERM;
28357 +       if ((protocol == IPPROTO_ICMP) && vx_ccaps(VXC_RAW_ICMP))
28358 +               goto override;
28359         if (answer->capability > 0 && !capable(answer->capability))
28360                 goto out_rcu_unlock;
28361 -
28362 +override:
28363         sock->ops = answer->ops;
28364         answer_prot = answer->prot;
28365         answer_no_check = answer->no_check;
28366 @@ -401,6 +404,10 @@ int inet_bind(struct socket *sock, struc
28367         unsigned short snum;
28368         int chk_addr_ret;
28369         int err;
28370 +       __u32 s_addr;   /* Address used for validation */
28371 +       __u32 s_addr1;  /* Address used for socket */
28372 +       __u32 s_addr2;  /* Broadcast address for the socket */
28373 +       struct nx_info *nxi = sk->sk_nx_info;
28374  
28375         /* If the socket has its own bind function then use it. (RAW) */
28376         if (sk->sk_prot->bind) {
28377 @@ -411,7 +418,40 @@ int inet_bind(struct socket *sock, struc
28378         if (addr_len < sizeof(struct sockaddr_in))
28379                 goto out;
28380  
28381 -       chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr);
28382 +       s_addr = addr->sin_addr.s_addr;
28383 +       s_addr1 = s_addr;
28384 +       s_addr2 = 0xffffffffl;
28385 +
28386 +       vxdprintk(VXD_CBIT(net, 3),
28387 +               "inet_bind(%p)* %p,%p;%lx %d.%d.%d.%d",
28388 +               sk, sk->sk_nx_info, sk->sk_socket,
28389 +               (sk->sk_socket?sk->sk_socket->flags:0),
28390 +               VXD_QUAD(s_addr));
28391 +       if (nxi) {
28392 +               __u32 v4_bcast = nxi->v4_bcast;
28393 +               __u32 ipv4root = nxi->ipv4[0];
28394 +               int nbipv4 = nxi->nbipv4;
28395 +
28396 +               if (s_addr == 0) {
28397 +                       /* bind to any for 1-n */
28398 +                       s_addr = ipv4root;
28399 +                       s_addr1 = (nbipv4 > 1) ? 0 : s_addr;
28400 +                       s_addr2 = v4_bcast;
28401 +               } else if (s_addr == IPI_LOOPBACK) {
28402 +                       /* rewrite localhost to ipv4root */
28403 +                       s_addr = ipv4root;
28404 +                       s_addr1 = ipv4root;
28405 +               } else if (s_addr != v4_bcast) {
28406 +                       /* normal address bind */
28407 +                       if (!addr_in_nx_info(nxi, s_addr))
28408 +                               return -EADDRNOTAVAIL;
28409 +               }
28410 +       }
28411 +       chk_addr_ret = inet_addr_type(s_addr);
28412 +
28413 +       vxdprintk(VXD_CBIT(net, 3),
28414 +               "inet_bind(%p) %d.%d.%d.%d, %d.%d.%d.%d, %d.%d.%d.%d",
28415 +               sk, VXD_QUAD(s_addr), VXD_QUAD(s_addr1), VXD_QUAD(s_addr2));
28416  
28417         /* Not specified by any standard per-se, however it breaks too
28418          * many applications when removed.  It is unfortunate since
28419 @@ -423,7 +463,7 @@ int inet_bind(struct socket *sock, struc
28420         err = -EADDRNOTAVAIL;
28421         if (!sysctl_ip_nonlocal_bind &&
28422             !inet->freebind &&
28423 -           addr->sin_addr.s_addr != INADDR_ANY &&
28424 +           s_addr != INADDR_ANY &&
28425             chk_addr_ret != RTN_LOCAL &&
28426             chk_addr_ret != RTN_MULTICAST &&
28427             chk_addr_ret != RTN_BROADCAST)
28428 @@ -448,7 +488,8 @@ int inet_bind(struct socket *sock, struc
28429         if (sk->sk_state != TCP_CLOSE || inet->num)
28430                 goto out_release_sock;
28431  
28432 -       inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr;
28433 +       inet->rcv_saddr = inet->saddr = s_addr1;
28434 +       inet->rcv_saddr2 = s_addr2;
28435         if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
28436                 inet->saddr = 0;  /* Use device */
28437  
28438 diff -NurpP --minimal linux-2.6.16.20/net/ipv4/devinet.c linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/devinet.c
28439 --- linux-2.6.16.20/net/ipv4/devinet.c  2006-02-18 14:40:39 +0100
28440 +++ linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/devinet.c     2006-04-26 19:07:00 +0200
28441 @@ -607,6 +607,9 @@ int devinet_ioctl(unsigned int cmd, void
28442                 *colon = ':';
28443  
28444         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
28445 +               struct nx_info *nxi = current->nx_info;
28446 +               int hide_netif = vx_flags(VXF_HIDE_NETIF, 0);
28447 +
28448                 if (tryaddrmatch) {
28449                         /* Matthias Andree */
28450                         /* compare label and address (4.4BSD style) */
28451 @@ -615,6 +618,8 @@ int devinet_ioctl(unsigned int cmd, void
28452                            This is checked above. */
28453                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
28454                              ifap = &ifa->ifa_next) {
28455 +                               if (hide_netif && !ifa_in_nx_info(ifa, nxi))
28456 +                                       continue;
28457                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
28458                                     sin_orig.sin_addr.s_addr ==
28459                                                         ifa->ifa_address) {
28460 @@ -627,9 +632,12 @@ int devinet_ioctl(unsigned int cmd, void
28461                    comparing just the label */
28462                 if (!ifa) {
28463                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
28464 -                            ifap = &ifa->ifa_next)
28465 +                            ifap = &ifa->ifa_next) {
28466 +                               if (hide_netif && !ifa_in_nx_info(ifa, nxi))
28467 +                                       continue;
28468                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
28469                                         break;
28470 +                       }
28471                 }
28472         }
28473  
28474 @@ -780,6 +788,9 @@ static int inet_gifconf(struct net_devic
28475                 goto out;
28476  
28477         for (; ifa; ifa = ifa->ifa_next) {
28478 +               if (vx_flags(VXF_HIDE_NETIF, 0) &&
28479 +                       !ifa_in_nx_info(ifa, current->nx_info))
28480 +                       continue;
28481                 if (!buf) {
28482                         done += sizeof(ifr);
28483                         continue;
28484 @@ -1091,6 +1102,7 @@ static int inet_dump_ifaddr(struct sk_bu
28485         struct net_device *dev;
28486         struct in_device *in_dev;
28487         struct in_ifaddr *ifa;
28488 +       struct sock *sk = skb->sk;
28489         int s_ip_idx, s_idx = cb->args[0];
28490  
28491         s_ip_idx = ip_idx = cb->args[1];
28492 @@ -1108,6 +1120,9 @@ static int inet_dump_ifaddr(struct sk_bu
28493  
28494                 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
28495                      ifa = ifa->ifa_next, ip_idx++) {
28496 +                       if (sk && vx_info_flags(sk->sk_vx_info, VXF_HIDE_NETIF, 0) &&
28497 +                               !ifa_in_nx_info(ifa, sk->sk_nx_info))
28498 +                               continue;
28499                         if (ip_idx < s_ip_idx)
28500                                 continue;
28501                         if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
28502 diff -NurpP --minimal linux-2.6.16.20/net/ipv4/fib_hash.c linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/fib_hash.c
28503 --- linux-2.6.16.20/net/ipv4/fib_hash.c 2006-04-09 13:49:59 +0200
28504 +++ linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/fib_hash.c    2006-04-26 19:07:00 +0200
28505 @@ -989,6 +989,8 @@ static unsigned fib_flag_trans(int type,
28506         return flags;
28507  }
28508  
28509 +extern int dev_in_nx_info(struct net_device *, struct nx_info *);
28510 +
28511  /* 
28512   *     This outputs /proc/net/route.
28513   *
28514 @@ -1019,7 +1021,8 @@ static int fib_seq_show(struct seq_file 
28515         prefix  = f->fn_key;
28516         mask    = FZ_MASK(iter->zone);
28517         flags   = fib_flag_trans(fa->fa_type, mask, fi);
28518 -       if (fi)
28519 +       if (fi && (!vx_flags(VXF_HIDE_NETIF, 0) ||
28520 +               dev_in_nx_info(fi->fib_dev, current->nx_info)))
28521                 snprintf(bf, sizeof(bf),
28522                          "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
28523                          fi->fib_dev ? fi->fib_dev->name : "*", prefix,
28524 diff -NurpP --minimal linux-2.6.16.20/net/ipv4/inet_connection_sock.c linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/inet_connection_sock.c
28525 --- linux-2.6.16.20/net/ipv4/inet_connection_sock.c     2006-02-18 14:40:39 +0100
28526 +++ linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/inet_connection_sock.c        2006-04-26 19:07:00 +0200
28527 @@ -40,7 +40,6 @@ int sysctl_local_port_range[2] = { 1024,
28528  int inet_csk_bind_conflict(const struct sock *sk,
28529                            const struct inet_bind_bucket *tb)
28530  {
28531 -       const u32 sk_rcv_saddr = inet_rcv_saddr(sk);
28532         struct sock *sk2;
28533         struct hlist_node *node;
28534         int reuse = sk->sk_reuse;
28535 @@ -53,9 +52,8 @@ int inet_csk_bind_conflict(const struct 
28536                      sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
28537                         if (!reuse || !sk2->sk_reuse ||
28538                             sk2->sk_state == TCP_LISTEN) {
28539 -                               const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
28540 -                               if (!sk2_rcv_saddr || !sk_rcv_saddr ||
28541 -                                   sk2_rcv_saddr == sk_rcv_saddr)
28542 +                               if (nx_addr_conflict(sk->sk_nx_info,
28543 +                                       inet_rcv_saddr(sk), sk2))
28544                                         break;
28545                         }
28546                 }
28547 diff -NurpP --minimal linux-2.6.16.20/net/ipv4/inet_diag.c linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/inet_diag.c
28548 --- linux-2.6.16.20/net/ipv4/inet_diag.c        2006-04-09 13:49:59 +0200
28549 +++ linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/inet_diag.c   2006-04-26 19:07:00 +0200
28550 @@ -694,6 +694,8 @@ static int inet_diag_dump(struct sk_buff
28551                         sk_for_each(sk, node, &hashinfo->listening_hash[i]) {
28552                                 struct inet_sock *inet = inet_sk(sk);
28553  
28554 +                               if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
28555 +                                       continue;
28556                                 if (num < s_num) {
28557                                         num++;
28558                                         continue;
28559 @@ -754,6 +756,8 @@ skip_listen_ht:
28560                 sk_for_each(sk, node, &head->chain) {
28561                         struct inet_sock *inet = inet_sk(sk);
28562  
28563 +                       if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
28564 +                               continue;
28565                         if (num < s_num)
28566                                 goto next_normal;
28567                         if (!(r->idiag_states & (1 << sk->sk_state)))
28568 @@ -778,6 +782,8 @@ next_normal:
28569                         inet_twsk_for_each(tw, node,
28570                                     &hashinfo->ehash[i + hashinfo->ehash_size].chain) {
28571  
28572 +                               if (!vx_check(tw->tw_xid, VX_IDENT|VX_WATCH))
28573 +                                       continue;
28574                                 if (num < s_num)
28575                                         goto next_dying;
28576                                 if (r->id.idiag_sport != tw->tw_sport &&
28577 diff -NurpP --minimal linux-2.6.16.20/net/ipv4/inet_hashtables.c linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/inet_hashtables.c
28578 --- linux-2.6.16.20/net/ipv4/inet_hashtables.c  2006-02-18 14:40:39 +0100
28579 +++ linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/inet_hashtables.c     2006-04-26 19:07:00 +0200
28580 @@ -143,11 +143,10 @@ struct sock *__inet_lookup_listener(cons
28581                         const __u32 rcv_saddr = inet->rcv_saddr;
28582                         int score = sk->sk_family == PF_INET ? 1 : 0;
28583  
28584 -                       if (rcv_saddr) {
28585 -                               if (rcv_saddr != daddr)
28586 -                                       continue;
28587 +                       if (inet_addr_match(sk->sk_nx_info, daddr, rcv_saddr))
28588                                 score += 2;
28589 -                       }
28590 +                       else
28591 +                               continue;
28592                         if (sk->sk_bound_dev_if) {
28593                                 if (sk->sk_bound_dev_if != dif)
28594                                         continue;
28595 diff -NurpP --minimal linux-2.6.16.20/net/ipv4/raw.c linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/raw.c
28596 --- linux-2.6.16.20/net/ipv4/raw.c      2006-02-18 14:40:41 +0100
28597 +++ linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/raw.c 2006-04-26 19:07:00 +0200
28598 @@ -102,6 +102,27 @@ static void raw_v4_unhash(struct sock *s
28599         write_unlock_bh(&raw_v4_lock);
28600  }
28601  
28602 +
28603 +/*
28604 + *     Decide whether an address is acceptable for a raw socket
28605 + *
28606 + *     nxi:            the socket's nx_info, if any
28607 + *     addr:           address to be verified
28608 + *     saddr/baddr:    the socket's two addresses; either may match
28609 + */
28610 +static inline int raw_addr_match (
28611 +       struct nx_info *nxi,
28612 +       uint32_t addr,
28613 +       uint32_t saddr,
28614 +       uint32_t baddr)
28615 +{
28616 +       /* a non-zero addr equal to either socket address is a hit */
28617 +       if (addr && (addr == saddr || addr == baddr))
28618 +               return 1;
28619 +       /* otherwise only a socket without saddr can match, via nxi */
28620 +       return saddr ? 0 : addr_in_nx_info(nxi, addr);
28621 +}
28622 +
28623  struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
28624                              unsigned long raddr, unsigned long laddr,
28625                              int dif)
28626 @@ -113,7 +134,8 @@ struct sock *__raw_v4_lookup(struct sock
28627  
28628                 if (inet->num == num                                    &&
28629                     !(inet->daddr && inet->daddr != raddr)              &&
28630 -                   !(inet->rcv_saddr && inet->rcv_saddr != laddr)      &&
28631 +                   raw_addr_match(sk->sk_nx_info, laddr,
28632 +                       inet->rcv_saddr, inet->rcv_saddr2)              &&
28633                     !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
28634                         goto found; /* gotcha */
28635         }
28636 @@ -313,6 +335,11 @@ static int raw_send_hdrinc(struct sock *
28637                 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
28638         }
28639  
28640 +       err = -EPERM;
28641 +       if (!vx_check(0, VX_ADMIN) && !capable(CAP_NET_RAW)
28642 +               && (!addr_in_nx_info(sk->sk_nx_info, iph->saddr)))
28643 +               goto error_free;
28644 +
28645         err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
28646                       dst_output);
28647         if (err > 0)
28648 @@ -324,6 +351,7 @@ out:
28649  
28650  error_fault:
28651         err = -EFAULT;
28652 +error_free:
28653         kfree_skb(skb);
28654  error:
28655         IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
28656 @@ -484,6 +512,12 @@ static int raw_sendmsg(struct kiocb *ioc
28657                 if (!inet->hdrincl)
28658                         raw_probe_proto_opt(&fl, msg);
28659  
28660 +               if (sk->sk_nx_info) {
28661 +                       err = ip_find_src(sk->sk_nx_info, &rt, &fl);
28662 +
28663 +                       if (err)
28664 +                               goto done;
28665 +               }
28666                 err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
28667         }
28668         if (err)
28669 @@ -753,7 +787,8 @@ static struct sock *raw_get_first(struct
28670                 struct hlist_node *node;
28671  
28672                 sk_for_each(sk, node, &raw_v4_htable[state->bucket])
28673 -                       if (sk->sk_family == PF_INET)
28674 +                       if (sk->sk_family == PF_INET &&
28675 +                               vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
28676                                 goto found;
28677         }
28678         sk = NULL;
28679 @@ -769,7 +804,8 @@ static struct sock *raw_get_next(struct 
28680                 sk = sk_next(sk);
28681  try_again:
28682                 ;
28683 -       } while (sk && sk->sk_family != PF_INET);
28684 +       } while (sk && (sk->sk_family != PF_INET ||
28685 +               !vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)));
28686  
28687         if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) {
28688                 sk = sk_head(&raw_v4_htable[state->bucket]);
28689 diff -NurpP --minimal linux-2.6.16.20/net/ipv4/tcp.c linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/tcp.c
28690 --- linux-2.6.16.20/net/ipv4/tcp.c      2006-04-09 13:49:59 +0200
28691 +++ linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/tcp.c 2006-04-26 19:07:00 +0200
28692 @@ -257,6 +257,7 @@
28693  #include <linux/fs.h>
28694  #include <linux/random.h>
28695  #include <linux/bootmem.h>
28696 +#include <linux/in.h>
28697  
28698  #include <net/icmp.h>
28699  #include <net/tcp.h>
28700 diff -NurpP --minimal linux-2.6.16.20/net/ipv4/tcp_ipv4.c linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/tcp_ipv4.c
28701 --- linux-2.6.16.20/net/ipv4/tcp_ipv4.c 2006-02-18 14:40:42 +0100
28702 +++ linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/tcp_ipv4.c    2006-04-26 19:07:00 +0200
28703 @@ -77,6 +77,7 @@
28704  #include <linux/stddef.h>
28705  #include <linux/proc_fs.h>
28706  #include <linux/seq_file.h>
28707 +#include <linux/vserver/debug.h>
28708  
28709  int sysctl_tcp_tw_reuse;
28710  int sysctl_tcp_low_latency;
28711 @@ -1350,6 +1351,12 @@ static void *listening_get_next(struct s
28712                 req = req->dl_next;
28713                 while (1) {
28714                         while (req) {
28715 +                               vxdprintk(VXD_CBIT(net, 6), "sk,req: %p [#%d] (from %d)",
28716 +                                       req->sk, (req->sk)?req->sk->sk_xid:0, vx_current_xid());
28717 +                               if (req->sk && !vx_check(req->sk->sk_xid, VX_IDENT|VX_WATCH)) {
28718 +                                       req = req->dl_next;     /* advance, or we spin forever */
28719 +                                       continue;
28720 +                               }
28721                                 if (req->rsk_ops->family == st->family) {
28722                                         cur = req;
28723                                         goto out;
28724 @@ -1374,6 +1381,10 @@ get_req:
28725         }
28726  get_sk:
28727         sk_for_each_from(sk, node) {
28728 +               vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)",
28729 +                       sk, sk->sk_xid, vx_current_xid());
28730 +               if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
28731 +                       continue;
28732                 if (sk->sk_family == st->family) {
28733                         cur = sk;
28734                         goto out;
28735 @@ -1425,18 +1436,26 @@ static void *established_get_first(struc
28736  
28737                 read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
28738                 sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
28739 -                       if (sk->sk_family != st->family) {
28740 +                       vxdprintk(VXD_CBIT(net, 6),
28741 +                               "sk,egf: %p [#%d] (from %d)",
28742 +                               sk, sk->sk_xid, vx_current_xid());
28743 +                       if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
28744 +                               continue;
28745 +                       if (sk->sk_family != st->family)
28746                                 continue;
28747 -                       }
28748                         rc = sk;
28749                         goto out;
28750                 }
28751                 st->state = TCP_SEQ_STATE_TIME_WAIT;
28752                 inet_twsk_for_each(tw, node,
28753                                    &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) {
28754 -                       if (tw->tw_family != st->family) {
28755 +                       vxdprintk(VXD_CBIT(net, 6),
28756 +                               "tw: %p [#%d] (from %d)",
28757 +                               tw, tw->tw_xid, vx_current_xid());
28758 +                       if (!vx_check(tw->tw_xid, VX_IDENT|VX_WATCH))
28759 +                               continue;
28760 +                       if (tw->tw_family != st->family)
28761                                 continue;
28762 -                       }
28763                         rc = tw;
28764                         goto out;
28765                 }
28766 @@ -1460,7 +1479,8 @@ static void *established_get_next(struct
28767                 tw = cur;
28768                 tw = tw_next(tw);
28769  get_tw:
28770 -               while (tw && tw->tw_family != st->family) {
28771 +               while (tw && (tw->tw_family != st->family ||
28772 +                       !vx_check(tw->tw_xid, VX_IDENT|VX_WATCH))) {
28773                         tw = tw_next(tw);
28774                 }
28775                 if (tw) {
28776 @@ -1484,6 +1504,11 @@ get_tw:
28777                 sk = sk_next(sk);
28778  
28779         sk_for_each_from(sk, node) {
28780 +               vxdprintk(VXD_CBIT(net, 6),
28781 +                       "sk,egn: %p [#%d] (from %d)",
28782 +                       sk, sk->sk_xid, vx_current_xid());
28783 +               if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
28784 +                       continue;
28785                 if (sk->sk_family == st->family)
28786                         goto found;
28787         }
28788 diff -NurpP --minimal linux-2.6.16.20/net/ipv4/tcp_minisocks.c linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/tcp_minisocks.c
28789 --- linux-2.6.16.20/net/ipv4/tcp_minisocks.c    2006-04-09 13:49:59 +0200
28790 +++ linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/tcp_minisocks.c       2006-04-26 19:07:00 +0200
28791 @@ -29,6 +29,10 @@
28792  #include <net/inet_common.h>
28793  #include <net/xfrm.h>
28794  
28795 +#include <linux/vs_limit.h>
28796 +#include <linux/vs_socket.h>
28797 +#include <linux/vs_context.h>
28798 +
28799  #ifdef CONFIG_SYSCTL
28800  #define SYNC_INIT 0 /* let the user enable it */
28801  #else
28802 @@ -295,6 +299,11 @@ void tcp_time_wait(struct sock *sk, int 
28803                 tcptw->tw_ts_recent     = tp->rx_opt.ts_recent;
28804                 tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
28805  
28806 +               tw->tw_xid              = sk->sk_xid;
28807 +               tw->tw_vx_info          = NULL;
28808 +               tw->tw_nid              = sk->sk_nid;
28809 +               tw->tw_nx_info          = NULL;
28810 +
28811  #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
28812                 if (tw->tw_family == PF_INET6) {
28813                         struct ipv6_pinfo *np = inet6_sk(sk);
28814 diff -NurpP --minimal linux-2.6.16.20/net/ipv4/udp.c linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/udp.c
28815 --- linux-2.6.16.20/net/ipv4/udp.c      2006-02-18 14:40:42 +0100
28816 +++ linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/udp.c 2006-05-21 23:37:20 +0200
28817 @@ -176,14 +176,12 @@ gotit:
28818                         struct inet_sock *inet2 = inet_sk(sk2);
28819  
28820                         if (inet2->num == snum &&
28821 -                           sk2 != sk &&
28822 -                           !ipv6_only_sock(sk2) &&
28823 +                           sk2 != sk && !ipv6_only_sock(sk2) &&
28824                             (!sk2->sk_bound_dev_if ||
28825                              !sk->sk_bound_dev_if ||
28826                              sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
28827 -                           (!inet2->rcv_saddr ||
28828 -                            !inet->rcv_saddr ||
28829 -                            inet2->rcv_saddr == inet->rcv_saddr) &&
28830 +                           nx_addr_conflict(sk->sk_nx_info,
28831 +                            inet_rcv_saddr(sk), sk2) &&
28832                             (!sk2->sk_reuse || !sk->sk_reuse))
28833                                 goto fail;
28834                 }
28835 @@ -218,6 +216,7 @@ static void udp_v4_unhash(struct sock *s
28836         write_unlock_bh(&udp_hash_lock);
28837  }
28838  
28839 +
28840  /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
28841   * harder than this. -DaveM
28842   */
28843 @@ -238,6 +237,11 @@ static struct sock *udp_v4_lookup_longwa
28844                                 if (inet->rcv_saddr != daddr)
28845                                         continue;
28846                                 score+=2;
28847 +                       } else if (sk->sk_nx_info) {
28848 +                               if (addr_in_nx_info(sk->sk_nx_info, daddr))
28849 +                                       score+=2;
28850 +                               else
28851 +                                       continue;
28852                         }
28853                         if (inet->daddr) {
28854                                 if (inet->daddr != saddr)
28855 @@ -294,7 +298,8 @@ static inline struct sock *udp_v4_mcast_
28856                 if (inet->num != hnum                                   ||
28857                     (inet->daddr && inet->daddr != rmt_addr)            ||
28858                     (inet->dport != rmt_port && inet->dport)            ||
28859 -                   (inet->rcv_saddr && inet->rcv_saddr != loc_addr)    ||
28860 +                   (inet->rcv_saddr && inet->rcv_saddr != loc_addr &&
28861 +                    inet->rcv_saddr2 && inet->rcv_saddr2 != loc_addr)  ||
28862                     ipv6_only_sock(s)                                   ||
28863                     (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
28864                         continue;
28865 @@ -604,6 +609,19 @@ int udp_sendmsg(struct kiocb *iocb, stru
28866                                     .uli_u = { .ports =
28867                                                { .sport = inet->sport,
28868                                                  .dport = dport } } };
28869 +               struct nx_info *nxi = sk->sk_nx_info;
28870 +
28871 +               if (nxi) {
28872 +                       err = ip_find_src(nxi, &rt, &fl);
28873 +                       if (err)
28874 +                               goto out;
28875 +                       if (daddr == IPI_LOOPBACK && !vx_check(0, VX_ADMIN))
28876 +                               daddr = fl.fl4_dst = nxi->ipv4[0];
28877 +#ifdef CONFIG_VSERVER_REMAP_SADDR
28878 +                       if (saddr == IPI_LOOPBACK && !vx_check(0, VX_ADMIN))
28879 +                               saddr = fl.fl4_src = nxi->ipv4[0];
28880 +#endif
28881 +               }
28882                 err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
28883                 if (err)
28884                         goto out;
28885 @@ -1370,8 +1388,10 @@ static struct sock *udp_get_first(struct
28886  
28887         for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
28888                 struct hlist_node *node;
28889 +
28890                 sk_for_each(sk, node, &udp_hash[state->bucket]) {
28891 -                       if (sk->sk_family == state->family)
28892 +                       if (sk->sk_family == state->family &&
28893 +                               vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
28894                                 goto found;
28895                 }
28896         }
28897 @@ -1388,7 +1408,8 @@ static struct sock *udp_get_next(struct 
28898                 sk = sk_next(sk);
28899  try_again:
28900                 ;
28901 -       } while (sk && sk->sk_family != state->family);
28902 +       } while (sk && (sk->sk_family != state->family ||
28903 +               !vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)));
28904  
28905         if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
28906                 sk = sk_head(&udp_hash[state->bucket]);
28907 diff -NurpP --minimal linux-2.6.16.20/net/ipv6/addrconf.c linux-2.6.16.20-vs2.1.1-rc22/net/ipv6/addrconf.c
28908 --- linux-2.6.16.20/net/ipv6/addrconf.c 2006-04-09 13:49:59 +0200
28909 +++ linux-2.6.16.20-vs2.1.1-rc22/net/ipv6/addrconf.c    2006-04-26 19:07:00 +0200
28910 @@ -2646,7 +2646,10 @@ static void if6_seq_stop(struct seq_file
28911  static int if6_seq_show(struct seq_file *seq, void *v)
28912  {
28913         struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v;
28914 -       seq_printf(seq,
28915 +
28916 +       /* no ipv6 inside a vserver for now */
28917 +       if (vx_check(0, VX_ADMIN|VX_WATCH))
28918 +               seq_printf(seq,
28919                    NIP6_SEQFMT " %02x %02x %02x %02x %8s\n",
28920                    NIP6(ifp->addr),
28921                    ifp->idev->dev->ifindex,
28922 @@ -3002,6 +3005,10 @@ static int inet6_dump_addr(struct sk_buf
28923         struct ifmcaddr6 *ifmca;
28924         struct ifacaddr6 *ifaca;
28925  
28926 +       /* no ipv6 inside a vserver for now */
28927 +       if (skb->sk && skb->sk->sk_vx_info)
28928 +               return skb->len;
28929 +
28930         s_idx = cb->args[0];
28931         s_ip_idx = ip_idx = cb->args[1];
28932         read_lock(&dev_base_lock);
28933 @@ -3207,6 +3214,10 @@ static int inet6_dump_ifinfo(struct sk_b
28934         struct net_device *dev;
28935         struct inet6_dev *idev;
28936  
28937 +       /* no ipv6 inside a vserver for now */
28938 +       if (skb->sk && skb->sk->sk_vx_info)
28939 +               return skb->len;
28940 +
28941         read_lock(&dev_base_lock);
28942         for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
28943                 if (idx < s_idx)
28944 diff -NurpP --minimal linux-2.6.16.20/net/netlink/af_netlink.c linux-2.6.16.20-vs2.1.1-rc22/net/netlink/af_netlink.c
28945 --- linux-2.6.16.20/net/netlink/af_netlink.c    2006-04-09 13:49:59 +0200
28946 +++ linux-2.6.16.20-vs2.1.1-rc22/net/netlink/af_netlink.c       2006-04-26 19:07:00 +0200
28947 @@ -56,6 +56,9 @@
28948  #include <linux/mm.h>
28949  #include <linux/types.h>
28950  #include <linux/audit.h>
28951 +#include <linux/vs_context.h>
28952 +#include <linux/vs_network.h>
28953 +#include <linux/vs_limit.h>
28954  
28955  #include <net/sock.h>
28956  #include <net/scm.h>
28957 diff -NurpP --minimal linux-2.6.16.20/net/socket.c linux-2.6.16.20-vs2.1.1-rc22/net/socket.c
28958 --- linux-2.6.16.20/net/socket.c        2006-02-18 14:40:43 +0100
28959 +++ linux-2.6.16.20-vs2.1.1-rc22/net/socket.c   2006-04-26 19:07:00 +0200
28960 @@ -96,6 +96,7 @@
28961  
28962  #include <net/sock.h>
28963  #include <linux/netfilter.h>
28964 +#include <linux/vs_socket.h>
28965  
28966  static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
28967  static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf,
28968 @@ -536,7 +537,7 @@ static inline int __sock_sendmsg(struct 
28969                                  struct msghdr *msg, size_t size)
28970  {
28971         struct sock_iocb *si = kiocb_to_siocb(iocb);
28972 -       int err;
28973 +       int err, len;
28974  
28975         si->sock = sock;
28976         si->scm = NULL;
28977 @@ -547,7 +548,21 @@ static inline int __sock_sendmsg(struct 
28978         if (err)
28979                 return err;
28980  
28981 -       return sock->ops->sendmsg(iocb, sock, msg, size);
28982 +       len = sock->ops->sendmsg(iocb, sock, msg, size);
28983 +       if (sock->sk) {
28984 +               if (len == size)
28985 +                       vx_sock_send(sock->sk, size);
28986 +               else
28987 +                       vx_sock_fail(sock->sk, size);
28988 +       }
28989 +       vxdprintk(VXD_CBIT(net, 7),
28990 +               "__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d",
28991 +               sock, sock->sk,
28992 +               (sock->sk)?sock->sk->sk_nx_info:0,
28993 +               (sock->sk)?sock->sk->sk_vx_info:0,
28994 +               (sock->sk)?sock->sk->sk_xid:0,
28995 +               (unsigned int)size, len);
28996 +       return len;
28997  }
28998  
28999  int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
29000 @@ -585,7 +600,7 @@ int kernel_sendmsg(struct socket *sock, 
29001  static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, 
29002                                  struct msghdr *msg, size_t size, int flags)
29003  {
29004 -       int err;
29005 +       int err, len;
29006         struct sock_iocb *si = kiocb_to_siocb(iocb);
29007  
29008         si->sock = sock;
29009 @@ -598,7 +613,17 @@ static inline int __sock_recvmsg(struct 
29010         if (err)
29011                 return err;
29012  
29013 -       return sock->ops->recvmsg(iocb, sock, msg, size, flags);
29014 +       len = sock->ops->recvmsg(iocb, sock, msg, size, flags);
29015 +       if ((len >= 0) && sock->sk)
29016 +               vx_sock_recv(sock->sk, len);
29017 +       vxdprintk(VXD_CBIT(net, 7),
29018 +               "__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d",
29019 +               sock, sock->sk,
29020 +               (sock->sk)?sock->sk->sk_nx_info:0,
29021 +               (sock->sk)?sock->sk->sk_vx_info:0,
29022 +               (sock->sk)?sock->sk->sk_xid:0,
29023 +               (unsigned int)size, len);
29024 +       return len;
29025  }
29026  
29027  int sock_recvmsg(struct socket *sock, struct msghdr *msg, 
29028 @@ -1088,6 +1113,10 @@ static int __sock_create(int family, int
29029         if (type < 0 || type >= SOCK_MAX)
29030                 return -EINVAL;
29031  
29032 +       /* disable IPv6 inside vservers for now */
29033 +       if (family == PF_INET6 && !vx_check(0, VX_ADMIN))
29034 +               return -EAFNOSUPPORT;
29035 +
29036         /* Compatibility.
29037  
29038            This uglymoron is moved from INET layer to here to avoid
29039 @@ -1198,6 +1227,7 @@ asmlinkage long sys_socket(int family, i
29040         if (retval < 0)
29041                 goto out;
29042  
29043 +       set_bit(SOCK_USER_SOCKET, &sock->flags);
29044         retval = sock_map_fd(sock);
29045         if (retval < 0)
29046                 goto out_release;
29047 @@ -1228,10 +1258,12 @@ asmlinkage long sys_socketpair(int famil
29048         err = sock_create(family, type, protocol, &sock1);
29049         if (err < 0)
29050                 goto out;
29051 +       set_bit(SOCK_USER_SOCKET, &sock1->flags);
29052  
29053         err = sock_create(family, type, protocol, &sock2);
29054         if (err < 0)
29055                 goto out_release_1;
29056 +       set_bit(SOCK_USER_SOCKET, &sock2->flags);
29057  
29058         err = sock1->ops->socketpair(sock1, sock2);
29059         if (err < 0) 
29060 diff -NurpP --minimal linux-2.6.16.20/net/sunrpc/auth.c linux-2.6.16.20-vs2.1.1-rc22/net/sunrpc/auth.c
29061 --- linux-2.6.16.20/net/sunrpc/auth.c   2006-02-18 14:40:43 +0100
29062 +++ linux-2.6.16.20-vs2.1.1-rc22/net/sunrpc/auth.c      2006-04-26 19:07:00 +0200
29063 @@ -13,6 +13,7 @@
29064  #include <linux/errno.h>
29065  #include <linux/sunrpc/clnt.h>
29066  #include <linux/spinlock.h>
29067 +#include <linux/vs_tag.h>
29068  
29069  #ifdef RPC_DEBUG
29070  # define RPCDBG_FACILITY       RPCDBG_AUTH
29071 @@ -251,6 +252,7 @@ rpcauth_lookupcred(struct rpc_auth *auth
29072         struct auth_cred acred = {
29073                 .uid = current->fsuid,
29074                 .gid = current->fsgid,
29075 +               .tag = dx_current_tag(),
29076                 .group_info = current->group_info,
29077         };
29078         struct rpc_cred *ret;
29079 @@ -270,6 +272,7 @@ rpcauth_bindcred(struct rpc_task *task)
29080         struct auth_cred acred = {
29081                 .uid = current->fsuid,
29082                 .gid = current->fsgid,
29083 +               .tag = dx_current_tag(),
29084                 .group_info = current->group_info,
29085         };
29086         struct rpc_cred *ret;
29087 diff -NurpP --minimal linux-2.6.16.20/net/sunrpc/auth_unix.c linux-2.6.16.20-vs2.1.1-rc22/net/sunrpc/auth_unix.c
29088 --- linux-2.6.16.20/net/sunrpc/auth_unix.c      2006-02-18 14:40:43 +0100
29089 +++ linux-2.6.16.20-vs2.1.1-rc22/net/sunrpc/auth_unix.c 2006-04-26 19:07:00 +0200
29090 @@ -11,12 +11,14 @@
29091  #include <linux/module.h>
29092  #include <linux/sunrpc/clnt.h>
29093  #include <linux/sunrpc/auth.h>
29094 +#include <linux/vs_tag.h>
29095  
29096  #define NFS_NGROUPS    16
29097  
29098  struct unx_cred {
29099         struct rpc_cred         uc_base;
29100         gid_t                   uc_gid;
29101 +       tag_t                   uc_tag;
29102         gid_t                   uc_gids[NFS_NGROUPS];
29103  };
29104  #define uc_uid                 uc_base.cr_uid
29105 @@ -78,6 +80,7 @@ unx_create_cred(struct rpc_auth *auth, s
29106         if (flags & RPCAUTH_LOOKUP_ROOTCREDS) {
29107                 cred->uc_uid = 0;
29108                 cred->uc_gid = 0;
29109 +               cred->uc_tag = dx_current_tag();
29110                 cred->uc_gids[0] = NOGROUP;
29111         } else {
29112                 int groups = acred->group_info->ngroups;
29113 @@ -86,6 +89,7 @@ unx_create_cred(struct rpc_auth *auth, s
29114  
29115                 cred->uc_uid = acred->uid;
29116                 cred->uc_gid = acred->gid;
29117 +               cred->uc_tag = acred->tag;
29118                 for (i = 0; i < groups; i++)
29119                         cred->uc_gids[i] = GROUP_AT(acred->group_info, i);
29120                 if (i < NFS_NGROUPS)
29121 @@ -117,7 +121,8 @@ unx_match(struct auth_cred *acred, struc
29122                 int groups;
29123  
29124                 if (cred->uc_uid != acred->uid
29125 -                || cred->uc_gid != acred->gid)
29126 +                || cred->uc_gid != acred->gid
29127 +                || cred->uc_tag != acred->tag)
29128                         return 0;
29129  
29130                 groups = acred->group_info->ngroups;
29131 @@ -143,7 +148,7 @@ unx_marshal(struct rpc_task *task, u32 *
29132         struct rpc_clnt *clnt = task->tk_client;
29133         struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred;
29134         u32             *base, *hold;
29135 -       int             i;
29136 +       int             i, tag;
29137  
29138         *p++ = htonl(RPC_AUTH_UNIX);
29139         base = p++;
29140 @@ -153,9 +158,12 @@ unx_marshal(struct rpc_task *task, u32 *
29141          * Copy the UTS nodename captured when the client was created.
29142          */
29143         p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
29144 +       tag = task->tk_client->cl_tag;
29145  
29146 -       *p++ = htonl((u32) cred->uc_uid);
29147 -       *p++ = htonl((u32) cred->uc_gid);
29148 +       *p++ = htonl((u32) TAGINO_UID(tag,
29149 +               cred->uc_uid, cred->uc_tag));
29150 +       *p++ = htonl((u32) TAGINO_GID(tag,
29151 +               cred->uc_gid, cred->uc_tag));
29152         hold = p++;
29153         for (i = 0; i < 16 && cred->uc_gids[i] != (gid_t) NOGROUP; i++)
29154                 *p++ = htonl((u32) cred->uc_gids[i]);
29155 diff -NurpP --minimal linux-2.6.16.20/net/sunrpc/clnt.c linux-2.6.16.20-vs2.1.1-rc22/net/sunrpc/clnt.c
29156 --- linux-2.6.16.20/net/sunrpc/clnt.c   2006-04-09 13:50:00 +0200
29157 +++ linux-2.6.16.20-vs2.1.1-rc22/net/sunrpc/clnt.c      2006-04-26 19:07:00 +0200
29158 @@ -34,6 +34,7 @@
29159  #include <linux/sunrpc/rpc_pipe_fs.h>
29160  
29161  #include <linux/nfs.h>
29162 +#include <linux/vs_cvirt.h>
29163  
29164  
29165  #define RPC_SLACK_SPACE                (1024)  /* total overkill */
29166 @@ -168,10 +169,10 @@ rpc_new_client(struct rpc_xprt *xprt, ch
29167         }
29168  
29169         /* save the nodename */
29170 -       clnt->cl_nodelen = strlen(system_utsname.nodename);
29171 +       clnt->cl_nodelen = strlen(vx_new_uts(nodename));
29172         if (clnt->cl_nodelen > UNX_MAXNODENAME)
29173                 clnt->cl_nodelen = UNX_MAXNODENAME;
29174 -       memcpy(clnt->cl_nodename, system_utsname.nodename, clnt->cl_nodelen);
29175 +       memcpy(clnt->cl_nodename, vx_new_uts(nodename), clnt->cl_nodelen);
29176         return clnt;
29177  
29178  out_no_auth:
29179 diff -NurpP --minimal linux-2.6.16.20/net/unix/af_unix.c linux-2.6.16.20-vs2.1.1-rc22/net/unix/af_unix.c
29180 --- linux-2.6.16.20/net/unix/af_unix.c  2006-04-09 13:50:00 +0200
29181 +++ linux-2.6.16.20-vs2.1.1-rc22/net/unix/af_unix.c     2006-04-26 19:07:00 +0200
29182 @@ -117,6 +117,9 @@
29183  #include <linux/mount.h>
29184  #include <net/checksum.h>
29185  #include <linux/security.h>
29186 +#include <linux/vs_context.h>
29187 +#include <linux/vs_network.h>
29188 +#include <linux/vs_limit.h>
29189  
29190  int sysctl_unix_max_dgram_qlen = 10;
29191  
29192 @@ -235,6 +238,8 @@ static struct sock *__unix_find_socket_b
29193         sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
29194                 struct unix_sock *u = unix_sk(s);
29195  
29196 +               if (!vx_check(s->sk_xid, VX_IDENT|VX_WATCH))
29197 +                       continue;
29198                 if (u->addr->len == len &&
29199                     !memcmp(u->addr->name, sunname, len))
29200                         goto found;
29201 @@ -781,7 +786,7 @@ static int unix_bind(struct socket *sock
29202                  */
29203                 mode = S_IFSOCK |
29204                        (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
29205 -               err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
29206 +               err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0, NULL);
29207                 if (err)
29208                         goto out_mknod_dput;
29209                 mutex_unlock(&nd.dentry->d_inode->i_mutex);
29210 diff -NurpP --minimal linux-2.6.16.20/net/x25/af_x25.c linux-2.6.16.20-vs2.1.1-rc22/net/x25/af_x25.c
29211 --- linux-2.6.16.20/net/x25/af_x25.c    2006-02-18 14:40:43 +0100
29212 +++ linux-2.6.16.20-vs2.1.1-rc22/net/x25/af_x25.c       2006-04-26 19:07:00 +0200
29213 @@ -491,7 +491,10 @@ static int x25_create(struct socket *soc
29214  
29215         x25 = x25_sk(sk);
29216  
29217 -       sock_init_data(sock, sk);
29218 +       sk->sk_socket = sock;
29219 +       sk->sk_type = sock->type;
29220 +       sk->sk_sleep = &sock->wait;
29221 +       sock->sk = sk;
29222  
29223         x25_init_timers(sk);
29224  
29225 diff -NurpP --minimal linux-2.6.16.20/security/commoncap.c linux-2.6.16.20-vs2.1.1-rc22/security/commoncap.c
29226 --- linux-2.6.16.20/security/commoncap.c        2006-02-18 14:40:44 +0100
29227 +++ linux-2.6.16.20-vs2.1.1-rc22/security/commoncap.c   2006-04-28 04:54:41 +0200
29228 @@ -27,7 +27,7 @@
29229  
29230  int cap_netlink_send(struct sock *sk, struct sk_buff *skb)
29231  {
29232 -       NETLINK_CB(skb).eff_cap = current->cap_effective;
29233 +       cap_t(NETLINK_CB(skb).eff_cap) = vx_mbcap(cap_effective);
29234         return 0;
29235  }
29236  
29237 @@ -45,7 +45,7 @@ EXPORT_SYMBOL(cap_netlink_recv);
29238  int cap_capable (struct task_struct *tsk, int cap)
29239  {
29240         /* Derived from include/linux/sched.h:capable. */
29241 -       if (cap_raised(tsk->cap_effective, cap))
29242 +       if (vx_cap_raised(tsk->vx_info, tsk->cap_effective, cap))
29243                 return 0;
29244         return -EPERM;
29245  }
29246 @@ -143,7 +143,8 @@ void cap_bprm_apply_creds (struct linux_
29247         /* Derived from fs/exec.c:compute_creds. */
29248         kernel_cap_t new_permitted, working;
29249  
29250 -       new_permitted = cap_intersect (bprm->cap_permitted, cap_bset);
29251 +       new_permitted = cap_intersect (bprm->cap_permitted,
29252 +                                       vx_current_cap_bset());
29253         working = cap_intersect (bprm->cap_inheritable,
29254                                  current->cap_inheritable);
29255         new_permitted = cap_combine (new_permitted, working);
29256 @@ -312,7 +313,8 @@ void cap_task_reparent_to_init (struct t
29257  
29258  int cap_syslog (int type)
29259  {
29260 -       if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN))
29261 +       if ((type != 3 && type != 10) &&
29262 +               !vx_capable(CAP_SYS_ADMIN, VXC_SYSLOG))
29263                 return -EPERM;
29264         return 0;
29265  }
29266 diff -NurpP --minimal linux-2.6.16.20/security/dummy.c linux-2.6.16.20-vs2.1.1-rc22/security/dummy.c
29267 --- linux-2.6.16.20/security/dummy.c    2006-04-09 13:50:00 +0200
29268 +++ linux-2.6.16.20-vs2.1.1-rc22/security/dummy.c       2006-04-27 20:29:01 +0200
29269 @@ -85,7 +85,7 @@ static int dummy_sysctl (ctl_table * tab
29270         return 0;
29271  }
29272  
29273 -static int dummy_quotactl (int cmds, int type, int id, struct super_block *sb)
29274 +static int dummy_quotactl (int cmds, int type, int id, struct dqhash *hash)
29275  {
29276         return 0;
29277  }
29278 @@ -656,7 +656,7 @@ static int dummy_sem_semop (struct sem_a
29279  
29280  static int dummy_netlink_send (struct sock *sk, struct sk_buff *skb)
29281  {
29282 -       NETLINK_CB(skb).eff_cap = current->cap_effective;
29283 +       cap_t(NETLINK_CB(skb).eff_cap) = vx_mbcap(cap_effective);
29284         return 0;
29285  }
29286  
29287 diff -NurpP --minimal linux-2.6.16.20/security/security.c linux-2.6.16.20-vs2.1.1-rc22/security/security.c
29288 --- linux-2.6.16.20/security/security.c 2006-02-18 14:40:44 +0100
29289 +++ linux-2.6.16.20-vs2.1.1-rc22/security/security.c    2006-04-27 21:33:12 +0200
29290 @@ -186,6 +186,8 @@ int mod_unreg_security(const char *name,
29291   */
29292  int capable(int cap)
29293  {
29294 +       if (vx_check_bit(VXC_CAP_MASK, cap) && !vx_mcaps(1L << cap))
29295 +               return 0;
29296         if (security_ops->capable(current, cap)) {
29297                 /* capability denied */
29298                 return 0;
29299 @@ -196,6 +198,7 @@ int capable(int cap)
29300         return 1;
29301  }
29302  
29303 +
29304  EXPORT_SYMBOL_GPL(register_security);
29305  EXPORT_SYMBOL_GPL(unregister_security);
29306  EXPORT_SYMBOL_GPL(mod_reg_security);
29307 diff -NurpP --minimal linux-2.6.16.20/security/selinux/hooks.c linux-2.6.16.20-vs2.1.1-rc22/security/selinux/hooks.c
29308 --- linux-2.6.16.20/security/selinux/hooks.c    2006-04-09 13:50:00 +0200
29309 +++ linux-2.6.16.20-vs2.1.1-rc22/security/selinux/hooks.c       2006-04-26 19:07:00 +0200
29310 @@ -1348,9 +1348,10 @@ static int selinux_sysctl(ctl_table *tab
29311         return error;
29312  }
29313  
29314 -static int selinux_quotactl(int cmds, int type, int id, struct super_block *sb)
29315 +static int selinux_quotactl(int cmds, int type, int id, struct dqhash *hash)
29316  {
29317         int rc = 0;
29318 +       struct super_block *sb = hash->dqh_sb;
29319  
29320         if (!sb)
29321                 return 0;
This page took 2.311901 seconds and 3 git commands to generate.